ar_indexer 0.1.2 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +5 -3
- data/ar_indexer.gemspec +16 -16
- data/lib/ar_indexer.rb +4 -3
- data/lib/ar_indexer/has_reverse_index.rb +96 -66
- data/lib/ar_indexer/index_search.rb +97 -43
- data/lib/ar_indexer/indexer.rb +158 -107
- data/lib/ar_indexer/reverse_index.rb +17 -9
- data/lib/ar_indexer/stopwords.rb +180 -178
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a949f827bc86002e7bb7241edb513fbe452b51ca
|
4
|
+
data.tar.gz: e04f080e7cd9be0fa8dfb552578cac54c079de7d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f5841ed351d64accee8237245a8ce732aa2784ba32f3189f3dd1f36c2467f25859d8b08edc674b2291be487ff4799fcc7d5dbdf527ecb63a6261c120fa889459
|
7
|
+
data.tar.gz: 930f41e303645f21a73aa061f0557a742ad464af9568285befb81fd2eca1961eb9ebe2f2a550ead28a35b7fad538853fb7c793c753773af33c7ffcc380fe35f8
|
data/README.md
CHANGED
@@ -41,13 +41,15 @@ At this point, ARIndexer will build and maintain a reverse index for each record
|
|
41
41
|
|
42
42
|
###Searching###
|
43
43
|
|
44
|
-
ARIndexer also provides a simple search class for finding records by text search. To initialize an instance of this class, just pass it
|
44
|
+
ARIndexer also provides a simple search class for finding records by text search. To initialize an instance of this class, just pass it an array of ActiveRecord models it needs to search.
|
45
45
|
|
46
|
-
foo = IndexSearch.new(Article)
|
46
|
+
foo = IndexSearch.new([Article])
|
47
|
+
# Or search multiple models
|
48
|
+
# foo = IndexSearch.new([Article, List])
|
47
49
|
|
48
50
|
You can also pass an options hash (which currently has a whole 1 option, `:no_results_message`)
|
49
51
|
|
50
|
-
foo = IndexSearch.new(Article, :no_results_message => "Hey man, there's nothing there.")
|
52
|
+
foo = IndexSearch.new([Article], :no_results_message => "Hey man, there's nothing there.")
|
51
53
|
|
52
54
|
And now you're ready to search against the index that's been built.
|
53
55
|
|
data/ar_indexer.gemspec
CHANGED
@@ -1,18 +1,18 @@
|
|
1
1
|
Gem::Specification.new do |spec|
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
2
|
+
spec.name = "ar_indexer"
|
3
|
+
spec.version = "0.1.4"
|
4
|
+
spec.date = "2014-02-18"
|
5
|
+
spec.summary = "Allows for reverse indexing selected ActiveRecord models. Handles searching and return of objects"
|
6
|
+
spec.description = spec.summary
|
7
|
+
spec.authors = ["Josh Stump"]
|
8
|
+
spec.email = "joshua.t.stump@gmail.com"
|
9
|
+
spec.homepage = "https://github.com/jstump/ar_indexer"
|
10
|
+
spec.require_paths = ["lib"]
|
11
|
+
spec.files = `git ls-files`.split("\n")
|
12
|
+
spec.license = "GPL-2"
|
13
|
+
|
14
|
+
spec.add_dependency('activerecord')
|
15
|
+
spec.add_dependency('activesupport')
|
16
|
+
spec.add_dependency('htmlentities')
|
17
|
+
spec.add_dependency('fast-stemmer')
|
18
18
|
end
|
data/lib/ar_indexer.rb
CHANGED
@@ -1,19 +1,20 @@
|
|
1
|
-
# Requires for gem dependencies
|
2
1
|
require 'active_record'
|
3
2
|
require 'active_support'
|
4
3
|
require 'active_support/inflector'
|
5
4
|
require 'htmlentities'
|
6
5
|
require 'fast-stemmer'
|
7
|
-
|
6
|
+
|
8
7
|
require 'ar_indexer/reverse_index'
|
9
8
|
require 'ar_indexer/has_reverse_index'
|
10
9
|
require 'ar_indexer/stopwords'
|
11
10
|
require 'ar_indexer/indexer'
|
12
11
|
require 'ar_indexer/index_search'
|
13
12
|
|
13
|
+
# Main gem module
|
14
|
+
|
14
15
|
module ARIndexer
|
15
16
|
|
16
|
-
|
17
|
+
VERSION = "0.1.2"
|
17
18
|
|
18
19
|
end
|
19
20
|
|
@@ -1,70 +1,100 @@
|
|
1
1
|
module ARIndexer
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
3
|
+
# Holds methods that are used to extend ActiveRecord models.
|
4
|
+
# Included automatically whenever ActiveRecord is required
|
5
|
+
|
6
|
+
module Model
|
7
|
+
|
8
|
+
# Extends a specified ActiveRecord model by adding the functions within the ClassMethods module.
|
9
|
+
# Called automatically on all ActiveRecord models
|
10
|
+
|
11
|
+
def self.included(base)
|
12
|
+
base.send :extend, ClassMethods
|
13
|
+
end
|
14
|
+
|
15
|
+
# Class methods that can be called on any ActiveRecord model to extend functionality
|
16
|
+
|
17
|
+
module ClassMethods
|
18
|
+
|
19
|
+
# Marks all string and text fields (or a subset thereof) of an ActiveRecord model
|
20
|
+
# for indexing and adds a necessary set of instance methods.
|
21
|
+
# If the [fields] parameter is set, indexes only the specified fields,
|
22
|
+
# otherwise indexes all string and text fields.
|
23
|
+
#
|
24
|
+
# ==== Parameters
|
25
|
+
#
|
26
|
+
# * fields: optional array of field names (as symbols) to be indexed
|
27
|
+
#
|
28
|
+
# ==== Examples
|
29
|
+
#
|
30
|
+
# class Post < ActiveRecord::Base
|
31
|
+
# has_reverse_index
|
32
|
+
# end
|
33
|
+
#
|
34
|
+
# class Article < ActiveRecord::Base
|
35
|
+
# has_reverse_index([:title, :content])
|
36
|
+
# end
|
37
|
+
|
38
|
+
def has_reverse_index(fields = [])
|
39
|
+
send :include, InstanceMethods
|
40
|
+
|
41
|
+
class_attribute :indexed_fields
|
42
|
+
self.indexed_fields = fields.dup
|
43
|
+
|
44
|
+
after_create :on_create_record
|
45
|
+
after_update :on_update_record
|
46
|
+
before_destroy :on_destroy_record
|
47
|
+
end
|
48
|
+
module_function :has_reverse_index
|
49
|
+
|
50
|
+
# Instance methods available to instances of an ActiveRecord model which has had has_reverse_index()
|
51
|
+
# called on it. Currently, there are no public instance methods.
|
52
|
+
|
53
|
+
module InstanceMethods
|
54
|
+
|
55
|
+
private
|
56
|
+
|
57
|
+
def array_of_values_to_index
|
58
|
+
values_for_indexing = []
|
59
|
+
if self.indexed_fields.empty?
|
60
|
+
self.class.columns.each do |c|
|
61
|
+
if ['string', 'text'].include? c.type.to_s
|
62
|
+
values_for_indexing << self.read_attribute(c.name)
|
63
|
+
end
|
64
|
+
end
|
65
|
+
else
|
66
|
+
self.indexed_fields.each do |f|
|
67
|
+
if ['string', 'text'].include? self.class.columns_hash[f.to_s].type.to_s
|
68
|
+
values_for_indexing << self.read_attribute(f.to_s)
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
72
|
+
values_for_indexing.delete_if {|v| [nil, ''].include? v}
|
73
|
+
return values_for_indexing
|
74
|
+
end
|
75
|
+
|
76
|
+
def on_create_record
|
77
|
+
values_for_indexing = array_of_values_to_index
|
78
|
+
unless values_for_indexing.empty?
|
79
|
+
Indexer.build_reverse_index(self.class.to_s.split('::').last.to_s, self.id, values_for_indexing, false)
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
83
|
+
def on_update_record
|
84
|
+
values_for_indexing = array_of_values_to_index
|
85
|
+
unless values_for_indexing.empty?
|
86
|
+
Indexer.build_reverse_index(self.class.to_s.split('::').last.to_s, self.id, values_for_indexing, true)
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
90
|
+
def on_destroy_record
|
91
|
+
Indexer.remove_from_reverse_index(self.class.to_s.split('::').last.to_s, self.id)
|
92
|
+
end
|
93
|
+
|
94
|
+
end
|
95
|
+
|
96
|
+
end
|
97
|
+
|
98
|
+
end
|
69
99
|
|
70
100
|
end
|
@@ -1,47 +1,101 @@
|
|
1
1
|
module ARIndexer
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
3
|
+
# Simple class for searching the index generated by adding has_reverse_index to ActiveRecord models
|
4
|
+
#
|
5
|
+
# === Initialization
|
6
|
+
#
|
7
|
+
# foo = IndexSearch.new([Article], {:no_results_message => "We're sorry, but no results were found"})
|
8
|
+
#
|
9
|
+
# === Parameters
|
10
|
+
#
|
11
|
+
# * [model_names]: array of constants denoting the ActiveRecord models to search
|
12
|
+
# * opts: optional hash of configuration options
|
13
|
+
#
|
14
|
+
# === Options
|
15
|
+
#
|
16
|
+
# :no_results_message => string which can be requested and displayed if no results are found
|
17
|
+
|
18
|
+
class IndexSearch
|
19
|
+
|
20
|
+
def initialize(model_names, opts = {})
|
21
|
+
@models = {}
|
22
|
+
model_names.each do |model|
|
23
|
+
@models[model.to_s.split('::').last] = model
|
24
|
+
end
|
25
|
+
puts @models
|
26
|
+
|
27
|
+
@options = {
|
28
|
+
:no_results_message => "Your #{@model_name} search returned no results."
|
29
|
+
}
|
30
|
+
|
31
|
+
@options.merge!(opts)
|
32
|
+
end
|
33
|
+
|
34
|
+
# Returns either the default :no_results_message, or the one specified on initialization
|
35
|
+
#
|
36
|
+
# ==== Returns
|
37
|
+
#
|
38
|
+
# string
|
39
|
+
|
40
|
+
def no_results_message
|
41
|
+
return @options[:no_results_message]
|
42
|
+
end
|
43
|
+
|
44
|
+
# Runs a text search against the ReverseIndex model for a given ActiveRecord model and search string.
|
45
|
+
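# Returns the matching records ordered by how many of the expanded search terms they matched, most matches first.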
|
46
|
+
#
|
47
|
+
# ==== Parameters
|
48
|
+
#
|
49
|
+
# * search_string: string on which to search for indexed objects
|
50
|
+
#
|
51
|
+
# ==== Returns
|
52
|
+
#
|
53
|
+
# array of ActiveRecord objects, or an empty array
|
54
|
+
#
|
55
|
+
# ==== Examples
|
56
|
+
#
|
57
|
+
# foo = IndexSearch.new([Article])
|
58
|
+
# foo.run_search("test string")
|
59
|
+
# #=> [<#Article>, <#Article>, ...]
|
60
|
+
|
61
|
+
def run_search(search_string)
|
62
|
+
search_terms = Indexer.expand_lexicon(Indexer.text_to_lexicon(search_string))
|
63
|
+
matches = {}
|
64
|
+
@models.keys.each do |model|
|
65
|
+
matches[model] = {}
|
66
|
+
end
|
67
|
+
search_terms.each do |st|
|
68
|
+
reverse_index_records = ReverseIndex.where(:model_name => [@models.keys], :word => st)
|
69
|
+
if reverse_index_records.count > 0
|
70
|
+
reverse_index_records.each do |rir|
|
71
|
+
rir.retrieve_id_array.each do |id|
|
72
|
+
if matches[rir.model_name].has_key?(id)
|
73
|
+
matches[rir.model_name][id] = matches[rir.model_name][id] + 1
|
74
|
+
else
|
75
|
+
matches[rir.model_name][id] = 1
|
76
|
+
end
|
77
|
+
end
|
78
|
+
end
|
79
|
+
end
|
80
|
+
end
|
81
|
+
collected_matches = []
|
82
|
+
matches.each do |k,v|
|
83
|
+
unless v.empty?
|
84
|
+
collected_matches << v.to_a.map{|x| x << k}
|
85
|
+
end
|
86
|
+
end
|
87
|
+
unless collected_matches.empty?
|
88
|
+
collected_matches.flatten!(1)
|
89
|
+
objects_to_return = []
|
90
|
+
collected_matches.sort{|x,y| x[1] <=> y[1]}.reverse.each do |match|
|
91
|
+
objects_to_return << @models[match[2]].find(match[0])
|
92
|
+
end
|
93
|
+
return objects_to_return
|
94
|
+
else
|
95
|
+
return []
|
96
|
+
end
|
97
|
+
end
|
98
|
+
|
99
|
+
end
|
46
100
|
|
47
101
|
end
|
data/lib/ar_indexer/indexer.rb
CHANGED
@@ -1,111 +1,162 @@
|
|
1
1
|
module ARIndexer
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
3
|
+
# Contains functions for creating a forward index from text, then converting it to a reverse index
|
4
|
+
|
5
|
+
module Indexer
|
6
|
+
|
7
|
+
# Turns a string into a lexicon array: strips HTML markup and punctuation, lowercases the text, and removes stopwords and duplicates
|
8
|
+
#
|
9
|
+
# ==== Parameters
|
10
|
+
#
|
11
|
+
# text: the string to be converted
|
12
|
+
#
|
13
|
+
# ==== Returns
|
14
|
+
#
|
15
|
+
# array of strings
|
16
|
+
|
17
|
+
def self.text_to_lexicon(text)
|
18
|
+
# Remove HTML markup
|
19
|
+
text.gsub!(/<[^>]+>/, ' ')
|
20
|
+
# Decode HTML entities
|
21
|
+
coder = HTMLEntities.new
|
22
|
+
text = coder.decode(text)
|
23
|
+
# Remove most punctuation
|
24
|
+
text.gsub!(/[^a-zA-Z0-9\s]/, '')
|
25
|
+
# Move everything to lower case
|
26
|
+
text.downcase!
|
27
|
+
# Split all words into an array
|
28
|
+
lexicon = text.split(' ')
|
29
|
+
# Remove stopwords and duplicates
|
30
|
+
lexicon = (lexicon - Stopwords::STOPWORDS).uniq
|
31
|
+
return lexicon
|
32
|
+
end
|
33
|
+
|
34
|
+
# Expands the lexicon created by text_to_lexicon, adding plurals and root words
|
35
|
+
#
|
36
|
+
# ==== Parameters
|
37
|
+
#
|
38
|
+
# lexicon: array of strings to be expanded
|
39
|
+
#
|
40
|
+
# ==== Returns
|
41
|
+
#
|
42
|
+
# array of strings
|
43
|
+
|
44
|
+
def self.expand_lexicon(lexicon)
|
45
|
+
# Stem and pluralize
|
46
|
+
lexicon.each do |word|
|
47
|
+
root = Stemmer::stem_word(word)
|
48
|
+
if !lexicon.include? root
|
49
|
+
lexicon = lexicon.inject([root], :<<)
|
50
|
+
end
|
51
|
+
plural = word.pluralize
|
52
|
+
if !lexicon.include? plural
|
53
|
+
lexicon = lexicon.inject([plural], :<<)
|
54
|
+
end
|
55
|
+
end
|
56
|
+
# Remove stopwords and duplicates again
|
57
|
+
lexicon = (lexicon - Stopwords::STOPWORDS).uniq
|
58
|
+
return lexicon
|
59
|
+
end
|
60
|
+
|
61
|
+
# Takes an array of strings to be indexed, and calls text_to_lexicon on each.
|
62
|
+
# Returns the combined array flattened, uniquified, and sorted in alphabetical order
|
63
|
+
#
|
64
|
+
# ==== Parameters
|
65
|
+
#
|
66
|
+
# values_to_index: array of string values to index
|
67
|
+
#
|
68
|
+
# ==== Returns
|
69
|
+
#
|
70
|
+
# array of strings
|
71
|
+
|
72
|
+
def self.build_forward_index(values_to_index)
|
73
|
+
forward_index = []
|
74
|
+
# Run text_to_lexicon for each indexed field
|
75
|
+
values_to_index.each do |v|
|
76
|
+
forward_index << self.text_to_lexicon(v) if ![nil, ''].include? v
|
77
|
+
end
|
78
|
+
# Return the lexicon flattened, uniquified, and in alphabetical order
|
79
|
+
forward_index = forward_index.flatten.uniq.sort
|
80
|
+
return forward_index
|
81
|
+
end
|
82
|
+
|
83
|
+
# For a given model name and object id, compares the list of words with the forward index of the text.
|
84
|
+
# If a word exists in the reverse index but not the forward index, removes the object id from the reverse index.
|
85
|
+
# If the id array is empty, removes the reverse index record
|
86
|
+
#
|
87
|
+
# ==== Parameters
|
88
|
+
#
|
89
|
+
# * model_name: string version of the model name to clean records for
|
90
|
+
# * record_id: object id to search for in the reverse index
|
91
|
+
# * forward_index: the array of words to check against
|
92
|
+
|
93
|
+
def self.clean_reverse_index(model_name, record_id, forward_index)
|
94
|
+
reverse_index_records = ReverseIndex.where(:model_name => model_name)
|
95
|
+
reverse_index_records.each do |rir|
|
96
|
+
if rir.id_list.match(/,{0,1}#{record_id},{0,1}/)
|
97
|
+
if !forward_index.include? rir.word
|
98
|
+
id_array = rir.retrieve_id_array
|
99
|
+
id_array.delete(record_id.to_i)
|
100
|
+
if id_array.empty?
|
101
|
+
rir.destroy
|
102
|
+
else
|
103
|
+
new_id_list = id_array.join(',')
|
104
|
+
rir.update(:id_list => new_id_list)
|
105
|
+
end
|
106
|
+
end
|
107
|
+
end
|
108
|
+
end
|
109
|
+
end
|
110
|
+
|
111
|
+
# Takes an array of values to index, runs it through build_forward_index(), then builds the reverse index
|
112
|
+
# from the returned values
|
113
|
+
#
|
114
|
+
# ==== Parameters
|
115
|
+
#
|
116
|
+
# * model_name: the string version of the model name
|
117
|
+
# * record_id: the id of the object being indexed
|
118
|
+
# * values_to_index: array of string objects to use in building the reverse index
|
119
|
+
# * cleaning_required: boolean flag, whether or not to run clean_reverse_index()
|
120
|
+
|
121
|
+
def self.build_reverse_index(model_name, record_id, values_to_index, cleaning_required = false)
|
122
|
+
forward_index = self.build_forward_index(values_to_index)
|
123
|
+
forward_index.each do |word|
|
124
|
+
if reverse_index_record = ReverseIndex.where(:model_name => model_name, :word => word).first
|
125
|
+
id_array = reverse_index_record.retrieve_id_array
|
126
|
+
if !id_array.include? record_id
|
127
|
+
new_id_list = (id_array << record_id).join(',')
|
128
|
+
reverse_index_record.update(:id_list => new_id_list)
|
129
|
+
end
|
130
|
+
else
|
131
|
+
ReverseIndex.create(:model_name => model_name, :word => word, :id_list => record_id)
|
132
|
+
end
|
133
|
+
end
|
134
|
+
self.clean_reverse_index(model_name, record_id, forward_index) if cleaning_required
|
135
|
+
end
|
136
|
+
|
137
|
+
# Removes an object id from the reverse index for a specified model.
|
138
|
+
# If the id array is empty after removing the record id, destroys the reverse index record
|
139
|
+
#
|
140
|
+
# ==== Parameters
|
141
|
+
#
|
142
|
+
# model_name: string version of the model name to remove records for
|
143
|
+
# record_id: object id to remove records for
|
144
|
+
|
145
|
+
def self.remove_from_reverse_index(model_name, record_id)
|
146
|
+
reverse_index_records = ReverseIndex.where(:model_name => model_name)
|
147
|
+
reverse_index_records.each do |rir|
|
148
|
+
id_array = rir.retrieve_id_array
|
149
|
+
if id_array.include? record_id.to_i
|
150
|
+
id_array.delete(record_id.to_i)
|
151
|
+
if id_array.empty?
|
152
|
+
rir.destroy
|
153
|
+
else
|
154
|
+
rir.update(:id_list => id_array.join(','))
|
155
|
+
end
|
156
|
+
end
|
157
|
+
end
|
158
|
+
end
|
159
|
+
|
160
|
+
end
|
110
161
|
|
111
162
|
end
|
@@ -1,17 +1,25 @@
|
|
1
1
|
module ARIndexer
|
2
2
|
|
3
|
-
|
3
|
+
class ReverseIndex < ::ActiveRecord::Base
|
4
4
|
|
5
|
-
|
5
|
+
if ::ActiveRecord::VERSION::MAJOR < 4
|
6
|
+
attr_accessible :id_list, :model_name, :word
|
7
|
+
end
|
6
8
|
|
7
|
-
|
9
|
+
validates_uniqueness_of :word, :scope => :model_name
|
8
10
|
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
11
|
+
# Retrieves the id_list property as a parsed array of integers
|
12
|
+
#
|
13
|
+
# ==== Returns
|
14
|
+
#
|
15
|
+
# array of integers
|
14
16
|
|
15
|
-
|
17
|
+
def retrieve_id_array
|
18
|
+
id_array = self.id_list.split(',')
|
19
|
+
id_array.map! {|id| id.to_i}
|
20
|
+
return id_array
|
21
|
+
end
|
22
|
+
|
23
|
+
end
|
16
24
|
|
17
25
|
end
|
data/lib/ar_indexer/stopwords.rb
CHANGED
@@ -1,184 +1,186 @@
|
|
1
1
|
module ARIndexer
|
2
2
|
|
3
|
-
|
3
|
+
# Contains array of stopwords that should not be indexed
|
4
4
|
|
5
|
-
|
6
|
-
"a",
|
7
|
-
"about",
|
8
|
-
"above",
|
9
|
-
"after",
|
10
|
-
"again",
|
11
|
-
"against",
|
12
|
-
"all",
|
13
|
-
"am",
|
14
|
-
"an",
|
15
|
-
"and",
|
16
|
-
"any",
|
17
|
-
"are",
|
18
|
-
"aren't",
|
19
|
-
"as",
|
20
|
-
"at",
|
21
|
-
"be",
|
22
|
-
"because",
|
23
|
-
"been",
|
24
|
-
"before",
|
25
|
-
"being",
|
26
|
-
"below",
|
27
|
-
"between",
|
28
|
-
"both",
|
29
|
-
"but",
|
30
|
-
"by",
|
31
|
-
"can't",
|
32
|
-
"cannot",
|
33
|
-
"could",
|
34
|
-
"couldn't",
|
35
|
-
"did",
|
36
|
-
"didn't",
|
37
|
-
"do",
|
38
|
-
"does",
|
39
|
-
"doesn't",
|
40
|
-
"doing",
|
41
|
-
"don't",
|
42
|
-
"down",
|
43
|
-
"during",
|
44
|
-
"each",
|
45
|
-
"few",
|
46
|
-
"for",
|
47
|
-
"from",
|
48
|
-
"further",
|
49
|
-
"had",
|
50
|
-
"hadn't",
|
51
|
-
"has",
|
52
|
-
"hasn't",
|
53
|
-
"have",
|
54
|
-
"haven't",
|
55
|
-
"having",
|
56
|
-
"he",
|
57
|
-
"he'd",
|
58
|
-
"he'll",
|
59
|
-
"he's",
|
60
|
-
"her",
|
61
|
-
"here",
|
62
|
-
"here's",
|
63
|
-
"hers",
|
64
|
-
"herself",
|
65
|
-
"him",
|
66
|
-
"himself",
|
67
|
-
"his",
|
68
|
-
"how",
|
69
|
-
"how's",
|
70
|
-
"i",
|
71
|
-
"i'd",
|
72
|
-
"i'll",
|
73
|
-
"i'm",
|
74
|
-
"i've",
|
75
|
-
"if",
|
76
|
-
"in",
|
77
|
-
"into",
|
78
|
-
"is",
|
79
|
-
"isn't",
|
80
|
-
"it",
|
81
|
-
"it's",
|
82
|
-
"its",
|
83
|
-
"itself",
|
84
|
-
"let's",
|
85
|
-
"me",
|
86
|
-
"more",
|
87
|
-
"most",
|
88
|
-
"mustn't",
|
89
|
-
"my",
|
90
|
-
"myself",
|
91
|
-
"no",
|
92
|
-
"nor",
|
93
|
-
"not",
|
94
|
-
"of",
|
95
|
-
"off",
|
96
|
-
"on",
|
97
|
-
"once",
|
98
|
-
"only",
|
99
|
-
"or",
|
100
|
-
"other",
|
101
|
-
"ought",
|
102
|
-
"our",
|
103
|
-
"ours",
|
104
|
-
"ourselves",
|
105
|
-
"out",
|
106
|
-
"over",
|
107
|
-
"own",
|
108
|
-
"same",
|
109
|
-
"shan't",
|
110
|
-
"she",
|
111
|
-
"she'd",
|
112
|
-
"she'll",
|
113
|
-
"she's",
|
114
|
-
"should",
|
115
|
-
"shouldn't",
|
116
|
-
"so",
|
117
|
-
"some",
|
118
|
-
"such",
|
119
|
-
"than",
|
120
|
-
"that",
|
121
|
-
"that's",
|
122
|
-
"the",
|
123
|
-
"their",
|
124
|
-
"theirs",
|
125
|
-
"them",
|
126
|
-
"themselves",
|
127
|
-
"then",
|
128
|
-
"there",
|
129
|
-
"there's",
|
130
|
-
"these",
|
131
|
-
"they",
|
132
|
-
"they'd",
|
133
|
-
"they'll",
|
134
|
-
"they're",
|
135
|
-
"they've",
|
136
|
-
"this",
|
137
|
-
"those",
|
138
|
-
"through",
|
139
|
-
"to",
|
140
|
-
"too",
|
141
|
-
"under",
|
142
|
-
"until",
|
143
|
-
"up",
|
144
|
-
"very",
|
145
|
-
"was",
|
146
|
-
"wasn't",
|
147
|
-
"we",
|
148
|
-
"we'd",
|
149
|
-
"we'll",
|
150
|
-
"we're",
|
151
|
-
"we've",
|
152
|
-
"were",
|
153
|
-
"weren't",
|
154
|
-
"what",
|
155
|
-
"what's",
|
156
|
-
"when",
|
157
|
-
"when's",
|
158
|
-
"where",
|
159
|
-
"where's",
|
160
|
-
"which",
|
161
|
-
"while",
|
162
|
-
"who",
|
163
|
-
"who's",
|
164
|
-
"whom",
|
165
|
-
"why",
|
166
|
-
"why's",
|
167
|
-
"with",
|
168
|
-
"won't",
|
169
|
-
"would",
|
170
|
-
"wouldn't",
|
171
|
-
"you",
|
172
|
-
"you'd",
|
173
|
-
"you'll",
|
174
|
-
"you're",
|
175
|
-
"you've",
|
176
|
-
"your",
|
177
|
-
"yours",
|
178
|
-
"yourself",
|
179
|
-
"yourselves"
|
180
|
-
]
|
5
|
+
module Stopwords
|
181
6
|
|
182
|
-
|
7
|
+
STOPWORDS = [
|
8
|
+
"a",
|
9
|
+
"about",
|
10
|
+
"above",
|
11
|
+
"after",
|
12
|
+
"again",
|
13
|
+
"against",
|
14
|
+
"all",
|
15
|
+
"am",
|
16
|
+
"an",
|
17
|
+
"and",
|
18
|
+
"any",
|
19
|
+
"are",
|
20
|
+
"aren't",
|
21
|
+
"as",
|
22
|
+
"at",
|
23
|
+
"be",
|
24
|
+
"because",
|
25
|
+
"been",
|
26
|
+
"before",
|
27
|
+
"being",
|
28
|
+
"below",
|
29
|
+
"between",
|
30
|
+
"both",
|
31
|
+
"but",
|
32
|
+
"by",
|
33
|
+
"can't",
|
34
|
+
"cannot",
|
35
|
+
"could",
|
36
|
+
"couldn't",
|
37
|
+
"did",
|
38
|
+
"didn't",
|
39
|
+
"do",
|
40
|
+
"does",
|
41
|
+
"doesn't",
|
42
|
+
"doing",
|
43
|
+
"don't",
|
44
|
+
"down",
|
45
|
+
"during",
|
46
|
+
"each",
|
47
|
+
"few",
|
48
|
+
"for",
|
49
|
+
"from",
|
50
|
+
"further",
|
51
|
+
"had",
|
52
|
+
"hadn't",
|
53
|
+
"has",
|
54
|
+
"hasn't",
|
55
|
+
"have",
|
56
|
+
"haven't",
|
57
|
+
"having",
|
58
|
+
"he",
|
59
|
+
"he'd",
|
60
|
+
"he'll",
|
61
|
+
"he's",
|
62
|
+
"her",
|
63
|
+
"here",
|
64
|
+
"here's",
|
65
|
+
"hers",
|
66
|
+
"herself",
|
67
|
+
"him",
|
68
|
+
"himself",
|
69
|
+
"his",
|
70
|
+
"how",
|
71
|
+
"how's",
|
72
|
+
"i",
|
73
|
+
"i'd",
|
74
|
+
"i'll",
|
75
|
+
"i'm",
|
76
|
+
"i've",
|
77
|
+
"if",
|
78
|
+
"in",
|
79
|
+
"into",
|
80
|
+
"is",
|
81
|
+
"isn't",
|
82
|
+
"it",
|
83
|
+
"it's",
|
84
|
+
"its",
|
85
|
+
"itself",
|
86
|
+
"let's",
|
87
|
+
"me",
|
88
|
+
"more",
|
89
|
+
"most",
|
90
|
+
"mustn't",
|
91
|
+
"my",
|
92
|
+
"myself",
|
93
|
+
"no",
|
94
|
+
"nor",
|
95
|
+
"not",
|
96
|
+
"of",
|
97
|
+
"off",
|
98
|
+
"on",
|
99
|
+
"once",
|
100
|
+
"only",
|
101
|
+
"or",
|
102
|
+
"other",
|
103
|
+
"ought",
|
104
|
+
"our",
|
105
|
+
"ours",
|
106
|
+
"ourselves",
|
107
|
+
"out",
|
108
|
+
"over",
|
109
|
+
"own",
|
110
|
+
"same",
|
111
|
+
"shan't",
|
112
|
+
"she",
|
113
|
+
"she'd",
|
114
|
+
"she'll",
|
115
|
+
"she's",
|
116
|
+
"should",
|
117
|
+
"shouldn't",
|
118
|
+
"so",
|
119
|
+
"some",
|
120
|
+
"such",
|
121
|
+
"than",
|
122
|
+
"that",
|
123
|
+
"that's",
|
124
|
+
"the",
|
125
|
+
"their",
|
126
|
+
"theirs",
|
127
|
+
"them",
|
128
|
+
"themselves",
|
129
|
+
"then",
|
130
|
+
"there",
|
131
|
+
"there's",
|
132
|
+
"these",
|
133
|
+
"they",
|
134
|
+
"they'd",
|
135
|
+
"they'll",
|
136
|
+
"they're",
|
137
|
+
"they've",
|
138
|
+
"this",
|
139
|
+
"those",
|
140
|
+
"through",
|
141
|
+
"to",
|
142
|
+
"too",
|
143
|
+
"under",
|
144
|
+
"until",
|
145
|
+
"up",
|
146
|
+
"very",
|
147
|
+
"was",
|
148
|
+
"wasn't",
|
149
|
+
"we",
|
150
|
+
"we'd",
|
151
|
+
"we'll",
|
152
|
+
"we're",
|
153
|
+
"we've",
|
154
|
+
"were",
|
155
|
+
"weren't",
|
156
|
+
"what",
|
157
|
+
"what's",
|
158
|
+
"when",
|
159
|
+
"when's",
|
160
|
+
"where",
|
161
|
+
"where's",
|
162
|
+
"which",
|
163
|
+
"while",
|
164
|
+
"who",
|
165
|
+
"who's",
|
166
|
+
"whom",
|
167
|
+
"why",
|
168
|
+
"why's",
|
169
|
+
"with",
|
170
|
+
"won't",
|
171
|
+
"would",
|
172
|
+
"wouldn't",
|
173
|
+
"you",
|
174
|
+
"you'd",
|
175
|
+
"you'll",
|
176
|
+
"you're",
|
177
|
+
"you've",
|
178
|
+
"your",
|
179
|
+
"yours",
|
180
|
+
"yourself",
|
181
|
+
"yourselves"
|
182
|
+
]
|
183
|
+
|
184
|
+
end
|
183
185
|
|
184
186
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ar_indexer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Josh Stump
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2014-02-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activerecord
|