ar_indexer 0.1.2 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +5 -3
- data/ar_indexer.gemspec +16 -16
- data/lib/ar_indexer.rb +4 -3
- data/lib/ar_indexer/has_reverse_index.rb +96 -66
- data/lib/ar_indexer/index_search.rb +97 -43
- data/lib/ar_indexer/indexer.rb +158 -107
- data/lib/ar_indexer/reverse_index.rb +17 -9
- data/lib/ar_indexer/stopwords.rb +180 -178
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a949f827bc86002e7bb7241edb513fbe452b51ca
|
4
|
+
data.tar.gz: e04f080e7cd9be0fa8dfb552578cac54c079de7d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f5841ed351d64accee8237245a8ce732aa2784ba32f3189f3dd1f36c2467f25859d8b08edc674b2291be487ff4799fcc7d5dbdf527ecb63a6261c120fa889459
|
7
|
+
data.tar.gz: 930f41e303645f21a73aa061f0557a742ad464af9568285befb81fd2eca1961eb9ebe2f2a550ead28a35b7fad538853fb7c793c753773af33c7ffcc380fe35f8
|
data/README.md
CHANGED
@@ -41,13 +41,15 @@ At this point, ARIndexer will build and maintain a reverse index for each record
|
|
41
41
|
|
42
42
|
###Searching###
|
43
43
|
|
44
|
-
ARIndexer also provides a simple search class for finding records by text search. To initialize an instance of this class, just pass it
|
44
|
+
ARIndexer also provides a simple search class for finding records by text search. To initialize an instance of this class, just pass it an array of ActiveRecord models it needs to search.
|
45
45
|
|
46
|
-
foo = IndexSearch.new(Article)
|
46
|
+
foo = IndexSearch.new([Article])
|
47
|
+
# Or search multiple models
|
48
|
+
# foo = IndexSearch.new([Article, List])
|
47
49
|
|
48
50
|
You can also pass an options hash (which currently has a whole 1 option, `:no_results_message`)
|
49
51
|
|
50
|
-
foo = IndexSearch.new(Article, :no_results_message => "Hey man, there's nothing there.")
|
52
|
+
foo = IndexSearch.new([Article], :no_results_message => "Hey man, there's nothing there.")
|
51
53
|
|
52
54
|
And now you're ready to search against the index that's been built.
|
53
55
|
|
data/ar_indexer.gemspec
CHANGED
@@ -1,18 +1,18 @@
|
|
1
1
|
Gem::Specification.new do |spec|
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
2
|
+
spec.name = "ar_indexer"
|
3
|
+
spec.version = "0.1.4"
|
4
|
+
spec.date = "2014-02-18"
|
5
|
+
spec.summary = "Allows for reverse indexing selected ActiveRecord models. Handles searching and return of objects"
|
6
|
+
spec.description = spec.summary
|
7
|
+
spec.authors = ["Josh Stump"]
|
8
|
+
spec.email = "joshua.t.stump@gmail.com"
|
9
|
+
spec.homepage = "https://github.com/jstump/ar_indexer"
|
10
|
+
spec.require_paths = ["lib"]
|
11
|
+
spec.files = `git ls-files`.split("\n")
|
12
|
+
spec.license = "GPL-2"
|
13
|
+
|
14
|
+
spec.add_dependency('activerecord')
|
15
|
+
spec.add_dependency('activesupport')
|
16
|
+
spec.add_dependency('htmlentities')
|
17
|
+
spec.add_dependency('fast-stemmer')
|
18
18
|
end
|
data/lib/ar_indexer.rb
CHANGED
@@ -1,19 +1,20 @@
|
|
1
|
-
# Requires for gem dependencies
|
2
1
|
require 'active_record'
|
3
2
|
require 'active_support'
|
4
3
|
require 'active_support/inflector'
|
5
4
|
require 'htmlentities'
|
6
5
|
require 'fast-stemmer'
|
7
|
-
|
6
|
+
|
8
7
|
require 'ar_indexer/reverse_index'
|
9
8
|
require 'ar_indexer/has_reverse_index'
|
10
9
|
require 'ar_indexer/stopwords'
|
11
10
|
require 'ar_indexer/indexer'
|
12
11
|
require 'ar_indexer/index_search'
|
13
12
|
|
13
|
+
# Main gem module
|
14
|
+
|
14
15
|
module ARIndexer
|
15
16
|
|
16
|
-
|
17
|
+
VERSION = "0.1.2"
|
17
18
|
|
18
19
|
end
|
19
20
|
|
@@ -1,70 +1,100 @@
|
|
1
1
|
module ARIndexer
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
3
|
+
# Holds methods that are used to extend ActiveRecord models.
|
4
|
+
# Included automatically whenever ActiveRecord is required
|
5
|
+
|
6
|
+
module Model
|
7
|
+
|
8
|
+
# Extends a specified ActiveRecord model by adding the functions within the ClassMethods module.
|
9
|
+
# Called automatically on all ActiveRecord models
|
10
|
+
|
11
|
+
def self.included(base)
|
12
|
+
base.send :extend, ClassMethods
|
13
|
+
end
|
14
|
+
|
15
|
+
# Class methods that can be called on any ActiveRecord model to extend functionality
|
16
|
+
|
17
|
+
module ClassMethods
|
18
|
+
|
19
|
+
# Marks all string and text fields (or a subset thereof) of an ActiveRecord model
|
20
|
+
# for indexing and adds a necessary set of instance methods.
|
21
|
+
# If the [fields] parameter is set, indexes only the specified fields,
|
22
|
+
# otherwise indexes all string and text fields.
|
23
|
+
#
|
24
|
+
# ==== Parameters
|
25
|
+
#
|
26
|
+
# * fields: optional array of field names (as symbols) to be indexed
|
27
|
+
#
|
28
|
+
# ==== Examples
|
29
|
+
#
|
30
|
+
# class Post < ActiveRecord::Base
|
31
|
+
# has_reverse_index
|
32
|
+
# end
|
33
|
+
#
|
34
|
+
# class Article < ActiveRecord::Base
|
35
|
+
# has_reverse_index([:title, :content])
|
36
|
+
# end
|
37
|
+
|
38
|
+
def has_reverse_index(fields = [])
|
39
|
+
send :include, InstanceMethods
|
40
|
+
|
41
|
+
class_attribute :indexed_fields
|
42
|
+
self.indexed_fields = fields.dup
|
43
|
+
|
44
|
+
after_create :on_create_record
|
45
|
+
after_update :on_update_record
|
46
|
+
before_destroy :on_destroy_record
|
47
|
+
end
|
48
|
+
module_function :has_reverse_index
|
49
|
+
|
50
|
+
# Instance methods available to instances of an ActiveRecord model which has had has_reverse_index()
|
51
|
+
# called on it. Currently, there are no public instance methods.
|
52
|
+
|
53
|
+
module InstanceMethods
|
54
|
+
|
55
|
+
private
|
56
|
+
|
57
|
+
def array_of_values_to_index
|
58
|
+
values_for_indexing = []
|
59
|
+
if self.indexed_fields.empty?
|
60
|
+
self.class.columns.each do |c|
|
61
|
+
if ['string', 'text'].include? c.type.to_s
|
62
|
+
values_for_indexing << self.read_attribute(c.name)
|
63
|
+
end
|
64
|
+
end
|
65
|
+
else
|
66
|
+
self.indexed_fields.each do |f|
|
67
|
+
if ['string', 'text'].include? self.class.columns_hash[f.to_s].type.to_s
|
68
|
+
values_for_indexing << self.read_attribute(f.to_s)
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
72
|
+
values_for_indexing.delete_if {|v| [nil, ''].include? v}
|
73
|
+
return values_for_indexing
|
74
|
+
end
|
75
|
+
|
76
|
+
def on_create_record
|
77
|
+
values_for_indexing = array_of_values_to_index
|
78
|
+
unless values_for_indexing.empty?
|
79
|
+
Indexer.build_reverse_index(self.class.to_s.split('::').last.to_s, self.id, values_for_indexing, false)
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
83
|
+
def on_update_record
|
84
|
+
values_for_indexing = array_of_values_to_index
|
85
|
+
unless values_for_indexing.empty?
|
86
|
+
Indexer.build_reverse_index(self.class.to_s.split('::').last.to_s, self.id, values_for_indexing, true)
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
90
|
+
def on_destroy_record
|
91
|
+
Indexer.remove_from_reverse_index(self.class.to_s.split('::').last.to_s, self.id)
|
92
|
+
end
|
93
|
+
|
94
|
+
end
|
95
|
+
|
96
|
+
end
|
97
|
+
|
98
|
+
end
|
69
99
|
|
70
100
|
end
|
@@ -1,47 +1,101 @@
|
|
1
1
|
module ARIndexer
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
3
|
+
# Simple class for searching the index generated by adding has_reverse_index to ActiveRecord models
|
4
|
+
#
|
5
|
+
# === Initialization
|
6
|
+
#
|
7
|
+
# foo = IndexSearch.new([Article], {:no_results_message => "We're sorry, but no results were found"})
|
8
|
+
#
|
9
|
+
# === Parameters
|
10
|
+
#
|
11
|
+
# * [model_names]: array of constants denoting the ActiveRecord models to search
|
12
|
+
# * opts: optional hash of configuration options
|
13
|
+
#
|
14
|
+
# === Options
|
15
|
+
#
|
16
|
+
# :no_results_message => string which can be requested and displayed if no results are found
|
17
|
+
|
18
|
+
class IndexSearch
|
19
|
+
|
20
|
+
def initialize(model_names, opts = {})
|
21
|
+
@models = {}
|
22
|
+
model_names.each do |model|
|
23
|
+
@models[model.to_s.split('::').last] = model
|
24
|
+
end
|
25
|
+
puts @models
|
26
|
+
|
27
|
+
@options = {
|
28
|
+
:no_results_message => "Your #{@model_name} search returned no results."
|
29
|
+
}
|
30
|
+
|
31
|
+
@options.merge!(opts)
|
32
|
+
end
|
33
|
+
|
34
|
+
# Returns either the default :no_results_message, or the one specified on initialization
|
35
|
+
#
|
36
|
+
# ==== Returns
|
37
|
+
#
|
38
|
+
# string
|
39
|
+
|
40
|
+
def no_results_message
|
41
|
+
return @options[:no_results_message]
|
42
|
+
end
|
43
|
+
|
44
|
+
# Runs a text search against the ReverseIndex model for the ActiveRecord models given at initialization and a search string.
|
45
|
+
# Returns the matching records ordered by the number of matching search terms, highest first.
|
46
|
+
#
|
47
|
+
# ==== Parameters
|
48
|
+
#
|
49
|
+
# * search_string: string on which to search for indexed objects
|
50
|
+
#
|
51
|
+
# ==== Returns
|
52
|
+
#
|
53
|
+
# array of ActiveRecord objects, or an empty array
|
54
|
+
#
|
55
|
+
# ==== Examples
|
56
|
+
#
|
57
|
+
# foo = IndexSearch.new([Article])
|
58
|
+
# foo.run_search("test string")
|
59
|
+
# #=> [<#Article>, <#Article>, ...]
|
60
|
+
|
61
|
+
def run_search(search_string)
|
62
|
+
search_terms = Indexer.expand_lexicon(Indexer.text_to_lexicon(search_string))
|
63
|
+
matches = {}
|
64
|
+
@models.keys.each do |model|
|
65
|
+
matches[model] = {}
|
66
|
+
end
|
67
|
+
search_terms.each do |st|
|
68
|
+
reverse_index_records = ReverseIndex.where(:model_name => [@models.keys], :word => st)
|
69
|
+
if reverse_index_records.count > 0
|
70
|
+
reverse_index_records.each do |rir|
|
71
|
+
rir.retrieve_id_array.each do |id|
|
72
|
+
if matches[rir.model_name].has_key?(id)
|
73
|
+
matches[rir.model_name][id] = matches[rir.model_name][id] + 1
|
74
|
+
else
|
75
|
+
matches[rir.model_name][id] = 1
|
76
|
+
end
|
77
|
+
end
|
78
|
+
end
|
79
|
+
end
|
80
|
+
end
|
81
|
+
collected_matches = []
|
82
|
+
matches.each do |k,v|
|
83
|
+
unless v.empty?
|
84
|
+
collected_matches << v.to_a.map{|x| x << k}
|
85
|
+
end
|
86
|
+
end
|
87
|
+
unless collected_matches.empty?
|
88
|
+
collected_matches.flatten!(1)
|
89
|
+
objects_to_return = []
|
90
|
+
collected_matches.sort{|x,y| x[1] <=> y[1]}.reverse.each do |match|
|
91
|
+
objects_to_return << @models[match[2]].find(match[0])
|
92
|
+
end
|
93
|
+
return objects_to_return
|
94
|
+
else
|
95
|
+
return []
|
96
|
+
end
|
97
|
+
end
|
98
|
+
|
99
|
+
end
|
46
100
|
|
47
101
|
end
|
data/lib/ar_indexer/indexer.rb
CHANGED
@@ -1,111 +1,162 @@
|
|
1
1
|
module ARIndexer
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
3
|
+
# Contains functions for creating a forward index from text, then converting it to a reverse index
|
4
|
+
|
5
|
+
module Indexer
|
6
|
+
|
7
|
+
# Turns a string into a lexicon array of unique, lower-cased words, with HTML markup, punctuation, and stopwords removed
|
8
|
+
#
|
9
|
+
# ==== Parameters
|
10
|
+
#
|
11
|
+
# text: the string to be converted
|
12
|
+
#
|
13
|
+
# ==== Returns
|
14
|
+
#
|
15
|
+
# array of strings
|
16
|
+
|
17
|
+
def self.text_to_lexicon(text)
|
18
|
+
# Remove HTML markup
|
19
|
+
text.gsub!(/<[^>]+>/, ' ')
|
20
|
+
# Decode HTML entities
|
21
|
+
coder = HTMLEntities.new
|
22
|
+
text = coder.decode(text)
|
23
|
+
# Remove most punctuation
|
24
|
+
text.gsub!(/[^a-zA-Z0-9\s]/, '')
|
25
|
+
# Move everything to lower case
|
26
|
+
text.downcase!
|
27
|
+
# Split all words into an array
|
28
|
+
lexicon = text.split(' ')
|
29
|
+
# Remove stopwords and duplicates
|
30
|
+
lexicon = (lexicon - Stopwords::STOPWORDS).uniq
|
31
|
+
return lexicon
|
32
|
+
end
|
33
|
+
|
34
|
+
# Expands the lexicon created by text_to_lexicon, adding plurals and root words
|
35
|
+
#
|
36
|
+
# ==== Parameters
|
37
|
+
#
|
38
|
+
# lexicon: array of strings to be expanded
|
39
|
+
#
|
40
|
+
# ==== Returns
|
41
|
+
#
|
42
|
+
# array of strings
|
43
|
+
|
44
|
+
def self.expand_lexicon(lexicon)
|
45
|
+
# Stem and pluralize
|
46
|
+
lexicon.each do |word|
|
47
|
+
root = Stemmer::stem_word(word)
|
48
|
+
if !lexicon.include? root
|
49
|
+
lexicon = lexicon.inject([root], :<<)
|
50
|
+
end
|
51
|
+
plural = word.pluralize
|
52
|
+
if !lexicon.include? plural
|
53
|
+
lexicon = lexicon.inject([plural], :<<)
|
54
|
+
end
|
55
|
+
end
|
56
|
+
# Remove stopwords and duplicates again
|
57
|
+
lexicon = (lexicon - Stopwords::STOPWORDS).uniq
|
58
|
+
return lexicon
|
59
|
+
end
|
60
|
+
|
61
|
+
# Takes an array of strings to be indexed, and calls text_to_lexicon on each.
|
62
|
+
# Returns the combined array flattened, uniquified, and sorted in alphabetical order
|
63
|
+
#
|
64
|
+
# ==== Parameters
|
65
|
+
#
|
66
|
+
# values_to_index: array of string values to index
|
67
|
+
#
|
68
|
+
# ==== Returns
|
69
|
+
#
|
70
|
+
# array of strings
|
71
|
+
|
72
|
+
def self.build_forward_index(values_to_index)
|
73
|
+
forward_index = []
|
74
|
+
# Run text_to_lexicon for each indexed field
|
75
|
+
values_to_index.each do |v|
|
76
|
+
forward_index << self.text_to_lexicon(v) if ![nil, ''].include? v
|
77
|
+
end
|
78
|
+
# Return the lexicon flattened, uniquified, and in alphabetical order
|
79
|
+
forward_index = forward_index.flatten.uniq.sort
|
80
|
+
return forward_index
|
81
|
+
end
|
82
|
+
|
83
|
+
# For a given model name and object id, compares the list of words with the forward index of the text.
|
84
|
+
# If a word exists in the reverse index but not the forward index, removes the object id from the reverse index.
|
85
|
+
# If the id array is empty, removes the reverse index record
|
86
|
+
#
|
87
|
+
# ==== Parameters
|
88
|
+
#
|
89
|
+
# * model_name: string version of the model name to clean records for
|
90
|
+
# * record_id: object id to search for in the reverse index
|
91
|
+
# * forward_index: the array of words to check against
|
92
|
+
|
93
|
+
def self.clean_reverse_index(model_name, record_id, forward_index)
|
94
|
+
reverse_index_records = ReverseIndex.where(:model_name => model_name)
|
95
|
+
reverse_index_records.each do |rir|
|
96
|
+
if rir.id_list.match(/,{0,1}#{record_id},{0,1}/)
|
97
|
+
if !forward_index.include? rir.word
|
98
|
+
id_array = rir.retrieve_id_array
|
99
|
+
id_array.delete(record_id.to_i)
|
100
|
+
if id_array.empty?
|
101
|
+
rir.destroy
|
102
|
+
else
|
103
|
+
new_id_list = id_array.join(',')
|
104
|
+
rir.update(:id_list => new_id_list)
|
105
|
+
end
|
106
|
+
end
|
107
|
+
end
|
108
|
+
end
|
109
|
+
end
|
110
|
+
|
111
|
+
# Takes an array of values to index, runs it through build_forward_index(), then builds the reverse index
|
112
|
+
# from the returned values
|
113
|
+
#
|
114
|
+
# ==== Parameters
|
115
|
+
#
|
116
|
+
# * model_name: the string version of the model name
|
117
|
+
# * record_id: the id of the object being indexed
|
118
|
+
# * values_to_index: array of string objects to use in building the reverse index
|
119
|
+
# * cleaning_required: boolean flag, whether or not to run clean_reverse_index()
|
120
|
+
|
121
|
+
def self.build_reverse_index(model_name, record_id, values_to_index, cleaning_required = false)
|
122
|
+
forward_index = self.build_forward_index(values_to_index)
|
123
|
+
forward_index.each do |word|
|
124
|
+
if reverse_index_record = ReverseIndex.where(:model_name => model_name, :word => word).first
|
125
|
+
id_array = reverse_index_record.retrieve_id_array
|
126
|
+
if !id_array.include? record_id
|
127
|
+
new_id_list = (id_array << record_id).join(',')
|
128
|
+
reverse_index_record.update(:id_list => new_id_list)
|
129
|
+
end
|
130
|
+
else
|
131
|
+
ReverseIndex.create(:model_name => model_name, :word => word, :id_list => record_id)
|
132
|
+
end
|
133
|
+
end
|
134
|
+
self.clean_reverse_index(model_name, record_id, forward_index) if cleaning_required
|
135
|
+
end
|
136
|
+
|
137
|
+
# Removes an object id from the reverse index for a specified model.
|
138
|
+
# If the id array is empty after removing the record id, destroys the reverse index record
|
139
|
+
#
|
140
|
+
# ==== Parameters
|
141
|
+
#
|
142
|
+
# model_name: string version of the model name to remove records for
|
143
|
+
# record_id: object id to remove records for
|
144
|
+
|
145
|
+
def self.remove_from_reverse_index(model_name, record_id)
|
146
|
+
reverse_index_records = ReverseIndex.where(:model_name => model_name)
|
147
|
+
reverse_index_records.each do |rir|
|
148
|
+
id_array = rir.retrieve_id_array
|
149
|
+
if id_array.include? record_id.to_i
|
150
|
+
id_array.delete(record_id.to_i)
|
151
|
+
if id_array.empty?
|
152
|
+
rir.destroy
|
153
|
+
else
|
154
|
+
rir.update(:id_list => id_array.join(','))
|
155
|
+
end
|
156
|
+
end
|
157
|
+
end
|
158
|
+
end
|
159
|
+
|
160
|
+
end
|
110
161
|
|
111
162
|
end
|
@@ -1,17 +1,25 @@
|
|
1
1
|
module ARIndexer
|
2
2
|
|
3
|
-
|
3
|
+
class ReverseIndex < ::ActiveRecord::Base
|
4
4
|
|
5
|
-
|
5
|
+
if ::ActiveRecord::VERSION::MAJOR < 4
|
6
|
+
attr_accessible :id_list, :model_name, :word
|
7
|
+
end
|
6
8
|
|
7
|
-
|
9
|
+
validates_uniqueness_of :word, :scope => :model_name
|
8
10
|
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
11
|
+
# Retrieves the id_list property as a parsed array of integers
|
12
|
+
#
|
13
|
+
# ==== Returns
|
14
|
+
#
|
15
|
+
# array of integers
|
14
16
|
|
15
|
-
|
17
|
+
def retrieve_id_array
|
18
|
+
id_array = self.id_list.split(',')
|
19
|
+
id_array.map! {|id| id.to_i}
|
20
|
+
return id_array
|
21
|
+
end
|
22
|
+
|
23
|
+
end
|
16
24
|
|
17
25
|
end
|
data/lib/ar_indexer/stopwords.rb
CHANGED
@@ -1,184 +1,186 @@
|
|
1
1
|
module ARIndexer
|
2
2
|
|
3
|
-
|
3
|
+
# Contains the array of stopwords: common words that should not be indexed
|
4
4
|
|
5
|
-
|
6
|
-
"a",
|
7
|
-
"about",
|
8
|
-
"above",
|
9
|
-
"after",
|
10
|
-
"again",
|
11
|
-
"against",
|
12
|
-
"all",
|
13
|
-
"am",
|
14
|
-
"an",
|
15
|
-
"and",
|
16
|
-
"any",
|
17
|
-
"are",
|
18
|
-
"aren't",
|
19
|
-
"as",
|
20
|
-
"at",
|
21
|
-
"be",
|
22
|
-
"because",
|
23
|
-
"been",
|
24
|
-
"before",
|
25
|
-
"being",
|
26
|
-
"below",
|
27
|
-
"between",
|
28
|
-
"both",
|
29
|
-
"but",
|
30
|
-
"by",
|
31
|
-
"can't",
|
32
|
-
"cannot",
|
33
|
-
"could",
|
34
|
-
"couldn't",
|
35
|
-
"did",
|
36
|
-
"didn't",
|
37
|
-
"do",
|
38
|
-
"does",
|
39
|
-
"doesn't",
|
40
|
-
"doing",
|
41
|
-
"don't",
|
42
|
-
"down",
|
43
|
-
"during",
|
44
|
-
"each",
|
45
|
-
"few",
|
46
|
-
"for",
|
47
|
-
"from",
|
48
|
-
"further",
|
49
|
-
"had",
|
50
|
-
"hadn't",
|
51
|
-
"has",
|
52
|
-
"hasn't",
|
53
|
-
"have",
|
54
|
-
"haven't",
|
55
|
-
"having",
|
56
|
-
"he",
|
57
|
-
"he'd",
|
58
|
-
"he'll",
|
59
|
-
"he's",
|
60
|
-
"her",
|
61
|
-
"here",
|
62
|
-
"here's",
|
63
|
-
"hers",
|
64
|
-
"herself",
|
65
|
-
"him",
|
66
|
-
"himself",
|
67
|
-
"his",
|
68
|
-
"how",
|
69
|
-
"how's",
|
70
|
-
"i",
|
71
|
-
"i'd",
|
72
|
-
"i'll",
|
73
|
-
"i'm",
|
74
|
-
"i've",
|
75
|
-
"if",
|
76
|
-
"in",
|
77
|
-
"into",
|
78
|
-
"is",
|
79
|
-
"isn't",
|
80
|
-
"it",
|
81
|
-
"it's",
|
82
|
-
"its",
|
83
|
-
"itself",
|
84
|
-
"let's",
|
85
|
-
"me",
|
86
|
-
"more",
|
87
|
-
"most",
|
88
|
-
"mustn't",
|
89
|
-
"my",
|
90
|
-
"myself",
|
91
|
-
"no",
|
92
|
-
"nor",
|
93
|
-
"not",
|
94
|
-
"of",
|
95
|
-
"off",
|
96
|
-
"on",
|
97
|
-
"once",
|
98
|
-
"only",
|
99
|
-
"or",
|
100
|
-
"other",
|
101
|
-
"ought",
|
102
|
-
"our",
|
103
|
-
"ours",
|
104
|
-
"ourselves",
|
105
|
-
"out",
|
106
|
-
"over",
|
107
|
-
"own",
|
108
|
-
"same",
|
109
|
-
"shan't",
|
110
|
-
"she",
|
111
|
-
"she'd",
|
112
|
-
"she'll",
|
113
|
-
"she's",
|
114
|
-
"should",
|
115
|
-
"shouldn't",
|
116
|
-
"so",
|
117
|
-
"some",
|
118
|
-
"such",
|
119
|
-
"than",
|
120
|
-
"that",
|
121
|
-
"that's",
|
122
|
-
"the",
|
123
|
-
"their",
|
124
|
-
"theirs",
|
125
|
-
"them",
|
126
|
-
"themselves",
|
127
|
-
"then",
|
128
|
-
"there",
|
129
|
-
"there's",
|
130
|
-
"these",
|
131
|
-
"they",
|
132
|
-
"they'd",
|
133
|
-
"they'll",
|
134
|
-
"they're",
|
135
|
-
"they've",
|
136
|
-
"this",
|
137
|
-
"those",
|
138
|
-
"through",
|
139
|
-
"to",
|
140
|
-
"too",
|
141
|
-
"under",
|
142
|
-
"until",
|
143
|
-
"up",
|
144
|
-
"very",
|
145
|
-
"was",
|
146
|
-
"wasn't",
|
147
|
-
"we",
|
148
|
-
"we'd",
|
149
|
-
"we'll",
|
150
|
-
"we're",
|
151
|
-
"we've",
|
152
|
-
"were",
|
153
|
-
"weren't",
|
154
|
-
"what",
|
155
|
-
"what's",
|
156
|
-
"when",
|
157
|
-
"when's",
|
158
|
-
"where",
|
159
|
-
"where's",
|
160
|
-
"which",
|
161
|
-
"while",
|
162
|
-
"who",
|
163
|
-
"who's",
|
164
|
-
"whom",
|
165
|
-
"why",
|
166
|
-
"why's",
|
167
|
-
"with",
|
168
|
-
"won't",
|
169
|
-
"would",
|
170
|
-
"wouldn't",
|
171
|
-
"you",
|
172
|
-
"you'd",
|
173
|
-
"you'll",
|
174
|
-
"you're",
|
175
|
-
"you've",
|
176
|
-
"your",
|
177
|
-
"yours",
|
178
|
-
"yourself",
|
179
|
-
"yourselves"
|
180
|
-
]
|
5
|
+
module Stopwords
|
181
6
|
|
182
|
-
|
7
|
+
STOPWORDS = [
|
8
|
+
"a",
|
9
|
+
"about",
|
10
|
+
"above",
|
11
|
+
"after",
|
12
|
+
"again",
|
13
|
+
"against",
|
14
|
+
"all",
|
15
|
+
"am",
|
16
|
+
"an",
|
17
|
+
"and",
|
18
|
+
"any",
|
19
|
+
"are",
|
20
|
+
"aren't",
|
21
|
+
"as",
|
22
|
+
"at",
|
23
|
+
"be",
|
24
|
+
"because",
|
25
|
+
"been",
|
26
|
+
"before",
|
27
|
+
"being",
|
28
|
+
"below",
|
29
|
+
"between",
|
30
|
+
"both",
|
31
|
+
"but",
|
32
|
+
"by",
|
33
|
+
"can't",
|
34
|
+
"cannot",
|
35
|
+
"could",
|
36
|
+
"couldn't",
|
37
|
+
"did",
|
38
|
+
"didn't",
|
39
|
+
"do",
|
40
|
+
"does",
|
41
|
+
"doesn't",
|
42
|
+
"doing",
|
43
|
+
"don't",
|
44
|
+
"down",
|
45
|
+
"during",
|
46
|
+
"each",
|
47
|
+
"few",
|
48
|
+
"for",
|
49
|
+
"from",
|
50
|
+
"further",
|
51
|
+
"had",
|
52
|
+
"hadn't",
|
53
|
+
"has",
|
54
|
+
"hasn't",
|
55
|
+
"have",
|
56
|
+
"haven't",
|
57
|
+
"having",
|
58
|
+
"he",
|
59
|
+
"he'd",
|
60
|
+
"he'll",
|
61
|
+
"he's",
|
62
|
+
"her",
|
63
|
+
"here",
|
64
|
+
"here's",
|
65
|
+
"hers",
|
66
|
+
"herself",
|
67
|
+
"him",
|
68
|
+
"himself",
|
69
|
+
"his",
|
70
|
+
"how",
|
71
|
+
"how's",
|
72
|
+
"i",
|
73
|
+
"i'd",
|
74
|
+
"i'll",
|
75
|
+
"i'm",
|
76
|
+
"i've",
|
77
|
+
"if",
|
78
|
+
"in",
|
79
|
+
"into",
|
80
|
+
"is",
|
81
|
+
"isn't",
|
82
|
+
"it",
|
83
|
+
"it's",
|
84
|
+
"its",
|
85
|
+
"itself",
|
86
|
+
"let's",
|
87
|
+
"me",
|
88
|
+
"more",
|
89
|
+
"most",
|
90
|
+
"mustn't",
|
91
|
+
"my",
|
92
|
+
"myself",
|
93
|
+
"no",
|
94
|
+
"nor",
|
95
|
+
"not",
|
96
|
+
"of",
|
97
|
+
"off",
|
98
|
+
"on",
|
99
|
+
"once",
|
100
|
+
"only",
|
101
|
+
"or",
|
102
|
+
"other",
|
103
|
+
"ought",
|
104
|
+
"our",
|
105
|
+
"ours",
|
106
|
+
"ourselves",
|
107
|
+
"out",
|
108
|
+
"over",
|
109
|
+
"own",
|
110
|
+
"same",
|
111
|
+
"shan't",
|
112
|
+
"she",
|
113
|
+
"she'd",
|
114
|
+
"she'll",
|
115
|
+
"she's",
|
116
|
+
"should",
|
117
|
+
"shouldn't",
|
118
|
+
"so",
|
119
|
+
"some",
|
120
|
+
"such",
|
121
|
+
"than",
|
122
|
+
"that",
|
123
|
+
"that's",
|
124
|
+
"the",
|
125
|
+
"their",
|
126
|
+
"theirs",
|
127
|
+
"them",
|
128
|
+
"themselves",
|
129
|
+
"then",
|
130
|
+
"there",
|
131
|
+
"there's",
|
132
|
+
"these",
|
133
|
+
"they",
|
134
|
+
"they'd",
|
135
|
+
"they'll",
|
136
|
+
"they're",
|
137
|
+
"they've",
|
138
|
+
"this",
|
139
|
+
"those",
|
140
|
+
"through",
|
141
|
+
"to",
|
142
|
+
"too",
|
143
|
+
"under",
|
144
|
+
"until",
|
145
|
+
"up",
|
146
|
+
"very",
|
147
|
+
"was",
|
148
|
+
"wasn't",
|
149
|
+
"we",
|
150
|
+
"we'd",
|
151
|
+
"we'll",
|
152
|
+
"we're",
|
153
|
+
"we've",
|
154
|
+
"were",
|
155
|
+
"weren't",
|
156
|
+
"what",
|
157
|
+
"what's",
|
158
|
+
"when",
|
159
|
+
"when's",
|
160
|
+
"where",
|
161
|
+
"where's",
|
162
|
+
"which",
|
163
|
+
"while",
|
164
|
+
"who",
|
165
|
+
"who's",
|
166
|
+
"whom",
|
167
|
+
"why",
|
168
|
+
"why's",
|
169
|
+
"with",
|
170
|
+
"won't",
|
171
|
+
"would",
|
172
|
+
"wouldn't",
|
173
|
+
"you",
|
174
|
+
"you'd",
|
175
|
+
"you'll",
|
176
|
+
"you're",
|
177
|
+
"you've",
|
178
|
+
"your",
|
179
|
+
"yours",
|
180
|
+
"yourself",
|
181
|
+
"yourselves"
|
182
|
+
]
|
183
|
+
|
184
|
+
end
|
183
185
|
|
184
186
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ar_indexer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Josh Stump
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2014-02-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activerecord
|