ar_indexer 0.1.4 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a949f827bc86002e7bb7241edb513fbe452b51ca
4
- data.tar.gz: e04f080e7cd9be0fa8dfb552578cac54c079de7d
3
+ metadata.gz: 9b173a0feed66c6dfa32f17b624840e9aced0285
4
+ data.tar.gz: 5a2776b09548c4c613bea0d76e592a3ccdc07b8e
5
5
  SHA512:
6
- metadata.gz: f5841ed351d64accee8237245a8ce732aa2784ba32f3189f3dd1f36c2467f25859d8b08edc674b2291be487ff4799fcc7d5dbdf527ecb63a6261c120fa889459
7
- data.tar.gz: 930f41e303645f21a73aa061f0557a742ad464af9568285befb81fd2eca1961eb9ebe2f2a550ead28a35b7fad538853fb7c793c753773af33c7ffcc380fe35f8
6
+ metadata.gz: 3938da4be83743a63650910f5951089fa9b7a15ab8c06623117bf2bbdb3cb4d4c7ed4feb6fc8937600ce1d4b070d361ed93101ef25dbb2c51425bb64796c3294
7
+ data.tar.gz: d8867a7a3d5a46566ba453cd078883a5114180152362bd3560649b6743e8cc010c12b1d198d04e83f35d1b891f19dae3712ce63a878c3e82602611e9bf5500a3
data/README.md CHANGED
@@ -14,10 +14,11 @@ Write a migration to add a reverse_indices table to the database (Rails migratio
14
14
  def change
15
15
  create_table :reverse_indices do |t|
16
16
  t.string :model_name
17
+ t.string :field_name
17
18
  t.string :word
18
19
  t.text :id_list
19
20
  end
20
- add_index :reverse_indices, [:model_name, :word], :unique => true
21
+ add_index :reverse_indices, [:model_name, :field_name, :word], :unique => true
21
22
  end
22
23
  end
23
24
 
@@ -27,7 +28,7 @@ Run `rake db:migrate`
27
28
 
28
29
  ###Indexing###
29
30
 
30
- Have an ActiveRecord model? Want to index some text for searching? Just add the `has_reverse_index` function to your model. Call the function with no parameters and ARIndexer will index all string and text fields. You can pass an optional array of field names (as symbols), and ARIndexer will index only these fields.
31
+ Have an ActiveRecord model? Want to index some text for searching? Just add the `has_reverse_index` function to your model. Call the function with no parameters and ARIndexer will index all string and text fields. You can pass an optional array of field names (as symbols), and ARIndexer will index only these fields. You can also pass a hash as a second argument, with association names as keys and, as each value, a lambda that returns the data to index for that association. Note that each lambda must accept the AR object as its sole argument and must return a string.
31
32
 
32
33
  class Post < ActiveRecord::Base
33
34
  has_reverse_index
@@ -37,6 +38,19 @@ Have an ActiveRecord model? Want to index some text for searching? Just add the
37
38
  has_reverse_index([:title, :content])
38
39
  end
39
40
 
41
+ class Article < ActiveRecord::Base
42
+ has_many :article_tags
43
+ has_many :tags, :through => :article_tags
44
+ has_reverse_index(
45
+ [
46
+ :title,
47
+ :content
48
+ ],
49
+ {
50
+ :tags => lambda {|object| object.tags.collect{|tag| tag.name}.join(', ')}
51
+ }
52
+ )
53
+
40
54
  At this point, ARIndexer will build and maintain a reverse index for each record under these models.
41
55
 
42
56
  ###Searching###
@@ -47,15 +61,50 @@ ARIndexer also provides a simple search class for finding records by text search
47
61
  # Or search multiple models
48
62
  # foo = IndexSearch.new([Article, List])
49
63
 
50
- You can also pass an options hash (which currently has a whole 1 option, `:no_results_message`)
51
-
52
- foo = IndexSearch.new([Article], :no_results_message => "Hey man, there's nothing there.")
64
+ You can also pass an options hash to specify which fields should be searched, how the results should be sorted, what message to display when there are no results, and so on. The default options hash is shown below:
65
+
66
+ @options = {
67
+ :fields => [],
68
+ # If left as an empty array, will search all fields for the given model
69
+
70
+ :match => :any,
71
+ # :any will expand your search string and find results that match any keyword
72
+ # :all will only return results that have as many keyword matches as words in the search string
73
+
74
+ :sort => :relevance,
75
+ # :relevance will sort by number of keyword matches
76
+ # :field allows you to specify a field to sort by
77
+
78
+ :sort_method => nil,
79
+ # Allows for a lambda by which to access a sortable value.
80
+ # If nil, will just access the field value
81
+
82
+ :sort_direction => :desc,
83
+ # Sort order, default is DESC so that the most relevant results will be returned first
84
+
85
+ :stopwords => [],
86
+ # An array of words that should not be used in the search.
87
+ # ar_indexer has an internal array of basic stopwords, and these will be added to it
88
+
89
+ :no_results_message => 'No results were returned for the given search term.'
90
+ # A stored message that can be returned if there are no results returned
91
+ }
92
+
93
+ foo = IndexSearch.new([Article],
94
+ {
95
+ :fields => [:title],
96
+ :match => :all,
97
+ :sort => :field,
98
+ :sort_direction => :asc,
99
+ :no_results_message => "Hey man, there's nothing there."
100
+ }
101
+ )
53
102
 
54
103
  And now you're ready to search against the index that's been built.
55
104
 
56
- foo.run_search('some search string')
105
+ foo.search('some search string')
57
106
 
58
- `run_search` will return an array of ActiveRecord objects ordered by the number of matched terms within your search string. If no objects matched your search string, an emtpy array is returned. If no results are returned, you can request the `:no_results_message`
107
+ `foo.search` will return an array of ActiveRecord objects ordered by the number of matched terms within your search string. If no objects matched your search string, an empty array is returned. If no results are returned, you can request the `:no_results_message`
59
108
 
60
109
  results = foo.run_search('some search string')
61
110
  unless results.empty?
data/ar_indexer.gemspec CHANGED
@@ -1,18 +1,23 @@
1
1
  Gem::Specification.new do |spec|
2
- spec.name = "ar_indexer"
3
- spec.version = "0.1.4"
4
- spec.date = "2014-02-18"
5
- spec.summary = "Allows for reverse indexing selected ActiveRecord models. Handles searching and return of objects"
6
- spec.description = spec.summary
7
- spec.authors = ["Josh Stump"]
8
- spec.email = "joshua.t.stump@gmail.com"
9
- spec.homepage = "https://github.com/jstump/ar_indexer"
10
- spec.require_paths = ["lib"]
11
- spec.files = `git ls-files`.split("\n")
12
- spec.license = "GPL-2"
2
+ # Basic Gem Description
3
+ spec.name = "ar_indexer"
4
+ spec.version = "0.2.0"
5
+ spec.date = "2014-10-06"
6
+ spec.summary = "Allows for reverse indexing selected ActiveRecord models. Handles searching and return of objects"
7
+ spec.description = spec.summary
8
+ spec.authors = ["Josh MacLachlan"]
9
+ spec.email = "josh.t.maclachlan@gmail.com"
10
+ spec.homepage = "https://github.com/jtmaclachlan/ar_indexer"
11
+ spec.require_paths = ["lib"]
12
+ spec.files = `git ls-files`.split("\n")
13
+ spec.license = "GPL-2"
13
14
 
14
- spec.add_dependency('activerecord')
15
- spec.add_dependency('activesupport')
15
+ # Runtime Dependencies
16
+ spec.add_dependency('activerecord', '>= 3.0.0')
17
+ spec.add_dependency('activesupport', '>= 3.0.0')
16
18
  spec.add_dependency('htmlentities')
17
19
  spec.add_dependency('fast-stemmer')
20
+
21
+ # Post-Install Message
22
+ spec.post_install_message = "If you're upgrading from v0.1.4 to v0.2.0, read the updated documentation. Your application will throw exceptions when trying to access indexed models."
18
23
  end
@@ -1,100 +1,96 @@
1
1
  module ARIndexer
2
-
3
- # Holds methods that are used to extend ActiveRecord models.
4
- # Included automatically whenever ActiveRecord is required
5
-
6
2
  module Model
7
-
8
- # Extends a specified ActiveRecord model by adding the functions within the ClassMethods module.
9
- # Called automatically on all ActiveRecord models
10
-
11
3
  def self.included(base)
12
4
  base.send :extend, ClassMethods
13
5
  end
14
6
 
15
- # Class methods that can be called on any ActiveRecord model to extend functionality
16
-
17
7
  module ClassMethods
8
+ def has_reverse_index(fields = [], associations = {})
9
+ fields.each do |field_name|
10
+ unless self.columns_hash.keys.include?(field_name.to_s)
11
+ unless ['string', 'text'].include?(self.columns_hash[field_name.to_s].type.to_s)
12
+ raise TypeError, 'Model properties provided to has_reverse_index() must be of field type string or text.'
13
+ end
14
+ end
15
+ end
18
16
 
19
- # Marks all string and text fields (or a subset thereof) of an ActiveRecord model
20
- # for indexing and adds a necessary set of instance methods.
21
- # If the [fields] parameter is set, indexes only the specified fields,
22
- # otherwise indexes all string and text fields.
23
- #
24
- # ==== Parameters
25
- #
26
- # * fields: optional array of field names (as symbols) to be indexed
27
- #
28
- # ==== Examples
29
- #
30
- # class Post < ActiveRecord::Base
31
- # has_reverse_index
32
- # end
33
- #
34
- # class Article < ActiveRecord::Base
35
- # has_reverse_index([:title, :content])
36
- # end
17
+ associations.each do |association_name, access_function|
18
+ unless access_function.class == Proc
19
+ raise TypeError, 'Model associations must have a Proc provided in order to reach the appropriate value.'
20
+ end
21
+ end
37
22
 
38
- def has_reverse_index(fields = [])
39
23
  send :include, InstanceMethods
40
24
 
41
25
  class_attribute :indexed_fields
42
- self.indexed_fields = fields.dup
26
+ class_attribute :indexed_associations
27
+ self.indexed_fields = fields.clone || []
28
+ self.indexed_associations = associations.clone || {}
43
29
 
44
- after_create :on_create_record
45
- after_update :on_update_record
46
- before_destroy :on_destroy_record
30
+ after_create :ar_indexer_on_create
31
+ after_update :ar_indexer_on_update
32
+ before_destroy :ar_indexer_on_destroy
47
33
  end
48
34
  module_function :has_reverse_index
49
35
 
50
- # Instance methods available to instances of an ActiveRecord model which has had has_reverse_index()
51
- # called on it. Currently, there are no public instance methods.
52
-
53
36
  module InstanceMethods
54
-
55
37
  private
56
38
 
57
- def array_of_values_to_index
58
- values_for_indexing = []
39
+ def ar_indexer_get_indexable_values
40
+ values_to_index = {}
41
+
59
42
  if self.indexed_fields.empty?
60
- self.class.columns.each do |c|
61
- if ['string', 'text'].include? c.type.to_s
62
- values_for_indexing << self.read_attribute(c.name)
43
+ self.class.columns.each do |column|
44
+ if ['string', 'text'].include? column.type.to_s
45
+ value = self.read_attribute(column.name)
46
+ if value.class == String
47
+ unless value.empty?
48
+ values_to_index[column.name] = value
49
+ end
50
+ end
63
51
  end
64
52
  end
65
53
  else
66
- self.indexed_fields.each do |f|
67
- if ['string', 'text'].include? self.class.columns_hash[f.to_s].type.to_s
68
- values_for_indexing << self.read_attribute(f.to_s)
54
+ self.indexed_fields.each do |field_name|
55
+ value = self[field_name]
56
+ if value.class == String
57
+ unless value.empty?
58
+ values_to_index[field_name.to_s] = value
59
+ end
69
60
  end
70
61
  end
71
62
  end
72
- values_for_indexing.delete_if {|v| [nil, ''].include? v}
73
- return values_for_indexing
74
- end
75
63
 
76
- def on_create_record
77
- values_for_indexing = array_of_values_to_index
78
- unless values_for_indexing.empty?
79
- Indexer.build_reverse_index(self.class.to_s.split('::').last.to_s, self.id, values_for_indexing, false)
64
+ unless self.indexed_associations.empty?
65
+ self.indexed_associations.each do |association_name, access_function|
66
+ value = access_function.call(self)
67
+ if value.class == String
68
+ unless value.empty?
69
+ values_to_index[association_name.to_s] = value
70
+ end
71
+ end
72
+ end
80
73
  end
74
+
75
+ return values_to_index
81
76
  end
82
77
 
83
- def on_update_record
84
- values_for_indexing = array_of_values_to_index
85
- unless values_for_indexing.empty?
86
- Indexer.build_reverse_index(self.class.to_s.split('::').last.to_s, self.id, values_for_indexing, true)
78
+ def ar_indexer_on_create
79
+ ar_indexer_get_indexable_values.each do |field_name, value|
80
+ Indexer.index_string(self.class.to_s.split('::').last, self.id, field_name, value, false)
87
81
  end
88
82
  end
89
83
 
90
- def on_destroy_record
91
- Indexer.remove_from_reverse_index(self.class.to_s.split('::').last.to_s, self.id)
84
+ def ar_indexer_on_update
85
+ ar_indexer_get_indexable_values.each do |field_name, value|
86
+ Indexer.index_string(self.class.to_s.split('::').last, self.id, field_name, value, true)
87
+ end
92
88
  end
93
89
 
90
+ def ar_indexer_on_destroy
91
+ Indexer.remove_index_id(self.class.to_s, self.id)
92
+ end
94
93
  end
95
-
96
94
  end
97
-
98
95
  end
99
-
100
96
  end
@@ -1,101 +1,142 @@
1
1
  module ARIndexer
2
+ module ARSearch
3
+ class IndexSearch
4
+ def initialize(models, opts = {})
5
+ @models = {}
6
+ models.each do |model|
7
+ @models[model.to_s.split('::').last] = model
8
+ end
2
9
 
3
- # Simple class for searching the index generated by adding has_reverse_index to ActiveRecord models
4
- #
5
- # === Initialization
6
- #
7
- # foo = IndexSearch.new(Article, {:no_results_message => "We're sorry, but no results were found"})
8
- #
9
- # === Parameters
10
- #
11
- # * [model_names]: array of constants denoting the ActiveRecord models to search
12
- # * opts: optional hash of configuration options
13
- #
14
- # === Options
15
- #
16
- # :no_results_message => string which can be requested and displayed if no results are found
10
+ @options = {
11
+ :fields => [],
12
+ :match => :any,
13
+ :sort => :relevance,
14
+ :sort_method => nil,
15
+ :sort_direction => :desc,
16
+ :stopwords => [],
17
+ :no_results_message => 'No results were returned for the given search term.'
18
+ }
19
+ @options.merge!(opts)
20
+ end
17
21
 
18
- class IndexSearch
22
+ def search_models
23
+ return @models.keys
24
+ end
19
25
 
20
- def initialize(model_names, opts = {})
21
- @models = {}
22
- model_names.each do |model|
23
- @models[model.to_s.split('::').last] = model
26
+ def options(key)
27
+ return @options[key]
24
28
  end
25
- puts @models
26
29
 
27
- @options = {
28
- :no_results_message => "Your #{@model_name} search returned no results."
29
- }
30
+ def search(value)
31
+ # Build array of words for query `reverse_indices.word IN ('word1', 'word2')`
32
+ if @options[:match] == :any
33
+ search_terms = ARSearch.expand_forward_index(Indexer.break_string(value), @options[:stopwords])
34
+ enforce_threshold = false
35
+ else
36
+ stopwords = (Stopwords::STOPWORDS + @options[:stopwords]).uniq
37
+ search_terms = (Indexer.break_string(value) - stopwords)
38
+ enforce_threshold = true
39
+ end
30
40
 
31
- @options.merge!(opts)
32
- end
41
+ # Execute AR query based on @options[:fields]
42
+ if @options[:fields].empty?
43
+ base_results = ReverseIndex.where(:model_name => self.search_models, :word => search_terms)
44
+ else
45
+ base_results = ReverseIndex.where(:model_name => self.search_models, :field_name => @options[:fields], :word => search_terms)
46
+ end
33
47
 
34
- # Returns either the default :no_results_message, or the one specified on initialization
35
- #
36
- # ==== Returns
37
- #
38
- # string
48
+ unless base_results.empty?
49
+ return ARSearch.method("sort_by_#{@options[:sort]}".to_sym).call(base_results, self, search_terms.count)
50
+ else
51
+ return []
52
+ end
53
+ end
39
54
 
40
- def no_results_message
41
- return @options[:no_results_message]
55
+ def no_results_message
56
+ return @options[:no_results_message]
57
+ end
42
58
  end
43
59
 
44
- # Runs a text search against the ReverseIndex model for a given ActiveRecord model and search string.
45
- # Returns
46
- #
47
- # ==== Parameters
48
- #
49
- # * search_string: string on which to search for indexed objects
50
- #
51
- # ==== Returns
52
- #
53
- # array of ActiveRecord objects, or an empty array
54
- #
55
- # ==== Examples
56
- #
57
- # foo = IndexSearch.new(Article)
58
- # foo.run_search("test string")
59
- # #=> [<#Article>, <#Article>, ...]
60
-
61
- def run_search(search_string)
62
- search_terms = Indexer.expand_lexicon(Indexer.text_to_lexicon(search_string))
63
- matches = {}
64
- @models.keys.each do |model|
65
- matches[model] = {}
60
+ def self.expand_forward_index(forward_index, stopwords)
61
+ # Stem and pluralize
62
+ forward_index.each do |word|
63
+ root = Stemmer::stem_word(word)
64
+ unless forward_index.include? root
65
+ forward_index = forward_index.inject([root], :<<)
66
+ end
67
+ plural = word.pluralize
68
+ unless forward_index.include? plural
69
+ forward_index = forward_index.inject([plural], :<<)
70
+ end
66
71
  end
67
- search_terms.each do |st|
68
- reverse_index_records = ReverseIndex.where(:model_name => [@models.keys], :word => st)
69
- if reverse_index_records.count > 0
70
- reverse_index_records.each do |rir|
71
- rir.retrieve_id_array.each do |id|
72
- if matches[rir.model_name].has_key?(id)
73
- matches[rir.model_name][id] = matches[rir.model_name][id] + 1
72
+
73
+ # Remove stopwords and duplicates again
74
+ stopwords = (Stopwords::STOPWORDS + stopwords).uniq
75
+ forward_index = (forward_index - stopwords).uniq
76
+ return forward_index
77
+ end
78
+
79
+ def self.get_object_counts(base_results, search_models, match_type, match_threshold)
80
+ relevancy_counts = {}
81
+ unsorted_results = []
82
+ search_models.each do |model|
83
+ model_results = base_results.where(:model_name => model)
84
+ unless model_results.empty?
85
+ relevancy_counts[model] = {}
86
+ model_results.each do |result|
87
+ id_array = result.retrieve_id_array
88
+ id_array.each do |object_id|
89
+ if relevancy_counts[model][object_id].nil?
90
+ relevancy_counts[model][object_id] = 1
74
91
  else
75
- matches[rir.model_name][id] = 1
92
+ relevancy_counts[model][object_id] = (relevancy_counts[model][object_id] + 1)
76
93
  end
77
94
  end
78
95
  end
96
+ if match_type == :all
97
+ relevancy_counts[model].delete_if do |object_id, count|
98
+ count < match_threshold
99
+ end
100
+ end
79
101
  end
102
+ unsorted_results << relevancy_counts[model].to_a.map{|result| result << model}
80
103
  end
81
- collected_matches = []
82
- matches.each do |k,v|
83
- unless v.empty?
84
- collected_matches << v.to_a.map{|x| x << k}
104
+ return unsorted_results
105
+ end
106
+
107
+ def self.sort_by_relevance(base_results, search_object, match_threshold)
108
+ unsorted_results = ARSearch.get_object_counts(base_results, search_object.search_models, search_object.options(:match), match_threshold).flatten!(1) || []
109
+ unless unsorted_results.empty?
110
+ sorted_results = unsorted_results.sort_by {|x| [x[1], x[0]]}
111
+ if search_object.options(:sort_direction) == :desc
112
+ sorted_results = sorted_results.reverse
85
113
  end
114
+ return sorted_results.collect {|result| result[2].constantize.find(result[0])}
115
+ else
116
+ return []
86
117
  end
87
- unless collected_matches.empty?
88
- collected_matches.flatten!(1)
89
- objects_to_return = []
90
- collected_matches.sort{|x,y| x[1] <=> y[1]}.reverse.each do |match|
91
- objects_to_return << @models[match[2]].find(match[0])
118
+ end
119
+
120
+ def self.sort_by_field(base_results, search_object, match_threshold)
121
+ unsorted_results = ARSearch.get_object_counts(base_results, search_object.search_models, search_object.options(:match), match_threshold).flatten!(1) || []
122
+ unless unsorted_results.empty?
123
+ unsorted_objects = unsorted_results.collect {|result| result[2].constantize.find(result[0])}
124
+ sort_method = search_object.options(:sort_method)
125
+ case sort_method.class.to_s
126
+ when 'Symbol'
127
+ sorted_results = unsorted_objects.sort_by {|object| object[sort_method]}
128
+ when 'Proc'
129
+ sorted_results = unsorted_objects.sort_by {|object| sort_method.call(object)}
130
+ else
131
+ sorted_results = unsorted_objects
92
132
  end
93
- return objects_to_return
133
+ if search_object.options(:sort_direction) == :desc
134
+ sorted_results = sorted_results.reverse
135
+ end
136
+ return sorted_results
94
137
  else
95
138
  return []
96
139
  end
97
140
  end
98
-
99
141
  end
100
-
101
142
  end
@@ -1,162 +1,255 @@
1
1
  module ARIndexer
2
-
3
- # Contains functions for creating a forward index from text, then converting it to a reverse index
4
-
5
2
  module Indexer
6
-
7
- # Turns a string into lexicon array, including basic root words and plurals
8
- #
9
- # ==== Parameters
10
- #
11
- # text: the string to be converted
12
- #
13
- # ==== Returns
14
- #
15
- # array of strings
16
-
17
- def self.text_to_lexicon(text)
3
+ def self.break_string(value)
18
4
  # Remove HTML markup
19
- text.gsub!(/<[^>]+>/, ' ')
5
+ value.gsub!(/<[^>]+>/, ' ')
20
6
  # Decode HTML entities
21
7
  coder = HTMLEntities.new
22
- text = coder.decode(text)
8
+ value = coder.decode(value)
23
9
  # Remove most punctuation
24
- text.gsub!(/[^a-zA-Z0-9\s]/, '')
10
+ value.gsub!(/[^a-zA-Z0-9\s]/, '')
25
11
  # Move everything to lower case
26
- text.downcase!
12
+ value.downcase!
27
13
  # Split all words into an array
28
- lexicon = text.split(' ')
14
+ forward_index = value.split(' ')
29
15
  # Remove stopwords and duplicates
30
- lexicon = (lexicon - Stopwords::STOPWORDS).uniq
31
- return lexicon
32
- end
33
-
34
- # Expands the lexicon created by text_to_lexicon, adding plurals and root words
35
- #
36
- # ==== Parameters
37
- #
38
- # lexicon: array of strings to be expanded
39
- #
40
- # ==== Returns
41
- #
42
- # array of strings
43
-
44
- def self.expand_lexicon(lexicon)
45
- # Stem and pluralize
46
- lexicon.each do |word|
47
- root = Stemmer::stem_word(word)
48
- if !lexicon.include? root
49
- lexicon = lexicon.inject([root], :<<)
50
- end
51
- plural = word.pluralize
52
- if !lexicon.include? plural
53
- lexicon = lexicon.inject([plural], :<<)
54
- end
55
- end
56
- # Remove stopwords and duplicates again
57
- lexicon = (lexicon - Stopwords::STOPWORDS).uniq
58
- return lexicon
59
- end
60
-
61
- # Takes an array of strings to be indexed, and calls text_to_lexicon on each.
62
- # Returns the combined array flattened, uniquified, and sorted in alphabetical order
63
- #
64
- # ==== Parameters
65
- #
66
- # values_to_index: array of string values to index
67
- #
68
- # ==== Returns
69
- #
70
- # array of strings
71
-
72
- def self.build_forward_index(values_to_index)
73
- forward_index = []
74
- # Run text_to_lexicon for each indexed field
75
- values_to_index.each do |v|
76
- forward_index << self.text_to_lexicon(v) if ![nil, ''].include? v
77
- end
78
- # Return the lexicon flattened, uniquified, and in alphabetical order
79
- forward_index = forward_index.flatten.uniq.sort
16
+ forward_index = (forward_index - Stopwords::STOPWORDS).uniq
80
17
  return forward_index
81
18
  end
82
19
 
83
- # For a given model name and object id, compares the list of words with the forward index of the text.
84
- # If a word exists in the reverse index but not the forward index, removes the object id from the reverse index.
85
- # If the id array is empty, removes the reverse index record
86
- #
87
- # ==== Parameters
88
- #
89
- # * model_name: string version of the model name to clean records for
90
- # * record_id: object id to search for in the reverse index
91
- # * forward_index: the array of words to check against
92
-
93
- def self.clean_reverse_index(model_name, record_id, forward_index)
94
- reverse_index_records = ReverseIndex.where(:model_name => model_name)
95
- reverse_index_records.each do |rir|
96
- if rir.id_list.match(/,{0,1}#{record_id},{0,1}/)
97
- if !forward_index.include? rir.word
98
- id_array = rir.retrieve_id_array
99
- id_array.delete(record_id.to_i)
100
- if id_array.empty?
101
- rir.destroy
102
- else
103
- new_id_list = id_array.join(',')
104
- rir.update(:id_list => new_id_list)
105
- end
20
+ def self.index_string(model_name, object_id, field_name, value, repair_on_completion)
21
+ forward_index = self.break_string(value)
22
+ forward_index.each do |word|
23
+ if index_record = ReverseIndex.where(:model_name => model_name, :field_name => field_name, :word => word).first
24
+ current_id_array = index_record.retrieve_id_array
25
+ unless current_id_array.include? object_id
26
+ new_id_list = (current_id_array << object_id).sort.join(',')
27
+ index_record.update(:id_list => new_id_list)
106
28
  end
29
+ else
30
+ ReverseIndex.create(:model_name => model_name, :field_name => field_name, :word => word, :id_list => object_id)
107
31
  end
108
32
  end
33
+ repair_index(model_name, object_id, field_name, forward_index) if repair_on_completion
109
34
  end
110
35
 
111
- # Takes an array of values to index, runs it through build_forward_index(), then builds the reverse index
112
- # from the returned values
113
- #
114
- # ==== Parameters
115
- #
116
- # * model_name: the string version of the model name
117
- # * record_id: the id of the object being indexed
118
- # * values_to_index: array of string objects to use in building the reverse index
119
- # * cleaning_required: boolean flag, whether or not to run clean_reverse_index()
120
-
121
- def self.build_reverse_index(model_name, record_id, values_to_index, cleaning_required = false)
122
- forward_index = self.build_forward_index(values_to_index)
123
- forward_index.each do |word|
124
- if reverse_index_record = ReverseIndex.where(:model_name => model_name, :word => word).first
125
- id_array = reverse_index_record.retrieve_id_array
126
- if !id_array.include? record_id
127
- new_id_list = (id_array << record_id).join(',')
128
- reverse_index_record.update(:id_list => new_id_list)
36
+ def self.remove_index_id(model_name, object_id)
37
+ index_records = ReverseIndex.where(:model_name => model_name)
38
+ if index_records.count > 0
39
+ index_records.each do |record|
40
+ if record.id_list.match(/#{object_id},{0,1}/)
41
+ current_id_array = record.retrieve_id_array
42
+ if current_id_array.delete(object_id)
43
+ if current_id_array.empty?
44
+ record.destroy
45
+ else
46
+ new_id_list = current_id_array.join(',')
47
+ record.update(:id_list => new_id_list)
48
+ end
49
+ end
129
50
  end
130
- else
131
- ReverseIndex.create(:model_name => model_name, :word => word, :id_list => record_id)
132
51
  end
133
52
  end
134
- self.clean_reverse_index(model_name, record_id, forward_index) if cleaning_required
135
53
  end
136
54
 
137
- # Removes an object id from the reverse index for a specified model.
138
- # If the id array is empty after removing the record id, destroys the reverse index record
139
- #
140
- # ==== Parameters
141
- #
142
- # model_name: string version of the model name to remove records for
143
- # record_id: object id to remove records for
144
-
145
- def self.remove_from_reverse_index(model_name, record_id)
146
- reverse_index_records = ReverseIndex.where(:model_name => model_name)
147
- reverse_index_records.each do |rir|
148
- id_array = rir.retrieve_id_array
149
- if id_array.include? record_id.to_i
150
- id_array.delete(record_id.to_i)
151
- if id_array.empty?
152
- rir.destroy
153
- else
154
- rir.update(:id_list => id_array.join(','))
55
+ def self.repair_index(model_name, object_id, field_name, forward_index)
56
+ index_records = ReverseIndex.where(:model_name => model_name, :field_name => field_name)
57
+ if index_records.count > 0
58
+ index_records.each do |record|
59
+ if record.id_list.match(/#{object_id},{0,1}/)
60
+ unless forward_index.include?(record.word)
61
+ current_id_array = record.retrieve_id_array
62
+ if current_id_array.delete(object_id)
63
+ if current_id_array.empty?
64
+ record.destroy
65
+ else
66
+ new_id_list = current_id_array.join(',')
67
+ record.update(:id_list => new_id_list)
68
+ end
69
+ end
70
+ end
155
71
  end
156
72
  end
157
73
  end
158
74
  end
159
-
160
75
  end
161
76
 
77
+ module Stopwords
78
+ STOPWORDS = [
79
+ "a",
80
+ "about",
81
+ "above",
82
+ "after",
83
+ "again",
84
+ "against",
85
+ "all",
86
+ "am",
87
+ "an",
88
+ "and",
89
+ "any",
90
+ "are",
91
+ "aren't",
92
+ "as",
93
+ "at",
94
+ "be",
95
+ "because",
96
+ "been",
97
+ "before",
98
+ "being",
99
+ "below",
100
+ "between",
101
+ "both",
102
+ "but",
103
+ "by",
104
+ "can't",
105
+ "cannot",
106
+ "could",
107
+ "couldn't",
108
+ "did",
109
+ "didn't",
110
+ "do",
111
+ "does",
112
+ "doesn't",
113
+ "doing",
114
+ "don't",
115
+ "down",
116
+ "during",
117
+ "each",
118
+ "few",
119
+ "for",
120
+ "from",
121
+ "further",
122
+ "had",
123
+ "hadn't",
124
+ "has",
125
+ "hasn't",
126
+ "have",
127
+ "haven't",
128
+ "having",
129
+ "he",
130
+ "he'd",
131
+ "he'll",
132
+ "he's",
133
+ "her",
134
+ "here",
135
+ "here's",
136
+ "hers",
137
+ "herself",
138
+ "him",
139
+ "himself",
140
+ "his",
141
+ "how",
142
+ "how's",
143
+ "i",
144
+ "i'd",
145
+ "i'll",
146
+ "i'm",
147
+ "i've",
148
+ "if",
149
+ "in",
150
+ "into",
151
+ "is",
152
+ "isn't",
153
+ "it",
154
+ "it's",
155
+ "its",
156
+ "itself",
157
+ "let's",
158
+ "me",
159
+ "more",
160
+ "most",
161
+ "mustn't",
162
+ "my",
163
+ "myself",
164
+ "no",
165
+ "nor",
166
+ "not",
167
+ "of",
168
+ "off",
169
+ "on",
170
+ "once",
171
+ "only",
172
+ "or",
173
+ "other",
174
+ "ought",
175
+ "our",
176
+ "ours",
177
+ "ourselves",
178
+ "out",
179
+ "over",
180
+ "own",
181
+ "same",
182
+ "shan't",
183
+ "she",
184
+ "she'd",
185
+ "she'll",
186
+ "she's",
187
+ "should",
188
+ "shouldn't",
189
+ "so",
190
+ "some",
191
+ "such",
192
+ "than",
193
+ "that",
194
+ "that's",
195
+ "the",
196
+ "their",
197
+ "theirs",
198
+ "them",
199
+ "themselves",
200
+ "then",
201
+ "there",
202
+ "there's",
203
+ "these",
204
+ "they",
205
+ "they'd",
206
+ "they'll",
207
+ "they're",
208
+ "they've",
209
+ "this",
210
+ "those",
211
+ "through",
212
+ "to",
213
+ "too",
214
+ "under",
215
+ "until",
216
+ "up",
217
+ "very",
218
+ "was",
219
+ "wasn't",
220
+ "we",
221
+ "we'd",
222
+ "we'll",
223
+ "we're",
224
+ "we've",
225
+ "were",
226
+ "weren't",
227
+ "what",
228
+ "what's",
229
+ "when",
230
+ "when's",
231
+ "where",
232
+ "where's",
233
+ "which",
234
+ "while",
235
+ "who",
236
+ "who's",
237
+ "whom",
238
+ "why",
239
+ "why's",
240
+ "with",
241
+ "won't",
242
+ "would",
243
+ "wouldn't",
244
+ "you",
245
+ "you'd",
246
+ "you'll",
247
+ "you're",
248
+ "you've",
249
+ "your",
250
+ "yours",
251
+ "yourself",
252
+ "yourselves"
253
+ ]
254
+ end
162
255
  end
@@ -1,25 +1,15 @@
1
1
  module ARIndexer
2
-
3
2
  class ReverseIndex < ::ActiveRecord::Base
4
-
5
3
  if ::ActiveRecord::VERSION::MAJOR < 4
6
- attr_accessible :id_list, :model_name, :word
4
+ attr_accessible :model_name, :field_name, :word, :id_list
7
5
  end
8
6
 
9
- validates_uniqueness_of :word, :scope => :model_name
10
-
11
- # Retrieves the id_list property as a parsed array of integers
12
- #
13
- # ==== Returns
14
- #
15
- # array of integer
7
+ validates_uniqueness_of :word, :scope => [:model_name, :field_name]
16
8
 
17
9
  def retrieve_id_array
18
10
  id_array = self.id_list.split(',')
19
11
  id_array.map! {|id| id.to_i}
20
12
  return id_array
21
13
  end
22
-
23
14
  end
24
-
25
15
  end
data/lib/ar_indexer.rb CHANGED
@@ -1,21 +1,26 @@
1
+ # Require runtime dependencies
1
2
  require 'active_record'
2
3
  require 'active_support'
3
4
  require 'active_support/inflector'
4
5
  require 'htmlentities'
5
6
  require 'fast-stemmer'
6
7
 
8
+ # Require gem files
7
9
  require 'ar_indexer/reverse_index'
8
10
  require 'ar_indexer/has_reverse_index'
9
- require 'ar_indexer/stopwords'
10
11
  require 'ar_indexer/indexer'
11
12
  require 'ar_indexer/index_search'
12
13
 
13
14
  # Main gem module
14
-
15
15
  module ARIndexer
16
+ # Gem version storage
17
+ module Version
18
+ MAJOR = '0'
19
+ MINOR = '2'
20
+ BUILD = '0'
16
21
 
17
- VERSION = "0.1.2"
18
-
22
+ STRING = "#{MAJOR}.#{MINOR}.#{BUILD}"
23
+ end
19
24
  end
20
25
 
21
26
  include ARIndexer
metadata CHANGED
@@ -1,79 +1,79 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ar_indexer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
- - Josh Stump
7
+ - Josh MacLachlan
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-02-18 00:00:00.000000000 Z
11
+ date: 2014-10-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activerecord
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - '>='
17
+ - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: '0'
19
+ version: 3.0.0
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - '>='
24
+ - - ">="
25
25
  - !ruby/object:Gem::Version
26
- version: '0'
26
+ version: 3.0.0
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: activesupport
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - '>='
31
+ - - ">="
32
32
  - !ruby/object:Gem::Version
33
- version: '0'
33
+ version: 3.0.0
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - '>='
38
+ - - ">="
39
39
  - !ruby/object:Gem::Version
40
- version: '0'
40
+ version: 3.0.0
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: htmlentities
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
- - - '>='
45
+ - - ">="
46
46
  - !ruby/object:Gem::Version
47
47
  version: '0'
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
- - - '>='
52
+ - - ">="
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0'
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: fast-stemmer
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
- - - '>='
59
+ - - ">="
60
60
  - !ruby/object:Gem::Version
61
61
  version: '0'
62
62
  type: :runtime
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
- - - '>='
66
+ - - ">="
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0'
69
69
  description: Allows for reverse indexing selected ActiveRecord models. Handles searching
70
70
  and return of objects
71
- email: joshua.t.stump@gmail.com
71
+ email: josh.t.maclachlan@gmail.com
72
72
  executables: []
73
73
  extensions: []
74
74
  extra_rdoc_files: []
75
75
  files:
76
- - .gitignore
76
+ - ".gitignore"
77
77
  - Gemfile
78
78
  - LICENSE
79
79
  - README.md
@@ -83,28 +83,29 @@ files:
83
83
  - lib/ar_indexer/index_search.rb
84
84
  - lib/ar_indexer/indexer.rb
85
85
  - lib/ar_indexer/reverse_index.rb
86
- - lib/ar_indexer/stopwords.rb
87
- homepage: https://github.com/jstump/ar_indexer
86
+ homepage: https://github.com/jtmaclachlan/ar_indexer
88
87
  licenses:
89
88
  - GPL-2
90
89
  metadata: {}
91
- post_install_message:
90
+ post_install_message: If you're upgrading from v0.1.4 to v0.2.0, read the updated
91
+ documentation. Your application will throw exceptions when trying to access indexed
92
+ models.
92
93
  rdoc_options: []
93
94
  require_paths:
94
95
  - lib
95
96
  required_ruby_version: !ruby/object:Gem::Requirement
96
97
  requirements:
97
- - - '>='
98
+ - - ">="
98
99
  - !ruby/object:Gem::Version
99
100
  version: '0'
100
101
  required_rubygems_version: !ruby/object:Gem::Requirement
101
102
  requirements:
102
- - - '>='
103
+ - - ">="
103
104
  - !ruby/object:Gem::Version
104
105
  version: '0'
105
106
  requirements: []
106
107
  rubyforge_project:
107
- rubygems_version: 2.0.6
108
+ rubygems_version: 2.2.2
108
109
  signing_key:
109
110
  specification_version: 4
110
111
  summary: Allows for reverse indexing selected ActiveRecord models. Handles searching
@@ -1,186 +0,0 @@
1
- module ARIndexer
2
-
3
- # Contains array of stopwords that should not be stopwords
4
-
5
- module Stopwords
6
-
7
- STOPWORDS = [
8
- "a",
9
- "about",
10
- "above",
11
- "after",
12
- "again",
13
- "against",
14
- "all",
15
- "am",
16
- "an",
17
- "and",
18
- "any",
19
- "are",
20
- "aren't",
21
- "as",
22
- "at",
23
- "be",
24
- "because",
25
- "been",
26
- "before",
27
- "being",
28
- "below",
29
- "between",
30
- "both",
31
- "but",
32
- "by",
33
- "can't",
34
- "cannot",
35
- "could",
36
- "couldn't",
37
- "did",
38
- "didn't",
39
- "do",
40
- "does",
41
- "doesn't",
42
- "doing",
43
- "don't",
44
- "down",
45
- "during",
46
- "each",
47
- "few",
48
- "for",
49
- "from",
50
- "further",
51
- "had",
52
- "hadn't",
53
- "has",
54
- "hasn't",
55
- "have",
56
- "haven't",
57
- "having",
58
- "he",
59
- "he'd",
60
- "he'll",
61
- "he's",
62
- "her",
63
- "here",
64
- "here's",
65
- "hers",
66
- "herself",
67
- "him",
68
- "himself",
69
- "his",
70
- "how",
71
- "how's",
72
- "i",
73
- "i'd",
74
- "i'll",
75
- "i'm",
76
- "i've",
77
- "if",
78
- "in",
79
- "into",
80
- "is",
81
- "isn't",
82
- "it",
83
- "it's",
84
- "its",
85
- "itself",
86
- "let's",
87
- "me",
88
- "more",
89
- "most",
90
- "mustn't",
91
- "my",
92
- "myself",
93
- "no",
94
- "nor",
95
- "not",
96
- "of",
97
- "off",
98
- "on",
99
- "once",
100
- "only",
101
- "or",
102
- "other",
103
- "ought",
104
- "our",
105
- "ours",
106
- "ourselves",
107
- "out",
108
- "over",
109
- "own",
110
- "same",
111
- "shan't",
112
- "she",
113
- "she'd",
114
- "she'll",
115
- "she's",
116
- "should",
117
- "shouldn't",
118
- "so",
119
- "some",
120
- "such",
121
- "than",
122
- "that",
123
- "that's",
124
- "the",
125
- "their",
126
- "theirs",
127
- "them",
128
- "themselves",
129
- "then",
130
- "there",
131
- "there's",
132
- "these",
133
- "they",
134
- "they'd",
135
- "they'll",
136
- "they're",
137
- "they've",
138
- "this",
139
- "those",
140
- "through",
141
- "to",
142
- "too",
143
- "under",
144
- "until",
145
- "up",
146
- "very",
147
- "was",
148
- "wasn't",
149
- "we",
150
- "we'd",
151
- "we'll",
152
- "we're",
153
- "we've",
154
- "were",
155
- "weren't",
156
- "what",
157
- "what's",
158
- "when",
159
- "when's",
160
- "where",
161
- "where's",
162
- "which",
163
- "while",
164
- "who",
165
- "who's",
166
- "whom",
167
- "why",
168
- "why's",
169
- "with",
170
- "won't",
171
- "would",
172
- "wouldn't",
173
- "you",
174
- "you'd",
175
- "you'll",
176
- "you're",
177
- "you've",
178
- "your",
179
- "yours",
180
- "yourself",
181
- "yourselves"
182
- ]
183
-
184
- end
185
-
186
- end