mongoid_fulltext 0.6.1 → 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42) hide show
  1. checksums.yaml +15 -0
  2. data/.gitignore +47 -0
  3. data/.rspec +1 -1
  4. data/.rubocop.yml +6 -0
  5. data/.rubocop_todo.yml +101 -0
  6. data/.travis.yml +11 -3
  7. data/CHANGELOG.md +9 -2
  8. data/Gemfile +19 -9
  9. data/LICENSE +1 -1
  10. data/README.md +12 -9
  11. data/Rakefile +9 -29
  12. data/lib/mongoid/full_text_search/version.rb +5 -0
  13. data/lib/mongoid/full_text_search.rb +372 -0
  14. data/lib/mongoid/indexable.rb +13 -0
  15. data/lib/mongoid/indexes.rb +13 -0
  16. data/lib/mongoid_fulltext.rb +1 -341
  17. data/mongoid_fulltext.gemspec +16 -82
  18. data/spec/models/accentless_artwork.rb +1 -1
  19. data/spec/models/advanced_artwork.rb +1 -1
  20. data/spec/models/basic_artwork.rb +0 -1
  21. data/spec/models/delayed_artwork.rb +1 -2
  22. data/spec/models/external_artist.rb +1 -2
  23. data/spec/models/external_artwork.rb +1 -2
  24. data/spec/models/external_artwork_no_fields_supplied.rb +2 -2
  25. data/spec/models/filtered_artist.rb +4 -4
  26. data/spec/models/filtered_artwork.rb +7 -7
  27. data/spec/models/filtered_other.rb +3 -3
  28. data/spec/models/hidden_dragon.rb +0 -1
  29. data/spec/models/multi_external_artwork.rb +3 -3
  30. data/spec/models/multi_field_artist.rb +1 -1
  31. data/spec/models/multi_field_artwork.rb +1 -1
  32. data/spec/models/partitioned_artist.rb +8 -9
  33. data/spec/models/russian_artwork.rb +2 -2
  34. data/spec/models/short_prefixes_artwork.rb +3 -4
  35. data/spec/models/stopwords_artwork.rb +3 -4
  36. data/spec/mongoid/full_text_search_spec.rb +752 -0
  37. data/spec/spec_helper.rb +11 -7
  38. metadata +27 -68
  39. data/VERSION +0 -1
  40. data/lib/mongoid_indexes.rb +0 -12
  41. data/spec/config/mongoid.yml +0 -6
  42. data/spec/mongoid/fulltext_spec.rb +0 -799
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ MGNlMDBlNzg5YmVkODU3ZTIyYTFiNGI5N2M2ZTRkYTdmODkwNTA5OQ==
5
+ data.tar.gz: !binary |-
6
+ OWFjYzllZThlODIyOGQzMzJkN2MwYjc4Y2U3Y2I4ODBlMDUwZDA4Yw==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ ODViNTM3ZWYyMDdiYjk4NjBkNzBlZGEzNzM2YTIxNWYwZmI3ZjBmMzQ0ZTAz
10
+ MzUwODY1MWNiNGFmNWIwYWVkMTRkMTc1YjcxM2RjYzMwZjJiOGVlOTEyZjcy
11
+ OGZiYmQ0YmVkMGJhMWIwZjg5YjFkNDc1M2ZlM2NiZmU3MThkMWE=
12
+ data.tar.gz: !binary |-
13
+ MWMyNmYzYmI4MzUxYjYwOWZmM2RmMDgxYzUxOTg2Zjg2NDFhMTRhOGFlYjU2
14
+ NmEyNmU0ZWRhZjRiZmFiNGZhNDY3NTc1YThlODRjNjNkZmI0YThhN2RmZDdi
15
+ OTNmNGE5MzM4ZjQ1MTk5YTM0OGNhZjcxZjk0YTdkZmEwNDAxNDE=
data/.gitignore ADDED
@@ -0,0 +1,47 @@
1
+ # rcov generated
2
+ coverage
3
+
4
+ # rdoc generated
5
+ rdoc
6
+
7
+ # yard generated
8
+ doc
9
+ .yardoc
10
+
11
+ # bundler
12
+ .bundle
13
+
14
+ # jeweler generated
15
+ pkg
16
+
17
+ # RVM
18
+ .rvmrc
19
+
20
+ # Have editor/IDE/OS specific files you need to ignore? Consider using a global gitignore:
21
+ #
22
+ # * Create a file at ~/.gitignore
23
+ # * Include files you want ignored
24
+ # * Run: git config --global core.excludesfile ~/.gitignore
25
+ #
26
+ # After doing this, these files will be ignored in all your git projects,
27
+ # saving you from having to 'pollute' every project you touch with them
28
+ #
29
+ # Not sure what needs to be ignored for particular editors/OSes? Here are some ideas to get you started. (Remember to remove the leading # of the line)
30
+ #
31
+ # For MacOS:
32
+
33
+ .DS_Store
34
+
35
+ # For TextMate
36
+ *.tmproj
37
+ tmtags
38
+
39
+ # For emacs:
40
+ *~
41
+ \#*
42
+ .\#*
43
+
44
+ # For vim:
45
+ *.swp
46
+
47
+ Gemfile.lock
data/.rspec CHANGED
@@ -1,2 +1,2 @@
1
1
  --color
2
-
2
+ --format documentation
data/.rubocop.yml ADDED
@@ -0,0 +1,6 @@
1
+ AllCops:
2
+ Exclude:
3
+ - vendor/**/*
4
+ - bin/**/*
5
+
6
+ inherit_from: .rubocop_todo.yml
data/.rubocop_todo.yml ADDED
@@ -0,0 +1,101 @@
1
+ # This configuration was generated by
2
+ # `rubocop --auto-gen-config`
3
+ # on 2015-09-18 15:56:53 -0400 using RuboCop version 0.34.1.
4
+ # The point is for the user to remove these configuration records
5
+ # one by one as the offenses are removed from the code base.
6
+ # Note that changes in the inspected code, or installation of new
7
+ # versions of RuboCop, may require this file to be generated again.
8
+
9
+ # Offense count: 1
10
+ # Configuration parameters: AllowSafeAssignment.
11
+ Lint/AssignmentInCondition:
12
+ Exclude:
13
+ - 'lib/mongoid/full_text_search.rb'
14
+
15
+ # Offense count: 1
16
+ # Cop supports --auto-correct.
17
+ # Configuration parameters: AlignWith, SupportedStyles, AutoCorrect.
18
+ Lint/EndAlignment:
19
+ Enabled: false
20
+
21
+ # Offense count: 2
22
+ Lint/HandleExceptions:
23
+ Exclude:
24
+ - 'lib/mongoid/full_text_search.rb'
25
+ - 'spec/mongoid/full_text_search_spec.rb'
26
+
27
+ # Offense count: 1
28
+ Lint/NonLocalExitFromIterator:
29
+ Exclude:
30
+ - 'lib/mongoid/full_text_search.rb'
31
+
32
+ # Offense count: 4
33
+ Lint/UselessAssignment:
34
+ Exclude:
35
+ - 'spec/mongoid/full_text_search_spec.rb'
36
+
37
+ # Offense count: 5
38
+ Metrics/AbcSize:
39
+ Max: 106
40
+
41
+ # Offense count: 4
42
+ Metrics/CyclomaticComplexity:
43
+ Max: 22
44
+
45
+ # Offense count: 262
46
+ # Configuration parameters: AllowURI, URISchemes.
47
+ Metrics/LineLength:
48
+ Max: 174
49
+
50
+ # Offense count: 5
51
+ # Configuration parameters: CountComments.
52
+ Metrics/MethodLength:
53
+ Max: 50
54
+
55
+ # Offense count: 1
56
+ # Configuration parameters: CountComments.
57
+ Metrics/ModuleLength:
58
+ Max: 224
59
+
60
+ # Offense count: 4
61
+ Metrics/PerceivedComplexity:
62
+ Max: 25
63
+
64
+ # Offense count: 1
65
+ Style/AsciiComments:
66
+ Exclude:
67
+ - 'spec/mongoid/full_text_search_spec.rb'
68
+
69
+ # Offense count: 1
70
+ # Configuration parameters: EnforcedStyle, SupportedStyles.
71
+ Style/ClassAndModuleChildren:
72
+ Exclude:
73
+ - 'lib/mongoid/full_text_search.rb'
74
+
75
+ # Offense count: 1
76
+ Style/ConstantName:
77
+ Exclude:
78
+ - 'spec/models/russian_artwork.rb'
79
+
80
+ # Offense count: 22
81
+ Style/Documentation:
82
+ Enabled: false
83
+
84
+ # Offense count: 3
85
+ # Configuration parameters: EnforcedStyle, SupportedStyles.
86
+ Style/FormatString:
87
+ Exclude:
88
+ - 'lib/mongoid/full_text_search.rb'
89
+ - 'spec/models/external_artwork_no_fields_supplied.rb'
90
+
91
+ # Offense count: 2
92
+ Style/MultilineBlockChain:
93
+ Exclude:
94
+ - 'lib/mongoid/full_text_search.rb'
95
+
96
+ # Offense count: 4
97
+ # Configuration parameters: Methods.
98
+ Style/SingleLineBlockParams:
99
+ Exclude:
100
+ - 'lib/mongoid/full_text_search.rb'
101
+ - 'spec/mongoid/full_text_search_spec.rb'
data/.travis.yml CHANGED
@@ -1,7 +1,15 @@
1
1
  rvm:
2
+ - 2.2
3
+ - 2.1
4
+ - 2.0
2
5
  - 1.9.3
3
- - ruby-head
6
+ - rbx-2.2.10
7
+ - jruby-19mode
8
+
4
9
  env:
5
- - MONGOID=3.0.0
6
- - MONGOID=3.1.0
10
+ - MONGOID_VERSION=3.0.0
11
+ - MONGOID_VERSION=3.1.0
12
+ - MONGOID_VERSION=4.0
13
+ - MONGOID_VERSION=5.0
14
+
7
15
  services: mongodb
data/CHANGELOG.md CHANGED
@@ -1,10 +1,17 @@
1
+ 0.7.0 (9/18/2015)
2
+ -----------------
3
+
4
+ * Compatible with Mongoid 4 and 5 - [@dblock](https://github.com/dblock).
5
+ * Rewritten .gemspec, removed Jeweler - [@dblock](https://github.com/dblock).
6
+ * Added RuboCop - [@dblock](https://github.com/dblock).
7
+
1
8
  0.6.1 (4/3/2013)
2
- --------------------
9
+ ----------------
3
10
 
4
11
  * [#6](https://github.com/artsy/mongoid_fulltext/pull/6): Upgrade to Mongoid ~> 3.0 - [@simi](https://github.com/simi).
5
12
 
6
13
  0.6.0 (7/16/2012)
7
- --------------------
14
+ -----------------
8
15
 
9
16
  * [#2](https://github.com/artsy/mongoid_fulltext/pull/2): Upgrade to Mongoid 3.0 - [@volmer](https://github.com/volmer).
10
17
  * [#1](https://github.com/artsy/mongoid_fulltext/pull/1): Fix: downcase destroys non-latin strings - [@netoneko](https://github.com/netoneko).
data/Gemfile CHANGED
@@ -1,15 +1,25 @@
1
- source "http://rubygems.org"
1
+ source 'http://rubygems.org'
2
2
 
3
- if ENV['TRAVIS']
4
- gem "mongoid", "~> #{ENV['MONGOID']}"
3
+ case version = ENV['MONGOID_VERSION'] || '5'
4
+ when /5/
5
+ gem 'mongoid', '~> 5.0'
6
+ when /4/
7
+ gem 'mongoid', '~> 4.0'
8
+ when /3.1.0/
9
+ gem 'mongoid', '~> 3.1.0'
10
+ when /3.0.0/
11
+ gem 'mongoid', '~> 3.0.0'
5
12
  else
6
- gem "mongoid", "~> 3.0"
13
+ gem 'mongoid', version
7
14
  end
8
15
 
9
- gem "unicode_utils", "~> 1.0.0"
16
+ gemspec
10
17
 
11
- group :development, :test do
12
- gem "bundler"
13
- gem "rspec", "~> 2.10.0"
14
- gem "jeweler", "~> 1.8.3"
18
+ group :test do
19
+ gem 'rspec'
20
+ end
21
+
22
+ group :development do
23
+ gem 'rake'
24
+ gem 'rubocop', '0.34.1'
15
25
  end
data/LICENSE CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2011-2012 by Artsy, Inc.
1
+ Copyright (c) 2011-2015 by Artsy, Inc. & Contributors
2
2
 
3
3
  Permission is hereby granted, free of charge, to any person obtaining a copy
4
4
  of this software and associated documentation files (the "Software"), to deal
data/README.md CHANGED
@@ -1,19 +1,22 @@
1
- Mongoid Fulltext Search [![Build Status](https://secure.travis-ci.org/artsy/mongoid_fulltext.png)](http://travis-ci.org/artsy/mongoid_fulltext)
1
+ Mongoid Fulltext Search
2
2
  =======================
3
3
 
4
+ [![Build Status](https://secure.travis-ci.org/artsy/mongoid_fulltext.svg)](http://travis-ci.org/artsy/mongoid_fulltext)
5
+ [![Gem Version](https://badge.fury.io/rb/mongoid_fulltext.svg)](http://badge.fury.io/rb/mongoid_fulltext)
6
+
4
7
  Full-text search using n-gram matching for the Mongoid ODM. Tested on MongoDB 1.6 and above, but
5
8
  probably works on earlier versions as well.
6
9
 
7
- MongoDB currently has no native full-text search capabilities, so this gem is a good fit for cases
8
- where you want something a little less than a full-blown indexing service like Solr. mongoid_fulltext
10
+ MongoDB introduced full-text search capabilities in v2.4, but this gem is still a good fit for cases
11
+ where you want something a little less than a full-blown indexing service. The mongoid_fulltext gem
9
12
  lets you do a fuzzy string search across relatively short strings, which makes it good for populating
10
13
  autocomplete boxes based on the display names of your Rails models but not appropriate for, say,
11
14
  indexing hundreds of thousands of HTML documents.
12
15
 
13
16
  Install
14
- --------------
17
+ -------
15
18
 
16
- Version 0.6.0 or newer of this gem requires Ruby 1.9.3 and Mongoid 3.0.
19
+ Version 0.6.1 or newer of this gem requires Ruby 1.9.3 or newer and works with Mongoid 3, 4 and 5.
17
20
  Use version 0.5.x for Mongoid 2.4.x and Ruby 1.8.7, 1.9.2 or 1.9.3.
18
21
 
19
22
  For Ruby 1.8.7 and/or Mongoid 2.x use [mongoid_fulltext 0.5.x](https://github.com/artsy/mongoid_fulltext/tree/0.5-stable).
@@ -22,8 +25,8 @@ For Ruby 1.8.7 and/or Mongoid 2.x use [mongoid_fulltext 0.5.x](https://github.co
22
25
  gem 'mongoid_fulltext'
23
26
  ```
24
27
 
25
- Some examples:
26
- --------------
28
+ Examples
29
+ --------
27
30
 
28
31
  Suppose you have an `Artist` model and want to index each artist's name:
29
32
 
@@ -395,7 +398,7 @@ Fork the project. Make your feature addition or bug fix with tests. Send a pull
395
398
  Copyright and License
396
399
  ---------------------
397
400
 
398
- MIT License, see [LICENSE](https://github.com/aaw/mongoid_fulltext/blob/master/LICENSE) for details.
401
+ MIT License, see [LICENSE](LICENSE) for details.
399
402
 
400
- (c) 2011-2012 [Art.sy Inc.](http://artsy.github.com)
403
+ (c) 2011-2015 [Artsy Inc.](http://artsy.github.io)
401
404
 
data/Rakefile CHANGED
@@ -1,36 +1,16 @@
1
1
  require 'rubygems'
2
- require 'bundler'
3
- begin
4
- Bundler.setup(:default, :development)
5
- rescue Bundler::BundlerError => e
6
- $stderr.puts e.message
7
- $stderr.puts "Run `bundle install` to install missing gems"
8
- exit e.status_code
9
- end
10
- require 'rake'
11
- require 'rspec/core/rake_task'
2
+ require 'bundler/gem_tasks'
12
3
 
13
- require 'jeweler'
14
- Jeweler::Tasks.new do |gem|
15
- # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
16
- gem.name = "mongoid_fulltext"
17
- gem.homepage = "http://github.com/aaw/mongoid_fulltext"
18
- gem.license = "MIT"
19
- gem.summary = %Q{Full-text search for the Mongoid ORM}
20
- gem.description = %Q{Full-text search for the Mongoid ORM, using n-grams extracted from text}
21
- gem.email = "aaron.windsor@gmail.com"
22
- gem.authors = ["Aaron Windsor"]
23
- # Include your dependencies below. Runtime dependencies are required when using your gem,
24
- # and development dependencies are only needed for development (ie running rake tasks, tests, etc)
25
- # gem.add_runtime_dependency 'jabber4r', '> 0.1'
26
- # gem.add_development_dependency 'rspec', '> 1.2.3'
27
- end
28
- Jeweler::RubygemsDotOrgTasks.new
4
+ Bundler.setup :default, :development
29
5
 
6
+ require 'rspec/core'
7
+ require 'rspec/core/rake_task'
30
8
 
31
- desc "Run all tests"
32
9
  RSpec::Core::RakeTask.new(:spec) do |spec|
33
- spec.pattern = "spec/**/*_spec.rb"
10
+ spec.pattern = FileList['spec/**/*_spec.rb']
34
11
  end
35
12
 
36
- task :default => :spec
13
+ require 'rubocop/rake_task'
14
+ RuboCop::RakeTask.new(:rubocop)
15
+
16
+ task default: [:rubocop, :spec]
@@ -0,0 +1,5 @@
1
+ module Mongoid
2
+ module FullTextSearch
3
+ VERSION = '0.7.0'
4
+ end
5
+ end
@@ -0,0 +1,372 @@
1
+ require 'mongoid'
2
+ require 'mongoid/compatibility'
3
+ if Mongoid::Compatibility::Version.mongoid3?
4
+ require_relative 'indexes'
5
+ else
6
+ require_relative 'indexable'
7
+ end
8
+ require 'unicode_utils'
9
+ require 'cgi'
10
+
11
+ module Mongoid::FullTextSearch
12
+ extend ActiveSupport::Concern
13
+
14
+ included do
15
+ cattr_accessor :mongoid_fulltext_config
16
+ end
17
+
18
+ class UnspecifiedIndexError < StandardError; end
19
+ class UnknownFilterQueryOperator < StandardError; end
20
+
21
+ module ClassMethods
22
+ def fulltext_search_in(*args)
23
+ self.mongoid_fulltext_config = {} if mongoid_fulltext_config.nil?
24
+ options = args.last.is_a?(Hash) ? args.pop : {}
25
+ if options.key?(:index_name)
26
+ index_name = options[:index_name]
27
+ else
28
+ index_name = 'mongoid_fulltext.index_%s_%s' % [name.downcase, mongoid_fulltext_config.count]
29
+ end
30
+
31
+ config = {
32
+ alphabet: 'abcdefghijklmnopqrstuvwxyz0123456789 ',
33
+ word_separators: "-_ \n\t",
34
+ ngram_width: 3,
35
+ max_ngrams_to_search: 6,
36
+ apply_prefix_scoring_to_all_words: true,
37
+ index_full_words: true,
38
+ index_short_prefixes: false,
39
+ max_candidate_set_size: 1000,
40
+ remove_accents: true,
41
+ reindex_immediately: true,
42
+ stop_words: Hash[%w(i a s t me my we he it am is be do an if
43
+ or as of at by to up in on no so our you him
44
+ his she her its who are was has had did the and
45
+ but for out off why how all any few nor not own
46
+ too can don now ours your hers they them what whom
47
+ this that were been have does with into from down over
48
+ then once here when both each more most some such only
49
+ same than very will just yours their which these those
50
+ being doing until while about after above below under
51
+ again there where other myself itself theirs having during
52
+ before should himself herself because against between through
53
+ further yourself ourselves yourselves themselves).map { |x| [x, true] }]
54
+ }
55
+
56
+ config.update(options)
57
+
58
+ args = [:to_s] if args.empty?
59
+ config[:ngram_fields] = args
60
+ config[:alphabet] = Hash[config[:alphabet].split('').map { |ch| [ch, ch] }]
61
+ config[:word_separators] = Hash[config[:word_separators].split('').map { |ch| [ch, ch] }]
62
+ mongoid_fulltext_config[index_name] = config
63
+
64
+ before_save(:update_ngram_index) if config[:reindex_immediately]
65
+ before_destroy :remove_from_ngram_index
66
+ end
67
+
68
+ def create_fulltext_indexes
69
+ return unless mongoid_fulltext_config
70
+ mongoid_fulltext_config.each_pair do |index_name, fulltext_config|
71
+ fulltext_search_ensure_indexes(index_name, fulltext_config)
72
+ end
73
+ end
74
+
75
+ def fulltext_search_ensure_indexes(index_name, config)
76
+ db = collection.database
77
+ coll = db[index_name]
78
+
79
+ # The order of filters matters when the same index is used from two or more collections.
80
+ filter_indexes = (config[:filters] || []).map do |key, _value|
81
+ ["filter_values.#{key}", 1]
82
+ end.sort_by { |filter_index| filter_index[0] }
83
+
84
+ index_definition = [['ngram', 1], ['score', -1]].concat(filter_indexes)
85
+
86
+ # Since the definition of the index could have changed, we'll clean up by
87
+ # removing any existing ngram indexes that don't contain every key of the new definition.
88
+ correct_keys = index_definition.map { |field_def| field_def[0] }
89
+ all_filter_keys = filter_indexes.map { |field_def| field_def[0] }
90
+ coll.indexes.each do |idef|
91
+ keys = idef['key'].keys
92
+ next unless keys.member?('ngram')
93
+ all_filter_keys |= keys.find_all { |key| key.starts_with?('filter_values.') }
94
+ next unless keys & correct_keys != correct_keys
95
+ Mongoid.logger.info "Dropping #{idef['name']} [#{keys & correct_keys} <=> #{correct_keys}]" if Mongoid.logger
96
+ if Mongoid::Compatibility::Version.mongoid5?
97
+ coll.indexes.drop_one(idef['key'])
98
+ else
99
+ coll.indexes.drop(idef['key'])
100
+ end
101
+ end
102
+
103
+ if all_filter_keys.length > filter_indexes.length
104
+ filter_indexes = all_filter_keys.map { |key| [key, 1] }.sort_by { |filter_index| filter_index[0] }
105
+ index_definition = [['ngram', 1], ['score', -1]].concat(filter_indexes)
106
+ end
107
+
108
+ Mongoid.logger.info "Ensuring fts_index on #{coll.name}: #{index_definition}" if Mongoid.logger
109
+ if Mongoid::Compatibility::Version.mongoid5?
110
+ coll.indexes.create_one(Hash[index_definition], name: 'fts_index')
111
+ else
112
+ coll.indexes.create(Hash[index_definition], name: 'fts_index')
113
+ end
114
+
115
+ Mongoid.logger.info "Ensuring document_id index on #{coll.name}" if Mongoid.logger
116
+ if Mongoid::Compatibility::Version.mongoid5?
117
+ coll.indexes.create_one('document_id' => 1) # to make removes fast
118
+ else
119
+ coll.indexes.create('document_id' => 1) # to make removes fast
120
+ end
121
+ end
122
+
123
+ def fulltext_search(query_string, options = {})
124
+ max_results = options.key?(:max_results) ? options.delete(:max_results) : 10
125
+ return_scores = options.key?(:return_scores) ? options.delete(:return_scores) : false
126
+ if mongoid_fulltext_config.count > 1 && !options.key?(:index)
127
+ error_message = '%s is indexed by multiple full-text indexes. You must specify one by passing an :index_name parameter'
128
+ fail UnspecifiedIndexError, error_message % name, caller
129
+ end
130
+ index_name = options.key?(:index) ? options.delete(:index) : mongoid_fulltext_config.keys.first
131
+
132
+ # Options hash should only contain filters after this point
133
+
134
+ ngrams = all_ngrams(query_string, mongoid_fulltext_config[index_name])
135
+ return [] if ngrams.empty?
136
+
137
+ # For each ngram, construct the query we'll use to pull index documents and
138
+ # get a count of the number of index documents containing that n-gram
139
+ ordering = { 'score' => -1 }
140
+ limit = mongoid_fulltext_config[index_name][:max_candidate_set_size]
141
+ coll = collection.database[index_name]
142
+ cursors = ngrams.map do |ngram|
143
+ query = { 'ngram' => ngram[0] }
144
+ query.update(map_query_filters options)
145
+ count = coll.find(query).count
146
+ { ngram: ngram, count: count, query: query }
147
+ end.sort! { |record1, record2| record1[:count] <=> record2[:count] }
148
+
149
+ # Using the queries we just constructed and the n-gram frequency counts we
150
+ # just computed, pull in about *:max_candidate_set_size* candidates by
151
+ # considering the n-grams in order of increasing frequency. When we've
152
+ # spent all *:max_candidate_set_size* candidates, pull the top-scoring
153
+ # *max_results* candidates for each remaining n-gram.
154
+ results_so_far = 0
155
+ candidates_list = cursors.map do |doc|
156
+ next if doc[:count] == 0
157
+ query_result = coll.find(doc[:query])
158
+ if results_so_far >= limit
159
+ query_result = query_result.sort(ordering).limit(max_results)
160
+ elsif doc[:count] > limit - results_so_far
161
+ query_result = query_result.sort(ordering).limit(limit - results_so_far)
162
+ end
163
+ results_so_far += doc[:count]
164
+ ngram_score = ngrams[doc[:ngram][0]]
165
+ Hash[query_result.map do |candidate|
166
+ [candidate['document_id'],
167
+ { clazz: candidate['class'], score: candidate['score'] * ngram_score }]
168
+ end]
169
+ end.compact
170
+
171
+ # Finally, score all candidates by matching them up with other candidates that are
172
+ # associated with the same document. This is similar to how you might process a
173
+ # boolean AND query, except that with an AND query, you'd stop after considering
174
+ # the first candidate list and matching its candidates up with candidates from other
175
+ # lists, whereas here we want the search to be a little fuzzier so we'll run through
176
+ # all candidate lists, removing candidates as we match them up.
177
+ all_scores = []
178
+ until candidates_list.empty?
179
+ candidates = candidates_list.pop
180
+ scores = candidates.map do |candidate_id, data|
181
+ { id: candidate_id,
182
+ clazz: data[:clazz],
183
+ score: data[:score] + candidates_list.map { |others| (others.delete(candidate_id) || { score: 0 })[:score] }.sum
184
+ }
185
+ end
186
+ all_scores.concat(scores)
187
+ end
188
+ all_scores.sort! { |document1, document2| -document1[:score] <=> -document2[:score] }
189
+ instantiate_mapreduce_results(all_scores[0..max_results - 1], return_scores: return_scores)
190
+ end
191
+
192
+ def instantiate_mapreduce_result(result)
193
+ result[:clazz].constantize.find(result[:id])
194
+ end
195
+
196
+ def instantiate_mapreduce_results(results, options)
197
+ if options[:return_scores]
198
+ results.map { |result| [instantiate_mapreduce_result(result), result[:score]] }.find_all { |result| !result[0].nil? }
199
+ else
200
+ results.map { |result| instantiate_mapreduce_result(result) }.compact
201
+ end
202
+ end
203
+
204
+ def all_ngrams(str, config, bound_number_returned = true)
205
+ return {} if str.nil?
206
+
207
+ if config[:remove_accents]
208
+ if defined?(UnicodeUtils)
209
+ str = UnicodeUtils.nfkd(str)
210
+ elsif defined?(DiacriticsFu)
211
+ str = DiacriticsFu.escape(str)
212
+ end
213
+ end
214
+
215
+ # Remove any characters that aren't in the alphabet and aren't word separators
216
+ filtered_str = str.mb_chars.downcase.to_s.split('').find_all { |ch| config[:alphabet][ch] || config[:word_separators][ch] }.join('')
217
+
218
+ # Figure out how many ngrams to extract from the string. If we can't afford to extract all ngrams,
219
+ # step over the string in evenly spaced strides to extract ngrams. For example, to extract 3 3-letter
220
+ # ngrams from 'abcdefghijk', we'd want to extract 'abc', 'efg', and 'ijk'.
221
+ if bound_number_returned
222
+ step_size = [((filtered_str.length - config[:ngram_width]).to_f / config[:max_ngrams_to_search]).ceil, 1].max
223
+ else
224
+ step_size = 1
225
+ end
226
+
227
+ # Create an array of records of the form {:ngram => x, :score => y} for all ngrams that occur in the
228
+ # input string using the step size that we just computed. Let score(x,y) be the score of string x
229
+ # compared with string y - assigning scores to ngrams with the square root-based scoring function
230
+ # below and multiplying scores of matching ngrams together yields a score function that has the
231
+ # property that score(x,y) > score(x,z) for any string z containing y and score(x,y) > score(x,z)
232
+ # for any string z contained in y.
233
+ ngram_array = (0..filtered_str.length - config[:ngram_width]).step(step_size).map do |i|
234
+ if i == 0 || (config[:apply_prefix_scoring_to_all_words] && \
235
+ config[:word_separators].key?(filtered_str[i - 1].chr))
236
+ score = Math.sqrt(1 + 1.0 / filtered_str.length)
237
+ else
238
+ score = Math.sqrt(2.0 / filtered_str.length)
239
+ end
240
+ { ngram: filtered_str[i..i + config[:ngram_width] - 1], score: score }
241
+ end
242
+
243
+ # If an ngram appears multiple times in the query string, keep the max score
244
+ ngram_array = ngram_array.group_by { |h| h[:ngram] }.map { |key, values| { ngram: key, score: values.map { |v| v[:score] }.max } }
245
+
246
+ if config[:index_short_prefixes] || config[:index_full_words]
247
+ split_regex_def = config[:word_separators].keys.map { |k| Regexp.escape(k) }.join
248
+ split_regex = Regexp.compile("[#{split_regex_def}]")
249
+ all_words = filtered_str.split(split_regex)
250
+ end
251
+
252
+ # Add 'short prefix' records to the array: prefixes of the string that are length (ngram_width - 1)
253
+ if config[:index_short_prefixes]
254
+ prefixes_seen = {}
255
+ all_words.each do |word|
256
+ next if word.length < config[:ngram_width] - 1
257
+ prefix = word[0...config[:ngram_width] - 1]
258
+ if prefixes_seen[prefix].nil? && (config[:stop_words][word].nil? || word == filtered_str)
259
+ ngram_array << { ngram: prefix, score: 1 + 1.0 / filtered_str.length }
260
+ prefixes_seen[prefix] = true
261
+ end
262
+ end
263
+ end
264
+
265
+ # Add records to the array of ngrams for each full word in the string that isn't a stop word
266
+ if config[:index_full_words]
267
+ full_words_seen = {}
268
+ all_words.each do |word|
269
+ if word.length > 1 && full_words_seen[word].nil? && (config[:stop_words][word].nil? || word == filtered_str)
270
+ ngram_array << { ngram: word, score: 1 + 1.0 / filtered_str.length }
271
+ full_words_seen[word] = true
272
+ end
273
+ end
274
+ end
275
+
276
+ # If an ngram appears as any combination of full word, short prefix, and ngram, keep the sum of all of its scores
277
+ Hash[ngram_array.group_by { |h| h[:ngram] }.map { |key, values| [key, values.map { |v| v[:score] }.sum] }]
278
+ end
279
+
280
+ def remove_from_ngram_index
281
+ mongoid_fulltext_config.each_pair do |index_name, _fulltext_config|
282
+ coll = collection.database[index_name]
283
+ if Mongoid::Compatibility::Version.mongoid5?
284
+ coll.find('class' => name).delete_many
285
+ else
286
+ coll.find('class' => name).remove_all
287
+ end
288
+ end
289
+ end
290
+
291
+ def update_ngram_index
292
+ all.each(&:update_ngram_index)
293
+ end
294
+
295
+ private
296
+
297
+ # Take a list of filters to be mapped so they can update the query
298
+ # used upon the fulltext search of the ngrams
299
+ def map_query_filters(filters)
300
+ Hash[filters.map do|key, value|
301
+ case value
302
+ when Hash then
303
+ if value.key? :any then format_query_filter('$in', key, value[:any])
304
+ elsif value.key? :all then format_query_filter('$all', key, value[:all])
305
+ else fail UnknownFilterQueryOperator, value.keys.join(','), caller end
306
+ else format_query_filter('$all', key, value)
307
+ end
308
+ end]
309
+ end
310
+
311
+ def format_query_filter(operator, key, value)
312
+ ['filter_values.%s' % key, { operator => [value].flatten }]
313
+ end
314
+ end
315
+
316
+ def update_ngram_index
317
+ mongoid_fulltext_config.each_pair do |index_name, fulltext_config|
318
+ if condition = fulltext_config[:update_if]
319
+ case condition
320
+ when Symbol then next unless send condition
321
+ when String then next unless instance_eval condition
322
+ when Proc then next unless condition.call self
323
+ else; next
324
+ end
325
+ end
326
+
327
+ # remove existing ngrams from external index
328
+ coll = collection.database[index_name.to_sym]
329
+ if Mongoid::Compatibility::Version.mongoid5?
330
+ coll.find('document_id' => _id).delete_many
331
+ else
332
+ coll.find('document_id' => _id).remove_all
333
+ end
334
+ # extract ngrams from fields
335
+ field_values = fulltext_config[:ngram_fields].map { |field| send(field) }
336
+ ngrams = field_values.inject({}) { |accum, item| accum.update(self.class.all_ngrams(item, fulltext_config, false)) }
337
+ return if ngrams.empty?
338
+ # apply filters, if necessary
339
+ filter_values = nil
340
+ if fulltext_config.key?(:filters)
341
+ filter_values = Hash[fulltext_config[:filters].map do |key, value|
342
+ begin
343
+ [key, value.call(self)]
344
+ rescue
345
+ # Suppress any exceptions caused by filters
346
+ end
347
+ end.compact]
348
+ end
349
+ # insert new ngrams in external index
350
+ ngrams.each_pair do |ngram, score|
351
+ index_document = { 'ngram' => ngram, 'document_id' => _id, 'score' => score, 'class' => self.class.name }
352
+ index_document['filter_values'] = filter_values if fulltext_config.key?(:filters)
353
+ if Mongoid::Compatibility::Version.mongoid5?
354
+ coll.insert_one(index_document)
355
+ else
356
+ coll.insert(index_document)
357
+ end
358
+ end
359
+ end
360
+ end
361
+
362
+ def remove_from_ngram_index
363
+ mongoid_fulltext_config.each_pair do |index_name, _fulltext_config|
364
+ coll = collection.database[index_name]
365
+ if Mongoid::Compatibility::Version.mongoid5?
366
+ coll.find('document_id' => _id).delete_many
367
+ else
368
+ coll.find('document_id' => _id).remove_all
369
+ end
370
+ end
371
+ end
372
+ end