mongoid_fulltext 0.6.1 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/.gitignore +47 -0
- data/.rspec +1 -1
- data/.rubocop.yml +6 -0
- data/.rubocop_todo.yml +101 -0
- data/.travis.yml +11 -3
- data/CHANGELOG.md +9 -2
- data/Gemfile +19 -9
- data/LICENSE +1 -1
- data/README.md +12 -9
- data/Rakefile +9 -29
- data/lib/mongoid/full_text_search/version.rb +5 -0
- data/lib/mongoid/full_text_search.rb +372 -0
- data/lib/mongoid/indexable.rb +13 -0
- data/lib/mongoid/indexes.rb +13 -0
- data/lib/mongoid_fulltext.rb +1 -341
- data/mongoid_fulltext.gemspec +16 -82
- data/spec/models/accentless_artwork.rb +1 -1
- data/spec/models/advanced_artwork.rb +1 -1
- data/spec/models/basic_artwork.rb +0 -1
- data/spec/models/delayed_artwork.rb +1 -2
- data/spec/models/external_artist.rb +1 -2
- data/spec/models/external_artwork.rb +1 -2
- data/spec/models/external_artwork_no_fields_supplied.rb +2 -2
- data/spec/models/filtered_artist.rb +4 -4
- data/spec/models/filtered_artwork.rb +7 -7
- data/spec/models/filtered_other.rb +3 -3
- data/spec/models/hidden_dragon.rb +0 -1
- data/spec/models/multi_external_artwork.rb +3 -3
- data/spec/models/multi_field_artist.rb +1 -1
- data/spec/models/multi_field_artwork.rb +1 -1
- data/spec/models/partitioned_artist.rb +8 -9
- data/spec/models/russian_artwork.rb +2 -2
- data/spec/models/short_prefixes_artwork.rb +3 -4
- data/spec/models/stopwords_artwork.rb +3 -4
- data/spec/mongoid/full_text_search_spec.rb +752 -0
- data/spec/spec_helper.rb +11 -7
- metadata +27 -68
- data/VERSION +0 -1
- data/lib/mongoid_indexes.rb +0 -12
- data/spec/config/mongoid.yml +0 -6
- data/spec/mongoid/fulltext_spec.rb +0 -799
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
MGNlMDBlNzg5YmVkODU3ZTIyYTFiNGI5N2M2ZTRkYTdmODkwNTA5OQ==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
OWFjYzllZThlODIyOGQzMzJkN2MwYjc4Y2U3Y2I4ODBlMDUwZDA4Yw==
|
7
|
+
SHA512:
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
ODViNTM3ZWYyMDdiYjk4NjBkNzBlZGEzNzM2YTIxNWYwZmI3ZjBmMzQ0ZTAz
|
10
|
+
MzUwODY1MWNiNGFmNWIwYWVkMTRkMTc1YjcxM2RjYzMwZjJiOGVlOTEyZjcy
|
11
|
+
OGZiYmQ0YmVkMGJhMWIwZjg5YjFkNDc1M2ZlM2NiZmU3MThkMWE=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
MWMyNmYzYmI4MzUxYjYwOWZmM2RmMDgxYzUxOTg2Zjg2NDFhMTRhOGFlYjU2
|
14
|
+
NmEyNmU0ZWRhZjRiZmFiNGZhNDY3NTc1YThlODRjNjNkZmI0YThhN2RmZDdi
|
15
|
+
OTNmNGE5MzM4ZjQ1MTk5YTM0OGNhZjcxZjk0YTdkZmEwNDAxNDE=
|
data/.gitignore
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
# rcov generated
|
2
|
+
coverage
|
3
|
+
|
4
|
+
# rdoc generated
|
5
|
+
rdoc
|
6
|
+
|
7
|
+
# yard generated
|
8
|
+
doc
|
9
|
+
.yardoc
|
10
|
+
|
11
|
+
# bundler
|
12
|
+
.bundle
|
13
|
+
|
14
|
+
# jeweler generated
|
15
|
+
pkg
|
16
|
+
|
17
|
+
# RVM
|
18
|
+
.rvmrc
|
19
|
+
|
20
|
+
# Have editor/IDE/OS specific files you need to ignore? Consider using a global gitignore:
|
21
|
+
#
|
22
|
+
# * Create a file at ~/.gitignore
|
23
|
+
# * Include files you want ignored
|
24
|
+
# * Run: git config --global core.excludesfile ~/.gitignore
|
25
|
+
#
|
26
|
+
# After doing this, these files will be ignored in all your git projects,
|
27
|
+
# saving you from having to 'pollute' every project you touch with them
|
28
|
+
#
|
29
|
+
# Not sure what needs to be ignored for particular editors/OSes? Here are some ideas to get you started. (Remember to remove the leading # of the line.)
|
30
|
+
#
|
31
|
+
# For MacOS:
|
32
|
+
|
33
|
+
.DS_Store
|
34
|
+
|
35
|
+
# For TextMate
|
36
|
+
*.tmproj
|
37
|
+
tmtags
|
38
|
+
|
39
|
+
# For emacs:
|
40
|
+
*~
|
41
|
+
\#*
|
42
|
+
.\#*
|
43
|
+
|
44
|
+
# For vim:
|
45
|
+
*.swp
|
46
|
+
|
47
|
+
Gemfile.lock
|
data/.rspec
CHANGED
@@ -1,2 +1,2 @@
|
|
1
1
|
--color
|
2
|
-
|
2
|
+
--format documentation
|
data/.rubocop.yml
ADDED
data/.rubocop_todo.yml
ADDED
@@ -0,0 +1,101 @@
|
|
1
|
+
# This configuration was generated by
|
2
|
+
# `rubocop --auto-gen-config`
|
3
|
+
# on 2015-09-18 15:56:53 -0400 using RuboCop version 0.34.1.
|
4
|
+
# The point is for the user to remove these configuration records
|
5
|
+
# one by one as the offenses are removed from the code base.
|
6
|
+
# Note that changes in the inspected code, or installation of new
|
7
|
+
# versions of RuboCop, may require this file to be generated again.
|
8
|
+
|
9
|
+
# Offense count: 1
|
10
|
+
# Configuration parameters: AllowSafeAssignment.
|
11
|
+
Lint/AssignmentInCondition:
|
12
|
+
Exclude:
|
13
|
+
- 'lib/mongoid/full_text_search.rb'
|
14
|
+
|
15
|
+
# Offense count: 1
|
16
|
+
# Cop supports --auto-correct.
|
17
|
+
# Configuration parameters: AlignWith, SupportedStyles, AutoCorrect.
|
18
|
+
Lint/EndAlignment:
|
19
|
+
Enabled: false
|
20
|
+
|
21
|
+
# Offense count: 2
|
22
|
+
Lint/HandleExceptions:
|
23
|
+
Exclude:
|
24
|
+
- 'lib/mongoid/full_text_search.rb'
|
25
|
+
- 'spec/mongoid/full_text_search_spec.rb'
|
26
|
+
|
27
|
+
# Offense count: 1
|
28
|
+
Lint/NonLocalExitFromIterator:
|
29
|
+
Exclude:
|
30
|
+
- 'lib/mongoid/full_text_search.rb'
|
31
|
+
|
32
|
+
# Offense count: 4
|
33
|
+
Lint/UselessAssignment:
|
34
|
+
Exclude:
|
35
|
+
- 'spec/mongoid/full_text_search_spec.rb'
|
36
|
+
|
37
|
+
# Offense count: 5
|
38
|
+
Metrics/AbcSize:
|
39
|
+
Max: 106
|
40
|
+
|
41
|
+
# Offense count: 4
|
42
|
+
Metrics/CyclomaticComplexity:
|
43
|
+
Max: 22
|
44
|
+
|
45
|
+
# Offense count: 262
|
46
|
+
# Configuration parameters: AllowURI, URISchemes.
|
47
|
+
Metrics/LineLength:
|
48
|
+
Max: 174
|
49
|
+
|
50
|
+
# Offense count: 5
|
51
|
+
# Configuration parameters: CountComments.
|
52
|
+
Metrics/MethodLength:
|
53
|
+
Max: 50
|
54
|
+
|
55
|
+
# Offense count: 1
|
56
|
+
# Configuration parameters: CountComments.
|
57
|
+
Metrics/ModuleLength:
|
58
|
+
Max: 224
|
59
|
+
|
60
|
+
# Offense count: 4
|
61
|
+
Metrics/PerceivedComplexity:
|
62
|
+
Max: 25
|
63
|
+
|
64
|
+
# Offense count: 1
|
65
|
+
Style/AsciiComments:
|
66
|
+
Exclude:
|
67
|
+
- 'spec/mongoid/full_text_search_spec.rb'
|
68
|
+
|
69
|
+
# Offense count: 1
|
70
|
+
# Configuration parameters: EnforcedStyle, SupportedStyles.
|
71
|
+
Style/ClassAndModuleChildren:
|
72
|
+
Exclude:
|
73
|
+
- 'lib/mongoid/full_text_search.rb'
|
74
|
+
|
75
|
+
# Offense count: 1
|
76
|
+
Style/ConstantName:
|
77
|
+
Exclude:
|
78
|
+
- 'spec/models/russian_artwork.rb'
|
79
|
+
|
80
|
+
# Offense count: 22
|
81
|
+
Style/Documentation:
|
82
|
+
Enabled: false
|
83
|
+
|
84
|
+
# Offense count: 3
|
85
|
+
# Configuration parameters: EnforcedStyle, SupportedStyles.
|
86
|
+
Style/FormatString:
|
87
|
+
Exclude:
|
88
|
+
- 'lib/mongoid/full_text_search.rb'
|
89
|
+
- 'spec/models/external_artwork_no_fields_supplied.rb'
|
90
|
+
|
91
|
+
# Offense count: 2
|
92
|
+
Style/MultilineBlockChain:
|
93
|
+
Exclude:
|
94
|
+
- 'lib/mongoid/full_text_search.rb'
|
95
|
+
|
96
|
+
# Offense count: 4
|
97
|
+
# Configuration parameters: Methods.
|
98
|
+
Style/SingleLineBlockParams:
|
99
|
+
Exclude:
|
100
|
+
- 'lib/mongoid/full_text_search.rb'
|
101
|
+
- 'spec/mongoid/full_text_search_spec.rb'
|
data/.travis.yml
CHANGED
@@ -1,7 +1,15 @@
|
|
1
1
|
rvm:
|
2
|
+
- 2.2
|
3
|
+
- 2.1
|
4
|
+
- 2.0
|
2
5
|
- 1.9.3
|
3
|
-
-
|
6
|
+
- rbx-2.2.10
|
7
|
+
- jruby-19mode
|
8
|
+
|
4
9
|
env:
|
5
|
-
-
|
6
|
-
-
|
10
|
+
- MONGOID_VERSION=3.0.0
|
11
|
+
- MONGOID_VERSION=3.1.0
|
12
|
+
- MONGOID_VERSION=4.0
|
13
|
+
- MONGOID_VERSION=5.0
|
14
|
+
|
7
15
|
services: mongodb
|
data/CHANGELOG.md
CHANGED
@@ -1,10 +1,17 @@
|
|
1
|
+
0.7.0 (9/18/2015)
|
2
|
+
-----------------
|
3
|
+
|
4
|
+
* Compatible with Mongoid 4 and 5 - [@dblock](https://github.com/dblock).
|
5
|
+
* Rewritten .gemspec, removed Jeweler - [@dblock](https://github.com/dblock).
|
6
|
+
* Added RuboCop - [@dblock](https://github.com/dblock).
|
7
|
+
|
1
8
|
0.6.1 (4/3/2013)
|
2
|
-
|
9
|
+
----------------
|
3
10
|
|
4
11
|
* [#6](https://github.com/artsy/mongoid_fulltext/pull/6): Upgrade to Mongoid ~> 3.0 - [@simi](https://github.com/simi).
|
5
12
|
|
6
13
|
0.6.0 (7/16/2012)
|
7
|
-
|
14
|
+
-----------------
|
8
15
|
|
9
16
|
* [#2](https://github.com/artsy/mongoid_fulltext/pull/2): Upgrade to Mongoid 3.0 - [@volmer](https://github.com/volmer).
|
10
17
|
* [#1](https://github.com/artsy/mongoid_fulltext/pull/1): Fix: downcase destroys non-latin strings - [@netoneko](https://github.com/netoneko).
|
data/Gemfile
CHANGED
@@ -1,15 +1,25 @@
|
|
1
|
-
source
|
1
|
+
source 'http://rubygems.org'
|
2
2
|
|
3
|
-
|
4
|
-
|
3
|
+
case version = ENV['MONGOID_VERSION'] || '5'
|
4
|
+
when /5/
|
5
|
+
gem 'mongoid', '~> 5.0'
|
6
|
+
when /4/
|
7
|
+
gem 'mongoid', '~> 4.0'
|
8
|
+
when /3.1.0/
|
9
|
+
gem 'mongoid', '~> 3.1.0'
|
10
|
+
when /3.0.0/
|
11
|
+
gem 'mongoid', '~> 3.0.0'
|
5
12
|
else
|
6
|
-
gem
|
13
|
+
gem 'mongoid', version
|
7
14
|
end
|
8
15
|
|
9
|
-
|
16
|
+
gemspec
|
10
17
|
|
11
|
-
group :
|
12
|
-
gem
|
13
|
-
|
14
|
-
|
18
|
+
group :test do
|
19
|
+
gem 'rspec'
|
20
|
+
end
|
21
|
+
|
22
|
+
group :development do
|
23
|
+
gem 'rake'
|
24
|
+
gem 'rubocop', '0.34.1'
|
15
25
|
end
|
data/LICENSE
CHANGED
data/README.md
CHANGED
@@ -1,19 +1,22 @@
|
|
1
|
-
Mongoid Fulltext Search
|
1
|
+
Mongoid Fulltext Search
|
2
2
|
=======================
|
3
3
|
|
4
|
+
[Build Status](http://travis-ci.org/artsy/mongoid_fulltext)
|
5
|
+
[Gem Version](http://badge.fury.io/rb/mongoid_fulltext)
|
6
|
+
|
4
7
|
Full-text search using n-gram matching for the Mongoid ODM. Tested on MongoDB 1.6 and above, but
|
5
8
|
probably works on earlier versions as well.
|
6
9
|
|
7
|
-
MongoDB
|
8
|
-
where you want something a little less than a full-blown indexing service
|
10
|
+
MongoDB introduced native full-text search capabilities in v2.4, but this gem is still a good fit for cases
|
11
|
+
where you want something a little less than a full-blown indexing service. The mongoid_fulltext gem
|
9
12
|
lets you do a fuzzy string search across relatively short strings, which makes it good for populating
|
10
13
|
autocomplete boxes based on the display names of your Rails models but not appropriate for, say,
|
11
14
|
indexing hundreds of thousands of HTML documents.
|
12
15
|
|
13
16
|
Install
|
14
|
-
|
17
|
+
-------
|
15
18
|
|
16
|
-
Version 0.6.
|
19
|
+
Version 0.7.0 or newer of this gem requires Ruby 1.9.3 or newer and works with Mongoid 3, 4 and 5 (version 0.6.x works with Mongoid 3 only).
|
17
20
|
Use version 0.5.x for Mongoid 2.4.x and Ruby 1.8.7, 1.9.2 or 1.9.3.
|
18
21
|
|
19
22
|
For Ruby 1.8.7 and/or Mongoid 2.x use [mongoid_fulltext 0.5.x](https://github.com/artsy/mongoid_fulltext/tree/0.5-stable).
|
@@ -22,8 +25,8 @@ For Ruby 1.8.7 and/or Mongoid 2.x use [mongoid_fulltext 0.5.x](https://github.co
|
|
22
25
|
gem 'mongoid_fulltext'
|
23
26
|
```
|
24
27
|
|
25
|
-
|
26
|
-
|
28
|
+
Examples
|
29
|
+
--------
|
27
30
|
|
28
31
|
Suppose you have an `Artist` model and want to index each artist's name:
|
29
32
|
|
@@ -395,7 +398,7 @@ Fork the project. Make your feature addition or bug fix with tests. Send a pull
|
|
395
398
|
Copyright and License
|
396
399
|
---------------------
|
397
400
|
|
398
|
-
MIT License, see [LICENSE](
|
401
|
+
MIT License, see [LICENSE](LICENSE) for details.
|
399
402
|
|
400
|
-
(c) 2011-
|
403
|
+
(c) 2011-2015 [Artsy Inc.](http://artsy.github.io)
|
401
404
|
|
data/Rakefile
CHANGED
@@ -1,36 +1,16 @@
|
|
1
1
|
require 'rubygems'
|
2
|
-
require 'bundler'
|
3
|
-
begin
|
4
|
-
Bundler.setup(:default, :development)
|
5
|
-
rescue Bundler::BundlerError => e
|
6
|
-
$stderr.puts e.message
|
7
|
-
$stderr.puts "Run `bundle install` to install missing gems"
|
8
|
-
exit e.status_code
|
9
|
-
end
|
10
|
-
require 'rake'
|
11
|
-
require 'rspec/core/rake_task'
|
2
|
+
require 'bundler/gem_tasks'
|
12
3
|
|
13
|
-
|
14
|
-
Jeweler::Tasks.new do |gem|
|
15
|
-
# gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
|
16
|
-
gem.name = "mongoid_fulltext"
|
17
|
-
gem.homepage = "http://github.com/aaw/mongoid_fulltext"
|
18
|
-
gem.license = "MIT"
|
19
|
-
gem.summary = %Q{Full-text search for the Mongoid ORM}
|
20
|
-
gem.description = %Q{Full-text search for the Mongoid ORM, using n-grams extracted from text}
|
21
|
-
gem.email = "aaron.windsor@gmail.com"
|
22
|
-
gem.authors = ["Aaron Windsor"]
|
23
|
-
# Include your dependencies below. Runtime dependencies are required when using your gem,
|
24
|
-
# and development dependencies are only needed for development (ie running rake tasks, tests, etc)
|
25
|
-
# gem.add_runtime_dependency 'jabber4r', '> 0.1'
|
26
|
-
# gem.add_development_dependency 'rspec', '> 1.2.3'
|
27
|
-
end
|
28
|
-
Jeweler::RubygemsDotOrgTasks.new
|
4
|
+
Bundler.setup :default, :development
|
29
5
|
|
6
|
+
require 'rspec/core'
|
7
|
+
require 'rspec/core/rake_task'
|
30
8
|
|
31
|
-
desc "Run all tests"
|
32
9
|
RSpec::Core::RakeTask.new(:spec) do |spec|
|
33
|
-
spec.pattern =
|
10
|
+
spec.pattern = FileList['spec/**/*_spec.rb']
|
34
11
|
end
|
35
12
|
|
36
|
-
|
13
|
+
require 'rubocop/rake_task'
|
14
|
+
RuboCop::RakeTask.new(:rubocop)
|
15
|
+
|
16
|
+
task default: [:rubocop, :spec]
|
@@ -0,0 +1,372 @@
|
|
1
|
+
require 'mongoid'
|
2
|
+
require 'mongoid/compatibility'
|
3
|
+
if Mongoid::Compatibility::Version.mongoid3?
|
4
|
+
require_relative 'indexes'
|
5
|
+
else
|
6
|
+
require_relative 'indexable'
|
7
|
+
end
|
8
|
+
require 'unicode_utils'
|
9
|
+
require 'cgi'
|
10
|
+
|
11
|
+
module Mongoid::FullTextSearch
|
12
|
+
extend ActiveSupport::Concern
|
13
|
+
|
14
|
+
included do
|
15
|
+
cattr_accessor :mongoid_fulltext_config
|
16
|
+
end
|
17
|
+
|
18
|
+
class UnspecifiedIndexError < StandardError; end
|
19
|
+
class UnknownFilterQueryOperator < StandardError; end
|
20
|
+
|
21
|
+
module ClassMethods
|
22
|
+
# Declares a full-text index for the including class.
#
# args - names of the methods whose return values are indexed; an optional
#        trailing Hash overrides the defaults below. With no field names,
#        :to_s is indexed. The :index_name option names the index; otherwise
#        a name is generated from the class name and the number of indexes
#        already configured.
#
# The finished configuration is stored in mongoid_fulltext_config under the
# index name. A before_save callback is registered only when
# :reindex_immediately is truthy; the before_destroy callback is always
# registered.
def fulltext_search_in(*args)
  self.mongoid_fulltext_config = {} if mongoid_fulltext_config.nil?

  options = args.last.is_a?(Hash) ? args.pop : {}
  index_name = options.fetch(:index_name) do
    'mongoid_fulltext.index_%s_%s' % [name.downcase, mongoid_fulltext_config.count]
  end

  stop_word_list = %w(i a s t me my we he it am is be do an if
                      or as of at by to up in on no so our you him
                      his she her its who are was has had did the and
                      but for out off why how all any few nor not own
                      too can don now ours your hers they them what whom
                      this that were been have does with into from down over
                      then once here when both each more most some such only
                      same than very will just yours their which these those
                      being doing until while about after above below under
                      again there where other myself itself theirs having during
                      before should himself herself because against between through
                      further yourself ourselves yourselves themselves)

  defaults = {
    alphabet: 'abcdefghijklmnopqrstuvwxyz0123456789 ',
    word_separators: "-_ \n\t",
    ngram_width: 3,
    max_ngrams_to_search: 6,
    apply_prefix_scoring_to_all_words: true,
    index_full_words: true,
    index_short_prefixes: false,
    max_candidate_set_size: 1000,
    remove_accents: true,
    reindex_immediately: true,
    stop_words: stop_word_list.each_with_object({}) { |word, seen| seen[word] = true }
  }
  config = defaults.merge!(options)

  # Turn the character strings into lookup tables keyed by single character.
  char_table = ->(chars) { Hash[chars.split('').map { |ch| [ch, ch] }] }

  config[:ngram_fields] = args.empty? ? [:to_s] : args
  config[:alphabet] = char_table.call(config[:alphabet])
  config[:word_separators] = char_table.call(config[:word_separators])
  mongoid_fulltext_config[index_name] = config

  before_save(:update_ngram_index) if config[:reindex_immediately]
  before_destroy :remove_from_ngram_index
end
|
67
|
+
|
68
|
+
# Ensures the database indexes for every full-text index configured on this
# class. Does nothing (and returns nil) when no full-text index has been
# configured.
def create_fulltext_indexes
  configured = mongoid_fulltext_config
  return unless configured

  configured.each_pair do |index_name, index_config|
    fulltext_search_ensure_indexes(index_name, index_config)
  end
end
|
74
|
+
|
75
|
+
# Creates the database indexes on the external index collection index_name.
#
# index_name - name of the collection that stores the n-grams.
# config     - the index configuration; only config[:filters] is read here.
#
# Two indexes are created: 'fts_index' on ngram, score and every filter
# value, and one on document_id.
def fulltext_search_ensure_indexes(index_name, config)
  coll = collection.database[index_name]
  mongoid5 = Mongoid::Compatibility::Version.mongoid5?
  base_definition = [['ngram', 1], ['score', -1]]

  # The order of filters matters when the same index is used from two or more
  # collections, so filter keys are always sorted by name.
  filter_indexes = (config[:filters] || []).map { |key, _value| ["filter_values.#{key}", 1] }
  filter_indexes = filter_indexes.sort_by { |pair| pair[0] }
  index_definition = base_definition + filter_indexes

  # Since the definition of the index could have changed, clean up by removing
  # any n-gram index that does not cover exactly the expected keys.
  correct_keys = index_definition.map { |pair| pair[0] }
  all_filter_keys = filter_indexes.map { |pair| pair[0] }
  coll.indexes.each do |idef|
    keys = idef['key'].keys
    next unless keys.member?('ngram')

    # Remember filter keys used by existing indexes so they are kept below.
    all_filter_keys |= keys.find_all { |key| key.start_with?('filter_values.') }
    next if (keys & correct_keys) == correct_keys

    Mongoid.logger.info "Dropping #{idef['name']} [#{keys & correct_keys} <=> #{correct_keys}]" if Mongoid.logger
    if mongoid5
      coll.indexes.drop_one(idef['key'])
    else
      coll.indexes.drop(idef['key'])
    end
  end

  # Existing indexes referenced extra filter keys: rebuild the definition so
  # that it covers the union of old and new filters.
  if all_filter_keys.length > filter_indexes.length
    filter_indexes = all_filter_keys.map { |key| [key, 1] }.sort_by { |pair| pair[0] }
    index_definition = base_definition + filter_indexes
  end

  Mongoid.logger.info "Ensuring fts_index on #{coll.name}: #{index_definition}" if Mongoid.logger
  if mongoid5
    coll.indexes.create_one(Hash[index_definition], name: 'fts_index')
  else
    coll.indexes.create(Hash[index_definition], name: 'fts_index')
  end

  Mongoid.logger.info "Ensuring document_id index on #{coll.name}" if Mongoid.logger
  if mongoid5
    coll.indexes.create_one('document_id' => 1) # to make removes fast
  else
    coll.indexes.create('document_id' => 1) # to make removes fast
  end
end
|
122
|
+
|
123
|
+
# Runs a fuzzy n-gram search against one of this class's full-text indexes.
#
# query_string - the text to search for.
# options      - :max_results (default 10), :return_scores (default false)
#                and :index (name of the index to search; required when the
#                class has more than one index). Every other key is treated
#                as a filter and passed to map_query_filters. The caller's
#                hash is not modified.
#
# Returns whatever instantiate_mapreduce_results builds from the best
# max_results candidates, or [] when the query yields no n-grams.
# Raises UnspecifiedIndexError when several indexes exist and :index is
# not given.
def fulltext_search(query_string, options = {})
  # Work on a copy so the caller's hash keeps its keys.
  options = options.dup
  max_results = options.delete(:max_results) { 10 }
  return_scores = options.delete(:return_scores) { false }
  if mongoid_fulltext_config.count > 1 && !options.key?(:index)
    error_message = '%s is indexed by multiple full-text indexes. You must specify one by passing an :index parameter'
    fail UnspecifiedIndexError, error_message % name, caller
  end
  index_name = options.delete(:index) { mongoid_fulltext_config.keys.first }

  # Options hash should only contain filters after this point

  index_config = mongoid_fulltext_config[index_name]
  ngrams = all_ngrams(query_string, index_config)
  return [] if ngrams.empty?

  # The filters are the same for every n-gram, so map them only once.
  filters = map_query_filters(options)

  # For each ngram, construct the query we'll use to pull index documents and
  # get a count of the number of index documents containing that n-gram
  ordering = { 'score' => -1 }
  limit = index_config[:max_candidate_set_size]
  coll = collection.database[index_name]
  cursors = ngrams.map do |ngram, ngram_score|
    query = { 'ngram' => ngram }.update(filters)
    { score: ngram_score, count: coll.find(query).count, query: query }
  end.sort! { |record1, record2| record1[:count] <=> record2[:count] }

  # Using the queries we just constructed and the n-gram frequency counts we
  # just computed, pull in about *:max_candidate_set_size* candidates by
  # considering the n-grams in order of increasing frequency. When we've
  # spent all *:max_candidate_set_size* candidates, pull the top-scoring
  # *max_results* candidates for each remaining n-gram.
  results_so_far = 0
  candidates_list = cursors.map do |doc|
    next if doc[:count] == 0
    query_result = coll.find(doc[:query])
    if results_so_far >= limit
      query_result = query_result.sort(ordering).limit(max_results)
    elsif doc[:count] > limit - results_so_far
      query_result = query_result.sort(ordering).limit(limit - results_so_far)
    end
    results_so_far += doc[:count]
    Hash[query_result.map do |candidate|
      [candidate['document_id'],
       { clazz: candidate['class'], score: candidate['score'] * doc[:score] }]
    end]
  end.compact

  # Finally, score all candidates by matching them up with other candidates that are
  # associated with the same document. This is similar to how you might process a
  # boolean AND query, except that with an AND query, you'd stop after considering
  # the first candidate list and matching its candidates up with candidates from other
  # lists, whereas here we want the search to be a little fuzzier so we'll run through
  # all candidate lists, removing candidates as we match them up.
  all_scores = []
  until candidates_list.empty?
    candidates = candidates_list.pop
    scores = candidates.map do |candidate_id, data|
      matched = candidates_list.map { |others| (others.delete(candidate_id) || { score: 0 })[:score] }.sum
      { id: candidate_id, clazz: data[:clazz], score: data[:score] + matched }
    end
    all_scores.concat(scores)
  end
  all_scores.sort! { |document1, document2| document2[:score] <=> document1[:score] }

  # first(n) returns nothing for n == 0, unlike [0..n - 1] which returned everything.
  top_scores = all_scores.first([max_results, 0].max)
  instantiate_mapreduce_results(top_scores, return_scores: return_scores)
end
|
191
|
+
|
192
|
+
# Loads the document described by one scored search result.
#
# result - Hash with :clazz (a class name String) and :id.
#
# Returns whatever the named class's find returns for that id.
def instantiate_mapreduce_result(result)
  document_class = result[:clazz].constantize
  document_class.find(result[:id])
end
|
195
|
+
|
196
|
+
# Turns scored search results into documents, dropping results whose document
# could not be instantiated (nil).
#
# results - Array of Hashes accepted by instantiate_mapreduce_result.
# options - Hash; when options[:return_scores] is truthy each element of the
#           returned Array is a [document, score] pair, otherwise it is the
#           document itself.
def instantiate_mapreduce_results(results, options)
  unless options[:return_scores]
    return results.map { |entry| instantiate_mapreduce_result(entry) }.compact
  end

  scored = results.map { |entry| [instantiate_mapreduce_result(entry), entry[:score]] }
  scored.reject { |pair| pair[0].nil? }
end
|
203
|
+
|
204
|
+
# Extracts the scored n-grams of a string.
#
# str                   - the text to break up; nil yields {}.
# config                - an index configuration as built by fulltext_search_in
#                         (:alphabet and :word_separators are lookup Hashes).
# bound_number_returned - when true, at most about :max_ngrams_to_search
#                         n-grams are sampled in evenly spaced strides; when
#                         false every n-gram is extracted.
#
# Returns a Hash mapping each n-gram (plus, depending on the configuration,
# short prefixes and full words) to its score.
def all_ngrams(str, config, bound_number_returned = true)
  return {} if str.nil?

  if config[:remove_accents]
    if defined?(UnicodeUtils)
      str = UnicodeUtils.nfkd(str)
    elsif defined?(DiacriticsFu)
      str = DiacriticsFu.escape(str)
    end
  end

  alphabet = config[:alphabet]
  separators = config[:word_separators]
  width = config[:ngram_width]

  # Remove any characters that aren't in the alphabet and aren't word separators
  kept_chars = str.mb_chars.downcase.to_s.split('').select { |ch| alphabet[ch] || separators[ch] }
  filtered_str = kept_chars.join('')

  # Figure out how many ngrams to extract from the string. If we can't afford to extract all ngrams,
  # step over the string in evenly spaced strides to extract ngrams. For example, to extract 3 3-letter
  # ngrams from 'abcdefghijk', we'd want to extract 'abc', 'efg', and 'ijk'.
  step_size = 1
  if bound_number_returned
    stride = ((filtered_str.length - width).to_f / config[:max_ngrams_to_search]).ceil
    step_size = [stride, 1].max
  end

  # Build one {ngram:, score:} record per sampled position. N-grams that start
  # the string (or, with :apply_prefix_scoring_to_all_words, start any word)
  # get the higher square-root based prefix score; see the scoring rationale:
  # multiplying the scores of matching ngrams yields a score function with the
  # property that score(x,y) > score(x,z) for any string z containing y and
  # score(x,y) > score(x,z) for any string z contained in y.
  ngram_array = (0..filtered_str.length - width).step(step_size).map do |i|
    starts_word = i == 0 ||
                  (config[:apply_prefix_scoring_to_all_words] && separators.key?(filtered_str[i - 1].chr))
    score = starts_word ? Math.sqrt(1 + 1.0 / filtered_str.length) : Math.sqrt(2.0 / filtered_str.length)
    { ngram: filtered_str[i..i + width - 1], score: score }
  end

  # If an ngram appears multiple times in the query string, keep the max score
  ngram_array = ngram_array.group_by { |record| record[:ngram] }.map do |gram, records|
    { ngram: gram, score: records.map { |record| record[:score] }.max }
  end

  if config[:index_short_prefixes] || config[:index_full_words]
    separator_class = separators.keys.map { |sep| Regexp.escape(sep) }.join
    all_words = filtered_str.split(Regexp.compile("[#{separator_class}]"))
  end

  # Add 'short prefix' records to the array: prefixes of the string that are length (ngram_width - 1)
  if config[:index_short_prefixes]
    prefixes_seen = {}
    all_words.each do |word|
      next if word.length < width - 1
      prefix = word[0...width - 1]
      next unless prefixes_seen[prefix].nil?
      next unless config[:stop_words][word].nil? || word == filtered_str
      ngram_array << { ngram: prefix, score: 1 + 1.0 / filtered_str.length }
      prefixes_seen[prefix] = true
    end
  end

  # Add records to the array of ngrams for each full word in the string that isn't a stop word
  if config[:index_full_words]
    full_words_seen = {}
    all_words.each do |word|
      next unless word.length > 1 && full_words_seen[word].nil?
      next unless config[:stop_words][word].nil? || word == filtered_str
      ngram_array << { ngram: word, score: 1 + 1.0 / filtered_str.length }
      full_words_seen[word] = true
    end
  end

  # If an ngram appears as any combination of full word, short prefix, and ngram, keep the sum of the scores
  totals = ngram_array.group_by { |record| record[:ngram] }.map do |gram, records|
    [gram, records.map { |record| record[:score] }.sum]
  end
  Hash[totals]
end
|
279
|
+
|
280
|
+
# Removes, from every configured index collection, all index entries whose
# 'class' field equals this class's name.
def remove_from_ngram_index
  mongoid_fulltext_config.each_key do |index_name|
    coll = collection.database[index_name]
    # Mongoid 5 and earlier versions name the bulk delete differently.
    removal = Mongoid::Compatibility::Version.mongoid5? ? :delete_many : :remove_all
    coll.find('class' => name).public_send(removal)
  end
end
|
290
|
+
|
291
|
+
# Re-indexes every document returned by `all` by calling update_ngram_index
# on each of them.
def update_ngram_index
  all.each { |document| document.update_ngram_index }
end
|
294
|
+
|
295
|
+
private
|
296
|
+
|
297
|
+
# Take a list of filters to be mapped so they can update the query
|
298
|
+
# used upon the fulltext search of the ngrams
|
299
|
+
# filters - Hash of filter name => value. A Hash value must carry :any
#           (mapped to '$in') or :all (mapped to '$all'); any other value is
#           matched with '$all'.
#
# Returns a Hash built from the pairs produced by format_query_filter.
# Raises UnknownFilterQueryOperator for a Hash value with neither :any nor :all.
def map_query_filters(filters)
  pairs = filters.map do |key, value|
    if !(Hash === value)
      format_query_filter('$all', key, value)
    elsif value.key?(:any)
      format_query_filter('$in', key, value[:any])
    elsif value.key?(:all)
      format_query_filter('$all', key, value[:all])
    else
      fail UnknownFilterQueryOperator, value.keys.join(','), caller
    end
  end
  Hash[pairs]
end
|
310
|
+
|
311
|
+
# Builds one [field, condition] pair for a filter query.
#
# operator - the query operator String used as the condition's key.
# key      - the filter name; the field becomes "filter_values.<key>".
# value    - a single value or an (optionally nested) Array of values; it is
#            always flattened into one Array.
def format_query_filter(operator, key, value)
  field = 'filter_values.%s' % key
  operands = [value].flatten
  [field, { operator => operands }]
end
|
314
|
+
end
|
315
|
+
|
316
|
+
# Rebuilds this document's entries in every configured full-text index.
#
# For each index: an optional :update_if condition (Symbol method name,
# String evaluated with instance_eval, or Proc called with the document)
# decides whether the index is touched at all; any other condition type skips
# the index. The document's old entries are then removed, n-grams are
# extracted from the configured fields, and one index document per n-gram is
# inserted (with filter values when the index defines :filters).
#
# An index for which the document yields no n-grams is skipped without
# affecting the remaining indexes.
def update_ngram_index
  mongoid_fulltext_config.each_pair do |index_name, fulltext_config|
    condition = fulltext_config[:update_if]
    if condition
      wanted =
        case condition
        when Symbol then send(condition)
        when String then instance_eval(condition)
        when Proc then condition.call(self)
        else false # unsupported condition type: leave this index alone
        end
      next unless wanted
    end

    # remove existing ngrams from external index
    coll = collection.database[index_name.to_sym]
    if Mongoid::Compatibility::Version.mongoid5?
      coll.find('document_id' => _id).delete_many
    else
      coll.find('document_id' => _id).remove_all
    end

    # extract ngrams from fields; a later field overwrites the score of an
    # n-gram already produced by an earlier field
    field_values = fulltext_config[:ngram_fields].map { |field| send(field) }
    ngrams = field_values.each_with_object({}) do |item, accum|
      accum.update(self.class.all_ngrams(item, fulltext_config, false))
    end
    # `next`, not `return`: the remaining indexes must still be updated.
    next if ngrams.empty?

    # apply filters, if necessary
    has_filters = fulltext_config.key?(:filters)
    filter_values = nil
    if has_filters
      filter_values = Hash[fulltext_config[:filters].map do |key, filter|
        begin
          [key, filter.call(self)]
        rescue StandardError
          # Deliberately best-effort: a filter that raises is left out.
          nil
        end
      end.compact]
    end

    # insert new ngrams in external index
    ngrams.each_pair do |ngram, score|
      index_document = { 'ngram' => ngram, 'document_id' => _id, 'score' => score, 'class' => self.class.name }
      index_document['filter_values'] = filter_values if has_filters
      if Mongoid::Compatibility::Version.mongoid5?
        coll.insert_one(index_document)
      else
        coll.insert(index_document)
      end
    end
  end
end
|
361
|
+
|
362
|
+
# Removes this document's entries (matched on 'document_id') from every
# configured index collection.
def remove_from_ngram_index
  mongoid_fulltext_config.each_key do |index_name|
    coll = collection.database[index_name]
    # Mongoid 5 and earlier versions name the bulk delete differently.
    removal = Mongoid::Compatibility::Version.mongoid5? ? :delete_many : :remove_all
    coll.find('document_id' => _id).public_send(removal)
  end
end
|
372
|
+
end
|