mongoid_fulltext 0.6.1 → 0.7.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/.gitignore +47 -0
- data/.rspec +1 -1
- data/.rubocop.yml +6 -0
- data/.rubocop_todo.yml +101 -0
- data/.travis.yml +11 -3
- data/CHANGELOG.md +9 -2
- data/Gemfile +19 -9
- data/LICENSE +1 -1
- data/README.md +12 -9
- data/Rakefile +9 -29
- data/lib/mongoid/full_text_search/version.rb +5 -0
- data/lib/mongoid/full_text_search.rb +372 -0
- data/lib/mongoid/indexable.rb +13 -0
- data/lib/mongoid/indexes.rb +13 -0
- data/lib/mongoid_fulltext.rb +1 -341
- data/mongoid_fulltext.gemspec +16 -82
- data/spec/models/accentless_artwork.rb +1 -1
- data/spec/models/advanced_artwork.rb +1 -1
- data/spec/models/basic_artwork.rb +0 -1
- data/spec/models/delayed_artwork.rb +1 -2
- data/spec/models/external_artist.rb +1 -2
- data/spec/models/external_artwork.rb +1 -2
- data/spec/models/external_artwork_no_fields_supplied.rb +2 -2
- data/spec/models/filtered_artist.rb +4 -4
- data/spec/models/filtered_artwork.rb +7 -7
- data/spec/models/filtered_other.rb +3 -3
- data/spec/models/hidden_dragon.rb +0 -1
- data/spec/models/multi_external_artwork.rb +3 -3
- data/spec/models/multi_field_artist.rb +1 -1
- data/spec/models/multi_field_artwork.rb +1 -1
- data/spec/models/partitioned_artist.rb +8 -9
- data/spec/models/russian_artwork.rb +2 -2
- data/spec/models/short_prefixes_artwork.rb +3 -4
- data/spec/models/stopwords_artwork.rb +3 -4
- data/spec/mongoid/full_text_search_spec.rb +752 -0
- data/spec/spec_helper.rb +11 -7
- metadata +27 -68
- data/VERSION +0 -1
- data/lib/mongoid_indexes.rb +0 -12
- data/spec/config/mongoid.yml +0 -6
- data/spec/mongoid/fulltext_spec.rb +0 -799
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
MGNlMDBlNzg5YmVkODU3ZTIyYTFiNGI5N2M2ZTRkYTdmODkwNTA5OQ==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
OWFjYzllZThlODIyOGQzMzJkN2MwYjc4Y2U3Y2I4ODBlMDUwZDA4Yw==
|
7
|
+
SHA512:
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
ODViNTM3ZWYyMDdiYjk4NjBkNzBlZGEzNzM2YTIxNWYwZmI3ZjBmMzQ0ZTAz
|
10
|
+
MzUwODY1MWNiNGFmNWIwYWVkMTRkMTc1YjcxM2RjYzMwZjJiOGVlOTEyZjcy
|
11
|
+
OGZiYmQ0YmVkMGJhMWIwZjg5YjFkNDc1M2ZlM2NiZmU3MThkMWE=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
MWMyNmYzYmI4MzUxYjYwOWZmM2RmMDgxYzUxOTg2Zjg2NDFhMTRhOGFlYjU2
|
14
|
+
NmEyNmU0ZWRhZjRiZmFiNGZhNDY3NTc1YThlODRjNjNkZmI0YThhN2RmZDdi
|
15
|
+
OTNmNGE5MzM4ZjQ1MTk5YTM0OGNhZjcxZjk0YTdkZmEwNDAxNDE=
|
data/.gitignore
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
# rcov generated
|
2
|
+
coverage
|
3
|
+
|
4
|
+
# rdoc generated
|
5
|
+
rdoc
|
6
|
+
|
7
|
+
# yard generated
|
8
|
+
doc
|
9
|
+
.yardoc
|
10
|
+
|
11
|
+
# bundler
|
12
|
+
.bundle
|
13
|
+
|
14
|
+
# jeweler generated
|
15
|
+
pkg
|
16
|
+
|
17
|
+
# RVM
|
18
|
+
.rvmrc
|
19
|
+
|
20
|
+
# Have editor/IDE/OS specific files you need to ignore? Consider using a global gitignore:
|
21
|
+
#
|
22
|
+
# * Create a file at ~/.gitignore
|
23
|
+
# * Include files you want ignored
|
24
|
+
# * Run: git config --global core.excludesfile ~/.gitignore
|
25
|
+
#
|
26
|
+
# After doing this, these files will be ignored in all your git projects,
|
27
|
+
# saving you from having to 'pollute' every project you touch with them
|
28
|
+
#
|
29
|
+
# Not sure what needs to be ignored for particular editors/OSes? Here are some ideas to get you started. (Remember to remove the leading # of the line.)
|
30
|
+
#
|
31
|
+
# For MacOS:
|
32
|
+
|
33
|
+
.DS_Store
|
34
|
+
|
35
|
+
# For TextMate
|
36
|
+
*.tmproj
|
37
|
+
tmtags
|
38
|
+
|
39
|
+
# For emacs:
|
40
|
+
*~
|
41
|
+
\#*
|
42
|
+
.\#*
|
43
|
+
|
44
|
+
# For vim:
|
45
|
+
*.swp
|
46
|
+
|
47
|
+
Gemfile.lock
|
data/.rspec
CHANGED
@@ -1,2 +1,2 @@
|
|
1
1
|
--color
|
2
|
-
|
2
|
+
--format documentation
|
data/.rubocop.yml
ADDED
data/.rubocop_todo.yml
ADDED
@@ -0,0 +1,101 @@
|
|
1
|
+
# This configuration was generated by
|
2
|
+
# `rubocop --auto-gen-config`
|
3
|
+
# on 2015-09-18 15:56:53 -0400 using RuboCop version 0.34.1.
|
4
|
+
# The point is for the user to remove these configuration records
|
5
|
+
# one by one as the offenses are removed from the code base.
|
6
|
+
# Note that changes in the inspected code, or installation of new
|
7
|
+
# versions of RuboCop, may require this file to be generated again.
|
8
|
+
|
9
|
+
# Offense count: 1
|
10
|
+
# Configuration parameters: AllowSafeAssignment.
|
11
|
+
Lint/AssignmentInCondition:
|
12
|
+
Exclude:
|
13
|
+
- 'lib/mongoid/full_text_search.rb'
|
14
|
+
|
15
|
+
# Offense count: 1
|
16
|
+
# Cop supports --auto-correct.
|
17
|
+
# Configuration parameters: AlignWith, SupportedStyles, AutoCorrect.
|
18
|
+
Lint/EndAlignment:
|
19
|
+
Enabled: false
|
20
|
+
|
21
|
+
# Offense count: 2
|
22
|
+
Lint/HandleExceptions:
|
23
|
+
Exclude:
|
24
|
+
- 'lib/mongoid/full_text_search.rb'
|
25
|
+
- 'spec/mongoid/full_text_search_spec.rb'
|
26
|
+
|
27
|
+
# Offense count: 1
|
28
|
+
Lint/NonLocalExitFromIterator:
|
29
|
+
Exclude:
|
30
|
+
- 'lib/mongoid/full_text_search.rb'
|
31
|
+
|
32
|
+
# Offense count: 4
|
33
|
+
Lint/UselessAssignment:
|
34
|
+
Exclude:
|
35
|
+
- 'spec/mongoid/full_text_search_spec.rb'
|
36
|
+
|
37
|
+
# Offense count: 5
|
38
|
+
Metrics/AbcSize:
|
39
|
+
Max: 106
|
40
|
+
|
41
|
+
# Offense count: 4
|
42
|
+
Metrics/CyclomaticComplexity:
|
43
|
+
Max: 22
|
44
|
+
|
45
|
+
# Offense count: 262
|
46
|
+
# Configuration parameters: AllowURI, URISchemes.
|
47
|
+
Metrics/LineLength:
|
48
|
+
Max: 174
|
49
|
+
|
50
|
+
# Offense count: 5
|
51
|
+
# Configuration parameters: CountComments.
|
52
|
+
Metrics/MethodLength:
|
53
|
+
Max: 50
|
54
|
+
|
55
|
+
# Offense count: 1
|
56
|
+
# Configuration parameters: CountComments.
|
57
|
+
Metrics/ModuleLength:
|
58
|
+
Max: 224
|
59
|
+
|
60
|
+
# Offense count: 4
|
61
|
+
Metrics/PerceivedComplexity:
|
62
|
+
Max: 25
|
63
|
+
|
64
|
+
# Offense count: 1
|
65
|
+
Style/AsciiComments:
|
66
|
+
Exclude:
|
67
|
+
- 'spec/mongoid/full_text_search_spec.rb'
|
68
|
+
|
69
|
+
# Offense count: 1
|
70
|
+
# Configuration parameters: EnforcedStyle, SupportedStyles.
|
71
|
+
Style/ClassAndModuleChildren:
|
72
|
+
Exclude:
|
73
|
+
- 'lib/mongoid/full_text_search.rb'
|
74
|
+
|
75
|
+
# Offense count: 1
|
76
|
+
Style/ConstantName:
|
77
|
+
Exclude:
|
78
|
+
- 'spec/models/russian_artwork.rb'
|
79
|
+
|
80
|
+
# Offense count: 22
|
81
|
+
Style/Documentation:
|
82
|
+
Enabled: false
|
83
|
+
|
84
|
+
# Offense count: 3
|
85
|
+
# Configuration parameters: EnforcedStyle, SupportedStyles.
|
86
|
+
Style/FormatString:
|
87
|
+
Exclude:
|
88
|
+
- 'lib/mongoid/full_text_search.rb'
|
89
|
+
- 'spec/models/external_artwork_no_fields_supplied.rb'
|
90
|
+
|
91
|
+
# Offense count: 2
|
92
|
+
Style/MultilineBlockChain:
|
93
|
+
Exclude:
|
94
|
+
- 'lib/mongoid/full_text_search.rb'
|
95
|
+
|
96
|
+
# Offense count: 4
|
97
|
+
# Configuration parameters: Methods.
|
98
|
+
Style/SingleLineBlockParams:
|
99
|
+
Exclude:
|
100
|
+
- 'lib/mongoid/full_text_search.rb'
|
101
|
+
- 'spec/mongoid/full_text_search_spec.rb'
|
data/.travis.yml
CHANGED
@@ -1,7 +1,15 @@
|
|
1
1
|
rvm:
|
2
|
+
- 2.2
|
3
|
+
- 2.1
|
4
|
+
- 2.0
|
2
5
|
- 1.9.3
|
3
|
-
-
|
6
|
+
- rbx-2.2.10
|
7
|
+
- jruby-19mode
|
8
|
+
|
4
9
|
env:
|
5
|
-
-
|
6
|
-
-
|
10
|
+
- MONGOID_VERSION=3.0.0
|
11
|
+
- MONGOID_VERSION=3.1.0
|
12
|
+
- MONGOID_VERSION=4.0
|
13
|
+
- MONGOID_VERSION=5.0
|
14
|
+
|
7
15
|
services: mongodb
|
data/CHANGELOG.md
CHANGED
@@ -1,10 +1,17 @@
|
|
1
|
+
0.7.0 (9/18/2015)
|
2
|
+
-----------------
|
3
|
+
|
4
|
+
* Compatible with Mongoid 4 and 5 - [@dblock](https://github.com/dblock).
|
5
|
+
* Rewritten .gemspec, removed Jeweler - [@dblock](https://github.com/dblock).
|
6
|
+
* Added RuboCop - [@dblock](https://github.com/dblock).
|
7
|
+
|
1
8
|
0.6.1 (4/3/2013)
|
2
|
-
|
9
|
+
----------------
|
3
10
|
|
4
11
|
* [#6](https://github.com/artsy/mongoid_fulltext/pull/6): Upgrade to Mongoid ~> 3.0 - [@simi](https://github.com/simi).
|
5
12
|
|
6
13
|
0.6.0 (7/16/2012)
|
7
|
-
|
14
|
+
-----------------
|
8
15
|
|
9
16
|
* [#2](https://github.com/artsy/mongoid_fulltext/pull/2): Upgrade to Mongoid 3.0 - [@volmer](https://github.com/volmer).
|
10
17
|
* [#1](https://github.com/artsy/mongoid_fulltext/pull/1): Fix: downcase destroys non-latin strings - [@netoneko](https://github.com/netoneko).
|
data/Gemfile
CHANGED
@@ -1,15 +1,25 @@
|
|
1
|
-
source
|
1
|
+
source 'http://rubygems.org'
|
2
2
|
|
3
|
-
|
4
|
-
|
3
|
+
case version = ENV['MONGOID_VERSION'] || '5'
|
4
|
+
when /5/
|
5
|
+
gem 'mongoid', '~> 5.0'
|
6
|
+
when /4/
|
7
|
+
gem 'mongoid', '~> 4.0'
|
8
|
+
when /3.1.0/
|
9
|
+
gem 'mongoid', '~> 3.1.0'
|
10
|
+
when /3.0.0/
|
11
|
+
gem 'mongoid', '~> 3.0.0'
|
5
12
|
else
|
6
|
-
gem
|
13
|
+
gem 'mongoid', version
|
7
14
|
end
|
8
15
|
|
9
|
-
|
16
|
+
gemspec
|
10
17
|
|
11
|
-
group :
|
12
|
-
gem
|
13
|
-
|
14
|
-
|
18
|
+
group :test do
|
19
|
+
gem 'rspec'
|
20
|
+
end
|
21
|
+
|
22
|
+
group :development do
|
23
|
+
gem 'rake'
|
24
|
+
gem 'rubocop', '0.34.1'
|
15
25
|
end
|
data/LICENSE
CHANGED
data/README.md
CHANGED
@@ -1,19 +1,22 @@
|
|
1
|
-
Mongoid Fulltext Search
|
1
|
+
Mongoid Fulltext Search
|
2
2
|
=======================
|
3
3
|
|
4
|
+
[![Build Status](https://secure.travis-ci.org/artsy/mongoid_fulltext.svg)](http://travis-ci.org/artsy/mongoid_fulltext)
|
5
|
+
[![Gem Version](https://badge.fury.io/rb/mongoid_fulltext.svg)](http://badge.fury.io/rb/mongoid_fulltext)
|
6
|
+
|
4
7
|
Full-text search using n-gram matching for the Mongoid ODM. Tested on MongoDB 1.6 and above, but
|
5
8
|
probably works on earlier versions as well.
|
6
9
|
|
7
|
-
MongoDB
|
8
|
-
where you want something a little less than a full-blown indexing service
|
10
|
+
MongoDB introduced full-text search capabilities in v2.4, so this gem is a good fit for cases
|
11
|
+
where you want something a little less than a full-blown indexing service. The mongoid_fulltext gem
|
9
12
|
lets you do a fuzzy string search across relatively short strings, which makes it good for populating
|
10
13
|
autocomplete boxes based on the display names of your Rails models but not appropriate for, say,
|
11
14
|
indexing hundreds of thousands of HTML documents.
|
12
15
|
|
13
16
|
Install
|
14
|
-
|
17
|
+
-------
|
15
18
|
|
16
|
-
Version 0.6.
|
19
|
+
Version 0.6.1 or newer of this gem requires Ruby 1.9.3 or newer and works with Mongoid 3, 4 and 5.
|
17
20
|
Use version 0.5.x for Mongoid 2.4.x and Ruby 1.8.7, 1.9.2 or 1.9.3.
|
18
21
|
|
19
22
|
For Ruby 1.8.7 and/or Mongoid 2.x use [mongoid_fulltext 0.5.x](https://github.com/artsy/mongoid_fulltext/tree/0.5-stable).
|
@@ -22,8 +25,8 @@ For Ruby 1.8.7 and/or Mongoid 2.x use [mongoid_fulltext 0.5.x](https://github.co
|
|
22
25
|
gem 'mongoid_fulltext'
|
23
26
|
```
|
24
27
|
|
25
|
-
|
26
|
-
|
28
|
+
Examples
|
29
|
+
--------
|
27
30
|
|
28
31
|
Suppose you have an `Artist` model and want to index each artist's name:
|
29
32
|
|
@@ -395,7 +398,7 @@ Fork the project. Make your feature addition or bug fix with tests. Send a pull
|
|
395
398
|
Copyright and License
|
396
399
|
---------------------
|
397
400
|
|
398
|
-
MIT License, see [LICENSE](
|
401
|
+
MIT License, see [LICENSE](LICENSE) for details.
|
399
402
|
|
400
|
-
(c) 2011-
|
403
|
+
(c) 2011-2015 [Artsy Inc.](http://artsy.github.io)
|
401
404
|
|
data/Rakefile
CHANGED
@@ -1,36 +1,16 @@
|
|
1
1
|
require 'rubygems'
|
2
|
-
require 'bundler'
|
3
|
-
begin
|
4
|
-
Bundler.setup(:default, :development)
|
5
|
-
rescue Bundler::BundlerError => e
|
6
|
-
$stderr.puts e.message
|
7
|
-
$stderr.puts "Run `bundle install` to install missing gems"
|
8
|
-
exit e.status_code
|
9
|
-
end
|
10
|
-
require 'rake'
|
11
|
-
require 'rspec/core/rake_task'
|
2
|
+
require 'bundler/gem_tasks'
|
12
3
|
|
13
|
-
|
14
|
-
Jeweler::Tasks.new do |gem|
|
15
|
-
# gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
|
16
|
-
gem.name = "mongoid_fulltext"
|
17
|
-
gem.homepage = "http://github.com/aaw/mongoid_fulltext"
|
18
|
-
gem.license = "MIT"
|
19
|
-
gem.summary = %Q{Full-text search for the Mongoid ORM}
|
20
|
-
gem.description = %Q{Full-text search for the Mongoid ORM, using n-grams extracted from text}
|
21
|
-
gem.email = "aaron.windsor@gmail.com"
|
22
|
-
gem.authors = ["Aaron Windsor"]
|
23
|
-
# Include your dependencies below. Runtime dependencies are required when using your gem,
|
24
|
-
# and development dependencies are only needed for development (ie running rake tasks, tests, etc)
|
25
|
-
# gem.add_runtime_dependency 'jabber4r', '> 0.1'
|
26
|
-
# gem.add_development_dependency 'rspec', '> 1.2.3'
|
27
|
-
end
|
28
|
-
Jeweler::RubygemsDotOrgTasks.new
|
4
|
+
Bundler.setup :default, :development
|
29
5
|
|
6
|
+
require 'rspec/core'
|
7
|
+
require 'rspec/core/rake_task'
|
30
8
|
|
31
|
-
desc "Run all tests"
|
32
9
|
RSpec::Core::RakeTask.new(:spec) do |spec|
|
33
|
-
spec.pattern =
|
10
|
+
spec.pattern = FileList['spec/**/*_spec.rb']
|
34
11
|
end
|
35
12
|
|
36
|
-
|
13
|
+
require 'rubocop/rake_task'
|
14
|
+
RuboCop::RakeTask.new(:rubocop)
|
15
|
+
|
16
|
+
task default: [:rubocop, :spec]
|
@@ -0,0 +1,372 @@
|
|
1
|
+
require 'mongoid'
|
2
|
+
require 'mongoid/compatibility'
|
3
|
+
if Mongoid::Compatibility::Version.mongoid3?
|
4
|
+
require_relative 'indexes'
|
5
|
+
else
|
6
|
+
require_relative 'indexable'
|
7
|
+
end
|
8
|
+
require 'unicode_utils'
|
9
|
+
require 'cgi'
|
10
|
+
|
11
|
+
module Mongoid::FullTextSearch
|
12
|
+
extend ActiveSupport::Concern
|
13
|
+
|
14
|
+
included do
|
15
|
+
cattr_accessor :mongoid_fulltext_config
|
16
|
+
end
|
17
|
+
|
18
|
+
class UnspecifiedIndexError < StandardError; end
|
19
|
+
class UnknownFilterQueryOperator < StandardError; end
|
20
|
+
|
21
|
+
module ClassMethods
|
22
|
+
# Declares a full-text index over the given fields of the including model.
#
# @param args [Array] field (method) names whose values are indexed, optionally followed by an
#   options Hash. When no field is given, :to_s is indexed. Options override the defaults below;
#   :index_name selects the name of the index collection (otherwise a name is derived from the
#   model name and the number of indexes configured so far).
# @return the result of registering the before_destroy callback
def fulltext_search_in(*args)
  self.mongoid_fulltext_config = {} if mongoid_fulltext_config.nil?
  options = args.last.is_a?(Hash) ? args.pop : {}

  index_name = options.fetch(:index_name) do
    'mongoid_fulltext.index_%s_%s' % [name.downcase, mongoid_fulltext_config.count]
  end

  stop_word_list = %w(i a s t me my we he it am is be do an if
                      or as of at by to up in on no so our you him
                      his she her its who are was has had did the and
                      but for out off why how all any few nor not own
                      too can don now ours your hers they them what whom
                      this that were been have does with into from down over
                      then once here when both each more most some such only
                      same than very will just yours their which these those
                      being doing until while about after above below under
                      again there where other myself itself theirs having during
                      before should himself herself because against between through
                      further yourself ourselves yourselves themselves)

  defaults = {
    alphabet: 'abcdefghijklmnopqrstuvwxyz0123456789 ',
    word_separators: "-_ \n\t",
    ngram_width: 3,
    max_ngrams_to_search: 6,
    apply_prefix_scoring_to_all_words: true,
    index_full_words: true,
    index_short_prefixes: false,
    max_candidate_set_size: 1000,
    remove_accents: true,
    reindex_immediately: true,
    stop_words: Hash[stop_word_list.map { |word| [word, true] }]
  }
  config = defaults.merge(options)

  # Alphabet and separators are supplied as strings but stored as char => char lookup tables.
  to_lookup = ->(chars) { Hash[chars.split('').map { |ch| [ch, ch] }] }

  config[:ngram_fields] = args.empty? ? [:to_s] : args
  config[:alphabet] = to_lookup.call(config[:alphabet])
  config[:word_separators] = to_lookup.call(config[:word_separators])
  mongoid_fulltext_config[index_name] = config

  before_save(:update_ngram_index) if config[:reindex_immediately]
  before_destroy :remove_from_ngram_index
end
|
67
|
+
|
68
|
+
# Ensures the database indexes for every full-text index configured on this model.
# Does nothing (returns nil) when the model has no full-text configuration.
def create_fulltext_indexes
  configured = mongoid_fulltext_config
  return unless configured

  configured.each_pair { |index_name, fulltext_config| fulltext_search_ensure_indexes(index_name, fulltext_config) }
end
|
74
|
+
|
75
|
+
# Creates (and, where stale, first drops) the indexes on one n-gram index collection.
#
# @param index_name [String] name of the index collection
# @param config [Hash] the index configuration; only config[:filters] is read here
# @return the result of creating the document_id index
def fulltext_search_ensure_indexes(index_name, config)
  coll = collection.database[index_name]

  # The order of filters matters when the same index is used from two or more collections,
  # so filter fields are always sorted by name.
  filter_indexes = (config[:filters] || []).map { |key, _value| ["filter_values.#{key}", 1] }
  filter_indexes = filter_indexes.sort_by(&:first)
  index_definition = [['ngram', 1], ['score', -1]] + filter_indexes

  # The index definition may have changed since it was last created: drop every existing
  # ngram index whose keys do not cover the expected definition, while remembering any
  # filter fields those indexes already carry.
  correct_keys = index_definition.map(&:first)
  all_filter_keys = filter_indexes.map(&:first)
  coll.indexes.each do |idef|
    keys = idef['key'].keys
    next unless keys.member?('ngram')
    all_filter_keys |= keys.select { |key| key.starts_with?('filter_values.') }
    matched_keys = keys & correct_keys
    next if matched_keys == correct_keys
    Mongoid.logger.info "Dropping #{idef['name']} [#{matched_keys} <=> #{correct_keys}]" if Mongoid.logger
    if Mongoid::Compatibility::Version.mongoid5?
      coll.indexes.drop_one(idef['key'])
    else
      coll.indexes.drop(idef['key'])
    end
  end

  # Existing indexes contributed extra filter fields: widen the definition to include them.
  if all_filter_keys.length > filter_indexes.length
    filter_indexes = all_filter_keys.map { |key| [key, 1] }.sort_by(&:first)
    index_definition = [['ngram', 1], ['score', -1]] + filter_indexes
  end

  Mongoid.logger.info "Ensuring fts_index on #{coll.name}: #{index_definition}" if Mongoid.logger
  if Mongoid::Compatibility::Version.mongoid5?
    coll.indexes.create_one(Hash[index_definition], name: 'fts_index')
  else
    coll.indexes.create(Hash[index_definition], name: 'fts_index')
  end

  Mongoid.logger.info "Ensuring document_id index on #{coll.name}" if Mongoid.logger
  if Mongoid::Compatibility::Version.mongoid5?
    coll.indexes.create_one('document_id' => 1) # to make removes fast
  else
    coll.indexes.create('document_id' => 1) # to make removes fast
  end
end
|
122
|
+
|
123
|
+
# Runs a fuzzy n-gram search against one of this model's full-text indexes.
#
# @param query_string [String, nil] the text to search for
# @param options [Hash] recognised keys:
#   :max_results   - maximum number of results to return (default 10)
#   :return_scores - when true, results are [document, score] pairs (default false)
#   :index         - name of the index to search; required when the model has several
#   Every other key is treated as a filter and passed to map_query_filters.
#   The hash is not modified.
# @return [Array] whatever instantiate_mapreduce_results builds for the best-scoring
#   candidates, or [] when the query yields no n-grams
# @raise [UnspecifiedIndexError] when several indexes are configured and :index is missing
def fulltext_search(query_string, options = {})
  # Work on a copy: the option keys are deleted below so that only filters remain,
  # and that must not strip (or fail on a frozen) caller-owned hash.
  options = options.dup
  max_results = options.key?(:max_results) ? options.delete(:max_results) : 10
  return_scores = options.key?(:return_scores) ? options.delete(:return_scores) : false
  if mongoid_fulltext_config.count > 1 && !options.key?(:index)
    # The option that selects the index is :index (see the lookup below).
    error_message = '%s is indexed by multiple full-text indexes. You must specify one by passing an :index parameter'
    fail UnspecifiedIndexError, error_message % name, caller
  end
  index_name = options.key?(:index) ? options.delete(:index) : mongoid_fulltext_config.keys.first
  index_config = mongoid_fulltext_config[index_name]

  # Options hash should only contain filters after this point

  ngrams = all_ngrams(query_string, index_config)
  return [] if ngrams.empty?

  # For each ngram, construct the query we'll use to pull index documents and
  # get a count of the number of index documents containing that n-gram
  ordering = { 'score' => -1 }
  limit = index_config[:max_candidate_set_size]
  coll = collection.database[index_name]
  cursors = ngrams.map do |ngram|
    query = { 'ngram' => ngram[0] }
    query.update(map_query_filters(options))
    count = coll.find(query).count
    { ngram: ngram, count: count, query: query }
  end.sort! { |record1, record2| record1[:count] <=> record2[:count] }

  # Using the queries we just constructed and the n-gram frequency counts we
  # just computed, pull in about *:max_candidate_set_size* candidates by
  # considering the n-grams in order of increasing frequency. When we've
  # spent all *:max_candidate_set_size* candidates, pull the top-scoring
  # *max_results* candidates for each remaining n-gram.
  results_so_far = 0
  candidates_list = cursors.map do |doc|
    next if doc[:count] == 0
    query_result = coll.find(doc[:query])
    if results_so_far >= limit
      query_result = query_result.sort(ordering).limit(max_results)
    elsif doc[:count] > limit - results_so_far
      query_result = query_result.sort(ordering).limit(limit - results_so_far)
    end
    results_so_far += doc[:count]
    ngram_score = ngrams[doc[:ngram][0]]
    Hash[query_result.map do |candidate|
      [candidate['document_id'],
       { clazz: candidate['class'], score: candidate['score'] * ngram_score }]
    end]
  end.compact

  # Finally, score all candidates by matching them up with other candidates that are
  # associated with the same document. This is similar to how you might process a
  # boolean AND query, except that with an AND query, you'd stop after considering
  # the first candidate list and matching its candidates up with candidates from other
  # lists, whereas here we want the search to be a little fuzzier so we'll run through
  # all candidate lists, removing candidates as we match them up.
  all_scores = []
  until candidates_list.empty?
    candidates = candidates_list.pop
    scores = candidates.map do |candidate_id, data|
      { id: candidate_id,
        clazz: data[:clazz],
        score: data[:score] + candidates_list.map { |others| (others.delete(candidate_id) || { score: 0 })[:score] }.sum
      }
    end
    all_scores.concat(scores)
  end
  all_scores.sort! { |document1, document2| -document1[:score] <=> -document2[:score] }
  instantiate_mapreduce_results(all_scores[0..max_results - 1], return_scores: return_scores)
end
|
191
|
+
|
192
|
+
# Loads the document a single search result refers to.
#
# @param result [Hash] must provide :clazz (a class name) and :id
# @return whatever the resolved class's `find` returns for that id
def instantiate_mapreduce_result(result)
  model_class = result[:clazz].constantize
  model_class.find(result[:id])
end
|
195
|
+
|
196
|
+
# Turns scored search results into documents, dropping results whose document is nil.
#
# @param results [Array<Hash>] entries accepted by instantiate_mapreduce_result; :score is
#   read when scores are requested
# @param options [Hash] :return_scores - when truthy, each element is [document, score]
# @return [Array] documents, or [document, score] pairs
def instantiate_mapreduce_results(results, options)
  unless options[:return_scores]
    return results.map { |result| instantiate_mapreduce_result(result) }.compact
  end

  scored = results.map { |result| [instantiate_mapreduce_result(result), result[:score]] }
  scored.reject { |pair| pair[0].nil? }
end
|
203
|
+
|
204
|
+
# Breaks a string into the scored n-grams (plus optional full words and short prefixes)
# used both when indexing a document and when querying.
#
# @param str [String, nil] text to tokenize; nil yields {}
# @param config [Hash] index configuration (alphabet, word_separators, ngram_width, ...)
# @param bound_number_returned [Boolean] when true, n-grams are sampled every
#   ceil((length - ngram_width) / max_ngrams_to_search) characters (at least 1); when false
#   every n-gram is extracted
# @return [Hash] token => score
def all_ngrams(str, config, bound_number_returned = true)
  return {} if str.nil?

  if config[:remove_accents]
    if defined?(UnicodeUtils)
      str = UnicodeUtils.nfkd(str)
    elsif defined?(DiacriticsFu)
      str = DiacriticsFu.escape(str)
    end
  end

  alphabet = config[:alphabet]
  separators = config[:word_separators]
  width = config[:ngram_width]

  # Keep only characters that are in the alphabet or are word separators
  filtered_str = str.mb_chars.downcase.to_s.split('').select { |ch| alphabet[ch] || separators[ch] }.join('')
  length = filtered_str.length
  last_start = length - width

  # When the number of n-grams is bounded, walk the string in evenly spaced strides
  # instead of taking every n-gram.
  step_size = bound_number_returned ? [(last_start.to_f / config[:max_ngrams_to_search]).ceil, 1].max : 1

  # N-grams that start the string (or, optionally, any word) get the higher square
  # root-based score; all other n-grams get the lower one.
  prefix_score = Math.sqrt(1 + 1.0 / length)
  inner_score = Math.sqrt(2.0 / length)
  ngram_array = (0..last_start).step(step_size).map do |i|
    starts_word = i == 0 || (config[:apply_prefix_scoring_to_all_words] && \
                             separators.key?(filtered_str[i - 1].chr))
    { ngram: filtered_str[i, width], score: starts_word ? prefix_score : inner_score }
  end

  # If an ngram appears multiple times in the query string, keep the max score
  ngram_array = ngram_array.group_by { |h| h[:ngram] }.map do |key, values|
    { ngram: key, score: values.map { |v| v[:score] }.max }
  end

  if config[:index_short_prefixes] || config[:index_full_words]
    separator_class = separators.keys.map { |k| Regexp.escape(k) }.join
    all_words = filtered_str.split(Regexp.compile("[#{separator_class}]"))
  end

  # A word is indexed unless it is a stop word; a stop word that makes up the whole string is kept.
  indexable = ->(word) { config[:stop_words][word].nil? || word == filtered_str }

  # Add 'short prefix' records: the first (ngram_width - 1) characters of each word
  if config[:index_short_prefixes]
    prefixes_seen = {}
    all_words.each do |word|
      next if word.length < width - 1
      prefix = word[0...width - 1]
      next unless prefixes_seen[prefix].nil? && indexable.call(word)
      ngram_array << { ngram: prefix, score: 1 + 1.0 / length }
      prefixes_seen[prefix] = true
    end
  end

  # Add a record for each distinct full word longer than one character
  if config[:index_full_words]
    full_words_seen = {}
    all_words.each do |word|
      next unless word.length > 1 && full_words_seen[word].nil? && indexable.call(word)
      ngram_array << { ngram: word, score: 1 + 1.0 / length }
      full_words_seen[word] = true
    end
  end

  # A token that appears as any combination of full word, short prefix and ngram gets the sum of its scores
  Hash[ngram_array.group_by { |h| h[:ngram] }.map { |key, values| [key, values.map { |v| v[:score] }.sum] }]
end
|
279
|
+
|
280
|
+
# Class-level removal: deletes, from every configured index collection, all index entries
# whose 'class' is this model's name.
def remove_from_ngram_index
  mongoid_fulltext_config.each_key do |index_name|
    entries = collection.database[index_name].find('class' => name)
    if Mongoid::Compatibility::Version.mongoid5?
      entries.delete_many
    else
      entries.remove_all
    end
  end
end
|
290
|
+
|
291
|
+
# Class-level reindex: asks every document returned by `all` to update its index entries.
def update_ngram_index
  all.each { |document| document.update_ngram_index }
end
|
294
|
+
|
295
|
+
private
|
296
|
+
|
297
|
+
# Translates search filters into query conditions that are merged into each n-gram query.
#
# @param filters [Hash] filter name => value. A Hash value must carry :any (mapped to '$in')
#   or :all (mapped to '$all'); any other value is matched with '$all'.
# @return [Hash] the entries produced by format_query_filter
# @raise [UnknownFilterQueryOperator] for a Hash value with neither :any nor :all
def map_query_filters(filters)
  conditions = filters.map do |key, value|
    next format_query_filter('$all', key, value) unless value.is_a?(Hash)

    if value.key?(:any)
      format_query_filter('$in', key, value[:any])
    elsif value.key?(:all)
      format_query_filter('$all', key, value[:all])
    else
      fail UnknownFilterQueryOperator, value.keys.join(','), caller
    end
  end
  Hash[conditions]
end
|
310
|
+
|
311
|
+
# Builds one [field, condition] pair for a filter on the index collection.
#
# @param operator [String] query operator, e.g. '$in' or '$all'
# @param key filter name; the field becomes 'filter_values.<key>'
# @param value a single value or (possibly nested) array of values; always flattened into one array
# @return [Array] [field name, { operator => values }]
def format_query_filter(operator, key, value)
  field = 'filter_values.%s' % key
  values = [value].flatten
  [field, { operator => values }]
end
|
314
|
+
end
|
315
|
+
|
316
|
+
# Instance-level reindex: rewrites this document's entries in every configured index.
#
# For each index: the optional :update_if condition is checked (Symbol = method name,
# String = code evaluated on the document, Proc = called with the document; any other
# type skips the index), the document's existing entries are deleted, and one entry per
# n-gram of the configured fields is inserted, together with filter values when the index
# defines :filters.
#
# An index for which the document yields no n-grams is left empty and processing
# continues with the next index.
def update_ngram_index
  mongoid_fulltext_config.each_pair do |index_name, fulltext_config|
    if condition = fulltext_config[:update_if]
      case condition
      when Symbol then next unless send condition
      when String then next unless instance_eval condition
      when Proc then next unless condition.call self
      else; next
      end
    end

    # remove existing ngrams from external index
    coll = collection.database[index_name.to_sym]
    if Mongoid::Compatibility::Version.mongoid5?
      coll.find('document_id' => _id).delete_many
    else
      coll.find('document_id' => _id).remove_all
    end
    # extract ngrams from fields
    field_values = fulltext_config[:ngram_fields].map { |field| send(field) }
    ngrams = field_values.inject({}) { |accum, item| accum.update(self.class.all_ngrams(item, fulltext_config, false)) }
    # Nothing to index here. Use `next`, not `return`: returning would leave every
    # remaining index of this document un-updated.
    next if ngrams.empty?
    # apply filters, if necessary
    filter_values = nil
    if fulltext_config.key?(:filters)
      filter_values = Hash[fulltext_config[:filters].map do |key, value|
        begin
          [key, value.call(self)]
        rescue
          # Suppress any exceptions caused by filters; the failing filter is simply omitted
        end
      end.compact]
    end
    # insert new ngrams in external index
    ngrams.each_pair do |ngram, score|
      index_document = { 'ngram' => ngram, 'document_id' => _id, 'score' => score, 'class' => self.class.name }
      index_document['filter_values'] = filter_values if fulltext_config.key?(:filters)
      if Mongoid::Compatibility::Version.mongoid5?
        coll.insert_one(index_document)
      else
        coll.insert(index_document)
      end
    end
  end
end
|
361
|
+
|
362
|
+
# Instance-level removal: deletes this document's entries (matched on 'document_id')
# from every configured index collection.
def remove_from_ngram_index
  mongoid_fulltext_config.each_key do |index_name|
    entries = collection.database[index_name].find('document_id' => _id)
    if Mongoid::Compatibility::Version.mongoid5?
      entries.delete_many
    else
      entries.remove_all
    end
  end
end
|
372
|
+
end
|