mini_search 1.0.3 → 1.0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +2 -2
- data/README.md +22 -0
- data/lib/mini_search.rb +29 -6
- data/lib/mini_search/inverted_index.rb +2 -2
- data/lib/mini_search/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e2ecaa95d697c1cd628a7341433c10b345c33445fd6016b7a64335a57d1361d2
|
4
|
+
data.tar.gz: efc707b5805716278a4e14962f821f3a7f3e060f51a410da36b3a7b914d05359
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 293bc4f841c3da5c54ae8d9f9831362bb3f243bb6b38c5fe6f9454c8e29651feaef7aaff8a47cc4252e2dc86bb88bff928f6b7c8f1b1dff312c716d85c845f60
|
7
|
+
data.tar.gz: 957f959316270a1d9f31fb251b0a4d7b284ff9f2176defaff2341f3d84f376f350d71ee62e3b9060228c5b9a02256ed1dc1d74764c22ccd3b4a4b0efe4838a5b
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -248,6 +248,28 @@ First we create an inverted Index
|
|
248
248
|
We can see the results are sorted by score. Notice that the documents we index can have any other
|
249
249
|
fields like name, price, etc., but only `:id` and `:indexed_field` are required.
|
250
250
|
|
251
|
+
## Configuring multiple cores using yaml
|
252
|
+
|
253
|
+
You can configure multiple cores using a YAML config file.
|
254
|
+
|
255
|
+
```yaml
|
256
|
+
cores:
|
257
|
+
- main:
|
258
|
+
lang: 'pt'
|
259
|
+
synonyms_map:
|
260
|
+
bebe: 'nene'
|
261
|
+
stop_words:
|
262
|
+
- 'de'
|
263
|
+
- 'para'
|
264
|
+
- aux:
|
265
|
+
lang: 'pt'
|
266
|
+
synonyms_map:
|
267
|
+
bebe: 'nene'
|
268
|
+
stop_words:
|
269
|
+
- 'de'
|
270
|
+
- 'para'
|
271
|
+
```
|
272
|
+
|
251
273
|
## Development
|
252
274
|
|
253
275
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
data/lib/mini_search.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
require 'yaml'
|
1
2
|
require 'mini_search/version.rb'
|
2
3
|
require 'mini_search/stemmer/portuguese.rb'
|
3
4
|
require 'mini_search/standard_whitespace_tokenizer.rb'
|
@@ -59,12 +60,8 @@ module MiniSearch
|
|
59
60
|
new(indexing_pipeline, querying_pipeline)
|
60
61
|
end
|
61
62
|
|
62
|
-
def self.new_localized_index(
|
63
|
-
|
64
|
-
language_support = LANGUAGE_SUPPORTS[language_support].new(stop_words)
|
65
|
-
end
|
66
|
-
|
67
|
-
raise 'language support not found or nil' unless language_support
|
63
|
+
def self.new_localized_index(lang, synonyms_map: {}, stop_words: [])
|
64
|
+
language_support = find_language_support(lang, stop_words)
|
68
65
|
|
69
66
|
new_index(
|
70
67
|
stop_words: language_support.stop_words,
|
@@ -72,4 +69,30 @@ module MiniSearch
|
|
72
69
|
synonyms_map: synonyms_map
|
73
70
|
)
|
74
71
|
end
|
72
|
+
|
73
|
+
def self.from_config_file(file)
|
74
|
+
raise "file not found '#{file}'" unless File.exists?(file)
|
75
|
+
|
76
|
+
cores = YAML.load_file(file)['cores']
|
77
|
+
|
78
|
+
cores.map do |core|
|
79
|
+
lang = core['lang'].to_sym
|
80
|
+
|
81
|
+
new_localized_index(
|
82
|
+
lang,
|
83
|
+
stop_words: core['stop_words'],
|
84
|
+
synonyms_map: core['synonyms_map'].transform_values { |v| v.split(',') }
|
85
|
+
)
|
86
|
+
end
|
87
|
+
end
|
88
|
+
|
89
|
+
private_class_method def self.find_language_support(lang, stop_words)
|
90
|
+
if lang.is_a?(Symbol)
|
91
|
+
language_support = LANGUAGE_SUPPORTS[lang].new(stop_words)
|
92
|
+
end
|
93
|
+
|
94
|
+
raise 'language support not found or nil' unless language_support
|
95
|
+
|
96
|
+
language_support
|
97
|
+
end
|
75
98
|
end
|
@@ -154,8 +154,8 @@ module MiniSearch
|
|
154
154
|
|
155
155
|
def generate_idfs(processed_terms)
|
156
156
|
processed_terms.each_with_object({}) do |term, idfs|
|
157
|
-
if @
|
158
|
-
idfs[term] = Idf.calculate(@
|
157
|
+
if @inverted_index[term].to_a.any?
|
158
|
+
idfs[term] = Idf.calculate(@inverted_index[term].size, @documents.size)
|
159
159
|
end
|
160
160
|
end
|
161
161
|
end
|
data/lib/mini_search/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mini_search
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.3
|
4
|
+
version: 1.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew S Aguiar
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-04-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|