mini_search 1.0.3 → 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +2 -2
- data/README.md +22 -0
- data/lib/mini_search.rb +29 -6
- data/lib/mini_search/inverted_index.rb +2 -2
- data/lib/mini_search/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: e2ecaa95d697c1cd628a7341433c10b345c33445fd6016b7a64335a57d1361d2
|
|
4
|
+
data.tar.gz: efc707b5805716278a4e14962f821f3a7f3e060f51a410da36b3a7b914d05359
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 293bc4f841c3da5c54ae8d9f9831362bb3f243bb6b38c5fe6f9454c8e29651feaef7aaff8a47cc4252e2dc86bb88bff928f6b7c8f1b1dff312c716d85c845f60
|
|
7
|
+
data.tar.gz: 957f959316270a1d9f31fb251b0a4d7b284ff9f2176defaff2341f3d84f376f350d71ee62e3b9060228c5b9a02256ed1dc1d74764c22ccd3b4a4b0efe4838a5b
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
|
@@ -248,6 +248,28 @@ First we create an inverted Index
|
|
|
248
248
|
We can see results are sorted by score, notice that the document we index can have any other
|
|
249
249
|
fields like name, price and etc. But only `:id` and `:indexed_field` are required
|
|
250
250
|
|
|
251
|
+
## Configuring multiple cores using yaml
|
|
252
|
+
|
|
253
|
+
You can configure a multiple core using a yaml config file.
|
|
254
|
+
|
|
255
|
+
```yaml
|
|
256
|
+
cores:
|
|
257
|
+
- main:
|
|
258
|
+
lang: 'pt'
|
|
259
|
+
synonyms_map:
|
|
260
|
+
bebe: 'nene'
|
|
261
|
+
stop_words:
|
|
262
|
+
- 'de'
|
|
263
|
+
- 'para'
|
|
264
|
+
- aux:
|
|
265
|
+
lang: 'pt'
|
|
266
|
+
synonyms_map:
|
|
267
|
+
bebe: 'nene'
|
|
268
|
+
stop_words:
|
|
269
|
+
- 'de'
|
|
270
|
+
- 'para'
|
|
271
|
+
```
|
|
272
|
+
|
|
251
273
|
## Development
|
|
252
274
|
|
|
253
275
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
data/lib/mini_search.rb
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
require 'yaml'
|
|
1
2
|
require 'mini_search/version.rb'
|
|
2
3
|
require 'mini_search/stemmer/portuguese.rb'
|
|
3
4
|
require 'mini_search/standard_whitespace_tokenizer.rb'
|
|
@@ -59,12 +60,8 @@ module MiniSearch
|
|
|
59
60
|
new(indexing_pipeline, querying_pipeline)
|
|
60
61
|
end
|
|
61
62
|
|
|
62
|
-
def self.new_localized_index(
|
|
63
|
-
|
|
64
|
-
language_support = LANGUAGE_SUPPORTS[language_support].new(stop_words)
|
|
65
|
-
end
|
|
66
|
-
|
|
67
|
-
raise 'language support not found or nil' unless language_support
|
|
63
|
+
def self.new_localized_index(lang, synonyms_map: {}, stop_words: [])
|
|
64
|
+
language_support = find_language_support(lang, stop_words)
|
|
68
65
|
|
|
69
66
|
new_index(
|
|
70
67
|
stop_words: language_support.stop_words,
|
|
@@ -72,4 +69,30 @@ module MiniSearch
|
|
|
72
69
|
synonyms_map: synonyms_map
|
|
73
70
|
)
|
|
74
71
|
end
|
|
72
|
+
|
|
73
|
+
def self.from_config_file(file)
|
|
74
|
+
raise "file not found '#{file}'" unless File.exists?(file)
|
|
75
|
+
|
|
76
|
+
cores = YAML.load_file(file)['cores']
|
|
77
|
+
|
|
78
|
+
cores.map do |core|
|
|
79
|
+
lang = core['lang'].to_sym
|
|
80
|
+
|
|
81
|
+
new_localized_index(
|
|
82
|
+
lang,
|
|
83
|
+
stop_words: core['stop_words'],
|
|
84
|
+
synonyms_map: core['synonyms_map'].transform_values { |v| v.split(',') }
|
|
85
|
+
)
|
|
86
|
+
end
|
|
87
|
+
end
|
|
88
|
+
|
|
89
|
+
private_class_method def self.find_language_support(lang, stop_words)
|
|
90
|
+
if lang.is_a?(Symbol)
|
|
91
|
+
language_support = LANGUAGE_SUPPORTS[lang].new(stop_words)
|
|
92
|
+
end
|
|
93
|
+
|
|
94
|
+
raise 'language support not found or nil' unless language_support
|
|
95
|
+
|
|
96
|
+
language_support
|
|
97
|
+
end
|
|
75
98
|
end
|
|
@@ -154,8 +154,8 @@ module MiniSearch
|
|
|
154
154
|
|
|
155
155
|
def generate_idfs(processed_terms)
|
|
156
156
|
processed_terms.each_with_object({}) do |term, idfs|
|
|
157
|
-
if @
|
|
158
|
-
idfs[term] = Idf.calculate(@
|
|
157
|
+
if @inverted_index[term].to_a.any?
|
|
158
|
+
idfs[term] = Idf.calculate(@inverted_index[term].size, @documents.size)
|
|
159
159
|
end
|
|
160
160
|
end
|
|
161
161
|
end
|
data/lib/mini_search/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: mini_search
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.0.3
|
|
4
|
+
version: 1.0.4
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Andrew S Aguiar
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 2020-04-19 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: bundler
|