mini_search 1.0.4 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.devcontainer/devcontainer.json +31 -0
- data/Gemfile.lock +22 -18
- data/README.md +94 -36
- data/bin/console +1 -0
- data/lib/mini_search/compound_tokenizer.rb +15 -0
- data/lib/mini_search/ngram_tokenizer.rb +15 -0
- data/lib/mini_search/pipeline.rb +13 -2
- data/lib/mini_search/version.rb +1 -1
- data/lib/mini_search.rb +14 -6
- data/mini_search.gemspec +3 -2
- metadata +27 -10
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 45bd263c2e9f4056e9401efe9f26bf735cafb41cdb9fef570e7426233ff6a92d
|
4
|
+
data.tar.gz: d29e30b91edf2434cb99d219b83b1dbba26e47190cf53be01a32355992af5042
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5e1661985c5bee26475a35424fe78b81b462ea90e41984623bb13c3621626a241b3107390c69ddae177b6b6a3fdb49886e647bc4f2650ee1a9a308e62f7350a8
|
7
|
+
data.tar.gz: 36b1a2729315cec0e2b3805dea9dd05c888b9a2795b9e57e34f7a454e98c589705bcf5ad2bb5df2c022e1b3dc6d8b7388d9b5ea4bd6dd04a9412ceb4e2b1a2d9
|
@@ -0,0 +1,31 @@
|
|
1
|
+
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
|
2
|
+
// README at: https://github.com/devcontainers/templates/tree/main/src/ruby
|
3
|
+
{
|
4
|
+
"name": "Ruby",
|
5
|
+
// Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
|
6
|
+
"image": "mcr.microsoft.com/devcontainers/ruby:0-3.1-bullseye",
|
7
|
+
"features": {
|
8
|
+
"ghcr.io/devcontainers/features/git:1": {
|
9
|
+
"ppa": true,
|
10
|
+
"version": "os-provided"
|
11
|
+
}
|
12
|
+
},
|
13
|
+
"customizations": {
|
14
|
+
"vscode": {
|
15
|
+
"extensions": [
|
16
|
+
// Add the IDs of extensions you want installed when the container is created.
|
17
|
+
"rebornix.Ruby"
|
18
|
+
]
|
19
|
+
}
|
20
|
+
}
|
21
|
+
// Features to add to the dev container. More info: https://containers.dev/features.
|
22
|
+
// "features": {},
|
23
|
+
// Use 'forwardPorts' to make a list of ports inside the container available locally.
|
24
|
+
// "forwardPorts": [],
|
25
|
+
// Use 'postCreateCommand' to run commands after the container is created.
|
26
|
+
// "postCreateCommand": "ruby --version",
|
27
|
+
// Configure tool-specific properties.
|
28
|
+
// "customizations": {},
|
29
|
+
// Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
|
30
|
+
// "remoteUser": "root"
|
31
|
+
}
|
data/Gemfile.lock
CHANGED
@@ -1,35 +1,39 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
mini_search (1.0.
|
4
|
+
mini_search (1.0.4)
|
5
|
+
ruby_ngrams (~> 0.0.6)
|
5
6
|
|
6
7
|
GEM
|
7
8
|
remote: https://rubygems.org/
|
8
9
|
specs:
|
9
|
-
diff-lcs (1.
|
10
|
-
rake (
|
11
|
-
rspec (3.
|
12
|
-
rspec-core (~> 3.
|
13
|
-
rspec-expectations (~> 3.
|
14
|
-
rspec-mocks (~> 3.
|
15
|
-
rspec-core (3.
|
16
|
-
rspec-support (~> 3.
|
17
|
-
rspec-expectations (3.
|
10
|
+
diff-lcs (1.5.0)
|
11
|
+
rake (12.3.3)
|
12
|
+
rspec (3.12.0)
|
13
|
+
rspec-core (~> 3.12.0)
|
14
|
+
rspec-expectations (~> 3.12.0)
|
15
|
+
rspec-mocks (~> 3.12.0)
|
16
|
+
rspec-core (3.12.1)
|
17
|
+
rspec-support (~> 3.12.0)
|
18
|
+
rspec-expectations (3.12.2)
|
18
19
|
diff-lcs (>= 1.2.0, < 2.0)
|
19
|
-
rspec-support (~> 3.
|
20
|
-
rspec-mocks (3.
|
20
|
+
rspec-support (~> 3.12.0)
|
21
|
+
rspec-mocks (3.12.5)
|
21
22
|
diff-lcs (>= 1.2.0, < 2.0)
|
22
|
-
rspec-support (~> 3.
|
23
|
-
rspec-support (3.
|
23
|
+
rspec-support (~> 3.12.0)
|
24
|
+
rspec-support (3.12.0)
|
25
|
+
ruby_cli (0.2.1)
|
26
|
+
ruby_ngrams (0.0.6)
|
27
|
+
ruby_cli (>= 0.2.0)
|
24
28
|
|
25
29
|
PLATFORMS
|
26
|
-
|
30
|
+
x86_64-linux
|
27
31
|
|
28
32
|
DEPENDENCIES
|
29
|
-
bundler (~>
|
33
|
+
bundler (~> 2.4.10)
|
30
34
|
mini_search!
|
31
|
-
rake (~>
|
35
|
+
rake (~> 12.0)
|
32
36
|
rspec (~> 3.0)
|
33
37
|
|
34
38
|
BUNDLED WITH
|
35
|
-
|
39
|
+
2.4.10
|
data/README.md
CHANGED
@@ -20,6 +20,36 @@ Or install it yourself as:
|
|
20
20
|
|
21
21
|
$ gem install mini_search
|
22
22
|
|
23
|
+
## BM25 (from wikipedia)
|
24
|
+
|
25
|
+
BM25 is a bag-of-words retrieval function that ranks a set of documents based on the query terms appearing in each document, regardless
|
26
|
+
of their proximity within the document. It is a family of scoring functions with slightly different components and parameters.
|
27
|
+
One of the most prominent instantiations of the function is as follows.
|
28
|
+
|
29
|
+
Given a query Q, containing keywords `q1....qn` the BM25 score of a document `D` is:
|
30
|
+
|
31
|
+

|
32
|
+
|
33
|
+
where `f(qi, D)` is qi's term frequency (tf) in the document `D`, `|D|` is the length of the document `D` in words, and avgdl is the
|
34
|
+
average document length in the text collection from which documents are drawn. `k1` and `b` are free parameters, usually chosen, in absence of
|
35
|
+
an advanced optimization, as `k1 in |1.2,2.0|` and `b = 0.75`. `IDF(qi)` is the IDF (inverse document frequency) weight of the query term
|
36
|
+
`qi`. It is usually computed as:
|
37
|
+
|
38
|
+

|
39
|
+
|
40
|
+
where `N` is the total number of documents in the collection, and `n(q)` is the number of documents containing `qi`.
|
41
|
+
|
42
|
+
There are several interpretations for IDF and slight variations on its formula. In the original BM25 derivation,
|
43
|
+
the IDF component is derived from the Binary Independence Model.
|
44
|
+
|
45
|
+
The above formula for IDF has drawbacks for terms appearing in more than half of the corpus documents. These terms' IDF is negative,
|
46
|
+
so for any two almost-identical documents, one which contains the term may be ranked lower than one which does not. This is often an
|
47
|
+
undesirable behavior, so many applications adjust the IDF formula in various ways:
|
48
|
+
|
49
|
+
Each summand can be given a floor of 0, to trim out common terms;
|
50
|
+
The IDF function can be given a floor of a constant `e`, to avoid common terms being ignored at all;
|
51
|
+
The IDF function can be replaced with a similarly shaped one which is non-negative, or strictly positive to avoid terms being ignored at all.
|
52
|
+
|
23
53
|
## Inverted Index
|
24
54
|
|
25
55
|
MiniSearch implements an inverted index (basically a hashmap where terms are keys and values are the documents that contain that key.
|
@@ -165,42 +195,6 @@ With this changes our index would be:
|
|
165
195
|
Pretty better now, we could apply other steps like removing some words that are irrelevant for us (stop words),
|
166
196
|
add synonyms for some words but this other changes are specifics from languages.
|
167
197
|
|
168
|
-
TODO
|
169
|
-
|
170
|
-
## Language support (stop words, stemmers)
|
171
|
-
|
172
|
-
TODO
|
173
|
-
|
174
|
-
## BM25 (from wikipedia)
|
175
|
-
|
176
|
-
BM25 is a bag-of-words retrieval function that ranks a set of documents based on the query terms appearing in each document, regardless
|
177
|
-
of their proximity within the document. It is a family of scoring functions with slightly different components and parameters.
|
178
|
-
One of the most prominent instantiations of the function is as follows.
|
179
|
-
|
180
|
-
Given a query Q, containing keywords `q1....qn` the BM25 score of a document `D` is:
|
181
|
-
|
182
|
-

|
183
|
-
|
184
|
-
where `f(qi, D)` is qi's term frequency (tf) in the document `D`, `|D|` is the length of the document `D` in words, and avgdl is the
|
185
|
-
average document length in the text collection from which documents are drawn. `k1` and `b` are free parameters, usually chosen, in absence of
|
186
|
-
an advanced optimization, as `k1 in |1.2,2.0|` and `b = 0.75`. `IDF(qi)` is the IDF (inverse document frequency) weight of the query term
|
187
|
-
`qi`. It is usually computed as:
|
188
|
-
|
189
|
-

|
190
|
-
|
191
|
-
where `N` is the total number of documents in the collection, and `n(q)` is the number of documents containing `qi`.
|
192
|
-
|
193
|
-
There are several interpretations for IDF and slight variations on its formula. In the original BM25 derivation,
|
194
|
-
the IDF component is derived from the Binary Independence Model.
|
195
|
-
|
196
|
-
The above formula for IDF has drawbacks for terms appearing in more than half of the corpus documents. These terms' IDF is negative,
|
197
|
-
so for any two almost-identical documents, one which contains the term may be ranked lower than one which does not. This is often an
|
198
|
-
undesirable behavior, so many applications adjust the IDF formula in various ways:
|
199
|
-
|
200
|
-
Each summand can be given a floor of 0, to trim out common terms;
|
201
|
-
The IDF function can be given a floor of a constant `e`, to avoid common terms being ignored at all;
|
202
|
-
The IDF function can be replaced with a similarly shaped one which is non-negative, or strictly positive to avoid terms being ignored at all.
|
203
|
-
|
204
198
|
## Usage
|
205
199
|
|
206
200
|
First we create an inverted Index
|
@@ -248,6 +242,70 @@ First we create an inverted Index
|
|
248
242
|
We can see results are sorted by score, notice that the document we index can have any other
|
249
243
|
fields like name, price and etc. But only `:id` and `:indexed_field` are required
|
250
244
|
|
245
|
+
## Language support (stop words, stemmers)
|
246
|
+
|
247
|
+
Creating an index using `MiniSearch.new_index` gives an inverted_index that does not
|
248
|
+
have any language support like stop_words and synonyms. We could pass them as arguments
|
249
|
+
in `new_index` like:
|
250
|
+
|
251
|
+
```
|
252
|
+
index = MiniSearch.new_index(
|
253
|
+
stop_words: stop_words,
|
254
|
+
stemmer: stemmer,
|
255
|
+
synonyms_map: synonyms_map
|
256
|
+
)
|
257
|
+
```
|
258
|
+
|
259
|
+
Arguments:
|
260
|
+
|
261
|
+
- The stop_words is an array of words that should be removed when indexing the document.
|
262
|
+
- The stemmer is an object of type Stemmer that implements a `stem` method, which removes all but the stem of the word (example: `carrocha` -> `carr`).
|
263
|
+
- The synonyms_map is a hashmap with original terms and a list of synonyms (example: `{'calçado' => ['sapato', 'tenis', 'salto', 'chinelo']}`)
|
264
|
+
|
265
|
+
## n-gram Tokenizer
|
266
|
+
|
267
|
+
By default, creating an index using `MiniSearch.new_index` gives an inverted_index that uses a simple whitespace tokenizer.
|
268
|
+
(e.g. `"Hello World" => ["Hello", "World"]`)
|
269
|
+
|
270
|
+
You can change this behavior to use an n-gram tokenizer which will break words down into smaller pieces with a configurable
|
271
|
+
token window. You can read more about how this kind of tokenization works for [Elastic Search](https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-ngram-tokenizer.html).
|
272
|
+
(e.g. `ngrams: 2` the phrase `"Hello World" => ["He", "el", "ll", "lo", "o ", " W", "Wo", "or", "rl", "ld"]`)
|
273
|
+
or
|
274
|
+
(e.g. `ngrams: 3` the phrase `"Hello World" => ["Hel", "ell", "llo", "lo ", "o W", " Wo", "Wor", "orl", "rld"]`)
|
275
|
+
|
276
|
+
To enable this simply pass an integer for the parameter `ngrams`.
|
277
|
+
|
278
|
+
```
|
279
|
+
index = MiniSearch.new_index(
|
280
|
+
ngrams: 2,
|
281
|
+
)
|
282
|
+
```
|
283
|
+
|
284
|
+
Arguments:
|
285
|
+
|
286
|
+
- ngrams: An integer which represents the number of characters each token should have. Common values are `2` for bigrams or `3` for trigrams.
|
287
|
+
|
288
|
+
# Stemmers
|
289
|
+
|
290
|
+
Stemmers are classes that implement the `stem(word)` method, which receives a word and returns its stem:
|
291
|
+
|
292
|
+
Example of a NaiveEnglishStemmer:
|
293
|
+
|
294
|
+
```
|
295
|
+
module MiniSearch
|
296
|
+
module Stemmer
|
297
|
+
class NaiveEnglishStemmer
|
298
|
+
def stem(word)
|
299
|
+
# removes plural
|
300
|
+
word[0..-2] if word.end_with?('s')
|
301
|
+
end
|
302
|
+
end
|
303
|
+
end
|
304
|
+
end
|
305
|
+
```
|
306
|
+
|
307
|
+
MiniSearch comes with a Brazilian Portuguese stemmer for now.
|
308
|
+
|
251
309
|
## Configuring multiple cores using yaml
|
252
310
|
|
253
311
|
You can configure a multiple core using a yaml config file.
|
data/bin/console
CHANGED
@@ -2,6 +2,7 @@
|
|
2
2
|
|
3
3
|
require "bundler/setup"
|
4
4
|
require "mini_search"
|
5
|
+
require "ruby_ngrams" # We want to be able to use this when testing!
|
5
6
|
|
6
7
|
# You can add fixtures and/or initialization code here to make experimenting
|
7
8
|
# with your gem easier. You can also use a different console, if you like.
|
@@ -0,0 +1,15 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module MiniSearch
|
4
|
+
class CompoundTokenizer
|
5
|
+
def initialize(tokenizers)
|
6
|
+
@tokenizers = tokenizers
|
7
|
+
end
|
8
|
+
|
9
|
+
def execute(string)
|
10
|
+
@tokenizers.each_with_object([]) do |tokenizer, tokens|
|
11
|
+
tokens.concat(tokenizer.execute(string))
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require "ruby_ngrams"
|
3
|
+
|
4
|
+
module MiniSearch
|
5
|
+
class NgramTokenizer
|
6
|
+
def initialize(n)
|
7
|
+
@n = n || 2
|
8
|
+
end
|
9
|
+
|
10
|
+
def execute(string)
|
11
|
+
# In the future, we may want to consider doing a strip on tokens to remove whitespace.
|
12
|
+
string.ngrams(regex: //, n: @n).map(&:join)
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
data/lib/mini_search/pipeline.rb
CHANGED
@@ -5,16 +5,27 @@ module MiniSearch
|
|
5
5
|
# do when indexing a document or searching
|
6
6
|
class Pipeline
|
7
7
|
def initialize(tokenizer, filters)
|
8
|
+
@standard_tokenizer = MiniSearch::StandardWhitespaceTokenizer.new
|
8
9
|
@tokenizer = tokenizer
|
9
10
|
@filters = filters
|
10
11
|
end
|
11
12
|
|
12
13
|
def execute(string)
|
13
|
-
tokens
|
14
|
+
# Since the filter model expects tokens that are tokenized by
|
15
|
+
# the standard tokenizer, let's use that first.
|
16
|
+
tokens = @standard_tokenizer.execute(string)
|
14
17
|
|
15
|
-
|
18
|
+
# Apply filters
|
19
|
+
filters_applied = @filters.reduce(tokens) do |filtered_tokens, filter|
|
16
20
|
filter.execute(filtered_tokens)
|
17
21
|
end
|
22
|
+
|
23
|
+
# Return if our selected tokenizer is the standard tokenizer
|
24
|
+
return filters_applied if @tokenizer.is_a? MiniSearch::StandardWhitespaceTokenizer
|
25
|
+
|
26
|
+
# Execute non-standard tokenization after rejoining the tokens
|
27
|
+
# that were tokenized with the StandardWhitespaceTokenizer
|
28
|
+
@tokenizer.execute(filters_applied.join(' '))
|
18
29
|
end
|
19
30
|
end
|
20
31
|
end
|
data/lib/mini_search/version.rb
CHANGED
data/lib/mini_search.rb
CHANGED
@@ -2,6 +2,8 @@ require 'yaml'
|
|
2
2
|
require 'mini_search/version.rb'
|
3
3
|
require 'mini_search/stemmer/portuguese.rb'
|
4
4
|
require 'mini_search/standard_whitespace_tokenizer.rb'
|
5
|
+
require 'mini_search/ngram_tokenizer.rb'
|
6
|
+
require 'mini_search/compound_tokenizer.rb'
|
5
7
|
require 'mini_search/strip_filter.rb'
|
6
8
|
require 'mini_search/remove_punctuation_filter.rb'
|
7
9
|
require 'mini_search/downcase_filter.rb'
|
@@ -24,8 +26,13 @@ module MiniSearch
|
|
24
26
|
MiniSearch::InvertedIndex.new(indexing_pipeline, querying_pipeline)
|
25
27
|
end
|
26
28
|
|
27
|
-
def self.new_index(stop_words: [], synonyms_map: {}, stemmer: nil)
|
28
|
-
|
29
|
+
def self.new_index(stop_words: [], synonyms_map: {}, stemmer: nil, ngrams: nil)
|
30
|
+
tokenizer =
|
31
|
+
if ngrams
|
32
|
+
NgramTokenizer.new(ngrams)
|
33
|
+
else
|
34
|
+
StandardWhitespaceTokenizer.new
|
35
|
+
end
|
29
36
|
|
30
37
|
strip_filter = StripFilter.new
|
31
38
|
remove_punctuation_filter = RemovePunctuationFilter.new
|
@@ -35,7 +42,7 @@ module MiniSearch
|
|
35
42
|
synonyms_filter = SynonymsFilter.new(synonyms_map)
|
36
43
|
|
37
44
|
indexing_pipeline = Pipeline.new(
|
38
|
-
|
45
|
+
tokenizer,
|
39
46
|
[
|
40
47
|
strip_filter,
|
41
48
|
remove_punctuation_filter,
|
@@ -46,7 +53,7 @@ module MiniSearch
|
|
46
53
|
)
|
47
54
|
|
48
55
|
querying_pipeline = Pipeline.new(
|
49
|
-
|
56
|
+
tokenizer,
|
50
57
|
[
|
51
58
|
strip_filter,
|
52
59
|
remove_punctuation_filter,
|
@@ -60,13 +67,14 @@ module MiniSearch
|
|
60
67
|
new(indexing_pipeline, querying_pipeline)
|
61
68
|
end
|
62
69
|
|
63
|
-
def self.new_localized_index(lang, synonyms_map: {}, stop_words: [])
|
70
|
+
def self.new_localized_index(lang, synonyms_map: {}, stop_words: [], ngrams: nil)
|
64
71
|
language_support = find_language_support(lang, stop_words)
|
65
72
|
|
66
73
|
new_index(
|
67
74
|
stop_words: language_support.stop_words,
|
68
75
|
stemmer: language_support.stemmer,
|
69
|
-
synonyms_map: synonyms_map
|
76
|
+
synonyms_map: synonyms_map,
|
77
|
+
ngrams: ngrams
|
70
78
|
)
|
71
79
|
end
|
72
80
|
|
data/mini_search.gemspec
CHANGED
@@ -23,7 +23,8 @@ Gem::Specification.new do |spec|
|
|
23
23
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
24
24
|
spec.require_paths = ["lib"]
|
25
25
|
|
26
|
-
spec.add_development_dependency "bundler", "~>
|
27
|
-
spec.add_development_dependency "rake", "~>
|
26
|
+
spec.add_development_dependency "bundler", "~> 2.4.10"
|
27
|
+
spec.add_development_dependency "rake", "~> 12.0"
|
28
28
|
spec.add_development_dependency "rspec", "~> 3.0"
|
29
|
+
spec.add_runtime_dependency "ruby_ngrams", "~> 0.0.6"
|
29
30
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mini_search
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0
|
4
|
+
version: 1.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew S Aguiar
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-05-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -16,28 +16,28 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version:
|
19
|
+
version: 2.4.10
|
20
20
|
type: :development
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version:
|
26
|
+
version: 2.4.10
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: rake
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
33
|
+
version: '12.0'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '
|
40
|
+
version: '12.0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: rspec
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -52,6 +52,20 @@ dependencies:
|
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '3.0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: ruby_ngrams
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: 0.0.6
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: 0.0.6
|
55
69
|
description: In-memory naive search engine.
|
56
70
|
email:
|
57
71
|
- andrewaguiar6@gmail.com
|
@@ -59,6 +73,7 @@ executables: []
|
|
59
73
|
extensions: []
|
60
74
|
extra_rdoc_files: []
|
61
75
|
files:
|
76
|
+
- ".devcontainer/devcontainer.json"
|
62
77
|
- ".gitignore"
|
63
78
|
- ".rspec"
|
64
79
|
- ".travis.yml"
|
@@ -74,11 +89,13 @@ files:
|
|
74
89
|
- formula2.svg
|
75
90
|
- lib/mini_search.rb
|
76
91
|
- lib/mini_search/bm_25.rb
|
92
|
+
- lib/mini_search/compound_tokenizer.rb
|
77
93
|
- lib/mini_search/downcase_filter.rb
|
78
94
|
- lib/mini_search/idf.rb
|
79
95
|
- lib/mini_search/inverted_index.rb
|
80
96
|
- lib/mini_search/inverted_index_spec.rb
|
81
97
|
- lib/mini_search/language_support/portuguese.rb
|
98
|
+
- lib/mini_search/ngram_tokenizer.rb
|
82
99
|
- lib/mini_search/pipeline.rb
|
83
100
|
- lib/mini_search/remove_punctuation_filter.rb
|
84
101
|
- lib/mini_search/standard_whitespace_tokenizer.rb
|
@@ -94,7 +111,7 @@ homepage: https://www.github.com/andrewaguiar/mini_search
|
|
94
111
|
licenses:
|
95
112
|
- MIT
|
96
113
|
metadata: {}
|
97
|
-
post_install_message:
|
114
|
+
post_install_message:
|
98
115
|
rdoc_options: []
|
99
116
|
require_paths:
|
100
117
|
- lib
|
@@ -109,8 +126,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
109
126
|
- !ruby/object:Gem::Version
|
110
127
|
version: '0'
|
111
128
|
requirements: []
|
112
|
-
rubygems_version: 3.
|
113
|
-
signing_key:
|
129
|
+
rubygems_version: 3.1.6
|
130
|
+
signing_key:
|
114
131
|
specification_version: 4
|
115
132
|
summary: In-memory naive search engine.
|
116
133
|
test_files: []
|