mini_search 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +14 -0
- data/.rspec +3 -0
- data/.travis.yml +7 -0
- data/CODE_OF_CONDUCT.md +74 -0
- data/Gemfile +6 -0
- data/Gemfile.lock +35 -0
- data/LICENSE.txt +21 -0
- data/README.md +267 -0
- data/Rakefile +6 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/formula1.svg +144 -0
- data/formula2.svg +79 -0
- data/lib/mini_search.rb +75 -0
- data/lib/mini_search/bm_25.rb +14 -0
- data/lib/mini_search/downcase_filter.rb +9 -0
- data/lib/mini_search/idf.rb +12 -0
- data/lib/mini_search/inverted_index.rb +163 -0
- data/lib/mini_search/inverted_index_spec.rb +34 -0
- data/lib/mini_search/language_support/portuguese.rb +19 -0
- data/lib/mini_search/pipeline.rb +20 -0
- data/lib/mini_search/remove_punctuation_filter.rb +11 -0
- data/lib/mini_search/standard_whitespace_tokenizer.rb +9 -0
- data/lib/mini_search/stemmer/portuguese.rb +365 -0
- data/lib/mini_search/stemmer_filter.rb +19 -0
- data/lib/mini_search/stop_words_filter.rb +13 -0
- data/lib/mini_search/strip_filter.rb +9 -0
- data/lib/mini_search/synonyms_filter.rb +19 -0
- data/lib/mini_search/tf.rb +9 -0
- data/lib/mini_search/version.rb +3 -0
- data/mini_search.gemspec +29 -0
- metadata +116 -0
@@ -0,0 +1,19 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module MiniSearch
  # Pipeline filter that enriches a token list with the stemmed form of
  # every token.
  class StemmerFilter
    # stemmer - object responding to `stem(token)`. When it is nil (or
    #           false) the filter passes tokens through untouched.
    def initialize(stemmer)
      @stemmer = stemmer
    end

    # Returns the stems followed by the original tokens, with duplicates
    # removed (the first occurrence is kept). Without a stemmer the given
    # +tokens+ object is returned as is.
    def execute(tokens)
      if @stemmer
        stems = tokens.map { |word| @stemmer.stem(word) }
        # Array union: concatenation order is kept and repeats are dropped.
        stems | tokens
      else
        tokens
      end
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module MiniSearch
  # Pipeline filter that appends, for every token registered as a synonym,
  # the canonical term it stands for.
  class SynonymsFilter
    # synonyms_map - Hash of canonical term => synonym(s), e.g.
    #                { 'car' => ['auto', 'vehicle'] }. A single non-array
    #                synonym is accepted, and nil is treated as an empty
    #                map so the filter becomes a pass-through.
    def initialize(synonyms_map)
      # Invert the map once so that execute is a plain hash lookup per
      # token. If a synonym is listed under several terms, the last one
      # visited wins.
      @flatten_synonyms_map = (synonyms_map || {}).each_with_object({}) do |(term, synonyms), lookup|
        Array(synonyms).each { |synonym| lookup[synonym] = term }
      end
    end

    # Returns the original tokens followed by the canonical term of each
    # token that is a known synonym. Canonical terms are appended in token
    # order and are not de-duplicated.
    def execute(tokens)
      tokens + tokens.map { |token| @flatten_synonyms_map[token] }.compact
    end
  end
end
|
data/mini_search.gemspec
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
|
2
|
+
# Put lib/ on the load path so the version constant can be read at build time.
lib = File.expand_path("../lib", __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require "mini_search/version"

Gem::Specification.new do |spec|
  spec.name          = "mini_search"
  spec.version       = MiniSearch::VERSION
  spec.authors       = ["Andrew S Aguiar"]
  spec.email         = ["andrewaguiar6@gmail.com"]

  spec.summary       = %q{In-memory naive search engine.}
  spec.description   = %q{In-memory naive search engine.}
  spec.homepage      = "https://www.github.com/andrewaguiar/mini_search"
  spec.license       = "MIT"

  # Specify which files should be added to the gem when it is released.
  # `git ls-files -z` lists the files tracked by git; test material is
  # filtered out. Besides the top-level test/spec/features directories, any
  # `*_spec.rb` file is excluded so that specs living next to the code
  # (e.g. under lib/) are not shipped inside the gem.
  spec.files         = Dir.chdir(File.expand_path('..', __FILE__)) do
    `git ls-files -z`.split("\x0").reject do |f|
      f.match(%r{\A(test|spec|features)/}) || f.match(/_spec\.rb\z/)
    end
  end
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.16"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec", "~> 3.0"
end
|
metadata
ADDED
@@ -0,0 +1,116 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: mini_search
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.3
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Andrew S Aguiar
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2019-09-28 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.16'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.16'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '3.0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '3.0'
|
55
|
+
description: In-memory naive search engine.
|
56
|
+
email:
|
57
|
+
- andrewaguiar6@gmail.com
|
58
|
+
executables: []
|
59
|
+
extensions: []
|
60
|
+
extra_rdoc_files: []
|
61
|
+
files:
|
62
|
+
- ".gitignore"
|
63
|
+
- ".rspec"
|
64
|
+
- ".travis.yml"
|
65
|
+
- CODE_OF_CONDUCT.md
|
66
|
+
- Gemfile
|
67
|
+
- Gemfile.lock
|
68
|
+
- LICENSE.txt
|
69
|
+
- README.md
|
70
|
+
- Rakefile
|
71
|
+
- bin/console
|
72
|
+
- bin/setup
|
73
|
+
- formula1.svg
|
74
|
+
- formula2.svg
|
75
|
+
- lib/mini_search.rb
|
76
|
+
- lib/mini_search/bm_25.rb
|
77
|
+
- lib/mini_search/downcase_filter.rb
|
78
|
+
- lib/mini_search/idf.rb
|
79
|
+
- lib/mini_search/inverted_index.rb
|
80
|
+
- lib/mini_search/inverted_index_spec.rb
|
81
|
+
- lib/mini_search/language_support/portuguese.rb
|
82
|
+
- lib/mini_search/pipeline.rb
|
83
|
+
- lib/mini_search/remove_punctuation_filter.rb
|
84
|
+
- lib/mini_search/standard_whitespace_tokenizer.rb
|
85
|
+
- lib/mini_search/stemmer/portuguese.rb
|
86
|
+
- lib/mini_search/stemmer_filter.rb
|
87
|
+
- lib/mini_search/stop_words_filter.rb
|
88
|
+
- lib/mini_search/strip_filter.rb
|
89
|
+
- lib/mini_search/synonyms_filter.rb
|
90
|
+
- lib/mini_search/tf.rb
|
91
|
+
- lib/mini_search/version.rb
|
92
|
+
- mini_search.gemspec
|
93
|
+
homepage: https://www.github.com/andrewaguiar/mini_search
|
94
|
+
licenses:
|
95
|
+
- MIT
|
96
|
+
metadata: {}
|
97
|
+
post_install_message:
|
98
|
+
rdoc_options: []
|
99
|
+
require_paths:
|
100
|
+
- lib
|
101
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
102
|
+
requirements:
|
103
|
+
- - ">="
|
104
|
+
- !ruby/object:Gem::Version
|
105
|
+
version: '0'
|
106
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ">="
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
111
|
+
requirements: []
|
112
|
+
rubygems_version: 3.0.3
|
113
|
+
signing_key:
|
114
|
+
specification_version: 4
|
115
|
+
summary: In-memory naive search engine.
|
116
|
+
test_files: []
|