facile_search 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +3 -0
- data/README.md +13 -3
- data/lib/facile_search/inverted_index.rb +16 -7
- data/lib/facile_search/tokenizer/ngram.rb +10 -6
- data/lib/facile_search/tokenizer/normalization_hook.rb +10 -0
- data/lib/facile_search/tokenizer.rb +31 -0
- data/lib/facile_search/utils.rb +9 -0
- data/lib/facile_search/version.rb +1 -1
- data/lib/facile_search.rb +2 -0
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 924b90dc73809cebcb2268d303e1fcbbc7526b69
|
4
|
+
data.tar.gz: 9e20031e7689e879137eb4acda78816c3442c2c7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9bedca991f4cd059d387884e23a407c6f8f614026d0fcc75350d0798618b43bcc8bd458f7189911e1a6ad57cef04d97a807d59baa8fb4a19f85709085a076f4a
|
7
|
+
data.tar.gz: a02e9b6412073de167582196132ab2e22fc82d3e26ccdf1d0b4e0ddbca96dba09a3da169b0938254f4b616510fa85a34c169992aaae7a82ae3ff29719f9c535a
|
data/.travis.yml
CHANGED
data/README.md
CHANGED
@@ -1,5 +1,8 @@
|
|
1
1
|
# FacileSearch
|
2
2
|
|
3
|
+
[Build Status](https://travis-ci.org/i2bskn/facile_search)
|
4
|
+
[Code Climate](https://codeclimate.com/github/i2bskn/facile_search)
|
5
|
+
|
3
6
|
Simple search with inverted index.
|
4
7
|
(Index data is stored in the Redis.)
|
5
8
|
|
@@ -21,12 +24,19 @@ Or install it yourself as:
|
|
21
24
|
|
22
25
|
## Usage
|
23
26
|
|
27
|
+
Create inverted index object.
|
28
|
+
|
29
|
+
```ruby
|
30
|
+
meta_data = FacileSearch::MetaData.new(namespace: "example_index", text_field: "text", id_field: "id")
|
31
|
+
index = FacileSearch::InvertedIndex.new(meta_data)
|
32
|
+
```
|
33
|
+
|
34
|
+
Indexing and search.
|
35
|
+
|
24
36
|
```ruby
|
25
|
-
meta = FacileSearch::MetaData.new(namespace: "example_index", text_field: "text")
|
26
|
-
index = FacileSearch::InvertedIndex.new(meta)
|
27
37
|
sample = Struct.new(:id, :text).new(1, "some text")
|
28
38
|
index.indexing(sample) # => "OK"
|
29
|
-
index.search(
|
39
|
+
index.search("text") # => [1]
|
30
40
|
```
|
31
41
|
|
32
42
|
## Contributing
|
@@ -1,5 +1,3 @@
|
|
1
|
-
require "facile_search/tokenizer/ngram"
|
2
|
-
|
3
1
|
module FacileSearch
|
4
2
|
class InvertedIndex
|
5
3
|
extend Forwardable
|
@@ -30,11 +28,22 @@ module FacileSearch
|
|
30
28
|
end
|
31
29
|
end
|
32
30
|
|
33
|
-
def search(queries)
|
34
|
-
queries.each_with_object([]) {|query, obj|
|
35
|
-
|
36
|
-
|
37
|
-
|
31
|
+
def search(*queries)
|
32
|
+
queries.flatten.each_with_object([]) {|query, obj|
|
33
|
+
if tokenizer.tokenizable?(query)
|
34
|
+
tokens = tokenizer.tokenize(query)
|
35
|
+
return [] if tokens.size.zero?
|
36
|
+
obj << redis.hmget(namespace, *tokens).map {|value| deserialize(value) }.inject(&:&)
|
37
|
+
else
|
38
|
+
cursor = 0
|
39
|
+
ids = []
|
40
|
+
10000.times do # TODO: Adjust max number of loop
|
41
|
+
cursor, matches = redis.hscan(namespace, cursor, match: "*#{query}*")
|
42
|
+
ids << matches.map {|_, value| deserialize(value) }
|
43
|
+
break if cursor.to_i.zero?
|
44
|
+
end
|
45
|
+
obj << ids.uniq
|
46
|
+
end
|
38
47
|
}.inject(&:&)
|
39
48
|
end
|
40
49
|
|
@@ -1,12 +1,16 @@
|
|
1
1
|
module FacileSearch
|
2
|
-
|
3
|
-
class NGram
|
4
|
-
def initialize(n)
|
5
|
-
@n = n
|
2
|
+
class Tokenizer
|
3
|
+
class NGram < self
|
4
|
+
def initialize(n = nil)
|
5
|
+
@n = n || 2 # default is bigram
|
6
6
|
end
|
7
7
|
|
8
|
-
def
|
9
|
-
|
8
|
+
def tokenizable?(text)
|
9
|
+
text.size >= @n
|
10
|
+
end
|
11
|
+
|
12
|
+
def dividing(text)
|
13
|
+
text.chars.each_cons(@n).map(&:join)
|
10
14
|
end
|
11
15
|
end
|
12
16
|
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
require "facile_search/tokenizer/ngram"
|
2
|
+
require "facile_search/tokenizer/normalization_hook"
|
3
|
+
|
4
|
+
module FacileSearch
|
5
|
+
class Tokenizer
|
6
|
+
def tokenize(text)
|
7
|
+
dividing(preprocess(text))
|
8
|
+
end
|
9
|
+
|
10
|
+
def tokenizable?(text)
|
11
|
+
raise NotImplementedError
|
12
|
+
end
|
13
|
+
|
14
|
+
def dividing(text)
|
15
|
+
raise NotImplementedError
|
16
|
+
end
|
17
|
+
|
18
|
+
def preprocess(text)
|
19
|
+
hooks.inject(text) {|txt, hook| hook.apply(txt) }
|
20
|
+
end
|
21
|
+
|
22
|
+
def add_hook(hook)
|
23
|
+
hooks << hook
|
24
|
+
end
|
25
|
+
|
26
|
+
def hooks
|
27
|
+
@hooks ||= []
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
data/lib/facile_search.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: facile_search
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- i2bskn
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-04-
|
11
|
+
date: 2015-04-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: redis-objects
|
@@ -112,7 +112,10 @@ files:
|
|
112
112
|
- lib/facile_search.rb
|
113
113
|
- lib/facile_search/inverted_index.rb
|
114
114
|
- lib/facile_search/meta_data.rb
|
115
|
+
- lib/facile_search/tokenizer.rb
|
115
116
|
- lib/facile_search/tokenizer/ngram.rb
|
117
|
+
- lib/facile_search/tokenizer/normalization_hook.rb
|
118
|
+
- lib/facile_search/utils.rb
|
116
119
|
- lib/facile_search/version.rb
|
117
120
|
homepage: https://github.com/i2bskn/facile_search
|
118
121
|
licenses:
|