facile_search 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +3 -0
- data/README.md +13 -3
- data/lib/facile_search/inverted_index.rb +16 -7
- data/lib/facile_search/tokenizer/ngram.rb +10 -6
- data/lib/facile_search/tokenizer/normalization_hook.rb +10 -0
- data/lib/facile_search/tokenizer.rb +31 -0
- data/lib/facile_search/utils.rb +9 -0
- data/lib/facile_search/version.rb +1 -1
- data/lib/facile_search.rb +2 -0
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 924b90dc73809cebcb2268d303e1fcbbc7526b69
|
4
|
+
data.tar.gz: 9e20031e7689e879137eb4acda78816c3442c2c7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9bedca991f4cd059d387884e23a407c6f8f614026d0fcc75350d0798618b43bcc8bd458f7189911e1a6ad57cef04d97a807d59baa8fb4a19f85709085a076f4a
|
7
|
+
data.tar.gz: a02e9b6412073de167582196132ab2e22fc82d3e26ccdf1d0b4e0ddbca96dba09a3da169b0938254f4b616510fa85a34c169992aaae7a82ae3ff29719f9c535a
|
data/.travis.yml
CHANGED
data/README.md
CHANGED
@@ -1,5 +1,8 @@
|
|
1
1
|
# FacileSearch
|
2
2
|
|
3
|
+
[![Build Status](https://travis-ci.org/i2bskn/facile_search.svg?branch=master)](https://travis-ci.org/i2bskn/facile_search)
|
4
|
+
[![Code Climate](https://codeclimate.com/github/i2bskn/facile_search/badges/gpa.svg)](https://codeclimate.com/github/i2bskn/facile_search)
|
5
|
+
|
3
6
|
Simple search with inverted index.
|
4
7
|
(Index data is stored in the Redis.)
|
5
8
|
|
@@ -21,12 +24,19 @@ Or install it yourself as:
|
|
21
24
|
|
22
25
|
## Usage
|
23
26
|
|
27
|
+
Create inverted index object.
|
28
|
+
|
29
|
+
```ruby
|
30
|
+
meta_data = FacileSearch::MetaData.new(namespace: "example_index", text_field: "text", id_field: "id")
|
31
|
+
index = FacileSearch::InvertedIndex.new(meta_data)
|
32
|
+
```
|
33
|
+
|
34
|
+
Indexing and search.
|
35
|
+
|
24
36
|
```ruby
|
25
|
-
meta = FacileSearch::MetaData.new(namespace: "example_index", text_field: "text")
|
26
|
-
index = FacileSearch::InvertedIndex.new(meta)
|
27
37
|
sample = Struct.new(:id, :text).new(1, "some text")
|
28
38
|
index.indexing(sample) # => "OK"
|
29
|
-
index.search(
|
39
|
+
index.search("text") # => [1]
|
30
40
|
```
|
31
41
|
|
32
42
|
## Contributing
|
data/lib/facile_search/inverted_index.rb
CHANGED
@@ -1,5 +1,3 @@
|
|
1
|
-
require "facile_search/tokenizer/ngram"
|
2
|
-
|
3
1
|
module FacileSearch
|
4
2
|
class InvertedIndex
|
5
3
|
extend Forwardable
|
@@ -30,11 +28,22 @@ module FacileSearch
|
|
30
28
|
end
|
31
29
|
end
|
32
30
|
|
33
|
-
def search(queries)
|
34
|
-
queries.each_with_object([]) {|query, obj|
|
35
|
-
|
36
|
-
|
37
|
-
|
31
|
+
def search(*queries)
|
32
|
+
queries.flatten.each_with_object([]) {|query, obj|
|
33
|
+
if tokenizer.tokenizable?(query)
|
34
|
+
tokens = tokenizer.tokenize(query)
|
35
|
+
return [] if tokens.size.zero?
|
36
|
+
obj << redis.hmget(namespace, *tokens).map {|value| deserialize(value) }.inject(&:&)
|
37
|
+
else
|
38
|
+
cursor = 0
|
39
|
+
ids = []
|
40
|
+
10000.times do # TODO: Adjust max number of loop
|
41
|
+
cursor, matches = redis.hscan(namespace, cursor, match: "*#{query}*")
|
42
|
+
ids << matches.map {|_, value| deserialize(value) }
|
43
|
+
break if cursor.to_i.zero?
|
44
|
+
end
|
45
|
+
obj << ids.uniq
|
46
|
+
end
|
38
47
|
}.inject(&:&)
|
39
48
|
end
|
40
49
|
|
data/lib/facile_search/tokenizer/ngram.rb
CHANGED
@@ -1,12 +1,16 @@
|
|
1
1
|
module FacileSearch
|
2
|
-
|
3
|
-
class NGram
|
4
|
-
def initialize(n)
|
5
|
-
@n = n
|
2
|
+
class Tokenizer
|
3
|
+
class NGram < self
|
4
|
+
def initialize(n = nil)
|
5
|
+
@n = n || 2 # default is bigram
|
6
6
|
end
|
7
7
|
|
8
|
-
def
|
9
|
-
|
8
|
+
def tokenizable?(text)
|
9
|
+
text.size >= @n
|
10
|
+
end
|
11
|
+
|
12
|
+
def dividing(text)
|
13
|
+
text.chars.each_cons(@n).map(&:join)
|
10
14
|
end
|
11
15
|
end
|
12
16
|
end
|
data/lib/facile_search/tokenizer.rb
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
require "facile_search/tokenizer/ngram"
|
2
|
+
require "facile_search/tokenizer/normalization_hook"
|
3
|
+
|
4
|
+
module FacileSearch
|
5
|
+
class Tokenizer
|
6
|
+
def tokenize(text)
|
7
|
+
dividing(preprocess(text))
|
8
|
+
end
|
9
|
+
|
10
|
+
def tokenizable?(text)
|
11
|
+
raise NotImplementedError
|
12
|
+
end
|
13
|
+
|
14
|
+
def dividing(text)
|
15
|
+
raise NotImplementedError
|
16
|
+
end
|
17
|
+
|
18
|
+
def preprocess(text)
|
19
|
+
hooks.inject(text) {|txt, hook| hook.apply(txt) }
|
20
|
+
end
|
21
|
+
|
22
|
+
def add_hook(hook)
|
23
|
+
hooks << hook
|
24
|
+
end
|
25
|
+
|
26
|
+
def hooks
|
27
|
+
@hooks ||= []
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
data/lib/facile_search.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: facile_search
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- i2bskn
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-04-
|
11
|
+
date: 2015-04-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: redis-objects
|
@@ -112,7 +112,10 @@ files:
|
|
112
112
|
- lib/facile_search.rb
|
113
113
|
- lib/facile_search/inverted_index.rb
|
114
114
|
- lib/facile_search/meta_data.rb
|
115
|
+
- lib/facile_search/tokenizer.rb
|
115
116
|
- lib/facile_search/tokenizer/ngram.rb
|
117
|
+
- lib/facile_search/tokenizer/normalization_hook.rb
|
118
|
+
- lib/facile_search/utils.rb
|
116
119
|
- lib/facile_search/version.rb
|
117
120
|
homepage: https://github.com/i2bskn/facile_search
|
118
121
|
licenses:
|