facile_search 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: df9c6537e798b558d1322a4f9b4708261e5a95b6
4
- data.tar.gz: 87270a7bbd1d922517e48594e5a7c2b1b22d866c
3
+ metadata.gz: 924b90dc73809cebcb2268d303e1fcbbc7526b69
4
+ data.tar.gz: 9e20031e7689e879137eb4acda78816c3442c2c7
5
5
  SHA512:
6
- metadata.gz: 90f3969afac89a9a114806a71d720a7b24376cab41e219ecd0cec50d7e3b054dbabd318e56cc399950fb95bcb92d2f3871389d4b9bbee928fa33f7e7bf9cd473
7
- data.tar.gz: 06b25f0d288f36320cbca8fe019a7cd69d273559da08a2724a09b09ada2a580a1f8721a3c590baca9f4fead1ccd265b30d3c1f572f153b57c890d1543e5b0087
6
+ metadata.gz: 9bedca991f4cd059d387884e23a407c6f8f614026d0fcc75350d0798618b43bcc8bd458f7189911e1a6ad57cef04d97a807d59baa8fb4a19f85709085a076f4a
7
+ data.tar.gz: a02e9b6412073de167582196132ab2e22fc82d3e26ccdf1d0b4e0ddbca96dba09a3da169b0938254f4b616510fa85a34c169992aaae7a82ae3ff29719f9c535a
data/.travis.yml CHANGED
@@ -1,4 +1,7 @@
1
1
  language: ruby
2
2
  rvm:
3
+ - 2.1.5
3
4
  - 2.2.1
5
+ services:
6
+ - redis-server
4
7
 
data/README.md CHANGED
@@ -1,5 +1,8 @@
1
1
  # FacileSearch
2
2
 
3
+ [![Build Status](https://travis-ci.org/i2bskn/facile_search.svg?branch=master)](https://travis-ci.org/i2bskn/facile_search)
4
+ [![Code Climate](https://codeclimate.com/github/i2bskn/facile_search/badges/gpa.svg)](https://codeclimate.com/github/i2bskn/facile_search)
5
+
3
6
  Simple search with inverted index.
4
7
  (Index data is stored in Redis.)
5
8
 
@@ -21,12 +24,19 @@ Or install it yourself as:
21
24
 
22
25
  ## Usage
23
26
 
27
+ Create an inverted index object.
28
+
29
+ ```ruby
30
+ meta_data = FacileSearch::MetaData.new(namespace: "example_index", text_field: "text", id_field: "id")
31
+ index = FacileSearch::InvertedIndex.new(meta_data)
32
+ ```
33
+
34
+ Indexing and search.
35
+
24
36
  ```ruby
25
- meta = FacileSearch::MetaData.new(namespace: "example_index", text_field: "text")
26
- index = FacileSearch::InvertedIndex.new(meta)
27
37
  sample = Struct.new(:id, :text).new(1, "some text")
28
38
  index.indexing(sample) # => "OK"
29
- index.search(["text"]) # => [1]
39
+ index.search("text") # => [1]
30
40
  ```
31
41
 
32
42
  ## Contributing
@@ -1,5 +1,3 @@
1
- require "facile_search/tokenizer/ngram"
2
-
3
1
  module FacileSearch
4
2
  class InvertedIndex
5
3
  extend Forwardable
@@ -30,11 +28,22 @@ module FacileSearch
30
28
  end
31
29
  end
32
30
 
33
- def search(queries)
34
- queries.each_with_object([]) {|query, obj|
35
- tokens = tokenizer.tokenize(query)
36
- return [] if tokens.size.zero?
37
- obj << redis.hmget(namespace, *tokens).map {|ids| deserialize(ids) }.inject(&:&)
31
+ def search(*queries)
32
+ queries.flatten.each_with_object([]) {|query, obj|
33
+ if tokenizer.tokenizable?(query)
34
+ tokens = tokenizer.tokenize(query)
35
+ return [] if tokens.size.zero?
36
+ obj << redis.hmget(namespace, *tokens).map {|value| deserialize(value) }.inject(&:&)
37
+ else
38
+ cursor = 0
39
+ ids = []
40
+ 10000.times do # TODO: Adjust max number of loop
41
+ cursor, matches = redis.hscan(namespace, cursor, match: "*#{query}*")
42
+ ids << matches.map {|_, value| deserialize(value) }
43
+ break if cursor.to_i.zero?
44
+ end
45
+ obj << ids.uniq
46
+ end
38
47
  }.inject(&:&)
39
48
  end
40
49
 
@@ -1,12 +1,16 @@
1
1
  module FacileSearch
2
- module Tokenizer
3
- class NGram
4
- def initialize(n)
5
- @n = n
2
+ class Tokenizer
3
+ class NGram < self
4
+ def initialize(n = nil)
5
+ @n = n || 2 # default is bigram
6
6
  end
7
7
 
8
- def tokenize(strings)
9
- strings.chars.each_cons(@n).map(&:join)
8
+ def tokenizable?(text)
9
+ text.size >= @n
10
+ end
11
+
12
+ def dividing(text)
13
+ text.chars.each_cons(@n).map(&:join)
10
14
  end
11
15
  end
12
16
  end
@@ -0,0 +1,10 @@
1
+ module FacileSearch
2
+ class Tokenizer
3
+ module NormalizationHook
4
+ def self.apply(text)
5
+ NKF.nkf("-Wwm0Z1", text)
6
+ end
7
+ end
8
+ end
9
+ end
10
+
@@ -0,0 +1,31 @@
1
+ require "facile_search/tokenizer/ngram"
2
+ require "facile_search/tokenizer/normalization_hook"
3
+
4
+ module FacileSearch
5
+ class Tokenizer
6
+ def tokenize(text)
7
+ dividing(preprocess(text))
8
+ end
9
+
10
+ def tokenizable?(text)
11
+ raise NotImplementedError
12
+ end
13
+
14
+ def dividing(text)
15
+ raise NotImplementedError
16
+ end
17
+
18
+ def preprocess(text)
19
+ hooks.inject(text) {|txt, hook| hook.apply(txt) }
20
+ end
21
+
22
+ def add_hook(hook)
23
+ hooks << hook
24
+ end
25
+
26
+ def hooks
27
+ @hooks ||= []
28
+ end
29
+ end
30
+ end
31
+
@@ -0,0 +1,9 @@
1
+ module FacileSearch
2
+ module Utils
3
+ def divide_queries(queries)
4
+ queries.split(/[[:blank:]]/)
5
+ end
6
+ end
7
+ extend Utils
8
+ end
9
+
@@ -1,4 +1,4 @@
1
1
  module FacileSearch
2
- VERSION = "0.0.1"
2
+ VERSION = "0.0.2"
3
3
  end
4
4
 
data/lib/facile_search.rb CHANGED
@@ -1,3 +1,4 @@
1
+ require "nkf"
1
2
  require "forwardable"
2
3
 
3
4
  require "redis/objects"
@@ -5,5 +6,6 @@ require "oj"
5
6
 
6
7
  require "facile_search/version"
7
8
  require "facile_search/meta_data"
9
+ require "facile_search/tokenizer"
8
10
  require "facile_search/inverted_index"
9
11
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: facile_search
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - i2bskn
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-04-28 00:00:00.000000000 Z
11
+ date: 2015-04-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: redis-objects
@@ -112,7 +112,10 @@ files:
112
112
  - lib/facile_search.rb
113
113
  - lib/facile_search/inverted_index.rb
114
114
  - lib/facile_search/meta_data.rb
115
+ - lib/facile_search/tokenizer.rb
115
116
  - lib/facile_search/tokenizer/ngram.rb
117
+ - lib/facile_search/tokenizer/normalization_hook.rb
118
+ - lib/facile_search/utils.rb
116
119
  - lib/facile_search/version.rb
117
120
  homepage: https://github.com/i2bskn/facile_search
118
121
  licenses: