facile_search 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: df9c6537e798b558d1322a4f9b4708261e5a95b6
4
- data.tar.gz: 87270a7bbd1d922517e48594e5a7c2b1b22d866c
3
+ metadata.gz: 924b90dc73809cebcb2268d303e1fcbbc7526b69
4
+ data.tar.gz: 9e20031e7689e879137eb4acda78816c3442c2c7
5
5
  SHA512:
6
- metadata.gz: 90f3969afac89a9a114806a71d720a7b24376cab41e219ecd0cec50d7e3b054dbabd318e56cc399950fb95bcb92d2f3871389d4b9bbee928fa33f7e7bf9cd473
7
- data.tar.gz: 06b25f0d288f36320cbca8fe019a7cd69d273559da08a2724a09b09ada2a580a1f8721a3c590baca9f4fead1ccd265b30d3c1f572f153b57c890d1543e5b0087
6
+ metadata.gz: 9bedca991f4cd059d387884e23a407c6f8f614026d0fcc75350d0798618b43bcc8bd458f7189911e1a6ad57cef04d97a807d59baa8fb4a19f85709085a076f4a
7
+ data.tar.gz: a02e9b6412073de167582196132ab2e22fc82d3e26ccdf1d0b4e0ddbca96dba09a3da169b0938254f4b616510fa85a34c169992aaae7a82ae3ff29719f9c535a
data/.travis.yml CHANGED
@@ -1,4 +1,7 @@
1
1
  language: ruby
2
2
  rvm:
3
+ - 2.1.5
3
4
  - 2.2.1
5
+ services:
6
+ - redis-server
4
7
 
data/README.md CHANGED
@@ -1,5 +1,8 @@
1
1
  # FacileSearch
2
2
 
3
+ [![Build Status](https://travis-ci.org/i2bskn/facile_search.svg?branch=master)](https://travis-ci.org/i2bskn/facile_search)
4
+ [![Code Climate](https://codeclimate.com/github/i2bskn/facile_search/badges/gpa.svg)](https://codeclimate.com/github/i2bskn/facile_search)
5
+
3
6
  Simple search with inverted index.
4
7
  (Index data is stored in Redis.)
5
8
 
@@ -21,12 +24,19 @@ Or install it yourself as:
21
24
 
22
25
  ## Usage
23
26
 
27
+ Create an inverted index object.
28
+
29
+ ```ruby
30
+ meta_data = FacileSearch::MetaData.new(namespace: "example_index", text_field: "text", id_field: "id")
31
+ index = FacileSearch::InvertedIndex.new(meta_data)
32
+ ```
33
+
34
+ Indexing and search.
35
+
24
36
  ```ruby
25
- meta = FacileSearch::MetaData.new(namespace: "example_index", text_field: "text")
26
- index = FacileSearch::InvertedIndex.new(meta)
27
37
  sample = Struct.new(:id, :text).new(1, "some text")
28
38
  index.indexing(sample) # => "OK"
29
- index.search(["text"]) # => [1]
39
+ index.search("text") # => [1]
30
40
  ```
31
41
 
32
42
  ## Contributing
@@ -1,5 +1,3 @@
1
- require "facile_search/tokenizer/ngram"
2
-
3
1
  module FacileSearch
4
2
  class InvertedIndex
5
3
  extend Forwardable
@@ -30,11 +28,22 @@ module FacileSearch
30
28
  end
31
29
  end
32
30
 
33
- def search(queries)
34
- queries.each_with_object([]) {|query, obj|
35
- tokens = tokenizer.tokenize(query)
36
- return [] if tokens.size.zero?
37
- obj << redis.hmget(namespace, *tokens).map {|ids| deserialize(ids) }.inject(&:&)
31
+ def search(*queries)
32
+ queries.flatten.each_with_object([]) {|query, obj|
33
+ if tokenizer.tokenizable?(query)
34
+ tokens = tokenizer.tokenize(query)
35
+ return [] if tokens.size.zero?
36
+ obj << redis.hmget(namespace, *tokens).map {|value| deserialize(value) }.inject(&:&)
37
+ else
38
+ cursor = 0
39
+ ids = []
40
+ 10000.times do # TODO: Adjust max number of loop
41
+ cursor, matches = redis.hscan(namespace, cursor, match: "*#{query}*")
42
+ ids << matches.map {|_, value| deserialize(value) }
43
+ break if cursor.to_i.zero?
44
+ end
45
+ obj << ids.uniq
46
+ end
38
47
  }.inject(&:&)
39
48
  end
40
49
 
@@ -1,12 +1,16 @@
1
1
  module FacileSearch
2
- module Tokenizer
3
- class NGram
4
- def initialize(n)
5
- @n = n
2
+ class Tokenizer
3
+ class NGram < self
4
+ def initialize(n = nil)
5
+ @n = n || 2 # default is bigram
6
6
  end
7
7
 
8
- def tokenize(strings)
9
- strings.chars.each_cons(@n).map(&:join)
8
+ def tokenizable?(text)
9
+ text.size >= @n
10
+ end
11
+
12
+ def dividing(text)
13
+ text.chars.each_cons(@n).map(&:join)
10
14
  end
11
15
  end
12
16
  end
@@ -0,0 +1,10 @@
1
+ module FacileSearch
2
+ class Tokenizer
3
+ module NormalizationHook
4
+ def self.apply(text)
5
+ NKF.nkf("-Wwm0Z1", text)
6
+ end
7
+ end
8
+ end
9
+ end
10
+
@@ -0,0 +1,31 @@
1
+ require "facile_search/tokenizer/ngram"
2
+ require "facile_search/tokenizer/normalization_hook"
3
+
4
+ module FacileSearch
5
+ class Tokenizer
6
+ def tokenize(text)
7
+ dividing(preprocess(text))
8
+ end
9
+
10
+ def tokenizable?(text)
11
+ raise NotImplementedError
12
+ end
13
+
14
+ def dividing(text)
15
+ raise NotImplementedError
16
+ end
17
+
18
+ def preprocess(text)
19
+ hooks.inject(text) {|txt, hook| hook.apply(txt) }
20
+ end
21
+
22
+ def add_hook(hook)
23
+ hooks << hook
24
+ end
25
+
26
+ def hooks
27
+ @hooks ||= []
28
+ end
29
+ end
30
+ end
31
+
@@ -0,0 +1,9 @@
1
+ module FacileSearch
2
+ module Utils
3
+ def divide_queries(queries)
4
+ queries.split(/[[:blank:]]/)
5
+ end
6
+ end
7
+ extend Utils
8
+ end
9
+
@@ -1,4 +1,4 @@
1
1
  module FacileSearch
2
- VERSION = "0.0.1"
2
+ VERSION = "0.0.2"
3
3
  end
4
4
 
data/lib/facile_search.rb CHANGED
@@ -1,3 +1,4 @@
1
+ require "nkf"
1
2
  require "forwardable"
2
3
 
3
4
  require "redis/objects"
@@ -5,5 +6,6 @@ require "oj"
5
6
 
6
7
  require "facile_search/version"
7
8
  require "facile_search/meta_data"
9
+ require "facile_search/tokenizer"
8
10
  require "facile_search/inverted_index"
9
11
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: facile_search
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - i2bskn
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-04-28 00:00:00.000000000 Z
11
+ date: 2015-04-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: redis-objects
@@ -112,7 +112,10 @@ files:
112
112
  - lib/facile_search.rb
113
113
  - lib/facile_search/inverted_index.rb
114
114
  - lib/facile_search/meta_data.rb
115
+ - lib/facile_search/tokenizer.rb
115
116
  - lib/facile_search/tokenizer/ngram.rb
117
+ - lib/facile_search/tokenizer/normalization_hook.rb
118
+ - lib/facile_search/utils.rb
116
119
  - lib/facile_search/version.rb
117
120
  homepage: https://github.com/i2bskn/facile_search
118
121
  licenses: