lunar 0.4.1 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +2 -1
- data/LICENSE +1 -1
- data/README.markdown +116 -0
- data/Rakefile +6 -5
- data/VERSION +1 -1
- data/lib/lunar.rb +112 -24
- data/lib/lunar/connection.rb +51 -0
- data/lib/lunar/fuzzy_matches.rb +24 -0
- data/lib/lunar/fuzzy_word.rb +2 -2
- data/lib/lunar/index.rb +200 -94
- data/lib/lunar/keyword_matches.rb +32 -0
- data/lib/lunar/lunar_nest.rb +19 -0
- data/lib/lunar/range_matches.rb +28 -0
- data/lib/lunar/result_set.rb +85 -28
- data/lib/lunar/scoring.rb +4 -2
- data/lib/lunar/stopwords.rb +15 -0
- data/lib/lunar/words.rb +6 -3
- data/lunar.gemspec +31 -60
- data/test/helper.rb +4 -5
- data/test/test_fuzzy_indexing.rb +105 -0
- data/test/test_index.rb +150 -0
- data/test/test_lunar.rb +178 -1
- data/test/test_lunar_fuzzy_word.rb +4 -4
- data/test/test_lunar_nest.rb +46 -0
- data/test/{test_lunar_scoring.rb → test_scoring.rb} +5 -5
- metadata +72 -68
- data/.document +0 -5
- data/DATA +0 -41
- data/README.md +0 -80
- data/examples/ohm.rb +0 -40
- data/lib/lunar/search.rb +0 -68
- data/lib/lunar/sets.rb +0 -86
- data/test/test_lunar_fuzzy.rb +0 -118
- data/test/test_lunar_index.rb +0 -191
- data/test/test_lunar_search.rb +0 -261
- data/test/test_sets.rb +0 -48
- data/vendor/nest/nest.rb +0 -7
- data/vendor/redis/.gitignore +0 -9
- data/vendor/redis/LICENSE +0 -20
- data/vendor/redis/README.markdown +0 -120
- data/vendor/redis/Rakefile +0 -75
- data/vendor/redis/benchmarking/logging.rb +0 -62
- data/vendor/redis/benchmarking/pipeline.rb +0 -44
- data/vendor/redis/benchmarking/speed.rb +0 -21
- data/vendor/redis/benchmarking/suite.rb +0 -24
- data/vendor/redis/benchmarking/worker.rb +0 -71
- data/vendor/redis/bin/distredis +0 -33
- data/vendor/redis/examples/basic.rb +0 -15
- data/vendor/redis/examples/dist_redis.rb +0 -43
- data/vendor/redis/examples/incr-decr.rb +0 -17
- data/vendor/redis/examples/list.rb +0 -26
- data/vendor/redis/examples/pubsub.rb +0 -25
- data/vendor/redis/examples/sets.rb +0 -36
- data/vendor/redis/lib/edis.rb +0 -3
- data/vendor/redis/lib/redis.rb +0 -496
- data/vendor/redis/lib/redis/client.rb +0 -265
- data/vendor/redis/lib/redis/dist_redis.rb +0 -118
- data/vendor/redis/lib/redis/distributed.rb +0 -460
- data/vendor/redis/lib/redis/hash_ring.rb +0 -131
- data/vendor/redis/lib/redis/pipeline.rb +0 -13
- data/vendor/redis/lib/redis/raketasks.rb +0 -1
- data/vendor/redis/lib/redis/subscribe.rb +0 -79
- data/vendor/redis/profile.rb +0 -22
- data/vendor/redis/tasks/redis.tasks.rb +0 -140
- data/vendor/redis/test/db/.gitignore +0 -1
- data/vendor/redis/test/distributed_test.rb +0 -1131
- data/vendor/redis/test/redis_test.rb +0 -1134
- data/vendor/redis/test/test.conf +0 -8
- data/vendor/redis/test/test_helper.rb +0 -113
data/.document
DELETED
data/DATA
DELETED
@@ -1,41 +0,0 @@
|
|
1
|
-
|
2
|
-
Item
|
3
|
-
- id: 1
|
4
|
-
- name: 'iphone 3g'
|
5
|
-
- tags: 'apple mobile'
|
6
|
-
|
7
|
-
Item
|
8
|
-
- id: 2
|
9
|
-
- name: 'nokia n95'
|
10
|
-
- tags: 'symbian mobile'
|
11
|
-
|
12
|
-
q: mobile
|
13
|
-
|
14
|
-
|
15
|
-
Item:name:iphone => 1004 1001
|
16
|
-
Item:name:3gs => 1001
|
17
|
-
Item:name:apple => 1004 1003 1002
|
18
|
-
Item:name:macbook => 1003 1002
|
19
|
-
Item:name:pro => 1003 1002
|
20
|
-
Item:name:17 => 1003 1002
|
21
|
-
Item:desc:iphone => 1004 1003 1002
|
22
|
-
Item:desc:4g => 1004 1003
|
23
|
-
|
24
|
-
Keywords Only
|
25
|
-
-------------
|
26
|
-
iphone => 1004 1001 1003 1002
|
27
|
-
3gs => 1001
|
28
|
-
apple => 1004 1003 1002
|
29
|
-
macbook => 1002 1003
|
30
|
-
pro => 1002 1003
|
31
|
-
17 => 1002 1003
|
32
|
-
4g => 1003 1004
|
33
|
-
|
34
|
-
Key value pairs
|
35
|
-
---------------
|
36
|
-
name: iphone => 1001 1004
|
37
|
-
name: iphone, desc: iphone => 1004
|
38
|
-
name: iphone, desc: 4g => 1004
|
39
|
-
name: apple, desc: iphone => 1002, 1003, 1004
|
40
|
-
name: apple macbook pro, desc: iphone => 1002 1003
|
41
|
-
name: apple macbook pro 17, desc: 4g => 1003
|
data/README.md
DELETED
@@ -1,80 +0,0 @@
|
|
1
|
-
Lunar
|
2
|
-
=====
|
3
|
-
|
4
|
-
A minimalistic search index implemented using Redis, taking advantage of its
|
5
|
-
powerful `sorted sets` and the ability to do set intersection and union.
|
6
|
-
|
7
|
-
Results are sorted by their word score, which is stored as the score in the
|
8
|
-
Redis sorted set.
|
9
|
-
|
10
|
-
Examples
|
11
|
-
--------
|
12
|
-
|
13
|
-
class Item < Ohm::Model
|
14
|
-
attribute :name
|
15
|
-
attribute :description
|
16
|
-
|
17
|
-
protected
|
18
|
-
def write
|
19
|
-
super
|
20
|
-
index
|
21
|
-
end
|
22
|
-
|
23
|
-
def index
|
24
|
-
Lunar::Index.create 'Item' do |i|
|
25
|
-
i.key id
|
26
|
-
i.attr :name, name
|
27
|
-
i.attr :description, description
|
28
|
-
end
|
29
|
-
|
30
|
-
# You can also do this, no problem
|
31
|
-
Lunar::Index.create Item do |i|
|
32
|
-
i.key id
|
33
|
-
i.attr :name, name
|
34
|
-
i.attr :description, description
|
35
|
-
end
|
36
|
-
|
37
|
-
# Or to avoid name ties...
|
38
|
-
Lunar::Index.create self.class do |i|
|
39
|
-
i.key id
|
40
|
-
i.attr :name, name
|
41
|
-
i.attr :description, description
|
42
|
-
end
|
43
|
-
|
44
|
-
Lunar::Index.create Item do |i|
|
45
|
-
i.key id
|
46
|
-
i.fuzzy :name, name # this has a 100 character limit on name
|
47
|
-
# for performance reasons
|
48
|
-
i.integer :cost, cost
|
49
|
-
i.float :voting_quotient, voting_quotient
|
50
|
-
end
|
51
|
-
end
|
52
|
-
end
|
53
|
-
|
54
|
-
# Searching...
|
55
|
-
# You can just straight out search keywords
|
56
|
-
Lunar.search(Item, "iphone")
|
57
|
-
|
58
|
-
# Or opt to filter by field
|
59
|
-
Lunar.search(Item, :name => "iphone", :description => "mobile")
|
60
|
-
|
61
|
-
# For fuzzy declared fields you can currently only search
|
62
|
-
# using a fuzzy strategy exclusively, e.g.
|
63
|
-
Lunar.search(Item, :fuzzy => { :name => "i" })
|
64
|
-
# i, ip, iph, ipho, iphone, 3, 3g, 3gs all would match 'iPhone 3Gs'
|
65
|
-
|
66
|
-
# For integer / float types, you can do range searches on them e.g.
|
67
|
-
Lunar.search(Item, :cost => 300..500, :voting_quotient => 10..20)
|
68
|
-
|
69
|
-
# Or using the pagination gem with this:
|
70
|
-
@items = Lunar.search(Item, "iphone")
|
71
|
-
paginate @items, :per_page => 10, :page => 1
|
72
|
-
|
73
|
-
# If you want to be cheap about CPU cycles you can increase the
|
74
|
-
# default `:ttl` of search results (which is 30 seconds)
|
75
|
-
|
76
|
-
# Somewhere in config/initializers or init.rb, you decide...
|
77
|
-
Lunar.ttl = 300 # search results would be the same for 5 minutes.
|
78
|
-
# for high-write public sites, this may be a good
|
79
|
-
# option as people don't really expect their stuff to
|
80
|
-
# be searchable right away on public content sites.
|
data/examples/ohm.rb
DELETED
@@ -1,40 +0,0 @@
|
|
1
|
-
class Item < Ohm::Model
|
2
|
-
attribute :name
|
3
|
-
attribute :description
|
4
|
-
|
5
|
-
def index
|
6
|
-
Lunar::Index.create 'Item' do |i|
|
7
|
-
i.key id
|
8
|
-
i.attr :name, name
|
9
|
-
i.attr :description, description
|
10
|
-
end
|
11
|
-
|
12
|
-
# You can also do this, no problem
|
13
|
-
Lunar::Index.create Item do |i|
|
14
|
-
i.key id
|
15
|
-
i.attr :name, name
|
16
|
-
i.attr :description, description
|
17
|
-
end
|
18
|
-
|
19
|
-
# Or to avoid name ties...
|
20
|
-
Lunar::Index.create self.class do |i|
|
21
|
-
i.key id
|
22
|
-
i.attr :name, name
|
23
|
-
i.attr :description, description
|
24
|
-
end
|
25
|
-
end
|
26
|
-
end
|
27
|
-
|
28
|
-
# Searching...
|
29
|
-
# You can just straight out search keywords
|
30
|
-
Lunar.search(Item, "iphone")
|
31
|
-
|
32
|
-
# Or opt to filter by field
|
33
|
-
Lunar.search(Item, :name => "iphone", :description => "mobile")
|
34
|
-
|
35
|
-
# Or search a field using an array
|
36
|
-
Lunar.search(Item, :name => ["iphone", "apple"])
|
37
|
-
|
38
|
-
# Or using the pagination gem with this:
|
39
|
-
@items = Lunar.search(Item, "iphone")
|
40
|
-
paginate @items, :per_page => 10, :page => 1
|
data/lib/lunar/search.rb
DELETED
@@ -1,68 +0,0 @@
|
|
1
|
-
module Lunar
|
2
|
-
class Search
|
3
|
-
attr :sets, :prefix, :search_identifier, :fuzzy_sets, :key_value_sets
|
4
|
-
|
5
|
-
def initialize(prefix, keywords)
|
6
|
-
if keywords.is_a?(Hash)
|
7
|
-
if fuzzy_hash = keywords.delete(:fuzzy)
|
8
|
-
@fuzzy_sets = fuzzy_hash.inject([]) { |a, (field, query)|
|
9
|
-
a | FuzzySets.new(prefix, query, field)
|
10
|
-
}
|
11
|
-
@search_identifier = fuzzy_hash.hash
|
12
|
-
else
|
13
|
-
@key_value_sets =
|
14
|
-
keywords.map { |field, val| Sets.new(prefix, val, field) }.flatten
|
15
|
-
|
16
|
-
@search_identifier = keywords.hash
|
17
|
-
end
|
18
|
-
else
|
19
|
-
@sets = Sets.new(prefix, keywords)
|
20
|
-
@search_identifier = keywords.hash
|
21
|
-
end
|
22
|
-
|
23
|
-
@prefix = prefix
|
24
|
-
|
25
|
-
# Default finder, uses Ohm style finding
|
26
|
-
@finder = lambda { |id| prefix[id] }
|
27
|
-
end
|
28
|
-
|
29
|
-
def results(&block)
|
30
|
-
block ||= @finder
|
31
|
-
|
32
|
-
if sets
|
33
|
-
if sets.empty?
|
34
|
-
return []
|
35
|
-
else
|
36
|
-
if not Lunar.redis.exists(dist_key)
|
37
|
-
Lunar.redis.zunion dist_key, sets.size, *sets
|
38
|
-
Lunar.redis.expire dist_key, Lunar.ttl
|
39
|
-
end
|
40
|
-
SortedResultSet.new(dist_key, &block)
|
41
|
-
end
|
42
|
-
elsif key_value_sets
|
43
|
-
if key_value_sets.empty?
|
44
|
-
return []
|
45
|
-
else
|
46
|
-
Lunar.redis.zinter dist_key, key_value_sets.size, *key_value_sets
|
47
|
-
SortedResultSet.new(dist_key, &block)
|
48
|
-
end
|
49
|
-
elsif fuzzy_sets
|
50
|
-
if fuzzy_sets.empty?
|
51
|
-
return []
|
52
|
-
else
|
53
|
-
if not Lunar.redis.exists(dist_key)
|
54
|
-
Lunar.redis.sunionstore dist_key, *fuzzy_sets
|
55
|
-
Lunar.redis.expire dist_key, Lunar.ttl
|
56
|
-
end
|
57
|
-
UnsortedResultSet.new(dist_key, &block)
|
58
|
-
end
|
59
|
-
end
|
60
|
-
end
|
61
|
-
|
62
|
-
protected
|
63
|
-
def dist_key
|
64
|
-
Lunar.nest[:Results][search_identifier]
|
65
|
-
end
|
66
|
-
end
|
67
|
-
end
|
68
|
-
|
data/lib/lunar/sets.rb
DELETED
@@ -1,86 +0,0 @@
|
|
1
|
-
module Lunar
|
2
|
-
module Sets
|
3
|
-
def self.new(prefix, keywords, field = '*')
|
4
|
-
case keywords
|
5
|
-
when String
|
6
|
-
KeywordSets.new(prefix, keywords, field)
|
7
|
-
when Range
|
8
|
-
RangeSets.new(prefix, keywords, field)
|
9
|
-
when Array
|
10
|
-
KeywordSets.new(prefix, keywords.join(' '), field)
|
11
|
-
else
|
12
|
-
raise TypeError, ":keywords should only be a String or Range"
|
13
|
-
end
|
14
|
-
end
|
15
|
-
end
|
16
|
-
|
17
|
-
class FuzzySets < Array
|
18
|
-
attr :prefix, :words, :field
|
19
|
-
|
20
|
-
def initialize(prefix, keywords, field)
|
21
|
-
@prefix = prefix
|
22
|
-
@field = field
|
23
|
-
@words = Words.new(keywords)
|
24
|
-
|
25
|
-
super(redis_set_keys)
|
26
|
-
end
|
27
|
-
|
28
|
-
protected
|
29
|
-
def redis_set_keys
|
30
|
-
words.map { |w| ns[Lunar.encode(w)] }
|
31
|
-
end
|
32
|
-
|
33
|
-
def ns
|
34
|
-
@ns ||= Lunar.nest[prefix][:Fuzzy][field]
|
35
|
-
end
|
36
|
-
end
|
37
|
-
|
38
|
-
class RangeSets < Array
|
39
|
-
attr :prefix, :field, :range
|
40
|
-
|
41
|
-
def initialize(prefix, range, field)
|
42
|
-
@prefix = prefix
|
43
|
-
@field = field
|
44
|
-
@range = range
|
45
|
-
|
46
|
-
super [write_and_retrieve_key]
|
47
|
-
end
|
48
|
-
|
49
|
-
def write_and_retrieve_key
|
50
|
-
zrange = Lunar.redis.zrangebyscore(Lunar.nest[prefix][field],
|
51
|
-
@range.first, @range.last)
|
52
|
-
|
53
|
-
zrange.each { |id| Lunar.redis.zadd key, 1, id }
|
54
|
-
key.to_s
|
55
|
-
end
|
56
|
-
|
57
|
-
def key
|
58
|
-
@key ||= Lunar.nest[prefix][field]["#{ range.first }_TO_#{ range.last }"]
|
59
|
-
end
|
60
|
-
end
|
61
|
-
|
62
|
-
class KeywordSets < Array
|
63
|
-
attr :prefix, :words, :field
|
64
|
-
|
65
|
-
def initialize(prefix, keywords, field = '*')
|
66
|
-
@prefix = prefix
|
67
|
-
@field = field
|
68
|
-
@words = Words.new(keywords)
|
69
|
-
|
70
|
-
super(redis_set_keys)
|
71
|
-
end
|
72
|
-
|
73
|
-
protected
|
74
|
-
def redis_set_keys
|
75
|
-
keys_for_each_word.map { |key| Lunar.redis.keys(key) }.flatten
|
76
|
-
end
|
77
|
-
|
78
|
-
def keys_for_each_word
|
79
|
-
words.map { |w| ns[Lunar.encode(w)] }
|
80
|
-
end
|
81
|
-
|
82
|
-
def ns
|
83
|
-
@ns ||= Lunar.nest[prefix][field]
|
84
|
-
end
|
85
|
-
end
|
86
|
-
end
|
data/test/test_lunar_fuzzy.rb
DELETED
@@ -1,118 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# module Lunar
|
3
|
-
# module Fuzzy
|
4
|
-
#
|
5
|
-
# end
|
6
|
-
# end
|
7
|
-
require "helper"
|
8
|
-
|
9
|
-
class LunarFuzzyTest < Test::Unit::TestCase
|
10
|
-
setup do
|
11
|
-
Lunar.redis(Redis.new(:host => '127.0.0.1', :port => '6380'))
|
12
|
-
Lunar.redis.flushdb
|
13
|
-
end
|
14
|
-
|
15
|
-
context "when setting fuzzy name, 'Yukihiro Matsumoto'" do
|
16
|
-
setup do
|
17
|
-
@index = Lunar::Index.create 'Item' do |i|
|
18
|
-
i.key 1001
|
19
|
-
i.fuzzy :name, 'Yukihiro Matsumoto'
|
20
|
-
end
|
21
|
-
end
|
22
|
-
|
23
|
-
should "store Lunar:Item:name:Y up to o and M up to o" do
|
24
|
-
fname, lname = 'yukihiro', 'matsumoto'
|
25
|
-
|
26
|
-
(1..fname.length).each do |length|
|
27
|
-
key = "Lunar:Item:Fuzzy:name:#{ encode(fname[0, length]) }"
|
28
|
-
assert Lunar.redis.smembers(key).include?('1001')
|
29
|
-
end
|
30
|
-
|
31
|
-
(1..lname.length).each do |length|
|
32
|
-
key = "Lunar:Item:Fuzzy:name:#{ encode(lname[0, length]) }"
|
33
|
-
assert Lunar.redis.smembers(key).include?('1001')
|
34
|
-
end
|
35
|
-
end
|
36
|
-
end
|
37
|
-
|
38
|
-
context "when creating an index that already exists" do
|
39
|
-
setup do
|
40
|
-
@index = Lunar::Index.create 'Item' do |i|
|
41
|
-
i.key 1001
|
42
|
-
i.fuzzy :name, 'Yukihiro Matsumoto'
|
43
|
-
end
|
44
|
-
|
45
|
-
@index = Lunar::Index.create 'Item' do |i|
|
46
|
-
i.key 1001
|
47
|
-
i.fuzzy :name, 'Martin Fowler Yuki'
|
48
|
-
end
|
49
|
-
end
|
50
|
-
|
51
|
-
should "remove all fuzzy entries for Yukihiro Matsumoto" do
|
52
|
-
fname, lname = 'yukihiro', 'matsumoto'
|
53
|
-
|
54
|
-
(5..fname.length).each do |length|
|
55
|
-
key = "Lunar:Item:Fuzzy:name:#{ encode(fname[0, length]) }"
|
56
|
-
assert ! Lunar.redis.smembers(key).include?('1001')
|
57
|
-
end
|
58
|
-
|
59
|
-
(3..lname.length).each do |length|
|
60
|
-
key = "Lunar:Item:Fuzzy:name:#{ encode(lname[0, length]) }"
|
61
|
-
assert ! Lunar.redis.smembers(key).include?('1001')
|
62
|
-
end
|
63
|
-
end
|
64
|
-
|
65
|
-
should "store Lunar:Item:name:M up to n and F up to r etc..." do
|
66
|
-
fname, lname, triple = 'martin', 'fowler', 'yuki'
|
67
|
-
|
68
|
-
(1..fname.length).each do |length|
|
69
|
-
key = "Lunar:Item:Fuzzy:name:#{ encode(fname[0, length]) }"
|
70
|
-
assert Lunar.redis.smembers(key).include?('1001')
|
71
|
-
end
|
72
|
-
|
73
|
-
(1..lname.length).each do |length|
|
74
|
-
key = "Lunar:Item:Fuzzy:name:#{ encode(lname[0, length]) }"
|
75
|
-
assert Lunar.redis.smembers(key).include?('1001')
|
76
|
-
end
|
77
|
-
|
78
|
-
(1..triple.length).each do |length|
|
79
|
-
key = "Lunar:Item:Fuzzy:name:#{ encode(triple[0, length]) }"
|
80
|
-
assert Lunar.redis.smembers(key).include?('1001')
|
81
|
-
end
|
82
|
-
end
|
83
|
-
end
|
84
|
-
|
85
|
-
context "on delete" do
|
86
|
-
setup do
|
87
|
-
@index = Lunar::Index.create 'Item' do |i|
|
88
|
-
i.key 1001
|
89
|
-
i.fuzzy :name, 'Yukihiro Matsumoto'
|
90
|
-
end
|
91
|
-
|
92
|
-
Lunar::Index.delete('Item', 1001)
|
93
|
-
end
|
94
|
-
|
95
|
-
should "remove all fuzzy entries for Yukihiro Matsumoto" do
|
96
|
-
fname, lname = 'yukihiro', 'matsumoto'
|
97
|
-
|
98
|
-
(0..fname.length).each do |length|
|
99
|
-
key = "Lunar:Item:Fuzzy:name:#{ encode(fname[0, length]) }"
|
100
|
-
assert ! Lunar.redis.smembers(key).include?('1001')
|
101
|
-
end
|
102
|
-
|
103
|
-
(0..lname.length).each do |length|
|
104
|
-
key = "Lunar:Item:Fuzzy:name:#{ encode(lname[0, length]) }"
|
105
|
-
assert ! Lunar.redis.smembers(key).include?('1001')
|
106
|
-
end
|
107
|
-
end
|
108
|
-
|
109
|
-
should "also remove the key Lunar:Item:Fuzzy:1001:name" do
|
110
|
-
assert ! Lunar.redis.exists("Lunar:Item:Fuzzy:1001:name")
|
111
|
-
end
|
112
|
-
end
|
113
|
-
|
114
|
-
protected
|
115
|
-
def encode(str)
|
116
|
-
Lunar.encode(str)
|
117
|
-
end
|
118
|
-
end
|