lunar 0.4.1 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +2 -1
- data/LICENSE +1 -1
- data/README.markdown +116 -0
- data/Rakefile +6 -5
- data/VERSION +1 -1
- data/lib/lunar.rb +112 -24
- data/lib/lunar/connection.rb +51 -0
- data/lib/lunar/fuzzy_matches.rb +24 -0
- data/lib/lunar/fuzzy_word.rb +2 -2
- data/lib/lunar/index.rb +200 -94
- data/lib/lunar/keyword_matches.rb +32 -0
- data/lib/lunar/lunar_nest.rb +19 -0
- data/lib/lunar/range_matches.rb +28 -0
- data/lib/lunar/result_set.rb +85 -28
- data/lib/lunar/scoring.rb +4 -2
- data/lib/lunar/stopwords.rb +15 -0
- data/lib/lunar/words.rb +6 -3
- data/lunar.gemspec +31 -60
- data/test/helper.rb +4 -5
- data/test/test_fuzzy_indexing.rb +105 -0
- data/test/test_index.rb +150 -0
- data/test/test_lunar.rb +178 -1
- data/test/test_lunar_fuzzy_word.rb +4 -4
- data/test/test_lunar_nest.rb +46 -0
- data/test/{test_lunar_scoring.rb → test_scoring.rb} +5 -5
- metadata +72 -68
- data/.document +0 -5
- data/DATA +0 -41
- data/README.md +0 -80
- data/examples/ohm.rb +0 -40
- data/lib/lunar/search.rb +0 -68
- data/lib/lunar/sets.rb +0 -86
- data/test/test_lunar_fuzzy.rb +0 -118
- data/test/test_lunar_index.rb +0 -191
- data/test/test_lunar_search.rb +0 -261
- data/test/test_sets.rb +0 -48
- data/vendor/nest/nest.rb +0 -7
- data/vendor/redis/.gitignore +0 -9
- data/vendor/redis/LICENSE +0 -20
- data/vendor/redis/README.markdown +0 -120
- data/vendor/redis/Rakefile +0 -75
- data/vendor/redis/benchmarking/logging.rb +0 -62
- data/vendor/redis/benchmarking/pipeline.rb +0 -44
- data/vendor/redis/benchmarking/speed.rb +0 -21
- data/vendor/redis/benchmarking/suite.rb +0 -24
- data/vendor/redis/benchmarking/worker.rb +0 -71
- data/vendor/redis/bin/distredis +0 -33
- data/vendor/redis/examples/basic.rb +0 -15
- data/vendor/redis/examples/dist_redis.rb +0 -43
- data/vendor/redis/examples/incr-decr.rb +0 -17
- data/vendor/redis/examples/list.rb +0 -26
- data/vendor/redis/examples/pubsub.rb +0 -25
- data/vendor/redis/examples/sets.rb +0 -36
- data/vendor/redis/lib/edis.rb +0 -3
- data/vendor/redis/lib/redis.rb +0 -496
- data/vendor/redis/lib/redis/client.rb +0 -265
- data/vendor/redis/lib/redis/dist_redis.rb +0 -118
- data/vendor/redis/lib/redis/distributed.rb +0 -460
- data/vendor/redis/lib/redis/hash_ring.rb +0 -131
- data/vendor/redis/lib/redis/pipeline.rb +0 -13
- data/vendor/redis/lib/redis/raketasks.rb +0 -1
- data/vendor/redis/lib/redis/subscribe.rb +0 -79
- data/vendor/redis/profile.rb +0 -22
- data/vendor/redis/tasks/redis.tasks.rb +0 -140
- data/vendor/redis/test/db/.gitignore +0 -1
- data/vendor/redis/test/distributed_test.rb +0 -1131
- data/vendor/redis/test/redis_test.rb +0 -1134
- data/vendor/redis/test/test.conf +0 -8
- data/vendor/redis/test/test_helper.rb +0 -113
data/.document
DELETED
data/DATA
DELETED
@@ -1,41 +0,0 @@
|
|
1
|
-
|
2
|
-
Item
|
3
|
-
- id: 1
|
4
|
-
- name: 'iphone 3g'
|
5
|
-
- tags: 'apple mobile'
|
6
|
-
|
7
|
-
Item
|
8
|
-
- id: 2
|
9
|
-
- name: 'nokia n95'
|
10
|
-
- tags: 'symbian mobile'
|
11
|
-
|
12
|
-
q: mobile
|
13
|
-
|
14
|
-
|
15
|
-
Item:name:iphone => 1004 1001
|
16
|
-
Item:name:3gs => 1001
|
17
|
-
Item:name:apple => 1004 1003 1002
|
18
|
-
Item:name:macbook => 1003 1002
|
19
|
-
Item:name:pro => 1003 1002
|
20
|
-
Item:name:17 => 1003 1002
|
21
|
-
Item:desc:iphone => 1004 1003 1002
|
22
|
-
Item:desc:4g => 1004 1003
|
23
|
-
|
24
|
-
Keywords Only
|
25
|
-
-------------
|
26
|
-
iphone => 1004 1001 1003 1002
|
27
|
-
3gs => 1001
|
28
|
-
apple => 1004 1003 1002
|
29
|
-
macbook => 1002 1003
|
30
|
-
pro => 1002 1003
|
31
|
-
17 => 17
|
32
|
-
4g => 1003 1004
|
33
|
-
|
34
|
-
Key value pairs
|
35
|
-
---------------
|
36
|
-
name: iphone => 1001 1004
|
37
|
-
name: iphone, desc: iphone => 1004
|
38
|
-
name: iphone, desc: 4g => 1004
|
39
|
-
name: apple, desc: iphone => 1002, 1003, 1004
|
40
|
-
name: apple macbook pro, desc: iphone => 1002 1003
|
41
|
-
name: apple macbook pro 17, desc: 4g => 1003
|
data/README.md
DELETED
@@ -1,80 +0,0 @@
|
|
1
|
-
Lunar
|
2
|
-
=====
|
3
|
-
|
4
|
-
A minimalistic search index implemented using Redis, taking advantage of its
|
5
|
-
powerful `sorted sets` and the ability to do set intersection and union.
|
6
|
-
|
7
|
-
Results are sorted by their word score, which is stored as the score in the
|
8
|
-
Redis sorted set.
|
9
|
-
|
10
|
-
Examples
|
11
|
-
--------
|
12
|
-
|
13
|
-
class Item < Ohm::Model
|
14
|
-
attribute :name
|
15
|
-
attribute :description
|
16
|
-
|
17
|
-
protected
|
18
|
-
def write
|
19
|
-
super
|
20
|
-
index
|
21
|
-
end
|
22
|
-
|
23
|
-
def index
|
24
|
-
Lunar::Index.create 'Item' do |i|
|
25
|
-
i.key id
|
26
|
-
i.attr :name, name
|
27
|
-
i.attr :description, description
|
28
|
-
end
|
29
|
-
|
30
|
-
# You can also do this, no problem
|
31
|
-
Lunar::Index.create Item do |i|
|
32
|
-
i.key id
|
33
|
-
i.attr :name, name
|
34
|
-
i.attr :description, description
|
35
|
-
end
|
36
|
-
|
37
|
-
# Or to avoid name ties...
|
38
|
-
Lunar::Index.create self.class do |i|
|
39
|
-
i.key id
|
40
|
-
i.attr :name, name
|
41
|
-
i.attr :description, description
|
42
|
-
end
|
43
|
-
|
44
|
-
Lunar::Index.create Item do |i|
|
45
|
-
i.key id
|
46
|
-
i.fuzzy :name, name # this has a 100 character limit on name
|
47
|
-
# for performance reasons
|
48
|
-
i.integer :cost, cost
|
49
|
-
i.float :voting_quotient, voting_quotient
|
50
|
-
end
|
51
|
-
end
|
52
|
-
end
|
53
|
-
|
54
|
-
# Searching...
|
55
|
-
# You can just straight out search keywords
|
56
|
-
Lunar.search(Item, "iphone")
|
57
|
-
|
58
|
-
# Or opt to filter by field
|
59
|
-
Lunar.search(Item, :name => "iphone", :description => "mobile")
|
60
|
-
|
61
|
-
# For fuzzy declared fields you can currently only search
|
62
|
-
# using a fuzzy strategy exclusively, e.g.
|
63
|
-
Lunar.search(Item, :fuzzy => { :name => "i" })
|
64
|
-
# i, ip, iph, ipho, iphone, 3, 3g, 3gs all would match 'iPhone 3Gs'
|
65
|
-
|
66
|
-
# For integer / float types, you can do range searches on them e.g.
|
67
|
-
Lunar.search(Item, :cost => 300..500, :voting_quotient => 10..20)
|
68
|
-
|
69
|
-
# Or using the pagination gem with this:
|
70
|
-
@items = Lunar.search(Item, "iphone")
|
71
|
-
paginate @items, :per_page => 10, :page => 1
|
72
|
-
|
73
|
-
# If you want to be cheap about CPU cycles you can increase the
|
74
|
-
# default `:ttl` of search results (which is 30)
|
75
|
-
|
76
|
-
# Somewhere in config/initializers or init.rb, you decide...
|
77
|
-
Lunar.ttl = 300 # search results would be the same for 5 minutes.
|
78
|
-
# for high write public sites, this may be a good
|
79
|
-
# option as people don't really expect their stuff to
|
80
|
-
# be searchable right away on public content sites.
|
data/examples/ohm.rb
DELETED
@@ -1,40 +0,0 @@
|
|
1
|
-
class Item < Ohm::Model
|
2
|
-
attribute :name
|
3
|
-
attribute :description
|
4
|
-
|
5
|
-
def index
|
6
|
-
Lunar::Index.create 'Item' do |i|
|
7
|
-
i.key id
|
8
|
-
i.attr :name, name
|
9
|
-
i.attr :description, description
|
10
|
-
end
|
11
|
-
|
12
|
-
# You can also do this, no problem
|
13
|
-
Lunar::Index.create Item do |i|
|
14
|
-
i.key id
|
15
|
-
i.attr :name, name
|
16
|
-
i.attr :description, description
|
17
|
-
end
|
18
|
-
|
19
|
-
# Or to avoid name ties...
|
20
|
-
Lunar::Index.create self.class do |i|
|
21
|
-
i.key id
|
22
|
-
i.attr :name, name
|
23
|
-
i.attr :description, description
|
24
|
-
end
|
25
|
-
end
|
26
|
-
end
|
27
|
-
|
28
|
-
# Searching...
|
29
|
-
# You can just straight out search keywords
|
30
|
-
Lunar.search(Item, "iphone")
|
31
|
-
|
32
|
-
# Or opt to filter by field
|
33
|
-
Lunar.search(Item, :name => "iphone", :description => "mobile")
|
34
|
-
|
35
|
-
# Or search a field using an array
|
36
|
-
Lunar.search(Item, :name => ["iphone", "apple"])
|
37
|
-
|
38
|
-
# Or using the pagination gem with this:
|
39
|
-
@items = Lunar.search(Item, "iphone")
|
40
|
-
paginate @items, :per_page => 10, :page => 1
|
data/lib/lunar/search.rb
DELETED
@@ -1,68 +0,0 @@
|
|
1
|
-
module Lunar
|
2
|
-
class Search
|
3
|
-
attr :sets, :prefix, :search_identifier, :fuzzy_sets, :key_value_sets
|
4
|
-
|
5
|
-
def initialize(prefix, keywords)
|
6
|
-
if keywords.is_a?(Hash)
|
7
|
-
if fuzzy_hash = keywords.delete(:fuzzy)
|
8
|
-
@fuzzy_sets = fuzzy_hash.inject([]) { |a, (field, query)|
|
9
|
-
a | FuzzySets.new(prefix, query, field)
|
10
|
-
}
|
11
|
-
@search_identifier = fuzzy_hash.hash
|
12
|
-
else
|
13
|
-
@key_value_sets =
|
14
|
-
keywords.map { |field, val| Sets.new(prefix, val, field) }.flatten
|
15
|
-
|
16
|
-
@search_identifier = keywords.hash
|
17
|
-
end
|
18
|
-
else
|
19
|
-
@sets = Sets.new(prefix, keywords)
|
20
|
-
@search_identifier = keywords.hash
|
21
|
-
end
|
22
|
-
|
23
|
-
@prefix = prefix
|
24
|
-
|
25
|
-
# Default finder, uses Ohm style finding
|
26
|
-
@finder = lambda { |id| prefix[id] }
|
27
|
-
end
|
28
|
-
|
29
|
-
def results(&block)
|
30
|
-
block ||= @finder
|
31
|
-
|
32
|
-
if sets
|
33
|
-
if sets.empty?
|
34
|
-
return []
|
35
|
-
else
|
36
|
-
if not Lunar.redis.exists(dist_key)
|
37
|
-
Lunar.redis.zunion dist_key, sets.size, *sets
|
38
|
-
Lunar.redis.expire dist_key, Lunar.ttl
|
39
|
-
end
|
40
|
-
SortedResultSet.new(dist_key, &block)
|
41
|
-
end
|
42
|
-
elsif key_value_sets
|
43
|
-
if key_value_sets.empty?
|
44
|
-
return []
|
45
|
-
else
|
46
|
-
Lunar.redis.zinter dist_key, key_value_sets.size, *key_value_sets
|
47
|
-
SortedResultSet.new(dist_key, &block)
|
48
|
-
end
|
49
|
-
elsif fuzzy_sets
|
50
|
-
if fuzzy_sets.empty?
|
51
|
-
return []
|
52
|
-
else
|
53
|
-
if not Lunar.redis.exists(dist_key)
|
54
|
-
Lunar.redis.sunionstore dist_key, *fuzzy_sets
|
55
|
-
Lunar.redis.expire dist_key, Lunar.ttl
|
56
|
-
end
|
57
|
-
UnsortedResultSet.new(dist_key, &block)
|
58
|
-
end
|
59
|
-
end
|
60
|
-
end
|
61
|
-
|
62
|
-
protected
|
63
|
-
def dist_key
|
64
|
-
Lunar.nest[:Results][search_identifier]
|
65
|
-
end
|
66
|
-
end
|
67
|
-
end
|
68
|
-
|
data/lib/lunar/sets.rb
DELETED
@@ -1,86 +0,0 @@
|
|
1
|
-
module Lunar
|
2
|
-
module Sets
|
3
|
-
def self.new(prefix, keywords, field = '*')
|
4
|
-
case keywords
|
5
|
-
when String
|
6
|
-
KeywordSets.new(prefix, keywords, field)
|
7
|
-
when Range
|
8
|
-
RangeSets.new(prefix, keywords, field)
|
9
|
-
when Array
|
10
|
-
KeywordSets.new(prefix, keywords.join(' '), field)
|
11
|
-
else
|
12
|
-
raise TypeError, ":keywords should only be a String or Range"
|
13
|
-
end
|
14
|
-
end
|
15
|
-
end
|
16
|
-
|
17
|
-
class FuzzySets < Array
|
18
|
-
attr :prefix, :words, :field
|
19
|
-
|
20
|
-
def initialize(prefix, keywords, field)
|
21
|
-
@prefix = prefix
|
22
|
-
@field = field
|
23
|
-
@words = Words.new(keywords)
|
24
|
-
|
25
|
-
super(redis_set_keys)
|
26
|
-
end
|
27
|
-
|
28
|
-
protected
|
29
|
-
def redis_set_keys
|
30
|
-
words.map { |w| ns[Lunar.encode(w)] }
|
31
|
-
end
|
32
|
-
|
33
|
-
def ns
|
34
|
-
@ns ||= Lunar.nest[prefix][:Fuzzy][field]
|
35
|
-
end
|
36
|
-
end
|
37
|
-
|
38
|
-
class RangeSets < Array
|
39
|
-
attr :prefix, :field, :range
|
40
|
-
|
41
|
-
def initialize(prefix, range, field)
|
42
|
-
@prefix = prefix
|
43
|
-
@field = field
|
44
|
-
@range = range
|
45
|
-
|
46
|
-
super [write_and_retrieve_key]
|
47
|
-
end
|
48
|
-
|
49
|
-
def write_and_retrieve_key
|
50
|
-
zrange = Lunar.redis.zrangebyscore(Lunar.nest[prefix][field],
|
51
|
-
@range.first, @range.last)
|
52
|
-
|
53
|
-
zrange.each { |id| Lunar.redis.zadd key, 1, id }
|
54
|
-
key.to_s
|
55
|
-
end
|
56
|
-
|
57
|
-
def key
|
58
|
-
@key ||= Lunar.nest[prefix][field]["#{ range.first }_TO_#{ range.last }"]
|
59
|
-
end
|
60
|
-
end
|
61
|
-
|
62
|
-
class KeywordSets < Array
|
63
|
-
attr :prefix, :words, :field
|
64
|
-
|
65
|
-
def initialize(prefix, keywords, field = '*')
|
66
|
-
@prefix = prefix
|
67
|
-
@field = field
|
68
|
-
@words = Words.new(keywords)
|
69
|
-
|
70
|
-
super(redis_set_keys)
|
71
|
-
end
|
72
|
-
|
73
|
-
protected
|
74
|
-
def redis_set_keys
|
75
|
-
keys_for_each_word.map { |key| Lunar.redis.keys(key) }.flatten
|
76
|
-
end
|
77
|
-
|
78
|
-
def keys_for_each_word
|
79
|
-
words.map { |w| ns[Lunar.encode(w)] }
|
80
|
-
end
|
81
|
-
|
82
|
-
def ns
|
83
|
-
@ns ||= Lunar.nest[prefix][field]
|
84
|
-
end
|
85
|
-
end
|
86
|
-
end
|
data/test/test_lunar_fuzzy.rb
DELETED
@@ -1,118 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# module Lunar
|
3
|
-
# module Fuzzy
|
4
|
-
#
|
5
|
-
# end
|
6
|
-
# end
|
7
|
-
require "helper"
|
8
|
-
|
9
|
-
class LunarFuzzyTest < Test::Unit::TestCase
|
10
|
-
setup do
|
11
|
-
Lunar.redis(Redis.new(:host => '127.0.0.1', :port => '6380'))
|
12
|
-
Lunar.redis.flushdb
|
13
|
-
end
|
14
|
-
|
15
|
-
context "when setting fuzzy name, 'Yukihiro Matsumoto'" do
|
16
|
-
setup do
|
17
|
-
@index = Lunar::Index.create 'Item' do |i|
|
18
|
-
i.key 1001
|
19
|
-
i.fuzzy :name, 'Yukihiro Matsumoto'
|
20
|
-
end
|
21
|
-
end
|
22
|
-
|
23
|
-
should "store Lunar:Item:name:Y up to o and M up to o" do
|
24
|
-
fname, lname = 'yukihiro', 'matsumoto'
|
25
|
-
|
26
|
-
(1..fname.length).each do |length|
|
27
|
-
key = "Lunar:Item:Fuzzy:name:#{ encode(fname[0, length]) }"
|
28
|
-
assert Lunar.redis.smembers(key).include?('1001')
|
29
|
-
end
|
30
|
-
|
31
|
-
(1..lname.length).each do |length|
|
32
|
-
key = "Lunar:Item:Fuzzy:name:#{ encode(lname[0, length]) }"
|
33
|
-
assert Lunar.redis.smembers(key).include?('1001')
|
34
|
-
end
|
35
|
-
end
|
36
|
-
end
|
37
|
-
|
38
|
-
context "when creating an index that already exists" do
|
39
|
-
setup do
|
40
|
-
@index = Lunar::Index.create 'Item' do |i|
|
41
|
-
i.key 1001
|
42
|
-
i.fuzzy :name, 'Yukihiro Matsumoto'
|
43
|
-
end
|
44
|
-
|
45
|
-
@index = Lunar::Index.create 'Item' do |i|
|
46
|
-
i.key 1001
|
47
|
-
i.fuzzy :name, 'Martin Fowler Yuki'
|
48
|
-
end
|
49
|
-
end
|
50
|
-
|
51
|
-
should "remove all fuzzy entries for Yukihiro Matsumoto" do
|
52
|
-
fname, lname = 'yukihiro', 'matsumoto'
|
53
|
-
|
54
|
-
(5..fname.length).each do |length|
|
55
|
-
key = "Lunar:Item:Fuzzy:name:#{ encode(fname[0, length]) }"
|
56
|
-
assert ! Lunar.redis.smembers(key).include?('1001')
|
57
|
-
end
|
58
|
-
|
59
|
-
(3..lname.length).each do |length|
|
60
|
-
key = "Lunar:Item:Fuzzy:name:#{ encode(lname[0, length]) }"
|
61
|
-
assert ! Lunar.redis.smembers(key).include?('1001')
|
62
|
-
end
|
63
|
-
end
|
64
|
-
|
65
|
-
should "store Lunar:Item:name:M up to n and F up to r etc..." do
|
66
|
-
fname, lname, triple = 'martin', 'fowler', 'yuki'
|
67
|
-
|
68
|
-
(1..fname.length).each do |length|
|
69
|
-
key = "Lunar:Item:Fuzzy:name:#{ encode(fname[0, length]) }"
|
70
|
-
assert Lunar.redis.smembers(key).include?('1001')
|
71
|
-
end
|
72
|
-
|
73
|
-
(1..lname.length).each do |length|
|
74
|
-
key = "Lunar:Item:Fuzzy:name:#{ encode(lname[0, length]) }"
|
75
|
-
assert Lunar.redis.smembers(key).include?('1001')
|
76
|
-
end
|
77
|
-
|
78
|
-
(1..triple.length).each do |length|
|
79
|
-
key = "Lunar:Item:Fuzzy:name:#{ encode(triple[0, length]) }"
|
80
|
-
assert Lunar.redis.smembers(key).include?('1001')
|
81
|
-
end
|
82
|
-
end
|
83
|
-
end
|
84
|
-
|
85
|
-
context "on delete" do
|
86
|
-
setup do
|
87
|
-
@index = Lunar::Index.create 'Item' do |i|
|
88
|
-
i.key 1001
|
89
|
-
i.fuzzy :name, 'Yukihiro Matsumoto'
|
90
|
-
end
|
91
|
-
|
92
|
-
Lunar::Index.delete('Item', 1001)
|
93
|
-
end
|
94
|
-
|
95
|
-
should "remove all fuzzy entries for Yukihiro Matsumoto" do
|
96
|
-
fname, lname = 'yukihiro', 'matsumoto'
|
97
|
-
|
98
|
-
(0..fname.length).each do |length|
|
99
|
-
key = "Lunar:Item:Fuzzy:name:#{ encode(fname[0, length]) }"
|
100
|
-
assert ! Lunar.redis.smembers(key).include?('1001')
|
101
|
-
end
|
102
|
-
|
103
|
-
(0..lname.length).each do |length|
|
104
|
-
key = "Lunar:Item:Fuzzy:name:#{ encode(lname[0, length]) }"
|
105
|
-
assert ! Lunar.redis.smembers(key).include?('1001')
|
106
|
-
end
|
107
|
-
end
|
108
|
-
|
109
|
-
should "also remove the key Lunar:Item:Fuzzy:1001:name" do
|
110
|
-
assert ! Lunar.redis.exists("Lunar:Item:Fuzzy:1001:name")
|
111
|
-
end
|
112
|
-
end
|
113
|
-
|
114
|
-
protected
|
115
|
-
def encode(str)
|
116
|
-
Lunar.encode(str)
|
117
|
-
end
|
118
|
-
end
|