lunar 0.4.1 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. data/.gitignore +2 -1
  2. data/LICENSE +1 -1
  3. data/README.markdown +116 -0
  4. data/Rakefile +6 -5
  5. data/VERSION +1 -1
  6. data/lib/lunar.rb +112 -24
  7. data/lib/lunar/connection.rb +51 -0
  8. data/lib/lunar/fuzzy_matches.rb +24 -0
  9. data/lib/lunar/fuzzy_word.rb +2 -2
  10. data/lib/lunar/index.rb +200 -94
  11. data/lib/lunar/keyword_matches.rb +32 -0
  12. data/lib/lunar/lunar_nest.rb +19 -0
  13. data/lib/lunar/range_matches.rb +28 -0
  14. data/lib/lunar/result_set.rb +85 -28
  15. data/lib/lunar/scoring.rb +4 -2
  16. data/lib/lunar/stopwords.rb +15 -0
  17. data/lib/lunar/words.rb +6 -3
  18. data/lunar.gemspec +31 -60
  19. data/test/helper.rb +4 -5
  20. data/test/test_fuzzy_indexing.rb +105 -0
  21. data/test/test_index.rb +150 -0
  22. data/test/test_lunar.rb +178 -1
  23. data/test/test_lunar_fuzzy_word.rb +4 -4
  24. data/test/test_lunar_nest.rb +46 -0
  25. data/test/{test_lunar_scoring.rb → test_scoring.rb} +5 -5
  26. metadata +72 -68
  27. data/.document +0 -5
  28. data/DATA +0 -41
  29. data/README.md +0 -80
  30. data/examples/ohm.rb +0 -40
  31. data/lib/lunar/search.rb +0 -68
  32. data/lib/lunar/sets.rb +0 -86
  33. data/test/test_lunar_fuzzy.rb +0 -118
  34. data/test/test_lunar_index.rb +0 -191
  35. data/test/test_lunar_search.rb +0 -261
  36. data/test/test_sets.rb +0 -48
  37. data/vendor/nest/nest.rb +0 -7
  38. data/vendor/redis/.gitignore +0 -9
  39. data/vendor/redis/LICENSE +0 -20
  40. data/vendor/redis/README.markdown +0 -120
  41. data/vendor/redis/Rakefile +0 -75
  42. data/vendor/redis/benchmarking/logging.rb +0 -62
  43. data/vendor/redis/benchmarking/pipeline.rb +0 -44
  44. data/vendor/redis/benchmarking/speed.rb +0 -21
  45. data/vendor/redis/benchmarking/suite.rb +0 -24
  46. data/vendor/redis/benchmarking/worker.rb +0 -71
  47. data/vendor/redis/bin/distredis +0 -33
  48. data/vendor/redis/examples/basic.rb +0 -15
  49. data/vendor/redis/examples/dist_redis.rb +0 -43
  50. data/vendor/redis/examples/incr-decr.rb +0 -17
  51. data/vendor/redis/examples/list.rb +0 -26
  52. data/vendor/redis/examples/pubsub.rb +0 -25
  53. data/vendor/redis/examples/sets.rb +0 -36
  54. data/vendor/redis/lib/edis.rb +0 -3
  55. data/vendor/redis/lib/redis.rb +0 -496
  56. data/vendor/redis/lib/redis/client.rb +0 -265
  57. data/vendor/redis/lib/redis/dist_redis.rb +0 -118
  58. data/vendor/redis/lib/redis/distributed.rb +0 -460
  59. data/vendor/redis/lib/redis/hash_ring.rb +0 -131
  60. data/vendor/redis/lib/redis/pipeline.rb +0 -13
  61. data/vendor/redis/lib/redis/raketasks.rb +0 -1
  62. data/vendor/redis/lib/redis/subscribe.rb +0 -79
  63. data/vendor/redis/profile.rb +0 -22
  64. data/vendor/redis/tasks/redis.tasks.rb +0 -140
  65. data/vendor/redis/test/db/.gitignore +0 -1
  66. data/vendor/redis/test/distributed_test.rb +0 -1131
  67. data/vendor/redis/test/redis_test.rb +0 -1134
  68. data/vendor/redis/test/test.conf +0 -8
  69. data/vendor/redis/test/test_helper.rb +0 -113
data/.document DELETED
@@ -1,5 +0,0 @@
1
- README.rdoc
2
- lib/**/*.rb
3
- bin/*
4
- features/**/*.feature
5
- LICENSE
data/DATA DELETED
@@ -1,41 +0,0 @@
1
-
2
- Item
3
- - id: 1
4
- - name: 'iphone 3g'
5
- - tags: 'apple mobile'
6
-
7
- Item
8
- - id: 2
9
- - name: 'nokia n95'
10
- - tags: 'symbian mobile'
11
-
12
- q: mobile
13
-
14
-
15
- Item:name:iphone => 1004 1001
16
- Item:name:3gs => 1001
17
- Item:name:apple => 1004 1003 1002
18
- Item:name:macbook => 1003 1002
19
- Item:name:pro => 1003 1002
20
- Item:name:17 => 1003 1002
21
- Item:desc:iphone => 1004 1003 1002
22
- Item:desc:4g => 1004 1003
23
-
24
- Keywords Only
25
- -------------
26
- iphone => 1004 1001 1003 1002
27
- 3gs => 1001
28
- apple => 1004 1003 1002
29
- macbook => 1002 1003
30
- pro => 1002 1003
31
- 17 => 17
32
- 4g => 1003 1004
33
-
34
- Key value pairs
35
- ---------------
36
- name: iphone => 1001 1004
37
- name: iphone, desc: iphone => 1004
38
- name: iphone, desc: 4g => 1004
39
- name: apple, desc: iphone => 1002, 1003, 1004
40
- name: apple macbook pro, desc: iphone => 1002 1003
41
- name: apple macbook pro 17, desc: 4g => 1003
data/README.md DELETED
@@ -1,80 +0,0 @@
1
- Lunar
2
- =====
3
-
4
- A minimalistic search index implemented using Redis, taking advantage of its
5
- powerful `sorted sets` and the ability to do set intersection and union.
6
-
7
- Results are sorted by their word score, which is stored as the score in the
8
- Redis sorted set.
9
-
10
- Examples
11
- --------
12
-
13
- class Item < Ohm::Model
14
- attribute :name
15
- attribute :description
16
-
17
- protected
18
- def write
19
- super
20
- index
21
- end
22
-
23
- def index
24
- Lunar::Index.create 'Item' do |i|
25
- i.key id
26
- i.attr :name, name
27
- i.attr :description, description
28
- end
29
-
30
- # You can also do this, no problem
31
- Lunar::Index.create Item do |i|
32
- i.key id
33
- i.attr :name, name
34
- i.attr :description, description
35
- end
36
-
37
- # Or to avoid name ties...
38
- Lunar::Index.create self.class do |i|
39
- i.key id
40
- i.attr :name, name
41
- i.attr :description, description
42
- end
43
-
44
- Lunar::Index.create Item do |i|
45
- i.key id
46
- i.fuzzy :name, name # this has a 100 character limit on name
47
- # for performance reasons
48
- i.integer :cost, cost
49
- i.float :voting_quotient, voting_quotient
50
- end
51
- end
52
- end
53
-
54
- # Searching...
55
- # You can just straight out search keywords
56
- Lunar.search(Item, "iphone")
57
-
58
- # Or opt to filter by field
59
- Lunar.search(Item, :name => "iphone", :description => "mobile")
60
-
61
- # For fuzzy declared fields you can currently only search
62
- # using a fuzzy strategy exclusively, e.g.
63
- Lunar.search(Item, :fuzzy => { :name => "i" })
64
- # i, ip, iph, ipho, iphone, 3, 3g, 3gs all would match 'iPhone 3Gs'
65
-
66
- # For integer / float types, you can do range searches on them e.g.
67
- Lunar.search(Item, :cost => 300..500, :voting_quotient => 10..20)
68
-
69
- # Or using the pagination gem with this:
70
- @items = Lunar.search(Item, "iphone")
71
- paginate @items, :per_page => 10, :page => 1
72
-
73
- # If you want to be cheap about CPU cycles you can increase the
74
- # default `:ttl` of search results (which is 30)
75
-
76
- # Somewhere in config/initializers or init.rb, you decide...
77
- Lunar.ttl = 300 # search results would be the same for 5 minutes.
78
- # for high write public sites, this may be a good
79
- # option as people don't really expect their stuff to
80
- # be searchable right away on public content sites.
data/examples/ohm.rb DELETED
@@ -1,40 +0,0 @@
1
- class Item < Ohm::Model
2
- attribute :name
3
- attribute :description
4
-
5
- def index
6
- Lunar::Index.create 'Item' do |i|
7
- i.key id
8
- i.attr :name, name
9
- i.attr :description, description
10
- end
11
-
12
- # You can also do this, no problem
13
- Lunar::Index.create Item do |i|
14
- i.key id
15
- i.attr :name, name
16
- i.attr :description, description
17
- end
18
-
19
- # Or to avoid name ties...
20
- Lunar::Index.create self.class do |i|
21
- i.key id
22
- i.attr :name, name
23
- i.attr :description, description
24
- end
25
- end
26
- end
27
-
28
- # Searching...
29
- # You can just straight out search keywords
30
- Lunar.search(Item, "iphone")
31
-
32
- # Or opt to filter by field
33
- Lunar.search(Item, :name => "iphone", :description => "mobile")
34
-
35
- # Or search a field using an array
36
- Lunar.search(Item, :name => ["iphone", "apple"])
37
-
38
- # Or using the pagination gem with this:
39
- @items = Lunar.search(Item, "iphone")
40
- paginate @items, :per_page => 10, :page => 1
data/lib/lunar/search.rb DELETED
@@ -1,68 +0,0 @@
1
- module Lunar
2
- class Search
3
- attr :sets, :prefix, :search_identifier, :fuzzy_sets, :key_value_sets
4
-
5
- def initialize(prefix, keywords)
6
- if keywords.is_a?(Hash)
7
- if fuzzy_hash = keywords.delete(:fuzzy)
8
- @fuzzy_sets = fuzzy_hash.inject([]) { |a, (field, query)|
9
- a | FuzzySets.new(prefix, query, field)
10
- }
11
- @search_identifier = fuzzy_hash.hash
12
- else
13
- @key_value_sets =
14
- keywords.map { |field, val| Sets.new(prefix, val, field) }.flatten
15
-
16
- @search_identifier = keywords.hash
17
- end
18
- else
19
- @sets = Sets.new(prefix, keywords)
20
- @search_identifier = keywords.hash
21
- end
22
-
23
- @prefix = prefix
24
-
25
- # Default finder, uses Ohm style finding
26
- @finder = lambda { |id| prefix[id] }
27
- end
28
-
29
- def results(&block)
30
- block ||= @finder
31
-
32
- if sets
33
- if sets.empty?
34
- return []
35
- else
36
- if not Lunar.redis.exists(dist_key)
37
- Lunar.redis.zunion dist_key, sets.size, *sets
38
- Lunar.redis.expire dist_key, Lunar.ttl
39
- end
40
- SortedResultSet.new(dist_key, &block)
41
- end
42
- elsif key_value_sets
43
- if key_value_sets.empty?
44
- return []
45
- else
46
- Lunar.redis.zinter dist_key, key_value_sets.size, *key_value_sets
47
- SortedResultSet.new(dist_key, &block)
48
- end
49
- elsif fuzzy_sets
50
- if fuzzy_sets.empty?
51
- return []
52
- else
53
- if not Lunar.redis.exists(dist_key)
54
- Lunar.redis.sunionstore dist_key, *fuzzy_sets
55
- Lunar.redis.expire dist_key, Lunar.ttl
56
- end
57
- UnsortedResultSet.new(dist_key, &block)
58
- end
59
- end
60
- end
61
-
62
- protected
63
- def dist_key
64
- Lunar.nest[:Results][search_identifier]
65
- end
66
- end
67
- end
68
-
data/lib/lunar/sets.rb DELETED
@@ -1,86 +0,0 @@
1
- module Lunar
2
- module Sets
3
- def self.new(prefix, keywords, field = '*')
4
- case keywords
5
- when String
6
- KeywordSets.new(prefix, keywords, field)
7
- when Range
8
- RangeSets.new(prefix, keywords, field)
9
- when Array
10
- KeywordSets.new(prefix, keywords.join(' '), field)
11
- else
12
- raise TypeError, ":keywords should only be a String or Range"
13
- end
14
- end
15
- end
16
-
17
- class FuzzySets < Array
18
- attr :prefix, :words, :field
19
-
20
- def initialize(prefix, keywords, field)
21
- @prefix = prefix
22
- @field = field
23
- @words = Words.new(keywords)
24
-
25
- super(redis_set_keys)
26
- end
27
-
28
- protected
29
- def redis_set_keys
30
- words.map { |w| ns[Lunar.encode(w)] }
31
- end
32
-
33
- def ns
34
- @ns ||= Lunar.nest[prefix][:Fuzzy][field]
35
- end
36
- end
37
-
38
- class RangeSets < Array
39
- attr :prefix, :field, :range
40
-
41
- def initialize(prefix, range, field)
42
- @prefix = prefix
43
- @field = field
44
- @range = range
45
-
46
- super [write_and_retrieve_key]
47
- end
48
-
49
- def write_and_retrieve_key
50
- zrange = Lunar.redis.zrangebyscore(Lunar.nest[prefix][field],
51
- @range.first, @range.last)
52
-
53
- zrange.each { |id| Lunar.redis.zadd key, 1, id }
54
- key.to_s
55
- end
56
-
57
- def key
58
- @key ||= Lunar.nest[prefix][field]["#{ range.first }_TO_#{ range.last }"]
59
- end
60
- end
61
-
62
- class KeywordSets < Array
63
- attr :prefix, :words, :field
64
-
65
- def initialize(prefix, keywords, field = '*')
66
- @prefix = prefix
67
- @field = field
68
- @words = Words.new(keywords)
69
-
70
- super(redis_set_keys)
71
- end
72
-
73
- protected
74
- def redis_set_keys
75
- keys_for_each_word.map { |key| Lunar.redis.keys(key) }.flatten
76
- end
77
-
78
- def keys_for_each_word
79
- words.map { |w| ns[Lunar.encode(w)] }
80
- end
81
-
82
- def ns
83
- @ns ||= Lunar.nest[prefix][field]
84
- end
85
- end
86
- end
data/test/test_lunar_fuzzy.rb DELETED
@@ -1,118 +0,0 @@
1
- #
2
- # module Lunar
3
- # module Fuzzy
4
- #
5
- # end
6
- # end
7
- require "helper"
8
-
9
- class LunarFuzzyTest < Test::Unit::TestCase
10
- setup do
11
- Lunar.redis(Redis.new(:host => '127.0.0.1', :port => '6380'))
12
- Lunar.redis.flushdb
13
- end
14
-
15
- context "when setting fuzzy name, 'Yukihiro Matsumoto'" do
16
- setup do
17
- @index = Lunar::Index.create 'Item' do |i|
18
- i.key 1001
19
- i.fuzzy :name, 'Yukihiro Matsumoto'
20
- end
21
- end
22
-
23
- should "store Lunar:Item:name:Y up to o and M up to o" do
24
- fname, lname = 'yukihiro', 'matsumoto'
25
-
26
- (1..fname.length).each do |length|
27
- key = "Lunar:Item:Fuzzy:name:#{ encode(fname[0, length]) }"
28
- assert Lunar.redis.smembers(key).include?('1001')
29
- end
30
-
31
- (1..lname.length).each do |length|
32
- key = "Lunar:Item:Fuzzy:name:#{ encode(lname[0, length]) }"
33
- assert Lunar.redis.smembers(key).include?('1001')
34
- end
35
- end
36
- end
37
-
38
- context "when creating an index that already exists" do
39
- setup do
40
- @index = Lunar::Index.create 'Item' do |i|
41
- i.key 1001
42
- i.fuzzy :name, 'Yukihiro Matsumoto'
43
- end
44
-
45
- @index = Lunar::Index.create 'Item' do |i|
46
- i.key 1001
47
- i.fuzzy :name, 'Martin Fowler Yuki'
48
- end
49
- end
50
-
51
- should "remove all fuzzy entries for Yukihiro Matsumoto" do
52
- fname, lname = 'yukihiro', 'matsumoto'
53
-
54
- (5..fname.length).each do |length|
55
- key = "Lunar:Item:Fuzzy:name:#{ encode(fname[0, length]) }"
56
- assert ! Lunar.redis.smembers(key).include?('1001')
57
- end
58
-
59
- (3..lname.length).each do |length|
60
- key = "Lunar:Item:Fuzzy:name:#{ encode(lname[0, length]) }"
61
- assert ! Lunar.redis.smembers(key).include?('1001')
62
- end
63
- end
64
-
65
- should "store Lunar:Item:name:M up to n and F up to r etc..." do
66
- fname, lname, triple = 'martin', 'fowler', 'yuki'
67
-
68
- (1..fname.length).each do |length|
69
- key = "Lunar:Item:Fuzzy:name:#{ encode(fname[0, length]) }"
70
- assert Lunar.redis.smembers(key).include?('1001')
71
- end
72
-
73
- (1..lname.length).each do |length|
74
- key = "Lunar:Item:Fuzzy:name:#{ encode(lname[0, length]) }"
75
- assert Lunar.redis.smembers(key).include?('1001')
76
- end
77
-
78
- (1..triple.length).each do |length|
79
- key = "Lunar:Item:Fuzzy:name:#{ encode(triple[0, length]) }"
80
- assert Lunar.redis.smembers(key).include?('1001')
81
- end
82
- end
83
- end
84
-
85
- context "on delete" do
86
- setup do
87
- @index = Lunar::Index.create 'Item' do |i|
88
- i.key 1001
89
- i.fuzzy :name, 'Yukihiro Matsumoto'
90
- end
91
-
92
- Lunar::Index.delete('Item', 1001)
93
- end
94
-
95
- should "remove all fuzzy entries for Yukihiro Matsumoto" do
96
- fname, lname = 'yukihiro', 'matsumoto'
97
-
98
- (0..fname.length).each do |length|
99
- key = "Lunar:Item:Fuzzy:name:#{ encode(fname[0, length]) }"
100
- assert ! Lunar.redis.smembers(key).include?('1001')
101
- end
102
-
103
- (0..lname.length).each do |length|
104
- key = "Lunar:Item:Fuzzy:name:#{ encode(lname[0, length]) }"
105
- assert ! Lunar.redis.smembers(key).include?('1001')
106
- end
107
- end
108
-
109
- should "also remove the key Lunar:Item:Fuzzy:1001:name" do
110
- assert ! Lunar.redis.exists("Lunar:Item:Fuzzy:1001:name")
111
- end
112
- end
113
-
114
- protected
115
- def encode(str)
116
- Lunar.encode(str)
117
- end
118
- end