lunar 0.4.1 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (69) hide show
  1. data/.gitignore +2 -1
  2. data/LICENSE +1 -1
  3. data/README.markdown +116 -0
  4. data/Rakefile +6 -5
  5. data/VERSION +1 -1
  6. data/lib/lunar.rb +112 -24
  7. data/lib/lunar/connection.rb +51 -0
  8. data/lib/lunar/fuzzy_matches.rb +24 -0
  9. data/lib/lunar/fuzzy_word.rb +2 -2
  10. data/lib/lunar/index.rb +200 -94
  11. data/lib/lunar/keyword_matches.rb +32 -0
  12. data/lib/lunar/lunar_nest.rb +19 -0
  13. data/lib/lunar/range_matches.rb +28 -0
  14. data/lib/lunar/result_set.rb +85 -28
  15. data/lib/lunar/scoring.rb +4 -2
  16. data/lib/lunar/stopwords.rb +15 -0
  17. data/lib/lunar/words.rb +6 -3
  18. data/lunar.gemspec +31 -60
  19. data/test/helper.rb +4 -5
  20. data/test/test_fuzzy_indexing.rb +105 -0
  21. data/test/test_index.rb +150 -0
  22. data/test/test_lunar.rb +178 -1
  23. data/test/test_lunar_fuzzy_word.rb +4 -4
  24. data/test/test_lunar_nest.rb +46 -0
  25. data/test/{test_lunar_scoring.rb → test_scoring.rb} +5 -5
  26. metadata +72 -68
  27. data/.document +0 -5
  28. data/DATA +0 -41
  29. data/README.md +0 -80
  30. data/examples/ohm.rb +0 -40
  31. data/lib/lunar/search.rb +0 -68
  32. data/lib/lunar/sets.rb +0 -86
  33. data/test/test_lunar_fuzzy.rb +0 -118
  34. data/test/test_lunar_index.rb +0 -191
  35. data/test/test_lunar_search.rb +0 -261
  36. data/test/test_sets.rb +0 -48
  37. data/vendor/nest/nest.rb +0 -7
  38. data/vendor/redis/.gitignore +0 -9
  39. data/vendor/redis/LICENSE +0 -20
  40. data/vendor/redis/README.markdown +0 -120
  41. data/vendor/redis/Rakefile +0 -75
  42. data/vendor/redis/benchmarking/logging.rb +0 -62
  43. data/vendor/redis/benchmarking/pipeline.rb +0 -44
  44. data/vendor/redis/benchmarking/speed.rb +0 -21
  45. data/vendor/redis/benchmarking/suite.rb +0 -24
  46. data/vendor/redis/benchmarking/worker.rb +0 -71
  47. data/vendor/redis/bin/distredis +0 -33
  48. data/vendor/redis/examples/basic.rb +0 -15
  49. data/vendor/redis/examples/dist_redis.rb +0 -43
  50. data/vendor/redis/examples/incr-decr.rb +0 -17
  51. data/vendor/redis/examples/list.rb +0 -26
  52. data/vendor/redis/examples/pubsub.rb +0 -25
  53. data/vendor/redis/examples/sets.rb +0 -36
  54. data/vendor/redis/lib/edis.rb +0 -3
  55. data/vendor/redis/lib/redis.rb +0 -496
  56. data/vendor/redis/lib/redis/client.rb +0 -265
  57. data/vendor/redis/lib/redis/dist_redis.rb +0 -118
  58. data/vendor/redis/lib/redis/distributed.rb +0 -460
  59. data/vendor/redis/lib/redis/hash_ring.rb +0 -131
  60. data/vendor/redis/lib/redis/pipeline.rb +0 -13
  61. data/vendor/redis/lib/redis/raketasks.rb +0 -1
  62. data/vendor/redis/lib/redis/subscribe.rb +0 -79
  63. data/vendor/redis/profile.rb +0 -22
  64. data/vendor/redis/tasks/redis.tasks.rb +0 -140
  65. data/vendor/redis/test/db/.gitignore +0 -1
  66. data/vendor/redis/test/distributed_test.rb +0 -1131
  67. data/vendor/redis/test/redis_test.rb +0 -1134
  68. data/vendor/redis/test/test.conf +0 -8
  69. data/vendor/redis/test/test_helper.rb +0 -113
data/.document DELETED
@@ -1,5 +0,0 @@
1
- README.rdoc
2
- lib/**/*.rb
3
- bin/*
4
- features/**/*.feature
5
- LICENSE
data/DATA DELETED
@@ -1,41 +0,0 @@
1
-
2
- Item
3
- - id: 1
4
- - name: 'iphone 3g'
5
- - tags: 'apple mobile'
6
-
7
- Item
8
- - id: 2
9
- - name: 'nokia n95'
10
- - tags: 'symbian mobile'
11
-
12
- q: mobile
13
-
14
-
15
- Item:name:iphone => 1004 1001
16
- Item:name:3gs => 1001
17
- Item:name:apple => 1004 1003 1002
18
- Item:name:macbook => 1003 1002
19
- Item:name:pro => 1003 1002
20
- Item:name:17 => 1003 1002
21
- Item:desc:iphone => 1004 1003 1002
22
- Item:desc:4g => 1004 1003
23
-
24
- Keywords Only
25
- -------------
26
- iphone => 1004 1001 1003 1002
27
- 3gs => 1001
28
- apple => 1004 1003 1002
29
- macbook => 1002 1003
30
- pro => 1002 1003
31
- 17 => 17
32
- 4g => 1003 1004
33
-
34
- Key value pairs
35
- ---------------
36
- name: iphone => 1001 1004
37
- name: iphone, desc: iphone => 1004
38
- name: iphone, desc: 4g => 1004
39
- name: apple, desc: iphone => 1002, 1003, 1004
40
- name: apple macbook pro, desc: iphone => 1002 1003
41
- name: apple macbook pro 17, desc: 4g => 1003
data/README.md DELETED
@@ -1,80 +0,0 @@
1
- Lunar
2
- =====
3
-
4
- A minimalistic search index implemented using Redis, taking advantage of its
5
- powerful `sorted sets` and the ability to do set intersection and union.
6
-
7
- Results are sorted by their word score, which is stored as the score in the
8
- Redis sorted set.
9
-
10
- Examples
11
- --------
12
-
13
- class Item < Ohm::Model
14
- attribute :name
15
- attribute :description
16
-
17
- protected
18
- def write
19
- super
20
- index
21
- end
22
-
23
- def index
24
- Lunar::Index.create 'Item' do |i|
25
- i.key id
26
- i.attr :name, name
27
- i.attr :description, description
28
- end
29
-
30
- # You can also do this, no problem
31
- Lunar::Index.create Item do |i|
32
- i.key id
33
- i.attr :name, name
34
- i.attr :description, description
35
- end
36
-
37
- # Or to avoid name ties...
38
- Lunar::Index.create self.class do |i|
39
- i.key id
40
- i.attr :name, name
41
- i.attr :description, description
42
- end
43
-
44
- Lunar::Index.create Item do |i|
45
- i.key id
46
- i.fuzzy :name, name # this has a 100 character limit on name
47
- # for performance reasons
48
- i.integer :cost, cost
49
- i.float :voting_quotient, voting_quotient
50
- end
51
- end
52
- end
53
-
54
- # Searching...
55
- # You can just straight out search keywords
56
- Lunar.search(Item, "iphone")
57
-
58
- # Or opt to filter by field
59
- Lunar.search(Item, :name => "iphone", :description => "mobile")
60
-
61
- # For fuzzy declared fields you can currently only search
62
- # using a fuzzy strategy exclusively, e.g.
63
- Lunar.search(Item, :fuzzy => { :name => "i" })
64
- # i, ip, iph, ipho, iphone, 3, 3g, 3gs all would match 'iPhone 3Gs'
65
-
66
- # For integer / float types, you can do range searches on them e.g.
67
- Lunar.search(Item, :cost => 300..500, :voting_quotient => 10..20)
68
-
69
- # Or using the pagination gem with this:
70
- @items = Lunar.search(Item, "iphone")
71
- paginate @items, :per_page => 10, :page => 1
72
-
73
- # If you want to be cheap about CPU cycles you can increase the
74
- # default `:ttl` of search results (which is 30)
75
-
76
- # Somewhere in config/initializers or init.rb, you decide...
77
- Lunar.ttl = 300 # search results would be the same for 5 minutes.
78
- # for high write public sites, this may be a good
79
- # option as people don't really expect their stuff to
80
- # be searchable right away on public content sites.
data/examples/ohm.rb DELETED
@@ -1,40 +0,0 @@
1
- class Item < Ohm::Model
2
- attribute :name
3
- attribute :description
4
-
5
- def index
6
- Lunar::Index.create 'Item' do |i|
7
- i.key id
8
- i.attr :name, name
9
- i.attr :description, description
10
- end
11
-
12
- # You can also do this, no problem
13
- Lunar::Index.create Item do |i|
14
- i.key id
15
- i.attr :name, name
16
- i.attr :description, description
17
- end
18
-
19
- # Or to avoid name ties...
20
- Lunar::Index.create self.class do |i|
21
- i.key id
22
- i.attr :name, name
23
- i.attr :description, description
24
- end
25
- end
26
- end
27
-
28
- # Searching...
29
- # You can just straight out search keywords
30
- Lunar.search(Item, "iphone")
31
-
32
- # Or opt to filter by field
33
- Lunar.search(Item, :name => "iphone", :description => "mobile")
34
-
35
- # Or search a field using an array
36
- Lunar.search(Item, :name => ["iphone", "apple"])
37
-
38
- # Or using the pagination gem with this:
39
- @items = Lunar.search(Item, "iphone")
40
- paginate @items, :per_page => 10, :page => 1
data/lib/lunar/search.rb DELETED
@@ -1,68 +0,0 @@
1
- module Lunar
2
- class Search
3
- attr :sets, :prefix, :search_identifier, :fuzzy_sets, :key_value_sets
4
-
5
- def initialize(prefix, keywords)
6
- if keywords.is_a?(Hash)
7
- if fuzzy_hash = keywords.delete(:fuzzy)
8
- @fuzzy_sets = fuzzy_hash.inject([]) { |a, (field, query)|
9
- a | FuzzySets.new(prefix, query, field)
10
- }
11
- @search_identifier = fuzzy_hash.hash
12
- else
13
- @key_value_sets =
14
- keywords.map { |field, val| Sets.new(prefix, val, field) }.flatten
15
-
16
- @search_identifier = keywords.hash
17
- end
18
- else
19
- @sets = Sets.new(prefix, keywords)
20
- @search_identifier = keywords.hash
21
- end
22
-
23
- @prefix = prefix
24
-
25
- # Default finder, uses Ohm style finding
26
- @finder = lambda { |id| prefix[id] }
27
- end
28
-
29
- def results(&block)
30
- block ||= @finder
31
-
32
- if sets
33
- if sets.empty?
34
- return []
35
- else
36
- if not Lunar.redis.exists(dist_key)
37
- Lunar.redis.zunion dist_key, sets.size, *sets
38
- Lunar.redis.expire dist_key, Lunar.ttl
39
- end
40
- SortedResultSet.new(dist_key, &block)
41
- end
42
- elsif key_value_sets
43
- if key_value_sets.empty?
44
- return []
45
- else
46
- Lunar.redis.zinter dist_key, key_value_sets.size, *key_value_sets
47
- SortedResultSet.new(dist_key, &block)
48
- end
49
- elsif fuzzy_sets
50
- if fuzzy_sets.empty?
51
- return []
52
- else
53
- if not Lunar.redis.exists(dist_key)
54
- Lunar.redis.sunionstore dist_key, *fuzzy_sets
55
- Lunar.redis.expire dist_key, Lunar.ttl
56
- end
57
- UnsortedResultSet.new(dist_key, &block)
58
- end
59
- end
60
- end
61
-
62
- protected
63
- def dist_key
64
- Lunar.nest[:Results][search_identifier]
65
- end
66
- end
67
- end
68
-
data/lib/lunar/sets.rb DELETED
@@ -1,86 +0,0 @@
1
- module Lunar
2
- module Sets
3
- def self.new(prefix, keywords, field = '*')
4
- case keywords
5
- when String
6
- KeywordSets.new(prefix, keywords, field)
7
- when Range
8
- RangeSets.new(prefix, keywords, field)
9
- when Array
10
- KeywordSets.new(prefix, keywords.join(' '), field)
11
- else
12
- raise TypeError, ":keywords should only be a String or Range"
13
- end
14
- end
15
- end
16
-
17
- class FuzzySets < Array
18
- attr :prefix, :words, :field
19
-
20
- def initialize(prefix, keywords, field)
21
- @prefix = prefix
22
- @field = field
23
- @words = Words.new(keywords)
24
-
25
- super(redis_set_keys)
26
- end
27
-
28
- protected
29
- def redis_set_keys
30
- words.map { |w| ns[Lunar.encode(w)] }
31
- end
32
-
33
- def ns
34
- @ns ||= Lunar.nest[prefix][:Fuzzy][field]
35
- end
36
- end
37
-
38
- class RangeSets < Array
39
- attr :prefix, :field, :range
40
-
41
- def initialize(prefix, range, field)
42
- @prefix = prefix
43
- @field = field
44
- @range = range
45
-
46
- super [write_and_retrieve_key]
47
- end
48
-
49
- def write_and_retrieve_key
50
- zrange = Lunar.redis.zrangebyscore(Lunar.nest[prefix][field],
51
- @range.first, @range.last)
52
-
53
- zrange.each { |id| Lunar.redis.zadd key, 1, id }
54
- key.to_s
55
- end
56
-
57
- def key
58
- @key ||= Lunar.nest[prefix][field]["#{ range.first }_TO_#{ range.last }"]
59
- end
60
- end
61
-
62
- class KeywordSets < Array
63
- attr :prefix, :words, :field
64
-
65
- def initialize(prefix, keywords, field = '*')
66
- @prefix = prefix
67
- @field = field
68
- @words = Words.new(keywords)
69
-
70
- super(redis_set_keys)
71
- end
72
-
73
- protected
74
- def redis_set_keys
75
- keys_for_each_word.map { |key| Lunar.redis.keys(key) }.flatten
76
- end
77
-
78
- def keys_for_each_word
79
- words.map { |w| ns[Lunar.encode(w)] }
80
- end
81
-
82
- def ns
83
- @ns ||= Lunar.nest[prefix][field]
84
- end
85
- end
86
- end
data/test/test_lunar_fuzzy.rb DELETED
@@ -1,118 +0,0 @@
1
- #
2
- # module Lunar
3
- # module Fuzzy
4
- #
5
- # end
6
- # end
7
- require "helper"
8
-
9
- class LunarFuzzyTest < Test::Unit::TestCase
10
- setup do
11
- Lunar.redis(Redis.new(:host => '127.0.0.1', :port => '6380'))
12
- Lunar.redis.flushdb
13
- end
14
-
15
- context "when setting fuzzy name, 'Yukihiro Matsumoto'" do
16
- setup do
17
- @index = Lunar::Index.create 'Item' do |i|
18
- i.key 1001
19
- i.fuzzy :name, 'Yukihiro Matsumoto'
20
- end
21
- end
22
-
23
- should "store Lunar:Item:name:Y up to o and M up to o" do
24
- fname, lname = 'yukihiro', 'matsumoto'
25
-
26
- (1..fname.length).each do |length|
27
- key = "Lunar:Item:Fuzzy:name:#{ encode(fname[0, length]) }"
28
- assert Lunar.redis.smembers(key).include?('1001')
29
- end
30
-
31
- (1..lname.length).each do |length|
32
- key = "Lunar:Item:Fuzzy:name:#{ encode(lname[0, length]) }"
33
- assert Lunar.redis.smembers(key).include?('1001')
34
- end
35
- end
36
- end
37
-
38
- context "when creating an index that already exists" do
39
- setup do
40
- @index = Lunar::Index.create 'Item' do |i|
41
- i.key 1001
42
- i.fuzzy :name, 'Yukihiro Matsumoto'
43
- end
44
-
45
- @index = Lunar::Index.create 'Item' do |i|
46
- i.key 1001
47
- i.fuzzy :name, 'Martin Fowler Yuki'
48
- end
49
- end
50
-
51
- should "remove all fuzzy entries for Yukihiro Matsumoto" do
52
- fname, lname = 'yukihiro', 'matsumoto'
53
-
54
- (5..fname.length).each do |length|
55
- key = "Lunar:Item:Fuzzy:name:#{ encode(fname[0, length]) }"
56
- assert ! Lunar.redis.smembers(key).include?('1001')
57
- end
58
-
59
- (3..lname.length).each do |length|
60
- key = "Lunar:Item:Fuzzy:name:#{ encode(lname[0, length]) }"
61
- assert ! Lunar.redis.smembers(key).include?('1001')
62
- end
63
- end
64
-
65
- should "store Lunar:Item:name:M up to n and F up to r etc..." do
66
- fname, lname, triple = 'martin', 'fowler', 'yuki'
67
-
68
- (1..fname.length).each do |length|
69
- key = "Lunar:Item:Fuzzy:name:#{ encode(fname[0, length]) }"
70
- assert Lunar.redis.smembers(key).include?('1001')
71
- end
72
-
73
- (1..lname.length).each do |length|
74
- key = "Lunar:Item:Fuzzy:name:#{ encode(lname[0, length]) }"
75
- assert Lunar.redis.smembers(key).include?('1001')
76
- end
77
-
78
- (1..triple.length).each do |length|
79
- key = "Lunar:Item:Fuzzy:name:#{ encode(triple[0, length]) }"
80
- assert Lunar.redis.smembers(key).include?('1001')
81
- end
82
- end
83
- end
84
-
85
- context "on delete" do
86
- setup do
87
- @index = Lunar::Index.create 'Item' do |i|
88
- i.key 1001
89
- i.fuzzy :name, 'Yukihiro Matsumoto'
90
- end
91
-
92
- Lunar::Index.delete('Item', 1001)
93
- end
94
-
95
- should "remove all fuzzy entries for Yukihiro Matsumoto" do
96
- fname, lname = 'yukihiro', 'matsumoto'
97
-
98
- (0..fname.length).each do |length|
99
- key = "Lunar:Item:Fuzzy:name:#{ encode(fname[0, length]) }"
100
- assert ! Lunar.redis.smembers(key).include?('1001')
101
- end
102
-
103
- (0..lname.length).each do |length|
104
- key = "Lunar:Item:Fuzzy:name:#{ encode(lname[0, length]) }"
105
- assert ! Lunar.redis.smembers(key).include?('1001')
106
- end
107
- end
108
-
109
- should "also remove the key Lunar:Item:Fuzzy:1001:name" do
110
- assert ! Lunar.redis.exists("Lunar:Item:Fuzzy:1001:name")
111
- end
112
- end
113
-
114
- protected
115
- def encode(str)
116
- Lunar.encode(str)
117
- end
118
- end