picky 0.0.0 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (161) hide show
  1. data/bin/picky +14 -0
  2. data/lib/bundling.rb +10 -0
  3. data/lib/constants.rb +9 -0
  4. data/lib/deployment.rb +212 -0
  5. data/lib/picky/application.rb +40 -0
  6. data/lib/picky/cacher/convenience.rb +3 -0
  7. data/lib/picky/cacher/generator.rb +17 -0
  8. data/lib/picky/cacher/partial/default.rb +7 -0
  9. data/lib/picky/cacher/partial/none.rb +19 -0
  10. data/lib/picky/cacher/partial/strategy.rb +7 -0
  11. data/lib/picky/cacher/partial/subtoken.rb +91 -0
  12. data/lib/picky/cacher/partial_generator.rb +15 -0
  13. data/lib/picky/cacher/similarity/default.rb +7 -0
  14. data/lib/picky/cacher/similarity/double_levenshtone.rb +73 -0
  15. data/lib/picky/cacher/similarity/none.rb +25 -0
  16. data/lib/picky/cacher/similarity/strategy.rb +7 -0
  17. data/lib/picky/cacher/similarity_generator.rb +15 -0
  18. data/lib/picky/cacher/weights/default.rb +7 -0
  19. data/lib/picky/cacher/weights/logarithmic.rb +39 -0
  20. data/lib/picky/cacher/weights/strategy.rb +7 -0
  21. data/lib/picky/cacher/weights_generator.rb +15 -0
  22. data/lib/picky/configuration/configuration.rb +13 -0
  23. data/lib/picky/configuration/field.rb +68 -0
  24. data/lib/picky/configuration/indexes.rb +60 -0
  25. data/lib/picky/configuration/queries.rb +32 -0
  26. data/lib/picky/configuration/type.rb +52 -0
  27. data/lib/picky/cores.rb +101 -0
  28. data/lib/picky/db/configuration.rb +23 -0
  29. data/lib/picky/ext/ruby19/extconf.rb +7 -0
  30. data/lib/picky/ext/ruby19/performant.c +339 -0
  31. data/lib/picky/extensions/array.rb +45 -0
  32. data/lib/picky/extensions/hash.rb +11 -0
  33. data/lib/picky/extensions/module.rb +15 -0
  34. data/lib/picky/extensions/symbol.rb +18 -0
  35. data/lib/picky/generator.rb +156 -0
  36. data/lib/picky/helpers/cache.rb +23 -0
  37. data/lib/picky/helpers/gc.rb +11 -0
  38. data/lib/picky/helpers/measuring.rb +45 -0
  39. data/lib/picky/helpers/search.rb +27 -0
  40. data/lib/picky/index/bundle.rb +328 -0
  41. data/lib/picky/index/category.rb +109 -0
  42. data/lib/picky/index/combined.rb +38 -0
  43. data/lib/picky/index/type.rb +30 -0
  44. data/lib/picky/indexers/base.rb +77 -0
  45. data/lib/picky/indexers/default.rb +3 -0
  46. data/lib/picky/indexers/field.rb +13 -0
  47. data/lib/picky/indexers/no_source_specified_error.rb +5 -0
  48. data/lib/picky/indexers/solr.rb +60 -0
  49. data/lib/picky/indexes.rb +180 -0
  50. data/lib/picky/initializers/ext.rb +6 -0
  51. data/lib/picky/initializers/mysql.rb +22 -0
  52. data/lib/picky/loader.rb +287 -0
  53. data/lib/picky/loggers/search.rb +19 -0
  54. data/lib/picky/performant/array.rb +23 -0
  55. data/lib/picky/query/allocation.rb +82 -0
  56. data/lib/picky/query/allocations.rb +131 -0
  57. data/lib/picky/query/base.rb +124 -0
  58. data/lib/picky/query/combination.rb +69 -0
  59. data/lib/picky/query/combinations.rb +106 -0
  60. data/lib/picky/query/combinator.rb +92 -0
  61. data/lib/picky/query/full.rb +15 -0
  62. data/lib/picky/query/live.rb +22 -0
  63. data/lib/picky/query/qualifiers.rb +73 -0
  64. data/lib/picky/query/solr.rb +77 -0
  65. data/lib/picky/query/token.rb +215 -0
  66. data/lib/picky/query/tokens.rb +102 -0
  67. data/lib/picky/query/weigher.rb +159 -0
  68. data/lib/picky/query/weights.rb +55 -0
  69. data/lib/picky/rack/harakiri.rb +37 -0
  70. data/lib/picky/results/base.rb +103 -0
  71. data/lib/picky/results/full.rb +19 -0
  72. data/lib/picky/results/live.rb +19 -0
  73. data/lib/picky/routing.rb +165 -0
  74. data/lib/picky/signals.rb +11 -0
  75. data/lib/picky/solr/schema_generator.rb +73 -0
  76. data/lib/picky/sources/base.rb +19 -0
  77. data/lib/picky/sources/csv.rb +30 -0
  78. data/lib/picky/sources/db.rb +77 -0
  79. data/lib/picky/tokenizers/base.rb +130 -0
  80. data/lib/picky/tokenizers/default.rb +3 -0
  81. data/lib/picky/tokenizers/index.rb +73 -0
  82. data/lib/picky/tokenizers/query.rb +70 -0
  83. data/lib/picky/umlaut_substituter.rb +21 -0
  84. data/lib/picky-tasks.rb +6 -0
  85. data/lib/picky.rb +18 -0
  86. data/lib/tasks/application.rake +5 -0
  87. data/lib/tasks/cache.rake +53 -0
  88. data/lib/tasks/framework.rake +4 -0
  89. data/lib/tasks/index.rake +29 -0
  90. data/lib/tasks/server.rake +48 -0
  91. data/lib/tasks/shortcuts.rake +13 -0
  92. data/lib/tasks/solr.rake +36 -0
  93. data/lib/tasks/spec.rake +11 -0
  94. data/lib/tasks/statistics.rake +13 -0
  95. data/lib/tasks/try.rake +29 -0
  96. data/prototype_project/Gemfile +23 -0
  97. data/prototype_project/Rakefile +1 -0
  98. data/prototype_project/app/README +6 -0
  99. data/prototype_project/app/application.rb +50 -0
  100. data/prototype_project/app/application.ru +29 -0
  101. data/prototype_project/app/db.yml +10 -0
  102. data/prototype_project/app/logging.rb +20 -0
  103. data/prototype_project/app/unicorn.ru +10 -0
  104. data/prototype_project/log/README +1 -0
  105. data/prototype_project/script/console +34 -0
  106. data/prototype_project/tmp/README +0 -0
  107. data/prototype_project/tmp/pids/README +0 -0
  108. data/spec/ext/performant_spec.rb +64 -0
  109. data/spec/lib/application_spec.rb +61 -0
  110. data/spec/lib/cacher/partial/subtoken_spec.rb +89 -0
  111. data/spec/lib/cacher/partial_generator_spec.rb +35 -0
  112. data/spec/lib/cacher/similarity/double_levenshtone_spec.rb +60 -0
  113. data/spec/lib/cacher/similarity/none_spec.rb +23 -0
  114. data/spec/lib/cacher/similarity_generator_spec.rb +22 -0
  115. data/spec/lib/cacher/weights/logarithmic_spec.rb +30 -0
  116. data/spec/lib/cacher/weights_generator_spec.rb +21 -0
  117. data/spec/lib/configuration/configuration_spec.rb +38 -0
  118. data/spec/lib/configuration/type_spec.rb +49 -0
  119. data/spec/lib/configuration_spec.rb +8 -0
  120. data/spec/lib/cores_spec.rb +65 -0
  121. data/spec/lib/extensions/array_spec.rb +37 -0
  122. data/spec/lib/extensions/hash_spec.rb +11 -0
  123. data/spec/lib/extensions/module_spec.rb +27 -0
  124. data/spec/lib/extensions/symbol_spec.rb +85 -0
  125. data/spec/lib/generator_spec.rb +135 -0
  126. data/spec/lib/helpers/cache_spec.rb +35 -0
  127. data/spec/lib/helpers/gc_spec.rb +71 -0
  128. data/spec/lib/helpers/measuring_spec.rb +18 -0
  129. data/spec/lib/helpers/search_spec.rb +50 -0
  130. data/spec/lib/index/bundle_partial_generation_speed_spec.rb +47 -0
  131. data/spec/lib/index/bundle_spec.rb +260 -0
  132. data/spec/lib/index/category_spec.rb +203 -0
  133. data/spec/lib/indexers/base_spec.rb +73 -0
  134. data/spec/lib/indexers/field_spec.rb +20 -0
  135. data/spec/lib/loader_spec.rb +48 -0
  136. data/spec/lib/loggers/search_spec.rb +19 -0
  137. data/spec/lib/performant/array_spec.rb +13 -0
  138. data/spec/lib/query/allocation_spec.rb +194 -0
  139. data/spec/lib/query/allocations_spec.rb +336 -0
  140. data/spec/lib/query/base_spec.rb +104 -0
  141. data/spec/lib/query/combination_spec.rb +90 -0
  142. data/spec/lib/query/combinations_spec.rb +83 -0
  143. data/spec/lib/query/combinator_spec.rb +112 -0
  144. data/spec/lib/query/full_spec.rb +22 -0
  145. data/spec/lib/query/live_spec.rb +61 -0
  146. data/spec/lib/query/qualifiers_spec.rb +31 -0
  147. data/spec/lib/query/solr_spec.rb +51 -0
  148. data/spec/lib/query/token_spec.rb +297 -0
  149. data/spec/lib/query/tokens_spec.rb +189 -0
  150. data/spec/lib/query/weights_spec.rb +47 -0
  151. data/spec/lib/results/base_spec.rb +233 -0
  152. data/spec/lib/routing_spec.rb +318 -0
  153. data/spec/lib/solr/schema_generator_spec.rb +42 -0
  154. data/spec/lib/sources/db_spec.rb +91 -0
  155. data/spec/lib/tokenizers/base_spec.rb +61 -0
  156. data/spec/lib/tokenizers/index_spec.rb +51 -0
  157. data/spec/lib/tokenizers/query_spec.rb +105 -0
  158. data/spec/lib/umlaut_substituter_spec.rb +84 -0
  159. data/spec/specific/speed_spec.rb +55 -0
  160. metadata +371 -15
  161. data/README.textile +0 -9
@@ -0,0 +1,61 @@
1
+ # coding: utf-8
2
+ require 'spec_helper'
3
+
4
+ describe Tokenizers::Base do
5
+
6
+ before(:each) do
7
+ @tokenizer = Tokenizers::Base.new
8
+ end
9
+
10
+ context 'stopwords' do
11
+ describe '.stopwords' do
12
+ context 'without stopwords given' do
13
+ it 'should define a method remove_stopwords' do
14
+ lambda { @tokenizer.remove_stopwords('from this text') }.should_not raise_error
15
+ end
16
+ it 'should define a method remove_stopwords that does nothing' do
17
+ @tokenizer.remove_stopwords('from this text').should == nil
18
+ end
19
+ it 'should not define a method remove_non_single_stopwords' do
20
+ lambda { @tokenizer.remove_non_single_stopwords('from this text') }.should raise_error(NoMethodError)
21
+ end
22
+ end
23
+ context 'with stopwords given' do
24
+ before(:each) do
25
+ class << @tokenizer
26
+ stopwords(/r|e/)
27
+ end
28
+ end
29
+ it 'should define a method remove_stopwords' do
30
+ lambda { @tokenizer.remove_stopwords('from this text') }.should_not raise_error
31
+ end
32
+ it 'should define a method stopwords that removes stopwords' do
33
+ @tokenizer.remove_stopwords('from this text').should == 'fom this txt'
34
+ end
35
+ it 'should define a method remove_non_single_stopwords' do
36
+ lambda { @tokenizer.remove_non_single_stopwords('from this text') }.should_not raise_error
37
+ end
38
+ it 'should define a method remove_non_single_stopwords that removes non-single stopwords' do
39
+ @tokenizer.remove_non_single_stopwords('rerere rerere').should == ' '
40
+ end
41
+ it 'should define a method remove_non_single_stopwords that does not remove single stopwords' do
42
+ @tokenizer.remove_non_single_stopwords('rerere').should == 'rerere'
43
+ end
44
+ end
45
+ context 'error case' do
46
+ before(:each) do
47
+ class << @tokenizer
48
+ stopwords(/any/)
49
+ end
50
+ end
51
+ it 'should not remove non-single stopwords with a star' do
52
+ @tokenizer.remove_non_single_stopwords('a*').should == 'a*'
53
+ end
54
+ it 'should not remove non-single stopwords with a tilde' do
55
+ @tokenizer.remove_non_single_stopwords('a~').should == 'a~'
56
+ end
57
+ end
58
+ end
59
+ end
60
+
61
+ end
@@ -0,0 +1,51 @@
1
+ # encoding: utf-8
2
+ #
3
+ require 'spec_helper'
4
+
5
+ # TODO CLEAN UP.
6
+ #
7
+ describe Tokenizers::Index do
8
+
9
+ before(:each) do
10
+ @tokenizer = Tokenizers::Index.new
11
+ end
12
+
13
+ describe "remove_illegal_characters" do
14
+ it "should not remove ' from a query by default" do
15
+ @tokenizer.remove_illegals("Lugi's").should == "Lugi's"
16
+ end
17
+ end
18
+
19
+ describe "reject!" do
20
+ it "should reject tokens if blank" do
21
+ t1 = stub(:token, :to_s => '')
22
+ t2 = stub(:token, :to_s => 'not blank')
23
+ t3 = stub(:token, :to_s => '')
24
+
25
+ @tokenizer.reject([t1, t2, t3]).should == [t2]
26
+ end
27
+ end
28
+
29
+ describe "tokenize" do
30
+ describe "normalizing" do
31
+ def self.it_should_normalize_token(text, expected)
32
+ it "should handle the #{text} case" do
33
+ @tokenizer.tokenize(text).to_a.should == [expected].compact
34
+ end
35
+ end
36
+ # defaults
37
+ it_should_normalize_token 'it_should_not_normalize_by_default', :it_should_not_normalize_by_default
38
+ end
39
+ describe "tokenizing" do
40
+ def self.it_should_tokenize_token(text, expected)
41
+ it "should handle the #{text} case" do
42
+ @tokenizer.tokenize(text).to_a.should == expected
43
+ end
44
+ end
45
+ # defaults
46
+ it_should_tokenize_token "splitting on \\s", [:splitting, :on, :"\\s"]
47
+ it_should_tokenize_token 'und', [:und]
48
+ end
49
+ end
50
+
51
+ end
@@ -0,0 +1,105 @@
1
+ # coding: utf-8
2
+ require 'spec_helper'
3
+
4
+ describe Tokenizers::Query do
5
+
6
+ before(:each) do
7
+ @tokenizer = Tokenizers::Query.new
8
+ end
9
+
10
+ describe 'preprocess' do
11
+ it 'should call methods in order' do
12
+ text = stub :text
13
+
14
+ @tokenizer.should_receive(:remove_illegals).once.ordered.with text
15
+ @tokenizer.should_receive(:remove_non_single_stopwords).once.ordered.with text
16
+ @tokenizer.should_receive(:contract).once.ordered
17
+
18
+ @tokenizer.preprocess text
19
+ end
20
+ it 'should return the text unchanged by default' do
21
+ text = "some text"
22
+
23
+ @tokenizer.preprocess(text).should == text
24
+ end
25
+ end
26
+
27
+ describe 'process' do
28
+ before(:each) do
29
+ @tokens = mock :tokens, :null_object => true
30
+ end
31
+ it 'should tokenize the tokens' do
32
+ @tokens.should_receive(:tokenize_with).once.with @tokenizer
33
+
34
+ @tokenizer.process @tokens
35
+ end
36
+ it 'should call methods on the tokens in order' do
37
+ @tokens.should_receive(:tokenize_with).once.ordered
38
+ @tokens.should_receive(:reject).once.ordered
39
+ @tokens.should_receive(:cap).once.ordered
40
+ @tokens.should_receive(:partialize_last).once.ordered
41
+
42
+ @tokenizer.process @tokens
43
+ end
44
+ it 'should return the tokens' do
45
+ @tokenizer.process(@tokens).should == @tokens
46
+ end
47
+ end
48
+
49
+ describe 'pretokenize' do
50
+ def self.it_should_pretokenize text, expected
51
+ it "should pretokenize #{text} as #{expected}" do
52
+ @tokenizer.pretokenize(text).should == expected
53
+ end
54
+ end
55
+ it_should_pretokenize 'test miau test', ['test', 'miau', 'test']
56
+ end
57
+
58
+ describe "tokenizing" do
59
+ def self.it_should_tokenize_token(text, expected)
60
+ it "should handle the #{text} case" do
61
+ @tokenizer.tokenize(text).map(&:text).should == expected
62
+ end
63
+ end
64
+ it_should_tokenize_token 'simple tokenizing on \s', [:simple, :tokenizing, :on, :'\s']
65
+ end
66
+
67
+ describe 'normalize_with_patterns' do
68
+ def self.it_should_pattern_normalize original, expected
69
+ it "should normalize #{original} with pattern into #{expected}" do
70
+ @tokenizer.normalize_with_patterns(original).should == expected
71
+ end
72
+ end
73
+ it_should_pattern_normalize 'no pattern normalization', 'no pattern normalization'
74
+ end
75
+
76
+ describe 'reject' do
77
+ it 'should reject blank tokens' do
78
+ @tokenizer.reject(["some token answering to blank?", nil, nil]).should == ["some token answering to blank?"]
79
+ end
80
+ end
81
+
82
+ describe "last token" do
83
+ it "should be partial" do
84
+ @tokenizer.tokenize("First Second Third Last").last.instance_variable_get(:@partial).should be_true
85
+ end
86
+ end
87
+
88
+ describe ".tokenize" do
89
+ it "should return an Array of tokens" do
90
+ @tokenizer.tokenize('test test').to_a.should be_instance_of(Array)
91
+ end
92
+ it "should return an empty tokenized query if the query string is blank or empty" do
93
+ @tokenizer.tokenize('').map(&:to_s).should == []
94
+ end
95
+ end
96
+ describe "token_for" do
97
+ it "should get a preprocessed token" do
98
+ text = stub(:text)
99
+ Query::Token.should_receive(:processed).with text
100
+
101
+ @tokenizer.token_for text
102
+ end
103
+ end
104
+
105
+ end
@@ -0,0 +1,84 @@
1
+ # encoding: utf-8
2
+ require 'spec_helper'
3
+
4
+ describe UmlautSubstituter do
5
+ include UmlautSubstituter
6
+
7
+ # A bit of metaprogramming to help with the myriads of its.
8
+ #
9
+ def self.it_should_substitute(special_character, normal_character)
10
+ it "should substitute #{special_character} with #{normal_character}" do
11
+ substitute_umlauts(special_character).should == normal_character
12
+ end
13
+ end
14
+ def self.it_should_not_substitute(special_character)
15
+ it "should not substitute #{special_character}" do
16
+ substitute_umlauts(special_character).should == special_character
17
+ end
18
+ end
19
+
20
+ describe "normal characters" do
21
+ it_should_not_substitute('abcdefghijklmnopqrstuvwxyz1234567890')
22
+ end
23
+
24
+ describe "situations" do
25
+ it_should_substitute 'Peter Müller', 'Peter Mueller'
26
+ it_should_substitute 'Lüchinger', 'Luechinger'
27
+ # it_should_substitute 'LÜCHINGER', 'LUECHINGER'
28
+ end
29
+
30
+ describe "umlauts" do
31
+ it_should_substitute 'ä', 'ae'
32
+ it_should_substitute 'Ä', 'Ae'
33
+ it_should_substitute 'ë', 'e'
34
+ it_should_substitute 'Ë', 'E'
35
+ it_should_substitute 'ï', 'i'
36
+ it_should_substitute 'Ï', 'I'
37
+ it_should_substitute 'ö', 'oe'
38
+ it_should_substitute 'Ö', 'Oe'
39
+ it_should_substitute 'ü', 'ue'
40
+ it_should_substitute 'Ü', 'Ue'
41
+ end
42
+
43
+ describe "acute" do
44
+ it_should_substitute 'é', 'e'
45
+ it_should_substitute 'É', 'E'
46
+ end
47
+
48
+ describe "grave" do
49
+ it_should_substitute 'à', 'a'
50
+ it_should_substitute 'À', 'A'
51
+ it_should_substitute 'è', 'e'
52
+ it_should_substitute 'È', 'E'
53
+ it_should_substitute 'ì', 'i'
54
+ it_should_substitute 'ò', 'o'
55
+ end
56
+
57
+ describe "circumflex" do
58
+ it_should_substitute 'â', 'a'
59
+ it_should_substitute 'ê', 'e'
60
+ it_should_substitute 'Ê', 'E'
61
+ it_should_substitute 'î', 'i'
62
+ it_should_substitute 'Î', 'I'
63
+ it_should_substitute 'ô', 'o'
64
+ it_should_substitute 'Ô', 'O'
65
+ it_should_substitute 'û', 'u'
66
+ end
67
+
68
+ describe "cedilla" do
69
+ it_should_substitute 'ç', 'c'
70
+ it_should_substitute 'Ç', 'C'
71
+ end
72
+
73
+ describe "ligatures" do
74
+ it_should_substitute 'ß', 'ss'
75
+ # it_should_substitute 'Æ', 'AE'
76
+ end
77
+
78
+ describe "norse" do
79
+ # it_should_substitute 'ø', 'o'
80
+ it_should_substitute 'å', 'a'
81
+ it_should_substitute 'Å', 'A'
82
+ end
83
+
84
+ end
@@ -0,0 +1,55 @@
1
+ require File.dirname(__FILE__) + '/../spec_helper'
2
+
3
+ describe "Speccing Ruby for speed" do
4
+
5
+ describe "various versions for allocation id concatenating" do
6
+ before(:each) do
7
+ @allocs = [:hello, :speed, :test]
8
+ @ids = {
9
+ :hello => (1..100_000).to_a,
10
+ :speed => (1..5_000).to_a,
11
+ :test => (1..1_000).to_a
12
+ }
13
+ GC.disable
14
+ end
15
+ after(:each) do
16
+ GC.enable
17
+ GC.start # start the GC to minimize the chance that it will run again during the speed spec
18
+ end
19
+ describe "+" do
20
+ it "should be fast" do
21
+ Benchmark.realtime do
22
+ @allocs.inject([]) do |total, alloc|
23
+ total + @ids[alloc]
24
+ end
25
+ end.should <= 0.0025
26
+ end
27
+ end
28
+ describe "map and flatten!(1)" do
29
+ it "should be fast" do
30
+ Benchmark.realtime do
31
+ @allocs.map { |alloc| @ids[alloc] }.flatten!(1)
32
+ end.should <= 0.02
33
+ end
34
+ end
35
+ describe "<< and flatten!(1)" do
36
+ it "should be fast" do
37
+ Benchmark.realtime do
38
+ @allocs.inject([]) do |total, alloc|
39
+ total << @ids[alloc]
40
+ end.flatten!(1)
41
+ end.should <= 0.02
42
+ end
43
+ end
44
+ describe "<< and flatten!" do
45
+ it "should be fast" do
46
+ Benchmark.realtime do
47
+ @allocs.inject([]) do |total, alloc|
48
+ total << @ids[alloc]
49
+ end.flatten!
50
+ end.should <= 0.02
51
+ end
52
+ end
53
+ end
54
+
55
+ end