picky 0.0.0 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (161) hide show
  1. data/bin/picky +14 -0
  2. data/lib/bundling.rb +10 -0
  3. data/lib/constants.rb +9 -0
  4. data/lib/deployment.rb +212 -0
  5. data/lib/picky/application.rb +40 -0
  6. data/lib/picky/cacher/convenience.rb +3 -0
  7. data/lib/picky/cacher/generator.rb +17 -0
  8. data/lib/picky/cacher/partial/default.rb +7 -0
  9. data/lib/picky/cacher/partial/none.rb +19 -0
  10. data/lib/picky/cacher/partial/strategy.rb +7 -0
  11. data/lib/picky/cacher/partial/subtoken.rb +91 -0
  12. data/lib/picky/cacher/partial_generator.rb +15 -0
  13. data/lib/picky/cacher/similarity/default.rb +7 -0
  14. data/lib/picky/cacher/similarity/double_levenshtone.rb +73 -0
  15. data/lib/picky/cacher/similarity/none.rb +25 -0
  16. data/lib/picky/cacher/similarity/strategy.rb +7 -0
  17. data/lib/picky/cacher/similarity_generator.rb +15 -0
  18. data/lib/picky/cacher/weights/default.rb +7 -0
  19. data/lib/picky/cacher/weights/logarithmic.rb +39 -0
  20. data/lib/picky/cacher/weights/strategy.rb +7 -0
  21. data/lib/picky/cacher/weights_generator.rb +15 -0
  22. data/lib/picky/configuration/configuration.rb +13 -0
  23. data/lib/picky/configuration/field.rb +68 -0
  24. data/lib/picky/configuration/indexes.rb +60 -0
  25. data/lib/picky/configuration/queries.rb +32 -0
  26. data/lib/picky/configuration/type.rb +52 -0
  27. data/lib/picky/cores.rb +101 -0
  28. data/lib/picky/db/configuration.rb +23 -0
  29. data/lib/picky/ext/ruby19/extconf.rb +7 -0
  30. data/lib/picky/ext/ruby19/performant.c +339 -0
  31. data/lib/picky/extensions/array.rb +45 -0
  32. data/lib/picky/extensions/hash.rb +11 -0
  33. data/lib/picky/extensions/module.rb +15 -0
  34. data/lib/picky/extensions/symbol.rb +18 -0
  35. data/lib/picky/generator.rb +156 -0
  36. data/lib/picky/helpers/cache.rb +23 -0
  37. data/lib/picky/helpers/gc.rb +11 -0
  38. data/lib/picky/helpers/measuring.rb +45 -0
  39. data/lib/picky/helpers/search.rb +27 -0
  40. data/lib/picky/index/bundle.rb +328 -0
  41. data/lib/picky/index/category.rb +109 -0
  42. data/lib/picky/index/combined.rb +38 -0
  43. data/lib/picky/index/type.rb +30 -0
  44. data/lib/picky/indexers/base.rb +77 -0
  45. data/lib/picky/indexers/default.rb +3 -0
  46. data/lib/picky/indexers/field.rb +13 -0
  47. data/lib/picky/indexers/no_source_specified_error.rb +5 -0
  48. data/lib/picky/indexers/solr.rb +60 -0
  49. data/lib/picky/indexes.rb +180 -0
  50. data/lib/picky/initializers/ext.rb +6 -0
  51. data/lib/picky/initializers/mysql.rb +22 -0
  52. data/lib/picky/loader.rb +287 -0
  53. data/lib/picky/loggers/search.rb +19 -0
  54. data/lib/picky/performant/array.rb +23 -0
  55. data/lib/picky/query/allocation.rb +82 -0
  56. data/lib/picky/query/allocations.rb +131 -0
  57. data/lib/picky/query/base.rb +124 -0
  58. data/lib/picky/query/combination.rb +69 -0
  59. data/lib/picky/query/combinations.rb +106 -0
  60. data/lib/picky/query/combinator.rb +92 -0
  61. data/lib/picky/query/full.rb +15 -0
  62. data/lib/picky/query/live.rb +22 -0
  63. data/lib/picky/query/qualifiers.rb +73 -0
  64. data/lib/picky/query/solr.rb +77 -0
  65. data/lib/picky/query/token.rb +215 -0
  66. data/lib/picky/query/tokens.rb +102 -0
  67. data/lib/picky/query/weigher.rb +159 -0
  68. data/lib/picky/query/weights.rb +55 -0
  69. data/lib/picky/rack/harakiri.rb +37 -0
  70. data/lib/picky/results/base.rb +103 -0
  71. data/lib/picky/results/full.rb +19 -0
  72. data/lib/picky/results/live.rb +19 -0
  73. data/lib/picky/routing.rb +165 -0
  74. data/lib/picky/signals.rb +11 -0
  75. data/lib/picky/solr/schema_generator.rb +73 -0
  76. data/lib/picky/sources/base.rb +19 -0
  77. data/lib/picky/sources/csv.rb +30 -0
  78. data/lib/picky/sources/db.rb +77 -0
  79. data/lib/picky/tokenizers/base.rb +130 -0
  80. data/lib/picky/tokenizers/default.rb +3 -0
  81. data/lib/picky/tokenizers/index.rb +73 -0
  82. data/lib/picky/tokenizers/query.rb +70 -0
  83. data/lib/picky/umlaut_substituter.rb +21 -0
  84. data/lib/picky-tasks.rb +6 -0
  85. data/lib/picky.rb +18 -0
  86. data/lib/tasks/application.rake +5 -0
  87. data/lib/tasks/cache.rake +53 -0
  88. data/lib/tasks/framework.rake +4 -0
  89. data/lib/tasks/index.rake +29 -0
  90. data/lib/tasks/server.rake +48 -0
  91. data/lib/tasks/shortcuts.rake +13 -0
  92. data/lib/tasks/solr.rake +36 -0
  93. data/lib/tasks/spec.rake +11 -0
  94. data/lib/tasks/statistics.rake +13 -0
  95. data/lib/tasks/try.rake +29 -0
  96. data/prototype_project/Gemfile +23 -0
  97. data/prototype_project/Rakefile +1 -0
  98. data/prototype_project/app/README +6 -0
  99. data/prototype_project/app/application.rb +50 -0
  100. data/prototype_project/app/application.ru +29 -0
  101. data/prototype_project/app/db.yml +10 -0
  102. data/prototype_project/app/logging.rb +20 -0
  103. data/prototype_project/app/unicorn.ru +10 -0
  104. data/prototype_project/log/README +1 -0
  105. data/prototype_project/script/console +34 -0
  106. data/prototype_project/tmp/README +0 -0
  107. data/prototype_project/tmp/pids/README +0 -0
  108. data/spec/ext/performant_spec.rb +64 -0
  109. data/spec/lib/application_spec.rb +61 -0
  110. data/spec/lib/cacher/partial/subtoken_spec.rb +89 -0
  111. data/spec/lib/cacher/partial_generator_spec.rb +35 -0
  112. data/spec/lib/cacher/similarity/double_levenshtone_spec.rb +60 -0
  113. data/spec/lib/cacher/similarity/none_spec.rb +23 -0
  114. data/spec/lib/cacher/similarity_generator_spec.rb +22 -0
  115. data/spec/lib/cacher/weights/logarithmic_spec.rb +30 -0
  116. data/spec/lib/cacher/weights_generator_spec.rb +21 -0
  117. data/spec/lib/configuration/configuration_spec.rb +38 -0
  118. data/spec/lib/configuration/type_spec.rb +49 -0
  119. data/spec/lib/configuration_spec.rb +8 -0
  120. data/spec/lib/cores_spec.rb +65 -0
  121. data/spec/lib/extensions/array_spec.rb +37 -0
  122. data/spec/lib/extensions/hash_spec.rb +11 -0
  123. data/spec/lib/extensions/module_spec.rb +27 -0
  124. data/spec/lib/extensions/symbol_spec.rb +85 -0
  125. data/spec/lib/generator_spec.rb +135 -0
  126. data/spec/lib/helpers/cache_spec.rb +35 -0
  127. data/spec/lib/helpers/gc_spec.rb +71 -0
  128. data/spec/lib/helpers/measuring_spec.rb +18 -0
  129. data/spec/lib/helpers/search_spec.rb +50 -0
  130. data/spec/lib/index/bundle_partial_generation_speed_spec.rb +47 -0
  131. data/spec/lib/index/bundle_spec.rb +260 -0
  132. data/spec/lib/index/category_spec.rb +203 -0
  133. data/spec/lib/indexers/base_spec.rb +73 -0
  134. data/spec/lib/indexers/field_spec.rb +20 -0
  135. data/spec/lib/loader_spec.rb +48 -0
  136. data/spec/lib/loggers/search_spec.rb +19 -0
  137. data/spec/lib/performant/array_spec.rb +13 -0
  138. data/spec/lib/query/allocation_spec.rb +194 -0
  139. data/spec/lib/query/allocations_spec.rb +336 -0
  140. data/spec/lib/query/base_spec.rb +104 -0
  141. data/spec/lib/query/combination_spec.rb +90 -0
  142. data/spec/lib/query/combinations_spec.rb +83 -0
  143. data/spec/lib/query/combinator_spec.rb +112 -0
  144. data/spec/lib/query/full_spec.rb +22 -0
  145. data/spec/lib/query/live_spec.rb +61 -0
  146. data/spec/lib/query/qualifiers_spec.rb +31 -0
  147. data/spec/lib/query/solr_spec.rb +51 -0
  148. data/spec/lib/query/token_spec.rb +297 -0
  149. data/spec/lib/query/tokens_spec.rb +189 -0
  150. data/spec/lib/query/weights_spec.rb +47 -0
  151. data/spec/lib/results/base_spec.rb +233 -0
  152. data/spec/lib/routing_spec.rb +318 -0
  153. data/spec/lib/solr/schema_generator_spec.rb +42 -0
  154. data/spec/lib/sources/db_spec.rb +91 -0
  155. data/spec/lib/tokenizers/base_spec.rb +61 -0
  156. data/spec/lib/tokenizers/index_spec.rb +51 -0
  157. data/spec/lib/tokenizers/query_spec.rb +105 -0
  158. data/spec/lib/umlaut_substituter_spec.rb +84 -0
  159. data/spec/specific/speed_spec.rb +55 -0
  160. metadata +371 -15
  161. data/README.textile +0 -9
@@ -0,0 +1,61 @@
1
+ # coding: utf-8
2
+ require 'spec_helper'
3
+
4
+ describe Tokenizers::Base do
5
+
6
+ before(:each) do
7
+ @tokenizer = Tokenizers::Base.new
8
+ end
9
+
10
+ context 'stopwords' do
11
+ describe '.stopwords' do
12
+ context 'without stopwords given' do
13
+ it 'should define a method remove_stopwords' do
14
+ lambda { @tokenizer.remove_stopwords('from this text') }.should_not raise_error
15
+ end
16
+ it 'should define a method remove_stopwords that does nothing' do
17
+ @tokenizer.remove_stopwords('from this text').should == nil
18
+ end
19
+ it 'should not define a method remove_non_single_stopwords' do
20
+ lambda { @tokenizer.remove_non_single_stopwords('from this text') }.should raise_error(NoMethodError)
21
+ end
22
+ end
23
+ context 'with stopwords given' do
24
+ before(:each) do
25
+ class << @tokenizer
26
+ stopwords(/r|e/)
27
+ end
28
+ end
29
+ it 'should define a method remove_stopwords' do
30
+ lambda { @tokenizer.remove_stopwords('from this text') }.should_not raise_error
31
+ end
32
+ it 'should define a method stopwords that removes stopwords' do
33
+ @tokenizer.remove_stopwords('from this text').should == 'fom this txt'
34
+ end
35
+ it 'should define a method remove_non_single_stopwords' do
36
+ lambda { @tokenizer.remove_non_single_stopwords('from this text') }.should_not raise_error
37
+ end
38
+ it 'should define a method remove_non_single_stopwords that removes non-single stopwords' do
39
+ @tokenizer.remove_non_single_stopwords('rerere rerere').should == ' '
40
+ end
41
+ it 'should define a method remove_non_single_stopwords that does not single stopwords' do
42
+ @tokenizer.remove_non_single_stopwords('rerere').should == 'rerere'
43
+ end
44
+ end
45
+ context 'error case' do
46
+ before(:each) do
47
+ class << @tokenizer
48
+ stopwords(/any/)
49
+ end
50
+ end
51
+ it 'should not remove non-single stopwords with a star' do
52
+ @tokenizer.remove_non_single_stopwords('a*').should == 'a*'
53
+ end
54
+ it 'should not remove non-single stopwords with a tilde' do
55
+ @tokenizer.remove_non_single_stopwords('a~').should == 'a~'
56
+ end
57
+ end
58
+ end
59
+ end
60
+
61
+ end
@@ -0,0 +1,51 @@
1
+ # encoding: utf-8
2
+ #
3
+ require 'spec_helper'
4
+
5
+ # TODO: Clean up this spec.
6
+ #
7
+ describe Tokenizers::Index do
8
+
9
+ before(:each) do
10
+ @tokenizer = Tokenizers::Index.new
11
+ end
12
+
13
+ describe "remove_illegal_characters" do
14
+ it "should not remove ' from a query by default" do
15
+ @tokenizer.remove_illegals("Lugi's").should == "Lugi's"
16
+ end
17
+ end
18
+
19
+ describe "reject!" do
20
+ it "should reject tokens if blank" do
21
+ t1 = stub(:token, :to_s => '')
22
+ t2 = stub(:token, :to_s => 'not blank')
23
+ t3 = stub(:token, :to_s => '')
24
+
25
+ @tokenizer.reject([t1, t2, t3]).should == [t2]
26
+ end
27
+ end
28
+
29
+ describe "tokenize" do
30
+ describe "normalizing" do
31
+ def self.it_should_normalize_token(text, expected)
32
+ it "should handle the #{text} case" do
33
+ @tokenizer.tokenize(text).to_a.should == [expected].compact
34
+ end
35
+ end
36
+ # defaults
37
+ it_should_normalize_token 'it_should_not_normalize_by_default', :it_should_not_normalize_by_default
38
+ end
39
+ describe "tokenizing" do
40
+ def self.it_should_tokenize_token(text, expected)
41
+ it "should handle the #{text} case" do
42
+ @tokenizer.tokenize(text).to_a.should == expected
43
+ end
44
+ end
45
+ # defaults
46
+ it_should_tokenize_token "splitting on \\s", [:splitting, :on, :"\\s"]
47
+ it_should_tokenize_token 'und', [:und]
48
+ end
49
+ end
50
+
51
+ end
@@ -0,0 +1,105 @@
1
+ # coding: utf-8
2
+ require 'spec_helper'
3
+
4
+ describe Tokenizers::Query do
5
+
6
+ before(:each) do
7
+ @tokenizer = Tokenizers::Query.new
8
+ end
9
+
10
+ describe 'preprocess' do
11
+ it 'should call methods in order' do
12
+ text = stub :text
13
+
14
+ @tokenizer.should_receive(:remove_illegals).once.ordered.with text
15
+ @tokenizer.should_receive(:remove_non_single_stopwords).once.ordered.with text
16
+ @tokenizer.should_receive(:contract).once.ordered
17
+
18
+ @tokenizer.preprocess text
19
+ end
20
+ it 'should return the text unchanged by default' do
21
+ text = "some text"
22
+
23
+ @tokenizer.preprocess(text).should == text
24
+ end
25
+ end
26
+
27
+ describe 'process' do
28
+ before(:each) do
29
+ @tokens = mock :tokens, :null_object => true
30
+ end
31
+ it 'should tokenize the tokens' do
32
+ @tokens.should_receive(:tokenize_with).once.with @tokenizer
33
+
34
+ @tokenizer.process @tokens
35
+ end
36
+ it 'should call methods on the tokens in order' do
37
+ @tokens.should_receive(:tokenize_with).once.ordered
38
+ @tokens.should_receive(:reject).once.ordered
39
+ @tokens.should_receive(:cap).once.ordered
40
+ @tokens.should_receive(:partialize_last).once.ordered
41
+
42
+ @tokenizer.process @tokens
43
+ end
44
+ it 'should return the tokens' do
45
+ @tokenizer.process(@tokens).should == @tokens
46
+ end
47
+ end
48
+
49
+ describe 'pretokenize' do
50
+ def self.it_should_pretokenize text, expected
51
+ it "should pretokenize #{text} as #{expected}" do
52
+ @tokenizer.pretokenize(text).should == expected
53
+ end
54
+ end
55
+ it_should_pretokenize 'test miau test', ['test', 'miau', 'test']
56
+ end
57
+
58
+ describe "tokenizing" do
59
+ def self.it_should_tokenize_token(text, expected)
60
+ it "should handle the #{text} case" do
61
+ @tokenizer.tokenize(text).map(&:text).should == expected
62
+ end
63
+ end
64
+ it_should_tokenize_token 'simple tokenizing on \s', [:simple, :tokenizing, :on, :'\s']
65
+ end
66
+
67
+ describe 'normalize_with_patterns' do
68
+ def self.it_should_pattern_normalize original, expected
69
+ it "should normalize #{original} with pattern into #{expected}" do
70
+ @tokenizer.normalize_with_patterns(original).should == expected
71
+ end
72
+ end
73
+ it_should_pattern_normalize 'no pattern normalization', 'no pattern normalization'
74
+ end
75
+
76
+ describe 'reject' do
77
+ it 'should reject blank tokens' do
78
+ @tokenizer.reject(["some token answering to blank?", nil, nil]).should == ["some token answering to blank?"]
79
+ end
80
+ end
81
+
82
+ describe "last token" do
83
+ it "should be partial" do
84
+ @tokenizer.tokenize("First Second Third Last").last.instance_variable_get(:@partial).should be_true
85
+ end
86
+ end
87
+
88
+ describe ".tokenize" do
89
+ it "should return an Array of tokens" do
90
+ @tokenizer.tokenize('test test').to_a.should be_instance_of(Array)
91
+ end
92
+ it "should return an empty tokenized query if the query string is blank or empty" do
93
+ @tokenizer.tokenize('').map(&:to_s).should == []
94
+ end
95
+ end
96
+ describe "token_for" do
97
+ it "should get a preprocessed token" do
98
+ text = stub(:text)
99
+ Query::Token.should_receive(:processed).with text
100
+
101
+ @tokenizer.token_for text
102
+ end
103
+ end
104
+
105
+ end
@@ -0,0 +1,84 @@
1
+ # encoding: utf-8
2
+ require 'spec_helper'
3
+
4
+ describe UmlautSubstituter do
5
+ include UmlautSubstituter
6
+
7
+ # Example-generating helpers: each call defines one `it` example, so every character case below fits on a single line.
8
+ #
9
+ def self.it_should_substitute(special_character, normal_character) # expects substitute_umlauts to turn the first string into the second
10
+ it "should substitute #{special_character} with #{normal_character}" do
11
+ substitute_umlauts(special_character).should == normal_character
12
+ end
13
+ end
14
+ def self.it_should_not_substitute(special_character) # expects substitute_umlauts to return the string unchanged
15
+ it "should not substitute #{special_character}" do
16
+ substitute_umlauts(special_character).should == special_character
17
+ end
18
+ end
19
+
20
+ describe "normal characters" do
21
+ it_should_not_substitute('abcdefghijklmnopqrstuvwxyz1234567890')
22
+ end
23
+
24
+ describe "situations" do
25
+ it_should_substitute 'Peter Müller', 'Peter Mueller'
26
+ it_should_substitute 'Lüchinger', 'Luechinger'
27
+ # it_should_substitute 'LÜCHINGER', 'LUECHINGER'
28
+ end
29
+
30
+ describe "umlauts" do
31
+ it_should_substitute 'ä', 'ae'
32
+ it_should_substitute 'Ä', 'Ae'
33
+ it_should_substitute 'ë', 'e'
34
+ it_should_substitute 'Ë', 'E'
35
+ it_should_substitute 'ï', 'i'
36
+ it_should_substitute 'Ï', 'I'
37
+ it_should_substitute 'ö', 'oe'
38
+ it_should_substitute 'Ö', 'Oe'
39
+ it_should_substitute 'ü', 'ue'
40
+ it_should_substitute 'Ü', 'Ue'
41
+ end
42
+
43
+ describe "acute" do
44
+ it_should_substitute 'é', 'e'
45
+ it_should_substitute 'É', 'E'
46
+ end
47
+
48
+ describe "grave" do
49
+ it_should_substitute 'à', 'a'
50
+ it_should_substitute 'À', 'A'
51
+ it_should_substitute 'è', 'e'
52
+ it_should_substitute 'È', 'E'
53
+ it_should_substitute 'ì', 'i'
54
+ it_should_substitute 'ò', 'o'
55
+ end
56
+
57
+ describe "circonflex" do
58
+ it_should_substitute 'â', 'a'
59
+ it_should_substitute 'ê', 'e'
60
+ it_should_substitute 'Ê', 'E'
61
+ it_should_substitute 'î', 'i'
62
+ it_should_substitute 'Î', 'I'
63
+ it_should_substitute 'ô', 'o'
64
+ it_should_substitute 'Ô', 'O'
65
+ it_should_substitute 'û', 'u'
66
+ end
67
+
68
+ describe "cedilla" do
69
+ it_should_substitute 'ç', 'c'
70
+ it_should_substitute 'Ç', 'C'
71
+ end
72
+
73
+ describe "ligatures" do
74
+ it_should_substitute 'ß', 'ss'
75
+ # it_should_substitute 'Æ', 'AE'
76
+ end
77
+
78
+ describe "norse" do
79
+ # it_should_substitute 'ø', 'o'
80
+ it_should_substitute 'å', 'a'
81
+ it_should_substitute 'Å', 'A'
82
+ end
83
+
84
+ end
@@ -0,0 +1,55 @@
1
+ require File.dirname(__FILE__) + '/../spec_helper'
2
+
3
+ describe "Speccing Ruby for speed" do
4
+
5
+ describe "various versions for allocation id concatenating" do
6
+ before(:each) do
7
+ @allocs = [:hello, :speed, :test]
8
+ @ids = {
9
+ :hello => (1..100_000).to_a,
10
+ :speed => (1..5_000).to_a,
11
+ :test => (1..1_000).to_a
12
+ }
13
+ GC.disable # presumably so a collection cannot skew the timed block; re-enabled in the after hook
14
+ end
15
+ after(:each) do
16
+ GC.enable
17
+ GC.start # collect right away to minimize the chance of a GC run during the next speed example
18
+ end
19
+ describe "+" do
20
+ it "should be fast" do
21
+ Benchmark.realtime do
22
+ @allocs.inject([]) do |total, alloc|
23
+ total + @ids[alloc]
24
+ end
25
+ end.should <= 0.0025
26
+ end
27
+ end
28
+ describe "map and flatten!(1)" do
29
+ it "should be fast" do
30
+ Benchmark.realtime do
31
+ @allocs.map { |alloc| @ids[alloc] }.flatten!(1)
32
+ end.should <= 0.02
33
+ end
34
+ end
35
+ describe "<< and flatten!(1)" do
36
+ it "should be fast" do
37
+ Benchmark.realtime do
38
+ @allocs.inject([]) do |total, alloc|
39
+ total << @ids[alloc]
40
+ end.flatten!(1)
41
+ end.should <= 0.02
42
+ end
43
+ end
44
+ describe "<< and flatten!" do
45
+ it "should be fast" do
46
+ Benchmark.realtime do
47
+ @allocs.inject([]) do |total, alloc|
48
+ total << @ids[alloc]
49
+ end.flatten!
50
+ end.should <= 0.02
51
+ end
52
+ end
53
+ end
54
+
55
+ end