picky 0.2.1 → 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -3,6 +3,19 @@
3
3
  class Application
4
4
  class << self
5
5
 
6
+ # Finalize the subclass as soon as it
7
+ # has finished loading.
8
+ #
9
+ # Note: finalize finalizes the routes.
10
+ #
11
+ def inherited app
12
+ @apps ||= []
13
+ @apps << app
14
+ end
15
+ def finalize_apps
16
+ @apps.each &:finalize
17
+ end
18
+
6
19
  # An application simply delegates to the routing to handle a request.
7
20
  #
8
21
  def call env
@@ -9,6 +9,8 @@ module Index
9
9
  #
10
10
  # TODO Need to use the right subtokens. Bake in?
11
11
  #
12
+ # TODO One can use it as a wrapper, and it will extract the indexes itself. Rename: ExactFirst.
13
+ #
12
14
  class Combined < Bundle
13
15
 
14
16
  delegate :similar,
@@ -25,6 +27,8 @@ module Index
25
27
  :load,
26
28
  :to => :@partial
27
29
 
30
+ # TODO initialize type_or_category # => installs itself on all full and partial
31
+ #
28
32
  def initialize full, partial
29
33
  @full = full
30
34
  @partial = partial
data/lib/picky/loader.rb CHANGED
@@ -70,6 +70,10 @@ module Loader
70
70
  load_user 'app/logging'
71
71
  load_user 'app/application'
72
72
 
73
+ # Finalize the applications.
74
+ #
75
+ Application.finalize_apps
76
+
73
77
  # TODO Rewrite
74
78
  #
75
79
  Query::Qualifiers.instance.prepare
@@ -32,7 +32,7 @@ module Query
32
32
  results = nil
33
33
 
34
34
  duration = timed do
35
- results = execute(tokens, offset) || empty_results # TODO Does not work yet
35
+ results = execute(tokens, offset) || empty_results(offset) # TODO Does not work yet
36
36
  end
37
37
  results.duration = duration.round 6
38
38
 
@@ -42,21 +42,21 @@ module Query
42
42
  # Return nil if no results have been found.
43
43
  #
44
44
  def execute tokens, offset
45
- results_from sorted_allocations(tokens), offset
45
+ results_from offset, sorted_allocations(tokens)
46
46
  end
47
-
47
+
48
48
  # Returns an empty result with default values.
49
49
  #
50
- def empty_results
51
- result_type.new
50
+ def empty_results offset = 0
51
+ result_type.new offset
52
52
  end
53
-
53
+
54
54
  # Delegates the tokenizing to the query tokenizer.
55
55
  #
56
56
  def tokenized text
57
57
  @tokenizer.tokenize text
58
58
  end
59
-
59
+
60
60
  # Gets sorted allocations for the tokens.
61
61
  #
62
62
  # This generates the possible allocations, sorted.
@@ -111,9 +111,9 @@ module Query
111
111
  #
112
112
  # TODO Move to results. result_type.from allocations, offset
113
113
  #
114
- def results_from allocations = nil, offset = 0
115
- results = result_type.new allocations
116
- results.prepare! offset
114
+ def results_from offset = 0, allocations = nil
115
+ results = result_type.new offset, allocations
116
+ results.prepare!
117
117
  results
118
118
  end
119
119
 
@@ -5,16 +5,16 @@ module Results
5
5
  #
6
6
  class Base
7
7
 
8
- # Duration is set by the query.
8
+ # Duration is set externally by the query.
9
9
  #
10
10
  attr_writer :duration
11
11
  attr_reader :allocations, :offset
12
12
 
13
13
  # Takes instances of Query::Allocations as param.
14
14
  #
15
- def initialize allocations = nil
15
+ def initialize offset = 0, allocations = nil
16
+ @offset = offset
16
17
  @allocations = allocations || Query::Allocations.new
17
- @offset = 0
18
18
  end
19
19
 
20
20
  def add more_results
@@ -49,9 +49,8 @@ module Results
49
49
  # Without this, the allocations are not processed,
50
50
  # and no ids are calculated.
51
51
  #
52
- def prepare! offset = 0
53
- @offset = offset
54
- allocations.process! self.max_results, offset
52
+ def prepare!
53
+ allocations.process! self.max_results, self.offset
55
54
  end
56
55
 
57
56
  # Duration default is 0.
@@ -88,6 +88,13 @@ module Tokenizers
88
88
  process tokens # processing tokens / strings
89
89
  end
90
90
 
91
+ attr_accessor :substituter
92
+ alias substituter? substituter
93
+
94
+ def initialize substituter = UmlautSubstituter.new
95
+ @substituter = substituter
96
+ end
97
+
91
98
  # Hooks.
92
99
  #
93
100
 
@@ -5,8 +5,6 @@ module Tokenizers
5
5
  #
6
6
  class Index < Base
7
7
 
8
- include UmlautSubstituter
9
-
10
8
  # Default handling definitions. Override in config.
11
9
  #
12
10
  removes_characters(//)
@@ -26,7 +24,7 @@ module Tokenizers
26
24
  # 5. Remove non-single stopwords. (Stopwords that occur with other words)
27
25
  #
28
26
  def preprocess text
29
- text = substitute_umlauts text
27
+ text = substituter.substitute text if substituter?
30
28
  text.downcase!
31
29
  remove_illegals text
32
30
  contract text
@@ -13,8 +13,6 @@ module Tokenizers
13
13
  #
14
14
  class Query < Base
15
15
 
16
- include UmlautSubstituter
17
-
18
16
  # Default query tokenizer behaviour. Override in config.
19
17
  #
20
18
  removes_characters(//)
@@ -53,10 +51,10 @@ module Tokenizers
53
51
  # TODO Perhaps move to Normalizer?
54
52
  #
55
53
  def normalize text
56
- text = substitute_umlauts text # Substitute special characters TODO Move to subclass
57
- text.downcase! # Downcase all text
58
- normalize_with_patterns text # normalize
59
- text.to_sym # symbolize
54
+ text = substituter.substitute text if substituter? # Substitute special characters TODO Move to subclass
55
+ text.downcase! # Downcase all text
56
+ normalize_with_patterns text # normalize
57
+ text.to_sym # symbolize
60
58
  end
61
59
 
62
60
  # Returns a token for a word.
@@ -1,8 +1,20 @@
1
1
  # encoding: utf-8
2
2
  #
3
- module UmlautSubstituter
4
- def substitute_umlauts text
5
- trans = ActiveSupport::Multibyte.proxy_class.new(text).normalize(:kd)
3
+
4
+ # Substitutes certain umlauts, like
5
+ # ä, ö, ü => ae, oe, ue.
6
+ # (and more, see specs)
7
+ #
8
+ class UmlautSubstituter
9
+
10
+ attr_reader :chars
11
+
12
+ def initialize
13
+ @chars = ActiveSupport::Multibyte.proxy_class
14
+ end
15
+
16
+ def substitute text
17
+ trans = chars.new(text).normalize(:kd)
6
18
 
7
19
  # substitute special cases
8
20
  #
@@ -18,4 +30,5 @@ module UmlautSubstituter
18
30
  cp < 0x0300 || cp > 0x035F
19
31
  }.pack('U*')
20
32
  end
33
+
21
34
  end
@@ -23,8 +23,6 @@ Indexes.load_from_cache
23
23
  # Use Harakiri middleware to kill unicorn child after X requests.
24
24
  #
25
25
  # See http://vimeo.com/12614970 for more info.
26
- #
27
- # Note: Comment this.
28
26
  #
29
27
  Rack::Harakiri.after = 50
30
28
  use Rack::Harakiri
@@ -33,5 +31,4 @@ use Rack::Harakiri
33
31
  #
34
32
  # Note: Needs to be the same constant name as in app/application.rb.
35
33
  #
36
- PickySearch.finalize
37
34
  run PickySearch
@@ -3,13 +3,6 @@ require File.dirname(__FILE__) + '/../spec_helper'
3
3
  describe Performant::Array do
4
4
 
5
5
  describe "memory_efficient_intersect" do
6
- before(:each) do
7
- GC.disable
8
- end
9
- after(:each) do
10
- GC.enable
11
- GC.start
12
- end
13
6
  it "should intersect empty arrays correctly" do
14
7
  arys = [[3,4], [1,2,3], []]
15
8
 
@@ -45,15 +38,13 @@ describe Performant::Array do
45
38
  arys = [(1..50).to_a, (10_000..20_000).to_a << 7]
46
39
 
47
40
  # brute force
48
- Benchmark.realtime do
49
- Performant::Array.memory_efficient_intersect(arys.sort_by(&:size))
50
- end.should < 0.001
41
+ performance_of { Performant::Array.memory_efficient_intersect(arys.sort_by(&:size)) }.should < 0.001
51
42
  end
52
43
  it "should be optimal for 2 small arrays of 50/10_000" do
53
44
  arys = [(1..50).to_a, (10_000..20_000).to_a << 7]
54
45
 
55
46
  # &
56
- Benchmark.realtime do
47
+ performance_of do
57
48
  arys.inject(arys.shift.dup) do |total, ary|
58
49
  total & arys
59
50
  end
@@ -24,7 +24,7 @@ describe Application do
24
24
  # Note that Picky needs the following characters to
25
25
  # pass through, as they are control characters: *"~:
26
26
  #
27
- querying.removes_characters(/[^a-zA-Z0-9\s\/\-\,\&\"\~\*\:]/)
27
+ querying.removes_characters(/[^a-zA-Z0-9äöü\s\/\-\,\&\"\~\*\:]/)
28
28
  querying.stopwords(/\b(and|the|of|it|in|for)\b/)
29
29
  querying.splits_text_on(/[\s\/\-\,\&]+/)
30
30
  querying.normalizes_words([
@@ -28,7 +28,7 @@ describe Cacher::Partial::Subtoken do
28
28
  }
29
29
  end
30
30
  it "should be fast" do
31
- Benchmark.realtime { @cacher.generate_from( :florian => [1], :flavia => [2] ) }.should < 0.0001
31
+ performance_of { @cacher.generate_from( :florian => [1], :flavia => [2] ) }.should < 0.0001
32
32
  end
33
33
  it "should handle duplicate ids" do
34
34
  @cacher.generate_from( :flo => [1], :fla => [1] ).should == {
@@ -106,7 +106,7 @@ describe Cacher::Partial::Subtoken do
106
106
  end
107
107
  end
108
108
  it "should be fast" do
109
- Benchmark.realtime { @cacher.generate_from(@index) }.should < 0.005
109
+ performance_of { @cacher.generate_from(@index) }.should < 0.005
110
110
  end
111
111
  end
112
112
  describe "a bigger example with almost identical symbols" do
@@ -118,7 +118,7 @@ describe Cacher::Partial::Subtoken do
118
118
  end
119
119
  end
120
120
  it "should be fast" do
121
- Benchmark.realtime { @cacher.generate_from(@index) }.should < 0.003
121
+ performance_of { @cacher.generate_from(@index) }.should < 0.003
122
122
  end
123
123
  end
124
124
  end
@@ -39,7 +39,7 @@ describe Hash do
39
39
  lambda { @obj.to_json(:some => :option) }.should_not raise_error
40
40
  end
41
41
  it "should be fast" do
42
- Benchmark.realtime { @obj.to_json }.should < 0.00006
42
+ performance_of { @obj.to_json }.should < 0.00006
43
43
  end
44
44
  end
45
45
 
@@ -6,11 +6,6 @@ describe Symbol do
6
6
  include Helpers::Measuring
7
7
  before(:each) do
8
8
  @token = (((0..9).to_a)*10).to_s.to_sym
9
- GC.disable
10
- end
11
- after(:each) do
12
- GC.enable
13
- GC.start
14
9
  end
15
10
  # Note: They influence each other. each_subtoken is faster though.
16
11
  #
@@ -29,15 +29,9 @@ describe Index::Bundle do
29
29
  random_keys = generate_random_keys 500
30
30
  random_ids = generate_random_ids 500
31
31
  @full.index = Hash[random_keys.zip(random_ids)]
32
-
33
- GC.disable
34
- end
35
- after(:each) do
36
- GC.enable
37
- GC.start
38
32
  end
39
33
  it 'should be fast' do
40
- Benchmark.realtime do
34
+ performance_of do
41
35
  @full.generate_partial
42
36
  end.should < 0.2
43
37
  end
@@ -328,7 +328,7 @@ describe Query::Allocations do
328
328
  @allocations.total.should == 110
329
329
  end
330
330
  it 'should be fast' do
331
- Benchmark.realtime { @allocations.process!(20, 0) }.should < 0.0001
331
+ performance_of { @allocations.process!(20, 0) }.should < 0.0001
332
332
  end
333
333
  end
334
334
  end
@@ -62,21 +62,21 @@ describe 'Query::Combinations' do
62
62
  @combination2.should_receive(:ids).once.with.and_return (1..100).to_a
63
63
  @combination3.should_receive(:ids).once.with.and_return (1..10).to_a
64
64
 
65
- Benchmark.realtime { @combinations.ids }.should < 0.004
65
+ performance_of { @combinations.ids }.should < 0.004
66
66
  end
67
67
  it "should be fast" do
68
68
  @combination1.should_receive(:ids).once.with.and_return (1..1000).to_a
69
69
  @combination2.should_receive(:ids).once.with.and_return (1..100).to_a
70
70
  @combination3.should_receive(:ids).once.with.and_return (1..10).to_a
71
71
 
72
- Benchmark.realtime { @combinations.ids }.should < 0.00015
72
+ performance_of { @combinations.ids }.should < 0.00015
73
73
  end
74
74
  it "should be fast" do
75
75
  @combination1.should_receive(:ids).once.with.and_return (1..1000).to_a
76
76
  @combination2.should_receive(:ids).once.with.and_return (901..1000).to_a
77
77
  @combination3.should_receive(:ids).once.with.and_return (1..10).to_a
78
78
 
79
- Benchmark.realtime { @combinations.ids }.should < 0.0001
79
+ performance_of { @combinations.ids }.should < 0.0001
80
80
  end
81
81
  end
82
82
 
@@ -31,7 +31,7 @@ describe Query::Live do
31
31
  allocations = stub :allocations
32
32
  @query.should_receive(:sorted_allocations).and_return allocations
33
33
 
34
- @query.should_receive(:results_from).once.with(allocations, 0).and_return stub(:results, :prepare! => true)
34
+ @query.should_receive(:results_from).once.with(0, allocations).and_return stub(:results, :prepare! => true)
35
35
 
36
36
  @query.execute 'some query', 0
37
37
  end
@@ -53,7 +53,7 @@ describe Query::Live do
53
53
  @query.results_from(@allocations).duration.should == 0
54
54
  end
55
55
  it "should generate a result with the allocations" do
56
- @query.results_from(@allocations).allocations.should == @allocations
56
+ @query.results_from(0, @allocations).allocations.should == @allocations
57
57
  end
58
58
  end
59
59
  end
@@ -20,10 +20,9 @@ describe Results do
20
20
  @allocations = stub :allocations,
21
21
  :process! => nil, :size => 12
22
22
 
23
- @results = Results::Base.new @allocations
23
+ @results = Results::Base.new 1234, @allocations
24
24
  @results.stub! :duration => 0.1234567890,
25
- :total => 12345678,
26
- :offset => 1234
25
+ :total => 12345678
27
26
  end
28
27
  it 'should output a specific log' do
29
28
  @results.to_log('some_query').should == '|0-08-16 10:07:33|0.123457|some_query |12345678|1234|12|'
@@ -46,11 +45,11 @@ describe Results do
46
45
  before(:each) do
47
46
  @allocations = stub :allocations, :process! => nil, :to_result => :allocations, :total => :some_total
48
47
 
49
- @results = Results::Base.new @allocations
48
+ @results = Results::Base.new :some_offset, @allocations
50
49
  @results.duration = :some_duration
51
50
  end
52
51
  it 'should do it correctly' do
53
- @results.prepare! :some_offset
52
+ @results.prepare!
54
53
 
55
54
  @results.serialize.should == { :allocations => :allocations, :offset => :some_offset, :duration => :some_duration, :total => :some_total }
56
55
  end
@@ -139,7 +138,7 @@ describe Results do
139
138
  }.should_not raise_error
140
139
  end
141
140
  it 'should set the allocations to an empty array' do
142
- Results::Full.new(:some_allocations).instance_variable_get(:@allocations).should == :some_allocations
141
+ Results::Full.new(:unimportant, :some_allocations).instance_variable_get(:@allocations).should == :some_allocations
143
142
  end
144
143
  end
145
144
  describe 'Live' do
@@ -149,7 +148,7 @@ describe Results do
149
148
  }.should_not raise_error
150
149
  end
151
150
  it 'should set the allocations to an empty array' do
152
- Results::Live.new(:some_allocations).instance_variable_get(:@allocations).should == :some_allocations
151
+ Results::Live.new(:unimportant, :some_allocations).instance_variable_get(:@allocations).should == :some_allocations
153
152
  end
154
153
  end
155
154
  end
@@ -188,7 +187,7 @@ describe Results do
188
187
  describe 'Full' do
189
188
  it 'should delegate to allocations.total' do
190
189
  allocations = stub :allocations
191
- results = Results::Full.new allocations
190
+ results = Results::Full.new nil, allocations
192
191
 
193
192
  allocations.should_receive(:total).once
194
193
 
@@ -198,7 +197,7 @@ describe Results do
198
197
  describe 'Live' do
199
198
  it 'should delegate to allocations.total' do
200
199
  allocations = stub :allocations
201
- results = Results::Live.new allocations
200
+ results = Results::Live.new nil, allocations
202
201
 
203
202
  allocations.should_receive(:total).once
204
203
 
@@ -2,18 +2,20 @@
2
2
  require 'spec_helper'
3
3
 
4
4
  describe UmlautSubstituter do
5
- include UmlautSubstituter
5
+ before(:each) do
6
+ @substituter = UmlautSubstituter.new
7
+ end
6
8
 
7
9
  # A bit of metaprogramming to help with the myriads of its.
8
10
  #
9
11
  def self.it_should_substitute(special_character, normal_character)
10
12
  it "should substitute #{special_character} with #{normal_character}" do
11
- substitute_umlauts(special_character).should == normal_character
13
+ @substituter.substitute(special_character).should == normal_character
12
14
  end
13
15
  end
14
16
  def self.it_should_not_substitute(special_character)
15
17
  it "should not substitute #{special_character}" do
16
- substitute_umlauts(special_character).should == special_character
18
+ @substituter.substitute(special_character).should == special_character
17
19
  end
18
20
  end
19
21
 
@@ -9,15 +9,10 @@ describe "Speccing Ruby for speed" do
9
9
  :speed => (1..5_000).to_a,
10
10
  :test => (1..1_000).to_a
11
11
  }
12
- GC.disable
13
- end
14
- after(:each) do
15
- GC.enable
16
- GC.start # start the GC to minimize the chance that it will run again during the speed spec
17
12
  end
18
13
  describe "+" do
19
14
  it "should be fast" do
20
- Benchmark.realtime do
15
+ performance_of do
21
16
  @allocs.inject([]) do |total, alloc|
22
17
  total + @ids[alloc]
23
18
  end
@@ -26,14 +21,14 @@ describe "Speccing Ruby for speed" do
26
21
  end
27
22
  describe "map and flatten!(1)" do
28
23
  it "should be fast" do
29
- Benchmark.realtime do
24
+ performance_of do
30
25
  @allocs.map { |alloc| @ids[alloc] }.flatten!(1)
31
26
  end.should < 0.02
32
27
  end
33
28
  end
34
29
  describe "<< and flatten!(1)" do
35
30
  it "should be fast" do
36
- Benchmark.realtime do
31
+ performance_of do
37
32
  @allocs.inject([]) do |total, alloc|
38
33
  total << @ids[alloc]
39
34
  end.flatten!(1)
@@ -42,7 +37,7 @@ describe "Speccing Ruby for speed" do
42
37
  end
43
38
  describe "<< and flatten!" do
44
39
  it "should be fast" do
45
- Benchmark.realtime do
40
+ performance_of do
46
41
  @allocs.inject([]) do |total, alloc|
47
42
  total << @ids[alloc]
48
43
  end.flatten!
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 2
8
- - 1
9
- version: 0.2.1
8
+ - 2
9
+ version: 0.2.2
10
10
  platform: ruby
11
11
  authors:
12
12
  - Florian Hanke
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2010-10-12 00:00:00 +02:00
17
+ date: 2010-10-14 00:00:00 +02:00
18
18
  default_executable: picky
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency