picky 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,6 +3,19 @@
3
3
  class Application
4
4
  class << self
5
5
 
6
+ # Finalize the subclass as soon as it
7
+ # has finished loading.
8
+ #
9
+ # Note: finalize finalizes the routes.
10
+ #
11
+ def inherited app
12
+ @apps ||= []
13
+ @apps << app
14
+ end
15
+ def finalize_apps
16
+ @apps.each &:finalize
17
+ end
18
+
6
19
  # An application simply delegates to the routing to handle a request.
7
20
  #
8
21
  def call env
@@ -9,6 +9,8 @@ module Index
9
9
  #
10
10
  # TODO Need to use the right subtokens. Bake in?
11
11
  #
12
+ # TODO One can use it as a wrapper, and it will extract the indexes itself. Rename: ExactFirst.
13
+ #
12
14
  class Combined < Bundle
13
15
 
14
16
  delegate :similar,
@@ -25,6 +27,8 @@ module Index
25
27
  :load,
26
28
  :to => :@partial
27
29
 
30
+ # TODO initialize type_or_category # => installs itself on all full and partial
31
+ #
28
32
  def initialize full, partial
29
33
  @full = full
30
34
  @partial = partial
data/lib/picky/loader.rb CHANGED
@@ -70,6 +70,10 @@ module Loader
70
70
  load_user 'app/logging'
71
71
  load_user 'app/application'
72
72
 
73
+ # Finalize the applications.
74
+ #
75
+ Application.finalize_apps
76
+
73
77
  # TODO Rewrite
74
78
  #
75
79
  Query::Qualifiers.instance.prepare
@@ -32,7 +32,7 @@ module Query
32
32
  results = nil
33
33
 
34
34
  duration = timed do
35
- results = execute(tokens, offset) || empty_results # TODO Does not work yet
35
+ results = execute(tokens, offset) || empty_results(offset) # TODO Does not work yet
36
36
  end
37
37
  results.duration = duration.round 6
38
38
 
@@ -42,21 +42,21 @@ module Query
42
42
  # Return nil if no results have been found.
43
43
  #
44
44
  def execute tokens, offset
45
- results_from sorted_allocations(tokens), offset
45
+ results_from offset, sorted_allocations(tokens)
46
46
  end
47
-
47
+
48
48
  # Returns an empty result with default values.
49
49
  #
50
- def empty_results
51
- result_type.new
50
+ def empty_results offset = 0
51
+ result_type.new offset
52
52
  end
53
-
53
+
54
54
  # Delegates the tokenizing to the query tokenizer.
55
55
  #
56
56
  def tokenized text
57
57
  @tokenizer.tokenize text
58
58
  end
59
-
59
+
60
60
  # Gets sorted allocations for the tokens.
61
61
  #
62
62
  # This generates the possible allocations, sorted.
@@ -111,9 +111,9 @@ module Query
111
111
  #
112
112
  # TODO Move to results. result_type.from allocations, offset
113
113
  #
114
- def results_from allocations = nil, offset = 0
115
- results = result_type.new allocations
116
- results.prepare! offset
114
+ def results_from offset = 0, allocations = nil
115
+ results = result_type.new offset, allocations
116
+ results.prepare!
117
117
  results
118
118
  end
119
119
 
@@ -5,16 +5,16 @@ module Results
5
5
  #
6
6
  class Base
7
7
 
8
- # Duration is set by the query.
8
+ # Duration is set externally by the query.
9
9
  #
10
10
  attr_writer :duration
11
11
  attr_reader :allocations, :offset
12
12
 
13
13
  # Takes instances of Query::Allocations as param.
14
14
  #
15
- def initialize allocations = nil
15
+ def initialize offset = 0, allocations = nil
16
+ @offset = offset
16
17
  @allocations = allocations || Query::Allocations.new
17
- @offset = 0
18
18
  end
19
19
 
20
20
  def add more_results
@@ -49,9 +49,8 @@ module Results
49
49
  # Without this, the allocations are not processed,
50
50
  # and no ids are calculated.
51
51
  #
52
- def prepare! offset = 0
53
- @offset = offset
54
- allocations.process! self.max_results, offset
52
+ def prepare!
53
+ allocations.process! self.max_results, self.offset
55
54
  end
56
55
 
57
56
  # Duration default is 0.
@@ -88,6 +88,13 @@ module Tokenizers
88
88
  process tokens # processing tokens / strings
89
89
  end
90
90
 
91
+ attr_accessor :substituter
92
+ alias substituter? substituter
93
+
94
+ def initialize substituter = UmlautSubstituter.new
95
+ @substituter = substituter
96
+ end
97
+
91
98
  # Hooks.
92
99
  #
93
100
 
@@ -5,8 +5,6 @@ module Tokenizers
5
5
  #
6
6
  class Index < Base
7
7
 
8
- include UmlautSubstituter
9
-
10
8
  # Default handling definitions. Override in config.
11
9
  #
12
10
  removes_characters(//)
@@ -26,7 +24,7 @@ module Tokenizers
26
24
  # 5. Remove non-single stopwords. (Stopwords that occur with other words)
27
25
  #
28
26
  def preprocess text
29
- text = substitute_umlauts text
27
+ text = substituter.substitute text if substituter?
30
28
  text.downcase!
31
29
  remove_illegals text
32
30
  contract text
@@ -13,8 +13,6 @@ module Tokenizers
13
13
  #
14
14
  class Query < Base
15
15
 
16
- include UmlautSubstituter
17
-
18
16
  # Default query tokenizer behaviour. Override in config.
19
17
  #
20
18
  removes_characters(//)
@@ -53,10 +51,10 @@ module Tokenizers
53
51
  # TODO Perhaps move to Normalizer?
54
52
  #
55
53
  def normalize text
56
- text = substitute_umlauts text # Substitute special characters TODO Move to subclass
57
- text.downcase! # Downcase all text
58
- normalize_with_patterns text # normalize
59
- text.to_sym # symbolize
54
+ text = substituter.substitute text if substituter? # Substitute special characters TODO Move to subclass
55
+ text.downcase! # Downcase all text
56
+ normalize_with_patterns text # normalize
57
+ text.to_sym # symbolize
60
58
  end
61
59
 
62
60
  # Returns a token for a word.
@@ -1,8 +1,20 @@
1
1
  # encoding: utf-8
2
2
  #
3
- module UmlautSubstituter
4
- def substitute_umlauts text
5
- trans = ActiveSupport::Multibyte.proxy_class.new(text).normalize(:kd)
3
+
4
+ # Substitutes certain umlauts, like
5
+ # ä, ö, ü => ae, oe, ue.
6
+ # (and more, see specs)
7
+ #
8
+ class UmlautSubstituter
9
+
10
+ attr_reader :chars
11
+
12
+ def initialize
13
+ @chars = ActiveSupport::Multibyte.proxy_class
14
+ end
15
+
16
+ def substitute text
17
+ trans = chars.new(text).normalize(:kd)
6
18
 
7
19
  # substitute special cases
8
20
  #
@@ -18,4 +30,5 @@ module UmlautSubstituter
18
30
  cp < 0x0300 || cp > 0x035F
19
31
  }.pack('U*')
20
32
  end
33
+
21
34
  end
@@ -23,8 +23,6 @@ Indexes.load_from_cache
23
23
  # Use Harakiri middleware to kill unicorn child after X requests.
24
24
  #
25
25
  # See http://vimeo.com/12614970 for more info.
26
- #
27
- # Note: Comment this.
28
26
  #
29
27
  Rack::Harakiri.after = 50
30
28
  use Rack::Harakiri
@@ -33,5 +31,4 @@ use Rack::Harakiri
33
31
  #
34
32
  # Note: Needs to be the same constant name as in app/application.rb.
35
33
  #
36
- PickySearch.finalize
37
34
  run PickySearch
@@ -3,13 +3,6 @@ require File.dirname(__FILE__) + '/../spec_helper'
3
3
  describe Performant::Array do
4
4
 
5
5
  describe "memory_efficient_intersect" do
6
- before(:each) do
7
- GC.disable
8
- end
9
- after(:each) do
10
- GC.enable
11
- GC.start
12
- end
13
6
  it "should intersect empty arrays correctly" do
14
7
  arys = [[3,4], [1,2,3], []]
15
8
 
@@ -45,15 +38,13 @@ describe Performant::Array do
45
38
  arys = [(1..50).to_a, (10_000..20_000).to_a << 7]
46
39
 
47
40
  # brute force
48
- Benchmark.realtime do
49
- Performant::Array.memory_efficient_intersect(arys.sort_by(&:size))
50
- end.should < 0.001
41
+ performance_of { Performant::Array.memory_efficient_intersect(arys.sort_by(&:size)) }.should < 0.001
51
42
  end
52
43
  it "should be optimal for 2 small arrays of 50/10_000" do
53
44
  arys = [(1..50).to_a, (10_000..20_000).to_a << 7]
54
45
 
55
46
  # &
56
- Benchmark.realtime do
47
+ performance_of do
57
48
  arys.inject(arys.shift.dup) do |total, ary|
58
49
  total & arys
59
50
  end
@@ -24,7 +24,7 @@ describe Application do
24
24
  # Note that Picky needs the following characters to
25
25
  # pass through, as they are control characters: *"~:
26
26
  #
27
- querying.removes_characters(/[^a-zA-Z0-9\s\/\-\,\&\"\~\*\:]/)
27
+ querying.removes_characters(/[^a-zA-Z0-9äöü\s\/\-\,\&\"\~\*\:]/)
28
28
  querying.stopwords(/\b(and|the|of|it|in|for)\b/)
29
29
  querying.splits_text_on(/[\s\/\-\,\&]+/)
30
30
  querying.normalizes_words([
@@ -28,7 +28,7 @@ describe Cacher::Partial::Subtoken do
28
28
  }
29
29
  end
30
30
  it "should be fast" do
31
- Benchmark.realtime { @cacher.generate_from( :florian => [1], :flavia => [2] ) }.should < 0.0001
31
+ performance_of { @cacher.generate_from( :florian => [1], :flavia => [2] ) }.should < 0.0001
32
32
  end
33
33
  it "should handle duplicate ids" do
34
34
  @cacher.generate_from( :flo => [1], :fla => [1] ).should == {
@@ -106,7 +106,7 @@ describe Cacher::Partial::Subtoken do
106
106
  end
107
107
  end
108
108
  it "should be fast" do
109
- Benchmark.realtime { @cacher.generate_from(@index) }.should < 0.005
109
+ performance_of { @cacher.generate_from(@index) }.should < 0.005
110
110
  end
111
111
  end
112
112
  describe "a bigger example with almost identical symbols" do
@@ -118,7 +118,7 @@ describe Cacher::Partial::Subtoken do
118
118
  end
119
119
  end
120
120
  it "should be fast" do
121
- Benchmark.realtime { @cacher.generate_from(@index) }.should < 0.003
121
+ performance_of { @cacher.generate_from(@index) }.should < 0.003
122
122
  end
123
123
  end
124
124
  end
@@ -39,7 +39,7 @@ describe Hash do
39
39
  lambda { @obj.to_json(:some => :option) }.should_not raise_error
40
40
  end
41
41
  it "should be fast" do
42
- Benchmark.realtime { @obj.to_json }.should < 0.00006
42
+ performance_of { @obj.to_json }.should < 0.00006
43
43
  end
44
44
  end
45
45
 
@@ -6,11 +6,6 @@ describe Symbol do
6
6
  include Helpers::Measuring
7
7
  before(:each) do
8
8
  @token = (((0..9).to_a)*10).to_s.to_sym
9
- GC.disable
10
- end
11
- after(:each) do
12
- GC.enable
13
- GC.start
14
9
  end
15
10
  # Note: They influence each other. each_subtoken is faster though.
16
11
  #
@@ -29,15 +29,9 @@ describe Index::Bundle do
29
29
  random_keys = generate_random_keys 500
30
30
  random_ids = generate_random_ids 500
31
31
  @full.index = Hash[random_keys.zip(random_ids)]
32
-
33
- GC.disable
34
- end
35
- after(:each) do
36
- GC.enable
37
- GC.start
38
32
  end
39
33
  it 'should be fast' do
40
- Benchmark.realtime do
34
+ performance_of do
41
35
  @full.generate_partial
42
36
  end.should < 0.2
43
37
  end
@@ -328,7 +328,7 @@ describe Query::Allocations do
328
328
  @allocations.total.should == 110
329
329
  end
330
330
  it 'should be fast' do
331
- Benchmark.realtime { @allocations.process!(20, 0) }.should < 0.0001
331
+ performance_of { @allocations.process!(20, 0) }.should < 0.0001
332
332
  end
333
333
  end
334
334
  end
@@ -62,21 +62,21 @@ describe 'Query::Combinations' do
62
62
  @combination2.should_receive(:ids).once.with.and_return (1..100).to_a
63
63
  @combination3.should_receive(:ids).once.with.and_return (1..10).to_a
64
64
 
65
- Benchmark.realtime { @combinations.ids }.should < 0.004
65
+ performance_of { @combinations.ids }.should < 0.004
66
66
  end
67
67
  it "should be fast" do
68
68
  @combination1.should_receive(:ids).once.with.and_return (1..1000).to_a
69
69
  @combination2.should_receive(:ids).once.with.and_return (1..100).to_a
70
70
  @combination3.should_receive(:ids).once.with.and_return (1..10).to_a
71
71
 
72
- Benchmark.realtime { @combinations.ids }.should < 0.00015
72
+ performance_of { @combinations.ids }.should < 0.00015
73
73
  end
74
74
  it "should be fast" do
75
75
  @combination1.should_receive(:ids).once.with.and_return (1..1000).to_a
76
76
  @combination2.should_receive(:ids).once.with.and_return (901..1000).to_a
77
77
  @combination3.should_receive(:ids).once.with.and_return (1..10).to_a
78
78
 
79
- Benchmark.realtime { @combinations.ids }.should < 0.0001
79
+ performance_of { @combinations.ids }.should < 0.0001
80
80
  end
81
81
  end
82
82
 
@@ -31,7 +31,7 @@ describe Query::Live do
31
31
  allocations = stub :allocations
32
32
  @query.should_receive(:sorted_allocations).and_return allocations
33
33
 
34
- @query.should_receive(:results_from).once.with(allocations, 0).and_return stub(:results, :prepare! => true)
34
+ @query.should_receive(:results_from).once.with(0, allocations).and_return stub(:results, :prepare! => true)
35
35
 
36
36
  @query.execute 'some query', 0
37
37
  end
@@ -53,7 +53,7 @@ describe Query::Live do
53
53
  @query.results_from(@allocations).duration.should == 0
54
54
  end
55
55
  it "should generate a result with the allocations" do
56
- @query.results_from(@allocations).allocations.should == @allocations
56
+ @query.results_from(0, @allocations).allocations.should == @allocations
57
57
  end
58
58
  end
59
59
  end
@@ -20,10 +20,9 @@ describe Results do
20
20
  @allocations = stub :allocations,
21
21
  :process! => nil, :size => 12
22
22
 
23
- @results = Results::Base.new @allocations
23
+ @results = Results::Base.new 1234, @allocations
24
24
  @results.stub! :duration => 0.1234567890,
25
- :total => 12345678,
26
- :offset => 1234
25
+ :total => 12345678
27
26
  end
28
27
  it 'should output a specific log' do
29
28
  @results.to_log('some_query').should == '|0-08-16 10:07:33|0.123457|some_query |12345678|1234|12|'
@@ -46,11 +45,11 @@ describe Results do
46
45
  before(:each) do
47
46
  @allocations = stub :allocations, :process! => nil, :to_result => :allocations, :total => :some_total
48
47
 
49
- @results = Results::Base.new @allocations
48
+ @results = Results::Base.new :some_offset, @allocations
50
49
  @results.duration = :some_duration
51
50
  end
52
51
  it 'should do it correctly' do
53
- @results.prepare! :some_offset
52
+ @results.prepare!
54
53
 
55
54
  @results.serialize.should == { :allocations => :allocations, :offset => :some_offset, :duration => :some_duration, :total => :some_total }
56
55
  end
@@ -139,7 +138,7 @@ describe Results do
139
138
  }.should_not raise_error
140
139
  end
141
140
  it 'should set the allocations to an empty array' do
142
- Results::Full.new(:some_allocations).instance_variable_get(:@allocations).should == :some_allocations
141
+ Results::Full.new(:unimportant, :some_allocations).instance_variable_get(:@allocations).should == :some_allocations
143
142
  end
144
143
  end
145
144
  describe 'Live' do
@@ -149,7 +148,7 @@ describe Results do
149
148
  }.should_not raise_error
150
149
  end
151
150
  it 'should set the allocations to an empty array' do
152
- Results::Live.new(:some_allocations).instance_variable_get(:@allocations).should == :some_allocations
151
+ Results::Live.new(:unimportant, :some_allocations).instance_variable_get(:@allocations).should == :some_allocations
153
152
  end
154
153
  end
155
154
  end
@@ -188,7 +187,7 @@ describe Results do
188
187
  describe 'Full' do
189
188
  it 'should delegate to allocations.total' do
190
189
  allocations = stub :allocations
191
- results = Results::Full.new allocations
190
+ results = Results::Full.new nil, allocations
192
191
 
193
192
  allocations.should_receive(:total).once
194
193
 
@@ -198,7 +197,7 @@ describe Results do
198
197
  describe 'Live' do
199
198
  it 'should delegate to allocations.total' do
200
199
  allocations = stub :allocations
201
- results = Results::Live.new allocations
200
+ results = Results::Live.new nil, allocations
202
201
 
203
202
  allocations.should_receive(:total).once
204
203
 
@@ -2,18 +2,20 @@
2
2
  require 'spec_helper'
3
3
 
4
4
  describe UmlautSubstituter do
5
- include UmlautSubstituter
5
+ before(:each) do
6
+ @substituter = UmlautSubstituter.new
7
+ end
6
8
 
7
9
  # A bit of metaprogramming to help with the myriads of its.
8
10
  #
9
11
  def self.it_should_substitute(special_character, normal_character)
10
12
  it "should substitute #{special_character} with #{normal_character}" do
11
- substitute_umlauts(special_character).should == normal_character
13
+ @substituter.substitute(special_character).should == normal_character
12
14
  end
13
15
  end
14
16
  def self.it_should_not_substitute(special_character)
15
17
  it "should not substitute #{special_character}" do
16
- substitute_umlauts(special_character).should == special_character
18
+ @substituter.substitute(special_character).should == special_character
17
19
  end
18
20
  end
19
21
 
@@ -9,15 +9,10 @@ describe "Speccing Ruby for speed" do
9
9
  :speed => (1..5_000).to_a,
10
10
  :test => (1..1_000).to_a
11
11
  }
12
- GC.disable
13
- end
14
- after(:each) do
15
- GC.enable
16
- GC.start # start the GC to minimize the chance that it will run again during the speed spec
17
12
  end
18
13
  describe "+" do
19
14
  it "should be fast" do
20
- Benchmark.realtime do
15
+ performance_of do
21
16
  @allocs.inject([]) do |total, alloc|
22
17
  total + @ids[alloc]
23
18
  end
@@ -26,14 +21,14 @@ describe "Speccing Ruby for speed" do
26
21
  end
27
22
  describe "map and flatten!(1)" do
28
23
  it "should be fast" do
29
- Benchmark.realtime do
24
+ performance_of do
30
25
  @allocs.map { |alloc| @ids[alloc] }.flatten!(1)
31
26
  end.should < 0.02
32
27
  end
33
28
  end
34
29
  describe "<< and flatten!(1)" do
35
30
  it "should be fast" do
36
- Benchmark.realtime do
31
+ performance_of do
37
32
  @allocs.inject([]) do |total, alloc|
38
33
  total << @ids[alloc]
39
34
  end.flatten!(1)
@@ -42,7 +37,7 @@ describe "Speccing Ruby for speed" do
42
37
  end
43
38
  describe "<< and flatten!" do
44
39
  it "should be fast" do
45
- Benchmark.realtime do
40
+ performance_of do
46
41
  @allocs.inject([]) do |total, alloc|
47
42
  total << @ids[alloc]
48
43
  end.flatten!
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 2
8
- - 1
9
- version: 0.2.1
8
+ - 2
9
+ version: 0.2.2
10
10
  platform: ruby
11
11
  authors:
12
12
  - Florian Hanke
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2010-10-12 00:00:00 +02:00
17
+ date: 2010-10-14 00:00:00 +02:00
18
18
  default_executable: picky
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency