picky 1.3.0 → 1.3.1
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/picky/index/file/text.rb +4 -3
- data/lib/picky/indexers/serial.rb +8 -0
- data/lib/picky/indexing/bundle.rb +2 -1
- data/lib/picky/indexing/category.rb +9 -0
- data/lib/picky/interfaces/live_parameters.rb +3 -1
- data/lib/picky/sources/base.rb +2 -0
- data/lib/picky/sources/couch.rb +7 -61
- data/lib/picky/sources/csv.rb +7 -4
- data/lib/picky/sources/delicious.rb +1 -2
- data/lib/picky/sources/wrappers/base.rb +1 -1
- data/spec/ext/performant_spec.rb +40 -1
- data/spec/lib/index/file/text_spec.rb +1 -1
- data/spec/lib/indexers/serial_spec.rb +19 -0
- data/spec/lib/indexing/bundle_spec.rb +44 -14
- data/spec/lib/indexing/category_spec.rb +3 -2
- data/spec/lib/query/allocation_spec.rb +26 -0
- data/spec/lib/query/allocations_spec.rb +32 -13
- data/spec/lib/query/combinations_spec.rb +7 -0
- data/spec/lib/sources/couch_spec.rb +4 -29
- data/spec/lib/sources/csv_spec.rb +23 -3
- data/spec/specific/speed_spec.rb +47 -1
- metadata +3 -3
@@ -29,14 +29,15 @@ module Index
|
|
29
29
|
# * id,data\n
|
30
30
|
# * id,data\n
|
31
31
|
#
|
32
|
-
# Yields an id and a symbol token.
|
32
|
+
# Yields an id string and a symbol token.
|
33
33
|
#
|
34
34
|
def retrieve
|
35
|
-
id
|
35
|
+
id = nil
|
36
|
+
token = nil
|
36
37
|
::File.open(cache_path, 'r:binary') do |file|
|
37
38
|
file.each_line do |line|
|
38
39
|
id, token = line.split ?,, 2
|
39
|
-
yield id
|
40
|
+
yield id, (token.chomp! || token).to_sym
|
40
41
|
end
|
41
42
|
end
|
42
43
|
end
|
@@ -20,6 +20,14 @@ module Indexers
|
|
20
20
|
raise NoSourceSpecifiedException.new("No source given for #{@configuration.identifier}.")
|
21
21
|
end
|
22
22
|
|
23
|
+
# Delegates the key format to the source.
|
24
|
+
#
|
25
|
+
# Default is to_i.
|
26
|
+
#
|
27
|
+
def key_format
|
28
|
+
@source.key_format || :to_i
|
29
|
+
end
|
30
|
+
|
23
31
|
# Selects the original id (indexed id) and a column to process. The column data is called "token".
|
24
32
|
#
|
25
33
|
# Note: Puts together the parts first in an array, then releasing the array from time to time by joining.
|
@@ -68,9 +68,10 @@ module Indexing # :nodoc:all
|
|
68
68
|
# and later dumping the optimized index.
|
69
69
|
#
|
70
70
|
def retrieve
|
71
|
+
key_format = self[:key_format] || :to_i
|
71
72
|
files.retrieve do |id, token|
|
72
73
|
initialize_index_for token
|
73
|
-
index[token] << id
|
74
|
+
index[token] << id.send(key_format)
|
74
75
|
end
|
75
76
|
end
|
76
77
|
# Sets up a piece of the index for the given token.
|
@@ -80,8 +80,17 @@ module Indexing
|
|
80
80
|
#
|
81
81
|
def cache
|
82
82
|
prepare_index_directory
|
83
|
+
configure
|
83
84
|
generate_caches
|
84
85
|
end
|
86
|
+
# We need to set what formatting method should be used.
|
87
|
+
# Uses the one defined in the indexer.
|
88
|
+
#
|
89
|
+
def configure
|
90
|
+
key_format = indexer.key_format
|
91
|
+
exact[:key_format] = key_format
|
92
|
+
partial[:key_format] = key_format
|
93
|
+
end
|
85
94
|
def generate_caches
|
86
95
|
generate_caches_from_source
|
87
96
|
generate_partial
|
@@ -22,7 +22,7 @@ module Interfaces
|
|
22
22
|
#
|
23
23
|
Thread.new do
|
24
24
|
loop do
|
25
|
-
|
25
|
+
IO.select([@child], nil, nil, 2) or next
|
26
26
|
result = @child.gets ';;;'
|
27
27
|
pid, configuration_hash = eval result
|
28
28
|
next unless Hash === configuration_hash
|
@@ -123,6 +123,8 @@ module Interfaces
|
|
123
123
|
send :"#{key}=", new_value
|
124
124
|
end
|
125
125
|
rescue StandardError => e
|
126
|
+
# Catch any error and reraise as config error.
|
127
|
+
#
|
126
128
|
raise CouldNotUpdateConfigurationError.new current_key, e.message
|
127
129
|
end
|
128
130
|
end
|
data/lib/picky/sources/base.rb
CHANGED
data/lib/picky/sources/couch.rb
CHANGED
@@ -9,21 +9,6 @@ module Sources
|
|
9
9
|
|
10
10
|
# A Couch database source.
|
11
11
|
#
|
12
|
-
# <b>IMPORTANT NOTE:
|
13
|
-
#
|
14
|
-
# Since Picky currently only handles integer ids (we're working on this),
|
15
|
-
# and CouchDB uses hexadecimal ids, this source automatically
|
16
|
-
# recalculates a couch id such as
|
17
|
-
# fa3f2577a8dbc6a91d7f9989cdffd38e
|
18
|
-
# into
|
19
|
-
# 332634873577882511228481564366832915342
|
20
|
-
# using String#hex.
|
21
|
-
#
|
22
|
-
# When using the integer ids in a webapp to get your
|
23
|
-
# objects from CouchDB, please do a Integer#to_s(16) on the
|
24
|
-
# ids you get from Picky before you use them to get your object from CouchDB.</b>
|
25
|
-
#
|
26
|
-
#
|
27
12
|
# Options:
|
28
13
|
# * url
|
29
14
|
# and all the options of a <tt>RestClient::Resource</tt>.
|
@@ -35,49 +20,6 @@ module Sources
|
|
35
20
|
#
|
36
21
|
class Couch < Base
|
37
22
|
|
38
|
-
# If your Couch DB uses UUID keys, use
|
39
|
-
# Sources::Couch.new(:title, keys: Sources::Couch::UUIDKeys.new)
|
40
|
-
# Do not forget to reconvert the UUID Key from an integer in the client:
|
41
|
-
# uuid = UUIDTools::UUID.parse_int(id)
|
42
|
-
# uuid.to_s
|
43
|
-
#
|
44
|
-
class UUIDKeys
|
45
|
-
def initialize
|
46
|
-
# Tries to require the uuidtools gem.
|
47
|
-
#
|
48
|
-
begin
|
49
|
-
require 'uuidtools'
|
50
|
-
rescue LoadError
|
51
|
-
puts_gem_missing 'uuidtools', 'UUID keys in a CouchDB source'
|
52
|
-
exit 1
|
53
|
-
end
|
54
|
-
end
|
55
|
-
def to_i id
|
56
|
-
uuid = UUIDTools::UUID.parse id
|
57
|
-
uuid.to_i
|
58
|
-
end
|
59
|
-
end
|
60
|
-
|
61
|
-
# If your Couch DB uses Hex keys, use
|
62
|
-
# Sources::Couch.new(:title, keys: Sources::Couch::HexKeys.new)
|
63
|
-
# Do not forget to reconvert the Hex Key from an integer in the client:
|
64
|
-
# id.to_s(16)
|
65
|
-
#
|
66
|
-
class HexKeys
|
67
|
-
def to_i id
|
68
|
-
id.hex
|
69
|
-
end
|
70
|
-
end
|
71
|
-
|
72
|
-
# If your Couch DB uses Integer keys, use
|
73
|
-
# Sources::Couch.new(:title, keys: Sources::Couch::IntegerKeys.new)
|
74
|
-
#
|
75
|
-
class IntegerKeys
|
76
|
-
def to_i id
|
77
|
-
id
|
78
|
-
end
|
79
|
-
end
|
80
|
-
|
81
23
|
#
|
82
24
|
#
|
83
25
|
def initialize *category_names, options
|
@@ -85,8 +27,12 @@ module Sources
|
|
85
27
|
|
86
28
|
Hash === options && options[:url] || raise_no_db_given(category_names)
|
87
29
|
@db = RestClient::Resource.new options.delete(:url), options
|
88
|
-
|
89
|
-
|
30
|
+
end
|
31
|
+
|
32
|
+
# Default key format method for couch DB is to_sym.
|
33
|
+
#
|
34
|
+
def key_format
|
35
|
+
:to_sym
|
90
36
|
end
|
91
37
|
|
92
38
|
# Tries to require the rest_client gem.
|
@@ -106,7 +52,7 @@ module Sources
|
|
106
52
|
def harvest type, category
|
107
53
|
category_name = category.from.to_s
|
108
54
|
get_data do |doc|
|
109
|
-
yield
|
55
|
+
yield doc[@@id_key], doc[category_name] || next
|
110
56
|
end
|
111
57
|
end
|
112
58
|
|
data/lib/picky/sources/csv.rb
CHANGED
@@ -26,7 +26,7 @@ module Sources
|
|
26
26
|
|
27
27
|
# The options that were passed into #new.
|
28
28
|
#
|
29
|
-
attr_reader :csv_options
|
29
|
+
attr_reader :csv_options, :key_format
|
30
30
|
|
31
31
|
# The data category names.
|
32
32
|
#
|
@@ -36,8 +36,11 @@ module Sources
|
|
36
36
|
require 'csv'
|
37
37
|
@category_names = category_names
|
38
38
|
|
39
|
-
@csv_options
|
40
|
-
@file_name
|
39
|
+
@csv_options = Hash === options && options || {}
|
40
|
+
@file_name = @csv_options.delete(:file) || raise_no_file_given(category_names)
|
41
|
+
|
42
|
+
key_format = options.delete :key_format
|
43
|
+
@key_format = key_format && key_format.to_sym || :to_i
|
41
44
|
end
|
42
45
|
|
43
46
|
# Raises a NoCSVFileGiven exception.
|
@@ -51,7 +54,7 @@ module Sources
|
|
51
54
|
def harvest _, category
|
52
55
|
index = category_names.index category.from
|
53
56
|
get_data do |ary|
|
54
|
-
indexed_id = ary.shift
|
57
|
+
indexed_id = ary.shift
|
55
58
|
text = ary[index]
|
56
59
|
next unless text
|
57
60
|
text.force_encoding 'utf-8' # TODO Still needed?
|
@@ -27,8 +27,7 @@ module Sources
|
|
27
27
|
# Harvests the data to index.
|
28
28
|
#
|
29
29
|
def harvest _, category
|
30
|
-
get_data do |
|
31
|
-
indexed_id = uid
|
30
|
+
get_data do |indexed_id, data|
|
32
31
|
text = data[category.from]
|
33
32
|
next unless text
|
34
33
|
text.force_encoding 'utf-8' # TODO Still needed?
|
data/spec/ext/performant_spec.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
require File.dirname(__FILE__) + '/../spec_helper'
|
2
2
|
|
3
3
|
describe Performant::Array do
|
4
|
-
|
4
|
+
|
5
5
|
describe "memory_efficient_intersect" do
|
6
6
|
it "should intersect empty arrays correctly" do
|
7
7
|
arys = [[3,4], [1,2,3], []]
|
@@ -51,5 +51,44 @@ describe Performant::Array do
|
|
51
51
|
end.should < 0.0015
|
52
52
|
end
|
53
53
|
end
|
54
|
+
|
55
|
+
describe "memory_efficient_intersect with symbols" do
|
56
|
+
it "should intersect empty arrays correctly" do
|
57
|
+
arys = [[:c,:d], [:a,:b,:c], []]
|
58
|
+
|
59
|
+
Performant::Array.memory_efficient_intersect(arys.sort_by(&:size)).should == []
|
60
|
+
end
|
61
|
+
it "should handle intermediate empty results correctly" do
|
62
|
+
arys = [[:e,:d], [:a,:b,:c], [:c,:d,:e,:h,:i]]
|
63
|
+
|
64
|
+
Performant::Array.memory_efficient_intersect(arys.sort_by(&:size)).should == []
|
65
|
+
end
|
66
|
+
it "should intersect correctly" do
|
67
|
+
arys = [[:c,:d], [:a,:b,:c], [:c,:d,:e,:h,:i]]
|
68
|
+
|
69
|
+
Performant::Array.memory_efficient_intersect(arys.sort_by(&:size)).should == [:c]
|
70
|
+
end
|
71
|
+
it "should intersect many arrays" do
|
72
|
+
arys = [[:c,:d,:e,:f,:g], [:a,:b,:c,:e,:f,:g], [:c,:d,:e,:f,:g,:h,:i], [:a,:b,:c,:d,:e,:f,:g,:h,:i,:j], [:b,:c,:e,:f,:g,:s], [:a,:b,:c,:d,:e,:f,:g,:h,:i,:j], [:b,:c,:e,:f,:g,:s]]
|
73
|
+
|
74
|
+
Performant::Array.memory_efficient_intersect(arys.sort_by(&:size)).should == [:c,:e,:f,:g]
|
75
|
+
end
|
76
|
+
it "should be optimal for 2 small arrays of 50/10_000" do
|
77
|
+
arys = [(:'1'..:'50').to_a, (:'10_000'..:'20_000').to_a << 7]
|
78
|
+
|
79
|
+
# brute force
|
80
|
+
performance_of { Performant::Array.memory_efficient_intersect(arys.sort_by(&:size)) }.should < 0.001
|
81
|
+
end
|
82
|
+
it "should be optimal for 2 small arrays of 50/10_000" do
|
83
|
+
arys = [(:'1'..:'50').to_a, (:'10_000'..:'20_000').to_a << 7]
|
84
|
+
|
85
|
+
# &
|
86
|
+
performance_of do
|
87
|
+
arys.inject(arys.shift.dup) do |total, ary|
|
88
|
+
total & arys
|
89
|
+
end
|
90
|
+
end.should < 0.0015
|
91
|
+
end
|
92
|
+
end
|
54
93
|
|
55
94
|
end
|
@@ -13,6 +13,25 @@ describe Indexers::Serial do
|
|
13
13
|
@indexer.stub! :timed_exclaim
|
14
14
|
end
|
15
15
|
|
16
|
+
describe 'key_format' do
|
17
|
+
context 'source has key_format' do
|
18
|
+
before(:each) do
|
19
|
+
@source.stub! :key_format => :some_key_format
|
20
|
+
end
|
21
|
+
it 'returns what the source returns' do
|
22
|
+
@indexer.key_format.should == :some_key_format
|
23
|
+
end
|
24
|
+
end
|
25
|
+
context 'source does not have key_format' do
|
26
|
+
before(:each) do
|
27
|
+
@source.stub! :key_format => nil
|
28
|
+
end
|
29
|
+
it 'returns :to_i' do
|
30
|
+
@indexer.key_format.should == :to_i
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
16
35
|
describe "tokenizer" do
|
17
36
|
it "returns the right one" do
|
18
37
|
@indexer.tokenizer.should == @tokenizer
|
@@ -3,9 +3,9 @@ require 'spec_helper'
|
|
3
3
|
describe Indexing::Bundle do
|
4
4
|
|
5
5
|
before(:each) do
|
6
|
-
@
|
7
|
-
@category
|
8
|
-
@configuration
|
6
|
+
@internal_index = stub :index, :name => :some_index
|
7
|
+
@category = stub :category, :name => :some_category
|
8
|
+
@configuration = Configuration::Index.new @internal_index, @category
|
9
9
|
|
10
10
|
@partial = stub :partial
|
11
11
|
@weights = stub :weights
|
@@ -42,17 +42,47 @@ describe Indexing::Bundle do
|
|
42
42
|
end
|
43
43
|
end
|
44
44
|
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
45
|
+
describe 'retrieve' do
|
46
|
+
before(:each) do
|
47
|
+
files = stub :files
|
48
|
+
files.should_receive(:retrieve).once.and_yield ' 1234', :some_token
|
49
|
+
@index.stub! :files => files
|
50
|
+
|
51
|
+
@ary = stub :ary
|
52
|
+
@internal_index.should_receive(:[]).any_number_of_times.and_return @ary
|
53
|
+
@index.stub! :index => @internal_index
|
54
|
+
end
|
55
|
+
context 'id key format' do
|
56
|
+
before(:each) do
|
57
|
+
@index.should_receive(:[]).once.with(:key_format).and_return :to_i
|
58
|
+
end
|
59
|
+
it 'should call the other methods correctly' do
|
60
|
+
@ary.should_receive(:<<).once.with 1234
|
61
|
+
|
62
|
+
@index.retrieve
|
63
|
+
end
|
64
|
+
end
|
65
|
+
context 'other key format' do
|
66
|
+
before(:each) do
|
67
|
+
@index.should_receive(:[]).once.with(:key_format).and_return :strip
|
68
|
+
end
|
69
|
+
it 'should call the other methods correctly' do
|
70
|
+
@ary.should_receive(:<<).once.with '1234'
|
71
|
+
|
72
|
+
@index.retrieve
|
73
|
+
end
|
74
|
+
end
|
75
|
+
context 'no key format - default' do
|
76
|
+
before(:each) do
|
77
|
+
@index.should_receive(:[]).once.with(:key_format).and_return nil
|
78
|
+
end
|
79
|
+
it 'should call the other methods correctly' do
|
80
|
+
@ary.should_receive(:<<).once.with 1234
|
81
|
+
|
82
|
+
@index.retrieve
|
83
|
+
end
|
84
|
+
end
|
85
|
+
end
|
56
86
|
|
57
87
|
describe 'load_from_index_file' do
|
58
88
|
it 'should call two methods in order' do
|
@@ -3,8 +3,9 @@ require 'spec_helper'
|
|
3
3
|
describe Indexing::Category do
|
4
4
|
|
5
5
|
before(:each) do
|
6
|
-
@index
|
7
|
-
@
|
6
|
+
@index = stub :index, :name => :some_index
|
7
|
+
@source = stub :some_given_source, :key_format => nil
|
8
|
+
@category = Indexing::Category.new :some_category, @index, :source => @source
|
8
9
|
end
|
9
10
|
context "unit specs" do
|
10
11
|
before(:each) do
|
@@ -99,6 +99,32 @@ describe Query::Allocation do
|
|
99
99
|
@allocation.process!(20, 10).should == []
|
100
100
|
end
|
101
101
|
end
|
102
|
+
context 'with symbol ids' do
|
103
|
+
before(:each) do
|
104
|
+
@allocation.stub! :calculate_ids => [:a,:b,:c,:d,:e,:f,:g,:h,:i,:j]
|
105
|
+
end
|
106
|
+
it 'should process right' do
|
107
|
+
@allocation.process!(0, 0).should == []
|
108
|
+
end
|
109
|
+
it 'should process right' do
|
110
|
+
@allocation.process!(0, 10).should == []
|
111
|
+
end
|
112
|
+
it 'should process right' do
|
113
|
+
@allocation.process!(5, 0).should == [:a,:b,:c,:d,:e]
|
114
|
+
end
|
115
|
+
it 'should process right' do
|
116
|
+
@allocation.process!(5, 5).should == [:f,:g,:h,:i,:j]
|
117
|
+
end
|
118
|
+
it 'should process right' do
|
119
|
+
@allocation.process!(20, 0).should == [:a,:b,:c,:d,:e,:f,:g,:h,:i,:j]
|
120
|
+
end
|
121
|
+
it 'should process right' do
|
122
|
+
@allocation.process!(20, 5).should == [:f,:g,:h,:i,:j]
|
123
|
+
end
|
124
|
+
it 'should process right' do
|
125
|
+
@allocation.process!(20, 10).should == []
|
126
|
+
end
|
127
|
+
end
|
102
128
|
end
|
103
129
|
|
104
130
|
describe 'to_result' do
|
@@ -69,20 +69,39 @@ describe Query::Allocations do
|
|
69
69
|
end
|
70
70
|
|
71
71
|
describe 'ids' do
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
72
|
+
context 'integers' do
|
73
|
+
before(:each) do
|
74
|
+
@allocation1 = stub :allocation1, :ids => [1, 2, 3, 4]
|
75
|
+
@allocation2 = stub :allocation2, :ids => [5, 6, 7]
|
76
|
+
@allocation3 = stub :allocation3, :ids => [8, 9]
|
77
|
+
@allocations = Query::Allocations.new [@allocation1, @allocation2, @allocation3]
|
78
|
+
end
|
79
|
+
it 'should return the right amount of ids' do
|
80
|
+
@allocations.ids(0).should == []
|
81
|
+
end
|
82
|
+
it 'should return the right amount of ids' do
|
83
|
+
@allocations.ids(6).should == [1,2,3,4,5,6]
|
84
|
+
end
|
85
|
+
it 'should return the right amount of ids' do
|
86
|
+
@allocations.ids.should == [1,2,3,4,5,6,7,8,9]
|
87
|
+
end
|
83
88
|
end
|
84
|
-
|
85
|
-
|
89
|
+
context 'symbols' do
|
90
|
+
before(:each) do
|
91
|
+
@allocation1 = stub :allocation1, :ids => [:a, :b, :c, :d]
|
92
|
+
@allocation2 = stub :allocation2, :ids => [:e, :f, :g]
|
93
|
+
@allocation3 = stub :allocation3, :ids => [:h, :i]
|
94
|
+
@allocations = Query::Allocations.new [@allocation1, @allocation2, @allocation3]
|
95
|
+
end
|
96
|
+
it 'should return the right amount of ids' do
|
97
|
+
@allocations.ids(0).should == []
|
98
|
+
end
|
99
|
+
it 'should return the right amount of ids' do
|
100
|
+
@allocations.ids(6).should == [:a,:b,:c,:d,:e,:f]
|
101
|
+
end
|
102
|
+
it 'should return the right amount of ids' do
|
103
|
+
@allocations.ids.should == [:a,:b,:c,:d,:e,:f,:g,:h,:i]
|
104
|
+
end
|
86
105
|
end
|
87
106
|
end
|
88
107
|
|
@@ -127,6 +127,13 @@ describe 'Query::Combinations' do
|
|
127
127
|
|
128
128
|
@combinations.ids.should == (1..10).to_a
|
129
129
|
end
|
130
|
+
it "should intersect symbol_keys correctly" do
|
131
|
+
@combination1.should_receive(:ids).once.with.and_return (:'00001'..:'10000').to_a
|
132
|
+
@combination2.should_receive(:ids).once.with.and_return (:'00001'..:'00100').to_a
|
133
|
+
@combination3.should_receive(:ids).once.with.and_return (:'00001'..:'00010').to_a
|
134
|
+
|
135
|
+
@combinations.ids.should == (:'00001'..:'0010').to_a
|
136
|
+
end
|
130
137
|
it "should intersect correctly when intermediate intersect result is empty" do
|
131
138
|
@combination1.should_receive(:ids).once.with.and_return (1..100_000).to_a
|
132
139
|
@combination2.should_receive(:ids).once.with.and_return (11..100).to_a
|
@@ -2,36 +2,11 @@ require 'spec_helper'
|
|
2
2
|
|
3
3
|
describe Sources::Couch do
|
4
4
|
|
5
|
-
describe 'UUIDKeys' do
|
6
|
-
before(:each) do
|
7
|
-
@keys = Sources::Couch::UUIDKeys.new
|
8
|
-
end
|
9
|
-
it 'converts uuids' do
|
10
|
-
@keys.to_i('550e8400-e29b-41d4-a716-446655440000').should == 113059749145936325402354257176981405696
|
11
|
-
end
|
12
|
-
end
|
13
|
-
describe 'HexKeys' do
|
14
|
-
before(:each) do
|
15
|
-
@keys = Sources::Couch::HexKeys.new
|
16
|
-
end
|
17
|
-
it 'converts uuids' do
|
18
|
-
@keys.to_i('7f').should == 127
|
19
|
-
end
|
20
|
-
end
|
21
|
-
describe 'IntegerKeys' do
|
22
|
-
before(:each) do
|
23
|
-
@keys = Sources::Couch::IntegerKeys.new
|
24
|
-
end
|
25
|
-
it 'converts uuids' do
|
26
|
-
@keys.to_i('123').should == '123'
|
27
|
-
end
|
28
|
-
end
|
29
|
-
|
30
5
|
describe 'special keys' do
|
31
6
|
context 'uuid keys' do
|
32
7
|
context "with database" do
|
33
8
|
before(:each) do
|
34
|
-
@source = Sources::Couch.new :a, :b, :c, url: 'http://localhost:5984/picky'
|
9
|
+
@source = Sources::Couch.new :a, :b, :c, url: 'http://localhost:5984/picky'
|
35
10
|
RestClient::Request.should_receive(:execute).any_number_of_times.and_return %{{"rows":[{"doc":{"_id":"550e8400-e29b-41d4-a716-446655440000","a":"a data","b":"b data","c":"c data"}}]}}
|
36
11
|
end
|
37
12
|
|
@@ -39,7 +14,7 @@ describe Sources::Couch do
|
|
39
14
|
it "yields the right data" do
|
40
15
|
field = stub :b, :from => :b
|
41
16
|
@source.harvest :anything, field do |id, token|
|
42
|
-
id.should eql(
|
17
|
+
id.should eql('550e8400-e29b-41d4-a716-446655440000')
|
43
18
|
token.should eql('b data')
|
44
19
|
end.should have(1).item
|
45
20
|
end
|
@@ -57,7 +32,7 @@ describe Sources::Couch do
|
|
57
32
|
context 'integer keys' do
|
58
33
|
context "with database" do
|
59
34
|
before(:each) do
|
60
|
-
@source = Sources::Couch.new :a, :b, :c, url: 'http://localhost:5984/picky'
|
35
|
+
@source = Sources::Couch.new :a, :b, :c, url: 'http://localhost:5984/picky'
|
61
36
|
RestClient::Request.should_receive(:execute).any_number_of_times.and_return %{{"rows":[{"doc":{"_id":"123","a":"a data","b":"b data","c":"c data"}}]}}
|
62
37
|
end
|
63
38
|
|
@@ -99,7 +74,7 @@ describe Sources::Couch do
|
|
99
74
|
it "yields the right data" do
|
100
75
|
field = stub :b, :from => :b
|
101
76
|
@source.harvest :anything, field do |id, token|
|
102
|
-
id.should eql(
|
77
|
+
id.should eql('7f')
|
103
78
|
token.should eql('b data')
|
104
79
|
end.should have(1).item
|
105
80
|
end
|
@@ -35,7 +35,7 @@ describe Sources::CSV do
|
|
35
35
|
end
|
36
36
|
context "with file" do
|
37
37
|
before(:each) do
|
38
|
-
::CSV.should_receive(:foreach).any_number_of_times.and_yield ['7', 'a data', 'b data', 'c data']
|
38
|
+
::CSV.should_receive(:foreach).any_number_of_times.and_yield [' 7', 'a data', 'b data', 'c data']
|
39
39
|
end
|
40
40
|
context 'without separator' do
|
41
41
|
before(:each) do
|
@@ -45,14 +45,34 @@ describe Sources::CSV do
|
|
45
45
|
it "should yield the right data" do
|
46
46
|
field = stub :b, :from => :b
|
47
47
|
@source.harvest :anything, field do |id, token|
|
48
|
-
[id, token].should == [7, 'b data']
|
48
|
+
[id, token].should == [' 7', 'b data']
|
49
49
|
end
|
50
50
|
end
|
51
51
|
end
|
52
52
|
describe "get_data" do
|
53
53
|
it "should yield each line" do
|
54
54
|
@source.get_data do |data|
|
55
|
-
data.should == ['7', 'a data', 'b data', 'c data']
|
55
|
+
data.should == [' 7', 'a data', 'b data', 'c data']
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
60
|
+
context 'with key_format method' do
|
61
|
+
before(:each) do
|
62
|
+
@source = Sources::CSV.new :a, :b, :c, :file => :some_file, :key_format => :strip
|
63
|
+
end
|
64
|
+
describe "harvest" do
|
65
|
+
it "should yield the right data" do
|
66
|
+
field = stub :b, :from => :b
|
67
|
+
@source.harvest :anything, field do |id, token|
|
68
|
+
[id, token].should == [' 7', 'b data']
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
72
|
+
describe "get_data" do
|
73
|
+
it "should yield each line" do
|
74
|
+
@source.get_data do |data|
|
75
|
+
data.should == [' 7', 'a data', 'b data', 'c data']
|
56
76
|
end
|
57
77
|
end
|
58
78
|
end
|
data/spec/specific/speed_spec.rb
CHANGED
@@ -1,7 +1,53 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
#
|
1
3
|
require File.dirname(__FILE__) + '/../spec_helper'
|
2
4
|
|
3
5
|
describe "Speccing Ruby for speed" do
|
4
|
-
describe "various versions for allocation id concatenating" do
|
6
|
+
describe "various versions for allocation id concatenating – with symbols" do
|
7
|
+
before(:each) do
|
8
|
+
@allocs = [:hello, :speed, :test]
|
9
|
+
@ids = {
|
10
|
+
:hello => (:'000_001'..:'100_000').to_a,
|
11
|
+
:speed => (:'0_001'..:'5_000').to_a,
|
12
|
+
:test => (:'0_001'..:'1_000').to_a
|
13
|
+
}
|
14
|
+
end
|
15
|
+
describe "+" do
|
16
|
+
it "should be fast" do
|
17
|
+
performance_of do
|
18
|
+
@allocs.inject([]) do |total, alloc|
|
19
|
+
total + @ids[alloc]
|
20
|
+
end
|
21
|
+
end.should < 0.0025
|
22
|
+
end
|
23
|
+
end
|
24
|
+
describe "map and flatten!(1)" do
|
25
|
+
it "should be fast" do
|
26
|
+
performance_of do
|
27
|
+
@allocs.map { |alloc| @ids[alloc] }.flatten!(1)
|
28
|
+
end.should < 0.02
|
29
|
+
end
|
30
|
+
end
|
31
|
+
describe "<< and flatten!(1)" do
|
32
|
+
it "should be fast" do
|
33
|
+
performance_of do
|
34
|
+
@allocs.inject([]) do |total, alloc|
|
35
|
+
total << @ids[alloc]
|
36
|
+
end.flatten!(1)
|
37
|
+
end.should < 0.02
|
38
|
+
end
|
39
|
+
end
|
40
|
+
describe "<< and flatten!" do
|
41
|
+
it "should be fast" do
|
42
|
+
performance_of do
|
43
|
+
@allocs.inject([]) do |total, alloc|
|
44
|
+
total << @ids[alloc]
|
45
|
+
end.flatten!
|
46
|
+
end.should < 0.02
|
47
|
+
end
|
48
|
+
end
|
49
|
+
end
|
50
|
+
describe "various versions for allocation id concatenating – with integers" do
|
5
51
|
before(:each) do
|
6
52
|
@allocs = [:hello, :speed, :test]
|
7
53
|
@ids = {
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 1
|
7
7
|
- 3
|
8
|
-
- 0
|
9
|
-
version: 1.3.0
|
8
|
+
- 1
|
9
|
+
version: 1.3.1
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Florian Hanke
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2011-01-
|
17
|
+
date: 2011-01-30 00:00:00 +01:00
|
18
18
|
default_executable: picky
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|