picky 1.3.0 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/picky/index/file/text.rb +4 -3
- data/lib/picky/indexers/serial.rb +8 -0
- data/lib/picky/indexing/bundle.rb +2 -1
- data/lib/picky/indexing/category.rb +9 -0
- data/lib/picky/interfaces/live_parameters.rb +3 -1
- data/lib/picky/sources/base.rb +2 -0
- data/lib/picky/sources/couch.rb +7 -61
- data/lib/picky/sources/csv.rb +7 -4
- data/lib/picky/sources/delicious.rb +1 -2
- data/lib/picky/sources/wrappers/base.rb +1 -1
- data/spec/ext/performant_spec.rb +40 -1
- data/spec/lib/index/file/text_spec.rb +1 -1
- data/spec/lib/indexers/serial_spec.rb +19 -0
- data/spec/lib/indexing/bundle_spec.rb +44 -14
- data/spec/lib/indexing/category_spec.rb +3 -2
- data/spec/lib/query/allocation_spec.rb +26 -0
- data/spec/lib/query/allocations_spec.rb +32 -13
- data/spec/lib/query/combinations_spec.rb +7 -0
- data/spec/lib/sources/couch_spec.rb +4 -29
- data/spec/lib/sources/csv_spec.rb +23 -3
- data/spec/specific/speed_spec.rb +47 -1
- metadata +3 -3
@@ -29,14 +29,15 @@ module Index
|
|
29
29
|
# * id,data\n
|
30
30
|
# * id,data\n
|
31
31
|
#
|
32
|
-
# Yields an id and a symbol token.
|
32
|
+
# Yields an id string and a symbol token.
|
33
33
|
#
|
34
34
|
def retrieve
|
35
|
-
id
|
35
|
+
id = nil
|
36
|
+
token = nil
|
36
37
|
::File.open(cache_path, 'r:binary') do |file|
|
37
38
|
file.each_line do |line|
|
38
39
|
id, token = line.split ?,, 2
|
39
|
-
yield id
|
40
|
+
yield id, (token.chomp! || token).to_sym
|
40
41
|
end
|
41
42
|
end
|
42
43
|
end
|
@@ -20,6 +20,14 @@ module Indexers
|
|
20
20
|
raise NoSourceSpecifiedException.new("No source given for #{@configuration.identifier}.")
|
21
21
|
end
|
22
22
|
|
23
|
+
# Delegates the key format to the source.
|
24
|
+
#
|
25
|
+
# Default is to_i.
|
26
|
+
#
|
27
|
+
def key_format
|
28
|
+
@source.key_format || :to_i
|
29
|
+
end
|
30
|
+
|
23
31
|
# Selects the original id (indexed id) and a column to process. The column data is called "token".
|
24
32
|
#
|
25
33
|
# Note: Puts together the parts first in an array, then releasing the array from time to time by joining.
|
@@ -68,9 +68,10 @@ module Indexing # :nodoc:all
|
|
68
68
|
# and later dumping the optimized index.
|
69
69
|
#
|
70
70
|
def retrieve
|
71
|
+
key_format = self[:key_format] || :to_i
|
71
72
|
files.retrieve do |id, token|
|
72
73
|
initialize_index_for token
|
73
|
-
index[token] << id
|
74
|
+
index[token] << id.send(key_format)
|
74
75
|
end
|
75
76
|
end
|
76
77
|
# Sets up a piece of the index for the given token.
|
@@ -80,8 +80,17 @@ module Indexing
|
|
80
80
|
#
|
81
81
|
def cache
|
82
82
|
prepare_index_directory
|
83
|
+
configure
|
83
84
|
generate_caches
|
84
85
|
end
|
86
|
+
# We need to set what formatting method should be used.
|
87
|
+
# Uses the one defined in the indexer.
|
88
|
+
#
|
89
|
+
def configure
|
90
|
+
key_format = indexer.key_format
|
91
|
+
exact[:key_format] = key_format
|
92
|
+
partial[:key_format] = key_format
|
93
|
+
end
|
85
94
|
def generate_caches
|
86
95
|
generate_caches_from_source
|
87
96
|
generate_partial
|
@@ -22,7 +22,7 @@ module Interfaces
|
|
22
22
|
#
|
23
23
|
Thread.new do
|
24
24
|
loop do
|
25
|
-
|
25
|
+
IO.select([@child], nil, nil, 2) or next
|
26
26
|
result = @child.gets ';;;'
|
27
27
|
pid, configuration_hash = eval result
|
28
28
|
next unless Hash === configuration_hash
|
@@ -123,6 +123,8 @@ module Interfaces
|
|
123
123
|
send :"#{key}=", new_value
|
124
124
|
end
|
125
125
|
rescue StandardError => e
|
126
|
+
# Catch any error and reraise as config error.
|
127
|
+
#
|
126
128
|
raise CouldNotUpdateConfigurationError.new current_key, e.message
|
127
129
|
end
|
128
130
|
end
|
data/lib/picky/sources/base.rb
CHANGED
data/lib/picky/sources/couch.rb
CHANGED
@@ -9,21 +9,6 @@ module Sources
|
|
9
9
|
|
10
10
|
# A Couch database source.
|
11
11
|
#
|
12
|
-
# <b>IMPORTANT NOTE:
|
13
|
-
#
|
14
|
-
# Since Picky currently only handles integer ids (we're working on this),
|
15
|
-
# and CouchDB uses hexadecimal ids, this source automatically
|
16
|
-
# recalculates a couch id such as
|
17
|
-
# fa3f2577a8dbc6a91d7f9989cdffd38e
|
18
|
-
# into
|
19
|
-
# 332634873577882511228481564366832915342
|
20
|
-
# using String#hex.
|
21
|
-
#
|
22
|
-
# When using the integer ids in a webapp to get your
|
23
|
-
# objects from CouchDB, please do a Integer#to_s(16) on the
|
24
|
-
# ids you get from Picky before you use them to get your object from CouchDB.</b>
|
25
|
-
#
|
26
|
-
#
|
27
12
|
# Options:
|
28
13
|
# * url
|
29
14
|
# and all the options of a <tt>RestClient::Resource</tt>.
|
@@ -35,49 +20,6 @@ module Sources
|
|
35
20
|
#
|
36
21
|
class Couch < Base
|
37
22
|
|
38
|
-
# If your Couch DB uses UUID keys, use
|
39
|
-
# Sources::Couch.new(:title, keys: Sources::Couch::UUIDKeys.new)
|
40
|
-
# Do not forget to reconvert the UUID Key from an integer in the client:
|
41
|
-
# uuid = UUIDTools::UUID.parse_int(id)
|
42
|
-
# uuid.to_s
|
43
|
-
#
|
44
|
-
class UUIDKeys
|
45
|
-
def initialize
|
46
|
-
# Tries to require the uuidtools gem.
|
47
|
-
#
|
48
|
-
begin
|
49
|
-
require 'uuidtools'
|
50
|
-
rescue LoadError
|
51
|
-
puts_gem_missing 'uuidtools', 'UUID keys in a CouchDB source'
|
52
|
-
exit 1
|
53
|
-
end
|
54
|
-
end
|
55
|
-
def to_i id
|
56
|
-
uuid = UUIDTools::UUID.parse id
|
57
|
-
uuid.to_i
|
58
|
-
end
|
59
|
-
end
|
60
|
-
|
61
|
-
# If your Couch DB uses Hex keys, use
|
62
|
-
# Sources::Couch.new(:title, keys: Sources::Couch::HexKeys.new)
|
63
|
-
# Do not forget to reconvert the Hex Key from an integer in the client:
|
64
|
-
# id.to_s(16)
|
65
|
-
#
|
66
|
-
class HexKeys
|
67
|
-
def to_i id
|
68
|
-
id.hex
|
69
|
-
end
|
70
|
-
end
|
71
|
-
|
72
|
-
# If your Couch DB uses Integer keys, use
|
73
|
-
# Sources::Couch.new(:title, keys: Sources::Couch::IntegerKeys.new)
|
74
|
-
#
|
75
|
-
class IntegerKeys
|
76
|
-
def to_i id
|
77
|
-
id
|
78
|
-
end
|
79
|
-
end
|
80
|
-
|
81
23
|
#
|
82
24
|
#
|
83
25
|
def initialize *category_names, options
|
@@ -85,8 +27,12 @@ module Sources
|
|
85
27
|
|
86
28
|
Hash === options && options[:url] || raise_no_db_given(category_names)
|
87
29
|
@db = RestClient::Resource.new options.delete(:url), options
|
88
|
-
|
89
|
-
|
30
|
+
end
|
31
|
+
|
32
|
+
# Default key format method for couch DB is to_sym.
|
33
|
+
#
|
34
|
+
def key_format
|
35
|
+
:to_sym
|
90
36
|
end
|
91
37
|
|
92
38
|
# Tries to require the rest_client gem.
|
@@ -106,7 +52,7 @@ module Sources
|
|
106
52
|
def harvest type, category
|
107
53
|
category_name = category.from.to_s
|
108
54
|
get_data do |doc|
|
109
|
-
yield
|
55
|
+
yield doc[@@id_key], doc[category_name] || next
|
110
56
|
end
|
111
57
|
end
|
112
58
|
|
data/lib/picky/sources/csv.rb
CHANGED
@@ -26,7 +26,7 @@ module Sources
|
|
26
26
|
|
27
27
|
# The options that were passed into #new.
|
28
28
|
#
|
29
|
-
attr_reader :csv_options
|
29
|
+
attr_reader :csv_options, :key_format
|
30
30
|
|
31
31
|
# The data category names.
|
32
32
|
#
|
@@ -36,8 +36,11 @@ module Sources
|
|
36
36
|
require 'csv'
|
37
37
|
@category_names = category_names
|
38
38
|
|
39
|
-
@csv_options
|
40
|
-
@file_name
|
39
|
+
@csv_options = Hash === options && options || {}
|
40
|
+
@file_name = @csv_options.delete(:file) || raise_no_file_given(category_names)
|
41
|
+
|
42
|
+
key_format = options.delete :key_format
|
43
|
+
@key_format = key_format && key_format.to_sym || :to_i
|
41
44
|
end
|
42
45
|
|
43
46
|
# Raises a NoCSVFileGiven exception.
|
@@ -51,7 +54,7 @@ module Sources
|
|
51
54
|
def harvest _, category
|
52
55
|
index = category_names.index category.from
|
53
56
|
get_data do |ary|
|
54
|
-
indexed_id = ary.shift
|
57
|
+
indexed_id = ary.shift
|
55
58
|
text = ary[index]
|
56
59
|
next unless text
|
57
60
|
text.force_encoding 'utf-8' # TODO Still needed?
|
@@ -27,8 +27,7 @@ module Sources
|
|
27
27
|
# Harvests the data to index.
|
28
28
|
#
|
29
29
|
def harvest _, category
|
30
|
-
get_data do |
|
31
|
-
indexed_id = uid
|
30
|
+
get_data do |indexed_id, data|
|
32
31
|
text = data[category.from]
|
33
32
|
next unless text
|
34
33
|
text.force_encoding 'utf-8' # TODO Still needed?
|
data/spec/ext/performant_spec.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
require File.dirname(__FILE__) + '/../spec_helper'
|
2
2
|
|
3
3
|
describe Performant::Array do
|
4
|
-
|
4
|
+
|
5
5
|
describe "memory_efficient_intersect" do
|
6
6
|
it "should intersect empty arrays correctly" do
|
7
7
|
arys = [[3,4], [1,2,3], []]
|
@@ -51,5 +51,44 @@ describe Performant::Array do
|
|
51
51
|
end.should < 0.0015
|
52
52
|
end
|
53
53
|
end
|
54
|
+
|
55
|
+
describe "memory_efficient_intersect with symbols" do
|
56
|
+
it "should intersect empty arrays correctly" do
|
57
|
+
arys = [[:c,:d], [:a,:b,:c], []]
|
58
|
+
|
59
|
+
Performant::Array.memory_efficient_intersect(arys.sort_by(&:size)).should == []
|
60
|
+
end
|
61
|
+
it "should handle intermediate empty results correctly" do
|
62
|
+
arys = [[:e,:d], [:a,:b,:c], [:c,:d,:e,:h,:i]]
|
63
|
+
|
64
|
+
Performant::Array.memory_efficient_intersect(arys.sort_by(&:size)).should == []
|
65
|
+
end
|
66
|
+
it "should intersect correctly" do
|
67
|
+
arys = [[:c,:d], [:a,:b,:c], [:c,:d,:e,:h,:i]]
|
68
|
+
|
69
|
+
Performant::Array.memory_efficient_intersect(arys.sort_by(&:size)).should == [:c]
|
70
|
+
end
|
71
|
+
it "should intersect many arrays" do
|
72
|
+
arys = [[:c,:d,:e,:f,:g], [:a,:b,:c,:e,:f,:g], [:c,:d,:e,:f,:g,:h,:i], [:a,:b,:c,:d,:e,:f,:g,:h,:i,:j], [:b,:c,:e,:f,:g,:s], [:a,:b,:c,:d,:e,:f,:g,:h,:i,:j], [:b,:c,:e,:f,:g,:s]]
|
73
|
+
|
74
|
+
Performant::Array.memory_efficient_intersect(arys.sort_by(&:size)).should == [:c,:e,:f,:g]
|
75
|
+
end
|
76
|
+
it "should be optimal for 2 small arrays of 50/10_000" do
|
77
|
+
arys = [(:'1'..:'50').to_a, (:'10_000'..:'20_000').to_a << 7]
|
78
|
+
|
79
|
+
# brute force
|
80
|
+
performance_of { Performant::Array.memory_efficient_intersect(arys.sort_by(&:size)) }.should < 0.001
|
81
|
+
end
|
82
|
+
it "should be optimal for 2 small arrays of 50/10_000" do
|
83
|
+
arys = [(:'1'..:'50').to_a, (:'10_000'..:'20_000').to_a << 7]
|
84
|
+
|
85
|
+
# &
|
86
|
+
performance_of do
|
87
|
+
arys.inject(arys.shift.dup) do |total, ary|
|
88
|
+
total & arys
|
89
|
+
end
|
90
|
+
end.should < 0.0015
|
91
|
+
end
|
92
|
+
end
|
54
93
|
|
55
94
|
end
|
@@ -13,6 +13,25 @@ describe Indexers::Serial do
|
|
13
13
|
@indexer.stub! :timed_exclaim
|
14
14
|
end
|
15
15
|
|
16
|
+
describe 'key_format' do
|
17
|
+
context 'source has key_format' do
|
18
|
+
before(:each) do
|
19
|
+
@source.stub! :key_format => :some_key_format
|
20
|
+
end
|
21
|
+
it 'returns what the source returns' do
|
22
|
+
@indexer.key_format.should == :some_key_format
|
23
|
+
end
|
24
|
+
end
|
25
|
+
context 'source does not have key_format' do
|
26
|
+
before(:each) do
|
27
|
+
@source.stub! :key_format => nil
|
28
|
+
end
|
29
|
+
it 'returns :to_i' do
|
30
|
+
@indexer.key_format.should == :to_i
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
16
35
|
describe "tokenizer" do
|
17
36
|
it "returns the right one" do
|
18
37
|
@indexer.tokenizer.should == @tokenizer
|
@@ -3,9 +3,9 @@ require 'spec_helper'
|
|
3
3
|
describe Indexing::Bundle do
|
4
4
|
|
5
5
|
before(:each) do
|
6
|
-
@
|
7
|
-
@category
|
8
|
-
@configuration
|
6
|
+
@internal_index = stub :index, :name => :some_index
|
7
|
+
@category = stub :category, :name => :some_category
|
8
|
+
@configuration = Configuration::Index.new @internal_index, @category
|
9
9
|
|
10
10
|
@partial = stub :partial
|
11
11
|
@weights = stub :weights
|
@@ -42,17 +42,47 @@ describe Indexing::Bundle do
|
|
42
42
|
end
|
43
43
|
end
|
44
44
|
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
45
|
+
describe 'retrieve' do
|
46
|
+
before(:each) do
|
47
|
+
files = stub :files
|
48
|
+
files.should_receive(:retrieve).once.and_yield ' 1234', :some_token
|
49
|
+
@index.stub! :files => files
|
50
|
+
|
51
|
+
@ary = stub :ary
|
52
|
+
@internal_index.should_receive(:[]).any_number_of_times.and_return @ary
|
53
|
+
@index.stub! :index => @internal_index
|
54
|
+
end
|
55
|
+
context 'id key format' do
|
56
|
+
before(:each) do
|
57
|
+
@index.should_receive(:[]).once.with(:key_format).and_return :to_i
|
58
|
+
end
|
59
|
+
it 'should call the other methods correctly' do
|
60
|
+
@ary.should_receive(:<<).once.with 1234
|
61
|
+
|
62
|
+
@index.retrieve
|
63
|
+
end
|
64
|
+
end
|
65
|
+
context 'other key format' do
|
66
|
+
before(:each) do
|
67
|
+
@index.should_receive(:[]).once.with(:key_format).and_return :strip
|
68
|
+
end
|
69
|
+
it 'should call the other methods correctly' do
|
70
|
+
@ary.should_receive(:<<).once.with '1234'
|
71
|
+
|
72
|
+
@index.retrieve
|
73
|
+
end
|
74
|
+
end
|
75
|
+
context 'no key format - default' do
|
76
|
+
before(:each) do
|
77
|
+
@index.should_receive(:[]).once.with(:key_format).and_return nil
|
78
|
+
end
|
79
|
+
it 'should call the other methods correctly' do
|
80
|
+
@ary.should_receive(:<<).once.with 1234
|
81
|
+
|
82
|
+
@index.retrieve
|
83
|
+
end
|
84
|
+
end
|
85
|
+
end
|
56
86
|
|
57
87
|
describe 'load_from_index_file' do
|
58
88
|
it 'should call two methods in order' do
|
@@ -3,8 +3,9 @@ require 'spec_helper'
|
|
3
3
|
describe Indexing::Category do
|
4
4
|
|
5
5
|
before(:each) do
|
6
|
-
@index
|
7
|
-
@
|
6
|
+
@index = stub :index, :name => :some_index
|
7
|
+
@source = stub :some_given_source, :key_format => nil
|
8
|
+
@category = Indexing::Category.new :some_category, @index, :source => @source
|
8
9
|
end
|
9
10
|
context "unit specs" do
|
10
11
|
before(:each) do
|
@@ -99,6 +99,32 @@ describe Query::Allocation do
|
|
99
99
|
@allocation.process!(20, 10).should == []
|
100
100
|
end
|
101
101
|
end
|
102
|
+
context 'with symbol ids' do
|
103
|
+
before(:each) do
|
104
|
+
@allocation.stub! :calculate_ids => [:a,:b,:c,:d,:e,:f,:g,:h,:i,:j]
|
105
|
+
end
|
106
|
+
it 'should process right' do
|
107
|
+
@allocation.process!(0, 0).should == []
|
108
|
+
end
|
109
|
+
it 'should process right' do
|
110
|
+
@allocation.process!(0, 10).should == []
|
111
|
+
end
|
112
|
+
it 'should process right' do
|
113
|
+
@allocation.process!(5, 0).should == [:a,:b,:c,:d,:e]
|
114
|
+
end
|
115
|
+
it 'should process right' do
|
116
|
+
@allocation.process!(5, 5).should == [:f,:g,:h,:i,:j]
|
117
|
+
end
|
118
|
+
it 'should process right' do
|
119
|
+
@allocation.process!(20, 0).should == [:a,:b,:c,:d,:e,:f,:g,:h,:i,:j]
|
120
|
+
end
|
121
|
+
it 'should process right' do
|
122
|
+
@allocation.process!(20, 5).should == [:f,:g,:h,:i,:j]
|
123
|
+
end
|
124
|
+
it 'should process right' do
|
125
|
+
@allocation.process!(20, 10).should == []
|
126
|
+
end
|
127
|
+
end
|
102
128
|
end
|
103
129
|
|
104
130
|
describe 'to_result' do
|
@@ -69,20 +69,39 @@ describe Query::Allocations do
|
|
69
69
|
end
|
70
70
|
|
71
71
|
describe 'ids' do
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
72
|
+
context 'integers' do
|
73
|
+
before(:each) do
|
74
|
+
@allocation1 = stub :allocation1, :ids => [1, 2, 3, 4]
|
75
|
+
@allocation2 = stub :allocation2, :ids => [5, 6, 7]
|
76
|
+
@allocation3 = stub :allocation3, :ids => [8, 9]
|
77
|
+
@allocations = Query::Allocations.new [@allocation1, @allocation2, @allocation3]
|
78
|
+
end
|
79
|
+
it 'should return the right amount of ids' do
|
80
|
+
@allocations.ids(0).should == []
|
81
|
+
end
|
82
|
+
it 'should return the right amount of ids' do
|
83
|
+
@allocations.ids(6).should == [1,2,3,4,5,6]
|
84
|
+
end
|
85
|
+
it 'should return the right amount of ids' do
|
86
|
+
@allocations.ids.should == [1,2,3,4,5,6,7,8,9]
|
87
|
+
end
|
83
88
|
end
|
84
|
-
|
85
|
-
|
89
|
+
context 'symbols' do
|
90
|
+
before(:each) do
|
91
|
+
@allocation1 = stub :allocation1, :ids => [:a, :b, :c, :d]
|
92
|
+
@allocation2 = stub :allocation2, :ids => [:e, :f, :g]
|
93
|
+
@allocation3 = stub :allocation3, :ids => [:h, :i]
|
94
|
+
@allocations = Query::Allocations.new [@allocation1, @allocation2, @allocation3]
|
95
|
+
end
|
96
|
+
it 'should return the right amount of ids' do
|
97
|
+
@allocations.ids(0).should == []
|
98
|
+
end
|
99
|
+
it 'should return the right amount of ids' do
|
100
|
+
@allocations.ids(6).should == [:a,:b,:c,:d,:e,:f]
|
101
|
+
end
|
102
|
+
it 'should return the right amount of ids' do
|
103
|
+
@allocations.ids.should == [:a,:b,:c,:d,:e,:f,:g,:h,:i]
|
104
|
+
end
|
86
105
|
end
|
87
106
|
end
|
88
107
|
|
@@ -127,6 +127,13 @@ describe 'Query::Combinations' do
|
|
127
127
|
|
128
128
|
@combinations.ids.should == (1..10).to_a
|
129
129
|
end
|
130
|
+
it "should intersect symbol_keys correctly" do
|
131
|
+
@combination1.should_receive(:ids).once.with.and_return (:'00001'..:'10000').to_a
|
132
|
+
@combination2.should_receive(:ids).once.with.and_return (:'00001'..:'00100').to_a
|
133
|
+
@combination3.should_receive(:ids).once.with.and_return (:'00001'..:'00010').to_a
|
134
|
+
|
135
|
+
@combinations.ids.should == (:'00001'..:'0010').to_a
|
136
|
+
end
|
130
137
|
it "should intersect correctly when intermediate intersect result is empty" do
|
131
138
|
@combination1.should_receive(:ids).once.with.and_return (1..100_000).to_a
|
132
139
|
@combination2.should_receive(:ids).once.with.and_return (11..100).to_a
|
@@ -2,36 +2,11 @@ require 'spec_helper'
|
|
2
2
|
|
3
3
|
describe Sources::Couch do
|
4
4
|
|
5
|
-
describe 'UUIDKeys' do
|
6
|
-
before(:each) do
|
7
|
-
@keys = Sources::Couch::UUIDKeys.new
|
8
|
-
end
|
9
|
-
it 'converts uuids' do
|
10
|
-
@keys.to_i('550e8400-e29b-41d4-a716-446655440000').should == 113059749145936325402354257176981405696
|
11
|
-
end
|
12
|
-
end
|
13
|
-
describe 'HexKeys' do
|
14
|
-
before(:each) do
|
15
|
-
@keys = Sources::Couch::HexKeys.new
|
16
|
-
end
|
17
|
-
it 'converts uuids' do
|
18
|
-
@keys.to_i('7f').should == 127
|
19
|
-
end
|
20
|
-
end
|
21
|
-
describe 'IntegerKeys' do
|
22
|
-
before(:each) do
|
23
|
-
@keys = Sources::Couch::IntegerKeys.new
|
24
|
-
end
|
25
|
-
it 'converts uuids' do
|
26
|
-
@keys.to_i('123').should == '123'
|
27
|
-
end
|
28
|
-
end
|
29
|
-
|
30
5
|
describe 'special keys' do
|
31
6
|
context 'uuid keys' do
|
32
7
|
context "with database" do
|
33
8
|
before(:each) do
|
34
|
-
@source = Sources::Couch.new :a, :b, :c, url: 'http://localhost:5984/picky'
|
9
|
+
@source = Sources::Couch.new :a, :b, :c, url: 'http://localhost:5984/picky'
|
35
10
|
RestClient::Request.should_receive(:execute).any_number_of_times.and_return %{{"rows":[{"doc":{"_id":"550e8400-e29b-41d4-a716-446655440000","a":"a data","b":"b data","c":"c data"}}]}}
|
36
11
|
end
|
37
12
|
|
@@ -39,7 +14,7 @@ describe Sources::Couch do
|
|
39
14
|
it "yields the right data" do
|
40
15
|
field = stub :b, :from => :b
|
41
16
|
@source.harvest :anything, field do |id, token|
|
42
|
-
id.should eql(
|
17
|
+
id.should eql('550e8400-e29b-41d4-a716-446655440000')
|
43
18
|
token.should eql('b data')
|
44
19
|
end.should have(1).item
|
45
20
|
end
|
@@ -57,7 +32,7 @@ describe Sources::Couch do
|
|
57
32
|
context 'integer keys' do
|
58
33
|
context "with database" do
|
59
34
|
before(:each) do
|
60
|
-
@source = Sources::Couch.new :a, :b, :c, url: 'http://localhost:5984/picky'
|
35
|
+
@source = Sources::Couch.new :a, :b, :c, url: 'http://localhost:5984/picky'
|
61
36
|
RestClient::Request.should_receive(:execute).any_number_of_times.and_return %{{"rows":[{"doc":{"_id":"123","a":"a data","b":"b data","c":"c data"}}]}}
|
62
37
|
end
|
63
38
|
|
@@ -99,7 +74,7 @@ describe Sources::Couch do
|
|
99
74
|
it "yields the right data" do
|
100
75
|
field = stub :b, :from => :b
|
101
76
|
@source.harvest :anything, field do |id, token|
|
102
|
-
id.should eql(
|
77
|
+
id.should eql('7f')
|
103
78
|
token.should eql('b data')
|
104
79
|
end.should have(1).item
|
105
80
|
end
|
@@ -35,7 +35,7 @@ describe Sources::CSV do
|
|
35
35
|
end
|
36
36
|
context "with file" do
|
37
37
|
before(:each) do
|
38
|
-
::CSV.should_receive(:foreach).any_number_of_times.and_yield ['7', 'a data', 'b data', 'c data']
|
38
|
+
::CSV.should_receive(:foreach).any_number_of_times.and_yield [' 7', 'a data', 'b data', 'c data']
|
39
39
|
end
|
40
40
|
context 'without separator' do
|
41
41
|
before(:each) do
|
@@ -45,14 +45,34 @@ describe Sources::CSV do
|
|
45
45
|
it "should yield the right data" do
|
46
46
|
field = stub :b, :from => :b
|
47
47
|
@source.harvest :anything, field do |id, token|
|
48
|
-
[id, token].should == [7, 'b data']
|
48
|
+
[id, token].should == [' 7', 'b data']
|
49
49
|
end
|
50
50
|
end
|
51
51
|
end
|
52
52
|
describe "get_data" do
|
53
53
|
it "should yield each line" do
|
54
54
|
@source.get_data do |data|
|
55
|
-
data.should == ['7', 'a data', 'b data', 'c data']
|
55
|
+
data.should == [' 7', 'a data', 'b data', 'c data']
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
60
|
+
context 'with key_format method' do
|
61
|
+
before(:each) do
|
62
|
+
@source = Sources::CSV.new :a, :b, :c, :file => :some_file, :key_format => :strip
|
63
|
+
end
|
64
|
+
describe "harvest" do
|
65
|
+
it "should yield the right data" do
|
66
|
+
field = stub :b, :from => :b
|
67
|
+
@source.harvest :anything, field do |id, token|
|
68
|
+
[id, token].should == [' 7', 'b data']
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
72
|
+
describe "get_data" do
|
73
|
+
it "should yield each line" do
|
74
|
+
@source.get_data do |data|
|
75
|
+
data.should == [' 7', 'a data', 'b data', 'c data']
|
56
76
|
end
|
57
77
|
end
|
58
78
|
end
|
data/spec/specific/speed_spec.rb
CHANGED
@@ -1,7 +1,53 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
#
|
1
3
|
require File.dirname(__FILE__) + '/../spec_helper'
|
2
4
|
|
3
5
|
describe "Speccing Ruby for speed" do
|
4
|
-
describe "various versions for allocation id concatenating" do
|
6
|
+
describe "various versions for allocation id concatenating – with symbols" do
|
7
|
+
before(:each) do
|
8
|
+
@allocs = [:hello, :speed, :test]
|
9
|
+
@ids = {
|
10
|
+
:hello => (:'000_001'..:'100_000').to_a,
|
11
|
+
:speed => (:'0_001'..:'5_000').to_a,
|
12
|
+
:test => (:'0_001'..:'1_000').to_a
|
13
|
+
}
|
14
|
+
end
|
15
|
+
describe "+" do
|
16
|
+
it "should be fast" do
|
17
|
+
performance_of do
|
18
|
+
@allocs.inject([]) do |total, alloc|
|
19
|
+
total + @ids[alloc]
|
20
|
+
end
|
21
|
+
end.should < 0.0025
|
22
|
+
end
|
23
|
+
end
|
24
|
+
describe "map and flatten!(1)" do
|
25
|
+
it "should be fast" do
|
26
|
+
performance_of do
|
27
|
+
@allocs.map { |alloc| @ids[alloc] }.flatten!(1)
|
28
|
+
end.should < 0.02
|
29
|
+
end
|
30
|
+
end
|
31
|
+
describe "<< and flatten!(1)" do
|
32
|
+
it "should be fast" do
|
33
|
+
performance_of do
|
34
|
+
@allocs.inject([]) do |total, alloc|
|
35
|
+
total << @ids[alloc]
|
36
|
+
end.flatten!(1)
|
37
|
+
end.should < 0.02
|
38
|
+
end
|
39
|
+
end
|
40
|
+
describe "<< and flatten!" do
|
41
|
+
it "should be fast" do
|
42
|
+
performance_of do
|
43
|
+
@allocs.inject([]) do |total, alloc|
|
44
|
+
total << @ids[alloc]
|
45
|
+
end.flatten!
|
46
|
+
end.should < 0.02
|
47
|
+
end
|
48
|
+
end
|
49
|
+
end
|
50
|
+
describe "various versions for allocation id concatenating – with integers" do
|
5
51
|
before(:each) do
|
6
52
|
@allocs = [:hello, :speed, :test]
|
7
53
|
@ids = {
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 1
|
7
7
|
- 3
|
8
|
-
-
|
9
|
-
version: 1.3.
|
8
|
+
- 1
|
9
|
+
version: 1.3.1
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Florian Hanke
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2011-01-
|
17
|
+
date: 2011-01-30 00:00:00 +01:00
|
18
18
|
default_executable: picky
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|