linkage 0.0.6 → 0.0.8

Sign up to get free protection for your applications and to get access to all the features.
Files changed (66) hide show
  1. data/.gitignore +10 -0
  2. data/Gemfile +15 -13
  3. data/Gemfile.lock +67 -37
  4. data/Guardfile +0 -2
  5. data/Rakefile +122 -25
  6. data/lib/linkage/comparator.rb +172 -0
  7. data/lib/linkage/comparators/binary.rb +12 -0
  8. data/lib/linkage/comparators/compare.rb +46 -0
  9. data/lib/linkage/comparators/within.rb +32 -0
  10. data/lib/linkage/configuration.rb +285 -153
  11. data/lib/linkage/data.rb +32 -7
  12. data/lib/linkage/dataset.rb +107 -32
  13. data/lib/linkage/decollation.rb +93 -0
  14. data/lib/linkage/expectation.rb +21 -0
  15. data/lib/linkage/expectations/exhaustive.rb +63 -0
  16. data/lib/linkage/expectations/simple.rb +168 -0
  17. data/lib/linkage/field.rb +30 -4
  18. data/lib/linkage/field_set.rb +6 -3
  19. data/lib/linkage/function.rb +50 -3
  20. data/lib/linkage/functions/binary.rb +30 -0
  21. data/lib/linkage/functions/cast.rb +54 -0
  22. data/lib/linkage/functions/length.rb +29 -0
  23. data/lib/linkage/functions/strftime.rb +12 -11
  24. data/lib/linkage/functions/trim.rb +8 -0
  25. data/lib/linkage/group.rb +20 -0
  26. data/lib/linkage/import_buffer.rb +5 -16
  27. data/lib/linkage/meta_object.rb +139 -0
  28. data/lib/linkage/result_set.rb +74 -17
  29. data/lib/linkage/runner/single_threaded.rb +125 -10
  30. data/lib/linkage/version.rb +3 -0
  31. data/lib/linkage.rb +11 -0
  32. data/linkage.gemspec +16 -121
  33. data/test/config.yml +5 -0
  34. data/test/helper.rb +73 -8
  35. data/test/integration/test_collation.rb +45 -0
  36. data/test/integration/test_configuration.rb +268 -0
  37. data/test/integration/test_cross_linkage.rb +4 -17
  38. data/test/integration/test_dataset.rb +45 -2
  39. data/test/integration/test_dual_linkage.rb +40 -24
  40. data/test/integration/test_functions.rb +22 -0
  41. data/test/integration/test_result_set.rb +85 -0
  42. data/test/integration/test_scoring.rb +84 -0
  43. data/test/integration/test_self_linkage.rb +5 -0
  44. data/test/integration/test_within_comparator.rb +100 -0
  45. data/test/unit/comparators/test_compare.rb +105 -0
  46. data/test/unit/comparators/test_within.rb +57 -0
  47. data/test/unit/expectations/test_exhaustive.rb +111 -0
  48. data/test/unit/expectations/test_simple.rb +303 -0
  49. data/test/unit/functions/test_binary.rb +54 -0
  50. data/test/unit/functions/test_cast.rb +98 -0
  51. data/test/unit/functions/test_length.rb +52 -0
  52. data/test/unit/functions/test_strftime.rb +17 -13
  53. data/test/unit/functions/test_trim.rb +11 -4
  54. data/test/unit/test_comparator.rb +124 -0
  55. data/test/unit/test_configuration.rb +137 -175
  56. data/test/unit/test_data.rb +44 -0
  57. data/test/unit/test_dataset.rb +73 -21
  58. data/test/unit/test_decollation.rb +201 -0
  59. data/test/unit/test_field.rb +38 -14
  60. data/test/unit/test_field_set.rb +12 -8
  61. data/test/unit/test_function.rb +83 -16
  62. data/test/unit/test_group.rb +28 -0
  63. data/test/unit/test_import_buffer.rb +13 -27
  64. data/test/unit/test_meta_object.rb +208 -0
  65. data/test/unit/test_result_set.rb +221 -3
  66. metadata +82 -190
@@ -0,0 +1,208 @@
1
+ require 'helper'
2
+
3
+ class UnitTests::TestMetaObject < Test::Unit::TestCase
4
+ test "initialize with static string" do
5
+ meta_object = Linkage::MetaObject.new("foo")
6
+ assert meta_object.static?
7
+ assert_equal "foo", meta_object.object
8
+ assert_nil meta_object.side
9
+ end
10
+
11
+ test "initialize with static function" do
12
+ function = stub_function("foo", :static? => true)
13
+ meta_object = Linkage::MetaObject.new(function)
14
+ assert meta_object.static?
15
+ assert_equal function, meta_object.object
16
+ assert_nil meta_object.side
17
+ end
18
+
19
+ test "initialize with field" do
20
+ field = stub_field("foo")
21
+ meta_object = Linkage::MetaObject.new(field, :lhs)
22
+ assert !meta_object.static?
23
+ assert_equal field, meta_object.object
24
+ assert_equal :lhs, meta_object.side
25
+ end
26
+
27
+ test "getting side for dynamic object without setting it raises error" do
28
+ meta_object = Linkage::MetaObject.new(stub_field('foo'))
29
+ assert_raises(RuntimeError) { meta_object.side }
30
+ end
31
+
32
+ test "getting dataset calls #dataset on object" do
33
+ field = stub_field('foo')
34
+ meta_object = Linkage::MetaObject.new(field)
35
+
36
+ dataset = stub('dataset')
37
+ field.expects(:dataset).returns(dataset)
38
+ assert_equal dataset, meta_object.dataset
39
+ end
40
+
41
+ test "setting dataset sets object's dataset" do
42
+ func = stub_function('foo')
43
+ meta_object = Linkage::MetaObject.new(func)
44
+
45
+ dataset = stub('dataset')
46
+ func.expects(:dataset=).with(dataset)
47
+ meta_object.dataset = dataset
48
+ end
49
+
50
+ test "setting dataset on non-data object raises exception" do
51
+ meta_object = Linkage::MetaObject.new(123)
52
+ dataset = stub('dataset')
53
+ assert_raises(RuntimeError) { meta_object.dataset = dataset }
54
+ end
55
+
56
+ test "objects_equal? compares only objects, not sides" do
57
+ field = stub_field("foo")
58
+ object_1 = Linkage::MetaObject.new(field, :lhs)
59
+ object_2 = Linkage::MetaObject.new(field, :rhs)
60
+ object_3 = Linkage::MetaObject.new(123)
61
+ assert object_1.objects_equal?(object_2)
62
+ assert !object_1.objects_equal?("foo")
63
+ assert !object_2.objects_equal?(object_3)
64
+ end
65
+
66
+ test "dataset reader for field" do
67
+ dataset = stub('dataset')
68
+ field = stub_field("foo", :dataset => dataset)
69
+ object = Linkage::MetaObject.new(field, :lhs)
70
+
71
+ assert_equal dataset, object.dataset
72
+ end
73
+
74
+ test "dataset reader for function" do
75
+ dataset = stub('dataset')
76
+ function = stub_function("foo", :dataset => dataset)
77
+ object = Linkage::MetaObject.new(function, :lhs)
78
+
79
+ assert_equal dataset, object.dataset
80
+ end
81
+
82
+ test "datasets_equal?" do
83
+ dataset_1 = stub('dataset 1')
84
+ field_1 = stub_field('field 1', :dataset => dataset_1)
85
+ object_1 = Linkage::MetaObject.new(field_1, :lhs)
86
+
87
+ dataset_2 = stub('dataset 2')
88
+ field_2 = stub_field('field 2', :dataset => dataset_2)
89
+ object_2 = Linkage::MetaObject.new(field_2, :rhs)
90
+
91
+ field_3 = stub_field('field 3', :dataset => dataset_2)
92
+ object_3 = Linkage::MetaObject.new(field_3, :rhs)
93
+
94
+ object_4 = Linkage::MetaObject.new(123)
95
+
96
+ assert object_1.datasets_equal?(object_1)
97
+ assert object_2.datasets_equal?(object_3)
98
+ assert !object_1.datasets_equal?(object_2)
99
+ assert !object_1.datasets_equal?("foo")
100
+ assert !object_1.datasets_equal?(object_4)
101
+ end
102
+
103
+ test "to_expr for non-data object returns object" do
104
+ object = Linkage::MetaObject.new(123)
105
+ assert_equal 123, object.to_expr
106
+ end
107
+
108
+ test "to_expr for data object returns object.to_expr" do
109
+ field = stub_field('field')
110
+ object = Linkage::MetaObject.new(field, :lhs)
111
+
112
+ field.expects(:to_expr).returns(:foo)
113
+ assert_equal :foo, object.to_expr
114
+ end
115
+
116
+ test "to_identifier for non-data object returns object" do
117
+ object = Linkage::MetaObject.new(123)
118
+ assert_equal 123, object.to_identifier
119
+ end
120
+
121
+ test "to_identifer for data object returns identifier object" do
122
+ field = stub_field('field')
123
+ object = Linkage::MetaObject.new(field, :lhs)
124
+
125
+ field.expects(:to_expr).returns(:foo)
126
+ assert_equal(Sequel::SQL::Identifier.new(:foo), object.to_identifier)
127
+ end
128
+
129
+ test "merge with data object" do
130
+ field_1 = stub_field('field 1')
131
+ object_1 = Linkage::MetaObject.new(field_1, :lhs)
132
+ field_2 = stub_field('field 2')
133
+ object_2 = Linkage::MetaObject.new(field_2, :rhs)
134
+
135
+ merged_field = stub('merged field')
136
+ field_1.expects(:merge).with(field_2).returns(merged_field)
137
+ assert_equal merged_field, object_1.merge(object_2)
138
+ end
139
+
140
+ test "merge with non-data object raises exception" do
141
+ field_1 = stub_field('field 1')
142
+ object_1 = Linkage::MetaObject.new(field_1, :lhs)
143
+ object_2 = Linkage::MetaObject.new(123)
144
+ assert_raises(ArgumentError) { object_1.merge(object_2) }
145
+ assert_raises(ArgumentError) { object_2.merge(object_1) }
146
+ end
147
+
148
+ test "ruby_type calls Field#ruby_type" do
149
+ field = stub_field('field')
150
+ object = Linkage::MetaObject.new(field, :lhs)
151
+ field.expects(:ruby_type).returns(:type => String)
152
+ assert_equal({:type => String}, object.ruby_type)
153
+ end
154
+
155
+ test "ruby_type calls Function#ruby_type" do
156
+ function = stub_function("foo")
157
+ object = Linkage::MetaObject.new(function, :lhs)
158
+ function.expects(:ruby_type).returns(:type => String)
159
+ assert_equal({:type => String}, object.ruby_type)
160
+ end
161
+
162
+ test "ruby_type returns object class for non-data object" do
163
+ object = Linkage::MetaObject.new(123)
164
+ assert_equal({:type => Fixnum}, object.ruby_type)
165
+ end
166
+
167
+ test "database_type" do
168
+ dataset = mock('dataset')
169
+ field = stub_field('field', :dataset => dataset)
170
+ object = Linkage::MetaObject.new(field, :lhs)
171
+ dataset.expects(:database_type).returns(:mysql)
172
+ assert_equal :mysql, object.database_type
173
+ end
174
+
175
+ test "#collation returns Data#collation" do
176
+ dataset = mock('dataset')
177
+ field = stub_field('field', :dataset => dataset, :collation => 'foo')
178
+ object = Linkage::MetaObject.new(field, :lhs)
179
+ assert_equal 'foo', object.collation
180
+ end
181
+
182
+ test "#collation returns nil when underlying object is not a Data object" do
183
+ object = Linkage::MetaObject.new(123, :lhs)
184
+ assert_nil object.collation
185
+ end
186
+
187
+ test "#name for data object" do
188
+ field = stub_field('foo', :name => :foo)
189
+ object = Linkage::MetaObject.new(field, :lhs)
190
+ assert_equal :foo, object.name
191
+ end
192
+
193
+ test "#name for non-data object returns nil" do
194
+ object = Linkage::MetaObject.new(123)
195
+ assert_nil object.name
196
+ end
197
+
198
+ test "#raw? returns true for non-data object" do
199
+ object = Linkage::MetaObject.new(123)
200
+ assert object.raw?
201
+ end
202
+
203
+ test "#raw? returns false for data object" do
204
+ field = stub_field('foo', :name => :foo)
205
+ object = Linkage::MetaObject.new(field, :lhs)
206
+ assert !object.raw?
207
+ end
208
+ end
@@ -4,15 +4,233 @@ class TestResultSet < Test::Unit::TestCase
4
4
  def setup
5
5
  @config = stub('configuration', {
6
6
  :results_uri => 'foo://bar',
7
- :results_uri_options => {:blah => 'junk'}
7
+ :results_uri_options => {:blah => 'junk'},
8
+ :decollation_needed? => true,
9
+ :groups_table_name => :groups,
10
+ :original_groups_table_name => :original_groups,
11
+ :scores_table_name => :scores,
12
+ :matches_table_name => :matches
8
13
  })
14
+ @database = stub('database')
15
+ Sequel.stubs(:connect).with('foo://bar', :blah => 'junk').returns(@database)
9
16
  end
10
17
 
11
18
  test "creating a result set with a configuration" do
12
19
  result_set = Linkage::ResultSet.new(@config)
13
20
  end
14
21
 
15
- test "records?" do
16
- pend
22
+ test '#add_group creates two copies when decollation is needed' do
23
+ result_set = Linkage::ResultSet.new(@config)
24
+
25
+ group = stub('group', {
26
+ :values => {:foo => 'bar '},
27
+ :decollated_values => {:foo => 'BAR'}
28
+ })
29
+
30
+ groups_import_buffer = stub('groups import buffer')
31
+ groups_dataset = stub('groups dataset')
32
+ @database.stubs(:[]).with(:groups).returns(groups_dataset)
33
+ Linkage::ImportBuffer.stubs(:new).with(groups_dataset, [:id, :foo]).
34
+ returns(groups_import_buffer)
35
+
36
+ original_groups_import_buffer = stub('original groups import buffer')
37
+ original_groups_dataset = stub('original groups dataset')
38
+ @database.stubs(:[]).with(:original_groups).returns(original_groups_dataset)
39
+ Linkage::ImportBuffer.stubs(:new).with(original_groups_dataset, [:id, :foo]).
40
+ returns(original_groups_import_buffer)
41
+
42
+ groups_import_buffer.expects(:add).with([1, 'BAR'])
43
+ original_groups_import_buffer.expects(:add).with([1, 'bar '])
44
+ result_set.add_group(group)
45
+ end
46
+
47
+ test "#flush! flushes groups dataset" do
48
+ result_set = Linkage::ResultSet.new(@config)
49
+
50
+ group = stub('group', {
51
+ :values => {:foo => 'bar '},
52
+ :decollated_values => {:foo => 'BAR'}
53
+ })
54
+
55
+ groups_import_buffer = stub('groups import buffer')
56
+ groups_dataset = stub('groups dataset')
57
+ @database.stubs(:[]).with(:groups).returns(groups_dataset)
58
+ Linkage::ImportBuffer.stubs(:new).with(groups_dataset, [:id, :foo]).
59
+ returns(groups_import_buffer)
60
+
61
+ original_groups_import_buffer = stub('original groups import buffer')
62
+ original_groups_dataset = stub('original groups dataset')
63
+ @database.stubs(:[]).with(:original_groups).returns(original_groups_dataset)
64
+ Linkage::ImportBuffer.stubs(:new).with(original_groups_dataset, [:id, :foo]).
65
+ returns(original_groups_import_buffer)
66
+
67
+ groups_import_buffer.stubs(:add)
68
+ original_groups_import_buffer.stubs(:add)
69
+ result_set.add_group(group)
70
+
71
+ groups_import_buffer.expects(:flush)
72
+ original_groups_import_buffer.expects(:flush)
73
+ result_set.flush!
74
+ end
75
+
76
+ test "#add_group doesn't create copies when decollation is not needed" do
77
+ @config.stubs(:decollation_needed?).returns(false)
78
+ result_set = Linkage::ResultSet.new(@config)
79
+
80
+ group = stub('group', :values => {:foo => 'bar '})
81
+
82
+ groups_import_buffer = stub('groups import buffer')
83
+ groups_dataset = stub('groups dataset', :first_source_table => :groups, :db => @database)
84
+ @database.stubs(:[]).with(:groups).returns(groups_dataset)
85
+ Linkage::ImportBuffer.stubs(:new).with(groups_dataset, [:id, :foo]).
86
+ returns(groups_import_buffer)
87
+
88
+ original_groups_dataset = stub('original groups dataset', :first_source_table => :original_groups, :db => @database)
89
+ @database.stubs(:[]).with(:original_groups).returns(original_groups_dataset)
90
+ Linkage::ImportBuffer.expects(:new).with(original_groups_dataset, [:id, :foo]).never
91
+
92
+ groups_import_buffer.expects(:add).with([1, 'bar '])
93
+ result_set.add_group(group)
94
+ end
95
+
96
+ test "#add_score adds to score buffer" do
97
+ result_set = Linkage::ResultSet.new(@config)
98
+ scores_dataset = stub('scores dataset')
99
+ @database.stubs(:[]).with(:scores).returns(scores_dataset)
100
+ scores_import_buffer = stub('scores import buffer')
101
+ Linkage::ImportBuffer.expects(:new).
102
+ with(scores_dataset, [:comparator_id, :record_1_id, :record_2_id, :score]).
103
+ returns(scores_import_buffer)
104
+ scores_import_buffer.expects(:add).with([0, 1, 2, 123])
105
+ scores_import_buffer.expects(:add).with([1, 1, 2, 456])
106
+ result_set.add_score(0, 1, 2, 123)
107
+ result_set.add_score(1, 1, 2, 456)
108
+ end
109
+
110
+ test "#flush! flushes score buffer" do
111
+ result_set = Linkage::ResultSet.new(@config)
112
+ scores_dataset = stub('scores dataset')
113
+ @database.stubs(:[]).with(:scores).returns(scores_dataset)
114
+ scores_import_buffer = stub('scores import buffer')
115
+ Linkage::ImportBuffer.stubs(:new).
116
+ with(scores_dataset, [:comparator_id, :record_1_id, :record_2_id, :score]).
117
+ returns(scores_import_buffer)
118
+ scores_import_buffer.stubs(:add)
119
+ result_set.add_score(0, 1, 2, 123)
120
+
121
+ scores_import_buffer.expects(:flush)
122
+ result_set.flush!
123
+ end
124
+
125
+ test "#add_match adds to match buffer" do
126
+ result_set = Linkage::ResultSet.new(@config)
127
+ matches_dataset = stub('matches dataset')
128
+ @database.stubs(:[]).with(:matches).returns(matches_dataset)
129
+ matches_import_buffer = stub('matches import buffer')
130
+ Linkage::ImportBuffer.expects(:new).
131
+ with(matches_dataset, [:record_1_id, :record_2_id, :total_score]).
132
+ returns(matches_import_buffer)
133
+ matches_import_buffer.expects(:add).with([1, 2, 123])
134
+ matches_import_buffer.expects(:add).with([2, 3, 456])
135
+ result_set.add_match(1, 2, 123)
136
+ result_set.add_match(2, 3, 456)
137
+ end
138
+
139
+ test "#flush! flushes match buffer" do
140
+ result_set = Linkage::ResultSet.new(@config)
141
+ matches_dataset = stub('matches dataset')
142
+ @database.stubs(:[]).with(:matches).returns(matches_dataset)
143
+ matches_import_buffer = stub('matches import buffer')
144
+ Linkage::ImportBuffer.stubs(:new).
145
+ with(matches_dataset, [:record_1_id, :record_2_id, :total_score]).
146
+ returns(matches_import_buffer)
147
+ matches_import_buffer.stubs(:add)
148
+ result_set.add_match(1, 2, 123)
149
+
150
+ matches_import_buffer.expects(:flush)
151
+ result_set.flush!
152
+ end
153
+
154
+ test "#create_tables! uses custom table names" do
155
+ @config.stubs({
156
+ :groups_table_name => :foo_groups,
157
+ :original_groups_table_name => :foo_original_groups,
158
+ :scores_table_name => :foo_scores,
159
+ :matches_table_name => :foo_matches,
160
+ :groups_table_needed? => true,
161
+ :decollation_needed? => true,
162
+ :scores_table_needed? => true,
163
+ :groups_table_schema => [],
164
+ :scores_table_schema => [],
165
+ :matches_table_schema => []
166
+ })
167
+ result_set = Linkage::ResultSet.new(@config)
168
+ @database.expects(:create_table).with(:foo_groups)
169
+ @database.expects(:create_table).with(:foo_original_groups)
170
+ @database.expects(:create_table).with(:foo_scores)
171
+ @database.expects(:create_table).with(:foo_matches)
172
+ result_set.create_tables!
173
+ end
174
+
175
+ test "#add_group uses custom table names" do
176
+ @config.stubs({
177
+ :groups_table_name => :foo_groups,
178
+ :original_groups_table_name => :foo_original_groups
179
+ })
180
+ result_set = Linkage::ResultSet.new(@config)
181
+
182
+ group = stub('group', {
183
+ :values => {:foo => 'bar '},
184
+ :decollated_values => {:foo => 'BAR'}
185
+ })
186
+
187
+ groups_import_buffer = stub('groups import buffer')
188
+ groups_dataset = stub('groups dataset')
189
+ @database.stubs(:[]).with(:foo_groups).returns(groups_dataset)
190
+ Linkage::ImportBuffer.stubs(:new).with(groups_dataset, [:id, :foo]).
191
+ returns(groups_import_buffer)
192
+
193
+ original_groups_import_buffer = stub('original groups import buffer')
194
+ original_groups_dataset = stub('original groups dataset')
195
+ @database.stubs(:[]).with(:foo_original_groups).
196
+ returns(original_groups_dataset)
197
+ Linkage::ImportBuffer.stubs(:new).
198
+ with(original_groups_dataset, [:id, :foo]).
199
+ returns(original_groups_import_buffer)
200
+
201
+ groups_import_buffer.expects(:add).with([1, 'BAR'])
202
+ original_groups_import_buffer.expects(:add).with([1, 'bar '])
203
+ result_set.add_group(group)
204
+ end
205
+
206
+ test "#add_score uses custom table name" do
207
+ @config.stubs(:scores_table_name).returns(:foo_scores)
208
+ result_set = Linkage::ResultSet.new(@config)
209
+ scores_dataset = stub('scores dataset')
210
+ @database.stubs(:[]).with(:foo_scores).returns(scores_dataset)
211
+ scores_import_buffer = stub('scores import buffer')
212
+ Linkage::ImportBuffer.expects(:new).
213
+ with(scores_dataset,
214
+ [:comparator_id, :record_1_id, :record_2_id, :score]).
215
+ returns(scores_import_buffer)
216
+ scores_import_buffer.expects(:add).with([0, 1, 2, 123])
217
+ scores_import_buffer.expects(:add).with([1, 1, 2, 456])
218
+ result_set.add_score(0, 1, 2, 123)
219
+ result_set.add_score(1, 1, 2, 456)
220
+ end
221
+
222
+ test "#add_match uses custom table name" do
223
+ @config.stubs(:matches_table_name).returns(:foo_matches)
224
+ result_set = Linkage::ResultSet.new(@config)
225
+ matches_dataset = stub('matches dataset')
226
+ @database.stubs(:[]).with(:foo_matches).returns(matches_dataset)
227
+ matches_import_buffer = stub('matches import buffer')
228
+ Linkage::ImportBuffer.expects(:new).
229
+ with(matches_dataset, [:record_1_id, :record_2_id, :total_score]).
230
+ returns(matches_import_buffer)
231
+ matches_import_buffer.expects(:add).with([1, 2, 123])
232
+ matches_import_buffer.expects(:add).with([2, 3, 456])
233
+ result_set.add_match(1, 2, 123)
234
+ result_set.add_match(2, 3, 456)
17
235
  end
18
236
  end