linkage 0.0.6 → 0.0.8

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. data/.gitignore +10 -0
  2. data/Gemfile +15 -13
  3. data/Gemfile.lock +67 -37
  4. data/Guardfile +0 -2
  5. data/Rakefile +122 -25
  6. data/lib/linkage/comparator.rb +172 -0
  7. data/lib/linkage/comparators/binary.rb +12 -0
  8. data/lib/linkage/comparators/compare.rb +46 -0
  9. data/lib/linkage/comparators/within.rb +32 -0
  10. data/lib/linkage/configuration.rb +285 -153
  11. data/lib/linkage/data.rb +32 -7
  12. data/lib/linkage/dataset.rb +107 -32
  13. data/lib/linkage/decollation.rb +93 -0
  14. data/lib/linkage/expectation.rb +21 -0
  15. data/lib/linkage/expectations/exhaustive.rb +63 -0
  16. data/lib/linkage/expectations/simple.rb +168 -0
  17. data/lib/linkage/field.rb +30 -4
  18. data/lib/linkage/field_set.rb +6 -3
  19. data/lib/linkage/function.rb +50 -3
  20. data/lib/linkage/functions/binary.rb +30 -0
  21. data/lib/linkage/functions/cast.rb +54 -0
  22. data/lib/linkage/functions/length.rb +29 -0
  23. data/lib/linkage/functions/strftime.rb +12 -11
  24. data/lib/linkage/functions/trim.rb +8 -0
  25. data/lib/linkage/group.rb +20 -0
  26. data/lib/linkage/import_buffer.rb +5 -16
  27. data/lib/linkage/meta_object.rb +139 -0
  28. data/lib/linkage/result_set.rb +74 -17
  29. data/lib/linkage/runner/single_threaded.rb +125 -10
  30. data/lib/linkage/version.rb +3 -0
  31. data/lib/linkage.rb +11 -0
  32. data/linkage.gemspec +16 -121
  33. data/test/config.yml +5 -0
  34. data/test/helper.rb +73 -8
  35. data/test/integration/test_collation.rb +45 -0
  36. data/test/integration/test_configuration.rb +268 -0
  37. data/test/integration/test_cross_linkage.rb +4 -17
  38. data/test/integration/test_dataset.rb +45 -2
  39. data/test/integration/test_dual_linkage.rb +40 -24
  40. data/test/integration/test_functions.rb +22 -0
  41. data/test/integration/test_result_set.rb +85 -0
  42. data/test/integration/test_scoring.rb +84 -0
  43. data/test/integration/test_self_linkage.rb +5 -0
  44. data/test/integration/test_within_comparator.rb +100 -0
  45. data/test/unit/comparators/test_compare.rb +105 -0
  46. data/test/unit/comparators/test_within.rb +57 -0
  47. data/test/unit/expectations/test_exhaustive.rb +111 -0
  48. data/test/unit/expectations/test_simple.rb +303 -0
  49. data/test/unit/functions/test_binary.rb +54 -0
  50. data/test/unit/functions/test_cast.rb +98 -0
  51. data/test/unit/functions/test_length.rb +52 -0
  52. data/test/unit/functions/test_strftime.rb +17 -13
  53. data/test/unit/functions/test_trim.rb +11 -4
  54. data/test/unit/test_comparator.rb +124 -0
  55. data/test/unit/test_configuration.rb +137 -175
  56. data/test/unit/test_data.rb +44 -0
  57. data/test/unit/test_dataset.rb +73 -21
  58. data/test/unit/test_decollation.rb +201 -0
  59. data/test/unit/test_field.rb +38 -14
  60. data/test/unit/test_field_set.rb +12 -8
  61. data/test/unit/test_function.rb +83 -16
  62. data/test/unit/test_group.rb +28 -0
  63. data/test/unit/test_import_buffer.rb +13 -27
  64. data/test/unit/test_meta_object.rb +208 -0
  65. data/test/unit/test_result_set.rb +221 -3
  66. metadata +82 -190
@@ -0,0 +1,208 @@
1
+ require 'helper'
2
+
3
+ class UnitTests::TestMetaObject < Test::Unit::TestCase
4
+ test "initialize with static string" do
5
+ meta_object = Linkage::MetaObject.new("foo")
6
+ assert meta_object.static?
7
+ assert_equal "foo", meta_object.object
8
+ assert_nil meta_object.side
9
+ end
10
+
11
+ test "initialize with static function" do
12
+ function = stub_function("foo", :static? => true)
13
+ meta_object = Linkage::MetaObject.new(function)
14
+ assert meta_object.static?
15
+ assert_equal function, meta_object.object
16
+ assert_nil meta_object.side
17
+ end
18
+
19
+ test "initialize with field" do
20
+ field = stub_field("foo")
21
+ meta_object = Linkage::MetaObject.new(field, :lhs)
22
+ assert !meta_object.static?
23
+ assert_equal field, meta_object.object
24
+ assert_equal :lhs, meta_object.side
25
+ end
26
+
27
+ test "getting side for dynamic object without setting it raises error" do
28
+ meta_object = Linkage::MetaObject.new(stub_field('foo'))
29
+ assert_raises(RuntimeError) { meta_object.side }
30
+ end
31
+
32
+ test "getting dataset calls #dataset on object" do
33
+ field = stub_field('foo')
34
+ meta_object = Linkage::MetaObject.new(field)
35
+
36
+ dataset = stub('dataset')
37
+ field.expects(:dataset).returns(dataset)
38
+ assert_equal dataset, meta_object.dataset
39
+ end
40
+
41
+ test "setting dataset sets object's dataset" do
42
+ func = stub_function('foo')
43
+ meta_object = Linkage::MetaObject.new(func)
44
+
45
+ dataset = stub('dataset')
46
+ func.expects(:dataset=).with(dataset)
47
+ meta_object.dataset = dataset
48
+ end
49
+
50
+ test "setting dataset on non-data object raises exception" do
51
+ meta_object = Linkage::MetaObject.new(123)
52
+ dataset = stub('dataset')
53
+ assert_raises(RuntimeError) { meta_object.dataset = dataset }
54
+ end
55
+
56
+ test "objects_equal? compares only objects, not sides" do
57
+ field = stub_field("foo")
58
+ object_1 = Linkage::MetaObject.new(field, :lhs)
59
+ object_2 = Linkage::MetaObject.new(field, :rhs)
60
+ object_3 = Linkage::MetaObject.new(123)
61
+ assert object_1.objects_equal?(object_2)
62
+ assert !object_1.objects_equal?("foo")
63
+ assert !object_2.objects_equal?(object_3)
64
+ end
65
+
66
+ test "dataset reader for field" do
67
+ dataset = stub('dataset')
68
+ field = stub_field("foo", :dataset => dataset)
69
+ object = Linkage::MetaObject.new(field, :lhs)
70
+
71
+ assert_equal dataset, object.dataset
72
+ end
73
+
74
+ test "dataset reader for function" do
75
+ dataset = stub('dataset')
76
+ function = stub_function("foo", :dataset => dataset)
77
+ object = Linkage::MetaObject.new(function, :lhs)
78
+
79
+ assert_equal dataset, object.dataset
80
+ end
81
+
82
+ test "datasets_equal?" do
83
+ dataset_1 = stub('dataset 1')
84
+ field_1 = stub_field('field 1', :dataset => dataset_1)
85
+ object_1 = Linkage::MetaObject.new(field_1, :lhs)
86
+
87
+ dataset_2 = stub('dataset 2')
88
+ field_2 = stub_field('field 2', :dataset => dataset_2)
89
+ object_2 = Linkage::MetaObject.new(field_2, :rhs)
90
+
91
+ field_3 = stub_field('field 3', :dataset => dataset_2)
92
+ object_3 = Linkage::MetaObject.new(field_3, :rhs)
93
+
94
+ object_4 = Linkage::MetaObject.new(123)
95
+
96
+ assert object_1.datasets_equal?(object_1)
97
+ assert object_2.datasets_equal?(object_3)
98
+ assert !object_1.datasets_equal?(object_2)
99
+ assert !object_1.datasets_equal?("foo")
100
+ assert !object_1.datasets_equal?(object_4)
101
+ end
102
+
103
+ test "to_expr for non-data object returns object" do
104
+ object = Linkage::MetaObject.new(123)
105
+ assert_equal 123, object.to_expr
106
+ end
107
+
108
+ test "to_expr for data object returns object.to_expr" do
109
+ field = stub_field('field')
110
+ object = Linkage::MetaObject.new(field, :lhs)
111
+
112
+ field.expects(:to_expr).returns(:foo)
113
+ assert_equal :foo, object.to_expr
114
+ end
115
+
116
+ test "to_identifier for non-data object returns object" do
117
+ object = Linkage::MetaObject.new(123)
118
+ assert_equal 123, object.to_identifier
119
+ end
120
+
121
+ test "to_identifer for data object returns identifier object" do
122
+ field = stub_field('field')
123
+ object = Linkage::MetaObject.new(field, :lhs)
124
+
125
+ field.expects(:to_expr).returns(:foo)
126
+ assert_equal(Sequel::SQL::Identifier.new(:foo), object.to_identifier)
127
+ end
128
+
129
+ test "merge with data object" do
130
+ field_1 = stub_field('field 1')
131
+ object_1 = Linkage::MetaObject.new(field_1, :lhs)
132
+ field_2 = stub_field('field 2')
133
+ object_2 = Linkage::MetaObject.new(field_2, :rhs)
134
+
135
+ merged_field = stub('merged field')
136
+ field_1.expects(:merge).with(field_2).returns(merged_field)
137
+ assert_equal merged_field, object_1.merge(object_2)
138
+ end
139
+
140
+ test "merge with non-data object raises exception" do
141
+ field_1 = stub_field('field 1')
142
+ object_1 = Linkage::MetaObject.new(field_1, :lhs)
143
+ object_2 = Linkage::MetaObject.new(123)
144
+ assert_raises(ArgumentError) { object_1.merge(object_2) }
145
+ assert_raises(ArgumentError) { object_2.merge(object_1) }
146
+ end
147
+
148
+ test "ruby_type calls Field#ruby_type" do
149
+ field = stub_field('field')
150
+ object = Linkage::MetaObject.new(field, :lhs)
151
+ field.expects(:ruby_type).returns(:type => String)
152
+ assert_equal({:type => String}, object.ruby_type)
153
+ end
154
+
155
+ test "ruby_type calls Function#ruby_type" do
156
+ function = stub_function("foo")
157
+ object = Linkage::MetaObject.new(function, :lhs)
158
+ function.expects(:ruby_type).returns(:type => String)
159
+ assert_equal({:type => String}, object.ruby_type)
160
+ end
161
+
162
+ test "ruby_type returns object class for non-data object" do
163
+ object = Linkage::MetaObject.new(123)
164
+ assert_equal({:type => Fixnum}, object.ruby_type)
165
+ end
166
+
167
+ test "database_type" do
168
+ dataset = mock('dataset')
169
+ field = stub_field('field', :dataset => dataset)
170
+ object = Linkage::MetaObject.new(field, :lhs)
171
+ dataset.expects(:database_type).returns(:mysql)
172
+ assert_equal :mysql, object.database_type
173
+ end
174
+
175
+ test "#collation returns Data#collation" do
176
+ dataset = mock('dataset')
177
+ field = stub_field('field', :dataset => dataset, :collation => 'foo')
178
+ object = Linkage::MetaObject.new(field, :lhs)
179
+ assert_equal 'foo', object.collation
180
+ end
181
+
182
+ test "#collation returns nil when underlying object is not a Data object" do
183
+ object = Linkage::MetaObject.new(123, :lhs)
184
+ assert_nil object.collation
185
+ end
186
+
187
+ test "#name for data object" do
188
+ field = stub_field('foo', :name => :foo)
189
+ object = Linkage::MetaObject.new(field, :lhs)
190
+ assert_equal :foo, object.name
191
+ end
192
+
193
+ test "#name for non-data object returns nil" do
194
+ object = Linkage::MetaObject.new(123)
195
+ assert_nil object.name
196
+ end
197
+
198
+ test "#raw? returns true for non-data object" do
199
+ object = Linkage::MetaObject.new(123)
200
+ assert object.raw?
201
+ end
202
+
203
+ test "#raw? returns false for data object" do
204
+ field = stub_field('foo', :name => :foo)
205
+ object = Linkage::MetaObject.new(field, :lhs)
206
+ assert !object.raw?
207
+ end
208
+ end
@@ -4,15 +4,233 @@ class TestResultSet < Test::Unit::TestCase
4
4
  def setup
5
5
  @config = stub('configuration', {
6
6
  :results_uri => 'foo://bar',
7
- :results_uri_options => {:blah => 'junk'}
7
+ :results_uri_options => {:blah => 'junk'},
8
+ :decollation_needed? => true,
9
+ :groups_table_name => :groups,
10
+ :original_groups_table_name => :original_groups,
11
+ :scores_table_name => :scores,
12
+ :matches_table_name => :matches
8
13
  })
14
+ @database = stub('database')
15
+ Sequel.stubs(:connect).with('foo://bar', :blah => 'junk').returns(@database)
9
16
  end
10
17
 
11
18
  test "creating a result set with a configuration" do
12
19
  result_set = Linkage::ResultSet.new(@config)
13
20
  end
14
21
 
15
- test "records?" do
16
- pend
22
+ test '#add_group creates two copies when decollation is needed' do
23
+ result_set = Linkage::ResultSet.new(@config)
24
+
25
+ group = stub('group', {
26
+ :values => {:foo => 'bar '},
27
+ :decollated_values => {:foo => 'BAR'}
28
+ })
29
+
30
+ groups_import_buffer = stub('groups import buffer')
31
+ groups_dataset = stub('groups dataset')
32
+ @database.stubs(:[]).with(:groups).returns(groups_dataset)
33
+ Linkage::ImportBuffer.stubs(:new).with(groups_dataset, [:id, :foo]).
34
+ returns(groups_import_buffer)
35
+
36
+ original_groups_import_buffer = stub('original groups import buffer')
37
+ original_groups_dataset = stub('original groups dataset')
38
+ @database.stubs(:[]).with(:original_groups).returns(original_groups_dataset)
39
+ Linkage::ImportBuffer.stubs(:new).with(original_groups_dataset, [:id, :foo]).
40
+ returns(original_groups_import_buffer)
41
+
42
+ groups_import_buffer.expects(:add).with([1, 'BAR'])
43
+ original_groups_import_buffer.expects(:add).with([1, 'bar '])
44
+ result_set.add_group(group)
45
+ end
46
+
47
+ test "#flush! flushes groups dataset" do
48
+ result_set = Linkage::ResultSet.new(@config)
49
+
50
+ group = stub('group', {
51
+ :values => {:foo => 'bar '},
52
+ :decollated_values => {:foo => 'BAR'}
53
+ })
54
+
55
+ groups_import_buffer = stub('groups import buffer')
56
+ groups_dataset = stub('groups dataset')
57
+ @database.stubs(:[]).with(:groups).returns(groups_dataset)
58
+ Linkage::ImportBuffer.stubs(:new).with(groups_dataset, [:id, :foo]).
59
+ returns(groups_import_buffer)
60
+
61
+ original_groups_import_buffer = stub('original groups import buffer')
62
+ original_groups_dataset = stub('original groups dataset')
63
+ @database.stubs(:[]).with(:original_groups).returns(original_groups_dataset)
64
+ Linkage::ImportBuffer.stubs(:new).with(original_groups_dataset, [:id, :foo]).
65
+ returns(original_groups_import_buffer)
66
+
67
+ groups_import_buffer.stubs(:add)
68
+ original_groups_import_buffer.stubs(:add)
69
+ result_set.add_group(group)
70
+
71
+ groups_import_buffer.expects(:flush)
72
+ original_groups_import_buffer.expects(:flush)
73
+ result_set.flush!
74
+ end
75
+
76
+ test "#add_group doesn't create copies when decollation is not needed" do
77
+ @config.stubs(:decollation_needed?).returns(false)
78
+ result_set = Linkage::ResultSet.new(@config)
79
+
80
+ group = stub('group', :values => {:foo => 'bar '})
81
+
82
+ groups_import_buffer = stub('groups import buffer')
83
+ groups_dataset = stub('groups dataset', :first_source_table => :groups, :db => @database)
84
+ @database.stubs(:[]).with(:groups).returns(groups_dataset)
85
+ Linkage::ImportBuffer.stubs(:new).with(groups_dataset, [:id, :foo]).
86
+ returns(groups_import_buffer)
87
+
88
+ original_groups_dataset = stub('original groups dataset', :first_source_table => :original_groups, :db => @database)
89
+ @database.stubs(:[]).with(:original_groups).returns(original_groups_dataset)
90
+ Linkage::ImportBuffer.expects(:new).with(original_groups_dataset, [:id, :foo]).never
91
+
92
+ groups_import_buffer.expects(:add).with([1, 'bar '])
93
+ result_set.add_group(group)
94
+ end
95
+
96
+ test "#add_score adds to score buffer" do
97
+ result_set = Linkage::ResultSet.new(@config)
98
+ scores_dataset = stub('scores dataset')
99
+ @database.stubs(:[]).with(:scores).returns(scores_dataset)
100
+ scores_import_buffer = stub('scores import buffer')
101
+ Linkage::ImportBuffer.expects(:new).
102
+ with(scores_dataset, [:comparator_id, :record_1_id, :record_2_id, :score]).
103
+ returns(scores_import_buffer)
104
+ scores_import_buffer.expects(:add).with([0, 1, 2, 123])
105
+ scores_import_buffer.expects(:add).with([1, 1, 2, 456])
106
+ result_set.add_score(0, 1, 2, 123)
107
+ result_set.add_score(1, 1, 2, 456)
108
+ end
109
+
110
+ test "#flush! flushes score buffer" do
111
+ result_set = Linkage::ResultSet.new(@config)
112
+ scores_dataset = stub('scores dataset')
113
+ @database.stubs(:[]).with(:scores).returns(scores_dataset)
114
+ scores_import_buffer = stub('scores import buffer')
115
+ Linkage::ImportBuffer.stubs(:new).
116
+ with(scores_dataset, [:comparator_id, :record_1_id, :record_2_id, :score]).
117
+ returns(scores_import_buffer)
118
+ scores_import_buffer.stubs(:add)
119
+ result_set.add_score(0, 1, 2, 123)
120
+
121
+ scores_import_buffer.expects(:flush)
122
+ result_set.flush!
123
+ end
124
+
125
+ test "#add_match adds to match buffer" do
126
+ result_set = Linkage::ResultSet.new(@config)
127
+ matches_dataset = stub('matches dataset')
128
+ @database.stubs(:[]).with(:matches).returns(matches_dataset)
129
+ matches_import_buffer = stub('matches import buffer')
130
+ Linkage::ImportBuffer.expects(:new).
131
+ with(matches_dataset, [:record_1_id, :record_2_id, :total_score]).
132
+ returns(matches_import_buffer)
133
+ matches_import_buffer.expects(:add).with([1, 2, 123])
134
+ matches_import_buffer.expects(:add).with([2, 3, 456])
135
+ result_set.add_match(1, 2, 123)
136
+ result_set.add_match(2, 3, 456)
137
+ end
138
+
139
+ test "#flush! flushes match buffer" do
140
+ result_set = Linkage::ResultSet.new(@config)
141
+ matches_dataset = stub('matches dataset')
142
+ @database.stubs(:[]).with(:matches).returns(matches_dataset)
143
+ matches_import_buffer = stub('matches import buffer')
144
+ Linkage::ImportBuffer.stubs(:new).
145
+ with(matches_dataset, [:record_1_id, :record_2_id, :total_score]).
146
+ returns(matches_import_buffer)
147
+ matches_import_buffer.stubs(:add)
148
+ result_set.add_match(1, 2, 123)
149
+
150
+ matches_import_buffer.expects(:flush)
151
+ result_set.flush!
152
+ end
153
+
154
+ test "#create_tables! uses custom table names" do
155
+ @config.stubs({
156
+ :groups_table_name => :foo_groups,
157
+ :original_groups_table_name => :foo_original_groups,
158
+ :scores_table_name => :foo_scores,
159
+ :matches_table_name => :foo_matches,
160
+ :groups_table_needed? => true,
161
+ :decollation_needed? => true,
162
+ :scores_table_needed? => true,
163
+ :groups_table_schema => [],
164
+ :scores_table_schema => [],
165
+ :matches_table_schema => []
166
+ })
167
+ result_set = Linkage::ResultSet.new(@config)
168
+ @database.expects(:create_table).with(:foo_groups)
169
+ @database.expects(:create_table).with(:foo_original_groups)
170
+ @database.expects(:create_table).with(:foo_scores)
171
+ @database.expects(:create_table).with(:foo_matches)
172
+ result_set.create_tables!
173
+ end
174
+
175
+ test "#add_group uses custom table names" do
176
+ @config.stubs({
177
+ :groups_table_name => :foo_groups,
178
+ :original_groups_table_name => :foo_original_groups
179
+ })
180
+ result_set = Linkage::ResultSet.new(@config)
181
+
182
+ group = stub('group', {
183
+ :values => {:foo => 'bar '},
184
+ :decollated_values => {:foo => 'BAR'}
185
+ })
186
+
187
+ groups_import_buffer = stub('groups import buffer')
188
+ groups_dataset = stub('groups dataset')
189
+ @database.stubs(:[]).with(:foo_groups).returns(groups_dataset)
190
+ Linkage::ImportBuffer.stubs(:new).with(groups_dataset, [:id, :foo]).
191
+ returns(groups_import_buffer)
192
+
193
+ original_groups_import_buffer = stub('original groups import buffer')
194
+ original_groups_dataset = stub('original groups dataset')
195
+ @database.stubs(:[]).with(:foo_original_groups).
196
+ returns(original_groups_dataset)
197
+ Linkage::ImportBuffer.stubs(:new).
198
+ with(original_groups_dataset, [:id, :foo]).
199
+ returns(original_groups_import_buffer)
200
+
201
+ groups_import_buffer.expects(:add).with([1, 'BAR'])
202
+ original_groups_import_buffer.expects(:add).with([1, 'bar '])
203
+ result_set.add_group(group)
204
+ end
205
+
206
+ test "#add_score uses custom table name" do
207
+ @config.stubs(:scores_table_name).returns(:foo_scores)
208
+ result_set = Linkage::ResultSet.new(@config)
209
+ scores_dataset = stub('scores dataset')
210
+ @database.stubs(:[]).with(:foo_scores).returns(scores_dataset)
211
+ scores_import_buffer = stub('scores import buffer')
212
+ Linkage::ImportBuffer.expects(:new).
213
+ with(scores_dataset,
214
+ [:comparator_id, :record_1_id, :record_2_id, :score]).
215
+ returns(scores_import_buffer)
216
+ scores_import_buffer.expects(:add).with([0, 1, 2, 123])
217
+ scores_import_buffer.expects(:add).with([1, 1, 2, 456])
218
+ result_set.add_score(0, 1, 2, 123)
219
+ result_set.add_score(1, 1, 2, 456)
220
+ end
221
+
222
+ test "#add_match uses custom table name" do
223
+ @config.stubs(:matches_table_name).returns(:foo_matches)
224
+ result_set = Linkage::ResultSet.new(@config)
225
+ matches_dataset = stub('matches dataset')
226
+ @database.stubs(:[]).with(:foo_matches).returns(matches_dataset)
227
+ matches_import_buffer = stub('matches import buffer')
228
+ Linkage::ImportBuffer.expects(:new).
229
+ with(matches_dataset, [:record_1_id, :record_2_id, :total_score]).
230
+ returns(matches_import_buffer)
231
+ matches_import_buffer.expects(:add).with([1, 2, 123])
232
+ matches_import_buffer.expects(:add).with([2, 3, 456])
233
+ result_set.add_match(1, 2, 123)
234
+ result_set.add_match(2, 3, 456)
17
235
  end
18
236
  end