linkage 0.0.8 → 0.1.0.pre

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (105)
  1. checksums.yaml +7 -0
  2. data/.gitignore +1 -0
  3. data/.yardopts +1 -0
  4. data/Gemfile +1 -19
  5. data/Gemfile-java +3 -0
  6. data/README.markdown +88 -34
  7. data/Rakefile +16 -15
  8. data/TODO +4 -0
  9. data/lib/linkage/comparator.rb +139 -144
  10. data/lib/linkage/comparators/compare.rb +236 -29
  11. data/lib/linkage/comparators/strcompare.rb +85 -0
  12. data/lib/linkage/comparators/within.rb +24 -20
  13. data/lib/linkage/configuration.rb +44 -466
  14. data/lib/linkage/dataset.rb +28 -127
  15. data/lib/linkage/exceptions.rb +5 -0
  16. data/lib/linkage/field.rb +6 -37
  17. data/lib/linkage/field_set.rb +3 -3
  18. data/lib/linkage/match_recorder.rb +22 -0
  19. data/lib/linkage/match_set.rb +34 -0
  20. data/lib/linkage/match_sets/csv.rb +39 -0
  21. data/lib/linkage/match_sets/database.rb +45 -0
  22. data/lib/linkage/matcher.rb +30 -0
  23. data/lib/linkage/result_set.rb +25 -110
  24. data/lib/linkage/result_sets/csv.rb +54 -0
  25. data/lib/linkage/result_sets/database.rb +42 -0
  26. data/lib/linkage/runner.rb +57 -16
  27. data/lib/linkage/score_recorder.rb +30 -0
  28. data/lib/linkage/score_set.rb +49 -0
  29. data/lib/linkage/score_sets/csv.rb +64 -0
  30. data/lib/linkage/score_sets/database.rb +77 -0
  31. data/lib/linkage/version.rb +1 -1
  32. data/lib/linkage.rb +14 -17
  33. data/linkage.gemspec +13 -1
  34. data/linkage.gemspec-java +32 -0
  35. data/test/helper.rb +30 -23
  36. data/test/integration/test_cross_linkage.rb +46 -25
  37. data/test/integration/test_database_result_set.rb +55 -0
  38. data/test/integration/test_dual_linkage.rb +19 -94
  39. data/test/integration/test_self_linkage.rb +100 -203
  40. data/test/integration/test_within_comparator.rb +24 -77
  41. data/test/unit/comparators/test_compare.rb +254 -50
  42. data/test/unit/comparators/test_strcompare.rb +45 -0
  43. data/test/unit/comparators/test_within.rb +14 -26
  44. data/test/unit/match_sets/test_csv.rb +78 -0
  45. data/test/unit/match_sets/test_database.rb +63 -0
  46. data/test/unit/result_sets/test_csv.rb +111 -0
  47. data/test/unit/result_sets/test_database.rb +68 -0
  48. data/test/unit/score_sets/test_csv.rb +151 -0
  49. data/test/unit/score_sets/test_database.rb +149 -0
  50. data/test/unit/test_comparator.rb +46 -83
  51. data/test/unit/test_comparators.rb +4 -0
  52. data/test/unit/test_configuration.rb +99 -145
  53. data/test/unit/test_dataset.rb +52 -73
  54. data/test/unit/test_field.rb +4 -55
  55. data/test/unit/test_field_set.rb +6 -6
  56. data/test/unit/test_match_recorder.rb +23 -0
  57. data/test/unit/test_match_set.rb +23 -0
  58. data/test/unit/test_match_sets.rb +4 -0
  59. data/test/unit/test_matcher.rb +44 -0
  60. data/test/unit/test_result_set.rb +24 -223
  61. data/test/unit/test_result_sets.rb +4 -0
  62. data/test/unit/test_runner.rb +122 -17
  63. data/test/unit/test_runners.rb +4 -0
  64. data/test/unit/test_score_recorder.rb +25 -0
  65. data/test/unit/test_score_set.rb +37 -0
  66. data/test/unit/test_score_sets.rb +4 -0
  67. metadata +183 -90
  68. data/Gemfile.lock +0 -92
  69. data/lib/linkage/comparators/binary.rb +0 -12
  70. data/lib/linkage/data.rb +0 -175
  71. data/lib/linkage/decollation.rb +0 -93
  72. data/lib/linkage/expectation.rb +0 -21
  73. data/lib/linkage/expectations/exhaustive.rb +0 -63
  74. data/lib/linkage/expectations/simple.rb +0 -168
  75. data/lib/linkage/function.rb +0 -148
  76. data/lib/linkage/functions/binary.rb +0 -30
  77. data/lib/linkage/functions/cast.rb +0 -54
  78. data/lib/linkage/functions/length.rb +0 -29
  79. data/lib/linkage/functions/strftime.rb +0 -33
  80. data/lib/linkage/functions/trim.rb +0 -30
  81. data/lib/linkage/group.rb +0 -55
  82. data/lib/linkage/meta_object.rb +0 -139
  83. data/lib/linkage/runner/single_threaded.rb +0 -187
  84. data/lib/linkage/utils.rb +0 -164
  85. data/lib/linkage/warnings.rb +0 -5
  86. data/test/integration/test_collation.rb +0 -45
  87. data/test/integration/test_configuration.rb +0 -268
  88. data/test/integration/test_dataset.rb +0 -116
  89. data/test/integration/test_functions.rb +0 -88
  90. data/test/integration/test_result_set.rb +0 -85
  91. data/test/integration/test_scoring.rb +0 -84
  92. data/test/unit/expectations/test_exhaustive.rb +0 -111
  93. data/test/unit/expectations/test_simple.rb +0 -303
  94. data/test/unit/functions/test_binary.rb +0 -54
  95. data/test/unit/functions/test_cast.rb +0 -98
  96. data/test/unit/functions/test_length.rb +0 -52
  97. data/test/unit/functions/test_strftime.rb +0 -60
  98. data/test/unit/functions/test_trim.rb +0 -43
  99. data/test/unit/runner/test_single_threaded.rb +0 -12
  100. data/test/unit/test_data.rb +0 -445
  101. data/test/unit/test_decollation.rb +0 -201
  102. data/test/unit/test_function.rb +0 -233
  103. data/test/unit/test_group.rb +0 -38
  104. data/test/unit/test_meta_object.rb +0 -208
  105. data/test/unit/test_utils.rb +0 -341
@@ -1,172 +1,126 @@
1
1
  require 'helper'
2
2
 
3
3
  class UnitTests::TestConfiguration < Test::Unit::TestCase
4
- test "result_set" do
5
- dataset_1 = stub('dataset')
6
- dataset_2 = stub('dataset')
7
- c = Linkage::Configuration.new(dataset_1, dataset_2)
8
-
9
- result_set = stub('result set')
10
- Linkage::ResultSet.expects(:new).with(c).returns(result_set)
11
- assert_equal result_set, c.result_set
4
+ def setup
5
+ @pk_1 = stub('primary key 1', :name => :id)
6
+ @field_1 = stub('field 1')
7
+ @field_set_1 = stub('field set 1', :primary_key => @pk_1, :[] => @field_1)
8
+ @dataset_1 = stub('dataset 1', :field_set => @field_set_1)
9
+ @pk_2 = stub('primary key 2', :name => :id)
10
+ @field_2 = stub('field 2')
11
+ @field_set_2 = stub('field set 2', :primary_key => @pk_2, :[] => @field_2)
12
+ @dataset_2 = stub('dataset 2', :field_set => @field_set_2)
13
+ @score_set = stub('score set')
14
+ @match_set = stub('match set')
15
+ @result_set = stub('result set', :score_set => @score_set, :match_set => @match_set)
16
+ @compare = stub('compare')
17
+ Linkage::Comparators::Compare.stubs(:new).returns(@compare)
12
18
  end
13
19
 
14
- test "groups_table_needed? is false if there are no simple expectations" do
15
- dataset_1 = stub('dataset')
16
- dataset_2 = stub('dataset')
17
- conf = Linkage::Configuration.new(dataset_1, dataset_2)
18
- assert !conf.groups_table_needed?
20
+ test "init with single dataset and result set" do
21
+ config = Linkage::Configuration.new(@dataset_1, @result_set)
22
+ assert_equal @dataset_1, config.dataset_1
23
+ assert_nil config.dataset_2
24
+ assert_equal @result_set, config.result_set
19
25
  end
20
26
 
21
- test "groups_table_needed? is true if there are any simple expectations" do
22
- dataset_1 = stub('dataset')
23
- dataset_2 = stub('dataset')
24
- conf = Linkage::Configuration.new(dataset_1, dataset_2)
25
- exp = stub('simple expectation', :decollation_needed? => false)
26
- conf.add_simple_expectation(exp)
27
- assert conf.groups_table_needed?
27
+ test "init with two datasets and result set" do
28
+ config = Linkage::Configuration.new(@dataset_1, @dataset_2, @result_set)
29
+ assert_equal @dataset_1, config.dataset_1
30
+ assert_equal @dataset_2, config.dataset_2
31
+ assert_equal @result_set, config.result_set
28
32
  end
29
33
 
30
- test "scores_table_needed? is false if there are no exhaustive expectations" do
31
- dataset_1 = stub('dataset')
32
- dataset_2 = stub('dataset')
33
- conf = Linkage::Configuration.new(dataset_1, dataset_2)
34
- assert !conf.scores_table_needed?
34
+ test "adding comparator with set arguments and two datasets" do
35
+ config = Linkage::Configuration.new(@dataset_1, @dataset_2, @result_set)
36
+
37
+ @field_set_1.expects(:[]).with(:foo).returns(@field_1)
38
+ @field_set_2.expects(:[]).with(:foo).returns(@field_2)
39
+ Linkage::Comparators::Compare.expects(:new).with([@field_1], [@field_2], :equal).returns(@compare)
40
+ config.compare([:foo], [:foo], :equal)
41
+ assert_equal @compare, config.comparators[0]
35
42
  end
36
43
 
37
- test "scores_table_needed? is true if there are any exhaustive expectations" do
38
- dataset_1 = stub('dataset')
39
- dataset_2 = stub('dataset')
40
- conf = Linkage::Configuration.new(dataset_1, dataset_2)
41
- exp = stub('exhaustive expectation')
42
- conf.add_exhaustive_expectation(exp)
43
- assert conf.scores_table_needed?
44
+ test "adding comparator with set arguments and one datasets" do
45
+ config = Linkage::Configuration.new(@dataset_1, @result_set)
46
+
47
+ @field_set_1.expects(:[]).with(:foo).returns(@field_1)
48
+ @field_set_1.expects(:[]).with(:bar).returns(@field_2)
49
+ Linkage::Comparators::Compare.expects(:new).with([@field_1], [@field_2], :equal).returns(@compare)
50
+ config.compare([:foo], [:bar], :equal)
51
+ assert_equal @compare, config.comparators[0]
44
52
  end
45
53
 
46
- test "scores_table_schema" do
47
- dataset_1 = stub('dataset 1', {
48
- :field_set => stub('field set 1', {
49
- :primary_key => stub('primary key 1', {
50
- :ruby_type => {:type => Integer}
51
- })
52
- })
53
- })
54
- dataset_2 = stub('dataset 2', {
55
- :field_set => stub('field set 2', {
56
- :primary_key => stub('primary key 2', {
57
- :ruby_type => {:type => String, :opts => {:size => 10}}
58
- })
59
- })
60
- })
61
- conf = Linkage::Configuration.new(dataset_1, dataset_2)
62
- exp_1 = stub('exhaustive expectation 1')
63
- exp_2 = stub('exhaustive expectation 2')
64
- conf.add_exhaustive_expectation(exp_1)
65
- conf.add_exhaustive_expectation(exp_2)
66
-
67
- expected = [
68
- [:id, Integer, {:primary_key => true}],
69
- [:comparator_id, Integer, {}],
70
- [:record_1_id, Integer, {}],
71
- [:record_2_id, String, {:size => 10}],
72
- [:score, Integer, {}],
73
- ]
74
- assert_equal expected, conf.scores_table_schema
54
+ test "adding comparator with scalar arguments and two datasets" do
55
+ config = Linkage::Configuration.new(@dataset_1, @dataset_2, @result_set)
56
+
57
+ @field_set_1.expects(:[]).with(:foo).returns(@field_1)
58
+ @field_set_2.expects(:[]).with(:foo).returns(@field_2)
59
+ within = stub('within')
60
+ Linkage::Comparators::Within.expects(:new).with(@field_1, @field_2, 5).returns(within)
61
+ config.within(:foo, :foo, 5)
62
+ assert_equal within, config.comparators[0]
75
63
  end
76
64
 
77
- test "datasets_with_applied_exhaustive_expectations" do
78
- dataset_1 = stub('dataset 1', {
79
- :field_set => stub('field set 1', {
80
- :primary_key => stub('primary key 1', {
81
- :to_expr => :foo_id
82
- })
83
- })
84
- })
85
- dataset_2 = stub('dataset 2', {
86
- :field_set => stub('field set 2', {
87
- :primary_key => stub('primary key 2', {
88
- :to_expr => :bar_id
89
- })
90
- })
91
- })
92
- dataset_1a = stub('dataset 1a')
93
- dataset_2a = stub('dataset 2a')
94
- dataset_1b = stub('dataset 1b')
95
- dataset_2b = stub('dataset 2b')
96
- dataset_1c = stub('dataset 1c')
97
- dataset_2c = stub('dataset 2c')
98
- exp_1 = stub('exhaustive expectation 1')
99
- exp_2 = stub('exhaustive expectation 2')
100
-
101
- conf = Linkage::Configuration.new(dataset_1, dataset_2)
102
- conf.add_exhaustive_expectation(exp_1)
103
- conf.add_exhaustive_expectation(exp_2)
104
-
105
- dataset_1.expects(:select).with(:foo_id).returns(dataset_1a)
106
- dataset_2.expects(:select).with(:bar_id).returns(dataset_2a)
107
- exp_1.expects(:apply_to).with(dataset_1a, :lhs).returns(dataset_1b)
108
- exp_1.expects(:apply_to).with(dataset_2a, :rhs).returns(dataset_2b)
109
- exp_2.expects(:apply_to).with(dataset_1b, :lhs).returns(dataset_1c)
110
- exp_2.expects(:apply_to).with(dataset_2b, :rhs).returns(dataset_2c)
111
-
112
- assert_equal [dataset_1c, dataset_2c], conf.datasets_with_applied_exhaustive_expectations
65
+ test "score_recorder with two datasets" do
66
+ config = Linkage::Configuration.new(@dataset_1, @dataset_2, @result_set)
67
+ config.compare([:foo], [:foo], :equal)
68
+
69
+ @field_set_1.expects(:primary_key).returns(@pk_1)
70
+ @field_set_2.expects(:primary_key).returns(@pk_2)
71
+ @pk_1.expects(:name).returns(:id_1)
72
+ @pk_2.expects(:name).returns(:id_2)
73
+ score_recorder = stub('recorder')
74
+ Linkage::ScoreRecorder.expects(:new).with([@compare], @score_set, [:id_1, :id_2]).returns(score_recorder)
75
+ assert_same score_recorder, config.score_recorder
113
76
  end
114
77
 
115
- test "matches_table_schema" do
116
- dataset_1 = stub('dataset 1', {
117
- :field_set => stub('field set 1', {
118
- :primary_key => stub('primary key 1', {
119
- :ruby_type => {:type => Integer}
120
- })
121
- })
122
- })
123
- dataset_2 = stub('dataset 2', {
124
- :field_set => stub('field set 2', {
125
- :primary_key => stub('primary key 2', {
126
- :ruby_type => {:type => String, :opts => {:size => 10}}
127
- })
128
- })
129
- })
130
- conf = Linkage::Configuration.new(dataset_1, dataset_2)
131
-
132
- expected = [
133
- [:id, Integer, {:primary_key => true}],
134
- [:record_1_id, Integer, {}],
135
- [:record_2_id, String, {:size => 10}],
136
- [:total_score, Integer, {}],
137
- ]
138
- assert_equal expected, conf.matches_table_schema
78
+ test "score_recorder with one dataset" do
79
+ config = Linkage::Configuration.new(@dataset_1, @result_set)
80
+ config.compare([:foo], [:bar], :equal)
81
+
82
+ @field_set_1.expects(:primary_key).returns(@pk_1)
83
+ @pk_1.expects(:name).returns(:id_1)
84
+ score_recorder = stub('score recorder')
85
+ Linkage::ScoreRecorder.expects(:new).with([@compare], @score_set, [:id_1, :id_1]).returns(score_recorder)
86
+ assert_same score_recorder, config.score_recorder
139
87
  end
140
88
 
141
- test "change groups table name from default" do
142
- dataset = stub('dataset')
143
- conf = Linkage::Configuration.new(dataset, dataset)
144
- assert_equal :groups, conf.groups_table_name
145
- conf.groups_table_name = :foo_groups
146
- assert_equal :foo_groups, conf.groups_table_name
89
+ test "default matcher" do
90
+ config = Linkage::Configuration.new(@dataset_1, @dataset_2, @result_set)
91
+ config.compare([:foo], [:bar], :equal)
92
+
93
+ matcher = stub('matcher')
94
+ Linkage::Matcher.expects(:new).with([@compare], @score_set, :mean, 0.5).returns(matcher)
95
+ assert_equal matcher, config.matcher
147
96
  end
148
97
 
149
- test "change original groups table name from default" do
150
- dataset = stub('dataset')
151
- conf = Linkage::Configuration.new(dataset, dataset)
152
- assert_equal :original_groups, conf.original_groups_table_name
153
- conf.original_groups_table_name = :foo_original_groups
154
- assert_equal :foo_original_groups, conf.original_groups_table_name
98
+ test "matcher with explicit algorithm" do
99
+ config = Linkage::Configuration.new(@dataset_1, @dataset_2, @result_set)
100
+ config.compare([:foo], [:bar], :equal)
101
+ config.algorithm = :foo
102
+
103
+ matcher = stub('matcher')
104
+ Linkage::Matcher.expects(:new).with([@compare], @score_set, :foo, 0.5).returns(matcher)
105
+ assert_equal matcher, config.matcher
155
106
  end
156
107
 
157
- test "change scores table name from default" do
158
- dataset = stub('dataset')
159
- conf = Linkage::Configuration.new(dataset, dataset)
160
- assert_equal :scores, conf.scores_table_name
161
- conf.scores_table_name = :foo_scores
162
- assert_equal :foo_scores, conf.scores_table_name
108
+ test "matcher with explicit threshold" do
109
+ config = Linkage::Configuration.new(@dataset_1, @dataset_2, @result_set)
110
+ config.compare([:foo], [:bar], :equal)
111
+ config.threshold = 0.9
112
+
113
+ matcher = stub('matcher')
114
+ Linkage::Matcher.expects(:new).with([@compare], @score_set, :mean, 0.9).returns(matcher)
115
+ assert_equal matcher, config.matcher
163
116
  end
164
117
 
165
- test "change matches table name from default" do
166
- dataset = stub('dataset')
167
- conf = Linkage::Configuration.new(dataset, dataset)
168
- assert_equal :matches, conf.matches_table_name
169
- conf.matches_table_name = :foo_matches
170
- assert_equal :foo_matches, conf.matches_table_name
118
+ test "match_recorder" do
119
+ config = Linkage::Configuration.new(@dataset_1, @dataset_2, @result_set)
120
+
121
+ matcher = stub('matcher')
122
+ match_recorder = stub('match recorder')
123
+ Linkage::MatchRecorder.expects(:new).with(matcher, @match_set).returns(match_recorder)
124
+ assert_equal match_recorder, config.match_recorder(matcher)
171
125
  end
172
126
  end
@@ -8,36 +8,45 @@ class UnitTests::TestDataset < Test::Unit::TestCase
8
8
  [:first_name, {:allow_null=>true, :default=>nil, :primary_key=>false, :db_type=>"varchar(255)", :type=>:string, :ruby_default=>nil}],
9
9
  [:last_name, {:allow_null=>true, :default=>nil, :primary_key=>false, :db_type=>"varchar(255)", :type=>:string, :ruby_default=>nil}]
10
10
  ]
11
+
11
12
  @dataset = stub('Sequel dataset', :first_source_table => :foo)
12
- @database = stub('database', :schema => @schema, :[] => @dataset, :extend => nil)
13
+ @dataset.responds_like_instance_of(Sequel::Dataset)
14
+ @dataset.stubs(:kind_of?).with(Sequel::Dataset).returns(true)
15
+
16
+ @database = stub('database', :schema => @schema, :[] => @dataset)
17
+ @database.responds_like_instance_of(Sequel::Database)
18
+ @database.stubs(:kind_of?).with(Sequel::Database).returns(true)
13
19
  @dataset.stubs(:db).returns(@database)
14
20
  Sequel.stubs(:connect).returns(@database)
21
+
15
22
  @field_set = stub("field set")
16
23
  Linkage::FieldSet.stubs(:new).returns(@field_set)
17
24
  end
18
25
 
19
26
  test "initialize with uri and table name" do
20
27
  Sequel.expects(:connect).with('foo:/bar', {:foo => 'bar'}).returns(@database)
21
- @database.expects(:extend).with(Sequel::Collation)
22
28
  @database.expects(:[]).with(:foo).returns(@dataset)
23
29
  Linkage::FieldSet.expects(:new).with(kind_of(Linkage::Dataset)).returns(@field_set)
24
30
  ds = Linkage::Dataset.new('foo:/bar', "foo", {:foo => 'bar'})
31
+ assert_equal @field_set, ds.field_set
25
32
  end
26
33
 
27
34
  test "initialize with sequel dataset" do
28
- Linkage::Dataset.new(@dataset)
29
- end
30
-
31
- test "extend Sequel::Collation when initializing with sequel dataset" do
32
- @database.stubs(:kind_of?).with(Sequel::Collation).returns(false)
33
- @database.expects(:extend).with(Sequel::Collation)
35
+ @dataset.expects(:first_source_table).returns(:foo)
36
+ @dataset.expects(:db).returns(@database)
37
+ Linkage::FieldSet.expects(:new).with(kind_of(Linkage::Dataset)).returns(@field_set)
34
38
  ds = Linkage::Dataset.new(@dataset)
39
+ assert_equal :foo, ds.table_name
40
+ assert_equal @field_set, ds.field_set
35
41
  end
36
42
 
37
- test "don't extend already extended database" do
38
- @database.stubs(:kind_of?).with(Sequel::Collation).returns(true)
39
- @database.expects(:extend).with(Sequel::Collation).never
40
- ds = Linkage::Dataset.new(@dataset)
43
+ test "initialize with sequel database and table name" do
44
+ Sequel.unstub(:connect)
45
+ Sequel.expects(:connect).never
46
+ @database.expects(:[]).with(:foo).returns(@dataset)
47
+ Linkage::FieldSet.expects(:new).with(kind_of(Linkage::Dataset)).returns(@field_set)
48
+ ds = Linkage::Dataset.new(@database, "foo")
49
+ assert_equal @field_set, ds.field_set
41
50
  end
42
51
 
43
52
  test "table_name" do
@@ -69,72 +78,42 @@ class UnitTests::TestDataset < Test::Unit::TestCase
69
78
  assert_equal :foo, ds.database_type
70
79
  end
71
80
 
72
- test "set group_match" do
73
- ds_1 = Linkage::Dataset.new('foo:/bar', "foo", {:foo => 'bar'})
74
- @dataset.expects(:clone).returns(@dataset)
75
- meta_object = stub_instance(Linkage::MetaObject)
76
- ds_2 = ds_1.group_match(meta_object)
77
- assert_not_same ds_1, ds_2
78
- assert_not_equal ds_1.instance_variable_get(:@linkage_options),
79
- ds_2.instance_variable_get(:@linkage_options)
80
- end
81
-
82
- test "subsequent group_match replaces old options" do
83
- ds_1 = Linkage::Dataset.new('foo:/bar', "foo", {:foo => 'bar'})
84
- @dataset.expects(:clone).at_least_once.returns(@dataset)
85
- meta_object_1 = stub_instance(Linkage::MetaObject)
86
- ds_2 = ds_1.group_match(meta_object_1)
87
- assert_equal([{:meta_object => meta_object_1}], ds_2.linkage_options[:group_match])
88
-
89
- meta_object_2 = stub_instance(Linkage::MetaObject)
90
- ds_3 = ds_2.group_match(meta_object_2)
91
- assert_equal([{:meta_object => meta_object_2}], ds_3.linkage_options[:group_match])
92
- end
93
-
94
- test "group_match_more appends to group_match options" do
95
- ds_1 = Linkage::Dataset.new('foo:/bar', "foo", {:foo => 'bar'})
96
- @dataset.expects(:clone).at_least_once.returns(@dataset)
97
- meta_object_1 = stub_instance(Linkage::MetaObject)
98
- ds_2 = ds_1.group_match(meta_object_1)
99
- assert_equal([{:meta_object => meta_object_1}], ds_2.linkage_options[:group_match])
100
-
101
- meta_object_2 = stub_instance(Linkage::MetaObject)
102
- ds_3 = ds_2.group_match_more(meta_object_2)
103
- assert_equal([{:meta_object => meta_object_1}, {:meta_object => meta_object_2}], ds_3.linkage_options[:group_match])
104
- end
105
-
106
- test "group_by_matches" do
81
+ test "primary key" do
107
82
  ds = Linkage::Dataset.new('foo:/bar', "foo", {:foo => 'bar'})
108
-
109
- @dataset.expects(:clone).returns(@dataset)
110
- meta_object = stub_instance(Linkage::MetaObject, :to_expr => :foo)
111
- ds = ds.group_match(meta_object)
112
- @dataset.expects(:group).with(:foo).returns(@dataset)
113
-
114
- ds.group_by_matches
83
+ pk = stub('primary key field')
84
+ @field_set.expects(:primary_key).returns(pk)
85
+ assert_equal pk, ds.primary_key
115
86
  end
116
87
 
117
- test "dataset_for_group" do
118
- ds = Linkage::Dataset.new('foo:/bar', "foo", {:foo => 'bar'})
119
- @dataset.expects(:clone).returns(@dataset)
120
- meta_object = stub_instance(Linkage::MetaObject, :to_expr => :foo)
121
- ds = ds.group_match({:meta_object => meta_object, :alias => :foo_bar})
122
-
123
- group = stub("group", :values => {:foo_bar => 'baz'})
124
- filtered_dataset = stub('filtered dataset')
125
- @dataset.expects(:filter).with(:foo => 'baz').returns(filtered_dataset)
126
- assert_equal filtered_dataset, ds.dataset_for_group(group)
88
+ test "link_with other" do
89
+ ds_1 = Linkage::Dataset.new('foo:/bar', "foo", {:foo => 'bar'})
90
+ ds_2 = Linkage::Dataset.new('foo:/bar', "bar", {:foo => 'bar'})
91
+ result_set = stub('result set')
92
+ conf = stub('configuration')
93
+ Linkage::Configuration.expects(:new).with(ds_1, ds_2, result_set).returns(conf)
94
+ actual = ds_1.link_with(ds_2, result_set) do |arg|
95
+ assert_equal conf, arg
96
+ end
97
+ assert_equal actual, conf
127
98
  end
128
99
 
129
- test "dataset_for_group without aliases" do
100
+ test "link_with self" do
130
101
  ds = Linkage::Dataset.new('foo:/bar', "foo", {:foo => 'bar'})
131
- @dataset.expects(:clone).returns(@dataset)
132
- meta_object = stub_instance(Linkage::MetaObject, :to_expr => :foo)
133
- ds = ds.group_match(meta_object)
134
-
135
- group = stub("group", :values => {:foo => 'baz'})
136
- filtered_dataset = stub('filtered dataset')
137
- @dataset.expects(:filter).with(:foo => 'baz').returns(filtered_dataset)
138
- assert_equal filtered_dataset, ds.dataset_for_group(group)
102
+ result_set = stub('result set')
103
+ conf = stub('configuration')
104
+ Linkage::Configuration.expects(:new).with(ds, nil, result_set).returns(conf)
105
+ actual = ds.link_with(ds, result_set) do |arg|
106
+ assert_equal conf, arg
107
+ end
108
+ assert_equal actual, conf
109
+ end
110
+
111
+ test "delegating" do
112
+ dataset_2 = Sequel::Dataset.allocate
113
+ @dataset.expects(:filter).with(:foo => 123).returns(dataset_2)
114
+ ds_1 = Linkage::Dataset.new('foo:/bar', "foo", {:foo => 'bar'})
115
+ ds_2 = ds_1.filter(:foo => 123)
116
+ assert_kind_of Linkage::Dataset, ds_2
117
+ assert_same dataset_2, ds_2.obj
139
118
  end
140
119
  end
@@ -1,74 +1,23 @@
1
1
  require 'helper'
2
2
 
3
3
  class UnitTests::TestField < Test::Unit::TestCase
4
- test "subclass of data" do
5
- assert_equal Linkage::Data, Linkage::Field.superclass
6
- end
7
-
8
4
  test "initialize with schema info" do
9
- dataset = stub('dataset')
10
5
  schema = {:allow_null=>true, :default=>nil, :primary_key=>true, :db_type=>"integer", :type=>:integer, :ruby_default=>nil}
11
- field = Linkage::Field.new(dataset, :id, schema)
6
+ field = Linkage::Field.new(:id, schema)
12
7
  assert_equal :id, field.name
13
8
  assert_equal schema, field.schema
14
- assert_equal dataset, field.dataset
15
- end
16
-
17
- test "static? is always false" do
18
- dataset = stub('dataset')
19
- schema = {:allow_null=>true, :default=>nil, :primary_key=>true, :db_type=>"integer", :type=>:integer, :ruby_default=>nil}
20
- field = Linkage::Field.new(dataset, :id, schema)
21
- assert !field.static?
22
9
  end
23
10
 
24
11
  test "ruby_type for integer" do
25
- dataset = stub('dataset')
26
- field = Linkage::Field.new(dataset, :id, {:allow_null=>true, :default=>nil, :primary_key=>true, :db_type=>"integer", :type=>:integer, :ruby_default=>nil})
12
+ field = Linkage::Field.new(:id, {:allow_null=>true, :default=>nil, :primary_key=>true, :db_type=>"integer", :type=>:integer, :ruby_default=>nil})
27
13
  assert_equal({:type => Integer}, field.ruby_type)
28
14
  end
29
15
 
30
16
  test "primary_key? returns true if primary key" do
31
- dataset = stub('dataset')
32
- field_1 = Linkage::Field.new(dataset, :id, {:allow_null=>true, :default=>nil, :primary_key=>true, :db_type=>"integer", :type=>:integer, :ruby_default=>nil})
17
+ field_1 = Linkage::Field.new(:id, {:allow_null=>true, :default=>nil, :primary_key=>true, :db_type=>"integer", :type=>:integer, :ruby_default=>nil})
33
18
  assert field_1.primary_key?
34
19
 
35
- field_2 = Linkage::Field.new(dataset, :foo, {:allow_null=>true, :default=>nil, :primary_key=>false, :db_type=>"integer", :type=>:integer, :ruby_default=>nil})
20
+ field_2 = Linkage::Field.new(:foo, {:allow_null=>true, :default=>nil, :primary_key=>false, :db_type=>"integer", :type=>:integer, :ruby_default=>nil})
36
21
  assert !field_2.primary_key?
37
22
  end
38
-
39
- test "to_expr returns name" do
40
- dataset = stub('dataset')
41
- field = Linkage::Field.new(dataset, :id, {:allow_null=>true, :default=>nil, :primary_key=>true, :db_type=>"integer", :type=>:integer, :ruby_default=>nil})
42
- assert_equal :id, field.to_expr
43
- end
44
-
45
- test "to_expr ignores adapter argument" do
46
- dataset = stub('dataset')
47
- field = Linkage::Field.new(dataset, :id, {:allow_null=>true, :default=>nil, :primary_key=>true, :db_type=>"integer", :type=>:integer, :ruby_default=>nil})
48
- assert_equal :id, field.to_expr(:foo)
49
- end
50
-
51
- test "collation" do
52
- dataset = stub('dataset')
53
- field = Linkage::Field.new(dataset, :foo, {:allow_null=>true, :default=>nil, :primary_key=>false, :db_type=>"varchar(255)", :type=>:string, :collation=>"latin1_general_cs", :ruby_default=>nil})
54
- assert_equal "latin1_general_cs", field.collation
55
- assert_equal "latin1_general_cs", field.ruby_type[:opts][:collate]
56
- end
57
-
58
- test "initialize MergeField with ruby type" do
59
- info = {:type => Integer}
60
- field = Linkage::MergeField.new(:id, info)
61
- assert_equal :id, field.name
62
- assert_equal info, field.ruby_type
63
- assert_nil field.schema
64
- assert_nil field.dataset
65
- end
66
-
67
- test "MergeField#database_type accessor" do
68
- field_1 = Linkage::MergeField.new(:id, {:type => Integer})
69
- assert_nil field_1.database_type
70
-
71
- field_2 = Linkage::MergeField.new(:id, {:type => Integer}, :mysql)
72
- assert_equal :mysql, field_2.database_type
73
- end
74
23
  end
@@ -20,9 +20,9 @@ class UnitTests::TestFieldSet < Test::Unit::TestCase
20
20
  field_1 = stub('id field')
21
21
  field_2 = stub('first_name field')
22
22
  field_3 = stub('last_name field')
23
- Linkage::Field.expects(:new).with(dataset, :id, @schema[:id]).returns(field_1)
24
- Linkage::Field.expects(:new).with(dataset, :first_name, @schema[:first_name]).returns(field_2)
25
- Linkage::Field.expects(:new).with(dataset, :last_name, @schema[:last_name]).returns(field_3)
23
+ Linkage::Field.expects(:new).with(:id, @schema[:id]).returns(field_1)
24
+ Linkage::Field.expects(:new).with(:first_name, @schema[:first_name]).returns(field_2)
25
+ Linkage::Field.expects(:new).with(:last_name, @schema[:last_name]).returns(field_3)
26
26
 
27
27
  fs = Linkage::FieldSet.new(dataset)
28
28
  assert_equal field_1, fs.primary_key
@@ -37,9 +37,9 @@ class UnitTests::TestFieldSet < Test::Unit::TestCase
37
37
  field_1 = stub('id field')
38
38
  field_2 = stub('first_name field')
39
39
  field_3 = stub('last_name field')
40
- Linkage::Field.stubs(:new).with(dataset, :id, @schema[:id]).returns(field_1)
41
- Linkage::Field.stubs(:new).with(dataset, :first_name, @schema[:first_name]).returns(field_2)
42
- Linkage::Field.stubs(:new).with(dataset, :last_name, @schema[:last_name]).returns(field_3)
40
+ Linkage::Field.stubs(:new).with(:id, @schema[:id]).returns(field_1)
41
+ Linkage::Field.stubs(:new).with(:first_name, @schema[:first_name]).returns(field_2)
42
+ Linkage::Field.stubs(:new).with(:last_name, @schema[:last_name]).returns(field_3)
43
43
 
44
44
  fs = Linkage::FieldSet.new(dataset)
45
45
  assert_equal field_1, fs.primary_key
@@ -0,0 +1,23 @@
1
+ require 'helper'
2
+
3
+ class UnitTests::TestMatchRecorder < Test::Unit::TestCase
4
+ def setup
5
+ @match_set = stub('match set')
6
+ @matcher = stub('matcher')
7
+ end
8
+
9
+ test "recording events from a matcher" do
10
+ match_recorder = Linkage::MatchRecorder.new(@matcher, @match_set)
11
+
12
+ @matcher.expects(:add_observer).with(match_recorder)
13
+ @match_set.expects(:open_for_writing)
14
+ match_recorder.start
15
+
16
+ @match_set.expects(:add_match).with(123, 456, 1)
17
+ match_recorder.update(123, 456, 1)
18
+
19
+ @matcher.expects(:delete_observer).with(match_recorder)
20
+ @match_set.expects(:close)
21
+ match_recorder.stop
22
+ end
23
+ end
@@ -0,0 +1,23 @@
1
+ require 'helper'
2
+
3
+ class TestMatchSet < Test::Unit::TestCase
4
+ test "add_match raises NotImplementedError" do
5
+ match_set = Linkage::MatchSet.new
6
+ assert_raises(NotImplementedError) do
7
+ match_set.add_match('foo', 'bar', 'baz')
8
+ end
9
+ end
10
+
11
+ test "getting a registered class" do
12
+ klass = new_match_set
13
+ Linkage::MatchSet.register('foo', klass)
14
+ assert_equal klass, Linkage::MatchSet['foo']
15
+ end
16
+
17
+ test "registered classes required to define add_match" do
18
+ klass = new_match_set do
19
+ remove_method :add_match
20
+ end
21
+ assert_raises(ArgumentError) { Linkage::MatchSet.register('foo', klass) }
22
+ end
23
+ end
@@ -0,0 +1,4 @@
1
+ require 'helper'
2
+
3
+ module UnitTests::TestMatchSets
4
+ end
@@ -0,0 +1,44 @@
1
+ require 'helper'
2
+
3
+ class UnitTests::TestMatcher < Test::Unit::TestCase
4
+ def setup
5
+ @score_set = stub('score set')
6
+ @comparators = [stub('comparator 1'), stub('comparator 2'), stub('comparator 3')]
7
+ end
8
+
9
+ test "finding matches with mean and threshold" do
10
+ matcher = Linkage::Matcher.new(@comparators, @score_set, :mean, 0.5)
11
+ observer = stub('observer')
12
+ observer.expects(:update).with(3, 4, 2.0 / 3)
13
+ observer.expects(:update).with(4, 5, 1.0)
14
+ matcher.add_observer(observer)
15
+
16
+ pairs = [
17
+ [1, 2, {1 => 1, 2 => 0, 3 => 0}],
18
+ [2, 3, {1 => 0, 2 => 0, 3 => 0}],
19
+ [3, 4, {1 => 0, 2 => 1, 3 => 1}],
20
+ [4, 5, {1 => 1, 2 => 1, 3 => 1}]
21
+ ]
22
+ @score_set.expects(:each_pair).multiple_yields(*pairs)
23
+
24
+ matcher.run
25
+ end
26
+
27
+ test "finding matches with mean and threshold with missing scores" do
28
+ matcher = Linkage::Matcher.new(@comparators, @score_set, :mean, 0.5)
29
+ observer = stub('observer')
30
+ observer.expects(:update).with(3, 4, 2.0 / 3)
31
+ observer.expects(:update).with(4, 5, 1.0)
32
+ matcher.add_observer(observer)
33
+
34
+ pairs = [
35
+ [1, 2, {1 => 1, 3 => 0}],
36
+ [2, 3, {1 => 0, 2 => 0, 3 => 0}],
37
+ [3, 4, {2 => 1, 3 => 1}],
38
+ [4, 5, {1 => 1, 2 => 1, 3 => 1}]
39
+ ]
40
+ @score_set.expects(:each_pair).multiple_yields(*pairs)
41
+
42
+ matcher.run
43
+ end
44
+ end