linkage 0.0.8 → 0.1.0.pre

Sign up to get free protection for your applications and to get access to all the features.
Files changed (105) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +1 -0
  3. data/.yardopts +1 -0
  4. data/Gemfile +1 -19
  5. data/Gemfile-java +3 -0
  6. data/README.markdown +88 -34
  7. data/Rakefile +16 -15
  8. data/TODO +4 -0
  9. data/lib/linkage/comparator.rb +139 -144
  10. data/lib/linkage/comparators/compare.rb +236 -29
  11. data/lib/linkage/comparators/strcompare.rb +85 -0
  12. data/lib/linkage/comparators/within.rb +24 -20
  13. data/lib/linkage/configuration.rb +44 -466
  14. data/lib/linkage/dataset.rb +28 -127
  15. data/lib/linkage/exceptions.rb +5 -0
  16. data/lib/linkage/field.rb +6 -37
  17. data/lib/linkage/field_set.rb +3 -3
  18. data/lib/linkage/match_recorder.rb +22 -0
  19. data/lib/linkage/match_set.rb +34 -0
  20. data/lib/linkage/match_sets/csv.rb +39 -0
  21. data/lib/linkage/match_sets/database.rb +45 -0
  22. data/lib/linkage/matcher.rb +30 -0
  23. data/lib/linkage/result_set.rb +25 -110
  24. data/lib/linkage/result_sets/csv.rb +54 -0
  25. data/lib/linkage/result_sets/database.rb +42 -0
  26. data/lib/linkage/runner.rb +57 -16
  27. data/lib/linkage/score_recorder.rb +30 -0
  28. data/lib/linkage/score_set.rb +49 -0
  29. data/lib/linkage/score_sets/csv.rb +64 -0
  30. data/lib/linkage/score_sets/database.rb +77 -0
  31. data/lib/linkage/version.rb +1 -1
  32. data/lib/linkage.rb +14 -17
  33. data/linkage.gemspec +13 -1
  34. data/linkage.gemspec-java +32 -0
  35. data/test/helper.rb +30 -23
  36. data/test/integration/test_cross_linkage.rb +46 -25
  37. data/test/integration/test_database_result_set.rb +55 -0
  38. data/test/integration/test_dual_linkage.rb +19 -94
  39. data/test/integration/test_self_linkage.rb +100 -203
  40. data/test/integration/test_within_comparator.rb +24 -77
  41. data/test/unit/comparators/test_compare.rb +254 -50
  42. data/test/unit/comparators/test_strcompare.rb +45 -0
  43. data/test/unit/comparators/test_within.rb +14 -26
  44. data/test/unit/match_sets/test_csv.rb +78 -0
  45. data/test/unit/match_sets/test_database.rb +63 -0
  46. data/test/unit/result_sets/test_csv.rb +111 -0
  47. data/test/unit/result_sets/test_database.rb +68 -0
  48. data/test/unit/score_sets/test_csv.rb +151 -0
  49. data/test/unit/score_sets/test_database.rb +149 -0
  50. data/test/unit/test_comparator.rb +46 -83
  51. data/test/unit/test_comparators.rb +4 -0
  52. data/test/unit/test_configuration.rb +99 -145
  53. data/test/unit/test_dataset.rb +52 -73
  54. data/test/unit/test_field.rb +4 -55
  55. data/test/unit/test_field_set.rb +6 -6
  56. data/test/unit/test_match_recorder.rb +23 -0
  57. data/test/unit/test_match_set.rb +23 -0
  58. data/test/unit/test_match_sets.rb +4 -0
  59. data/test/unit/test_matcher.rb +44 -0
  60. data/test/unit/test_result_set.rb +24 -223
  61. data/test/unit/test_result_sets.rb +4 -0
  62. data/test/unit/test_runner.rb +122 -17
  63. data/test/unit/test_runners.rb +4 -0
  64. data/test/unit/test_score_recorder.rb +25 -0
  65. data/test/unit/test_score_set.rb +37 -0
  66. data/test/unit/test_score_sets.rb +4 -0
  67. metadata +183 -90
  68. data/Gemfile.lock +0 -92
  69. data/lib/linkage/comparators/binary.rb +0 -12
  70. data/lib/linkage/data.rb +0 -175
  71. data/lib/linkage/decollation.rb +0 -93
  72. data/lib/linkage/expectation.rb +0 -21
  73. data/lib/linkage/expectations/exhaustive.rb +0 -63
  74. data/lib/linkage/expectations/simple.rb +0 -168
  75. data/lib/linkage/function.rb +0 -148
  76. data/lib/linkage/functions/binary.rb +0 -30
  77. data/lib/linkage/functions/cast.rb +0 -54
  78. data/lib/linkage/functions/length.rb +0 -29
  79. data/lib/linkage/functions/strftime.rb +0 -33
  80. data/lib/linkage/functions/trim.rb +0 -30
  81. data/lib/linkage/group.rb +0 -55
  82. data/lib/linkage/meta_object.rb +0 -139
  83. data/lib/linkage/runner/single_threaded.rb +0 -187
  84. data/lib/linkage/utils.rb +0 -164
  85. data/lib/linkage/warnings.rb +0 -5
  86. data/test/integration/test_collation.rb +0 -45
  87. data/test/integration/test_configuration.rb +0 -268
  88. data/test/integration/test_dataset.rb +0 -116
  89. data/test/integration/test_functions.rb +0 -88
  90. data/test/integration/test_result_set.rb +0 -85
  91. data/test/integration/test_scoring.rb +0 -84
  92. data/test/unit/expectations/test_exhaustive.rb +0 -111
  93. data/test/unit/expectations/test_simple.rb +0 -303
  94. data/test/unit/functions/test_binary.rb +0 -54
  95. data/test/unit/functions/test_cast.rb +0 -98
  96. data/test/unit/functions/test_length.rb +0 -52
  97. data/test/unit/functions/test_strftime.rb +0 -60
  98. data/test/unit/functions/test_trim.rb +0 -43
  99. data/test/unit/runner/test_single_threaded.rb +0 -12
  100. data/test/unit/test_data.rb +0 -445
  101. data/test/unit/test_decollation.rb +0 -201
  102. data/test/unit/test_function.rb +0 -233
  103. data/test/unit/test_group.rb +0 -38
  104. data/test/unit/test_meta_object.rb +0 -208
  105. data/test/unit/test_utils.rb +0 -341
@@ -1,172 +1,126 @@
1
1
  require 'helper'
2
2
 
3
3
  class UnitTests::TestConfiguration < Test::Unit::TestCase
4
- test "result_set" do
5
- dataset_1 = stub('dataset')
6
- dataset_2 = stub('dataset')
7
- c = Linkage::Configuration.new(dataset_1, dataset_2)
8
-
9
- result_set = stub('result set')
10
- Linkage::ResultSet.expects(:new).with(c).returns(result_set)
11
- assert_equal result_set, c.result_set
4
+ def setup
5
+ @pk_1 = stub('primary key 1', :name => :id)
6
+ @field_1 = stub('field 1')
7
+ @field_set_1 = stub('field set 1', :primary_key => @pk_1, :[] => @field_1)
8
+ @dataset_1 = stub('dataset 1', :field_set => @field_set_1)
9
+ @pk_2 = stub('primary key 2', :name => :id)
10
+ @field_2 = stub('field 2')
11
+ @field_set_2 = stub('field set 2', :primary_key => @pk_2, :[] => @field_2)
12
+ @dataset_2 = stub('dataset 2', :field_set => @field_set_2)
13
+ @score_set = stub('score set')
14
+ @match_set = stub('match set')
15
+ @result_set = stub('result set', :score_set => @score_set, :match_set => @match_set)
16
+ @compare = stub('compare')
17
+ Linkage::Comparators::Compare.stubs(:new).returns(@compare)
12
18
  end
13
19
 
14
- test "groups_table_needed? is false if there are no simple expectations" do
15
- dataset_1 = stub('dataset')
16
- dataset_2 = stub('dataset')
17
- conf = Linkage::Configuration.new(dataset_1, dataset_2)
18
- assert !conf.groups_table_needed?
20
+ test "init with single dataset and result set" do
21
+ config = Linkage::Configuration.new(@dataset_1, @result_set)
22
+ assert_equal @dataset_1, config.dataset_1
23
+ assert_nil config.dataset_2
24
+ assert_equal @result_set, config.result_set
19
25
  end
20
26
 
21
- test "groups_table_needed? is true if there are any simple expectations" do
22
- dataset_1 = stub('dataset')
23
- dataset_2 = stub('dataset')
24
- conf = Linkage::Configuration.new(dataset_1, dataset_2)
25
- exp = stub('simple expectation', :decollation_needed? => false)
26
- conf.add_simple_expectation(exp)
27
- assert conf.groups_table_needed?
27
+ test "init with two datasets and result set" do
28
+ config = Linkage::Configuration.new(@dataset_1, @dataset_2, @result_set)
29
+ assert_equal @dataset_1, config.dataset_1
30
+ assert_equal @dataset_2, config.dataset_2
31
+ assert_equal @result_set, config.result_set
28
32
  end
29
33
 
30
- test "scores_table_needed? is false if there are no exhaustive expectations" do
31
- dataset_1 = stub('dataset')
32
- dataset_2 = stub('dataset')
33
- conf = Linkage::Configuration.new(dataset_1, dataset_2)
34
- assert !conf.scores_table_needed?
34
+ test "adding comparator with set arguments and two datasets" do
35
+ config = Linkage::Configuration.new(@dataset_1, @dataset_2, @result_set)
36
+
37
+ @field_set_1.expects(:[]).with(:foo).returns(@field_1)
38
+ @field_set_2.expects(:[]).with(:foo).returns(@field_2)
39
+ Linkage::Comparators::Compare.expects(:new).with([@field_1], [@field_2], :equal).returns(@compare)
40
+ config.compare([:foo], [:foo], :equal)
41
+ assert_equal @compare, config.comparators[0]
35
42
  end
36
43
 
37
- test "scores_table_needed? is true if there are any exhaustive expectations" do
38
- dataset_1 = stub('dataset')
39
- dataset_2 = stub('dataset')
40
- conf = Linkage::Configuration.new(dataset_1, dataset_2)
41
- exp = stub('exhaustive expectation')
42
- conf.add_exhaustive_expectation(exp)
43
- assert conf.scores_table_needed?
44
+ test "adding comparator with set arguments and one datasets" do
45
+ config = Linkage::Configuration.new(@dataset_1, @result_set)
46
+
47
+ @field_set_1.expects(:[]).with(:foo).returns(@field_1)
48
+ @field_set_1.expects(:[]).with(:bar).returns(@field_2)
49
+ Linkage::Comparators::Compare.expects(:new).with([@field_1], [@field_2], :equal).returns(@compare)
50
+ config.compare([:foo], [:bar], :equal)
51
+ assert_equal @compare, config.comparators[0]
44
52
  end
45
53
 
46
- test "scores_table_schema" do
47
- dataset_1 = stub('dataset 1', {
48
- :field_set => stub('field set 1', {
49
- :primary_key => stub('primary key 1', {
50
- :ruby_type => {:type => Integer}
51
- })
52
- })
53
- })
54
- dataset_2 = stub('dataset 2', {
55
- :field_set => stub('field set 2', {
56
- :primary_key => stub('primary key 2', {
57
- :ruby_type => {:type => String, :opts => {:size => 10}}
58
- })
59
- })
60
- })
61
- conf = Linkage::Configuration.new(dataset_1, dataset_2)
62
- exp_1 = stub('exhaustive expectation 1')
63
- exp_2 = stub('exhaustive expectation 2')
64
- conf.add_exhaustive_expectation(exp_1)
65
- conf.add_exhaustive_expectation(exp_2)
66
-
67
- expected = [
68
- [:id, Integer, {:primary_key => true}],
69
- [:comparator_id, Integer, {}],
70
- [:record_1_id, Integer, {}],
71
- [:record_2_id, String, {:size => 10}],
72
- [:score, Integer, {}],
73
- ]
74
- assert_equal expected, conf.scores_table_schema
54
+ test "adding comparator with scalar arguments and two datasets" do
55
+ config = Linkage::Configuration.new(@dataset_1, @dataset_2, @result_set)
56
+
57
+ @field_set_1.expects(:[]).with(:foo).returns(@field_1)
58
+ @field_set_2.expects(:[]).with(:foo).returns(@field_2)
59
+ within = stub('within')
60
+ Linkage::Comparators::Within.expects(:new).with(@field_1, @field_2, 5).returns(within)
61
+ config.within(:foo, :foo, 5)
62
+ assert_equal within, config.comparators[0]
75
63
  end
76
64
 
77
- test "datasets_with_applied_exhaustive_expectations" do
78
- dataset_1 = stub('dataset 1', {
79
- :field_set => stub('field set 1', {
80
- :primary_key => stub('primary key 1', {
81
- :to_expr => :foo_id
82
- })
83
- })
84
- })
85
- dataset_2 = stub('dataset 2', {
86
- :field_set => stub('field set 2', {
87
- :primary_key => stub('primary key 2', {
88
- :to_expr => :bar_id
89
- })
90
- })
91
- })
92
- dataset_1a = stub('dataset 1a')
93
- dataset_2a = stub('dataset 2a')
94
- dataset_1b = stub('dataset 1b')
95
- dataset_2b = stub('dataset 2b')
96
- dataset_1c = stub('dataset 1c')
97
- dataset_2c = stub('dataset 2c')
98
- exp_1 = stub('exhaustive expectation 1')
99
- exp_2 = stub('exhaustive expectation 2')
100
-
101
- conf = Linkage::Configuration.new(dataset_1, dataset_2)
102
- conf.add_exhaustive_expectation(exp_1)
103
- conf.add_exhaustive_expectation(exp_2)
104
-
105
- dataset_1.expects(:select).with(:foo_id).returns(dataset_1a)
106
- dataset_2.expects(:select).with(:bar_id).returns(dataset_2a)
107
- exp_1.expects(:apply_to).with(dataset_1a, :lhs).returns(dataset_1b)
108
- exp_1.expects(:apply_to).with(dataset_2a, :rhs).returns(dataset_2b)
109
- exp_2.expects(:apply_to).with(dataset_1b, :lhs).returns(dataset_1c)
110
- exp_2.expects(:apply_to).with(dataset_2b, :rhs).returns(dataset_2c)
111
-
112
- assert_equal [dataset_1c, dataset_2c], conf.datasets_with_applied_exhaustive_expectations
65
+ test "score_recorder with two datasets" do
66
+ config = Linkage::Configuration.new(@dataset_1, @dataset_2, @result_set)
67
+ config.compare([:foo], [:foo], :equal)
68
+
69
+ @field_set_1.expects(:primary_key).returns(@pk_1)
70
+ @field_set_2.expects(:primary_key).returns(@pk_2)
71
+ @pk_1.expects(:name).returns(:id_1)
72
+ @pk_2.expects(:name).returns(:id_2)
73
+ score_recorder = stub('recorder')
74
+ Linkage::ScoreRecorder.expects(:new).with([@compare], @score_set, [:id_1, :id_2]).returns(score_recorder)
75
+ assert_same score_recorder, config.score_recorder
113
76
  end
114
77
 
115
- test "matches_table_schema" do
116
- dataset_1 = stub('dataset 1', {
117
- :field_set => stub('field set 1', {
118
- :primary_key => stub('primary key 1', {
119
- :ruby_type => {:type => Integer}
120
- })
121
- })
122
- })
123
- dataset_2 = stub('dataset 2', {
124
- :field_set => stub('field set 2', {
125
- :primary_key => stub('primary key 2', {
126
- :ruby_type => {:type => String, :opts => {:size => 10}}
127
- })
128
- })
129
- })
130
- conf = Linkage::Configuration.new(dataset_1, dataset_2)
131
-
132
- expected = [
133
- [:id, Integer, {:primary_key => true}],
134
- [:record_1_id, Integer, {}],
135
- [:record_2_id, String, {:size => 10}],
136
- [:total_score, Integer, {}],
137
- ]
138
- assert_equal expected, conf.matches_table_schema
78
+ test "score_recorder with one dataset" do
79
+ config = Linkage::Configuration.new(@dataset_1, @result_set)
80
+ config.compare([:foo], [:bar], :equal)
81
+
82
+ @field_set_1.expects(:primary_key).returns(@pk_1)
83
+ @pk_1.expects(:name).returns(:id_1)
84
+ score_recorder = stub('score recorder')
85
+ Linkage::ScoreRecorder.expects(:new).with([@compare], @score_set, [:id_1, :id_1]).returns(score_recorder)
86
+ assert_same score_recorder, config.score_recorder
139
87
  end
140
88
 
141
- test "change groups table name from default" do
142
- dataset = stub('dataset')
143
- conf = Linkage::Configuration.new(dataset, dataset)
144
- assert_equal :groups, conf.groups_table_name
145
- conf.groups_table_name = :foo_groups
146
- assert_equal :foo_groups, conf.groups_table_name
89
+ test "default matcher" do
90
+ config = Linkage::Configuration.new(@dataset_1, @dataset_2, @result_set)
91
+ config.compare([:foo], [:bar], :equal)
92
+
93
+ matcher = stub('matcher')
94
+ Linkage::Matcher.expects(:new).with([@compare], @score_set, :mean, 0.5).returns(matcher)
95
+ assert_equal matcher, config.matcher
147
96
  end
148
97
 
149
- test "change original groups table name from default" do
150
- dataset = stub('dataset')
151
- conf = Linkage::Configuration.new(dataset, dataset)
152
- assert_equal :original_groups, conf.original_groups_table_name
153
- conf.original_groups_table_name = :foo_original_groups
154
- assert_equal :foo_original_groups, conf.original_groups_table_name
98
+ test "matcher with explicit algorithm" do
99
+ config = Linkage::Configuration.new(@dataset_1, @dataset_2, @result_set)
100
+ config.compare([:foo], [:bar], :equal)
101
+ config.algorithm = :foo
102
+
103
+ matcher = stub('matcher')
104
+ Linkage::Matcher.expects(:new).with([@compare], @score_set, :foo, 0.5).returns(matcher)
105
+ assert_equal matcher, config.matcher
155
106
  end
156
107
 
157
- test "change scores table name from default" do
158
- dataset = stub('dataset')
159
- conf = Linkage::Configuration.new(dataset, dataset)
160
- assert_equal :scores, conf.scores_table_name
161
- conf.scores_table_name = :foo_scores
162
- assert_equal :foo_scores, conf.scores_table_name
108
+ test "matcher with explicit threshold" do
109
+ config = Linkage::Configuration.new(@dataset_1, @dataset_2, @result_set)
110
+ config.compare([:foo], [:bar], :equal)
111
+ config.threshold = 0.9
112
+
113
+ matcher = stub('matcher')
114
+ Linkage::Matcher.expects(:new).with([@compare], @score_set, :mean, 0.9).returns(matcher)
115
+ assert_equal matcher, config.matcher
163
116
  end
164
117
 
165
- test "change matches table name from default" do
166
- dataset = stub('dataset')
167
- conf = Linkage::Configuration.new(dataset, dataset)
168
- assert_equal :matches, conf.matches_table_name
169
- conf.matches_table_name = :foo_matches
170
- assert_equal :foo_matches, conf.matches_table_name
118
+ test "match_recorder" do
119
+ config = Linkage::Configuration.new(@dataset_1, @dataset_2, @result_set)
120
+
121
+ matcher = stub('matcher')
122
+ match_recorder = stub('match recorder')
123
+ Linkage::MatchRecorder.expects(:new).with(matcher, @match_set).returns(match_recorder)
124
+ assert_equal match_recorder, config.match_recorder(matcher)
171
125
  end
172
126
  end
@@ -8,36 +8,45 @@ class UnitTests::TestDataset < Test::Unit::TestCase
8
8
  [:first_name, {:allow_null=>true, :default=>nil, :primary_key=>false, :db_type=>"varchar(255)", :type=>:string, :ruby_default=>nil}],
9
9
  [:last_name, {:allow_null=>true, :default=>nil, :primary_key=>false, :db_type=>"varchar(255)", :type=>:string, :ruby_default=>nil}]
10
10
  ]
11
+
11
12
  @dataset = stub('Sequel dataset', :first_source_table => :foo)
12
- @database = stub('database', :schema => @schema, :[] => @dataset, :extend => nil)
13
+ @dataset.responds_like_instance_of(Sequel::Dataset)
14
+ @dataset.stubs(:kind_of?).with(Sequel::Dataset).returns(true)
15
+
16
+ @database = stub('database', :schema => @schema, :[] => @dataset)
17
+ @database.responds_like_instance_of(Sequel::Database)
18
+ @database.stubs(:kind_of?).with(Sequel::Database).returns(true)
13
19
  @dataset.stubs(:db).returns(@database)
14
20
  Sequel.stubs(:connect).returns(@database)
21
+
15
22
  @field_set = stub("field set")
16
23
  Linkage::FieldSet.stubs(:new).returns(@field_set)
17
24
  end
18
25
 
19
26
  test "initialize with uri and table name" do
20
27
  Sequel.expects(:connect).with('foo:/bar', {:foo => 'bar'}).returns(@database)
21
- @database.expects(:extend).with(Sequel::Collation)
22
28
  @database.expects(:[]).with(:foo).returns(@dataset)
23
29
  Linkage::FieldSet.expects(:new).with(kind_of(Linkage::Dataset)).returns(@field_set)
24
30
  ds = Linkage::Dataset.new('foo:/bar', "foo", {:foo => 'bar'})
31
+ assert_equal @field_set, ds.field_set
25
32
  end
26
33
 
27
34
  test "initialize with sequel dataset" do
28
- Linkage::Dataset.new(@dataset)
29
- end
30
-
31
- test "extend Sequel::Collation when initializing with sequel dataset" do
32
- @database.stubs(:kind_of?).with(Sequel::Collation).returns(false)
33
- @database.expects(:extend).with(Sequel::Collation)
35
+ @dataset.expects(:first_source_table).returns(:foo)
36
+ @dataset.expects(:db).returns(@database)
37
+ Linkage::FieldSet.expects(:new).with(kind_of(Linkage::Dataset)).returns(@field_set)
34
38
  ds = Linkage::Dataset.new(@dataset)
39
+ assert_equal :foo, ds.table_name
40
+ assert_equal @field_set, ds.field_set
35
41
  end
36
42
 
37
- test "don't extend already extended database" do
38
- @database.stubs(:kind_of?).with(Sequel::Collation).returns(true)
39
- @database.expects(:extend).with(Sequel::Collation).never
40
- ds = Linkage::Dataset.new(@dataset)
43
+ test "initialize with sequel database and table name" do
44
+ Sequel.unstub(:connect)
45
+ Sequel.expects(:connect).never
46
+ @database.expects(:[]).with(:foo).returns(@dataset)
47
+ Linkage::FieldSet.expects(:new).with(kind_of(Linkage::Dataset)).returns(@field_set)
48
+ ds = Linkage::Dataset.new(@database, "foo")
49
+ assert_equal @field_set, ds.field_set
41
50
  end
42
51
 
43
52
  test "table_name" do
@@ -69,72 +78,42 @@ class UnitTests::TestDataset < Test::Unit::TestCase
69
78
  assert_equal :foo, ds.database_type
70
79
  end
71
80
 
72
- test "set group_match" do
73
- ds_1 = Linkage::Dataset.new('foo:/bar', "foo", {:foo => 'bar'})
74
- @dataset.expects(:clone).returns(@dataset)
75
- meta_object = stub_instance(Linkage::MetaObject)
76
- ds_2 = ds_1.group_match(meta_object)
77
- assert_not_same ds_1, ds_2
78
- assert_not_equal ds_1.instance_variable_get(:@linkage_options),
79
- ds_2.instance_variable_get(:@linkage_options)
80
- end
81
-
82
- test "subsequent group_match replaces old options" do
83
- ds_1 = Linkage::Dataset.new('foo:/bar', "foo", {:foo => 'bar'})
84
- @dataset.expects(:clone).at_least_once.returns(@dataset)
85
- meta_object_1 = stub_instance(Linkage::MetaObject)
86
- ds_2 = ds_1.group_match(meta_object_1)
87
- assert_equal([{:meta_object => meta_object_1}], ds_2.linkage_options[:group_match])
88
-
89
- meta_object_2 = stub_instance(Linkage::MetaObject)
90
- ds_3 = ds_2.group_match(meta_object_2)
91
- assert_equal([{:meta_object => meta_object_2}], ds_3.linkage_options[:group_match])
92
- end
93
-
94
- test "group_match_more appends to group_match options" do
95
- ds_1 = Linkage::Dataset.new('foo:/bar', "foo", {:foo => 'bar'})
96
- @dataset.expects(:clone).at_least_once.returns(@dataset)
97
- meta_object_1 = stub_instance(Linkage::MetaObject)
98
- ds_2 = ds_1.group_match(meta_object_1)
99
- assert_equal([{:meta_object => meta_object_1}], ds_2.linkage_options[:group_match])
100
-
101
- meta_object_2 = stub_instance(Linkage::MetaObject)
102
- ds_3 = ds_2.group_match_more(meta_object_2)
103
- assert_equal([{:meta_object => meta_object_1}, {:meta_object => meta_object_2}], ds_3.linkage_options[:group_match])
104
- end
105
-
106
- test "group_by_matches" do
81
+ test "primary key" do
107
82
  ds = Linkage::Dataset.new('foo:/bar', "foo", {:foo => 'bar'})
108
-
109
- @dataset.expects(:clone).returns(@dataset)
110
- meta_object = stub_instance(Linkage::MetaObject, :to_expr => :foo)
111
- ds = ds.group_match(meta_object)
112
- @dataset.expects(:group).with(:foo).returns(@dataset)
113
-
114
- ds.group_by_matches
83
+ pk = stub('primary key field')
84
+ @field_set.expects(:primary_key).returns(pk)
85
+ assert_equal pk, ds.primary_key
115
86
  end
116
87
 
117
- test "dataset_for_group" do
118
- ds = Linkage::Dataset.new('foo:/bar', "foo", {:foo => 'bar'})
119
- @dataset.expects(:clone).returns(@dataset)
120
- meta_object = stub_instance(Linkage::MetaObject, :to_expr => :foo)
121
- ds = ds.group_match({:meta_object => meta_object, :alias => :foo_bar})
122
-
123
- group = stub("group", :values => {:foo_bar => 'baz'})
124
- filtered_dataset = stub('filtered dataset')
125
- @dataset.expects(:filter).with(:foo => 'baz').returns(filtered_dataset)
126
- assert_equal filtered_dataset, ds.dataset_for_group(group)
88
+ test "link_with other" do
89
+ ds_1 = Linkage::Dataset.new('foo:/bar', "foo", {:foo => 'bar'})
90
+ ds_2 = Linkage::Dataset.new('foo:/bar', "bar", {:foo => 'bar'})
91
+ result_set = stub('result set')
92
+ conf = stub('configuration')
93
+ Linkage::Configuration.expects(:new).with(ds_1, ds_2, result_set).returns(conf)
94
+ actual = ds_1.link_with(ds_2, result_set) do |arg|
95
+ assert_equal conf, arg
96
+ end
97
+ assert_equal actual, conf
127
98
  end
128
99
 
129
- test "dataset_for_group without aliases" do
100
+ test "link_with self" do
130
101
  ds = Linkage::Dataset.new('foo:/bar', "foo", {:foo => 'bar'})
131
- @dataset.expects(:clone).returns(@dataset)
132
- meta_object = stub_instance(Linkage::MetaObject, :to_expr => :foo)
133
- ds = ds.group_match(meta_object)
134
-
135
- group = stub("group", :values => {:foo => 'baz'})
136
- filtered_dataset = stub('filtered dataset')
137
- @dataset.expects(:filter).with(:foo => 'baz').returns(filtered_dataset)
138
- assert_equal filtered_dataset, ds.dataset_for_group(group)
102
+ result_set = stub('result set')
103
+ conf = stub('configuration')
104
+ Linkage::Configuration.expects(:new).with(ds, nil, result_set).returns(conf)
105
+ actual = ds.link_with(ds, result_set) do |arg|
106
+ assert_equal conf, arg
107
+ end
108
+ assert_equal actual, conf
109
+ end
110
+
111
+ test "delegating" do
112
+ dataset_2 = Sequel::Dataset.allocate
113
+ @dataset.expects(:filter).with(:foo => 123).returns(dataset_2)
114
+ ds_1 = Linkage::Dataset.new('foo:/bar', "foo", {:foo => 'bar'})
115
+ ds_2 = ds_1.filter(:foo => 123)
116
+ assert_kind_of Linkage::Dataset, ds_2
117
+ assert_same dataset_2, ds_2.obj
139
118
  end
140
119
  end
@@ -1,74 +1,23 @@
1
1
  require 'helper'
2
2
 
3
3
  class UnitTests::TestField < Test::Unit::TestCase
4
- test "subclass of data" do
5
- assert_equal Linkage::Data, Linkage::Field.superclass
6
- end
7
-
8
4
  test "initialize with schema info" do
9
- dataset = stub('dataset')
10
5
  schema = {:allow_null=>true, :default=>nil, :primary_key=>true, :db_type=>"integer", :type=>:integer, :ruby_default=>nil}
11
- field = Linkage::Field.new(dataset, :id, schema)
6
+ field = Linkage::Field.new(:id, schema)
12
7
  assert_equal :id, field.name
13
8
  assert_equal schema, field.schema
14
- assert_equal dataset, field.dataset
15
- end
16
-
17
- test "static? is always false" do
18
- dataset = stub('dataset')
19
- schema = {:allow_null=>true, :default=>nil, :primary_key=>true, :db_type=>"integer", :type=>:integer, :ruby_default=>nil}
20
- field = Linkage::Field.new(dataset, :id, schema)
21
- assert !field.static?
22
9
  end
23
10
 
24
11
  test "ruby_type for integer" do
25
- dataset = stub('dataset')
26
- field = Linkage::Field.new(dataset, :id, {:allow_null=>true, :default=>nil, :primary_key=>true, :db_type=>"integer", :type=>:integer, :ruby_default=>nil})
12
+ field = Linkage::Field.new(:id, {:allow_null=>true, :default=>nil, :primary_key=>true, :db_type=>"integer", :type=>:integer, :ruby_default=>nil})
27
13
  assert_equal({:type => Integer}, field.ruby_type)
28
14
  end
29
15
 
30
16
  test "primary_key? returns true if primary key" do
31
- dataset = stub('dataset')
32
- field_1 = Linkage::Field.new(dataset, :id, {:allow_null=>true, :default=>nil, :primary_key=>true, :db_type=>"integer", :type=>:integer, :ruby_default=>nil})
17
+ field_1 = Linkage::Field.new(:id, {:allow_null=>true, :default=>nil, :primary_key=>true, :db_type=>"integer", :type=>:integer, :ruby_default=>nil})
33
18
  assert field_1.primary_key?
34
19
 
35
- field_2 = Linkage::Field.new(dataset, :foo, {:allow_null=>true, :default=>nil, :primary_key=>false, :db_type=>"integer", :type=>:integer, :ruby_default=>nil})
20
+ field_2 = Linkage::Field.new(:foo, {:allow_null=>true, :default=>nil, :primary_key=>false, :db_type=>"integer", :type=>:integer, :ruby_default=>nil})
36
21
  assert !field_2.primary_key?
37
22
  end
38
-
39
- test "to_expr returns name" do
40
- dataset = stub('dataset')
41
- field = Linkage::Field.new(dataset, :id, {:allow_null=>true, :default=>nil, :primary_key=>true, :db_type=>"integer", :type=>:integer, :ruby_default=>nil})
42
- assert_equal :id, field.to_expr
43
- end
44
-
45
- test "to_expr ignores adapter argument" do
46
- dataset = stub('dataset')
47
- field = Linkage::Field.new(dataset, :id, {:allow_null=>true, :default=>nil, :primary_key=>true, :db_type=>"integer", :type=>:integer, :ruby_default=>nil})
48
- assert_equal :id, field.to_expr(:foo)
49
- end
50
-
51
- test "collation" do
52
- dataset = stub('dataset')
53
- field = Linkage::Field.new(dataset, :foo, {:allow_null=>true, :default=>nil, :primary_key=>false, :db_type=>"varchar(255)", :type=>:string, :collation=>"latin1_general_cs", :ruby_default=>nil})
54
- assert_equal "latin1_general_cs", field.collation
55
- assert_equal "latin1_general_cs", field.ruby_type[:opts][:collate]
56
- end
57
-
58
- test "initialize MergeField with ruby type" do
59
- info = {:type => Integer}
60
- field = Linkage::MergeField.new(:id, info)
61
- assert_equal :id, field.name
62
- assert_equal info, field.ruby_type
63
- assert_nil field.schema
64
- assert_nil field.dataset
65
- end
66
-
67
- test "MergeField#database_type accessor" do
68
- field_1 = Linkage::MergeField.new(:id, {:type => Integer})
69
- assert_nil field_1.database_type
70
-
71
- field_2 = Linkage::MergeField.new(:id, {:type => Integer}, :mysql)
72
- assert_equal :mysql, field_2.database_type
73
- end
74
23
  end
@@ -20,9 +20,9 @@ class UnitTests::TestFieldSet < Test::Unit::TestCase
20
20
  field_1 = stub('id field')
21
21
  field_2 = stub('first_name field')
22
22
  field_3 = stub('last_name field')
23
- Linkage::Field.expects(:new).with(dataset, :id, @schema[:id]).returns(field_1)
24
- Linkage::Field.expects(:new).with(dataset, :first_name, @schema[:first_name]).returns(field_2)
25
- Linkage::Field.expects(:new).with(dataset, :last_name, @schema[:last_name]).returns(field_3)
23
+ Linkage::Field.expects(:new).with(:id, @schema[:id]).returns(field_1)
24
+ Linkage::Field.expects(:new).with(:first_name, @schema[:first_name]).returns(field_2)
25
+ Linkage::Field.expects(:new).with(:last_name, @schema[:last_name]).returns(field_3)
26
26
 
27
27
  fs = Linkage::FieldSet.new(dataset)
28
28
  assert_equal field_1, fs.primary_key
@@ -37,9 +37,9 @@ class UnitTests::TestFieldSet < Test::Unit::TestCase
37
37
  field_1 = stub('id field')
38
38
  field_2 = stub('first_name field')
39
39
  field_3 = stub('last_name field')
40
- Linkage::Field.stubs(:new).with(dataset, :id, @schema[:id]).returns(field_1)
41
- Linkage::Field.stubs(:new).with(dataset, :first_name, @schema[:first_name]).returns(field_2)
42
- Linkage::Field.stubs(:new).with(dataset, :last_name, @schema[:last_name]).returns(field_3)
40
+ Linkage::Field.stubs(:new).with(:id, @schema[:id]).returns(field_1)
41
+ Linkage::Field.stubs(:new).with(:first_name, @schema[:first_name]).returns(field_2)
42
+ Linkage::Field.stubs(:new).with(:last_name, @schema[:last_name]).returns(field_3)
43
43
 
44
44
  fs = Linkage::FieldSet.new(dataset)
45
45
  assert_equal field_1, fs.primary_key
@@ -0,0 +1,23 @@
1
+ require 'helper'
2
+
3
+ class UnitTests::TestMatchRecorder < Test::Unit::TestCase
4
+ def setup
5
+ @match_set = stub('match set')
6
+ @matcher = stub('matcher')
7
+ end
8
+
9
+ test "recording events from a matcher" do
10
+ match_recorder = Linkage::MatchRecorder.new(@matcher, @match_set)
11
+
12
+ @matcher.expects(:add_observer).with(match_recorder)
13
+ @match_set.expects(:open_for_writing)
14
+ match_recorder.start
15
+
16
+ @match_set.expects(:add_match).with(123, 456, 1)
17
+ match_recorder.update(123, 456, 1)
18
+
19
+ @matcher.expects(:delete_observer).with(match_recorder)
20
+ @match_set.expects(:close)
21
+ match_recorder.stop
22
+ end
23
+ end
@@ -0,0 +1,23 @@
1
+ require 'helper'
2
+
3
+ class TestMatchSet < Test::Unit::TestCase
4
+ test "add_match raises NotImplementedError" do
5
+ match_set = Linkage::MatchSet.new
6
+ assert_raises(NotImplementedError) do
7
+ match_set.add_match('foo', 'bar', 'baz')
8
+ end
9
+ end
10
+
11
+ test "getting a registered class" do
12
+ klass = new_match_set
13
+ Linkage::MatchSet.register('foo', klass)
14
+ assert_equal klass, Linkage::MatchSet['foo']
15
+ end
16
+
17
+ test "registered classes required to define add_match" do
18
+ klass = new_match_set do
19
+ remove_method :add_match
20
+ end
21
+ assert_raises(ArgumentError) { Linkage::MatchSet.register('foo', klass) }
22
+ end
23
+ end
@@ -0,0 +1,4 @@
1
+ require 'helper'
2
+
3
+ module UnitTests::TestMatchSets
4
+ end
@@ -0,0 +1,44 @@
1
+ require 'helper'
2
+
3
+ class UnitTests::TestMatcher < Test::Unit::TestCase
4
+ def setup
5
+ @score_set = stub('score set')
6
+ @comparators = [stub('comparator 1'), stub('comparator 2'), stub('comparator 3')]
7
+ end
8
+
9
+ test "finding matches with mean and threshold" do
10
+ matcher = Linkage::Matcher.new(@comparators, @score_set, :mean, 0.5)
11
+ observer = stub('observer')
12
+ observer.expects(:update).with(3, 4, 2.0 / 3)
13
+ observer.expects(:update).with(4, 5, 1.0)
14
+ matcher.add_observer(observer)
15
+
16
+ pairs = [
17
+ [1, 2, {1 => 1, 2 => 0, 3 => 0}],
18
+ [2, 3, {1 => 0, 2 => 0, 3 => 0}],
19
+ [3, 4, {1 => 0, 2 => 1, 3 => 1}],
20
+ [4, 5, {1 => 1, 2 => 1, 3 => 1}]
21
+ ]
22
+ @score_set.expects(:each_pair).multiple_yields(*pairs)
23
+
24
+ matcher.run
25
+ end
26
+
27
+ test "finding matches with mean and threshold with missing scores" do
28
+ matcher = Linkage::Matcher.new(@comparators, @score_set, :mean, 0.5)
29
+ observer = stub('observer')
30
+ observer.expects(:update).with(3, 4, 2.0 / 3)
31
+ observer.expects(:update).with(4, 5, 1.0)
32
+ matcher.add_observer(observer)
33
+
34
+ pairs = [
35
+ [1, 2, {1 => 1, 3 => 0}],
36
+ [2, 3, {1 => 0, 2 => 0, 3 => 0}],
37
+ [3, 4, {2 => 1, 3 => 1}],
38
+ [4, 5, {1 => 1, 2 => 1, 3 => 1}]
39
+ ]
40
+ @score_set.expects(:each_pair).multiple_yields(*pairs)
41
+
42
+ matcher.run
43
+ end
44
+ end