linkage 0.0.8 → 0.1.0.pre

Sign up to get free protection for your applications and to get access to all the features.
Files changed (105) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +1 -0
  3. data/.yardopts +1 -0
  4. data/Gemfile +1 -19
  5. data/Gemfile-java +3 -0
  6. data/README.markdown +88 -34
  7. data/Rakefile +16 -15
  8. data/TODO +4 -0
  9. data/lib/linkage/comparator.rb +139 -144
  10. data/lib/linkage/comparators/compare.rb +236 -29
  11. data/lib/linkage/comparators/strcompare.rb +85 -0
  12. data/lib/linkage/comparators/within.rb +24 -20
  13. data/lib/linkage/configuration.rb +44 -466
  14. data/lib/linkage/dataset.rb +28 -127
  15. data/lib/linkage/exceptions.rb +5 -0
  16. data/lib/linkage/field.rb +6 -37
  17. data/lib/linkage/field_set.rb +3 -3
  18. data/lib/linkage/match_recorder.rb +22 -0
  19. data/lib/linkage/match_set.rb +34 -0
  20. data/lib/linkage/match_sets/csv.rb +39 -0
  21. data/lib/linkage/match_sets/database.rb +45 -0
  22. data/lib/linkage/matcher.rb +30 -0
  23. data/lib/linkage/result_set.rb +25 -110
  24. data/lib/linkage/result_sets/csv.rb +54 -0
  25. data/lib/linkage/result_sets/database.rb +42 -0
  26. data/lib/linkage/runner.rb +57 -16
  27. data/lib/linkage/score_recorder.rb +30 -0
  28. data/lib/linkage/score_set.rb +49 -0
  29. data/lib/linkage/score_sets/csv.rb +64 -0
  30. data/lib/linkage/score_sets/database.rb +77 -0
  31. data/lib/linkage/version.rb +1 -1
  32. data/lib/linkage.rb +14 -17
  33. data/linkage.gemspec +13 -1
  34. data/linkage.gemspec-java +32 -0
  35. data/test/helper.rb +30 -23
  36. data/test/integration/test_cross_linkage.rb +46 -25
  37. data/test/integration/test_database_result_set.rb +55 -0
  38. data/test/integration/test_dual_linkage.rb +19 -94
  39. data/test/integration/test_self_linkage.rb +100 -203
  40. data/test/integration/test_within_comparator.rb +24 -77
  41. data/test/unit/comparators/test_compare.rb +254 -50
  42. data/test/unit/comparators/test_strcompare.rb +45 -0
  43. data/test/unit/comparators/test_within.rb +14 -26
  44. data/test/unit/match_sets/test_csv.rb +78 -0
  45. data/test/unit/match_sets/test_database.rb +63 -0
  46. data/test/unit/result_sets/test_csv.rb +111 -0
  47. data/test/unit/result_sets/test_database.rb +68 -0
  48. data/test/unit/score_sets/test_csv.rb +151 -0
  49. data/test/unit/score_sets/test_database.rb +149 -0
  50. data/test/unit/test_comparator.rb +46 -83
  51. data/test/unit/test_comparators.rb +4 -0
  52. data/test/unit/test_configuration.rb +99 -145
  53. data/test/unit/test_dataset.rb +52 -73
  54. data/test/unit/test_field.rb +4 -55
  55. data/test/unit/test_field_set.rb +6 -6
  56. data/test/unit/test_match_recorder.rb +23 -0
  57. data/test/unit/test_match_set.rb +23 -0
  58. data/test/unit/test_match_sets.rb +4 -0
  59. data/test/unit/test_matcher.rb +44 -0
  60. data/test/unit/test_result_set.rb +24 -223
  61. data/test/unit/test_result_sets.rb +4 -0
  62. data/test/unit/test_runner.rb +122 -17
  63. data/test/unit/test_runners.rb +4 -0
  64. data/test/unit/test_score_recorder.rb +25 -0
  65. data/test/unit/test_score_set.rb +37 -0
  66. data/test/unit/test_score_sets.rb +4 -0
  67. metadata +183 -90
  68. data/Gemfile.lock +0 -92
  69. data/lib/linkage/comparators/binary.rb +0 -12
  70. data/lib/linkage/data.rb +0 -175
  71. data/lib/linkage/decollation.rb +0 -93
  72. data/lib/linkage/expectation.rb +0 -21
  73. data/lib/linkage/expectations/exhaustive.rb +0 -63
  74. data/lib/linkage/expectations/simple.rb +0 -168
  75. data/lib/linkage/function.rb +0 -148
  76. data/lib/linkage/functions/binary.rb +0 -30
  77. data/lib/linkage/functions/cast.rb +0 -54
  78. data/lib/linkage/functions/length.rb +0 -29
  79. data/lib/linkage/functions/strftime.rb +0 -33
  80. data/lib/linkage/functions/trim.rb +0 -30
  81. data/lib/linkage/group.rb +0 -55
  82. data/lib/linkage/meta_object.rb +0 -139
  83. data/lib/linkage/runner/single_threaded.rb +0 -187
  84. data/lib/linkage/utils.rb +0 -164
  85. data/lib/linkage/warnings.rb +0 -5
  86. data/test/integration/test_collation.rb +0 -45
  87. data/test/integration/test_configuration.rb +0 -268
  88. data/test/integration/test_dataset.rb +0 -116
  89. data/test/integration/test_functions.rb +0 -88
  90. data/test/integration/test_result_set.rb +0 -85
  91. data/test/integration/test_scoring.rb +0 -84
  92. data/test/unit/expectations/test_exhaustive.rb +0 -111
  93. data/test/unit/expectations/test_simple.rb +0 -303
  94. data/test/unit/functions/test_binary.rb +0 -54
  95. data/test/unit/functions/test_cast.rb +0 -98
  96. data/test/unit/functions/test_length.rb +0 -52
  97. data/test/unit/functions/test_strftime.rb +0 -60
  98. data/test/unit/functions/test_trim.rb +0 -43
  99. data/test/unit/runner/test_single_threaded.rb +0 -12
  100. data/test/unit/test_data.rb +0 -445
  101. data/test/unit/test_decollation.rb +0 -201
  102. data/test/unit/test_function.rb +0 -233
  103. data/test/unit/test_group.rb +0 -38
  104. data/test/unit/test_meta_object.rb +0 -208
  105. data/test/unit/test_utils.rb +0 -341
data/lib/linkage/group.rb DELETED
@@ -1,55 +0,0 @@
1
- module Linkage
2
- class Group
3
- include Linkage::Decollation
4
-
5
- # @return [Hash] Hash of matching values
6
- attr_reader :values
7
-
8
- # @return [Integer] Number of records in this group
9
- attr_reader :count
10
-
11
- # @return [Integer] This group's ID (if it exists)
12
- attr_reader :id
13
-
14
- def self.from_row(row)
15
- values = {}
16
- options = {}
17
- row.each_pair do |key, value|
18
- if key == :id || key == :count
19
- options[key] = value
20
- else
21
- values[key] = value
22
- end
23
- end
24
- new(values, options)
25
- end
26
-
27
- # @param [Hash] values Values that define this group
28
- # @param [Hash] options
29
- # @option options [Fixnum] :id The group ID
30
- # @option options [Fixnum] :count How many records are in the group
31
- # @option options [Hash] :ruby_types Hash of ruby types for each value
32
- # @option options [Symbol] :database_type
33
- # @example
34
- # Linkage::Group.new({:foo => 123, :bar => 'baz'}, {:count => 5, :id => 456})
35
- def initialize(values, options)
36
- @count = options[:count]
37
- @id = options[:id]
38
- @ruby_types = options[:ruby_types]
39
- @database_type = options[:database_type]
40
- @values = values
41
- end
42
-
43
- def decollated_values
44
- @values.inject({}) do |hsh, (key, value)|
45
- ruby_type = @ruby_types[key]
46
- if ruby_type && ruby_type.has_key?(:opts) && ruby_type[:opts].has_key?(:collate)
47
- hsh[key] = decollate(value, @database_type, ruby_type[:opts][:collate])
48
- else
49
- hsh[key] = value
50
- end
51
- hsh
52
- end
53
- end
54
- end
55
- end
data/lib/linkage/meta_object.rb DELETED
@@ -1,139 +0,0 @@
1
- module Linkage
2
- class MetaObject
3
- attr_reader :object
4
- attr_writer :side
5
-
6
- # Creates a new MetaObject.
7
- #
8
- # @param [Object] object This can be a {Field}, {Function} or a regular
9
- # Ruby object (Fixnum, String, etc). If `object` is not static (a {Field}
10
- # or a {Function} that contains one or more {Field} objects), you should
11
- # specify which "side" of the linkage the object belongs to (left-hand
12
- # side or right-hand side) in the `side` argument.
13
- # @param [Symbol] side `:lhs` for left-hand side or `:rhs` for right-hand
14
- # side
15
- def initialize(object, side = nil)
16
- @object = object
17
- @static = object.kind_of?(Linkage::Data) ? object.static? : true
18
- if !side.nil? && side != :lhs && side != :rhs
19
- raise ArgumentError, "invalid `side` argument, must be :lhs or :rhs"
20
- end
21
- @side = side
22
- end
23
-
24
- def side
25
- if !@static && @side.nil?
26
- raise RuntimeError, "Object is dynamic and side is not set"
27
- end
28
- @side
29
- end
30
-
31
- def dataset
32
- @object.kind_of?(Linkage::Data) ? @object.dataset : nil
33
- end
34
-
35
- def dataset=(dataset)
36
- if @object.kind_of?(Linkage::Data)
37
- @object.dataset = dataset
38
- else
39
- raise RuntimeError, "You can't set the dataset of a non-data object."
40
- end
41
- end
42
-
43
- def database_type
44
- ds = dataset
45
- ds ? ds.database_type : nil
46
- end
47
-
48
- def static?
49
- @static
50
- end
51
-
52
- # Returns true if the argument has the same object as the instance.
53
- #
54
- # @param [Linkage::MetaObject] other
55
- # @return [Boolean]
56
- def objects_equal?(other)
57
- other.is_a?(Linkage::MetaObject) && other.object == self.object
58
- end
59
-
60
- # Returns true if the argument has the same dataset as the instance.
61
- #
62
- # @param [Linkage::MetaObject] other
63
- # @return [Boolean]
64
- def datasets_equal?(other)
65
- other.is_a?(Linkage::MetaObject) && other.dataset == self.dataset
66
- end
67
-
68
- # Returns an expression suitable for use in Sequel queries.
69
- # @return [Object]
70
- def to_expr
71
- if @object.kind_of?(Linkage::Data)
72
- @object.to_expr
73
- else
74
- @object
75
- end
76
- end
77
-
78
- # Returns a Sequel identifier for {Data} objects, or the object itself.
79
- # @return [Sequel::SQL::Identifier, Object]
80
- def to_identifier
81
- if @object.kind_of?(Linkage::Data)
82
- Sequel::SQL::Identifier.new(@object.to_expr)
83
- else
84
- @object
85
- end
86
- end
87
-
88
- # Return the name of the object for {Data} objects, nil for others.
89
- # @return [Symbol, nil]
90
- def name
91
- if @object.kind_of?(Linkage::Data)
92
- @object.name
93
- else
94
- nil
95
- end
96
- end
97
-
98
- # Returns a {MergeField} if both objects are {Data} objects, otherwise,
99
- # raises an exception.
100
- #
101
- # @return [Linkage::MergeField]
102
- def merge(other)
103
- if @object.kind_of?(Linkage::Data) && other.object.kind_of?(Linkage::Data)
104
- @object.merge(other.object)
105
- else
106
- raise ArgumentError, "Cannot merge a non-data object"
107
- end
108
- end
109
-
110
- # Returns the Ruby type of the underlying object.
111
- #
112
- # @return [Hash]
113
- # @see Linkage::Field#ruby_type
114
- # @see Linkage::Function#ruby_type
115
- def ruby_type
116
- if @object.kind_of?(Linkage::Data)
117
- @object.ruby_type
118
- else
119
- {:type => @object.class}
120
- end
121
- end
122
-
123
- # Returns the collation of the underlying object.
124
- #
125
- # @return [Symbol]
126
- def collation
127
- if @object.kind_of?(Linkage::Data)
128
- @object.collation
129
- else
130
- nil
131
- end
132
- end
133
-
134
- # Returns true if underlying object is not a subclass of {Linkage::Data}.
135
- def raw?
136
- !@object.kind_of?(Linkage::Data)
137
- end
138
- end
139
- end
data/lib/linkage/runner/single_threaded.rb DELETED
@@ -1,187 +0,0 @@
1
- module Linkage
2
- # A runner class that only uses a single thread to execute a linkage.
3
- #
4
- # @see Runner
5
- class SingleThreadedRunner < Runner
6
- # @return [Linkage::ResultSet]
7
- def execute
8
- result_set.create_tables!
9
-
10
- @pk_1 = config.dataset_1.field_set.primary_key.to_expr
11
- @pk_2 = config.dataset_2.field_set.primary_key.to_expr
12
- if config.has_simple_expectations?
13
- setup_datasets
14
- group_records
15
-
16
- if config.has_exhaustive_expectations?
17
- score_records_with_groups
18
- else
19
- create_matches
20
- end
21
- else
22
- dataset_1, dataset_2 = config.datasets_with_applied_exhaustive_expectations
23
- score_records_without_groups(dataset_1, dataset_2)
24
- end
25
-
26
- result_set.flush!
27
- return result_set
28
- end
29
-
30
- private
31
-
32
- def setup_datasets
33
- @dataset_1, @dataset_2 = config.datasets_with_applied_simple_expectations
34
-
35
- @dataset_1 = @dataset_1.select(@pk_1)
36
- if @config.linkage_type != :self
37
- @dataset_2 = @dataset_2.select(@pk_2)
38
- end
39
- end
40
-
41
- def group_records
42
- if config.linkage_type == :self
43
- group_records_for(@dataset_1, 1)
44
- else
45
- group_records_for(@dataset_1, 1, false)
46
- group_records_for(@dataset_2, 2, false)
47
- combine_groups
48
- end
49
- end
50
-
51
- # @param [Linkage::Dataset] dataset
52
- # @param [Fixnum, nil] dataset_id
53
- # @param [Boolean] ignore_empty_groups
54
- # @yield [Linkage::Group] If a block is given, yield completed groups to
55
- # the block. Otherwise, call ResultSet#add_group on the group.
56
- def group_records_for(dataset, dataset_id, ignore_empty_groups = true)
57
- group_minimum = ignore_empty_groups ? 2 : 1
58
- dataset.each_group(group_minimum) do |group|
59
- result_set.add_group(group, dataset_id)
60
- end
61
- result_set.flush!
62
- end
63
-
64
- def combine_groups
65
- # Create a new dataset for the groups table
66
- groups_dataset = result_set.groups_dataset
67
-
68
- groups_dataset.field_set.values.each do |field|
69
- # Sort on all fields
70
- if !field.primary_key?
71
- meta_object = MetaObject.new(field)
72
- groups_dataset = groups_dataset.group_match_more(meta_object)
73
- end
74
- end
75
-
76
- # Delete non-matching groups
77
- sub_dataset = groups_dataset.select(:id).group_by_matches.having(:count.sql_function(:id) => 1)
78
- groups_dataset.filter(:id => sub_dataset.obj).delete
79
-
80
- # Delete duplicate groups
81
- sub_dataset = groups_dataset.select(:max.sql_function(:id).as(:id)).group_by_matches
82
- groups_dataset.filter(:id => sub_dataset.obj).delete
83
- end
84
-
85
- def score_records_with_groups
86
- result_set.groups_dataset.each do |group_record|
87
- group = Group.from_row(group_record)
88
- dataset_1, dataset_2 = config.apply_exhaustive_expectations(
89
- *result_set.groups_records_datasets(group))
90
- score_records_without_groups(dataset_1, dataset_2)
91
- end
92
- end
93
-
94
- def score_records_without_groups(dataset_1, dataset_2)
95
- if config.linkage_type == :self
96
- keys = dataset_1.select_map(@pk_1)
97
- unfiltered_dataset = dataset_1.unfiltered
98
- cache = Hashery::LRUHash.new(config.record_cache_size) do |h, k|
99
- h[k] = unfiltered_dataset.filter(@pk_1 => k).first
100
- end
101
- upper_bound = keys.length - 1
102
-
103
- forward = true
104
- keys.each_with_index do |key_1, key_1_index|
105
- record_1 = cache[key_1]
106
-
107
- lower_bound = key_1_index + 1
108
- enum =
109
- if forward
110
- lower_bound.upto(upper_bound)
111
- else
112
- upper_bound.downto(lower_bound)
113
- end
114
- enum.each do |key_2_index|
115
- record_2 = cache[keys[key_2_index]]
116
- score(record_1, record_2)
117
- end
118
- forward = !forward
119
- end
120
- else
121
- keys_2 = dataset_2.select_map(@pk_2)
122
- unfiltered_dataset_2 = dataset_2.unfiltered
123
- cache_2 = Hashery::LRUHash.new(config.record_cache_size) do |h, k|
124
- h[k] = unfiltered_dataset_2.filter(@pk_2 => k).first
125
- end
126
- keys_2_last = keys_2.length - 1
127
-
128
- forward = true
129
- dataset_1.each do |record_1|
130
- enum = forward ? 0.upto(keys_2_last) : keys_2_last.downto(0)
131
- enum.each do |key_2_index|
132
- record_2 = cache_2[keys_2[key_2_index]]
133
- score(record_1, record_2)
134
- end
135
- forward = !forward
136
- end
137
- end
138
- end
139
-
140
- def score(record_1, record_2)
141
- pk_1 = record_1[@pk_1]
142
- pk_2 = record_2[@pk_2]
143
-
144
- catch(:stop) do
145
- total_score = 0
146
- config.exhaustive_expectations.each_with_index do |expectation, comparator_id|
147
- comparator = expectation.comparator
148
-
149
- score = comparator.score(record_1, record_2)
150
- result_set.add_score(comparator_id, pk_1, pk_2, score)
151
-
152
- throw(:stop) unless expectation.satisfied?(score)
153
- total_score += score
154
- end
155
- result_set.add_match(pk_1, pk_2, total_score)
156
- end
157
- end
158
-
159
- # Only needed for linkages without exhaustive expectations
160
- def create_matches
161
- result_set.groups_dataset.each do |group_record|
162
- group = Group.from_row(group_record)
163
- dataset_1, dataset_2 = result_set.groups_records_datasets(group)
164
-
165
- if config.linkage_type == :self
166
- keys = dataset_1.select_map(@pk_1)
167
- keys_last = keys.length - 1
168
- keys.each_with_index do |key_1, key_1_index|
169
- (key_1_index + 1).upto(keys_last) do |key_2_index|
170
- key_2 = keys[key_2_index]
171
- result_set.add_match(key_1, key_2, nil)
172
- end
173
- end
174
- else
175
- keys_1 = dataset_1.select_map(@pk_1)
176
- keys_2 = dataset_2.select_map(@pk_2)
177
-
178
- keys_1.each do |key_1|
179
- keys_2.each do |key_2|
180
- result_set.add_match(key_1, key_2, nil)
181
- end
182
- end
183
- end
184
- end
185
- end
186
- end
187
- end
data/lib/linkage/utils.rb DELETED
@@ -1,164 +0,0 @@
1
- module Linkage
2
- module Utils
3
- # A "tree" used to find compatible types.
4
- TYPE_CONVERSION_TREE = {
5
- TrueClass => [Integer],
6
- Integer => [Bignum, Float],
7
- Bignum => [BigDecimal],
8
- Float => [BigDecimal],
9
- BigDecimal => [String],
10
- String => nil,
11
- DateTime => nil,
12
- Date => nil,
13
- Time => nil,
14
- File => nil
15
- }
16
-
17
- # Create field information for a field that can hold data from two other
18
- # fields. If the fields have different types, the resulting type is
19
- # determined via a type-conversion tree.
20
- #
21
- # @param [Array] field_1 Schema information for the first field
22
- # @param [Array] field_2 Schema information for the second field
23
- # @return [Array] Schema information for the new field
24
- def merge_fields(field_1, field_2)
25
- schema_1 = column_schema_to_ruby_type(field_1)
26
- schema_1.delete_if { |k, v| v.nil? }
27
- schema_2 = column_schema_to_ruby_type(field_2)
28
- schema_2.delete_if { |k, v| v.nil? }
29
- if schema_1 == schema_2
30
- result = schema_1
31
- else
32
- result = schema_1.dup
33
-
34
- # type
35
- if schema_1[:type] != schema_2[:type]
36
- result[:type] = first_common_type(schema_1[:type], schema_2[:type])
37
- end
38
-
39
- # text
40
- if schema_1[:text] != schema_2[:text]
41
- # This can only be of type String.
42
- result[:text] = true
43
- result.delete(:size)
44
- end
45
-
46
- # size
47
- if !result[:text] && schema_1[:size] != schema_2[:size]
48
- types = [schema_1[:type], schema_2[:type]].uniq
49
- if types.length == 1 && types[0] == BigDecimal
50
- # Two decimals
51
- if schema_1.has_key?(:size) && schema_2.has_key?(:size)
52
- s_1 = schema_1[:size]
53
- s_2 = schema_2[:size]
54
- result[:size] = [ s_1[0] > s_2[0] ? s_1[0] : s_2[0] ]
55
-
56
- if s_1[1] && s_2[1]
57
- result[:size][1] = s_1[1] > s_2[1] ? s_1[1] : s_2[1]
58
- else
59
- result[:size][1] = s_1[1] ? s_1[1] : s_2[1]
60
- end
61
- else
62
- result[:size] = schema_1.has_key?(:size) ? schema_1[:size] : schema_2[:size]
63
- end
64
- elsif types.include?(String) && types.include?(BigDecimal)
65
- # Add one to the precision of the BigDecimal (for the dot)
66
- if schema_1.has_key?(:size) && schema_2.has_key?(:size)
67
- s_1 = schema_1[:size].is_a?(Array) ? schema_1[:size][0] + 1 : schema_1[:size]
68
- s_2 = schema_2[:size].is_a?(Array) ? schema_2[:size][0] + 1 : schema_2[:size]
69
- result[:size] = s_1 > s_2 ? s_1 : s_2
70
- elsif schema_1.has_key?(:size)
71
- result[:size] = schema_1[:size].is_a?(Array) ? schema_1[:size][0] + 1 : schema_1[:size]
72
- elsif schema_2.has_key?(:size)
73
- result[:size] = schema_2[:size].is_a?(Array) ? schema_2[:size][0] + 1 : schema_2[:size]
74
- end
75
- else
76
- # Treat as two strings
77
- if schema_1.has_key?(:size) && schema_2.has_key?(:size)
78
- result[:size] = schema_1[:size] > schema_2[:size] ? schema_1[:size] : schema_2[:size]
79
- elsif schema_1.has_key?(:size)
80
- result[:size] = schema_1[:size]
81
- else
82
- result[:size] = schema_2[:size]
83
- end
84
- end
85
- end
86
-
87
- # fixed
88
- if schema_1[:fixed] != schema_2[:fixed]
89
- # This can only be of type String.
90
- result[:fixed] = true
91
- end
92
- end
93
-
94
- {:type => result.delete(:type), :opts => result}
95
- end
96
-
97
- private
98
-
99
- # Convert the column schema information to a hash of column options, one of which must
100
- # be :type. The other options added should modify that type (e.g. :size). If a
101
- # database type is not recognized, return it as a String type.
102
- #
103
- # @note This method comes straight from Sequel (lib/sequel/extensions/schema_dumper.rb).
104
- def column_schema_to_ruby_type(schema)
105
- case t = schema[:db_type].downcase
106
- when /\A(?:medium|small)?int(?:eger)?(?:\((?:\d+)\))?(?: unsigned)?\z/o
107
- {:type=>Integer}
108
- when /\Atinyint(?:\((\d+)\))?\z/o
109
- {:type =>schema[:type] == :boolean ? TrueClass : Integer}
110
- when /\Abigint(?:\((?:\d+)\))?(?: unsigned)?\z/o
111
- {:type=>Bignum}
112
- when /\A(?:real|float|double(?: precision)?)\z/o
113
- {:type=>Float}
114
- when 'boolean'
115
- {:type=>TrueClass}
116
- when /\A(?:(?:tiny|medium|long|n)?text|clob)\z/o
117
- {:type=>String, :text=>true}
118
- when 'date'
119
- {:type=>Date}
120
- when /\A(?:small)?datetime\z/o
121
- {:type=>DateTime}
122
- when /\Atimestamp(?:\((\d+)\))?(?: with(?:out)? time zone)?\z/o
123
- {:type=>DateTime, :size=>($1.to_i if $1)}
124
- when /\Atime(?: with(?:out)? time zone)?\z/o
125
- {:type=>Time, :only_time=>true}
126
- when /\An?char(?:acter)?(?:\((\d+)\))?\z/o
127
- {:type=>String, :size=>($1.to_i if $1), :fixed=>true}
128
- when /\A(?:n?varchar|character varying|bpchar|string)(?:\((\d+)\))?\z/o
129
- {:type=>String, :size=>($1.to_i if $1)}
130
- when /\A(?:small)?money\z/o
131
- {:type=>BigDecimal, :size=>[19,2]}
132
- when /\A(?:decimal|numeric|number)(?:\((\d+)(?:,\s*(\d+))?\))?\z/o
133
- s = [($1.to_i if $1), ($2.to_i if $2)].compact
134
- {:type=>BigDecimal, :size=>(s.empty? ? nil : s)}
135
- when /\A(?:bytea|(?:tiny|medium|long)?blob|(?:var)?binary)(?:\((\d+)\))?\z/o
136
- {:type=>File, :size=>($1.to_i if $1)}
137
- when 'year'
138
- {:type=>Integer}
139
- else
140
- {:type=>String}
141
- end
142
- end
143
-
144
- def first_common_type(type_1, type_2)
145
- types_1 = [type_1] + get_types(type_1)
146
- types_2 = [type_2] + get_types(type_2)
147
- (types_1 & types_2).first
148
- end
149
-
150
- # Get all types that the specified type can be converted to. Order
151
- # matters.
152
- def get_types(type)
153
- result = []
154
- types = TYPE_CONVERSION_TREE[type]
155
- if types
156
- result += types
157
- types.each do |t|
158
- result |= get_types(t)
159
- end
160
- end
161
- result
162
- end
163
- end
164
- end
data/lib/linkage/warnings.rb DELETED
@@ -1,5 +0,0 @@
1
- module Linkage
2
- module Warnings
3
- # TODO :)
4
- end
5
- end
data/test/integration/test_collation.rb DELETED
@@ -1,45 +0,0 @@
1
- require 'helper'
2
-
3
- module IntegrationTests
4
- class TestCollation < Test::Unit::TestCase
5
- def setup
6
- @tmpdir = Dir.mktmpdir('linkage')
7
- @tmpuri = "sqlite://" + File.join(@tmpdir, "foo")
8
- end
9
-
10
- def database(options = {}, &block)
11
- Sequel.connect(@tmpuri, options, &block)
12
- end
13
-
14
- def teardown
15
- FileUtils.remove_entry_secure(@tmpdir)
16
- end
17
-
18
- test "comparing strings exactly in MySQL" do
19
- options = database_options_for('mysql')
20
- database_for('mysql') do |db|
21
- db.create_table!(:foo) do
22
- primary_key :id
23
- String :foo
24
- String :bar
25
- end
26
- db[:foo].import([:foo, :bar], [
27
- ["Foo", "foo"],
28
- ["bar", "bar "],
29
- ])
30
- end
31
- dataset = Linkage::Dataset.new(options, :foo)
32
- tmpuri = @tmpuri
33
- conf = dataset.link_with(dataset) do
34
- (lhs[:foo].must == rhs[:bar]).exactly
35
- save_results_in(tmpuri)
36
- end
37
- runner = Linkage::SingleThreadedRunner.new(conf)
38
- runner.execute
39
-
40
- database do |db|
41
- assert_equal 0, db[:groups].count
42
- end
43
- end
44
- end
45
- end