linkage 0.0.8 → 0.1.0.pre

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105)
  1. checksums.yaml +7 -0
  2. data/.gitignore +1 -0
  3. data/.yardopts +1 -0
  4. data/Gemfile +1 -19
  5. data/Gemfile-java +3 -0
  6. data/README.markdown +88 -34
  7. data/Rakefile +16 -15
  8. data/TODO +4 -0
  9. data/lib/linkage/comparator.rb +139 -144
  10. data/lib/linkage/comparators/compare.rb +236 -29
  11. data/lib/linkage/comparators/strcompare.rb +85 -0
  12. data/lib/linkage/comparators/within.rb +24 -20
  13. data/lib/linkage/configuration.rb +44 -466
  14. data/lib/linkage/dataset.rb +28 -127
  15. data/lib/linkage/exceptions.rb +5 -0
  16. data/lib/linkage/field.rb +6 -37
  17. data/lib/linkage/field_set.rb +3 -3
  18. data/lib/linkage/match_recorder.rb +22 -0
  19. data/lib/linkage/match_set.rb +34 -0
  20. data/lib/linkage/match_sets/csv.rb +39 -0
  21. data/lib/linkage/match_sets/database.rb +45 -0
  22. data/lib/linkage/matcher.rb +30 -0
  23. data/lib/linkage/result_set.rb +25 -110
  24. data/lib/linkage/result_sets/csv.rb +54 -0
  25. data/lib/linkage/result_sets/database.rb +42 -0
  26. data/lib/linkage/runner.rb +57 -16
  27. data/lib/linkage/score_recorder.rb +30 -0
  28. data/lib/linkage/score_set.rb +49 -0
  29. data/lib/linkage/score_sets/csv.rb +64 -0
  30. data/lib/linkage/score_sets/database.rb +77 -0
  31. data/lib/linkage/version.rb +1 -1
  32. data/lib/linkage.rb +14 -17
  33. data/linkage.gemspec +13 -1
  34. data/linkage.gemspec-java +32 -0
  35. data/test/helper.rb +30 -23
  36. data/test/integration/test_cross_linkage.rb +46 -25
  37. data/test/integration/test_database_result_set.rb +55 -0
  38. data/test/integration/test_dual_linkage.rb +19 -94
  39. data/test/integration/test_self_linkage.rb +100 -203
  40. data/test/integration/test_within_comparator.rb +24 -77
  41. data/test/unit/comparators/test_compare.rb +254 -50
  42. data/test/unit/comparators/test_strcompare.rb +45 -0
  43. data/test/unit/comparators/test_within.rb +14 -26
  44. data/test/unit/match_sets/test_csv.rb +78 -0
  45. data/test/unit/match_sets/test_database.rb +63 -0
  46. data/test/unit/result_sets/test_csv.rb +111 -0
  47. data/test/unit/result_sets/test_database.rb +68 -0
  48. data/test/unit/score_sets/test_csv.rb +151 -0
  49. data/test/unit/score_sets/test_database.rb +149 -0
  50. data/test/unit/test_comparator.rb +46 -83
  51. data/test/unit/test_comparators.rb +4 -0
  52. data/test/unit/test_configuration.rb +99 -145
  53. data/test/unit/test_dataset.rb +52 -73
  54. data/test/unit/test_field.rb +4 -55
  55. data/test/unit/test_field_set.rb +6 -6
  56. data/test/unit/test_match_recorder.rb +23 -0
  57. data/test/unit/test_match_set.rb +23 -0
  58. data/test/unit/test_match_sets.rb +4 -0
  59. data/test/unit/test_matcher.rb +44 -0
  60. data/test/unit/test_result_set.rb +24 -223
  61. data/test/unit/test_result_sets.rb +4 -0
  62. data/test/unit/test_runner.rb +122 -17
  63. data/test/unit/test_runners.rb +4 -0
  64. data/test/unit/test_score_recorder.rb +25 -0
  65. data/test/unit/test_score_set.rb +37 -0
  66. data/test/unit/test_score_sets.rb +4 -0
  67. metadata +183 -90
  68. data/Gemfile.lock +0 -92
  69. data/lib/linkage/comparators/binary.rb +0 -12
  70. data/lib/linkage/data.rb +0 -175
  71. data/lib/linkage/decollation.rb +0 -93
  72. data/lib/linkage/expectation.rb +0 -21
  73. data/lib/linkage/expectations/exhaustive.rb +0 -63
  74. data/lib/linkage/expectations/simple.rb +0 -168
  75. data/lib/linkage/function.rb +0 -148
  76. data/lib/linkage/functions/binary.rb +0 -30
  77. data/lib/linkage/functions/cast.rb +0 -54
  78. data/lib/linkage/functions/length.rb +0 -29
  79. data/lib/linkage/functions/strftime.rb +0 -33
  80. data/lib/linkage/functions/trim.rb +0 -30
  81. data/lib/linkage/group.rb +0 -55
  82. data/lib/linkage/meta_object.rb +0 -139
  83. data/lib/linkage/runner/single_threaded.rb +0 -187
  84. data/lib/linkage/utils.rb +0 -164
  85. data/lib/linkage/warnings.rb +0 -5
  86. data/test/integration/test_collation.rb +0 -45
  87. data/test/integration/test_configuration.rb +0 -268
  88. data/test/integration/test_dataset.rb +0 -116
  89. data/test/integration/test_functions.rb +0 -88
  90. data/test/integration/test_result_set.rb +0 -85
  91. data/test/integration/test_scoring.rb +0 -84
  92. data/test/unit/expectations/test_exhaustive.rb +0 -111
  93. data/test/unit/expectations/test_simple.rb +0 -303
  94. data/test/unit/functions/test_binary.rb +0 -54
  95. data/test/unit/functions/test_cast.rb +0 -98
  96. data/test/unit/functions/test_length.rb +0 -52
  97. data/test/unit/functions/test_strftime.rb +0 -60
  98. data/test/unit/functions/test_trim.rb +0 -43
  99. data/test/unit/runner/test_single_threaded.rb +0 -12
  100. data/test/unit/test_data.rb +0 -445
  101. data/test/unit/test_decollation.rb +0 -201
  102. data/test/unit/test_function.rb +0 -233
  103. data/test/unit/test_group.rb +0 -38
  104. data/test/unit/test_meta_object.rb +0 -208
  105. data/test/unit/test_utils.rb +0 -341
data/lib/linkage/group.rb DELETED
@@ -1,55 +0,0 @@
1
- module Linkage
2
- class Group
3
- include Linkage::Decollation
4
-
5
- # @return [Hash] Hash of matching values
6
- attr_reader :values
7
-
8
- # @return [Integer] Number of records in this group
9
- attr_reader :count
10
-
11
- # @return [Integer] This group's ID (if it exists)
12
- attr_reader :id
13
-
14
- def self.from_row(row)
15
- values = {}
16
- options = {}
17
- row.each_pair do |key, value|
18
- if key == :id || key == :count
19
- options[key] = value
20
- else
21
- values[key] = value
22
- end
23
- end
24
- new(values, options)
25
- end
26
-
27
- # @param [Hash] values Values that define this group
28
- # @param [Hash] options
29
- # @option options [Fixnum] :id The group ID
30
- # @option options [Fixnum] :count How many records are in the group
31
- # @option options [Hash] :ruby_types Hash of ruby types for each value
32
- # @option options [Symbol] :database_type
33
- # @example
34
- # Linkage::Group.new({:foo => 123, :bar => 'baz'}, {:count => 5, :id => 456})
35
- def initialize(values, options)
36
- @count = options[:count]
37
- @id = options[:id]
38
- @ruby_types = options[:ruby_types]
39
- @database_type = options[:database_type]
40
- @values = values
41
- end
42
-
43
- def decollated_values
44
- @values.inject({}) do |hsh, (key, value)|
45
- ruby_type = @ruby_types[key]
46
- if ruby_type && ruby_type.has_key?(:opts) && ruby_type[:opts].has_key?(:collate)
47
- hsh[key] = decollate(value, @database_type, ruby_type[:opts][:collate])
48
- else
49
- hsh[key] = value
50
- end
51
- hsh
52
- end
53
- end
54
- end
55
- end
data/lib/linkage/meta_object.rb DELETED
@@ -1,139 +0,0 @@
1
- module Linkage
2
- class MetaObject
3
- attr_reader :object
4
- attr_writer :side
5
-
6
- # Creates a new MetaObject.
7
- #
8
- # @param [Object] object This can be a {Field}, {Function} or a regular
9
- # Ruby object (Fixnum, String, etc). If `object` is not static (a {Field}
10
- # or a {Function} that contains one or more {Field} objects), you should
11
- # specify which "side" of the linkage the object belongs to (left-hand
12
- # side or right-hand side) in the `side` argument.
13
- # @param [Symbol] side `:lhs` for left-hand side or `:rhs` for right-hand
14
- # side
15
- def initialize(object, side = nil)
16
- @object = object
17
- @static = object.kind_of?(Linkage::Data) ? object.static? : true
18
- if !side.nil? && side != :lhs && side != :rhs
19
- raise ArgumentError, "invalid `side` argument, must be :lhs or :rhs"
20
- end
21
- @side = side
22
- end
23
-
24
- def side
25
- if !@static && @side.nil?
26
- raise RuntimeError, "Object is dynamic and side is not set"
27
- end
28
- @side
29
- end
30
-
31
- def dataset
32
- @object.kind_of?(Linkage::Data) ? @object.dataset : nil
33
- end
34
-
35
- def dataset=(dataset)
36
- if @object.kind_of?(Linkage::Data)
37
- @object.dataset = dataset
38
- else
39
- raise RuntimeError, "You can't set the dataset of a non-data object."
40
- end
41
- end
42
-
43
- def database_type
44
- ds = dataset
45
- ds ? ds.database_type : nil
46
- end
47
-
48
- def static?
49
- @static
50
- end
51
-
52
- # Returns true if the argument has the same object as the instance.
53
- #
54
- # @param [Linkage::MetaObject] other
55
- # @return [Boolean]
56
- def objects_equal?(other)
57
- other.is_a?(Linkage::MetaObject) && other.object == self.object
58
- end
59
-
60
- # Returns true if the argument has the same dataset as the instance.
61
- #
62
- # @param [Linkage::MetaObject] other
63
- # @return [Boolean]
64
- def datasets_equal?(other)
65
- other.is_a?(Linkage::MetaObject) && other.dataset == self.dataset
66
- end
67
-
68
- # Returns an expression suitable for use in Sequel queries.
69
- # @return [Object]
70
- def to_expr
71
- if @object.kind_of?(Linkage::Data)
72
- @object.to_expr
73
- else
74
- @object
75
- end
76
- end
77
-
78
- # Returns a Sequel identifier for {Data} objects, or the object itself.
79
- # @return [Sequel::SQL::Identifier, Object]
80
- def to_identifier
81
- if @object.kind_of?(Linkage::Data)
82
- Sequel::SQL::Identifier.new(@object.to_expr)
83
- else
84
- @object
85
- end
86
- end
87
-
88
- # Return the name of the object for {Data} objects, nil for others.
89
- # @return [Symbol, nil]
90
- def name
91
- if @object.kind_of?(Linkage::Data)
92
- @object.name
93
- else
94
- nil
95
- end
96
- end
97
-
98
- # Returns a {MergeField} if both objects are {Data} objects, otherwise,
99
- # raises an exception.
100
- #
101
- # @return [Linkage::MergeField]
102
- def merge(other)
103
- if @object.kind_of?(Linkage::Data) && other.object.kind_of?(Linkage::Data)
104
- @object.merge(other.object)
105
- else
106
- raise ArgumentError, "Cannot merge a non-data object"
107
- end
108
- end
109
-
110
- # Returns the Ruby type of the underlying object.
111
- #
112
- # @return [Hash]
113
- # @see Linkage::Field#ruby_type
114
- # @see Linkage::Function#ruby_type
115
- def ruby_type
116
- if @object.kind_of?(Linkage::Data)
117
- @object.ruby_type
118
- else
119
- {:type => @object.class}
120
- end
121
- end
122
-
123
- # Returns the collation of the underlying object.
124
- #
125
- # @return [Symbol]
126
- def collation
127
- if @object.kind_of?(Linkage::Data)
128
- @object.collation
129
- else
130
- nil
131
- end
132
- end
133
-
134
- # Returns true if underlying object is not a subclass of {Linkage::Data}.
135
- def raw?
136
- !@object.kind_of?(Linkage::Data)
137
- end
138
- end
139
- end
data/lib/linkage/runner/single_threaded.rb DELETED
@@ -1,187 +0,0 @@
1
- module Linkage
2
- # A runner class that only uses a single thread to execute a linkage.
3
- #
4
- # @see Runner
5
- class SingleThreadedRunner < Runner
6
- # @return [Linkage::ResultSet]
7
- def execute
8
- result_set.create_tables!
9
-
10
- @pk_1 = config.dataset_1.field_set.primary_key.to_expr
11
- @pk_2 = config.dataset_2.field_set.primary_key.to_expr
12
- if config.has_simple_expectations?
13
- setup_datasets
14
- group_records
15
-
16
- if config.has_exhaustive_expectations?
17
- score_records_with_groups
18
- else
19
- create_matches
20
- end
21
- else
22
- dataset_1, dataset_2 = config.datasets_with_applied_exhaustive_expectations
23
- score_records_without_groups(dataset_1, dataset_2)
24
- end
25
-
26
- result_set.flush!
27
- return result_set
28
- end
29
-
30
- private
31
-
32
- def setup_datasets
33
- @dataset_1, @dataset_2 = config.datasets_with_applied_simple_expectations
34
-
35
- @dataset_1 = @dataset_1.select(@pk_1)
36
- if @config.linkage_type != :self
37
- @dataset_2 = @dataset_2.select(@pk_2)
38
- end
39
- end
40
-
41
- def group_records
42
- if config.linkage_type == :self
43
- group_records_for(@dataset_1, 1)
44
- else
45
- group_records_for(@dataset_1, 1, false)
46
- group_records_for(@dataset_2, 2, false)
47
- combine_groups
48
- end
49
- end
50
-
51
- # @param [Linkage::Dataset] dataset
52
- # @param [Fixnum, nil] dataset_id
53
- # @param [Boolean] ignore_empty_groups
54
- # @yield [Linkage::Group] If a block is given, yield completed groups to
55
- # the block. Otherwise, call ResultSet#add_group on the group.
56
- def group_records_for(dataset, dataset_id, ignore_empty_groups = true)
57
- group_minimum = ignore_empty_groups ? 2 : 1
58
- dataset.each_group(group_minimum) do |group|
59
- result_set.add_group(group, dataset_id)
60
- end
61
- result_set.flush!
62
- end
63
-
64
- def combine_groups
65
- # Create a new dataset for the groups table
66
- groups_dataset = result_set.groups_dataset
67
-
68
- groups_dataset.field_set.values.each do |field|
69
- # Sort on all fields
70
- if !field.primary_key?
71
- meta_object = MetaObject.new(field)
72
- groups_dataset = groups_dataset.group_match_more(meta_object)
73
- end
74
- end
75
-
76
- # Delete non-matching groups
77
- sub_dataset = groups_dataset.select(:id).group_by_matches.having(:count.sql_function(:id) => 1)
78
- groups_dataset.filter(:id => sub_dataset.obj).delete
79
-
80
- # Delete duplicate groups
81
- sub_dataset = groups_dataset.select(:max.sql_function(:id).as(:id)).group_by_matches
82
- groups_dataset.filter(:id => sub_dataset.obj).delete
83
- end
84
-
85
- def score_records_with_groups
86
- result_set.groups_dataset.each do |group_record|
87
- group = Group.from_row(group_record)
88
- dataset_1, dataset_2 = config.apply_exhaustive_expectations(
89
- *result_set.groups_records_datasets(group))
90
- score_records_without_groups(dataset_1, dataset_2)
91
- end
92
- end
93
-
94
- def score_records_without_groups(dataset_1, dataset_2)
95
- if config.linkage_type == :self
96
- keys = dataset_1.select_map(@pk_1)
97
- unfiltered_dataset = dataset_1.unfiltered
98
- cache = Hashery::LRUHash.new(config.record_cache_size) do |h, k|
99
- h[k] = unfiltered_dataset.filter(@pk_1 => k).first
100
- end
101
- upper_bound = keys.length - 1
102
-
103
- forward = true
104
- keys.each_with_index do |key_1, key_1_index|
105
- record_1 = cache[key_1]
106
-
107
- lower_bound = key_1_index + 1
108
- enum =
109
- if forward
110
- lower_bound.upto(upper_bound)
111
- else
112
- upper_bound.downto(lower_bound)
113
- end
114
- enum.each do |key_2_index|
115
- record_2 = cache[keys[key_2_index]]
116
- score(record_1, record_2)
117
- end
118
- forward = !forward
119
- end
120
- else
121
- keys_2 = dataset_2.select_map(@pk_2)
122
- unfiltered_dataset_2 = dataset_2.unfiltered
123
- cache_2 = Hashery::LRUHash.new(config.record_cache_size) do |h, k|
124
- h[k] = unfiltered_dataset_2.filter(@pk_2 => k).first
125
- end
126
- keys_2_last = keys_2.length - 1
127
-
128
- forward = true
129
- dataset_1.each do |record_1|
130
- enum = forward ? 0.upto(keys_2_last) : keys_2_last.downto(0)
131
- enum.each do |key_2_index|
132
- record_2 = cache_2[keys_2[key_2_index]]
133
- score(record_1, record_2)
134
- end
135
- forward = !forward
136
- end
137
- end
138
- end
139
-
140
- def score(record_1, record_2)
141
- pk_1 = record_1[@pk_1]
142
- pk_2 = record_2[@pk_2]
143
-
144
- catch(:stop) do
145
- total_score = 0
146
- config.exhaustive_expectations.each_with_index do |expectation, comparator_id|
147
- comparator = expectation.comparator
148
-
149
- score = comparator.score(record_1, record_2)
150
- result_set.add_score(comparator_id, pk_1, pk_2, score)
151
-
152
- throw(:stop) unless expectation.satisfied?(score)
153
- total_score += score
154
- end
155
- result_set.add_match(pk_1, pk_2, total_score)
156
- end
157
- end
158
-
159
- # Only needed for linkages without exhaustive expectations
160
- def create_matches
161
- result_set.groups_dataset.each do |group_record|
162
- group = Group.from_row(group_record)
163
- dataset_1, dataset_2 = result_set.groups_records_datasets(group)
164
-
165
- if config.linkage_type == :self
166
- keys = dataset_1.select_map(@pk_1)
167
- keys_last = keys.length - 1
168
- keys.each_with_index do |key_1, key_1_index|
169
- (key_1_index + 1).upto(keys_last) do |key_2_index|
170
- key_2 = keys[key_2_index]
171
- result_set.add_match(key_1, key_2, nil)
172
- end
173
- end
174
- else
175
- keys_1 = dataset_1.select_map(@pk_1)
176
- keys_2 = dataset_2.select_map(@pk_2)
177
-
178
- keys_1.each do |key_1|
179
- keys_2.each do |key_2|
180
- result_set.add_match(key_1, key_2, nil)
181
- end
182
- end
183
- end
184
- end
185
- end
186
- end
187
- end
data/lib/linkage/utils.rb DELETED
@@ -1,164 +0,0 @@
1
- module Linkage
2
- module Utils
3
- # A "tree" used to find compatible types.
4
- TYPE_CONVERSION_TREE = {
5
- TrueClass => [Integer],
6
- Integer => [Bignum, Float],
7
- Bignum => [BigDecimal],
8
- Float => [BigDecimal],
9
- BigDecimal => [String],
10
- String => nil,
11
- DateTime => nil,
12
- Date => nil,
13
- Time => nil,
14
- File => nil
15
- }
16
-
17
- # Create field information for a field that can hold data from two other
18
- # fields. If the fields have different types, the resulting type is
19
- # determined via a type-conversion tree.
20
- #
21
- # @param [Array] field_1 Schema information for the first field
22
- # @param [Array] field_2 Schema information for the second field
23
- # @return [Array] Schema information for the new field
24
- def merge_fields(field_1, field_2)
25
- schema_1 = column_schema_to_ruby_type(field_1)
26
- schema_1.delete_if { |k, v| v.nil? }
27
- schema_2 = column_schema_to_ruby_type(field_2)
28
- schema_2.delete_if { |k, v| v.nil? }
29
- if schema_1 == schema_2
30
- result = schema_1
31
- else
32
- result = schema_1.dup
33
-
34
- # type
35
- if schema_1[:type] != schema_2[:type]
36
- result[:type] = first_common_type(schema_1[:type], schema_2[:type])
37
- end
38
-
39
- # text
40
- if schema_1[:text] != schema_2[:text]
41
- # This can only be of type String.
42
- result[:text] = true
43
- result.delete(:size)
44
- end
45
-
46
- # size
47
- if !result[:text] && schema_1[:size] != schema_2[:size]
48
- types = [schema_1[:type], schema_2[:type]].uniq
49
- if types.length == 1 && types[0] == BigDecimal
50
- # Two decimals
51
- if schema_1.has_key?(:size) && schema_2.has_key?(:size)
52
- s_1 = schema_1[:size]
53
- s_2 = schema_2[:size]
54
- result[:size] = [ s_1[0] > s_2[0] ? s_1[0] : s_2[0] ]
55
-
56
- if s_1[1] && s_2[1]
57
- result[:size][1] = s_1[1] > s_2[1] ? s_1[1] : s_2[1]
58
- else
59
- result[:size][1] = s_1[1] ? s_1[1] : s_2[1]
60
- end
61
- else
62
- result[:size] = schema_1.has_key?(:size) ? schema_1[:size] : schema_2[:size]
63
- end
64
- elsif types.include?(String) && types.include?(BigDecimal)
65
- # Add one to the precision of the BigDecimal (for the dot)
66
- if schema_1.has_key?(:size) && schema_2.has_key?(:size)
67
- s_1 = schema_1[:size].is_a?(Array) ? schema_1[:size][0] + 1 : schema_1[:size]
68
- s_2 = schema_2[:size].is_a?(Array) ? schema_2[:size][0] + 1 : schema_2[:size]
69
- result[:size] = s_1 > s_2 ? s_1 : s_2
70
- elsif schema_1.has_key?(:size)
71
- result[:size] = schema_1[:size].is_a?(Array) ? schema_1[:size][0] + 1 : schema_1[:size]
72
- elsif schema_2.has_key?(:size)
73
- result[:size] = schema_2[:size].is_a?(Array) ? schema_2[:size][0] + 1 : schema_2[:size]
74
- end
75
- else
76
- # Treat as two strings
77
- if schema_1.has_key?(:size) && schema_2.has_key?(:size)
78
- result[:size] = schema_1[:size] > schema_2[:size] ? schema_1[:size] : schema_2[:size]
79
- elsif schema_1.has_key?(:size)
80
- result[:size] = schema_1[:size]
81
- else
82
- result[:size] = schema_2[:size]
83
- end
84
- end
85
- end
86
-
87
- # fixed
88
- if schema_1[:fixed] != schema_2[:fixed]
89
- # This can only be of type String.
90
- result[:fixed] = true
91
- end
92
- end
93
-
94
- {:type => result.delete(:type), :opts => result}
95
- end
96
-
97
- private
98
-
99
- # Convert the column schema information to a hash of column options, one of which must
100
- # be :type. The other options added should modify that type (e.g. :size). If a
101
- # database type is not recognized, return it as a String type.
102
- #
103
- # @note This method comes straight from Sequel (lib/sequel/extensions/schema_dumper.rb).
104
- def column_schema_to_ruby_type(schema)
105
- case t = schema[:db_type].downcase
106
- when /\A(?:medium|small)?int(?:eger)?(?:\((?:\d+)\))?(?: unsigned)?\z/o
107
- {:type=>Integer}
108
- when /\Atinyint(?:\((\d+)\))?\z/o
109
- {:type =>schema[:type] == :boolean ? TrueClass : Integer}
110
- when /\Abigint(?:\((?:\d+)\))?(?: unsigned)?\z/o
111
- {:type=>Bignum}
112
- when /\A(?:real|float|double(?: precision)?)\z/o
113
- {:type=>Float}
114
- when 'boolean'
115
- {:type=>TrueClass}
116
- when /\A(?:(?:tiny|medium|long|n)?text|clob)\z/o
117
- {:type=>String, :text=>true}
118
- when 'date'
119
- {:type=>Date}
120
- when /\A(?:small)?datetime\z/o
121
- {:type=>DateTime}
122
- when /\Atimestamp(?:\((\d+)\))?(?: with(?:out)? time zone)?\z/o
123
- {:type=>DateTime, :size=>($1.to_i if $1)}
124
- when /\Atime(?: with(?:out)? time zone)?\z/o
125
- {:type=>Time, :only_time=>true}
126
- when /\An?char(?:acter)?(?:\((\d+)\))?\z/o
127
- {:type=>String, :size=>($1.to_i if $1), :fixed=>true}
128
- when /\A(?:n?varchar|character varying|bpchar|string)(?:\((\d+)\))?\z/o
129
- {:type=>String, :size=>($1.to_i if $1)}
130
- when /\A(?:small)?money\z/o
131
- {:type=>BigDecimal, :size=>[19,2]}
132
- when /\A(?:decimal|numeric|number)(?:\((\d+)(?:,\s*(\d+))?\))?\z/o
133
- s = [($1.to_i if $1), ($2.to_i if $2)].compact
134
- {:type=>BigDecimal, :size=>(s.empty? ? nil : s)}
135
- when /\A(?:bytea|(?:tiny|medium|long)?blob|(?:var)?binary)(?:\((\d+)\))?\z/o
136
- {:type=>File, :size=>($1.to_i if $1)}
137
- when 'year'
138
- {:type=>Integer}
139
- else
140
- {:type=>String}
141
- end
142
- end
143
-
144
- def first_common_type(type_1, type_2)
145
- types_1 = [type_1] + get_types(type_1)
146
- types_2 = [type_2] + get_types(type_2)
147
- (types_1 & types_2).first
148
- end
149
-
150
- # Get all types that the specified type can be converted to. Order
151
- # matters.
152
- def get_types(type)
153
- result = []
154
- types = TYPE_CONVERSION_TREE[type]
155
- if types
156
- result += types
157
- types.each do |t|
158
- result |= get_types(t)
159
- end
160
- end
161
- result
162
- end
163
- end
164
- end
data/lib/linkage/warnings.rb DELETED
@@ -1,5 +0,0 @@
1
- module Linkage
2
- module Warnings
3
- # TODO :)
4
- end
5
- end
data/test/integration/test_collation.rb DELETED
@@ -1,45 +0,0 @@
1
- require 'helper'
2
-
3
- module IntegrationTests
4
- class TestCollation < Test::Unit::TestCase
5
- def setup
6
- @tmpdir = Dir.mktmpdir('linkage')
7
- @tmpuri = "sqlite://" + File.join(@tmpdir, "foo")
8
- end
9
-
10
- def database(options = {}, &block)
11
- Sequel.connect(@tmpuri, options, &block)
12
- end
13
-
14
- def teardown
15
- FileUtils.remove_entry_secure(@tmpdir)
16
- end
17
-
18
- test "comparing strings exactly in MySQL" do
19
- options = database_options_for('mysql')
20
- database_for('mysql') do |db|
21
- db.create_table!(:foo) do
22
- primary_key :id
23
- String :foo
24
- String :bar
25
- end
26
- db[:foo].import([:foo, :bar], [
27
- ["Foo", "foo"],
28
- ["bar", "bar "],
29
- ])
30
- end
31
- dataset = Linkage::Dataset.new(options, :foo)
32
- tmpuri = @tmpuri
33
- conf = dataset.link_with(dataset) do
34
- (lhs[:foo].must == rhs[:bar]).exactly
35
- save_results_in(tmpuri)
36
- end
37
- runner = Linkage::SingleThreadedRunner.new(conf)
38
- runner.execute
39
-
40
- database do |db|
41
- assert_equal 0, db[:groups].count
42
- end
43
- end
44
- end
45
- end