linkage 0.0.6 → 0.0.8
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +10 -0
- data/Gemfile +15 -13
- data/Gemfile.lock +67 -37
- data/Guardfile +0 -2
- data/Rakefile +122 -25
- data/lib/linkage/comparator.rb +172 -0
- data/lib/linkage/comparators/binary.rb +12 -0
- data/lib/linkage/comparators/compare.rb +46 -0
- data/lib/linkage/comparators/within.rb +32 -0
- data/lib/linkage/configuration.rb +285 -153
- data/lib/linkage/data.rb +32 -7
- data/lib/linkage/dataset.rb +107 -32
- data/lib/linkage/decollation.rb +93 -0
- data/lib/linkage/expectation.rb +21 -0
- data/lib/linkage/expectations/exhaustive.rb +63 -0
- data/lib/linkage/expectations/simple.rb +168 -0
- data/lib/linkage/field.rb +30 -4
- data/lib/linkage/field_set.rb +6 -3
- data/lib/linkage/function.rb +50 -3
- data/lib/linkage/functions/binary.rb +30 -0
- data/lib/linkage/functions/cast.rb +54 -0
- data/lib/linkage/functions/length.rb +29 -0
- data/lib/linkage/functions/strftime.rb +12 -11
- data/lib/linkage/functions/trim.rb +8 -0
- data/lib/linkage/group.rb +20 -0
- data/lib/linkage/import_buffer.rb +5 -16
- data/lib/linkage/meta_object.rb +139 -0
- data/lib/linkage/result_set.rb +74 -17
- data/lib/linkage/runner/single_threaded.rb +125 -10
- data/lib/linkage/version.rb +3 -0
- data/lib/linkage.rb +11 -0
- data/linkage.gemspec +16 -121
- data/test/config.yml +5 -0
- data/test/helper.rb +73 -8
- data/test/integration/test_collation.rb +45 -0
- data/test/integration/test_configuration.rb +268 -0
- data/test/integration/test_cross_linkage.rb +4 -17
- data/test/integration/test_dataset.rb +45 -2
- data/test/integration/test_dual_linkage.rb +40 -24
- data/test/integration/test_functions.rb +22 -0
- data/test/integration/test_result_set.rb +85 -0
- data/test/integration/test_scoring.rb +84 -0
- data/test/integration/test_self_linkage.rb +5 -0
- data/test/integration/test_within_comparator.rb +100 -0
- data/test/unit/comparators/test_compare.rb +105 -0
- data/test/unit/comparators/test_within.rb +57 -0
- data/test/unit/expectations/test_exhaustive.rb +111 -0
- data/test/unit/expectations/test_simple.rb +303 -0
- data/test/unit/functions/test_binary.rb +54 -0
- data/test/unit/functions/test_cast.rb +98 -0
- data/test/unit/functions/test_length.rb +52 -0
- data/test/unit/functions/test_strftime.rb +17 -13
- data/test/unit/functions/test_trim.rb +11 -4
- data/test/unit/test_comparator.rb +124 -0
- data/test/unit/test_configuration.rb +137 -175
- data/test/unit/test_data.rb +44 -0
- data/test/unit/test_dataset.rb +73 -21
- data/test/unit/test_decollation.rb +201 -0
- data/test/unit/test_field.rb +38 -14
- data/test/unit/test_field_set.rb +12 -8
- data/test/unit/test_function.rb +83 -16
- data/test/unit/test_group.rb +28 -0
- data/test/unit/test_import_buffer.rb +13 -27
- data/test/unit/test_meta_object.rb +208 -0
- data/test/unit/test_result_set.rb +221 -3
- metadata +82 -190
data/lib/linkage/dataset.rb
CHANGED
@@ -2,13 +2,28 @@ module Linkage
|
|
2
2
|
# Delegator around Sequel::Dataset with some extra functionality.
|
3
3
|
class Dataset
|
4
4
|
attr_reader :field_set, :table_name
|
5
|
+
attr_accessor :linkage_options
|
5
6
|
|
6
|
-
def initialize(
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
7
|
+
def initialize(*args)
|
8
|
+
if args.length == 1
|
9
|
+
@dataset = args[0]
|
10
|
+
@db = @dataset.db
|
11
|
+
@table_name = @dataset.first_source_table
|
12
|
+
|
13
|
+
if !@db.kind_of?(Sequel::Collation)
|
14
|
+
@db.extend(Sequel::Collation)
|
15
|
+
end
|
16
|
+
else
|
17
|
+
uri, table, options = args
|
18
|
+
options ||= {}
|
19
|
+
|
20
|
+
@table_name = table.to_sym
|
21
|
+
@db = Sequel.connect(uri, options)
|
22
|
+
@db.extend(Sequel::Collation)
|
23
|
+
@dataset = @db[@table_name]
|
24
|
+
end
|
25
|
+
@field_set = FieldSet.new(self)
|
26
|
+
@linkage_options = {}
|
12
27
|
end
|
13
28
|
|
14
29
|
def obj
|
@@ -28,67 +43,127 @@ module Linkage
|
|
28
43
|
conf
|
29
44
|
end
|
30
45
|
|
31
|
-
def
|
32
|
-
@
|
46
|
+
def database_type
|
47
|
+
@db.database_type
|
33
48
|
end
|
34
49
|
|
35
|
-
|
36
|
-
|
50
|
+
# Set objects to use for group matching. Accepts either {Linkage::MetaObject} or a
|
51
|
+
# hash with options (valid options are :meta_object, :alias, and :cast).
|
52
|
+
#
|
53
|
+
# @example
|
54
|
+
# dataset.group_match(meta_object_1,
|
55
|
+
# {:meta_object => meta_object_2, :alias => :foo})
|
56
|
+
def group_match(*args)
|
57
|
+
args.collect! do |arg|
|
58
|
+
case arg
|
59
|
+
when Linkage::MetaObject
|
60
|
+
{ :meta_object => arg }
|
61
|
+
when Hash
|
62
|
+
if !arg.has_key?(:meta_object)
|
63
|
+
raise ArgumentError, "Invalid option hash, missing :meta_object key"
|
64
|
+
end
|
65
|
+
(arg.keys - [:meta_object, :alias, :cast]).each do |invalid_key|
|
66
|
+
warn "Invalid key in option hash: #{invalid_key}"
|
67
|
+
end
|
68
|
+
arg
|
69
|
+
else
|
70
|
+
raise ArgumentError, "expected Hash or MetaObject, got #{arg.class}"
|
71
|
+
end
|
72
|
+
end
|
73
|
+
clone(:group_match => args)
|
74
|
+
end
|
75
|
+
|
76
|
+
# Add additional objects to use for group matching.
|
77
|
+
def group_match_more(*args)
|
78
|
+
args = @linkage_options[:group_match] + args if @linkage_options[:group_match]
|
79
|
+
group_match(*args)
|
37
80
|
end
|
38
81
|
|
39
|
-
def clone(
|
40
|
-
|
41
|
-
|
82
|
+
def clone(new_options = {})
|
83
|
+
new_linkage_options = {}
|
84
|
+
new_obj_options = {}
|
85
|
+
new_options.each_pair do |k, v|
|
86
|
+
case k
|
87
|
+
when :group_match
|
88
|
+
new_linkage_options[k] = v
|
89
|
+
else
|
90
|
+
new_obj_options[k] = v
|
91
|
+
end
|
92
|
+
end
|
93
|
+
new_obj = new_options[:new_obj]
|
42
94
|
|
43
|
-
match = new_opts.delete(:match)
|
44
95
|
result = super()
|
45
|
-
result.
|
96
|
+
result.linkage_options = @linkage_options.merge(new_linkage_options)
|
46
97
|
|
47
98
|
if new_obj
|
48
99
|
result.obj = new_obj
|
49
100
|
else
|
50
|
-
result.obj = obj.clone(
|
101
|
+
result.obj = obj.clone(new_options)
|
51
102
|
end
|
103
|
+
|
52
104
|
result
|
53
105
|
end
|
54
106
|
|
55
107
|
def each_group(min = 2)
|
56
|
-
|
108
|
+
group_match = @linkage_options[:group_match] || []
|
109
|
+
ruby_types = group_match.inject({}) do |hsh, m|
|
110
|
+
key = m[:alias] || m[:meta_object].to_expr
|
111
|
+
hsh[key] = m[:meta_object].ruby_type
|
112
|
+
hsh
|
113
|
+
end
|
114
|
+
options = {:database_type => database_type, :ruby_types => ruby_types }
|
115
|
+
@dataset.group_and_count(*match_expressions).having{count >= min}.each do |row|
|
57
116
|
count = row.delete(:count)
|
58
|
-
|
117
|
+
group = Group.new(row, options.merge(:count => count))
|
118
|
+
yield group
|
59
119
|
end
|
60
120
|
end
|
61
121
|
|
62
|
-
def group_by_matches(
|
63
|
-
expr =
|
122
|
+
def group_by_matches(raw = true)
|
123
|
+
expr = raw ? raw_match_expressions : match_expressions
|
64
124
|
group(*expr)
|
65
125
|
end
|
66
126
|
|
67
127
|
def dataset_for_group(group)
|
68
128
|
filters = []
|
129
|
+
group_match = @linkage_options[:group_match] || []
|
69
130
|
group.values.each_pair do |key, value|
|
70
131
|
# find a matched expression with this alias
|
71
|
-
|
72
|
-
|
73
|
-
|
132
|
+
found = false
|
133
|
+
group_match.each do |m|
|
134
|
+
expr = m[:meta_object].to_expr
|
135
|
+
if (m[:alias] && m[:alias] == key) || expr == key
|
136
|
+
found = true
|
137
|
+
filters << {expr => value}
|
138
|
+
break
|
139
|
+
end
|
140
|
+
end
|
141
|
+
if !found
|
142
|
+
raise "this dataset isn't compatible with the given group"
|
143
|
+
end
|
74
144
|
end
|
75
145
|
filter(*filters)
|
76
146
|
end
|
77
147
|
|
148
|
+
def schema
|
149
|
+
@db.schema(@table_name)
|
150
|
+
end
|
151
|
+
|
78
152
|
private
|
79
153
|
|
80
|
-
def
|
81
|
-
|
82
|
-
|
83
|
-
end
|
154
|
+
def raw_match_expressions
|
155
|
+
group_match = @linkage_options[:group_match] || []
|
156
|
+
group_match.collect { |m| m[:meta_object].to_expr }
|
84
157
|
end
|
85
158
|
|
86
159
|
def match_expressions
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
160
|
+
group_match = @linkage_options[:group_match] || []
|
161
|
+
group_match.collect do |m|
|
162
|
+
expr = m[:meta_object].to_expr
|
163
|
+
expr = expr.as(m[:alias]) if m[:alias]
|
164
|
+
expr = expr.cast(m[:cast]) if m[:cast]
|
165
|
+
expr
|
166
|
+
end
|
92
167
|
end
|
93
168
|
|
94
169
|
def method_missing(name, *args, &block)
|
@@ -0,0 +1,93 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
module Linkage
|
3
|
+
# Helpers that fold a string the way a database collation would before
# comparing it, so values can be compared in Ruby. Only the combination
# MySQL + "latin1_swedish_ci" is handled; every other database type or
# collation returns the string untouched.
module Decollation
  # Folding classes used for "latin1_swedish_ci". Each key is the
  # canonical character; every character in its list is replaced by it.
  LATIN1_SWEDISH_CI_CLASSES = {
    'A' => %w[A a À Á Â Ã à á â ã],
    'B' => %w[B b],
    'C' => %w[C c Ç ç],
    'D' => %w[D d Ð ð],
    'E' => %w[E e È É Ê Ë è é ê ë],
    'F' => %w[F f],
    'G' => %w[G g],
    'H' => %w[H h],
    'I' => %w[I i Ì Í Î Ï ì í î ï],
    'J' => %w[J j],
    'K' => %w[K k],
    'L' => %w[L l],
    'M' => %w[M m],
    'N' => %w[N n Ñ ñ],
    'O' => %w[O o Ò Ó Ô Õ ò ó ô õ],
    'P' => %w[P p],
    'Q' => %w[Q q],
    'R' => %w[R r],
    'S' => %w[S s],
    'T' => %w[T t],
    'U' => %w[U u Ù Ú Û ù ú û],
    'V' => %w[V v],
    'W' => %w[W w],
    'X' => %w[X x],
    'Y' => %w[Y y Ü Ý ü ý],
    'Z' => %w[Z z],
    '[' => ['[', 'Å', 'å'],
    '\\' => ['\\', 'Ä', 'Æ', 'ä', 'æ'],
    ']' => [']', 'Ö', 'ö'],
    'Ø' => %w[Ø ø],
    'Þ' => %w[Þ þ]
  }.freeze

  # Flat character => canonical character lookup derived from the classes
  # above, built once when the module is loaded.
  LATIN1_SWEDISH_CI_MAP =
    LATIN1_SWEDISH_CI_CLASSES.each_with_object({}) do |(canonical, members), map|
      members.each { |member| map[member] = canonical }
    end.freeze

  # Fold +string+ according to the given database type and collation.
  #
  # @param [String] string
  # @param [Symbol] database_type only :mysql is handled
  # @param [String] collation collation name, e.g. "latin1_swedish_ci"
  # @return [String] the folded string, or +string+ itself when the
  #   database type is not handled
  def decollate(string, database_type, collation)
    return string unless database_type == :mysql

    decollate_mysql(string, collation)
  end

  # Fold +string+ for a MySQL collation.
  #
  # @param [String] string
  # @param [String] collation
  # @return [String] the folded string, or +string+ itself when the
  #   collation is not handled
  def decollate_mysql(string, collation)
    return string unless collation == "latin1_swedish_ci"

    decollate_mysql_latin1_swedish_ci(string)
  end

  # Fold +string+ using the "latin1_swedish_ci" table: surrounding
  # whitespace is stripped and every character listed in
  # LATIN1_SWEDISH_CI_CLASSES is replaced by its canonical character.
  # Characters that are not in the table are kept as they are. The
  # argument is not modified.
  #
  # @param [String] string
  # @return [String] a new, folded string
  def decollate_mysql_latin1_swedish_ci(string)
    stripped = string.strip
    # Build the result in a separate buffer (same encoding as the input)
    # instead of writing into the string by character index, which costs
    # a scan from the start of the string for multibyte content.
    folded = String.new.force_encoding(stripped.encoding)
    stripped.each_char do |char|
      folded << LATIN1_SWEDISH_CI_MAP.fetch(char, char)
    end
    folded
  end
end
|
93
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module Linkage
|
2
|
+
# The Expectation class contains information about how two datasets
# should be linked. It is an abstract base: {#kind} and {#apply_to} raise
# until a subclass overrides them.
class Expectation
  # The kind of linkage this expectation describes.
  #
  # @raise [NotImplementedError] always, unless overridden by a subclass
  def kind
    raise NotImplementedError, "#{self.class.name} must implement #kind"
  end

  # Apply this expectation. The base class accepts any arguments.
  #
  # @raise [NotImplementedError] always, unless overridden by a subclass
  def apply_to(*args)
    raise NotImplementedError, "#{self.class.name} must implement #apply_to"
  end

  # Whether decollation is needed for this expectation.
  #
  # @return [Boolean] false in the base class
  def decollation_needed?
    false
  end
end
|
17
|
+
end
|
18
|
+
|
19
|
+
# Require every file in the "expectations" directory next to this file.
# The list is sorted so the load order is the same on every run; the order
# returned by Dir.glob itself is not relied upon.
Dir.glob(File.expand_path(File.join(File.dirname(__FILE__), "expectations", "*.rb"))).sort.each do |filename|
  require filename
end
|
@@ -0,0 +1,63 @@
|
|
1
|
+
module Linkage
|
2
|
+
module Expectations
|
3
|
+
# Expectation built from a comparator, a threshold and a mode. A score
# satisfies the expectation when it equals the threshold (mode :equal) or
# is at least the threshold (mode :min).
class Exhaustive < Expectation
  attr_reader :comparator, :threshold, :mode

  # @param comparator object responding to +lhs_args+ and +rhs_args+
  # @param threshold value that scores are compared against
  # @param [Symbol] mode :equal or :min
  def initialize(comparator, threshold, mode)
    @comparator = comparator
    @threshold = threshold
    @mode = mode
  end

  # The kind of linkage implied by the comparator's arguments. The value
  # is computed on first use and cached.
  #
  # @return [Symbol] :self, :cross or :dual
  def kind
    @kind ||= determine_kind
  end

  # Add the comparator's arguments for one side to the dataset's
  # selection, each aliased to the argument's name.
  #
  # @param dataset object responding to +select_more+
  # @param [Symbol] side :lhs or :rhs
  # @raise [ArgumentError] if +side+ is neither :lhs nor :rhs
  def apply_to(dataset, side)
    args =
      case side
      when :lhs
        comparator.lhs_args
      when :rhs
        comparator.rhs_args
      else
        # Same error as Match#apply_to, instead of silently selecting
        # nothing for an unknown side.
        raise ArgumentError, "Invalid `side` argument: #{side}"
      end

    dataset.select_more(*args.collect { |arg| arg.to_expr.as(arg.name) })
  end

  # Whether +score+ meets the threshold under the configured mode.
  #
  # @return [Boolean, nil] nil when the mode is neither :equal nor :min
  def satisfied?(score)
    case mode
    when :equal
      score == threshold
    when :min
      score >= threshold
    end
  end

  private

  # Work out the linkage kind from the comparator's arguments.
  def determine_kind
    lhs_args = @comparator.lhs_args
    rhs_args = @comparator.rhs_args

    # :self requires the same number of arguments on both sides and every
    # pair of arguments to refer to equal objects.
    same_objects =
      lhs_args.length == rhs_args.length &&
      lhs_args.zip(rhs_args).all? { |lhs_arg, rhs_arg| lhs_arg.objects_equal?(rhs_arg) }
    return :self if same_objects

    # Assume that all lhs arguments have the same dataset, as well as all
    # the rhs arguments. Only check the first argument of each side.
    lhs_args[0].datasets_equal?(rhs_args[0]) ? :cross : :dual
  end
end
|
62
|
+
end
|
63
|
+
end
|
@@ -0,0 +1,168 @@
|
|
1
|
+
module Linkage
|
2
|
+
module Expectations
|
3
|
+
# Base class for expectations that relate two meta objects with a
# comparison operator. Use {Simple.create} to get the appropriate
# subclass for a pair of meta objects.
class Simple < Expectation
  # The dataset this expectation applies to: `:lhs` or `:rhs`. This
  # only applies to filter expectations.
  # @return [Symbol]
  attr_reader :side

  attr_reader :meta_object_1, :meta_object_2, :operator

  # Operators accepted by {#initialize}. Frozen so the list cannot be
  # modified at runtime.
  VALID_OPERATORS = [:==, :'!=', :>, :<, :>=, :<=].freeze

  # Automatically create an expectation type depending on the arguments.
  #
  # @param [Linkage::MetaObject] meta_object_1
  # @param [Linkage::MetaObject] meta_object_2
  # @param [Symbol] operator Valid operators: `:==`, `:'!='`, `:>`, `:<`, `:>=`, `:<=`
  # @raise [ArgumentError] if both objects are static, or if both are
  #   on the same side but belong to different datasets
  def self.create(meta_object_1, meta_object_2, operator)
    klass = expectation_class_for(meta_object_1, meta_object_2)
    klass.new(meta_object_1, meta_object_2, operator)
  end

  # Pick the expectation class for a pair of meta objects.
  def self.expectation_class_for(meta_object_1, meta_object_2)
    if meta_object_1.static? || meta_object_2.static?
      if meta_object_1.static? && meta_object_2.static?
        raise ArgumentError, "An expectation with two static objects is invalid"
      end
      return Filter
    end

    if meta_object_1.side == meta_object_2.side
      if !meta_object_1.datasets_equal?(meta_object_2)
        raise ArgumentError, "An expectation with two dynamic objects with the same side but different datasets is invalid"
      end
      return Filter
    end

    return Self if meta_object_1.objects_equal?(meta_object_2)

    meta_object_1.datasets_equal?(meta_object_2) ? Cross : Dual
  end
  private_class_method :expectation_class_for

  # Creates a new Simple.
  #
  # @param [Linkage::MetaObject] meta_object_1
  # @param [Linkage::MetaObject] meta_object_2
  # @param [Symbol] operator Valid operators: `:==`, `:'!='`, `:>`, `:<`, `:>=`, `:<=`
  # @raise [ArgumentError] if the operator is not in VALID_OPERATORS
  def initialize(meta_object_1, meta_object_2, operator)
    # Reject bad operators before any state is set.
    if !VALID_OPERATORS.include?(operator)
      raise ArgumentError, "Invalid operator: #{operator.inspect}"
    end

    @meta_object_1 = meta_object_1
    @meta_object_2 = meta_object_2
    @operator = operator

    after_initialize
  end

  # Whether +other+ is a Simple expectation with the same operator whose
  # meta objects refer to the same objects as this one's.
  def same_except_side?(other)
    other.is_a?(Simple) &&
      operator == other.operator &&
      meta_object_1.objects_equal?(other.meta_object_1) &&
      meta_object_2.objects_equal?(other.meta_object_2)
  end

  # Replace both meta objects with ones wrapping the 'binary' function
  # applied to the original objects, keeping each object's side.
  def exactly!
    function_1 = Function['binary'].new(@meta_object_1.object, :dataset => @meta_object_1.dataset)
    function_2 = Function['binary'].new(@meta_object_2.object, :dataset => @meta_object_2.dataset)
    @meta_object_1 = MetaObject.new(function_1, @meta_object_1.side)
    @meta_object_2 = MetaObject.new(function_2, @meta_object_2.side)
  end

  # Display any warnings about this expectation. Does nothing here.
  def display_warnings
  end

  # True when the merged field is a String and the two meta objects
  # differ in collation or database type.
  #
  # NOTE: relies on +merged_field+, which this class does not define
  # itself; a subclass has to provide it.
  def decollation_needed?
    merged_field.ruby_type[:type] == String && (
      @meta_object_1.collation != @meta_object_2.collation ||
      @meta_object_1.database_type != @meta_object_2.database_type
    )
  end

  protected

  # Hook called at the end of {#initialize}; no-op by default.
  def after_initialize
  end
end
|
87
|
+
|
88
|
+
# Simple expectation that restricts a single dataset (the one on
# {#side}) instead of matching records between two datasets.
class Filter < Simple
  # @return [Symbol] always :filter
  def kind
    :filter
  end

  # Build the filter expression for this expectation.
  #
  # For `:==` and `:'!='` this is a one-entry hash of the two objects'
  # expressions (inverted with `~` for `:'!='`); for every other operator
  # it is a Sequel boolean expression over the objects' identifiers.
  def to_expr
    if @operator == :== || @operator == :'!='
      equality = { @meta_object_1.to_expr => @meta_object_2.to_expr }
      return equality if @operator == :==

      return ~equality
    end

    Sequel::SQL::BooleanExpression.new(
      @operator, @meta_object_1.to_identifier, @meta_object_2.to_identifier
    )
  end

  # Filter +dataset+ with this expectation's expression, but only when
  # +side+ is the side this filter belongs to; otherwise the dataset is
  # returned unchanged.
  def apply_to(dataset, side)
    return dataset if side != @side

    dataset.filter(to_expr)
  end

  # Filters never need decollation.
  def decollation_needed?
    false
  end

  private

  # Record which side the filter applies to: the side of whichever meta
  # object is used when the first one is static, else the first one's.
  def after_initialize
    super
    @side =
      if @meta_object_1.static?
        @meta_object_2.side
      else
        @meta_object_1.side
      end
  end
end
|
121
|
+
|
122
|
+
# Simple expectation that matches records by grouping on one meta object
# per side.
class Match < Simple
  # Add the meta object belonging to +side+ to the dataset's group-match
  # list, aliased to the merged field's name.
  #
  # @param dataset object responding to +group_match_more+
  # @param [Symbol] side must equal the side of one of the meta objects
  # @raise [ArgumentError] if neither meta object is on +side+
  def apply_to(dataset, side)
    target =
      if @meta_object_1.side == side
        @meta_object_1
      elsif @meta_object_2.side == side
        @meta_object_2
      else
        raise ArgumentError, "Invalid `side` argument: #{side}"
      end

    dataset.group_match_more({
      :meta_object => target,
      :alias => merged_field.name
    })
  end

  # The result of merging the two meta objects, computed once and cached.
  def merged_field
    @merged_field ||= @meta_object_1.merge(@meta_object_2)
  end

  # Warn when two String objects are compared across different database
  # types, or (same database type) when both objects report a collation
  # and the collations differ.
  def display_warnings
    object_1 = @meta_object_1.object
    object_2 = @meta_object_2.object
    return unless object_1.ruby_type[:type] == String && object_2.ruby_type[:type] == String

    if @meta_object_1.dataset.database_type != @meta_object_2.dataset.database_type
      warn "NOTE: You are comparing two string fields (#{object_1.name} and #{object_2.name}) from different databases. This may result in unexpected results, as different databases compare strings differently. Consider using the =binary= function."
    elsif object_1.respond_to?(:collation) && object_2.respond_to?(:collation) && object_1.collation != object_2.collation
      # Both objects must support #collation; the collations reported in
      # the message come from the same two objects that were compared.
      warn "NOTE: The two string fields you are comparing (#{object_1.name} and #{object_2.name}) have different collations (#{object_1.collation} vs. #{object_2.collation}). This may result in unexpected results, as the database may compare them differently. Consider using the =exactly= method."
    end
  end
end
|
155
|
+
|
156
|
+
# Match expectation whose kind is :self.
class Self < Match
  # @return [Symbol] always :self
  def kind
    :self
  end
end
|
159
|
+
|
160
|
+
# Match expectation whose kind is :cross.
class Cross < Match
  # @return [Symbol] always :cross
  def kind
    :cross
  end
end
|
163
|
+
|
164
|
+
# Match expectation whose kind is :dual.
class Dual < Match
  # @return [Symbol] always :dual
  def kind
    :dual
  end
end
|
167
|
+
end
|
168
|
+
end
|
data/lib/linkage/field.rb
CHANGED
@@ -7,13 +7,13 @@ module Linkage
|
|
7
7
|
|
8
8
|
# Create a new instance of Field.
|
9
9
|
#
|
10
|
+
# @param [Linkage::Dataset] dataset
|
10
11
|
# @param [Symbol] name The field's name
|
11
12
|
# @param [Hash] schema The field's schema information
|
12
|
-
|
13
|
-
|
13
|
+
def initialize(dataset, name, schema)
|
14
|
+
@dataset = dataset
|
14
15
|
@name = name
|
15
16
|
@schema = schema
|
16
|
-
@ruby_type = ruby_type
|
17
17
|
end
|
18
18
|
|
19
19
|
# Convert the column schema information to a hash of column options, one of
|
@@ -63,6 +63,8 @@ module Linkage
|
|
63
63
|
else
|
64
64
|
{:type=>String}
|
65
65
|
end
|
66
|
+
hsh[:collate] = collation
|
67
|
+
|
66
68
|
hsh.delete_if { |k, v| v.nil? }
|
67
69
|
@ruby_type = {:type => hsh.delete(:type)}
|
68
70
|
@ruby_type[:opts] = hsh if !hsh.empty?
|
@@ -70,7 +72,7 @@ module Linkage
|
|
70
72
|
@ruby_type
|
71
73
|
end
|
72
74
|
|
73
|
-
def to_expr(
|
75
|
+
def to_expr(options = {})
|
74
76
|
@name
|
75
77
|
end
|
76
78
|
|
@@ -81,5 +83,29 @@ module Linkage
|
|
81
83
|
def primary_key?
|
82
84
|
schema && schema[:primary_key]
|
83
85
|
end
|
86
|
+
|
87
|
+
def collation
|
88
|
+
schema[:collation]
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
# A special field used for merging two {Data} objects together. It
|
93
|
+
# has no dataset or schema.
|
94
|
+
class MergeField < Field
|
95
|
+
attr_reader :database_type
|
96
|
+
|
97
|
+
# Create a new instance of MergeField.
|
98
|
+
#
|
99
|
+
# @param [Symbol] name The field's name
|
100
|
+
# @param [Hash] ruby_type The field's schema information
|
101
|
+
def initialize(name, ruby_type, database_type = nil)
|
102
|
+
@name = name
|
103
|
+
@ruby_type = ruby_type
|
104
|
+
@database_type = database_type
|
105
|
+
end
|
106
|
+
|
107
|
+
def collation
|
108
|
+
@ruby_type.has_key?(:opts) ? @ruby_type[:opts][:collate] : nil
|
109
|
+
end
|
84
110
|
end
|
85
111
|
end
|
data/lib/linkage/field_set.rb
CHANGED
@@ -2,9 +2,12 @@ module Linkage
|
|
2
2
|
class FieldSet < Hash
|
3
3
|
attr_reader :primary_key
|
4
4
|
|
5
|
-
|
6
|
-
|
7
|
-
|
5
|
+
# Create a new FieldSet.
|
6
|
+
#
|
7
|
+
# @param [Linkage::Dataset] dataset
|
8
|
+
def initialize(dataset)
|
9
|
+
dataset.schema.each do |(name, column_schema)|
|
10
|
+
f = Field.new(dataset, name, column_schema)
|
8
11
|
self[name] = f
|
9
12
|
|
10
13
|
if @primary_key.nil? && column_schema[:primary_key]
|