linkage 0.0.6 → 0.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +10 -0
- data/Gemfile +15 -13
- data/Gemfile.lock +67 -37
- data/Guardfile +0 -2
- data/Rakefile +122 -25
- data/lib/linkage/comparator.rb +172 -0
- data/lib/linkage/comparators/binary.rb +12 -0
- data/lib/linkage/comparators/compare.rb +46 -0
- data/lib/linkage/comparators/within.rb +32 -0
- data/lib/linkage/configuration.rb +285 -153
- data/lib/linkage/data.rb +32 -7
- data/lib/linkage/dataset.rb +107 -32
- data/lib/linkage/decollation.rb +93 -0
- data/lib/linkage/expectation.rb +21 -0
- data/lib/linkage/expectations/exhaustive.rb +63 -0
- data/lib/linkage/expectations/simple.rb +168 -0
- data/lib/linkage/field.rb +30 -4
- data/lib/linkage/field_set.rb +6 -3
- data/lib/linkage/function.rb +50 -3
- data/lib/linkage/functions/binary.rb +30 -0
- data/lib/linkage/functions/cast.rb +54 -0
- data/lib/linkage/functions/length.rb +29 -0
- data/lib/linkage/functions/strftime.rb +12 -11
- data/lib/linkage/functions/trim.rb +8 -0
- data/lib/linkage/group.rb +20 -0
- data/lib/linkage/import_buffer.rb +5 -16
- data/lib/linkage/meta_object.rb +139 -0
- data/lib/linkage/result_set.rb +74 -17
- data/lib/linkage/runner/single_threaded.rb +125 -10
- data/lib/linkage/version.rb +3 -0
- data/lib/linkage.rb +11 -0
- data/linkage.gemspec +16 -121
- data/test/config.yml +5 -0
- data/test/helper.rb +73 -8
- data/test/integration/test_collation.rb +45 -0
- data/test/integration/test_configuration.rb +268 -0
- data/test/integration/test_cross_linkage.rb +4 -17
- data/test/integration/test_dataset.rb +45 -2
- data/test/integration/test_dual_linkage.rb +40 -24
- data/test/integration/test_functions.rb +22 -0
- data/test/integration/test_result_set.rb +85 -0
- data/test/integration/test_scoring.rb +84 -0
- data/test/integration/test_self_linkage.rb +5 -0
- data/test/integration/test_within_comparator.rb +100 -0
- data/test/unit/comparators/test_compare.rb +105 -0
- data/test/unit/comparators/test_within.rb +57 -0
- data/test/unit/expectations/test_exhaustive.rb +111 -0
- data/test/unit/expectations/test_simple.rb +303 -0
- data/test/unit/functions/test_binary.rb +54 -0
- data/test/unit/functions/test_cast.rb +98 -0
- data/test/unit/functions/test_length.rb +52 -0
- data/test/unit/functions/test_strftime.rb +17 -13
- data/test/unit/functions/test_trim.rb +11 -4
- data/test/unit/test_comparator.rb +124 -0
- data/test/unit/test_configuration.rb +137 -175
- data/test/unit/test_data.rb +44 -0
- data/test/unit/test_dataset.rb +73 -21
- data/test/unit/test_decollation.rb +201 -0
- data/test/unit/test_field.rb +38 -14
- data/test/unit/test_field_set.rb +12 -8
- data/test/unit/test_function.rb +83 -16
- data/test/unit/test_group.rb +28 -0
- data/test/unit/test_import_buffer.rb +13 -27
- data/test/unit/test_meta_object.rb +208 -0
- data/test/unit/test_result_set.rb +221 -3
- metadata +82 -190
data/lib/linkage/dataset.rb
CHANGED
@@ -2,13 +2,28 @@ module Linkage
|
|
2
2
|
# Delegator around Sequel::Dataset with some extra functionality.
|
3
3
|
class Dataset
|
4
4
|
attr_reader :field_set, :table_name
|
5
|
+
attr_accessor :linkage_options
|
5
6
|
|
6
|
-
def initialize(
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
7
|
+
def initialize(*args)
|
8
|
+
if args.length == 1
|
9
|
+
@dataset = args[0]
|
10
|
+
@db = @dataset.db
|
11
|
+
@table_name = @dataset.first_source_table
|
12
|
+
|
13
|
+
if !@db.kind_of?(Sequel::Collation)
|
14
|
+
@db.extend(Sequel::Collation)
|
15
|
+
end
|
16
|
+
else
|
17
|
+
uri, table, options = args
|
18
|
+
options ||= {}
|
19
|
+
|
20
|
+
@table_name = table.to_sym
|
21
|
+
@db = Sequel.connect(uri, options)
|
22
|
+
@db.extend(Sequel::Collation)
|
23
|
+
@dataset = @db[@table_name]
|
24
|
+
end
|
25
|
+
@field_set = FieldSet.new(self)
|
26
|
+
@linkage_options = {}
|
12
27
|
end
|
13
28
|
|
14
29
|
def obj
|
@@ -28,67 +43,127 @@ module Linkage
|
|
28
43
|
conf
|
29
44
|
end
|
30
45
|
|
31
|
-
def
|
32
|
-
@
|
46
|
+
def database_type
|
47
|
+
@db.database_type
|
33
48
|
end
|
34
49
|
|
35
|
-
|
36
|
-
|
50
|
+
# Set objects to use for group matching. Accepts either {Linkage::MetaObject} or a
|
51
|
+
# hash with options (valid options are :meta_object, :alias, and :cast).
|
52
|
+
#
|
53
|
+
# @example
|
54
|
+
# dataset.group_match(meta_object_1,
|
55
|
+
# {:meta_object => meta_object_2, :alias => :foo})
|
56
|
+
def group_match(*args)
|
57
|
+
args.collect! do |arg|
|
58
|
+
case arg
|
59
|
+
when Linkage::MetaObject
|
60
|
+
{ :meta_object => arg }
|
61
|
+
when Hash
|
62
|
+
if !arg.has_key?(:meta_object)
|
63
|
+
raise ArgumentError, "Invalid option hash, missing :meta_object key"
|
64
|
+
end
|
65
|
+
(arg.keys - [:meta_object, :alias, :cast]).each do |invalid_key|
|
66
|
+
warn "Invalid key in option hash: #{invalid_key}"
|
67
|
+
end
|
68
|
+
arg
|
69
|
+
else
|
70
|
+
raise ArgumentError, "expected Hash or MetaObject, got #{arg.class}"
|
71
|
+
end
|
72
|
+
end
|
73
|
+
clone(:group_match => args)
|
74
|
+
end
|
75
|
+
|
76
|
+
# Add additional objects to use for group matching.
|
77
|
+
def group_match_more(*args)
|
78
|
+
args = @linkage_options[:group_match] + args if @linkage_options[:group_match]
|
79
|
+
group_match(*args)
|
37
80
|
end
|
38
81
|
|
39
|
-
def clone(
|
40
|
-
|
41
|
-
|
82
|
+
# Copy this dataset wrapper, optionally applying new options.
#
# The options are split in two groups: `:group_match` is a Linkage-level
# option and is merged into the copy's `linkage_options`; every other key
# is meant for the wrapped object (`obj`). When `:new_obj` is given it is
# used as the copy's wrapped object as-is; otherwise the current wrapped
# object is cloned with the non-Linkage options only, so Linkage-specific
# keys such as `:group_match` are not passed on to it.
#
# @param [Hash] new_options
# @return the copy, with its own `linkage_options` and wrapped object
def clone(new_options = {})
  new_linkage_options = {}
  new_obj_options = {}
  new_options.each_pair do |k, v|
    case k
    when :group_match
      new_linkage_options[k] = v
    else
      new_obj_options[k] = v
    end
  end
  new_obj = new_options[:new_obj]

  result = super()
  result.linkage_options = @linkage_options.merge(new_linkage_options)

  if new_obj
    result.obj = new_obj
  else
    # Only hand the wrapped object the options that were sorted out for it.
    result.obj = obj.clone(new_obj_options)
  end

  result
end
|
54
106
|
|
55
107
|
def each_group(min = 2)
|
56
|
-
|
108
|
+
group_match = @linkage_options[:group_match] || []
|
109
|
+
ruby_types = group_match.inject({}) do |hsh, m|
|
110
|
+
key = m[:alias] || m[:meta_object].to_expr
|
111
|
+
hsh[key] = m[:meta_object].ruby_type
|
112
|
+
hsh
|
113
|
+
end
|
114
|
+
options = {:database_type => database_type, :ruby_types => ruby_types }
|
115
|
+
@dataset.group_and_count(*match_expressions).having{count >= min}.each do |row|
|
57
116
|
count = row.delete(:count)
|
58
|
-
|
117
|
+
group = Group.new(row, options.merge(:count => count))
|
118
|
+
yield group
|
59
119
|
end
|
60
120
|
end
|
61
121
|
|
62
|
-
def group_by_matches(
|
63
|
-
expr =
|
122
|
+
def group_by_matches(raw = true)
|
123
|
+
expr = raw ? raw_match_expressions : match_expressions
|
64
124
|
group(*expr)
|
65
125
|
end
|
66
126
|
|
67
127
|
# Narrow this dataset down to the records that belong to the given group.
#
# Each key in `group.values` has to correspond to one of this dataset's
# `:group_match` entries, either through the entry's `:alias` or through
# the entry's expression. One `{expression => value}` condition is built
# per group value and all of them are passed to `filter`.
#
# @param group an object whose `values` is a Hash of key => value pairs
# @return whatever `filter` returns for the collected conditions
# @raise [RuntimeError] if a group value has no matching entry
def dataset_for_group(group)
  conditions = []
  entries = @linkage_options[:group_match] || []
  group.values.each_pair do |key, value|
    # look for the first entry whose alias or expression is this key
    matched_expr = nil
    hit = entries.any? do |entry|
      matched_expr = entry[:meta_object].to_expr
      (entry[:alias] && entry[:alias] == key) || matched_expr == key
    end

    unless hit
      raise "this dataset isn't compatible with the given group"
    end
    conditions << {matched_expr => value}
  end
  filter(*conditions)
end
|
77
147
|
|
148
|
+
def schema
|
149
|
+
@db.schema(@table_name)
|
150
|
+
end
|
151
|
+
|
78
152
|
private
|
79
153
|
|
80
|
-
def
|
81
|
-
|
82
|
-
|
83
|
-
end
|
154
|
+
def raw_match_expressions
|
155
|
+
group_match = @linkage_options[:group_match] || []
|
156
|
+
group_match.collect { |m| m[:meta_object].to_expr }
|
84
157
|
end
|
85
158
|
|
86
159
|
# Build the expressions used to group records for matching.
#
# Each `:group_match` entry contributes its meta object's expression,
# cast to `:cast` (when given) and then aliased to `:alias` (when given).
# The cast is applied before the alias on purpose: once an expression has
# been aliased, the result is the alias wrapper rather than the value, so
# casting has to happen on the underlying expression first.
#
# @return [Array] one expression per group match entry (empty when no
#   group match has been configured)
def match_expressions
  group_match = @linkage_options[:group_match] || []
  group_match.collect do |m|
    expr = m[:meta_object].to_expr
    expr = expr.cast(m[:cast]) if m[:cast]
    expr = expr.as(m[:alias]) if m[:alias]
    expr
  end
end
|
93
168
|
|
94
169
|
def method_missing(name, *args, &block)
|
@@ -0,0 +1,93 @@
|
|
1
|
+
# encoding: utf-8
module Linkage
  # Helpers that rewrite a string into a canonical form so that two strings
  # a given database collation treats as equal also compare equal in Ruby.
  module Decollation
    # Canonical character => the other characters that are folded into it
    # for MySQL's latin1_swedish_ci collation. Characters that are not
    # listed here are left untouched.
    LATIN1_SWEDISH_CI_FOLDS = {
      'A'  => 'aÀÁÂÃàáâã',
      'B'  => 'b',
      'C'  => 'cÇç',
      'D'  => 'dÐð',
      'E'  => 'eÈÉÊËèéêë',
      'F'  => 'f',
      'G'  => 'g',
      'H'  => 'h',
      'I'  => 'iÌÍÎÏìíîï',
      'J'  => 'j',
      'K'  => 'k',
      'L'  => 'l',
      'M'  => 'm',
      'N'  => 'nÑñ',
      'O'  => 'oÒÓÔÕòóôõ',
      'P'  => 'p',
      'Q'  => 'q',
      'R'  => 'r',
      'S'  => 's',
      'T'  => 't',
      'U'  => 'uÙÚÛùúû',
      'V'  => 'v',
      'W'  => 'w',
      'X'  => 'x',
      'Y'  => 'yÜÝüý',
      'Z'  => 'z',
      '['  => 'Åå',
      '\\' => 'ÄÆäæ',
      ']'  => 'Öö',
      'Ø'  => 'ø',
      'Þ'  => 'þ'
    }.freeze

    # Flattened lookup table: folded character => canonical character.
    LATIN1_SWEDISH_CI_MAP =
      LATIN1_SWEDISH_CI_FOLDS.each_with_object({}) do |(canonical, variants), map|
        variants.each_char { |char| map[char] = canonical }
      end.freeze

    # Canonicalize a string for the given database type and collation.
    # Only MySQL is handled; for any other database type the string is
    # returned unchanged.
    #
    # @param [String] string
    # @param [Symbol] database_type e.g. `:mysql`
    # @param [String] collation collation name
    # @return [String]
    def decollate(string, database_type, collation)
      case database_type
      when :mysql
        decollate_mysql(string, collation)
      else
        string
      end
    end

    # Canonicalize a string for a MySQL collation. Only
    # "latin1_swedish_ci" is handled; for any other collation the string
    # is returned unchanged.
    #
    # @param [String] string
    # @param [String] collation collation name
    # @return [String]
    def decollate_mysql(string, collation)
      case collation
      when "latin1_swedish_ci"
        decollate_mysql_latin1_swedish_ci(string)
      else
        string
      end
    end

    # Canonicalize a string for MySQL's latin1_swedish_ci collation:
    # surrounding whitespace is stripped and every character is replaced
    # by its canonical character from LATIN1_SWEDISH_CI_MAP. The argument
    # itself is not modified.
    #
    # NOTE(review): `strip` removes leading whitespace as well as trailing
    # whitespace; MySQL's space padding presumably only ignores trailing
    # spaces -- confirm that dropping leading whitespace is intended.
    #
    # @param [String] string
    # @return [String] a new, folded string
    def decollate_mysql_latin1_swedish_ci(string)
      stripped = string.strip
      # Build the result in a single pass instead of assigning by character
      # index, which is a linear scan per assignment on multibyte strings.
      # Seeding with an empty slice keeps the input's encoding.
      stripped.each_char.each_with_object(stripped[0, 0]) do |char, folded|
        folded << LATIN1_SWEDISH_CI_MAP.fetch(char, char)
      end
    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module Linkage
|
2
|
+
# The Expectation class contains information about how two datasets
|
3
|
+
# should be linked.
|
4
|
+
class Expectation
|
5
|
+
def kind
|
6
|
+
raise NotImplementedError
|
7
|
+
end
|
8
|
+
|
9
|
+
def apply_to(*args)
|
10
|
+
raise NotImplementedError
|
11
|
+
end
|
12
|
+
|
13
|
+
def decollation_needed?
|
14
|
+
false
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
Dir.glob(File.expand_path(File.join(File.dirname(__FILE__), "expectations", "*.rb"))).each do |filename|
|
20
|
+
require filename
|
21
|
+
end
|
@@ -0,0 +1,63 @@
|
|
1
|
+
module Linkage
  module Expectations
    # An expectation built around a comparator: the comparator's left-hand
    # and right-hand arguments decide which kind of linkage this is and
    # which values get selected from each dataset, and a score is accepted
    # or rejected by comparing it against a threshold.
    class Exhaustive < Expectation
      attr_reader :comparator, :threshold, :mode

      # @param comparator an object exposing `lhs_args` and `rhs_args`
      # @param threshold the value scores are compared against
      # @param [Symbol] mode `:equal` (score must equal the threshold) or
      #   `:min` (score must be at least the threshold)
      def initialize(comparator, threshold, mode)
        @comparator = comparator
        @threshold = threshold
        @mode = mode
      end

      # The kind of linkage implied by the comparator's arguments. The
      # result is computed once and cached.
      #
      # @return [Symbol] `:self`, `:cross`, or `:dual`
      def kind
        @kind ||= determine_kind
      end

      # Add the comparator arguments for the given side to the dataset's
      # selection, each aliased to the argument's name.
      #
      # @param dataset the dataset to select from
      # @param [Symbol] side `:lhs` or `:rhs`
      # @return the result of `dataset.select_more`
      # @raise [ArgumentError] if `side` is neither `:lhs` nor `:rhs`
      def apply_to(dataset, side)
        args =
          case side
          when :lhs
            comparator.lhs_args
          when :rhs
            comparator.rhs_args
          else
            raise ArgumentError, "Invalid `side` argument: #{side}"
          end
        exprs = args.collect { |arg| arg.to_expr.as(arg.name) }
        dataset.select_more(*exprs)
      end

      # Whether the given score meets this expectation.
      #
      # @param score a value comparable with the threshold
      # @return [Boolean] false when the mode is not `:equal` or `:min`
      def satisfied?(score)
        case mode
        when :equal
          score == threshold
        when :min
          score >= threshold
        else
          false
        end
      end

      private

      # Work out the linkage kind from the comparator's arguments.
      def determine_kind
        lhs_args = @comparator.lhs_args
        rhs_args = @comparator.rhs_args

        # It is a self-linkage only when both sides list the same objects,
        # position by position.
        same_objects = lhs_args.length == rhs_args.length &&
          lhs_args.zip(rhs_args).all? { |lhs_arg, rhs_arg| lhs_arg.objects_equal?(rhs_arg) }
        return :self if same_objects

        # Check for dual-linkage. Assume that all lhs arguments have the
        # same dataset, as well as all the rhs arguments. Only check the
        # first argument of each side.
        lhs_args[0].datasets_equal?(rhs_args[0]) ? :cross : :dual
      end
    end
  end
end
|
@@ -0,0 +1,168 @@
|
|
1
|
+
module Linkage
|
2
|
+
module Expectations
|
3
|
+
class Simple < Expectation
|
4
|
+
# The dataset this expectation applies to: `:lhs` or `:rhs`. This
|
5
|
+
# only applies to filter expectations.
|
6
|
+
# @return [Symbol]
|
7
|
+
attr_reader :side
|
8
|
+
|
9
|
+
attr_reader :meta_object_1, :meta_object_2, :operator
|
10
|
+
|
11
|
+
VALID_OPERATORS = [:==, :'!=', :>, :<, :>=, :<=]
|
12
|
+
|
13
|
+
# Automatically create an expectation type depending on the arguments.
|
14
|
+
#
|
15
|
+
# @param [Linkage::MetaObject] meta_object_1
|
16
|
+
# @param [Linkage::MetaObject] meta_object_2
|
17
|
+
# @param [Symbol] operator Valid operators: `:==`, `:'!='`, `:>`, `:<`, `:>=`, `:<=`
|
18
|
+
def self.create(meta_object_1, meta_object_2, operator)
|
19
|
+
klass =
|
20
|
+
if meta_object_1.static? && meta_object_2.static?
|
21
|
+
raise ArgumentError, "An expectation with two static objects is invalid"
|
22
|
+
elsif meta_object_1.static? || meta_object_2.static?
|
23
|
+
Filter
|
24
|
+
elsif meta_object_1.side == meta_object_2.side
|
25
|
+
if !meta_object_1.datasets_equal?(meta_object_2)
|
26
|
+
raise ArgumentError, "An expectation with two dynamic objects with the same side but different datasets is invalid"
|
27
|
+
end
|
28
|
+
Filter
|
29
|
+
elsif meta_object_1.objects_equal?(meta_object_2)
|
30
|
+
Self
|
31
|
+
elsif meta_object_1.datasets_equal?(meta_object_2)
|
32
|
+
Cross
|
33
|
+
else
|
34
|
+
Dual
|
35
|
+
end
|
36
|
+
|
37
|
+
klass.new(meta_object_1, meta_object_2, operator)
|
38
|
+
end
|
39
|
+
|
40
|
+
# Creates a new Simple.
|
41
|
+
#
|
42
|
+
# @param [Linkage::MetaObject] meta_object_1
|
43
|
+
# @param [Linkage::MetaObject] meta_object_2
|
44
|
+
# @param [Symbol] operator Valid operators: `:==`, `:'!='`, `:>`, `:<`, `:>=`, `:<=`
|
45
|
+
def initialize(meta_object_1, meta_object_2, operator)
|
46
|
+
@meta_object_1 = meta_object_1
|
47
|
+
@meta_object_2 = meta_object_2
|
48
|
+
@operator = operator
|
49
|
+
|
50
|
+
if !VALID_OPERATORS.include?(operator)
|
51
|
+
raise ArgumentError, "Invalid operator: #{operator.inspect}"
|
52
|
+
end
|
53
|
+
|
54
|
+
after_initialize
|
55
|
+
end
|
56
|
+
|
57
|
+
def same_except_side?(other)
|
58
|
+
other.is_a?(Simple) &&
|
59
|
+
operator == other.operator &&
|
60
|
+
meta_object_1.objects_equal?(other.meta_object_1) &&
|
61
|
+
meta_object_2.objects_equal?(other.meta_object_2)
|
62
|
+
end
|
63
|
+
|
64
|
+
def exactly!
|
65
|
+
function_1 = Function['binary'].new(@meta_object_1.object, :dataset => @meta_object_1.dataset)
|
66
|
+
function_2 = Function['binary'].new(@meta_object_2.object, :dataset => @meta_object_2.dataset)
|
67
|
+
@meta_object_1 = MetaObject.new(function_1, @meta_object_1.side)
|
68
|
+
@meta_object_2 = MetaObject.new(function_2, @meta_object_2.side)
|
69
|
+
end
|
70
|
+
|
71
|
+
# Display any warnings about this expectation.
|
72
|
+
def display_warnings
|
73
|
+
end
|
74
|
+
|
75
|
+
def decollation_needed?
|
76
|
+
merged_field.ruby_type[:type] == String && (
|
77
|
+
@meta_object_1.collation != @meta_object_2.collation ||
|
78
|
+
@meta_object_1.database_type != @meta_object_2.database_type
|
79
|
+
)
|
80
|
+
end
|
81
|
+
|
82
|
+
protected
|
83
|
+
|
84
|
+
def after_initialize
|
85
|
+
end
|
86
|
+
end
|
87
|
+
|
88
|
+
class Filter < Simple
|
89
|
+
def kind; :filter; end
|
90
|
+
|
91
|
+
def to_expr
|
92
|
+
case @operator
|
93
|
+
when :==, :'!='
|
94
|
+
expr = { @meta_object_1.to_expr => @meta_object_2.to_expr }
|
95
|
+
@operator == :== ? expr : ~expr
|
96
|
+
else
|
97
|
+
Sequel::SQL::BooleanExpression.new(@operator,
|
98
|
+
@meta_object_1.to_identifier, @meta_object_2.to_identifier)
|
99
|
+
end
|
100
|
+
end
|
101
|
+
|
102
|
+
def apply_to(dataset, side)
|
103
|
+
if side != @side
|
104
|
+
return dataset
|
105
|
+
end
|
106
|
+
|
107
|
+
dataset.filter(self.to_expr)
|
108
|
+
end
|
109
|
+
|
110
|
+
def decollation_needed?
|
111
|
+
false
|
112
|
+
end
|
113
|
+
|
114
|
+
private
|
115
|
+
|
116
|
+
def after_initialize
|
117
|
+
super
|
118
|
+
@side = @meta_object_1.static? ? @meta_object_2.side : @meta_object_1.side
|
119
|
+
end
|
120
|
+
end
|
121
|
+
|
122
|
+
# A simple expectation that matches an object from one side against an
# object from the other side. Subclasses only supply `kind`.
class Match < Simple
  # Register the meta object that belongs to the given side as a group
  # match on the dataset, aliased to the merged field's name.
  #
  # @param dataset the dataset to add the group match to
  # @param [Symbol] side the side the dataset is on
  # @return the result of `dataset.group_match_more`
  # @raise [ArgumentError] if neither meta object is on `side`
  def apply_to(dataset, side)
    target =
      if @meta_object_1.side == side
        @meta_object_1
      elsif @meta_object_2.side == side
        @meta_object_2
      else
        raise ArgumentError, "Invalid `side` argument: #{side}"
      end

    dataset.group_match_more({
      :meta_object => target,
      :alias => merged_field.name
    })
  end

  # The result of merging the two meta objects, computed once and cached.
  def merged_field
    @merged_field ||= @meta_object_1.merge(@meta_object_2)
  end

  # Display any warnings about this expectation. Warnings are only issued
  # when both objects are strings: one when they come from different
  # database types, otherwise one when both objects report a collation
  # and the collations differ.
  def display_warnings
    object_1 = @meta_object_1.object
    object_2 = @meta_object_2.object
    return unless object_1.ruby_type[:type] == String && object_2.ruby_type[:type] == String

    if @meta_object_1.dataset.database_type != @meta_object_2.dataset.database_type
      warn "NOTE: You are comparing two string fields (#{object_1.name} and #{object_2.name}) from different databases. This may result in unexpected results, as different databases compare strings differently. Consider using the =binary= function."
    elsif object_1.respond_to?(:collation) && object_2.respond_to?(:collation) && object_1.collation != object_2.collation
      # Both objects must expose a collation before the two are compared.
      warn "NOTE: The two string fields you are comparing (#{object_1.name} and #{object_2.name}) have different collations (#{object_1.collation} vs. #{object_2.collation}). This may result in unexpected results, as the database may compare them differently. Consider using the =exactly= method."
    end
  end
end
|
155
|
+
|
156
|
+
class Self < Match
|
157
|
+
def kind; :self; end
|
158
|
+
end
|
159
|
+
|
160
|
+
class Cross < Match
|
161
|
+
def kind; :cross; end
|
162
|
+
end
|
163
|
+
|
164
|
+
class Dual < Match
|
165
|
+
def kind; :dual; end
|
166
|
+
end
|
167
|
+
end
|
168
|
+
end
|
data/lib/linkage/field.rb
CHANGED
@@ -7,13 +7,13 @@ module Linkage
|
|
7
7
|
|
8
8
|
# Create a new instance of Field.
|
9
9
|
#
|
10
|
+
# @param [Linkage::Dataset] dataset
|
10
11
|
# @param [Symbol] name The field's name
|
11
12
|
# @param [Hash] schema The field's schema information
|
12
|
-
|
13
|
-
|
13
|
+
def initialize(dataset, name, schema)
|
14
|
+
@dataset = dataset
|
14
15
|
@name = name
|
15
16
|
@schema = schema
|
16
|
-
@ruby_type = ruby_type
|
17
17
|
end
|
18
18
|
|
19
19
|
# Convert the column schema information to a hash of column options, one of
|
@@ -63,6 +63,8 @@ module Linkage
|
|
63
63
|
else
|
64
64
|
{:type=>String}
|
65
65
|
end
|
66
|
+
hsh[:collate] = collation
|
67
|
+
|
66
68
|
hsh.delete_if { |k, v| v.nil? }
|
67
69
|
@ruby_type = {:type => hsh.delete(:type)}
|
68
70
|
@ruby_type[:opts] = hsh if !hsh.empty?
|
@@ -70,7 +72,7 @@ module Linkage
|
|
70
72
|
@ruby_type
|
71
73
|
end
|
72
74
|
|
73
|
-
def to_expr(
|
75
|
+
def to_expr(options = {})
|
74
76
|
@name
|
75
77
|
end
|
76
78
|
|
@@ -81,5 +83,29 @@ module Linkage
|
|
81
83
|
def primary_key?
|
82
84
|
schema && schema[:primary_key]
|
83
85
|
end
|
86
|
+
|
87
|
+
def collation
|
88
|
+
schema[:collation]
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
# A special field used for merging two {Data} objects together. It
|
93
|
+
# has no dataset or schema.
|
94
|
+
class MergeField < Field
|
95
|
+
attr_reader :database_type
|
96
|
+
|
97
|
+
# Create a new instance of MergeField.
|
98
|
+
#
|
99
|
+
# @param [Symbol] name The field's name
|
100
|
+
# @param [Hash] ruby_type The field's schema information
|
101
|
+
def initialize(name, ruby_type, database_type = nil)
|
102
|
+
@name = name
|
103
|
+
@ruby_type = ruby_type
|
104
|
+
@database_type = database_type
|
105
|
+
end
|
106
|
+
|
107
|
+
def collation
|
108
|
+
@ruby_type.has_key?(:opts) ? @ruby_type[:opts][:collate] : nil
|
109
|
+
end
|
84
110
|
end
|
85
111
|
end
|
data/lib/linkage/field_set.rb
CHANGED
@@ -2,9 +2,12 @@ module Linkage
|
|
2
2
|
class FieldSet < Hash
|
3
3
|
attr_reader :primary_key
|
4
4
|
|
5
|
-
|
6
|
-
|
7
|
-
|
5
|
+
# Create a new FieldSet.
|
6
|
+
#
|
7
|
+
# @param [Linkage::Dataset] dataset
|
8
|
+
def initialize(dataset)
|
9
|
+
dataset.schema.each do |(name, column_schema)|
|
10
|
+
f = Field.new(dataset, name, column_schema)
|
8
11
|
self[name] = f
|
9
12
|
|
10
13
|
if @primary_key.nil? && column_schema[:primary_key]
|