linkage 0.0.6 → 0.0.8
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +10 -0
- data/Gemfile +15 -13
- data/Gemfile.lock +67 -37
- data/Guardfile +0 -2
- data/Rakefile +122 -25
- data/lib/linkage/comparator.rb +172 -0
- data/lib/linkage/comparators/binary.rb +12 -0
- data/lib/linkage/comparators/compare.rb +46 -0
- data/lib/linkage/comparators/within.rb +32 -0
- data/lib/linkage/configuration.rb +285 -153
- data/lib/linkage/data.rb +32 -7
- data/lib/linkage/dataset.rb +107 -32
- data/lib/linkage/decollation.rb +93 -0
- data/lib/linkage/expectation.rb +21 -0
- data/lib/linkage/expectations/exhaustive.rb +63 -0
- data/lib/linkage/expectations/simple.rb +168 -0
- data/lib/linkage/field.rb +30 -4
- data/lib/linkage/field_set.rb +6 -3
- data/lib/linkage/function.rb +50 -3
- data/lib/linkage/functions/binary.rb +30 -0
- data/lib/linkage/functions/cast.rb +54 -0
- data/lib/linkage/functions/length.rb +29 -0
- data/lib/linkage/functions/strftime.rb +12 -11
- data/lib/linkage/functions/trim.rb +8 -0
- data/lib/linkage/group.rb +20 -0
- data/lib/linkage/import_buffer.rb +5 -16
- data/lib/linkage/meta_object.rb +139 -0
- data/lib/linkage/result_set.rb +74 -17
- data/lib/linkage/runner/single_threaded.rb +125 -10
- data/lib/linkage/version.rb +3 -0
- data/lib/linkage.rb +11 -0
- data/linkage.gemspec +16 -121
- data/test/config.yml +5 -0
- data/test/helper.rb +73 -8
- data/test/integration/test_collation.rb +45 -0
- data/test/integration/test_configuration.rb +268 -0
- data/test/integration/test_cross_linkage.rb +4 -17
- data/test/integration/test_dataset.rb +45 -2
- data/test/integration/test_dual_linkage.rb +40 -24
- data/test/integration/test_functions.rb +22 -0
- data/test/integration/test_result_set.rb +85 -0
- data/test/integration/test_scoring.rb +84 -0
- data/test/integration/test_self_linkage.rb +5 -0
- data/test/integration/test_within_comparator.rb +100 -0
- data/test/unit/comparators/test_compare.rb +105 -0
- data/test/unit/comparators/test_within.rb +57 -0
- data/test/unit/expectations/test_exhaustive.rb +111 -0
- data/test/unit/expectations/test_simple.rb +303 -0
- data/test/unit/functions/test_binary.rb +54 -0
- data/test/unit/functions/test_cast.rb +98 -0
- data/test/unit/functions/test_length.rb +52 -0
- data/test/unit/functions/test_strftime.rb +17 -13
- data/test/unit/functions/test_trim.rb +11 -4
- data/test/unit/test_comparator.rb +124 -0
- data/test/unit/test_configuration.rb +137 -175
- data/test/unit/test_data.rb +44 -0
- data/test/unit/test_dataset.rb +73 -21
- data/test/unit/test_decollation.rb +201 -0
- data/test/unit/test_field.rb +38 -14
- data/test/unit/test_field_set.rb +12 -8
- data/test/unit/test_function.rb +83 -16
- data/test/unit/test_group.rb +28 -0
- data/test/unit/test_import_buffer.rb +13 -27
- data/test/unit/test_meta_object.rb +208 -0
- data/test/unit/test_result_set.rb +221 -3
- metadata +82 -190
data/lib/linkage/function.rb
CHANGED
@@ -38,10 +38,23 @@ module Linkage
|
|
38
38
|
nil
|
39
39
|
end
|
40
40
|
|
41
|
-
|
41
|
+
attr_reader :args
|
42
|
+
|
43
|
+
# Creates a new Function object. If the arguments contain only
|
44
|
+
# static objects, you should specify the dataset that this function
|
45
|
+
# belongs to as the last argument like so:
|
46
|
+
#
|
47
|
+
# Function.new(foo, bar, :dataset => dataset)
|
48
|
+
#
|
49
|
+
# Optionally, you can use the `dataset=` setter to do it later. Many
|
50
|
+
# functions require a dataset to work properly. If you try to use
|
51
|
+
# such a function without setting a dataset, it will raise a RuntimeError.
|
52
|
+
#
|
53
|
+
# @param [Linkage::Data, Object] args Function arguments; a trailing Hash is removed and used as options (e.g. `:dataset`)
|
42
54
|
def initialize(*args)
|
43
55
|
@names = [self.class.function_name]
|
44
56
|
@args = args
|
57
|
+
@options = args.last.is_a?(Hash) ? args.pop : {}
|
45
58
|
process_args
|
46
59
|
end
|
47
60
|
|
@@ -49,10 +62,25 @@ module Linkage
|
|
49
62
|
@name ||= @names.join("_").to_sym
|
50
63
|
end
|
51
64
|
|
65
|
+
def dataset
|
66
|
+
if @dataset.nil?
|
67
|
+
raise RuntimeError, "You must specify a dataset for static functions"
|
68
|
+
end
|
69
|
+
@dataset
|
70
|
+
end
|
71
|
+
|
72
|
+
def dataset=(dataset)
|
73
|
+
@dataset = dataset
|
74
|
+
end
|
75
|
+
|
52
76
|
def static?
|
53
77
|
@static
|
54
78
|
end
|
55
79
|
|
80
|
+
def ==(other)
|
81
|
+
equal?(other) || (other.is_a?(Function) && name == other.name && args == other.args && dataset == other.dataset)
|
82
|
+
end
|
83
|
+
|
56
84
|
# Subclasses must define this. The return value should be a Hash with
|
57
85
|
# the following elements:
|
58
86
|
# :type - column type (Ruby class) of the result
|
@@ -61,8 +89,14 @@ module Linkage
|
|
61
89
|
raise NotImplementedError
|
62
90
|
end
|
63
91
|
|
92
|
+
# Returns `nil` by default. Subclasses should redefine this if
|
93
|
+
# there is a collation.
|
94
|
+
def collation
|
95
|
+
nil
|
96
|
+
end
|
97
|
+
|
64
98
|
# @return [Sequel::SQL::Function]
|
65
|
-
def to_expr(
|
99
|
+
def to_expr(options = {})
|
66
100
|
self.class.function_name.to_sym.sql_function(*@values)
|
67
101
|
end
|
68
102
|
|
@@ -80,6 +114,14 @@ module Linkage
|
|
80
114
|
if arg.kind_of?(Data)
|
81
115
|
@names << arg.name
|
82
116
|
@static &&= arg.static?
|
117
|
+
|
118
|
+
# possibly set dataset
|
119
|
+
if @dataset.nil?
|
120
|
+
@dataset = arg.dataset
|
121
|
+
elsif @dataset != arg.dataset
|
122
|
+
raise ArgumentError, "Using dynamic data sources with different datasets is not permitted"
|
123
|
+
end
|
124
|
+
|
83
125
|
type = arg.ruby_type[:type]
|
84
126
|
value = arg.to_expr
|
85
127
|
else
|
@@ -87,11 +129,16 @@ module Linkage
|
|
87
129
|
type = arg.class
|
88
130
|
value = arg
|
89
131
|
end
|
90
|
-
if parameters && !parameters[i].include?(type)
|
132
|
+
if parameters && parameters[i] != [:any] && !parameters[i].include?(type)
|
91
133
|
raise TypeError, "expected type #{parameters[i].join(" or ")}, got #{type}"
|
92
134
|
end
|
93
135
|
@values << value
|
94
136
|
end
|
137
|
+
|
138
|
+
if @dataset.nil? && @options[:dataset]
|
139
|
+
# Set dataset for static functions manually
|
140
|
+
@dataset = @options[:dataset]
|
141
|
+
end
|
95
142
|
end
|
96
143
|
end
|
97
144
|
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module Linkage
|
2
|
+
module Functions
|
3
|
+
class Binary < Function
|
4
|
+
def self.function_name
|
5
|
+
"binary"
|
6
|
+
end
|
7
|
+
|
8
|
+
def self.parameters
|
9
|
+
[[String]]
|
10
|
+
end
|
11
|
+
|
12
|
+
def ruby_type
|
13
|
+
{:type => File}
|
14
|
+
end
|
15
|
+
|
16
|
+
def to_expr(options = {})
|
17
|
+
expr =
|
18
|
+
case dataset.database_type
|
19
|
+
when :sqlite
|
20
|
+
@values[0].cast(:blob)
|
21
|
+
when :postgres
|
22
|
+
@values[0].cast(:bytea)
|
23
|
+
else
|
24
|
+
@values[0].cast(:binary)
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
Function.register(Binary)
|
29
|
+
end
|
30
|
+
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
module Linkage
|
2
|
+
module Functions
|
3
|
+
class Cast < Function
|
4
|
+
def self.function_name
|
5
|
+
"cast"
|
6
|
+
end
|
7
|
+
|
8
|
+
def self.parameters
|
9
|
+
[[:any], [String]]
|
10
|
+
end
|
11
|
+
|
12
|
+
def ruby_type
|
13
|
+
type =
|
14
|
+
case @values[1]
|
15
|
+
when 'integer'
|
16
|
+
Fixnum
|
17
|
+
when 'binary'
|
18
|
+
File
|
19
|
+
else
|
20
|
+
raise "unknown type: #{@values[1]}"
|
21
|
+
end
|
22
|
+
|
23
|
+
{:type => type}
|
24
|
+
end
|
25
|
+
|
26
|
+
def to_expr(options = {})
|
27
|
+
cast =
|
28
|
+
case @values[1]
|
29
|
+
when 'integer'
|
30
|
+
case dataset.database_type
|
31
|
+
when :sqlite, :postgres, :h2
|
32
|
+
:integer
|
33
|
+
when :mysql
|
34
|
+
:signed
|
35
|
+
end
|
36
|
+
when 'binary'
|
37
|
+
case dataset.database_type
|
38
|
+
when :sqlite
|
39
|
+
:blob
|
40
|
+
when :postgres
|
41
|
+
:bytea
|
42
|
+
when :mysql, :h2
|
43
|
+
:binary
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
if cast
|
48
|
+
@values[0].cast(cast)
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
52
|
+
Function.register(Cast)
|
53
|
+
end
|
54
|
+
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module Linkage
|
2
|
+
module Functions
|
3
|
+
# Returns the number of characters in a string.
|
4
|
+
class Length < Function
|
5
|
+
def self.function_name
|
6
|
+
"length"
|
7
|
+
end
|
8
|
+
|
9
|
+
def self.parameters
|
10
|
+
[[String]]
|
11
|
+
end
|
12
|
+
|
13
|
+
def ruby_type
|
14
|
+
{:type => Fixnum}
|
15
|
+
end
|
16
|
+
|
17
|
+
def to_expr(options = {})
|
18
|
+
expr =
|
19
|
+
case dataset.database_type
|
20
|
+
when :mysql, :postgres
|
21
|
+
:char_length.sql_function(@values[0])
|
22
|
+
else
|
23
|
+
:length.sql_function(@values[0])
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
Function.register(Length)
|
28
|
+
end
|
29
|
+
end
|
@@ -14,17 +14,18 @@ module Linkage
|
|
14
14
|
{:type => String}
|
15
15
|
end
|
16
16
|
|
17
|
-
def to_expr(
|
18
|
-
|
19
|
-
|
20
|
-
:
|
21
|
-
|
22
|
-
:
|
23
|
-
|
24
|
-
:
|
25
|
-
|
26
|
-
|
27
|
-
|
17
|
+
def to_expr(options = {})
|
18
|
+
expr =
|
19
|
+
case dataset.database_type
|
20
|
+
when :mysql
|
21
|
+
:date_format.sql_function(*@values)
|
22
|
+
when :sqlite
|
23
|
+
:strftime.sql_function(@values[1], @values[0])
|
24
|
+
when :postgres
|
25
|
+
:to_char.sql_function(*@values)
|
26
|
+
else
|
27
|
+
:strftime.sql_function(@values[0], @values[1])
|
28
|
+
end
|
28
29
|
end
|
29
30
|
end
|
30
31
|
Function.register(Strftime)
|
data/lib/linkage/group.rb
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
module Linkage
|
2
2
|
class Group
|
3
|
+
include Linkage::Decollation
|
4
|
+
|
3
5
|
# @return [Hash] Hash of matching values
|
4
6
|
attr_reader :values
|
5
7
|
|
@@ -24,12 +26,30 @@ module Linkage
|
|
24
26
|
|
25
27
|
# @param [Hash] values Values that define this group
|
26
28
|
# @param [Hash] options
|
29
|
+
# @option options [Fixnum] :id The group ID
|
30
|
+
# @option options [Fixnum] :count How many records are in the group
|
31
|
+
# @option options [Hash] :ruby_types Hash of ruby types for each value
|
32
|
+
# @option options [Symbol] :database_type Database type passed to `decollate` when decollating values
|
27
33
|
# @example
|
28
34
|
# Linkage::Group.new({:foo => 123, :bar => 'baz'}, {:count => 5, :id => 456})
|
29
35
|
def initialize(values, options)
|
30
36
|
@count = options[:count]
|
31
37
|
@id = options[:id]
|
38
|
+
@ruby_types = options[:ruby_types]
|
39
|
+
@database_type = options[:database_type]
|
32
40
|
@values = values
|
33
41
|
end
|
42
|
+
|
43
|
+
def decollated_values
|
44
|
+
@values.inject({}) do |hsh, (key, value)|
|
45
|
+
ruby_type = @ruby_types[key]
|
46
|
+
if ruby_type && ruby_type.has_key?(:opts) && ruby_type[:opts].has_key?(:collate)
|
47
|
+
hsh[key] = decollate(value, @database_type, ruby_type[:opts][:collate])
|
48
|
+
else
|
49
|
+
hsh[key] = value
|
50
|
+
end
|
51
|
+
hsh
|
52
|
+
end
|
53
|
+
end
|
34
54
|
end
|
35
55
|
end
|
@@ -1,15 +1,11 @@
|
|
1
1
|
module Linkage
|
2
2
|
class ImportBuffer
|
3
|
-
# @param [
|
4
|
-
# @param [Symbol, String] table_name
|
3
|
+
# @param [Sequel::Dataset] dataset
|
5
4
|
# @param [Array<Symbol>] headers List of fields you want to insert
|
6
|
-
# @param [Hash] options Sequel.connect options
|
7
5
|
# @param [Fixnum] limit Number of records to insert at a time
|
8
|
-
def initialize(
|
9
|
-
@
|
10
|
-
@table_name = table_name.to_sym
|
6
|
+
def initialize(dataset, headers, limit = 1000)
|
7
|
+
@dataset = dataset
|
11
8
|
@headers = headers
|
12
|
-
@options = options
|
13
9
|
@limit = limit
|
14
10
|
@values = []
|
15
11
|
end
|
@@ -23,17 +19,10 @@ module Linkage
|
|
23
19
|
|
24
20
|
def flush
|
25
21
|
return if @values.empty?
|
26
|
-
|
27
|
-
|
28
|
-
ds.import(@headers, @values)
|
22
|
+
@dataset.db.synchronize do
|
23
|
+
@dataset.import(@headers, @values)
|
29
24
|
@values.clear
|
30
25
|
end
|
31
26
|
end
|
32
|
-
|
33
|
-
private
|
34
|
-
|
35
|
-
def database(&block)
|
36
|
-
Sequel.connect(@uri, @options, &block)
|
37
|
-
end
|
38
27
|
end
|
39
28
|
end
|
@@ -0,0 +1,139 @@
|
|
1
|
+
module Linkage
|
2
|
+
class MetaObject
|
3
|
+
attr_reader :object
|
4
|
+
attr_writer :side
|
5
|
+
|
6
|
+
# Creates a new MetaObject.
|
7
|
+
#
|
8
|
+
# @param [Object] object This can be a {Field}, {Function} or a regular
|
9
|
+
# Ruby object (Fixnum, String, etc). If `object` is not static (a {Field}
|
10
|
+
# or a {Function} that contains one or more {Field} objects), you should
|
11
|
+
# specify which "side" of the linkage the object belongs to (left-hand
|
12
|
+
# side or right-hand side) in the `side` argument.
|
13
|
+
# @param [Symbol] side `:lhs` for left-hand side or `:rhs` for right-hand
|
14
|
+
# side
|
15
|
+
def initialize(object, side = nil)
|
16
|
+
@object = object
|
17
|
+
@static = object.kind_of?(Linkage::Data) ? object.static? : true
|
18
|
+
if !side.nil? && side != :lhs && side != :rhs
|
19
|
+
raise ArgumentError, "invalid `side` argument, must be :lhs or :rhs"
|
20
|
+
end
|
21
|
+
@side = side
|
22
|
+
end
|
23
|
+
|
24
|
+
def side
|
25
|
+
if !@static && @side.nil?
|
26
|
+
raise RuntimeError, "Object is dynamic and side is not set"
|
27
|
+
end
|
28
|
+
@side
|
29
|
+
end
|
30
|
+
|
31
|
+
def dataset
|
32
|
+
@object.kind_of?(Linkage::Data) ? @object.dataset : nil
|
33
|
+
end
|
34
|
+
|
35
|
+
def dataset=(dataset)
|
36
|
+
if @object.kind_of?(Linkage::Data)
|
37
|
+
@object.dataset = dataset
|
38
|
+
else
|
39
|
+
raise RuntimeError, "You can't set the dataset of a non-data object."
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
def database_type
|
44
|
+
ds = dataset
|
45
|
+
ds ? ds.database_type : nil
|
46
|
+
end
|
47
|
+
|
48
|
+
def static?
|
49
|
+
@static
|
50
|
+
end
|
51
|
+
|
52
|
+
# Returns true if `other` is a MetaObject whose wrapped object equals this instance's object.
|
53
|
+
#
|
54
|
+
# @param [Linkage::MetaObject] other
|
55
|
+
# @return [Boolean]
|
56
|
+
def objects_equal?(other)
|
57
|
+
other.is_a?(Linkage::MetaObject) && other.object == self.object
|
58
|
+
end
|
59
|
+
|
60
|
+
# Returns true if `other` is a MetaObject whose dataset equals this instance's dataset.
|
61
|
+
#
|
62
|
+
# @param [Linkage::MetaObject] other
|
63
|
+
# @return [Boolean]
|
64
|
+
def datasets_equal?(other)
|
65
|
+
other.is_a?(Linkage::MetaObject) && other.dataset == self.dataset
|
66
|
+
end
|
67
|
+
|
68
|
+
# Returns an expression suitable for use in Sequel queries.
|
69
|
+
# @return [Object]
|
70
|
+
def to_expr
|
71
|
+
if @object.kind_of?(Linkage::Data)
|
72
|
+
@object.to_expr
|
73
|
+
else
|
74
|
+
@object
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
78
|
+
# Returns a Sequel identifier for {Data} objects, or the object itself.
|
79
|
+
# @return [Sequel::SQL::Identifier, Object]
|
80
|
+
def to_identifier
|
81
|
+
if @object.kind_of?(Linkage::Data)
|
82
|
+
Sequel::SQL::Identifier.new(@object.to_expr)
|
83
|
+
else
|
84
|
+
@object
|
85
|
+
end
|
86
|
+
end
|
87
|
+
|
88
|
+
# Return the name of the object for {Data} objects, nil for others.
|
89
|
+
# @return [Symbol, nil]
|
90
|
+
def name
|
91
|
+
if @object.kind_of?(Linkage::Data)
|
92
|
+
@object.name
|
93
|
+
else
|
94
|
+
nil
|
95
|
+
end
|
96
|
+
end
|
97
|
+
|
98
|
+
# Returns a {MergeField} if both objects are {Data} objects; otherwise
|
99
|
+
# raises an ArgumentError.
|
100
|
+
#
|
101
|
+
# @return [Linkage::MergeField]
|
102
|
+
def merge(other)
|
103
|
+
if @object.kind_of?(Linkage::Data) && other.object.kind_of?(Linkage::Data)
|
104
|
+
@object.merge(other.object)
|
105
|
+
else
|
106
|
+
raise ArgumentError, "Cannot merge a non-data object"
|
107
|
+
end
|
108
|
+
end
|
109
|
+
|
110
|
+
# Returns the Ruby type of the underlying object.
|
111
|
+
#
|
112
|
+
# @return [Hash]
|
113
|
+
# @see Linkage::Field#ruby_type
|
114
|
+
# @see Linkage::Function#ruby_type
|
115
|
+
def ruby_type
|
116
|
+
if @object.kind_of?(Linkage::Data)
|
117
|
+
@object.ruby_type
|
118
|
+
else
|
119
|
+
{:type => @object.class}
|
120
|
+
end
|
121
|
+
end
|
122
|
+
|
123
|
+
# Returns the collation of the underlying object.
|
124
|
+
#
|
125
|
+
# @return [Symbol, nil]
|
126
|
+
def collation
|
127
|
+
if @object.kind_of?(Linkage::Data)
|
128
|
+
@object.collation
|
129
|
+
else
|
130
|
+
nil
|
131
|
+
end
|
132
|
+
end
|
133
|
+
|
134
|
+
# Returns true if the underlying object is not a kind of {Linkage::Data}.
|
135
|
+
def raw?
|
136
|
+
!@object.kind_of?(Linkage::Data)
|
137
|
+
end
|
138
|
+
end
|
139
|
+
end
|
data/lib/linkage/result_set.rb
CHANGED
@@ -7,41 +7,98 @@ module Linkage
|
|
7
7
|
end
|
8
8
|
|
9
9
|
def groups_dataset
|
10
|
-
@groups_dataset ||= Dataset.new(
|
10
|
+
@groups_dataset ||= Dataset.new(database[:groups])
|
11
11
|
end
|
12
12
|
|
13
|
-
def database
|
14
|
-
|
13
|
+
def database
|
14
|
+
# FIXME: If the results database is the same as one of the datasets
|
15
|
+
# being linked, there will be two connections to said database. This
|
16
|
+
# could result in unexpected locking for non-concurrent databases (like
|
17
|
+
# SQLite).
|
18
|
+
@database ||= Sequel.connect(@config.results_uri, @config.results_uri_options)
|
15
19
|
end
|
16
20
|
|
17
21
|
def create_tables!
|
18
|
-
|
22
|
+
if @config.groups_table_needed?
|
19
23
|
schema = @config.groups_table_schema
|
20
|
-
|
24
|
+
if @config.decollation_needed?
|
25
|
+
database.create_table(@config.original_groups_table_name) do
|
26
|
+
schema.each { |col| column(*col) }
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
database.create_table(@config.groups_table_name) do
|
21
31
|
schema.each { |col| column(*col) }
|
22
32
|
end
|
33
|
+
end
|
23
34
|
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
Integer :dataset
|
29
|
-
index :group_id
|
35
|
+
if @config.scores_table_needed?
|
36
|
+
schema = @config.scores_table_schema
|
37
|
+
database.create_table(@config.scores_table_name) do
|
38
|
+
schema.each { |col| column(*col) }
|
30
39
|
end
|
31
40
|
end
|
41
|
+
|
42
|
+
schema = @config.matches_table_schema
|
43
|
+
database.create_table(@config.matches_table_name) do
|
44
|
+
schema.each { |col| column(*col) }
|
45
|
+
end
|
32
46
|
end
|
33
47
|
|
34
48
|
def add_group(group, dataset_id = nil)
|
35
|
-
if
|
36
|
-
|
37
|
-
|
49
|
+
if @config.decollation_needed?
|
50
|
+
original_values = group.values
|
51
|
+
values = group.decollated_values
|
52
|
+
if !@groups_buffer
|
53
|
+
groups_headers = [:id] + values.keys
|
54
|
+
@groups_buffer = ImportBuffer.new(database[@config.groups_table_name],
|
55
|
+
groups_headers)
|
56
|
+
|
57
|
+
original_groups_headers = [:id] + original_values.keys
|
58
|
+
@original_groups_buffer = ImportBuffer.new(
|
59
|
+
database[@config.original_groups_table_name],
|
60
|
+
original_groups_headers)
|
61
|
+
end
|
62
|
+
|
63
|
+
group_id = next_group_id
|
64
|
+
@groups_buffer.add([group_id] + values.values)
|
65
|
+
@original_groups_buffer.add([group_id] + original_values.values)
|
66
|
+
else
|
67
|
+
# Non-DRY for minute speed improvements
|
68
|
+
values = group.values
|
69
|
+
if !@groups_buffer
|
70
|
+
groups_headers = [:id] + values.keys
|
71
|
+
@groups_buffer = ImportBuffer.new(database[@config.groups_table_name],
|
72
|
+
groups_headers)
|
73
|
+
end
|
74
|
+
group_id = next_group_id
|
75
|
+
@groups_buffer.add([group_id] + values.values)
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
79
|
+
def add_score(comparator_id, record_1_id, record_2_id, score)
|
80
|
+
if !@scores_buffer
|
81
|
+
scores_headers = [:comparator_id, :record_1_id, :record_2_id, :score]
|
82
|
+
@scores_buffer = ImportBuffer.new(database[@config.scores_table_name],
|
83
|
+
scores_headers)
|
84
|
+
end
|
85
|
+
@scores_buffer.add([comparator_id, record_1_id, record_2_id, score])
|
86
|
+
end
|
87
|
+
|
88
|
+
def add_match(record_1_id, record_2_id, total_score)
|
89
|
+
if !@matches_buffer
|
90
|
+
matches_headers = [:record_1_id, :record_2_id, :total_score]
|
91
|
+
@matches_buffer = ImportBuffer.new(database[@config.matches_table_name],
|
92
|
+
matches_headers)
|
38
93
|
end
|
39
|
-
|
40
|
-
@groups_buffer.add([group_id] + group.values.values)
|
94
|
+
@matches_buffer.add([record_1_id, record_2_id, total_score])
|
41
95
|
end
|
42
96
|
|
43
97
|
def flush!
|
44
98
|
@groups_buffer.flush if @groups_buffer
|
99
|
+
@original_groups_buffer.flush if @original_groups_buffer
|
100
|
+
@scores_buffer.flush if @scores_buffer
|
101
|
+
@matches_buffer.flush if @matches_buffer
|
45
102
|
end
|
46
103
|
|
47
104
|
def get_group(index)
|
@@ -50,7 +107,7 @@ module Linkage
|
|
50
107
|
end
|
51
108
|
|
52
109
|
def groups_records_datasets(group)
|
53
|
-
datasets = @config.
|
110
|
+
datasets = @config.datasets_with_applied_simple_expectations
|
54
111
|
datasets.collect! { |ds| ds.dataset_for_group(group) }
|
55
112
|
end
|
56
113
|
|