linkage 0.0.6 → 0.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +10 -0
- data/Gemfile +15 -13
- data/Gemfile.lock +67 -37
- data/Guardfile +0 -2
- data/Rakefile +122 -25
- data/lib/linkage/comparator.rb +172 -0
- data/lib/linkage/comparators/binary.rb +12 -0
- data/lib/linkage/comparators/compare.rb +46 -0
- data/lib/linkage/comparators/within.rb +32 -0
- data/lib/linkage/configuration.rb +285 -153
- data/lib/linkage/data.rb +32 -7
- data/lib/linkage/dataset.rb +107 -32
- data/lib/linkage/decollation.rb +93 -0
- data/lib/linkage/expectation.rb +21 -0
- data/lib/linkage/expectations/exhaustive.rb +63 -0
- data/lib/linkage/expectations/simple.rb +168 -0
- data/lib/linkage/field.rb +30 -4
- data/lib/linkage/field_set.rb +6 -3
- data/lib/linkage/function.rb +50 -3
- data/lib/linkage/functions/binary.rb +30 -0
- data/lib/linkage/functions/cast.rb +54 -0
- data/lib/linkage/functions/length.rb +29 -0
- data/lib/linkage/functions/strftime.rb +12 -11
- data/lib/linkage/functions/trim.rb +8 -0
- data/lib/linkage/group.rb +20 -0
- data/lib/linkage/import_buffer.rb +5 -16
- data/lib/linkage/meta_object.rb +139 -0
- data/lib/linkage/result_set.rb +74 -17
- data/lib/linkage/runner/single_threaded.rb +125 -10
- data/lib/linkage/version.rb +3 -0
- data/lib/linkage.rb +11 -0
- data/linkage.gemspec +16 -121
- data/test/config.yml +5 -0
- data/test/helper.rb +73 -8
- data/test/integration/test_collation.rb +45 -0
- data/test/integration/test_configuration.rb +268 -0
- data/test/integration/test_cross_linkage.rb +4 -17
- data/test/integration/test_dataset.rb +45 -2
- data/test/integration/test_dual_linkage.rb +40 -24
- data/test/integration/test_functions.rb +22 -0
- data/test/integration/test_result_set.rb +85 -0
- data/test/integration/test_scoring.rb +84 -0
- data/test/integration/test_self_linkage.rb +5 -0
- data/test/integration/test_within_comparator.rb +100 -0
- data/test/unit/comparators/test_compare.rb +105 -0
- data/test/unit/comparators/test_within.rb +57 -0
- data/test/unit/expectations/test_exhaustive.rb +111 -0
- data/test/unit/expectations/test_simple.rb +303 -0
- data/test/unit/functions/test_binary.rb +54 -0
- data/test/unit/functions/test_cast.rb +98 -0
- data/test/unit/functions/test_length.rb +52 -0
- data/test/unit/functions/test_strftime.rb +17 -13
- data/test/unit/functions/test_trim.rb +11 -4
- data/test/unit/test_comparator.rb +124 -0
- data/test/unit/test_configuration.rb +137 -175
- data/test/unit/test_data.rb +44 -0
- data/test/unit/test_dataset.rb +73 -21
- data/test/unit/test_decollation.rb +201 -0
- data/test/unit/test_field.rb +38 -14
- data/test/unit/test_field_set.rb +12 -8
- data/test/unit/test_function.rb +83 -16
- data/test/unit/test_group.rb +28 -0
- data/test/unit/test_import_buffer.rb +13 -27
- data/test/unit/test_meta_object.rb +208 -0
- data/test/unit/test_result_set.rb +221 -3
- metadata +82 -190
data/lib/linkage/function.rb
CHANGED
@@ -38,10 +38,23 @@ module Linkage
|
|
38
38
|
nil
|
39
39
|
end
|
40
40
|
|
41
|
-
|
41
|
+
attr_reader :args
|
42
|
+
|
43
|
+
# Creates a new Function object. If the arguments contain only
|
44
|
+
# static objects, you should specify the dataset that this function
|
45
|
+
# belongs to as the last argument like so:
|
46
|
+
#
|
47
|
+
# Function.new(foo, bar, :dataset => dataset)
|
48
|
+
#
|
49
|
+
# Optionally, you can use the `dataset=` setter to do it later. Many
|
50
|
+
# functions require a dataset to work properly. If you try to use
|
51
|
+
# such a function without setting a dataset, it will raise a RuntimeError.
|
52
|
+
#
|
53
|
+
# @param [Linkage::Data, Object] args Function arguments
|
42
54
|
def initialize(*args)
|
43
55
|
@names = [self.class.function_name]
|
44
56
|
@args = args
|
57
|
+
@options = args.last.is_a?(Hash) ? args.pop : {}
|
45
58
|
process_args
|
46
59
|
end
|
47
60
|
|
@@ -49,10 +62,25 @@ module Linkage
|
|
49
62
|
@name ||= @names.join("_").to_sym
|
50
63
|
end
|
51
64
|
|
65
|
+
def dataset
|
66
|
+
if @dataset.nil?
|
67
|
+
raise RuntimeError, "You must specify a dataset for static functions"
|
68
|
+
end
|
69
|
+
@dataset
|
70
|
+
end
|
71
|
+
|
72
|
+
def dataset=(dataset)
|
73
|
+
@dataset = dataset
|
74
|
+
end
|
75
|
+
|
52
76
|
def static?
|
53
77
|
@static
|
54
78
|
end
|
55
79
|
|
80
|
+
def ==(other)
|
81
|
+
equal?(other) || (other.is_a?(Function) && name == other.name && args == other.args && dataset == other.dataset)
|
82
|
+
end
|
83
|
+
|
56
84
|
# Subclasses must define this. The return value should be a Hash with
|
57
85
|
# the following elements:
|
58
86
|
# :type - column type (Ruby class) of the result
|
@@ -61,8 +89,14 @@ module Linkage
|
|
61
89
|
raise NotImplementedError
|
62
90
|
end
|
63
91
|
|
92
|
+
# Returns `nil` by default. Subclasses should redefine this if
|
93
|
+
# there is a collation.
|
94
|
+
def collation
|
95
|
+
nil
|
96
|
+
end
|
97
|
+
|
64
98
|
# @return [Sequel::SQL::Function]
|
65
|
-
def to_expr(
|
99
|
+
def to_expr(options = {})
|
66
100
|
self.class.function_name.to_sym.sql_function(*@values)
|
67
101
|
end
|
68
102
|
|
@@ -80,6 +114,14 @@ module Linkage
|
|
80
114
|
if arg.kind_of?(Data)
|
81
115
|
@names << arg.name
|
82
116
|
@static &&= arg.static?
|
117
|
+
|
118
|
+
# possibly set dataset
|
119
|
+
if @dataset.nil?
|
120
|
+
@dataset = arg.dataset
|
121
|
+
elsif @dataset != arg.dataset
|
122
|
+
raise ArgumentError, "Using dynamic data sources with different datasets is not permitted"
|
123
|
+
end
|
124
|
+
|
83
125
|
type = arg.ruby_type[:type]
|
84
126
|
value = arg.to_expr
|
85
127
|
else
|
@@ -87,11 +129,16 @@ module Linkage
|
|
87
129
|
type = arg.class
|
88
130
|
value = arg
|
89
131
|
end
|
90
|
-
if parameters && !parameters[i].include?(type)
|
132
|
+
if parameters && parameters[i] != [:any] && !parameters[i].include?(type)
|
91
133
|
raise TypeError, "expected type #{parameters[i].join(" or ")}, got #{type}"
|
92
134
|
end
|
93
135
|
@values << value
|
94
136
|
end
|
137
|
+
|
138
|
+
if @dataset.nil? && @options[:dataset]
|
139
|
+
# Set dataset for static functions manually
|
140
|
+
@dataset = @options[:dataset]
|
141
|
+
end
|
95
142
|
end
|
96
143
|
end
|
97
144
|
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module Linkage
|
2
|
+
module Functions
|
3
|
+
class Binary < Function
|
4
|
+
def self.function_name
|
5
|
+
"binary"
|
6
|
+
end
|
7
|
+
|
8
|
+
def self.parameters
|
9
|
+
[[String]]
|
10
|
+
end
|
11
|
+
|
12
|
+
def ruby_type
|
13
|
+
{:type => File}
|
14
|
+
end
|
15
|
+
|
16
|
+
def to_expr(options = {})
|
17
|
+
expr =
|
18
|
+
case dataset.database_type
|
19
|
+
when :sqlite
|
20
|
+
@values[0].cast(:blob)
|
21
|
+
when :postgres
|
22
|
+
@values[0].cast(:bytea)
|
23
|
+
else
|
24
|
+
@values[0].cast(:binary)
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
Function.register(Binary)
|
29
|
+
end
|
30
|
+
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
module Linkage
|
2
|
+
module Functions
|
3
|
+
class Cast < Function
|
4
|
+
def self.function_name
|
5
|
+
"cast"
|
6
|
+
end
|
7
|
+
|
8
|
+
def self.parameters
|
9
|
+
[[:any], [String]]
|
10
|
+
end
|
11
|
+
|
12
|
+
def ruby_type
|
13
|
+
type =
|
14
|
+
case @values[1]
|
15
|
+
when 'integer'
|
16
|
+
Fixnum
|
17
|
+
when 'binary'
|
18
|
+
File
|
19
|
+
else
|
20
|
+
raise "unknown type: #{@values[1]}"
|
21
|
+
end
|
22
|
+
|
23
|
+
{:type => type}
|
24
|
+
end
|
25
|
+
|
26
|
+
def to_expr(options = {})
|
27
|
+
cast =
|
28
|
+
case @values[1]
|
29
|
+
when 'integer'
|
30
|
+
case dataset.database_type
|
31
|
+
when :sqlite, :postgres, :h2
|
32
|
+
:integer
|
33
|
+
when :mysql
|
34
|
+
:signed
|
35
|
+
end
|
36
|
+
when 'binary'
|
37
|
+
case dataset.database_type
|
38
|
+
when :sqlite
|
39
|
+
:blob
|
40
|
+
when :postgres
|
41
|
+
:bytea
|
42
|
+
when :mysql, :h2
|
43
|
+
:binary
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
if cast
|
48
|
+
@values[0].cast(cast)
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
52
|
+
Function.register(Cast)
|
53
|
+
end
|
54
|
+
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module Linkage
|
2
|
+
module Functions
|
3
|
+
# Returns the number of characters in a string.
|
4
|
+
class Length < Function
|
5
|
+
def self.function_name
|
6
|
+
"length"
|
7
|
+
end
|
8
|
+
|
9
|
+
def self.parameters
|
10
|
+
[[String]]
|
11
|
+
end
|
12
|
+
|
13
|
+
def ruby_type
|
14
|
+
{:type => Fixnum}
|
15
|
+
end
|
16
|
+
|
17
|
+
def to_expr(options = {})
|
18
|
+
expr =
|
19
|
+
case dataset.database_type
|
20
|
+
when :mysql, :postgres
|
21
|
+
:char_length.sql_function(@values[0])
|
22
|
+
else
|
23
|
+
:length.sql_function(@values[0])
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
Function.register(Length)
|
28
|
+
end
|
29
|
+
end
|
@@ -14,17 +14,18 @@ module Linkage
|
|
14
14
|
{:type => String}
|
15
15
|
end
|
16
16
|
|
17
|
-
def to_expr(
|
18
|
-
|
19
|
-
|
20
|
-
:
|
21
|
-
|
22
|
-
:
|
23
|
-
|
24
|
-
:
|
25
|
-
|
26
|
-
|
27
|
-
|
17
|
+
def to_expr(options = {})
|
18
|
+
expr =
|
19
|
+
case dataset.database_type
|
20
|
+
when :mysql
|
21
|
+
:date_format.sql_function(*@values)
|
22
|
+
when :sqlite
|
23
|
+
:strftime.sql_function(@values[1], @values[0])
|
24
|
+
when :postgres
|
25
|
+
:to_char.sql_function(*@values)
|
26
|
+
else
|
27
|
+
:strftime.sql_function(@values[0], @values[1])
|
28
|
+
end
|
28
29
|
end
|
29
30
|
end
|
30
31
|
Function.register(Strftime)
|
data/lib/linkage/group.rb
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
module Linkage
|
2
2
|
class Group
|
3
|
+
include Linkage::Decollation
|
4
|
+
|
3
5
|
# @return [Hash] Hash of matching values
|
4
6
|
attr_reader :values
|
5
7
|
|
@@ -24,12 +26,30 @@ module Linkage
|
|
24
26
|
|
25
27
|
# @param [Hash] values Values that define this group
|
26
28
|
# @param [Hash] options
|
29
|
+
# @option options [Fixnum] :id The group ID
|
30
|
+
# @option options [Fixnum] :count How many records are in the group
|
31
|
+
# @option options [Hash] :ruby_types Hash of ruby types for each value
|
32
|
+
# @option options [Symbol] :database_type
|
27
33
|
# @example
|
28
34
|
# Linkage::Group.new({:foo => 123, :bar => 'baz'}, {:count => 5, :id => 456})
|
29
35
|
def initialize(values, options)
|
30
36
|
@count = options[:count]
|
31
37
|
@id = options[:id]
|
38
|
+
@ruby_types = options[:ruby_types]
|
39
|
+
@database_type = options[:database_type]
|
32
40
|
@values = values
|
33
41
|
end
|
42
|
+
|
43
|
+
def decollated_values
|
44
|
+
@values.inject({}) do |hsh, (key, value)|
|
45
|
+
ruby_type = @ruby_types[key]
|
46
|
+
if ruby_type && ruby_type.has_key?(:opts) && ruby_type[:opts].has_key?(:collate)
|
47
|
+
hsh[key] = decollate(value, @database_type, ruby_type[:opts][:collate])
|
48
|
+
else
|
49
|
+
hsh[key] = value
|
50
|
+
end
|
51
|
+
hsh
|
52
|
+
end
|
53
|
+
end
|
34
54
|
end
|
35
55
|
end
|
@@ -1,15 +1,11 @@
|
|
1
1
|
module Linkage
|
2
2
|
class ImportBuffer
|
3
|
-
# @param [
|
4
|
-
# @param [Symbol, String] table_name
|
3
|
+
# @param [Sequel::Dataset] dataset
|
5
4
|
# @param [Array<Symbol>] headers List of fields you want to insert
|
6
|
-
# @param [Hash] options Sequel.connect options
|
7
5
|
# @param [Fixnum] limit Number of records to insert at a time
|
8
|
-
def initialize(
|
9
|
-
@
|
10
|
-
@table_name = table_name.to_sym
|
6
|
+
def initialize(dataset, headers, limit = 1000)
|
7
|
+
@dataset = dataset
|
11
8
|
@headers = headers
|
12
|
-
@options = options
|
13
9
|
@limit = limit
|
14
10
|
@values = []
|
15
11
|
end
|
@@ -23,17 +19,10 @@ module Linkage
|
|
23
19
|
|
24
20
|
def flush
|
25
21
|
return if @values.empty?
|
26
|
-
|
27
|
-
|
28
|
-
ds.import(@headers, @values)
|
22
|
+
@dataset.db.synchronize do
|
23
|
+
@dataset.import(@headers, @values)
|
29
24
|
@values.clear
|
30
25
|
end
|
31
26
|
end
|
32
|
-
|
33
|
-
private
|
34
|
-
|
35
|
-
def database(&block)
|
36
|
-
Sequel.connect(@uri, @options, &block)
|
37
|
-
end
|
38
27
|
end
|
39
28
|
end
|
@@ -0,0 +1,139 @@
|
|
1
|
+
module Linkage
|
2
|
+
class MetaObject
|
3
|
+
attr_reader :object
|
4
|
+
attr_writer :side
|
5
|
+
|
6
|
+
# Creates a new MetaObject.
|
7
|
+
#
|
8
|
+
# @param [Object] object This can be a {Field}, {Function} or a regular
|
9
|
+
# Ruby object (Fixnum, String, etc). If `object` is not static (a {Field}
|
10
|
+
# or a {Function} that contains one or more {Field} objects), you should
|
11
|
+
# specify which "side" of the linkage the object belongs to (left-hand
|
12
|
+
# side or right-hand side) in the `side` argument.
|
13
|
+
# @param [Symbol] side `:lhs` for left-hand side or `:rhs` for right-hand
|
14
|
+
# side
|
15
|
+
def initialize(object, side = nil)
|
16
|
+
@object = object
|
17
|
+
@static = object.kind_of?(Linkage::Data) ? object.static? : true
|
18
|
+
if !side.nil? && side != :lhs && side != :rhs
|
19
|
+
raise ArgumentError, "invalid `side` argument, must be :lhs or :rhs"
|
20
|
+
end
|
21
|
+
@side = side
|
22
|
+
end
|
23
|
+
|
24
|
+
def side
|
25
|
+
if !@static && @side.nil?
|
26
|
+
raise RuntimeError, "Object is dynamic and side is not set"
|
27
|
+
end
|
28
|
+
@side
|
29
|
+
end
|
30
|
+
|
31
|
+
def dataset
|
32
|
+
@object.kind_of?(Linkage::Data) ? @object.dataset : nil
|
33
|
+
end
|
34
|
+
|
35
|
+
def dataset=(dataset)
|
36
|
+
if @object.kind_of?(Linkage::Data)
|
37
|
+
@object.dataset = dataset
|
38
|
+
else
|
39
|
+
raise RuntimeError, "You can't set the dataset of a non-data object."
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
def database_type
|
44
|
+
ds = dataset
|
45
|
+
ds ? ds.database_type : nil
|
46
|
+
end
|
47
|
+
|
48
|
+
def static?
|
49
|
+
@static
|
50
|
+
end
|
51
|
+
|
52
|
+
# Returns true if the argument has the same object as the instance.
|
53
|
+
#
|
54
|
+
# @param [Linkage::MetaObject] other
|
55
|
+
# @return [Boolean]
|
56
|
+
def objects_equal?(other)
|
57
|
+
other.is_a?(Linkage::MetaObject) && other.object == self.object
|
58
|
+
end
|
59
|
+
|
60
|
+
# Returns true if the argument has the same dataset as the instance.
|
61
|
+
#
|
62
|
+
# @param [Linkage::MetaObject] other
|
63
|
+
# @return [Boolean]
|
64
|
+
def datasets_equal?(other)
|
65
|
+
other.is_a?(Linkage::MetaObject) && other.dataset == self.dataset
|
66
|
+
end
|
67
|
+
|
68
|
+
# Returns an expression suitable for use in Sequel queries.
|
69
|
+
# @return [Object]
|
70
|
+
def to_expr
|
71
|
+
if @object.kind_of?(Linkage::Data)
|
72
|
+
@object.to_expr
|
73
|
+
else
|
74
|
+
@object
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
78
|
+
# Returns a Sequel identifier for {Data} objects, or the object itself.
|
79
|
+
# @return [Sequel::SQL::Identifier, Object]
|
80
|
+
def to_identifier
|
81
|
+
if @object.kind_of?(Linkage::Data)
|
82
|
+
Sequel::SQL::Identifier.new(@object.to_expr)
|
83
|
+
else
|
84
|
+
@object
|
85
|
+
end
|
86
|
+
end
|
87
|
+
|
88
|
+
# Return the name of the object for {Data} objects, nil for others.
|
89
|
+
# @return [Symbol, nil]
|
90
|
+
def name
|
91
|
+
if @object.kind_of?(Linkage::Data)
|
92
|
+
@object.name
|
93
|
+
else
|
94
|
+
nil
|
95
|
+
end
|
96
|
+
end
|
97
|
+
|
98
|
+
# Returns a {MergeField} if both objects are {Data} objects, otherwise,
|
99
|
+
# raises an exception.
|
100
|
+
#
|
101
|
+
# @return [Linkage::MergeField]
|
102
|
+
def merge(other)
|
103
|
+
if @object.kind_of?(Linkage::Data) && other.object.kind_of?(Linkage::Data)
|
104
|
+
@object.merge(other.object)
|
105
|
+
else
|
106
|
+
raise ArgumentError, "Cannot merge a non-data object"
|
107
|
+
end
|
108
|
+
end
|
109
|
+
|
110
|
+
# Returns the Ruby type of the underlying object.
|
111
|
+
#
|
112
|
+
# @return [Hash]
|
113
|
+
# @see Linkage::Field#ruby_type
|
114
|
+
# @see Linkage::Function#ruby_type
|
115
|
+
def ruby_type
|
116
|
+
if @object.kind_of?(Linkage::Data)
|
117
|
+
@object.ruby_type
|
118
|
+
else
|
119
|
+
{:type => @object.class}
|
120
|
+
end
|
121
|
+
end
|
122
|
+
|
123
|
+
# Returns the collation of the underlying object.
|
124
|
+
#
|
125
|
+
# @return [Symbol]
|
126
|
+
def collation
|
127
|
+
if @object.kind_of?(Linkage::Data)
|
128
|
+
@object.collation
|
129
|
+
else
|
130
|
+
nil
|
131
|
+
end
|
132
|
+
end
|
133
|
+
|
134
|
+
# Returns true if underlying object is not a subclass of {Linkage::Data}.
|
135
|
+
def raw?
|
136
|
+
!@object.kind_of?(Linkage::Data)
|
137
|
+
end
|
138
|
+
end
|
139
|
+
end
|
data/lib/linkage/result_set.rb
CHANGED
@@ -7,41 +7,98 @@ module Linkage
|
|
7
7
|
end
|
8
8
|
|
9
9
|
def groups_dataset
|
10
|
-
@groups_dataset ||= Dataset.new(
|
10
|
+
@groups_dataset ||= Dataset.new(database[:groups])
|
11
11
|
end
|
12
12
|
|
13
|
-
def database
|
14
|
-
|
13
|
+
def database
|
14
|
+
# FIXME: If the results database is the same as one of the datasets
|
15
|
+
# being linked, there will be two connections to said database. This
|
16
|
+
# could result in unexpected locking for non-concurrent databases (like
|
17
|
+
# SQLite).
|
18
|
+
@database ||= Sequel.connect(@config.results_uri, @config.results_uri_options)
|
15
19
|
end
|
16
20
|
|
17
21
|
def create_tables!
|
18
|
-
|
22
|
+
if @config.groups_table_needed?
|
19
23
|
schema = @config.groups_table_schema
|
20
|
-
|
24
|
+
if @config.decollation_needed?
|
25
|
+
database.create_table(@config.original_groups_table_name) do
|
26
|
+
schema.each { |col| column(*col) }
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
database.create_table(@config.groups_table_name) do
|
21
31
|
schema.each { |col| column(*col) }
|
22
32
|
end
|
33
|
+
end
|
23
34
|
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
Integer :dataset
|
29
|
-
index :group_id
|
35
|
+
if @config.scores_table_needed?
|
36
|
+
schema = @config.scores_table_schema
|
37
|
+
database.create_table(@config.scores_table_name) do
|
38
|
+
schema.each { |col| column(*col) }
|
30
39
|
end
|
31
40
|
end
|
41
|
+
|
42
|
+
schema = @config.matches_table_schema
|
43
|
+
database.create_table(@config.matches_table_name) do
|
44
|
+
schema.each { |col| column(*col) }
|
45
|
+
end
|
32
46
|
end
|
33
47
|
|
34
48
|
def add_group(group, dataset_id = nil)
|
35
|
-
if
|
36
|
-
|
37
|
-
|
49
|
+
if @config.decollation_needed?
|
50
|
+
original_values = group.values
|
51
|
+
values = group.decollated_values
|
52
|
+
if !@groups_buffer
|
53
|
+
groups_headers = [:id] + values.keys
|
54
|
+
@groups_buffer = ImportBuffer.new(database[@config.groups_table_name],
|
55
|
+
groups_headers)
|
56
|
+
|
57
|
+
original_groups_headers = [:id] + original_values.keys
|
58
|
+
@original_groups_buffer = ImportBuffer.new(
|
59
|
+
database[@config.original_groups_table_name],
|
60
|
+
original_groups_headers)
|
61
|
+
end
|
62
|
+
|
63
|
+
group_id = next_group_id
|
64
|
+
@groups_buffer.add([group_id] + values.values)
|
65
|
+
@original_groups_buffer.add([group_id] + original_values.values)
|
66
|
+
else
|
67
|
+
# Non-DRY for minute speed improvements
|
68
|
+
values = group.values
|
69
|
+
if !@groups_buffer
|
70
|
+
groups_headers = [:id] + values.keys
|
71
|
+
@groups_buffer = ImportBuffer.new(database[@config.groups_table_name],
|
72
|
+
groups_headers)
|
73
|
+
end
|
74
|
+
group_id = next_group_id
|
75
|
+
@groups_buffer.add([group_id] + values.values)
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
79
|
+
def add_score(comparator_id, record_1_id, record_2_id, score)
|
80
|
+
if !@scores_buffer
|
81
|
+
scores_headers = [:comparator_id, :record_1_id, :record_2_id, :score]
|
82
|
+
@scores_buffer = ImportBuffer.new(database[@config.scores_table_name],
|
83
|
+
scores_headers)
|
84
|
+
end
|
85
|
+
@scores_buffer.add([comparator_id, record_1_id, record_2_id, score])
|
86
|
+
end
|
87
|
+
|
88
|
+
def add_match(record_1_id, record_2_id, total_score)
|
89
|
+
if !@matches_buffer
|
90
|
+
matches_headers = [:record_1_id, :record_2_id, :total_score]
|
91
|
+
@matches_buffer = ImportBuffer.new(database[@config.matches_table_name],
|
92
|
+
matches_headers)
|
38
93
|
end
|
39
|
-
|
40
|
-
@groups_buffer.add([group_id] + group.values.values)
|
94
|
+
@matches_buffer.add([record_1_id, record_2_id, total_score])
|
41
95
|
end
|
42
96
|
|
43
97
|
def flush!
|
44
98
|
@groups_buffer.flush if @groups_buffer
|
99
|
+
@original_groups_buffer.flush if @original_groups_buffer
|
100
|
+
@scores_buffer.flush if @scores_buffer
|
101
|
+
@matches_buffer.flush if @matches_buffer
|
45
102
|
end
|
46
103
|
|
47
104
|
def get_group(index)
|
@@ -50,7 +107,7 @@ module Linkage
|
|
50
107
|
end
|
51
108
|
|
52
109
|
def groups_records_datasets(group)
|
53
|
-
datasets = @config.
|
110
|
+
datasets = @config.datasets_with_applied_simple_expectations
|
54
111
|
datasets.collect! { |ds| ds.dataset_for_group(group) }
|
55
112
|
end
|
56
113
|
|