linkage 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +2 -1
- data/Gemfile.lock +19 -8
- data/README.markdown +8 -5
- data/Rakefile +0 -8
- data/VERSION +1 -1
- data/lib/linkage/configuration.rb +245 -157
- data/lib/linkage/data.rb +0 -24
- data/lib/linkage/dataset.rb +26 -183
- data/lib/linkage/field.rb +0 -3
- data/lib/linkage/field_set.rb +16 -0
- data/lib/linkage/function.rb +0 -7
- data/lib/linkage/result_set.rb +68 -0
- data/lib/linkage/runner/single_threaded.rb +29 -39
- data/lib/linkage/runner.rb +8 -36
- data/lib/linkage.rb +3 -1
- data/linkage.gemspec +14 -17
- data/test/helper.rb +1 -1
- data/test/integration/test_cross_linkage.rb +6 -2
- data/test/integration/test_dataset.rb +30 -0
- data/test/integration/test_dual_linkage.rb +9 -4
- data/test/integration/test_self_linkage.rb +23 -8
- data/test/unit/test_configuration.rb +90 -72
- data/test/unit/test_data.rb +0 -61
- data/test/unit/test_dataset.rb +19 -319
- data/test/unit/test_field.rb +0 -6
- data/test/unit/test_field_set.rb +31 -0
- data/test/unit/test_function.rb +6 -30
- data/test/unit/test_result_set.rb +18 -0
- data/test/unit/test_runner.rb +20 -5
- metadata +57 -41
- data/lib/linkage/expectation.rb +0 -138
- data/test/unit/test_expectation.rb +0 -390
data/lib/linkage/dataset.rb
CHANGED
@@ -1,205 +1,48 @@
|
|
1
1
|
module Linkage
|
2
|
-
|
3
|
-
|
4
|
-
@@next_id = 1 # Internal ID used for expectations
|
5
|
-
@@next_id_mutex = Mutex.new
|
2
|
+
class Dataset < Delegator
|
3
|
+
attr_reader :field_set, :table_name
|
6
4
|
|
7
|
-
# @private
|
8
|
-
def self.next_id
|
9
|
-
result = nil
|
10
|
-
@@next_id_mutex.synchronize do
|
11
|
-
result = @@next_id
|
12
|
-
@@next_id += 1
|
13
|
-
end
|
14
|
-
result
|
15
|
-
end
|
16
|
-
|
17
|
-
# @return [Array] Schema information about the dataset's primary key
|
18
|
-
attr_reader :primary_key
|
19
|
-
|
20
|
-
# @return [Array] Schema information for this dataset
|
21
|
-
attr_reader :schema
|
22
|
-
|
23
|
-
# @return [String] Database URI
|
24
|
-
attr_reader :uri
|
25
|
-
|
26
|
-
# @return [Symbol] Database table name
|
27
|
-
attr_reader :table
|
28
|
-
|
29
|
-
# @return [Array<Linkage::Field>] List of {Linkage::Field}'s
|
30
|
-
attr_reader :fields
|
31
|
-
|
32
|
-
# @private
|
33
|
-
attr_reader :id
|
34
|
-
|
35
|
-
# @param [String] uri Sequel-style database URI
|
36
|
-
# @param [String, Symbol] table Database table name
|
37
|
-
# @param [Hash] options Options to pass to Sequel.connect
|
38
|
-
# @see http://sequel.rubyforge.org/rdoc/files/doc/opening_databases_rdoc.html Sequel: Connecting to a database
|
39
5
|
def initialize(uri, table, options = {})
|
40
|
-
@
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
database { |db| schema = db.schema(@table) }
|
46
|
-
@schema = schema
|
47
|
-
@order = []
|
48
|
-
@select = []
|
49
|
-
@filter = []
|
50
|
-
create_fields
|
6
|
+
@table_name = table.to_sym
|
7
|
+
db = Sequel.connect(uri, options)
|
8
|
+
ds = db[@table_name]
|
9
|
+
super(ds)
|
10
|
+
@field_set = FieldSet.new(db.schema(@table_name))
|
51
11
|
end
|
52
12
|
|
13
|
+
def __setobj__(obj); @dataset = obj; end
|
14
|
+
def __getobj__; @dataset; end
|
15
|
+
|
53
16
|
# Setup a linkage with another dataset
|
54
17
|
#
|
55
18
|
# @return [Linkage::Configuration]
|
56
19
|
def link_with(dataset, &block)
|
57
20
|
conf = Configuration.new(self, dataset)
|
58
|
-
conf.
|
21
|
+
conf.configure(&block)
|
59
22
|
conf
|
60
23
|
end
|
61
24
|
|
62
|
-
|
63
|
-
|
64
|
-
# @return [Boolean]
|
65
|
-
def ==(other)
|
66
|
-
if !other.is_a?(Dataset)
|
67
|
-
super
|
68
|
-
else
|
69
|
-
uri == other.uri && table == other.table
|
70
|
-
end
|
71
|
-
end
|
72
|
-
|
73
|
-
# Create a copy of this instance of Dataset, using {Dataset#initialize}.
|
74
|
-
#
|
75
|
-
# @return [Linkage::Dataset]
|
76
|
-
def dup
|
77
|
-
self.class.new(uri, table)
|
78
|
-
end
|
79
|
-
|
80
|
-
# Clone the dataset and its associated {Linkage::Field}'s (without hitting
|
81
|
-
# the database).
|
82
|
-
#
|
83
|
-
# @return [Linkage::Dataset]
|
84
|
-
def clone
|
85
|
-
other = self.class.allocate
|
86
|
-
other.send(:initialize_copy, self, {
|
87
|
-
:order => @order.clone, :select => @select.clone,
|
88
|
-
:filter => @filter.clone, :options => @options.clone
|
89
|
-
})
|
90
|
-
end
|
91
|
-
|
92
|
-
# Add a data source to use for ordering the dataset.
|
93
|
-
#
|
94
|
-
# @param [Linkage::Data] data
|
95
|
-
# @param [nil, Symbol] desc nil or :desc (for descending order)
|
96
|
-
def add_order(data, desc = nil)
|
97
|
-
expr = desc == :desc ? data.to_expr.desc : data.to_expr
|
98
|
-
unless @order.include?(expr)
|
99
|
-
@order << expr
|
100
|
-
end
|
101
|
-
end
|
102
|
-
|
103
|
-
# Add a data source to be selected on the dataset. If you don't add any
|
104
|
-
# selects, all fields will be selected. The primary key is always selected
|
105
|
-
# in either case.
|
106
|
-
#
|
107
|
-
# @param [Linkage::Data] data
|
108
|
-
# @param [Symbol] as Optional field/function alias
|
109
|
-
def add_select(data, as = nil)
|
110
|
-
expr = as ? data.to_expr.as(as) : data.to_expr
|
111
|
-
unless @select.include?(expr)
|
112
|
-
@select << expr
|
113
|
-
end
|
25
|
+
def adapter_scheme
|
26
|
+
@dataset.db.adapter_scheme
|
114
27
|
end
|
115
28
|
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
arg1 = data.to_expr
|
123
|
-
arg2 = other.is_a?(Data) ? other.to_expr : other
|
124
|
-
expr =
|
125
|
-
case operator
|
126
|
-
when :==
|
127
|
-
{ arg1 => arg2 }
|
128
|
-
when :'!='
|
129
|
-
~{ arg1 => arg2 }
|
130
|
-
else
|
131
|
-
arg1 = Sequel::SQL::Identifier.new(arg1)
|
132
|
-
arg2 = arg2.is_a?(Symbol) ? Sequel::SQL::Identifier.new(arg2) : arg2
|
133
|
-
Sequel::SQL::BooleanExpression.new(operator, arg1, arg2)
|
134
|
-
end
|
135
|
-
@filter << expr
|
136
|
-
end
|
137
|
-
|
138
|
-
# Yield each row of the dataset in a block.
|
139
|
-
#
|
140
|
-
# @yield [row] A Hash of two elements, :pk and :values, where row[:pk] is
|
141
|
-
# the row's primary key value, and row[:values] is an array of all
|
142
|
-
# selected values (except the primary key).
|
143
|
-
def each
|
144
|
-
database do |db|
|
145
|
-
ds = db[@table]
|
146
|
-
|
147
|
-
pk = @primary_key.name
|
148
|
-
if !@select.empty?
|
149
|
-
ds = ds.select(pk, *@select)
|
150
|
-
end
|
151
|
-
if !@order.empty?
|
152
|
-
ds = ds.order(*@order)
|
153
|
-
end
|
154
|
-
if !@filter.empty?
|
155
|
-
ds = ds.filter(*@filter)
|
156
|
-
end
|
157
|
-
ds.each do |row|
|
158
|
-
yield({:pk => row.delete(pk), :values => row})
|
159
|
-
end
|
160
|
-
end
|
161
|
-
end
|
162
|
-
|
163
|
-
private
|
164
|
-
|
165
|
-
def initialize_copy(dataset, options = {})
|
166
|
-
@id = dataset.id
|
167
|
-
@uri = dataset.uri
|
168
|
-
@table = dataset.table
|
169
|
-
@schema = dataset.schema
|
170
|
-
@options = options[:options]
|
171
|
-
@order = options[:order]
|
172
|
-
@select = options[:select]
|
173
|
-
@filter = options[:filter]
|
174
|
-
@fields = dataset.fields.inject({}) do |hsh, (name, field)|
|
175
|
-
new_field = field.clone
|
176
|
-
new_field.dataset = self
|
177
|
-
hsh[name] = new_field
|
178
|
-
hsh
|
29
|
+
def initialize_clone(obj)
|
30
|
+
new_obj = obj.instance_variable_get(:@new_obj)
|
31
|
+
if new_obj
|
32
|
+
__setobj__(new_obj)
|
33
|
+
else
|
34
|
+
super
|
179
35
|
end
|
180
|
-
@primary_key = @fields[dataset.primary_key.name]
|
181
|
-
self
|
182
36
|
end
|
183
37
|
|
184
|
-
def
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
@schema.each do |(name, column_schema)|
|
191
|
-
f = Field.new(name, column_schema)
|
192
|
-
f.dataset = self
|
193
|
-
@fields[name] = f
|
194
|
-
|
195
|
-
if @primary_key.nil? && column_schema[:primary_key]
|
196
|
-
@primary_key = f
|
197
|
-
end
|
38
|
+
def method_missing(name, *args, &block)
|
39
|
+
result = super
|
40
|
+
if result.kind_of?(Sequel::Dataset)
|
41
|
+
@new_obj = result
|
42
|
+
result = clone
|
43
|
+
@new_obj = nil
|
198
44
|
end
|
199
|
-
|
200
|
-
|
201
|
-
def set_new_id
|
202
|
-
@id = self.class.next_id
|
45
|
+
result
|
203
46
|
end
|
204
47
|
end
|
205
48
|
end
|
data/lib/linkage/field.rb
CHANGED
@@ -5,9 +5,6 @@ module Linkage
|
|
5
5
|
# @return [Symbol] This field's schema information
|
6
6
|
attr_reader :schema
|
7
7
|
|
8
|
-
# @attr [Linkage::Dataset] This field's associated dataset
|
9
|
-
attr_accessor :dataset
|
10
|
-
|
11
8
|
# Create a new instance of Field.
|
12
9
|
#
|
13
10
|
# @param [Symbol] name The field's name
|
@@ -0,0 +1,16 @@
|
|
1
|
+
module Linkage
|
2
|
+
class FieldSet < Hash
|
3
|
+
attr_reader :primary_key
|
4
|
+
|
5
|
+
def initialize(schema)
|
6
|
+
schema.each do |(name, column_schema)|
|
7
|
+
f = Field.new(name, column_schema)
|
8
|
+
self[name] = f
|
9
|
+
|
10
|
+
if @primary_key.nil? && column_schema[:primary_key]
|
11
|
+
@primary_key = f
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
data/lib/linkage/function.rb
CHANGED
@@ -38,13 +38,10 @@ module Linkage
|
|
38
38
|
nil
|
39
39
|
end
|
40
40
|
|
41
|
-
attr_reader :dataset
|
42
|
-
|
43
41
|
# @param [Linkage::Field, Object] args Function arguments
|
44
42
|
def initialize(*args)
|
45
43
|
@names = [self.class.function_name]
|
46
44
|
@args = args
|
47
|
-
@dataset = nil
|
48
45
|
process_args
|
49
46
|
end
|
50
47
|
|
@@ -83,10 +80,6 @@ module Linkage
|
|
83
80
|
if arg.kind_of?(Data)
|
84
81
|
@names << arg.name
|
85
82
|
@static &&= arg.static?
|
86
|
-
if @dataset && !arg.static? && @dataset != arg.dataset
|
87
|
-
raise ArgumentError, "You cannot supply fields from different datasets as arguments to the same function)"
|
88
|
-
end
|
89
|
-
@dataset ||= arg.dataset
|
90
83
|
type = arg.ruby_type[:type]
|
91
84
|
value = arg.is_a?(Field) ? arg.name : arg.to_expr
|
92
85
|
else
|
@@ -0,0 +1,68 @@
|
|
1
|
+
module Linkage
|
2
|
+
class ResultSet
|
3
|
+
def initialize(config)
|
4
|
+
@config = config
|
5
|
+
@next_group_id = 1
|
6
|
+
@next_group_mutex = Mutex.new
|
7
|
+
end
|
8
|
+
|
9
|
+
def groups_dataset
|
10
|
+
Dataset.new(@config.results_uri, :groups, @config.results_uri_options)
|
11
|
+
end
|
12
|
+
|
13
|
+
def groups_records_dataset
|
14
|
+
Dataset.new(@config.results_uri, :groups_records, @config.results_uri_options)
|
15
|
+
end
|
16
|
+
|
17
|
+
def database(&block)
|
18
|
+
Sequel.connect(@config.results_uri, @config.results_uri_options, &block)
|
19
|
+
end
|
20
|
+
|
21
|
+
def create_tables!
|
22
|
+
database do |db|
|
23
|
+
schema = @config.groups_table_schema
|
24
|
+
db.create_table(:groups) do
|
25
|
+
schema.each { |col| column(*col) }
|
26
|
+
end
|
27
|
+
|
28
|
+
pk_type = @config.dataset_1.field_set.primary_key.merge(@config.dataset_2.field_set.primary_key).ruby_type
|
29
|
+
db.create_table(:groups_records) do
|
30
|
+
column(:record_id, pk_type[:type], pk_type[:opts] || {})
|
31
|
+
Integer :group_id
|
32
|
+
Integer :dataset
|
33
|
+
index :group_id
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
def add_group(group, dataset_id = nil)
|
39
|
+
if !@groups_buffer
|
40
|
+
groups_headers = [:id] + group.values.keys
|
41
|
+
@groups_buffer = ImportBuffer.new(@config.results_uri, :groups, groups_headers, @config.results_uri_options)
|
42
|
+
end
|
43
|
+
@groups_records_buffer ||= ImportBuffer.new(@config.results_uri, :groups_records, [:group_id, :dataset, :record_id], @config.results_uri_options)
|
44
|
+
|
45
|
+
group_id = next_group_id
|
46
|
+
@groups_buffer.add([group_id] + group.values.values)
|
47
|
+
group.records.each do |record_id|
|
48
|
+
@groups_records_buffer.add([group_id, dataset_id, record_id])
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
def flush!
|
53
|
+
@groups_buffer.flush if @groups_buffer
|
54
|
+
@groups_records_buffer.flush if @groups_records_buffer
|
55
|
+
end
|
56
|
+
|
57
|
+
private
|
58
|
+
|
59
|
+
def next_group_id
|
60
|
+
result = nil
|
61
|
+
@next_group_mutex.synchronize do
|
62
|
+
result = @next_group_id
|
63
|
+
@next_group_id += 1
|
64
|
+
end
|
65
|
+
result
|
66
|
+
end
|
67
|
+
end
|
68
|
+
end
|
@@ -3,29 +3,36 @@ module Linkage
|
|
3
3
|
#
|
4
4
|
# @see Runner
|
5
5
|
class SingleThreadedRunner < Runner
|
6
|
+
# @return [Linkage::ResultSet]
|
6
7
|
def execute
|
7
|
-
create_tables
|
8
8
|
setup_datasets
|
9
9
|
apply_expectations
|
10
10
|
group_records
|
11
|
-
|
11
|
+
|
12
|
+
return result_set
|
12
13
|
end
|
13
14
|
|
14
15
|
private
|
15
16
|
|
16
17
|
def setup_datasets
|
17
|
-
|
18
|
-
@
|
18
|
+
pk = config.dataset_1.field_set.primary_key
|
19
|
+
@dataset_1 = config.dataset_1.select(pk.to_expr)
|
20
|
+
if @config.linkage_type != :self
|
21
|
+
pk = config.dataset_2.field_set.primary_key
|
22
|
+
@dataset_2 = config.dataset_2.select(pk.to_expr)
|
23
|
+
end
|
19
24
|
end
|
20
25
|
|
21
26
|
def apply_expectations
|
22
27
|
config.expectations.each do |exp|
|
23
|
-
exp.apply_to(@dataset_1)
|
24
|
-
exp.apply_to(@dataset_2) if config.linkage_type != :self
|
28
|
+
@dataset_1 = exp.apply_to(@dataset_1, :lhs)
|
29
|
+
@dataset_2 = exp.apply_to(@dataset_2, :rhs) if config.linkage_type != :self
|
25
30
|
end
|
26
31
|
end
|
27
32
|
|
28
33
|
def group_records
|
34
|
+
result_set.create_tables!
|
35
|
+
|
29
36
|
if config.linkage_type == :self
|
30
37
|
group_records_for(@dataset_1, 1)
|
31
38
|
else
|
@@ -39,59 +46,42 @@ module Linkage
|
|
39
46
|
# @param [Fixnum, nil] dataset_id
|
40
47
|
# @param [Boolean] ignore_empty_groups
|
41
48
|
# @yield [Linkage::Group] If a block is given, yield completed groups to
|
42
|
-
# the block. Otherwise, call
|
49
|
+
# the block. Otherwise, call ResultSet#add_group on the group.
|
43
50
|
def group_records_for(dataset, dataset_id = nil, ignore_empty_groups = true, &block)
|
44
51
|
current_group = nil
|
45
|
-
block ||= lambda { |group|
|
52
|
+
block ||= lambda { |group| result_set.add_group(current_group, dataset_id) }
|
53
|
+
primary_key = dataset.field_set.primary_key.to_expr
|
46
54
|
dataset.each do |row|
|
47
|
-
|
55
|
+
pk = row.delete(primary_key)
|
56
|
+
if current_group.nil? || !current_group.matches?(row)
|
48
57
|
if current_group && (!ignore_empty_groups || current_group.count > 1)
|
49
58
|
block.call(current_group)
|
50
59
|
end
|
51
|
-
new_group = Group.new(row
|
60
|
+
new_group = Group.new(row)
|
52
61
|
current_group = new_group
|
53
62
|
end
|
54
|
-
current_group.add_record(
|
63
|
+
current_group.add_record(pk)
|
55
64
|
end
|
56
65
|
if current_group && (!ignore_empty_groups || current_group.count > 1)
|
57
66
|
block.call(current_group)
|
58
67
|
end
|
59
|
-
|
60
|
-
end
|
61
|
-
|
62
|
-
def save_group(group, dataset_id = nil)
|
63
|
-
if !@groups_buffer
|
64
|
-
groups_headers = [:id] + group.values.keys
|
65
|
-
@groups_buffer = ImportBuffer.new(@uri, :groups, groups_headers, @options)
|
66
|
-
end
|
67
|
-
@groups_records_buffer ||= ImportBuffer.new(@uri, :groups_records, [:group_id, :dataset, :record_id], @options)
|
68
|
-
|
69
|
-
group_id = next_group_id
|
70
|
-
@groups_buffer.add([group_id] + group.values.values)
|
71
|
-
group.records.each do |record_id|
|
72
|
-
@groups_records_buffer.add([group_id, dataset_id, record_id])
|
73
|
-
end
|
74
|
-
end
|
75
|
-
|
76
|
-
def flush_buffers
|
77
|
-
@groups_buffer.flush if @groups_buffer
|
78
|
-
@groups_records_buffer.flush if @groups_records_buffer
|
68
|
+
result_set.flush!
|
79
69
|
end
|
80
70
|
|
81
71
|
def combine_groups
|
82
72
|
# Create a new dataset for the groups table
|
83
|
-
|
84
|
-
|
73
|
+
groups_dataset = result_set.groups_dataset
|
74
|
+
|
75
|
+
exprs = groups_dataset.field_set.values.inject([]) do |arr, field|
|
85
76
|
# Sort on all fields
|
86
|
-
|
87
|
-
ds.add_order(field)
|
88
|
-
ds.add_select(field)
|
77
|
+
field.primary_key? ? arr : arr << field.to_expr
|
89
78
|
end
|
90
|
-
|
91
|
-
|
79
|
+
groups_dataset = groups_dataset.select(*exprs, groups_dataset.field_set.primary_key.to_expr).order(*exprs) # ensure matching groups are sorted by id
|
80
|
+
|
81
|
+
result_set.database do |db|
|
92
82
|
groups_to_delete = []
|
93
83
|
db.transaction do # for speed reasons
|
94
|
-
group_records_for(
|
84
|
+
group_records_for(groups_dataset, nil, false) do |group|
|
95
85
|
if group.count == 1
|
96
86
|
# Delete the empty group
|
97
87
|
groups_to_delete << group.records[0]
|
data/lib/linkage/runner.rb
CHANGED
@@ -1,19 +1,19 @@
|
|
1
1
|
module Linkage
|
2
2
|
# Use this class to run a configuration created by {Dataset#link_with}.
|
3
3
|
class Runner
|
4
|
-
attr_reader :config
|
4
|
+
attr_reader :config, :result_set
|
5
5
|
|
6
6
|
# @param [Linkage::Configuration] config
|
7
7
|
# @param [String] uri Sequel-style database URI
|
8
8
|
# @param [Hash] options Sequel.connect options
|
9
9
|
# @see Dataset#link_with
|
10
10
|
# @see http://sequel.rubyforge.org/rdoc/files/doc/opening_databases_rdoc.html Sequel: Connecting to a database
|
11
|
-
def initialize(config, uri, options = {})
|
11
|
+
def initialize(config, uri = nil, options = {})
|
12
12
|
@config = config
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
13
|
+
if uri
|
14
|
+
warn("[DEPRECATION] Please use Configuration#save_results_in with the database URI and options instead")
|
15
|
+
@config.save_results_in(uri, options)
|
16
|
+
end
|
17
17
|
end
|
18
18
|
|
19
19
|
# @abstract
|
@@ -21,36 +21,8 @@ module Linkage
|
|
21
21
|
raise NotImplementedError
|
22
22
|
end
|
23
23
|
|
24
|
-
|
25
|
-
|
26
|
-
def database(&block)
|
27
|
-
Sequel.connect(@uri, @options, &block)
|
28
|
-
end
|
29
|
-
|
30
|
-
def create_tables
|
31
|
-
database do |db|
|
32
|
-
schema = config.groups_table_schema
|
33
|
-
db.create_table(:groups) do
|
34
|
-
schema.each { |col| column(*col) }
|
35
|
-
end
|
36
|
-
|
37
|
-
pk_type = config.dataset_1.primary_key.merge(config.dataset_2.primary_key).ruby_type
|
38
|
-
db.create_table(:groups_records) do
|
39
|
-
column(:record_id, pk_type[:type], pk_type[:opts] || {})
|
40
|
-
Integer :group_id
|
41
|
-
Integer :dataset
|
42
|
-
index :group_id
|
43
|
-
end
|
44
|
-
end
|
45
|
-
end
|
46
|
-
|
47
|
-
def next_group_id
|
48
|
-
result = nil
|
49
|
-
@next_group_mutex.synchronize do
|
50
|
-
result = @next_group_id
|
51
|
-
@next_group_id += 1
|
52
|
-
end
|
53
|
-
result
|
24
|
+
def result_set
|
25
|
+
@config.result_set
|
54
26
|
end
|
55
27
|
end
|
56
28
|
end
|
data/lib/linkage.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'pathname'
|
2
|
+
require 'delegate'
|
2
3
|
require 'sequel'
|
3
4
|
|
4
5
|
module Linkage
|
@@ -9,10 +10,11 @@ require path + 'utils'
|
|
9
10
|
require path + 'warnings'
|
10
11
|
require path + 'dataset'
|
11
12
|
require path + 'runner'
|
12
|
-
require path + 'expectation'
|
13
13
|
require path + 'data'
|
14
14
|
require path + 'field'
|
15
15
|
require path + 'function'
|
16
16
|
require path + 'group'
|
17
17
|
require path + 'import_buffer'
|
18
18
|
require path + 'configuration'
|
19
|
+
require path + 'result_set'
|
20
|
+
require path + 'field_set'
|
data/linkage.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "linkage"
|
8
|
-
s.version = "0.0.
|
8
|
+
s.version = "0.0.3"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Jeremy Stephens"]
|
12
|
-
s.date = "
|
12
|
+
s.date = "2012-02-28"
|
13
13
|
s.description = "Wraps Sequel to perform record linkage between one or two datasets"
|
14
14
|
s.email = "jeremy.f.stephens@vanderbilt.edu"
|
15
15
|
s.extra_rdoc_files = [
|
@@ -30,12 +30,13 @@ Gem::Specification.new do |s|
|
|
30
30
|
"lib/linkage/configuration.rb",
|
31
31
|
"lib/linkage/data.rb",
|
32
32
|
"lib/linkage/dataset.rb",
|
33
|
-
"lib/linkage/expectation.rb",
|
34
33
|
"lib/linkage/field.rb",
|
34
|
+
"lib/linkage/field_set.rb",
|
35
35
|
"lib/linkage/function.rb",
|
36
36
|
"lib/linkage/functions/trim.rb",
|
37
37
|
"lib/linkage/group.rb",
|
38
38
|
"lib/linkage/import_buffer.rb",
|
39
|
+
"lib/linkage/result_set.rb",
|
39
40
|
"lib/linkage/runner.rb",
|
40
41
|
"lib/linkage/runner/single_threaded.rb",
|
41
42
|
"lib/linkage/utils.rb",
|
@@ -44,6 +45,7 @@ Gem::Specification.new do |s|
|
|
44
45
|
"test/config.yml",
|
45
46
|
"test/helper.rb",
|
46
47
|
"test/integration/test_cross_linkage.rb",
|
48
|
+
"test/integration/test_dataset.rb",
|
47
49
|
"test/integration/test_dual_linkage.rb",
|
48
50
|
"test/integration/test_self_linkage.rb",
|
49
51
|
"test/unit/functions/test_trim.rb",
|
@@ -51,19 +53,20 @@ Gem::Specification.new do |s|
|
|
51
53
|
"test/unit/test_configuration.rb",
|
52
54
|
"test/unit/test_data.rb",
|
53
55
|
"test/unit/test_dataset.rb",
|
54
|
-
"test/unit/test_expectation.rb",
|
55
56
|
"test/unit/test_field.rb",
|
57
|
+
"test/unit/test_field_set.rb",
|
56
58
|
"test/unit/test_function.rb",
|
57
59
|
"test/unit/test_group.rb",
|
58
60
|
"test/unit/test_import_buffer.rb",
|
59
61
|
"test/unit/test_linkage.rb",
|
62
|
+
"test/unit/test_result_set.rb",
|
60
63
|
"test/unit/test_runner.rb",
|
61
64
|
"test/unit/test_utils.rb"
|
62
65
|
]
|
63
66
|
s.homepage = "http://github.com/coupler/linkage"
|
64
67
|
s.licenses = ["MIT"]
|
65
68
|
s.require_paths = ["lib"]
|
66
|
-
s.rubygems_version = "1.
|
69
|
+
s.rubygems_version = "1.3.9.4"
|
67
70
|
s.summary = "Sequel-based record linkage"
|
68
71
|
|
69
72
|
if s.respond_to? :specification_version then
|
@@ -73,54 +76,48 @@ Gem::Specification.new do |s|
|
|
73
76
|
s.add_runtime_dependency(%q<sequel>, [">= 0"])
|
74
77
|
s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
|
75
78
|
s.add_development_dependency(%q<jeweler>, ["~> 1.6.4"])
|
76
|
-
s.add_development_dependency(%q<rcov>, [">= 0"])
|
77
|
-
s.add_development_dependency(%q<guard-test>, [">= 0"])
|
78
79
|
s.add_development_dependency(%q<test-unit>, ["= 2.3.2"])
|
79
80
|
s.add_development_dependency(%q<mocha>, [">= 0"])
|
80
81
|
s.add_development_dependency(%q<sqlite3>, [">= 0"])
|
81
82
|
s.add_development_dependency(%q<yard>, [">= 0"])
|
82
83
|
s.add_development_dependency(%q<rake>, [">= 0"])
|
83
84
|
s.add_development_dependency(%q<versionomy>, [">= 0"])
|
84
|
-
s.add_development_dependency(%q<guard-yard>, [">= 0"])
|
85
|
-
s.add_development_dependency(%q<rb-inotify>, [">= 0"])
|
86
85
|
s.add_development_dependency(%q<mysql2>, [">= 0"])
|
87
86
|
s.add_development_dependency(%q<pry>, [">= 0"])
|
88
87
|
s.add_development_dependency(%q<rdiscount>, [">= 0"])
|
88
|
+
s.add_development_dependency(%q<guard-test>, [">= 0"])
|
89
|
+
s.add_development_dependency(%q<guard-yard>, [">= 0"])
|
89
90
|
else
|
90
91
|
s.add_dependency(%q<sequel>, [">= 0"])
|
91
92
|
s.add_dependency(%q<bundler>, ["~> 1.0.0"])
|
92
93
|
s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
|
93
|
-
s.add_dependency(%q<rcov>, [">= 0"])
|
94
|
-
s.add_dependency(%q<guard-test>, [">= 0"])
|
95
94
|
s.add_dependency(%q<test-unit>, ["= 2.3.2"])
|
96
95
|
s.add_dependency(%q<mocha>, [">= 0"])
|
97
96
|
s.add_dependency(%q<sqlite3>, [">= 0"])
|
98
97
|
s.add_dependency(%q<yard>, [">= 0"])
|
99
98
|
s.add_dependency(%q<rake>, [">= 0"])
|
100
99
|
s.add_dependency(%q<versionomy>, [">= 0"])
|
101
|
-
s.add_dependency(%q<guard-yard>, [">= 0"])
|
102
|
-
s.add_dependency(%q<rb-inotify>, [">= 0"])
|
103
100
|
s.add_dependency(%q<mysql2>, [">= 0"])
|
104
101
|
s.add_dependency(%q<pry>, [">= 0"])
|
105
102
|
s.add_dependency(%q<rdiscount>, [">= 0"])
|
103
|
+
s.add_dependency(%q<guard-test>, [">= 0"])
|
104
|
+
s.add_dependency(%q<guard-yard>, [">= 0"])
|
106
105
|
end
|
107
106
|
else
|
108
107
|
s.add_dependency(%q<sequel>, [">= 0"])
|
109
108
|
s.add_dependency(%q<bundler>, ["~> 1.0.0"])
|
110
109
|
s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
|
111
|
-
s.add_dependency(%q<rcov>, [">= 0"])
|
112
|
-
s.add_dependency(%q<guard-test>, [">= 0"])
|
113
110
|
s.add_dependency(%q<test-unit>, ["= 2.3.2"])
|
114
111
|
s.add_dependency(%q<mocha>, [">= 0"])
|
115
112
|
s.add_dependency(%q<sqlite3>, [">= 0"])
|
116
113
|
s.add_dependency(%q<yard>, [">= 0"])
|
117
114
|
s.add_dependency(%q<rake>, [">= 0"])
|
118
115
|
s.add_dependency(%q<versionomy>, [">= 0"])
|
119
|
-
s.add_dependency(%q<guard-yard>, [">= 0"])
|
120
|
-
s.add_dependency(%q<rb-inotify>, [">= 0"])
|
121
116
|
s.add_dependency(%q<mysql2>, [">= 0"])
|
122
117
|
s.add_dependency(%q<pry>, [">= 0"])
|
123
118
|
s.add_dependency(%q<rdiscount>, [">= 0"])
|
119
|
+
s.add_dependency(%q<guard-test>, [">= 0"])
|
120
|
+
s.add_dependency(%q<guard-yard>, [">= 0"])
|
124
121
|
end
|
125
122
|
end
|
126
123
|
|