linkage 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +2 -1
- data/Gemfile.lock +19 -8
- data/README.markdown +8 -5
- data/Rakefile +0 -8
- data/VERSION +1 -1
- data/lib/linkage/configuration.rb +245 -157
- data/lib/linkage/data.rb +0 -24
- data/lib/linkage/dataset.rb +26 -183
- data/lib/linkage/field.rb +0 -3
- data/lib/linkage/field_set.rb +16 -0
- data/lib/linkage/function.rb +0 -7
- data/lib/linkage/result_set.rb +68 -0
- data/lib/linkage/runner/single_threaded.rb +29 -39
- data/lib/linkage/runner.rb +8 -36
- data/lib/linkage.rb +3 -1
- data/linkage.gemspec +14 -17
- data/test/helper.rb +1 -1
- data/test/integration/test_cross_linkage.rb +6 -2
- data/test/integration/test_dataset.rb +30 -0
- data/test/integration/test_dual_linkage.rb +9 -4
- data/test/integration/test_self_linkage.rb +23 -8
- data/test/unit/test_configuration.rb +90 -72
- data/test/unit/test_data.rb +0 -61
- data/test/unit/test_dataset.rb +19 -319
- data/test/unit/test_field.rb +0 -6
- data/test/unit/test_field_set.rb +31 -0
- data/test/unit/test_function.rb +6 -30
- data/test/unit/test_result_set.rb +18 -0
- data/test/unit/test_runner.rb +20 -5
- metadata +57 -41
- data/lib/linkage/expectation.rb +0 -138
- data/test/unit/test_expectation.rb +0 -390
data/lib/linkage/dataset.rb
CHANGED
@@ -1,205 +1,48 @@
|
|
1
1
|
module Linkage
|
2
|
-
|
3
|
-
|
4
|
-
@@next_id = 1 # Internal ID used for expectations
|
5
|
-
@@next_id_mutex = Mutex.new
|
2
|
+
class Dataset < Delegator
|
3
|
+
attr_reader :field_set, :table_name
|
6
4
|
|
7
|
-
# @private
|
8
|
-
def self.next_id
|
9
|
-
result = nil
|
10
|
-
@@next_id_mutex.synchronize do
|
11
|
-
result = @@next_id
|
12
|
-
@@next_id += 1
|
13
|
-
end
|
14
|
-
result
|
15
|
-
end
|
16
|
-
|
17
|
-
# @return [Array] Schema information about the dataset's primary key
|
18
|
-
attr_reader :primary_key
|
19
|
-
|
20
|
-
# @return [Array] Schema information for this dataset
|
21
|
-
attr_reader :schema
|
22
|
-
|
23
|
-
# @return [String] Database URI
|
24
|
-
attr_reader :uri
|
25
|
-
|
26
|
-
# @return [Symbol] Database table name
|
27
|
-
attr_reader :table
|
28
|
-
|
29
|
-
# @return [Array<Linkage::Field>] List of {Linkage::Field}'s
|
30
|
-
attr_reader :fields
|
31
|
-
|
32
|
-
# @private
|
33
|
-
attr_reader :id
|
34
|
-
|
35
|
-
# @param [String] uri Sequel-style database URI
|
36
|
-
# @param [String, Symbol] table Database table name
|
37
|
-
# @param [Hash] options Options to pass to Sequel.connect
|
38
|
-
# @see http://sequel.rubyforge.org/rdoc/files/doc/opening_databases_rdoc.html Sequel: Connecting to a database
|
39
5
|
def initialize(uri, table, options = {})
|
40
|
-
@
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
database { |db| schema = db.schema(@table) }
|
46
|
-
@schema = schema
|
47
|
-
@order = []
|
48
|
-
@select = []
|
49
|
-
@filter = []
|
50
|
-
create_fields
|
6
|
+
@table_name = table.to_sym
|
7
|
+
db = Sequel.connect(uri, options)
|
8
|
+
ds = db[@table_name]
|
9
|
+
super(ds)
|
10
|
+
@field_set = FieldSet.new(db.schema(@table_name))
|
51
11
|
end
|
52
12
|
|
13
|
+
def __setobj__(obj); @dataset = obj; end
|
14
|
+
def __getobj__; @dataset; end
|
15
|
+
|
53
16
|
# Setup a linkage with another dataset
|
54
17
|
#
|
55
18
|
# @return [Linkage::Configuration]
|
56
19
|
def link_with(dataset, &block)
|
57
20
|
conf = Configuration.new(self, dataset)
|
58
|
-
conf.
|
21
|
+
conf.configure(&block)
|
59
22
|
conf
|
60
23
|
end
|
61
24
|
|
62
|
-
|
63
|
-
|
64
|
-
# @return [Boolean]
|
65
|
-
def ==(other)
|
66
|
-
if !other.is_a?(Dataset)
|
67
|
-
super
|
68
|
-
else
|
69
|
-
uri == other.uri && table == other.table
|
70
|
-
end
|
71
|
-
end
|
72
|
-
|
73
|
-
# Create a copy of this instance of Dataset, using {Dataset#initialize}.
|
74
|
-
#
|
75
|
-
# @return [Linkage::Dataset]
|
76
|
-
def dup
|
77
|
-
self.class.new(uri, table)
|
78
|
-
end
|
79
|
-
|
80
|
-
# Clone the dataset and its associated {Linkage::Field}'s (without hitting
|
81
|
-
# the database).
|
82
|
-
#
|
83
|
-
# @return [Linkage::Dataset]
|
84
|
-
def clone
|
85
|
-
other = self.class.allocate
|
86
|
-
other.send(:initialize_copy, self, {
|
87
|
-
:order => @order.clone, :select => @select.clone,
|
88
|
-
:filter => @filter.clone, :options => @options.clone
|
89
|
-
})
|
90
|
-
end
|
91
|
-
|
92
|
-
# Add a data source to use for ordering the dataset.
|
93
|
-
#
|
94
|
-
# @param [Linkage::Data] data
|
95
|
-
# @param [nil, Symbol] desc nil or :desc (for descending order)
|
96
|
-
def add_order(data, desc = nil)
|
97
|
-
expr = desc == :desc ? data.to_expr.desc : data.to_expr
|
98
|
-
unless @order.include?(expr)
|
99
|
-
@order << expr
|
100
|
-
end
|
101
|
-
end
|
102
|
-
|
103
|
-
# Add a data source to be selected on the dataset. If you don't add any
|
104
|
-
# selects, all fields will be selected. The primary key is always selected
|
105
|
-
# in either case.
|
106
|
-
#
|
107
|
-
# @param [Linkage::Data] data
|
108
|
-
# @param [Symbol] as Optional field/function alias
|
109
|
-
def add_select(data, as = nil)
|
110
|
-
expr = as ? data.to_expr.as(as) : data.to_expr
|
111
|
-
unless @select.include?(expr)
|
112
|
-
@select << expr
|
113
|
-
end
|
25
|
+
def adapter_scheme
|
26
|
+
@dataset.db.adapter_scheme
|
114
27
|
end
|
115
28
|
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
arg1 = data.to_expr
|
123
|
-
arg2 = other.is_a?(Data) ? other.to_expr : other
|
124
|
-
expr =
|
125
|
-
case operator
|
126
|
-
when :==
|
127
|
-
{ arg1 => arg2 }
|
128
|
-
when :'!='
|
129
|
-
~{ arg1 => arg2 }
|
130
|
-
else
|
131
|
-
arg1 = Sequel::SQL::Identifier.new(arg1)
|
132
|
-
arg2 = arg2.is_a?(Symbol) ? Sequel::SQL::Identifier.new(arg2) : arg2
|
133
|
-
Sequel::SQL::BooleanExpression.new(operator, arg1, arg2)
|
134
|
-
end
|
135
|
-
@filter << expr
|
136
|
-
end
|
137
|
-
|
138
|
-
# Yield each row of the dataset in a block.
|
139
|
-
#
|
140
|
-
# @yield [row] A Hash of two elements, :pk and :values, where row[:pk] is
|
141
|
-
# the row's primary key value, and row[:values] is an array of all
|
142
|
-
# selected values (except the primary key).
|
143
|
-
def each
|
144
|
-
database do |db|
|
145
|
-
ds = db[@table]
|
146
|
-
|
147
|
-
pk = @primary_key.name
|
148
|
-
if !@select.empty?
|
149
|
-
ds = ds.select(pk, *@select)
|
150
|
-
end
|
151
|
-
if !@order.empty?
|
152
|
-
ds = ds.order(*@order)
|
153
|
-
end
|
154
|
-
if !@filter.empty?
|
155
|
-
ds = ds.filter(*@filter)
|
156
|
-
end
|
157
|
-
ds.each do |row|
|
158
|
-
yield({:pk => row.delete(pk), :values => row})
|
159
|
-
end
|
160
|
-
end
|
161
|
-
end
|
162
|
-
|
163
|
-
private
|
164
|
-
|
165
|
-
def initialize_copy(dataset, options = {})
|
166
|
-
@id = dataset.id
|
167
|
-
@uri = dataset.uri
|
168
|
-
@table = dataset.table
|
169
|
-
@schema = dataset.schema
|
170
|
-
@options = options[:options]
|
171
|
-
@order = options[:order]
|
172
|
-
@select = options[:select]
|
173
|
-
@filter = options[:filter]
|
174
|
-
@fields = dataset.fields.inject({}) do |hsh, (name, field)|
|
175
|
-
new_field = field.clone
|
176
|
-
new_field.dataset = self
|
177
|
-
hsh[name] = new_field
|
178
|
-
hsh
|
29
|
+
def initialize_clone(obj)
|
30
|
+
new_obj = obj.instance_variable_get(:@new_obj)
|
31
|
+
if new_obj
|
32
|
+
__setobj__(new_obj)
|
33
|
+
else
|
34
|
+
super
|
179
35
|
end
|
180
|
-
@primary_key = @fields[dataset.primary_key.name]
|
181
|
-
self
|
182
36
|
end
|
183
37
|
|
184
|
-
def
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
@schema.each do |(name, column_schema)|
|
191
|
-
f = Field.new(name, column_schema)
|
192
|
-
f.dataset = self
|
193
|
-
@fields[name] = f
|
194
|
-
|
195
|
-
if @primary_key.nil? && column_schema[:primary_key]
|
196
|
-
@primary_key = f
|
197
|
-
end
|
38
|
+
def method_missing(name, *args, &block)
|
39
|
+
result = super
|
40
|
+
if result.kind_of?(Sequel::Dataset)
|
41
|
+
@new_obj = result
|
42
|
+
result = clone
|
43
|
+
@new_obj = nil
|
198
44
|
end
|
199
|
-
|
200
|
-
|
201
|
-
def set_new_id
|
202
|
-
@id = self.class.next_id
|
45
|
+
result
|
203
46
|
end
|
204
47
|
end
|
205
48
|
end
|
data/lib/linkage/field.rb
CHANGED
@@ -5,9 +5,6 @@ module Linkage
|
|
5
5
|
# @return [Symbol] This field's schema information
|
6
6
|
attr_reader :schema
|
7
7
|
|
8
|
-
# @attr [Linkage::Dataset] This field's associated dataset
|
9
|
-
attr_accessor :dataset
|
10
|
-
|
11
8
|
# Create a new instance of Field.
|
12
9
|
#
|
13
10
|
# @param [Symbol] name The field's name
|
@@ -0,0 +1,16 @@
|
|
1
|
+
module Linkage
|
2
|
+
class FieldSet < Hash
|
3
|
+
attr_reader :primary_key
|
4
|
+
|
5
|
+
def initialize(schema)
|
6
|
+
schema.each do |(name, column_schema)|
|
7
|
+
f = Field.new(name, column_schema)
|
8
|
+
self[name] = f
|
9
|
+
|
10
|
+
if @primary_key.nil? && column_schema[:primary_key]
|
11
|
+
@primary_key = f
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
data/lib/linkage/function.rb
CHANGED
@@ -38,13 +38,10 @@ module Linkage
|
|
38
38
|
nil
|
39
39
|
end
|
40
40
|
|
41
|
-
attr_reader :dataset
|
42
|
-
|
43
41
|
# @param [Linkage::Field, Object] args Function arguments
|
44
42
|
def initialize(*args)
|
45
43
|
@names = [self.class.function_name]
|
46
44
|
@args = args
|
47
|
-
@dataset = nil
|
48
45
|
process_args
|
49
46
|
end
|
50
47
|
|
@@ -83,10 +80,6 @@ module Linkage
|
|
83
80
|
if arg.kind_of?(Data)
|
84
81
|
@names << arg.name
|
85
82
|
@static &&= arg.static?
|
86
|
-
if @dataset && !arg.static? && @dataset != arg.dataset
|
87
|
-
raise ArgumentError, "You cannot supply fields from different datasets as arguments to the same function)"
|
88
|
-
end
|
89
|
-
@dataset ||= arg.dataset
|
90
83
|
type = arg.ruby_type[:type]
|
91
84
|
value = arg.is_a?(Field) ? arg.name : arg.to_expr
|
92
85
|
else
|
@@ -0,0 +1,68 @@
|
|
1
|
+
module Linkage
|
2
|
+
class ResultSet
|
3
|
+
def initialize(config)
|
4
|
+
@config = config
|
5
|
+
@next_group_id = 1
|
6
|
+
@next_group_mutex = Mutex.new
|
7
|
+
end
|
8
|
+
|
9
|
+
def groups_dataset
|
10
|
+
Dataset.new(@config.results_uri, :groups, @config.results_uri_options)
|
11
|
+
end
|
12
|
+
|
13
|
+
def groups_records_dataset
|
14
|
+
Dataset.new(@config.results_uri, :groups_records, @config.results_uri_options)
|
15
|
+
end
|
16
|
+
|
17
|
+
def database(&block)
|
18
|
+
Sequel.connect(@config.results_uri, @config.results_uri_options, &block)
|
19
|
+
end
|
20
|
+
|
21
|
+
def create_tables!
|
22
|
+
database do |db|
|
23
|
+
schema = @config.groups_table_schema
|
24
|
+
db.create_table(:groups) do
|
25
|
+
schema.each { |col| column(*col) }
|
26
|
+
end
|
27
|
+
|
28
|
+
pk_type = @config.dataset_1.field_set.primary_key.merge(@config.dataset_2.field_set.primary_key).ruby_type
|
29
|
+
db.create_table(:groups_records) do
|
30
|
+
column(:record_id, pk_type[:type], pk_type[:opts] || {})
|
31
|
+
Integer :group_id
|
32
|
+
Integer :dataset
|
33
|
+
index :group_id
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
def add_group(group, dataset_id = nil)
|
39
|
+
if !@groups_buffer
|
40
|
+
groups_headers = [:id] + group.values.keys
|
41
|
+
@groups_buffer = ImportBuffer.new(@config.results_uri, :groups, groups_headers, @config.results_uri_options)
|
42
|
+
end
|
43
|
+
@groups_records_buffer ||= ImportBuffer.new(@config.results_uri, :groups_records, [:group_id, :dataset, :record_id], @config.results_uri_options)
|
44
|
+
|
45
|
+
group_id = next_group_id
|
46
|
+
@groups_buffer.add([group_id] + group.values.values)
|
47
|
+
group.records.each do |record_id|
|
48
|
+
@groups_records_buffer.add([group_id, dataset_id, record_id])
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
def flush!
|
53
|
+
@groups_buffer.flush if @groups_buffer
|
54
|
+
@groups_records_buffer.flush if @groups_records_buffer
|
55
|
+
end
|
56
|
+
|
57
|
+
private
|
58
|
+
|
59
|
+
def next_group_id
|
60
|
+
result = nil
|
61
|
+
@next_group_mutex.synchronize do
|
62
|
+
result = @next_group_id
|
63
|
+
@next_group_id += 1
|
64
|
+
end
|
65
|
+
result
|
66
|
+
end
|
67
|
+
end
|
68
|
+
end
|
@@ -3,29 +3,36 @@ module Linkage
|
|
3
3
|
#
|
4
4
|
# @see Runner
|
5
5
|
class SingleThreadedRunner < Runner
|
6
|
+
# @return [Linkage::ResultSet]
|
6
7
|
def execute
|
7
|
-
create_tables
|
8
8
|
setup_datasets
|
9
9
|
apply_expectations
|
10
10
|
group_records
|
11
|
-
|
11
|
+
|
12
|
+
return result_set
|
12
13
|
end
|
13
14
|
|
14
15
|
private
|
15
16
|
|
16
17
|
def setup_datasets
|
17
|
-
|
18
|
-
@
|
18
|
+
pk = config.dataset_1.field_set.primary_key
|
19
|
+
@dataset_1 = config.dataset_1.select(pk.to_expr)
|
20
|
+
if @config.linkage_type != :self
|
21
|
+
pk = config.dataset_2.field_set.primary_key
|
22
|
+
@dataset_2 = config.dataset_2.select(pk.to_expr)
|
23
|
+
end
|
19
24
|
end
|
20
25
|
|
21
26
|
def apply_expectations
|
22
27
|
config.expectations.each do |exp|
|
23
|
-
exp.apply_to(@dataset_1)
|
24
|
-
exp.apply_to(@dataset_2) if config.linkage_type != :self
|
28
|
+
@dataset_1 = exp.apply_to(@dataset_1, :lhs)
|
29
|
+
@dataset_2 = exp.apply_to(@dataset_2, :rhs) if config.linkage_type != :self
|
25
30
|
end
|
26
31
|
end
|
27
32
|
|
28
33
|
def group_records
|
34
|
+
result_set.create_tables!
|
35
|
+
|
29
36
|
if config.linkage_type == :self
|
30
37
|
group_records_for(@dataset_1, 1)
|
31
38
|
else
|
@@ -39,59 +46,42 @@ module Linkage
|
|
39
46
|
# @param [Fixnum, nil] dataset_id
|
40
47
|
# @param [Boolean] ignore_empty_groups
|
41
48
|
# @yield [Linkage::Group] If a block is given, yield completed groups to
|
42
|
-
# the block. Otherwise, call
|
49
|
+
# the block. Otherwise, call ResultSet#add_group on the group.
|
43
50
|
def group_records_for(dataset, dataset_id = nil, ignore_empty_groups = true, &block)
|
44
51
|
current_group = nil
|
45
|
-
block ||= lambda { |group|
|
52
|
+
block ||= lambda { |group| result_set.add_group(current_group, dataset_id) }
|
53
|
+
primary_key = dataset.field_set.primary_key.to_expr
|
46
54
|
dataset.each do |row|
|
47
|
-
|
55
|
+
pk = row.delete(primary_key)
|
56
|
+
if current_group.nil? || !current_group.matches?(row)
|
48
57
|
if current_group && (!ignore_empty_groups || current_group.count > 1)
|
49
58
|
block.call(current_group)
|
50
59
|
end
|
51
|
-
new_group = Group.new(row
|
60
|
+
new_group = Group.new(row)
|
52
61
|
current_group = new_group
|
53
62
|
end
|
54
|
-
current_group.add_record(
|
63
|
+
current_group.add_record(pk)
|
55
64
|
end
|
56
65
|
if current_group && (!ignore_empty_groups || current_group.count > 1)
|
57
66
|
block.call(current_group)
|
58
67
|
end
|
59
|
-
|
60
|
-
end
|
61
|
-
|
62
|
-
def save_group(group, dataset_id = nil)
|
63
|
-
if !@groups_buffer
|
64
|
-
groups_headers = [:id] + group.values.keys
|
65
|
-
@groups_buffer = ImportBuffer.new(@uri, :groups, groups_headers, @options)
|
66
|
-
end
|
67
|
-
@groups_records_buffer ||= ImportBuffer.new(@uri, :groups_records, [:group_id, :dataset, :record_id], @options)
|
68
|
-
|
69
|
-
group_id = next_group_id
|
70
|
-
@groups_buffer.add([group_id] + group.values.values)
|
71
|
-
group.records.each do |record_id|
|
72
|
-
@groups_records_buffer.add([group_id, dataset_id, record_id])
|
73
|
-
end
|
74
|
-
end
|
75
|
-
|
76
|
-
def flush_buffers
|
77
|
-
@groups_buffer.flush if @groups_buffer
|
78
|
-
@groups_records_buffer.flush if @groups_records_buffer
|
68
|
+
result_set.flush!
|
79
69
|
end
|
80
70
|
|
81
71
|
def combine_groups
|
82
72
|
# Create a new dataset for the groups table
|
83
|
-
|
84
|
-
|
73
|
+
groups_dataset = result_set.groups_dataset
|
74
|
+
|
75
|
+
exprs = groups_dataset.field_set.values.inject([]) do |arr, field|
|
85
76
|
# Sort on all fields
|
86
|
-
|
87
|
-
ds.add_order(field)
|
88
|
-
ds.add_select(field)
|
77
|
+
field.primary_key? ? arr : arr << field.to_expr
|
89
78
|
end
|
90
|
-
|
91
|
-
|
79
|
+
groups_dataset = groups_dataset.select(*exprs, groups_dataset.field_set.primary_key.to_expr).order(*exprs) # ensure matching groups are sorted by id
|
80
|
+
|
81
|
+
result_set.database do |db|
|
92
82
|
groups_to_delete = []
|
93
83
|
db.transaction do # for speed reasons
|
94
|
-
group_records_for(
|
84
|
+
group_records_for(groups_dataset, nil, false) do |group|
|
95
85
|
if group.count == 1
|
96
86
|
# Delete the empty group
|
97
87
|
groups_to_delete << group.records[0]
|
data/lib/linkage/runner.rb
CHANGED
@@ -1,19 +1,19 @@
|
|
1
1
|
module Linkage
|
2
2
|
# Use this class to run a configuration created by {Dataset#link_with}.
|
3
3
|
class Runner
|
4
|
-
attr_reader :config
|
4
|
+
attr_reader :config, :result_set
|
5
5
|
|
6
6
|
# @param [Linkage::Configuration] config
|
7
7
|
# @param [String] uri Sequel-style database URI
|
8
8
|
# @param [Hash] options Sequel.connect options
|
9
9
|
# @see Dataset#link_with
|
10
10
|
# @see http://sequel.rubyforge.org/rdoc/files/doc/opening_databases_rdoc.html Sequel: Connecting to a database
|
11
|
-
def initialize(config, uri, options = {})
|
11
|
+
def initialize(config, uri = nil, options = {})
|
12
12
|
@config = config
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
13
|
+
if uri
|
14
|
+
warn("[DEPRECATION] Please use Configuration#save_results_in with the database URI and options instead")
|
15
|
+
@config.save_results_in(uri, options)
|
16
|
+
end
|
17
17
|
end
|
18
18
|
|
19
19
|
# @abstract
|
@@ -21,36 +21,8 @@ module Linkage
|
|
21
21
|
raise NotImplementedError
|
22
22
|
end
|
23
23
|
|
24
|
-
|
25
|
-
|
26
|
-
def database(&block)
|
27
|
-
Sequel.connect(@uri, @options, &block)
|
28
|
-
end
|
29
|
-
|
30
|
-
def create_tables
|
31
|
-
database do |db|
|
32
|
-
schema = config.groups_table_schema
|
33
|
-
db.create_table(:groups) do
|
34
|
-
schema.each { |col| column(*col) }
|
35
|
-
end
|
36
|
-
|
37
|
-
pk_type = config.dataset_1.primary_key.merge(config.dataset_2.primary_key).ruby_type
|
38
|
-
db.create_table(:groups_records) do
|
39
|
-
column(:record_id, pk_type[:type], pk_type[:opts] || {})
|
40
|
-
Integer :group_id
|
41
|
-
Integer :dataset
|
42
|
-
index :group_id
|
43
|
-
end
|
44
|
-
end
|
45
|
-
end
|
46
|
-
|
47
|
-
def next_group_id
|
48
|
-
result = nil
|
49
|
-
@next_group_mutex.synchronize do
|
50
|
-
result = @next_group_id
|
51
|
-
@next_group_id += 1
|
52
|
-
end
|
53
|
-
result
|
24
|
+
def result_set
|
25
|
+
@config.result_set
|
54
26
|
end
|
55
27
|
end
|
56
28
|
end
|
data/lib/linkage.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'pathname'
|
2
|
+
require 'delegate'
|
2
3
|
require 'sequel'
|
3
4
|
|
4
5
|
module Linkage
|
@@ -9,10 +10,11 @@ require path + 'utils'
|
|
9
10
|
require path + 'warnings'
|
10
11
|
require path + 'dataset'
|
11
12
|
require path + 'runner'
|
12
|
-
require path + 'expectation'
|
13
13
|
require path + 'data'
|
14
14
|
require path + 'field'
|
15
15
|
require path + 'function'
|
16
16
|
require path + 'group'
|
17
17
|
require path + 'import_buffer'
|
18
18
|
require path + 'configuration'
|
19
|
+
require path + 'result_set'
|
20
|
+
require path + 'field_set'
|
data/linkage.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "linkage"
|
8
|
-
s.version = "0.0.2"
|
8
|
+
s.version = "0.0.3"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Jeremy Stephens"]
|
12
|
-
s.date = "
|
12
|
+
s.date = "2012-02-28"
|
13
13
|
s.description = "Wraps Sequel to perform record linkage between one or two datasets"
|
14
14
|
s.email = "jeremy.f.stephens@vanderbilt.edu"
|
15
15
|
s.extra_rdoc_files = [
|
@@ -30,12 +30,13 @@ Gem::Specification.new do |s|
|
|
30
30
|
"lib/linkage/configuration.rb",
|
31
31
|
"lib/linkage/data.rb",
|
32
32
|
"lib/linkage/dataset.rb",
|
33
|
-
"lib/linkage/expectation.rb",
|
34
33
|
"lib/linkage/field.rb",
|
34
|
+
"lib/linkage/field_set.rb",
|
35
35
|
"lib/linkage/function.rb",
|
36
36
|
"lib/linkage/functions/trim.rb",
|
37
37
|
"lib/linkage/group.rb",
|
38
38
|
"lib/linkage/import_buffer.rb",
|
39
|
+
"lib/linkage/result_set.rb",
|
39
40
|
"lib/linkage/runner.rb",
|
40
41
|
"lib/linkage/runner/single_threaded.rb",
|
41
42
|
"lib/linkage/utils.rb",
|
@@ -44,6 +45,7 @@ Gem::Specification.new do |s|
|
|
44
45
|
"test/config.yml",
|
45
46
|
"test/helper.rb",
|
46
47
|
"test/integration/test_cross_linkage.rb",
|
48
|
+
"test/integration/test_dataset.rb",
|
47
49
|
"test/integration/test_dual_linkage.rb",
|
48
50
|
"test/integration/test_self_linkage.rb",
|
49
51
|
"test/unit/functions/test_trim.rb",
|
@@ -51,19 +53,20 @@ Gem::Specification.new do |s|
|
|
51
53
|
"test/unit/test_configuration.rb",
|
52
54
|
"test/unit/test_data.rb",
|
53
55
|
"test/unit/test_dataset.rb",
|
54
|
-
"test/unit/test_expectation.rb",
|
55
56
|
"test/unit/test_field.rb",
|
57
|
+
"test/unit/test_field_set.rb",
|
56
58
|
"test/unit/test_function.rb",
|
57
59
|
"test/unit/test_group.rb",
|
58
60
|
"test/unit/test_import_buffer.rb",
|
59
61
|
"test/unit/test_linkage.rb",
|
62
|
+
"test/unit/test_result_set.rb",
|
60
63
|
"test/unit/test_runner.rb",
|
61
64
|
"test/unit/test_utils.rb"
|
62
65
|
]
|
63
66
|
s.homepage = "http://github.com/coupler/linkage"
|
64
67
|
s.licenses = ["MIT"]
|
65
68
|
s.require_paths = ["lib"]
|
66
|
-
s.rubygems_version = "1.
|
69
|
+
s.rubygems_version = "1.3.9.4"
|
67
70
|
s.summary = "Sequel-based record linkage"
|
68
71
|
|
69
72
|
if s.respond_to? :specification_version then
|
@@ -73,54 +76,48 @@ Gem::Specification.new do |s|
|
|
73
76
|
s.add_runtime_dependency(%q<sequel>, [">= 0"])
|
74
77
|
s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
|
75
78
|
s.add_development_dependency(%q<jeweler>, ["~> 1.6.4"])
|
76
|
-
s.add_development_dependency(%q<rcov>, [">= 0"])
|
77
|
-
s.add_development_dependency(%q<guard-test>, [">= 0"])
|
78
79
|
s.add_development_dependency(%q<test-unit>, ["= 2.3.2"])
|
79
80
|
s.add_development_dependency(%q<mocha>, [">= 0"])
|
80
81
|
s.add_development_dependency(%q<sqlite3>, [">= 0"])
|
81
82
|
s.add_development_dependency(%q<yard>, [">= 0"])
|
82
83
|
s.add_development_dependency(%q<rake>, [">= 0"])
|
83
84
|
s.add_development_dependency(%q<versionomy>, [">= 0"])
|
84
|
-
s.add_development_dependency(%q<guard-yard>, [">= 0"])
|
85
|
-
s.add_development_dependency(%q<rb-inotify>, [">= 0"])
|
86
85
|
s.add_development_dependency(%q<mysql2>, [">= 0"])
|
87
86
|
s.add_development_dependency(%q<pry>, [">= 0"])
|
88
87
|
s.add_development_dependency(%q<rdiscount>, [">= 0"])
|
88
|
+
s.add_development_dependency(%q<guard-test>, [">= 0"])
|
89
|
+
s.add_development_dependency(%q<guard-yard>, [">= 0"])
|
89
90
|
else
|
90
91
|
s.add_dependency(%q<sequel>, [">= 0"])
|
91
92
|
s.add_dependency(%q<bundler>, ["~> 1.0.0"])
|
92
93
|
s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
|
93
|
-
s.add_dependency(%q<rcov>, [">= 0"])
|
94
|
-
s.add_dependency(%q<guard-test>, [">= 0"])
|
95
94
|
s.add_dependency(%q<test-unit>, ["= 2.3.2"])
|
96
95
|
s.add_dependency(%q<mocha>, [">= 0"])
|
97
96
|
s.add_dependency(%q<sqlite3>, [">= 0"])
|
98
97
|
s.add_dependency(%q<yard>, [">= 0"])
|
99
98
|
s.add_dependency(%q<rake>, [">= 0"])
|
100
99
|
s.add_dependency(%q<versionomy>, [">= 0"])
|
101
|
-
s.add_dependency(%q<guard-yard>, [">= 0"])
|
102
|
-
s.add_dependency(%q<rb-inotify>, [">= 0"])
|
103
100
|
s.add_dependency(%q<mysql2>, [">= 0"])
|
104
101
|
s.add_dependency(%q<pry>, [">= 0"])
|
105
102
|
s.add_dependency(%q<rdiscount>, [">= 0"])
|
103
|
+
s.add_dependency(%q<guard-test>, [">= 0"])
|
104
|
+
s.add_dependency(%q<guard-yard>, [">= 0"])
|
106
105
|
end
|
107
106
|
else
|
108
107
|
s.add_dependency(%q<sequel>, [">= 0"])
|
109
108
|
s.add_dependency(%q<bundler>, ["~> 1.0.0"])
|
110
109
|
s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
|
111
|
-
s.add_dependency(%q<rcov>, [">= 0"])
|
112
|
-
s.add_dependency(%q<guard-test>, [">= 0"])
|
113
110
|
s.add_dependency(%q<test-unit>, ["= 2.3.2"])
|
114
111
|
s.add_dependency(%q<mocha>, [">= 0"])
|
115
112
|
s.add_dependency(%q<sqlite3>, [">= 0"])
|
116
113
|
s.add_dependency(%q<yard>, [">= 0"])
|
117
114
|
s.add_dependency(%q<rake>, [">= 0"])
|
118
115
|
s.add_dependency(%q<versionomy>, [">= 0"])
|
119
|
-
s.add_dependency(%q<guard-yard>, [">= 0"])
|
120
|
-
s.add_dependency(%q<rb-inotify>, [">= 0"])
|
121
116
|
s.add_dependency(%q<mysql2>, [">= 0"])
|
122
117
|
s.add_dependency(%q<pry>, [">= 0"])
|
123
118
|
s.add_dependency(%q<rdiscount>, [">= 0"])
|
119
|
+
s.add_dependency(%q<guard-test>, [">= 0"])
|
120
|
+
s.add_dependency(%q<guard-yard>, [">= 0"])
|
124
121
|
end
|
125
122
|
end
|
126
123
|
|