linkage 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Gemfile CHANGED
@@ -5,7 +5,6 @@ gem "sequel"
5
5
  group :development do
6
6
  gem "bundler", "~> 1.0.0"
7
7
  gem "jeweler", "~> 1.6.4"
8
- gem "rcov", ">= 0"
9
8
  gem "test-unit", "2.3.2"
10
9
  gem "mocha"
11
10
  gem "sqlite3"
@@ -15,4 +14,6 @@ group :development do
15
14
  gem "mysql2"
16
15
  gem 'pry'
17
16
  gem 'rdiscount'
17
+ gem 'guard-test'
18
+ gem 'guard-yard'
18
19
  end
data/Gemfile.lock CHANGED
@@ -3,7 +3,17 @@ GEM
3
3
  specs:
4
4
  blockenspiel (0.4.3)
5
5
  coderay (0.9.8)
6
+ ffi (1.0.11)
6
7
  git (1.2.5)
8
+ guard (1.0.0)
9
+ ffi (>= 0.5.0)
10
+ thor (~> 0.14.6)
11
+ guard-test (0.4.3)
12
+ guard (>= 0.4)
13
+ test-unit (~> 2.2)
14
+ guard-yard (1.0.2)
15
+ guard (>= 0.2.2)
16
+ yard (>= 0.7.0)
7
17
  jeweler (1.6.4)
8
18
  bundler (~> 1.0)
9
19
  git (>= 1.2.5)
@@ -11,39 +21,40 @@ GEM
11
21
  metaclass (0.0.1)
12
22
  method_source (0.6.7)
13
23
  ruby_parser (>= 2.3.1)
14
- mocha (0.10.0)
24
+ mocha (0.10.3)
15
25
  metaclass (~> 0.0.1)
16
- mysql2 (0.3.10)
26
+ mysql2 (0.3.11)
17
27
  pry (0.9.7.4)
18
28
  coderay (~> 0.9.8)
19
29
  method_source (~> 0.6.7)
20
30
  ruby_parser (>= 2.3.1)
21
31
  slop (~> 2.1.0)
22
32
  rake (0.9.2.2)
23
- rcov (0.9.11)
24
33
  rdiscount (1.6.8)
25
34
  ruby_parser (2.3.1)
26
35
  sexp_processor (~> 3.0)
27
- sequel (3.29.0)
28
- sexp_processor (3.0.8)
36
+ sequel (3.31.0)
37
+ sexp_processor (3.0.10)
29
38
  slop (2.1.0)
30
- sqlite3 (1.3.4)
39
+ sqlite3 (1.3.5)
31
40
  test-unit (2.3.2)
41
+ thor (0.14.6)
32
42
  versionomy (0.4.1)
33
43
  blockenspiel (>= 0.4.1)
34
- yard (0.7.3)
44
+ yard (0.7.4)
35
45
 
36
46
  PLATFORMS
37
47
  ruby
38
48
 
39
49
  DEPENDENCIES
40
50
  bundler (~> 1.0.0)
51
+ guard-test
52
+ guard-yard
41
53
  jeweler (~> 1.6.4)
42
54
  mocha
43
55
  mysql2
44
56
  pry
45
57
  rake
46
- rcov
47
58
  rdiscount
48
59
  sequel
49
60
  sqlite3
data/README.markdown CHANGED
@@ -19,6 +19,8 @@ To describe a linkage, you use the `Dataset#link_with` method.
19
19
  lhs[:last_name].must == rhs[:parent_last_name]
20
20
  lhs[:last_name].must_not == "Smith" # exclude parents with the last
21
21
  # name "Smith"
22
+
23
+ save_results_in('sqlite://results.db') # see below
22
24
  end
23
25
 
24
26
  Note that the datasets don't have to be in the same database, or even on
@@ -27,13 +29,14 @@ the same machine.
27
29
  To run a linkage, use a Runner with the resulting configuration from
28
30
  `Dataset#link_with`:
29
31
 
30
- runner = Linkage::SingleThreadedRunner.new(config, 'sqlite://results.db')
32
+ runner = Linkage::SingleThreadedRunner.new(config)
31
33
  runner.execute
32
34
 
33
- The runner needs a database URI, since it stores its results in two
34
- database tables: `groups` and `groups_records`. The `groups` table contains
35
- all of the unique combinations of values in your datasets, and
36
- `groups_records` maps records to groups.
35
+ The runner saves results in a database that you specify in the configuration
36
+ (via the `save_results_in` method). It stores its results in two database
37
+ tables: `groups` and `groups_records`. The `groups` table contains all of the
38
+ unique combinations of values in your datasets, and `groups_records` maps
39
+ records to groups.
37
40
 
38
41
  You can also link a dataset to itself:
39
42
 
data/Rakefile CHANGED
@@ -32,14 +32,6 @@ Rake::TestTask.new(:test) do |test|
32
32
  test.verbose = true
33
33
  end
34
34
 
35
- require 'rcov/rcovtask'
36
- Rcov::RcovTask.new do |test|
37
- test.libs << 'test'
38
- test.pattern = 'test/**/test_*.rb'
39
- test.verbose = true
40
- test.rcov_opts << '--exclude "gems/*"'
41
- end
42
-
43
35
  task :default => :test
44
36
 
45
37
  require 'rdoc/task'
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.2
1
+ 0.0.3
@@ -1,188 +1,286 @@
1
1
  module Linkage
2
- # {Configuration} is used to configure linkages. When you call
3
- # {Dataset#link_with}, the block you supply gets called in the context of
4
- # an instance of {Configuration}.
5
- #
6
- # @example
7
- # dataset_1 = Linkage::Dataset.new("mysql://example.com/database_name", "table_1")
8
- # dataset_2 = Linkage::Dataset.new("mysql://example.com/database_name", "table_2")
9
- # dataset_1.link_with(dataset_2) do
10
- # # this gets run inside of a Configuration instance
11
- # end
12
- #
13
- # @see Dataset#link_with
14
2
  class Configuration
15
- # @private
16
- class ExpectationWrapper
17
- def initialize(type, data, config)
18
- @type = type
19
- @data = data
20
- @config = config
21
- @side = nil
22
- @forced_kind = nil
23
- end
3
+ class DSL
4
+ class ExpectationWrapper
5
+ VALID_OPERATORS = [:==, :>, :<, :>=, :<=]
6
+ OPERATOR_OPPOSITES = {
7
+ :== => :'!=',
8
+ :> => :<=,
9
+ :<= => :>,
10
+ :< => :>=,
11
+ :>= => :<
12
+ }
13
+
14
+ attr_reader :kind, :side, :lhs, :rhs
15
+
16
+ def initialize(dsl, type, lhs)
17
+ @dsl = dsl
18
+ @type = type
19
+ @lhs = lhs
20
+ @rhs = nil
21
+ @side = nil
22
+ @kind = nil
23
+ end
24
+
25
+ VALID_OPERATORS.each do |operator|
26
+ define_method(operator) do |rhs|
27
+ # NOTE: lhs is always a DataWrapper
28
+
29
+ @rhs = rhs
30
+ if !@rhs.is_a?(DataWrapper) || @lhs.static? || @rhs.static? || @lhs.side == @rhs.side
31
+ @side = @lhs.side
32
+ @side = @rhs.side if @side.nil? && @rhs.is_a?(DataWrapper)
33
+ @kind = :filter
34
+ elsif @lhs.same_except_side?(@rhs)
35
+ @kind = :self
36
+ elsif @lhs.dataset == @rhs.dataset
37
+ @kind = :cross
38
+ else
39
+ @kind = :dual
40
+ end
41
+ @operator = @type == :must_not ? OPERATOR_OPPOSITES[operator] : operator
42
+ @dsl.add_expectation(self)
43
+ end
44
+ end
45
+
46
+ def merged_field
47
+ @merged_field ||= @lhs.data.merge(@rhs.data)
48
+ end
49
+
50
+ def filter_expr
51
+ if @filter_expr.nil? && @kind == :filter
52
+ if @lhs.is_a?(DataWrapper) && !@lhs.static?
53
+ target = @lhs
54
+ other = @rhs
55
+ elsif @rhs.is_a?(DataWrapper) && !@rhs.static?
56
+ target = @rhs
57
+ other = @lhs
58
+ else
59
+ raise "Wonky filter"
60
+ end
24
61
 
25
- Linkage::Expectation::VALID_OPERATORS.each do |op|
26
- define_method(op) do |other|
27
- case other
28
- when DataWrapper
29
- @other = other.data
30
- if @other.static? || other.side == @data.side
31
- @forced_kind = :filter
32
- @side = @data.side
62
+ arg1 = target.data.to_expr
63
+ arg2 = other.is_a?(DataWrapper) ? other.data.to_expr : other
64
+ @filter_expr =
65
+ case @operator
66
+ when :==
67
+ { arg1 => arg2 }
68
+ when :'!='
69
+ ~{ arg1 => arg2 }
70
+ else
71
+ arg1 = Sequel::SQL::Identifier.new(arg1)
72
+ arg2 = arg2.is_a?(Symbol) ? Sequel::SQL::Identifier.new(arg2) : arg2
73
+ Sequel::SQL::BooleanExpression.new(@operator, arg1, arg2)
74
+ end
75
+ end
76
+ @filter_expr
77
+ end
78
+
79
+ def apply_to(dataset, side)
80
+ if @kind == :filter
81
+ if @side == side
82
+ return dataset.filter(filter_expr)
83
+ else
84
+ # Doesn't apply
85
+ return dataset
33
86
  end
87
+ end
88
+
89
+ if @lhs.is_a?(DataWrapper) && @lhs.side == side
90
+ target = @lhs
91
+ elsif @rhs.is_a?(DataWrapper) && @rhs.side == side
92
+ target = @rhs
34
93
  else
35
- @other = other
36
- @side = @data.side
94
+ raise "Wonky expectation"
95
+ end
96
+
97
+ expr = target.data.to_expr
98
+ aliased_expr = expr
99
+ if expr != merged_field.name
100
+ aliased_expr = expr.as(merged_field.name)
37
101
  end
38
- add_expectation(op)
102
+
103
+ dataset.order_more(expr).select_more(aliased_expr)
104
+ end
105
+
106
+ def same_filter?(other)
107
+ kind == :filter && other.kind == :filter && filter_expr == other.filter_expr
39
108
  end
40
109
  end
41
110
 
42
- private
111
+ class DataWrapper
112
+ attr_reader :side, :dataset
43
113
 
44
- def add_expectation(operator)
45
- klass = Expectation.get(@type)
46
- exp = klass.new(operator, @data.data, @other, @forced_kind)
47
- @config.add_expectation(exp, @side)
114
+ [:must, :must_not].each do |type|
115
+ define_method(type) do
116
+ ExpectationWrapper.new(@dsl, type, self)
117
+ end
118
+ end
48
119
  end
49
- end
50
120
 
51
- # @private
52
- class DataWrapper
53
- attr_reader :data, :side
121
+ class FieldWrapper < DataWrapper
122
+ attr_reader :name
54
123
 
55
- def must
56
- ExpectationWrapper.new(:must, self, @config)
57
- end
124
+ def initialize(dsl, side, dataset, name)
125
+ @dsl = dsl
126
+ @side = side
127
+ @dataset = dataset
128
+ @name = name
129
+ end
58
130
 
59
- def must_not
60
- ExpectationWrapper.new(:must_not, self, @config)
61
- end
131
+ def static?
132
+ false
133
+ end
134
+
135
+ def same_except_side?(other)
136
+ other.is_a?(FieldWrapper) && name == other.name
137
+ end
62
138
 
63
- def static?
64
- false
139
+ def data
140
+ @dataset.field_set[@name]
141
+ end
65
142
  end
66
- end
67
143
 
68
- # @private
69
- class FunctionWrapper < DataWrapper
70
- def initialize(klass, args, config)
71
- @klass = klass
72
- @args = args
73
- @config = config
144
+ class FunctionWrapper < DataWrapper
145
+ attr_reader :klass, :args
146
+
147
+ def initialize(dsl, klass, args)
148
+ @dsl = dsl
149
+ @klass = klass
150
+ @args = args
151
+ @side = nil
152
+ @static = true
153
+ args.each do |arg|
154
+ if arg.kind_of?(DataWrapper)
155
+ raise "conflicting sides" if @side && @side != arg.side
156
+ @side = arg.side
157
+ @static &&= arg.static?
158
+ end
159
+ end
160
+ end
74
161
 
75
- @side = args.inject(nil) do |side, arg|
76
- if arg.kind_of?(DataWrapper)
77
- raise "conflicting sides" if side && side != arg.side
78
- arg.side
79
- else
80
- side
162
+ def data
163
+ @data ||= @klass.new(*@args.collect { |arg| arg.kind_of?(DataWrapper) ? arg.data : arg })
164
+ end
165
+
166
+ def name
167
+ data.name
168
+ end
169
+
170
+ def static?
171
+ @static
172
+ end
173
+
174
+ def same_except_side?(other)
175
+ if other.is_a?(FunctionWrapper) && klass == other.klass
176
+ args.each_with_index do |arg, i|
177
+ other_arg = other.args[i]
178
+ if arg.is_a?(DataWrapper) && other_arg.is_a?(DataWrapper)
179
+ if !arg.same_except_side?(other_arg)
180
+ return false
181
+ end
182
+ else
183
+ if arg != other_arg
184
+ return false
185
+ end
186
+ end
187
+ end
188
+ return true
81
189
  end
190
+ false
82
191
  end
83
192
  end
84
193
 
85
- def data
86
- @klass.new(*@args.collect { |arg| arg.kind_of?(DataWrapper) ? arg.data : arg })
194
+ class DatasetWrapper
195
+ def initialize(dsl, side, dataset)
196
+ @dsl = dsl
197
+ @dataset = dataset
198
+ @side = side
199
+ end
200
+
201
+ def [](field_name)
202
+ if @dataset.field_set.has_key?(field_name)
203
+ FieldWrapper.new(@dsl, @side, @dataset, field_name)
204
+ else
205
+ raise ArgumentError, "The '#{field_name}' field doesn't exist for the #{@side} dataset!"
206
+ end
207
+ end
87
208
  end
88
- end
89
209
 
90
- # @private
91
- class FieldWrapper < DataWrapper
92
- def initialize(field, side, config)
93
- @data = field
94
- @side = side
210
+ def initialize(config, &block)
95
211
  @config = config
212
+ @lhs_filters = []
213
+ @rhs_filters = []
214
+ instance_eval(&block)
96
215
  end
97
- end
98
216
 
99
- # @private
100
- class DatasetWrapper
101
- def initialize(dataset, side, config)
102
- @dataset = dataset
103
- @side = side
104
- @config = config
217
+ def lhs
218
+ DatasetWrapper.new(self, :lhs, @config.dataset_1)
105
219
  end
106
220
 
107
- def [](field_name)
108
- field = @dataset.fields[field_name]
109
- if field.nil?
110
- raise ArgumentError, "The '#{field_name}' field doesn't exist for that dataset!"
111
- end
112
- FieldWrapper.new(field, @side, @config)
221
+ def rhs
222
+ DatasetWrapper.new(self, :rhs, @config.dataset_2)
113
223
  end
114
- end
115
224
 
116
- include Utils
225
+ def save_results_in(uri, options = {})
226
+ @config.results_uri = uri
227
+ @config.results_uri_options = options
228
+ end
117
229
 
118
- # @return [Symbol] :self, :dual, or :cross
119
- attr_reader :linkage_type
230
+ def add_expectation(expectation)
231
+ @config.expectations << expectation
232
+
233
+ if @config.linkage_type == :self
234
+ case expectation.kind
235
+ when :cross
236
+ @config.linkage_type = :cross
237
+ when :filter
238
+ # If there different filters on both 'sides' of a self-linkage,
239
+ # it turns into a cross linkage.
240
+ these_filters, other_filters =
241
+ case expectation.side
242
+ when :lhs
243
+ [@lhs_filters, @rhs_filters]
244
+ when :rhs
245
+ [@rhs_filters, @lhs_filters]
246
+ end
120
247
 
121
- # @return [Array<Linkage::Expectation>]
122
- attr_reader :expectations
248
+ these_filters << expectation
249
+ other_filters.each do |other|
250
+ if !expectation.same_filter?(other)
251
+ @config.linkage_type = :cross
252
+ break
253
+ end
254
+ end
255
+ end
256
+ end
257
+ end
123
258
 
124
- # @return [Linkage::Dataset]
125
- attr_reader :dataset_1
259
+ # For handling functions
260
+ def method_missing(name, *args, &block)
261
+ klass = Function[name.to_s]
262
+ if klass
263
+ FunctionWrapper.new(self, klass, args)
264
+ else
265
+ super
266
+ end
267
+ end
268
+ end
126
269
 
127
- # @return [Linkage::Dataset]
128
- attr_reader :dataset_2
270
+ attr_reader :dataset_1, :dataset_2, :expectations
271
+ attr_accessor :linkage_type, :results_uri, :results_uri_options
129
272
 
130
273
  def initialize(dataset_1, dataset_2)
131
- @dataset_1 = dataset_1.clone
132
- @dataset_2 = dataset_2.clone
274
+ @dataset_1 = dataset_1
275
+ @dataset_2 = dataset_2
133
276
  @expectations = []
134
277
  @linkage_type = dataset_1 == dataset_2 ? :self : :dual
135
- @lhs_filters = []
136
- @rhs_filters = []
137
- end
138
-
139
- def lhs
140
- @lhs ||= DatasetWrapper.new(@dataset_1, :lhs, self)
141
278
  end
142
279
 
143
- def rhs
144
- @rhs ||= DatasetWrapper.new(@dataset_2, :rhs, self)
280
+ def configure(&block)
281
+ DSL.new(self, &block)
145
282
  end
146
283
 
147
- # @private
148
- def add_expectation(expectation, side = nil)
149
- # If the expectation created turns the linkage type from a self to a
150
- # cross, then the dataset gets a new id. This is so that
151
- # Expectation#apply does the right thing.
152
-
153
- @expectations << expectation
154
- if @linkage_type == :self
155
- cross = false
156
-
157
- case expectation.kind
158
- when :cross
159
- cross = true
160
- when :filter
161
- # If there different filters on both 'sides' of a self-linkage,
162
- # it turns into a cross linkage.
163
- these_filters, other_filters =
164
- case side
165
- when :lhs
166
- [@lhs_filters, @rhs_filters]
167
- when :rhs
168
- [@rhs_filters, @lhs_filters]
169
- end
170
-
171
- if !other_filters.empty? && !other_filters.include?(expectation)
172
- cross = true
173
- else
174
- these_filters << expectation
175
- end
176
- end
177
-
178
- if cross
179
- @linkage_type = :cross
180
- @dataset_2.send(:set_new_id)
181
- end
182
- end
183
- end
184
-
185
- # @private
186
284
  def groups_table_schema
187
285
  schema = []
188
286
 
@@ -193,26 +291,16 @@ module Linkage
193
291
  @expectations.each do |exp|
194
292
  next if exp.kind == :filter
195
293
 
196
- merged_type = exp.merged_field.ruby_type
197
- schema << [exp.name, merged_type[:type], merged_type[:opts] || {}]
294
+ merged_field = exp.merged_field
295
+ merged_type = merged_field.ruby_type
296
+ schema << [merged_field.name, merged_type[:type], merged_type[:opts] || {}]
198
297
  end
199
298
 
200
299
  schema
201
300
  end
202
301
 
203
- # @private
204
- def inspect
205
- to_s
206
- end
207
-
208
- # For handling functions
209
- def method_missing(name, *args, &block)
210
- klass = Function[name.to_s]
211
- if klass
212
- FunctionWrapper.new(klass, args, self)
213
- else
214
- super
215
- end
302
+ def result_set
303
+ @result_set ||= ResultSet.new(self)
216
304
  end
217
305
  end
218
306
  end
data/lib/linkage/data.rb CHANGED
@@ -28,10 +28,6 @@ module Linkage
28
28
  raise NotImplementedError
29
29
  end
30
30
 
31
- def dataset
32
- raise NotImplementedError
33
- end
34
-
35
31
  def to_expr
36
32
  raise NotImplementedError
37
33
  end
@@ -126,26 +122,6 @@ module Linkage
126
122
  Field.new(name, nil, result)
127
123
  end
128
124
 
129
- # Returns true if this data's name and dataset match the other's name
130
- # and dataset (using {Dataset#==})
131
- def ==(other)
132
- if !other.is_a?(self.class)
133
- super
134
- elsif equal?(other)
135
- true
136
- else
137
- self.name == other.name && self.dataset == other.dataset
138
- end
139
- end
140
-
141
- # Returns true if this data source's dataset is equal to the given dataset
142
- # (using Dataset#id).
143
- #
144
- # @param [Linkage::Dataset]
145
- def belongs_to?(dataset)
146
- self.dataset.id == dataset.id
147
- end
148
-
149
125
  private
150
126
 
151
127
  def first_common_type(type_1, type_2)