linkage 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile CHANGED
@@ -5,7 +5,6 @@ gem "sequel"
5
5
  group :development do
6
6
  gem "bundler", "~> 1.0.0"
7
7
  gem "jeweler", "~> 1.6.4"
8
- gem "rcov", ">= 0"
9
8
  gem "test-unit", "2.3.2"
10
9
  gem "mocha"
11
10
  gem "sqlite3"
@@ -15,4 +14,6 @@ group :development do
15
14
  gem "mysql2"
16
15
  gem 'pry'
17
16
  gem 'rdiscount'
17
+ gem 'guard-test'
18
+ gem 'guard-yard'
18
19
  end
data/Gemfile.lock CHANGED
@@ -3,7 +3,17 @@ GEM
3
3
  specs:
4
4
  blockenspiel (0.4.3)
5
5
  coderay (0.9.8)
6
+ ffi (1.0.11)
6
7
  git (1.2.5)
8
+ guard (1.0.0)
9
+ ffi (>= 0.5.0)
10
+ thor (~> 0.14.6)
11
+ guard-test (0.4.3)
12
+ guard (>= 0.4)
13
+ test-unit (~> 2.2)
14
+ guard-yard (1.0.2)
15
+ guard (>= 0.2.2)
16
+ yard (>= 0.7.0)
7
17
  jeweler (1.6.4)
8
18
  bundler (~> 1.0)
9
19
  git (>= 1.2.5)
@@ -11,39 +21,40 @@ GEM
11
21
  metaclass (0.0.1)
12
22
  method_source (0.6.7)
13
23
  ruby_parser (>= 2.3.1)
14
- mocha (0.10.0)
24
+ mocha (0.10.3)
15
25
  metaclass (~> 0.0.1)
16
- mysql2 (0.3.10)
26
+ mysql2 (0.3.11)
17
27
  pry (0.9.7.4)
18
28
  coderay (~> 0.9.8)
19
29
  method_source (~> 0.6.7)
20
30
  ruby_parser (>= 2.3.1)
21
31
  slop (~> 2.1.0)
22
32
  rake (0.9.2.2)
23
- rcov (0.9.11)
24
33
  rdiscount (1.6.8)
25
34
  ruby_parser (2.3.1)
26
35
  sexp_processor (~> 3.0)
27
- sequel (3.29.0)
28
- sexp_processor (3.0.8)
36
+ sequel (3.31.0)
37
+ sexp_processor (3.0.10)
29
38
  slop (2.1.0)
30
- sqlite3 (1.3.4)
39
+ sqlite3 (1.3.5)
31
40
  test-unit (2.3.2)
41
+ thor (0.14.6)
32
42
  versionomy (0.4.1)
33
43
  blockenspiel (>= 0.4.1)
34
- yard (0.7.3)
44
+ yard (0.7.4)
35
45
 
36
46
  PLATFORMS
37
47
  ruby
38
48
 
39
49
  DEPENDENCIES
40
50
  bundler (~> 1.0.0)
51
+ guard-test
52
+ guard-yard
41
53
  jeweler (~> 1.6.4)
42
54
  mocha
43
55
  mysql2
44
56
  pry
45
57
  rake
46
- rcov
47
58
  rdiscount
48
59
  sequel
49
60
  sqlite3
data/README.markdown CHANGED
@@ -19,6 +19,8 @@ To describe a linkage, you use the `Dataset#link_with` method.
19
19
  lhs[:last_name].must == rhs[:parent_last_name]
20
20
  lhs[:last_name].must_not == "Smith" # exclude parents with the last
21
21
  # name "Smith"
22
+
23
+ save_results_in('sqlite://results.db') # see below
22
24
  end
23
25
 
24
26
  Note that the datasets don't have to be in the same database, or even on
@@ -27,13 +29,14 @@ the same machine.
27
29
  To run a linkage, use a Runner with the resulting configuration from
28
30
  `Dataset#link_with`:
29
31
 
30
- runner = Linkage::SingleThreadedRunner.new(config, 'sqlite://results.db')
32
+ runner = Linkage::SingleThreadedRunner.new(config)
31
33
  runner.execute
32
34
 
33
- The runner needs a database URI, since it stores its results in two
34
- database tables: `groups` and `groups_records`. The `groups` table contains
35
- all of the unique combinations of values in your datasets, and
36
- `groups_records` maps records to groups.
35
+ The runner saves results in a database that you specify in the configuration
36
+ (via the `save_results_in` method). It stores its results in two database
37
+ tables: `groups` and `groups_records`. The `groups` table contains all of the
38
+ unique combinations of values in your datasets, and `groups_records` maps
39
+ records to groups.
37
40
 
38
41
  You can also link a dataset to itself:
39
42
 
data/Rakefile CHANGED
@@ -32,14 +32,6 @@ Rake::TestTask.new(:test) do |test|
32
32
  test.verbose = true
33
33
  end
34
34
 
35
- require 'rcov/rcovtask'
36
- Rcov::RcovTask.new do |test|
37
- test.libs << 'test'
38
- test.pattern = 'test/**/test_*.rb'
39
- test.verbose = true
40
- test.rcov_opts << '--exclude "gems/*"'
41
- end
42
-
43
35
  task :default => :test
44
36
 
45
37
  require 'rdoc/task'
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.2
1
+ 0.0.3
@@ -1,188 +1,286 @@
1
1
  module Linkage
2
- # {Configuration} is used to configure linkages. When you call
3
- # {Dataset#link_with}, the block you supply gets called in the context of
4
- # an instance of {Configuration}.
5
- #
6
- # @example
7
- # dataset_1 = Linkage::Dataset.new("mysql://example.com/database_name", "table_1")
8
- # dataset_2 = Linkage::Dataset.new("mysql://example.com/database_name", "table_2")
9
- # dataset_1.link_with(dataset_2) do
10
- # # this gets run inside of a Configuration instance
11
- # end
12
- #
13
- # @see Dataset#link_with
14
2
  class Configuration
15
- # @private
16
- class ExpectationWrapper
17
- def initialize(type, data, config)
18
- @type = type
19
- @data = data
20
- @config = config
21
- @side = nil
22
- @forced_kind = nil
23
- end
3
+ class DSL
4
+ class ExpectationWrapper
5
+ VALID_OPERATORS = [:==, :>, :<, :>=, :<=]
6
+ OPERATOR_OPPOSITES = {
7
+ :== => :'!=',
8
+ :> => :<=,
9
+ :<= => :>,
10
+ :< => :>=,
11
+ :>= => :<
12
+ }
13
+
14
+ attr_reader :kind, :side, :lhs, :rhs
15
+
16
+ def initialize(dsl, type, lhs)
17
+ @dsl = dsl
18
+ @type = type
19
+ @lhs = lhs
20
+ @rhs = nil
21
+ @side = nil
22
+ @kind = nil
23
+ end
24
+
25
+ VALID_OPERATORS.each do |operator|
26
+ define_method(operator) do |rhs|
27
+ # NOTE: lhs is always a DataWrapper
28
+
29
+ @rhs = rhs
30
+ if !@rhs.is_a?(DataWrapper) || @lhs.static? || @rhs.static? || @lhs.side == @rhs.side
31
+ @side = @lhs.side
32
+ @side = @rhs.side if @side.nil? && @rhs.is_a?(DataWrapper)
33
+ @kind = :filter
34
+ elsif @lhs.same_except_side?(@rhs)
35
+ @kind = :self
36
+ elsif @lhs.dataset == @rhs.dataset
37
+ @kind = :cross
38
+ else
39
+ @kind = :dual
40
+ end
41
+ @operator = @type == :must_not ? OPERATOR_OPPOSITES[operator] : operator
42
+ @dsl.add_expectation(self)
43
+ end
44
+ end
45
+
46
+ def merged_field
47
+ @merged_field ||= @lhs.data.merge(@rhs.data)
48
+ end
49
+
50
+ def filter_expr
51
+ if @filter_expr.nil? && @kind == :filter
52
+ if @lhs.is_a?(DataWrapper) && !@lhs.static?
53
+ target = @lhs
54
+ other = @rhs
55
+ elsif @rhs.is_a?(DataWrapper) && !@rhs.static?
56
+ target = @rhs
57
+ other = @lhs
58
+ else
59
+ raise "Wonky filter"
60
+ end
24
61
 
25
- Linkage::Expectation::VALID_OPERATORS.each do |op|
26
- define_method(op) do |other|
27
- case other
28
- when DataWrapper
29
- @other = other.data
30
- if @other.static? || other.side == @data.side
31
- @forced_kind = :filter
32
- @side = @data.side
62
+ arg1 = target.data.to_expr
63
+ arg2 = other.is_a?(DataWrapper) ? other.data.to_expr : other
64
+ @filter_expr =
65
+ case @operator
66
+ when :==
67
+ { arg1 => arg2 }
68
+ when :'!='
69
+ ~{ arg1 => arg2 }
70
+ else
71
+ arg1 = Sequel::SQL::Identifier.new(arg1)
72
+ arg2 = arg2.is_a?(Symbol) ? Sequel::SQL::Identifier.new(arg2) : arg2
73
+ Sequel::SQL::BooleanExpression.new(@operator, arg1, arg2)
74
+ end
75
+ end
76
+ @filter_expr
77
+ end
78
+
79
+ def apply_to(dataset, side)
80
+ if @kind == :filter
81
+ if @side == side
82
+ return dataset.filter(filter_expr)
83
+ else
84
+ # Doesn't apply
85
+ return dataset
33
86
  end
87
+ end
88
+
89
+ if @lhs.is_a?(DataWrapper) && @lhs.side == side
90
+ target = @lhs
91
+ elsif @rhs.is_a?(DataWrapper) && @rhs.side == side
92
+ target = @rhs
34
93
  else
35
- @other = other
36
- @side = @data.side
94
+ raise "Wonky expectation"
95
+ end
96
+
97
+ expr = target.data.to_expr
98
+ aliased_expr = expr
99
+ if expr != merged_field.name
100
+ aliased_expr = expr.as(merged_field.name)
37
101
  end
38
- add_expectation(op)
102
+
103
+ dataset.order_more(expr).select_more(aliased_expr)
104
+ end
105
+
106
+ def same_filter?(other)
107
+ kind == :filter && other.kind == :filter && filter_expr == other.filter_expr
39
108
  end
40
109
  end
41
110
 
42
- private
111
+ class DataWrapper
112
+ attr_reader :side, :dataset
43
113
 
44
- def add_expectation(operator)
45
- klass = Expectation.get(@type)
46
- exp = klass.new(operator, @data.data, @other, @forced_kind)
47
- @config.add_expectation(exp, @side)
114
+ [:must, :must_not].each do |type|
115
+ define_method(type) do
116
+ ExpectationWrapper.new(@dsl, type, self)
117
+ end
118
+ end
48
119
  end
49
- end
50
120
 
51
- # @private
52
- class DataWrapper
53
- attr_reader :data, :side
121
+ class FieldWrapper < DataWrapper
122
+ attr_reader :name
54
123
 
55
- def must
56
- ExpectationWrapper.new(:must, self, @config)
57
- end
124
+ def initialize(dsl, side, dataset, name)
125
+ @dsl = dsl
126
+ @side = side
127
+ @dataset = dataset
128
+ @name = name
129
+ end
58
130
 
59
- def must_not
60
- ExpectationWrapper.new(:must_not, self, @config)
61
- end
131
+ def static?
132
+ false
133
+ end
134
+
135
+ def same_except_side?(other)
136
+ other.is_a?(FieldWrapper) && name == other.name
137
+ end
62
138
 
63
- def static?
64
- false
139
+ def data
140
+ @dataset.field_set[@name]
141
+ end
65
142
  end
66
- end
67
143
 
68
- # @private
69
- class FunctionWrapper < DataWrapper
70
- def initialize(klass, args, config)
71
- @klass = klass
72
- @args = args
73
- @config = config
144
+ class FunctionWrapper < DataWrapper
145
+ attr_reader :klass, :args
146
+
147
+ def initialize(dsl, klass, args)
148
+ @dsl = dsl
149
+ @klass = klass
150
+ @args = args
151
+ @side = nil
152
+ @static = true
153
+ args.each do |arg|
154
+ if arg.kind_of?(DataWrapper)
155
+ raise "conflicting sides" if @side && @side != arg.side
156
+ @side = arg.side
157
+ @static &&= arg.static?
158
+ end
159
+ end
160
+ end
74
161
 
75
- @side = args.inject(nil) do |side, arg|
76
- if arg.kind_of?(DataWrapper)
77
- raise "conflicting sides" if side && side != arg.side
78
- arg.side
79
- else
80
- side
162
+ def data
163
+ @data ||= @klass.new(*@args.collect { |arg| arg.kind_of?(DataWrapper) ? arg.data : arg })
164
+ end
165
+
166
+ def name
167
+ data.name
168
+ end
169
+
170
+ def static?
171
+ @static
172
+ end
173
+
174
+ def same_except_side?(other)
175
+ if other.is_a?(FunctionWrapper) && klass == other.klass
176
+ args.each_with_index do |arg, i|
177
+ other_arg = other.args[i]
178
+ if arg.is_a?(DataWrapper) && other_arg.is_a?(DataWrapper)
179
+ if !arg.same_except_side?(other_arg)
180
+ return false
181
+ end
182
+ else
183
+ if arg != other_arg
184
+ return false
185
+ end
186
+ end
187
+ end
188
+ return true
81
189
  end
190
+ false
82
191
  end
83
192
  end
84
193
 
85
- def data
86
- @klass.new(*@args.collect { |arg| arg.kind_of?(DataWrapper) ? arg.data : arg })
194
+ class DatasetWrapper
195
+ def initialize(dsl, side, dataset)
196
+ @dsl = dsl
197
+ @dataset = dataset
198
+ @side = side
199
+ end
200
+
201
+ def [](field_name)
202
+ if @dataset.field_set.has_key?(field_name)
203
+ FieldWrapper.new(@dsl, @side, @dataset, field_name)
204
+ else
205
+ raise ArgumentError, "The '#{field_name}' field doesn't exist for the #{@side} dataset!"
206
+ end
207
+ end
87
208
  end
88
- end
89
209
 
90
- # @private
91
- class FieldWrapper < DataWrapper
92
- def initialize(field, side, config)
93
- @data = field
94
- @side = side
210
+ def initialize(config, &block)
95
211
  @config = config
212
+ @lhs_filters = []
213
+ @rhs_filters = []
214
+ instance_eval(&block)
96
215
  end
97
- end
98
216
 
99
- # @private
100
- class DatasetWrapper
101
- def initialize(dataset, side, config)
102
- @dataset = dataset
103
- @side = side
104
- @config = config
217
+ def lhs
218
+ DatasetWrapper.new(self, :lhs, @config.dataset_1)
105
219
  end
106
220
 
107
- def [](field_name)
108
- field = @dataset.fields[field_name]
109
- if field.nil?
110
- raise ArgumentError, "The '#{field_name}' field doesn't exist for that dataset!"
111
- end
112
- FieldWrapper.new(field, @side, @config)
221
+ def rhs
222
+ DatasetWrapper.new(self, :rhs, @config.dataset_2)
113
223
  end
114
- end
115
224
 
116
- include Utils
225
+ def save_results_in(uri, options = {})
226
+ @config.results_uri = uri
227
+ @config.results_uri_options = options
228
+ end
117
229
 
118
- # @return [Symbol] :self, :dual, or :cross
119
- attr_reader :linkage_type
230
+ def add_expectation(expectation)
231
+ @config.expectations << expectation
232
+
233
+ if @config.linkage_type == :self
234
+ case expectation.kind
235
+ when :cross
236
+ @config.linkage_type = :cross
237
+ when :filter
238
+ # If there are different filters on both 'sides' of a self-linkage,
239
+ # it turns into a cross linkage.
240
+ these_filters, other_filters =
241
+ case expectation.side
242
+ when :lhs
243
+ [@lhs_filters, @rhs_filters]
244
+ when :rhs
245
+ [@rhs_filters, @lhs_filters]
246
+ end
120
247
 
121
- # @return [Array<Linkage::Expectation>]
122
- attr_reader :expectations
248
+ these_filters << expectation
249
+ other_filters.each do |other|
250
+ if !expectation.same_filter?(other)
251
+ @config.linkage_type = :cross
252
+ break
253
+ end
254
+ end
255
+ end
256
+ end
257
+ end
123
258
 
124
- # @return [Linkage::Dataset]
125
- attr_reader :dataset_1
259
+ # For handling functions
260
+ def method_missing(name, *args, &block)
261
+ klass = Function[name.to_s]
262
+ if klass
263
+ FunctionWrapper.new(self, klass, args)
264
+ else
265
+ super
266
+ end
267
+ end
268
+ end
126
269
 
127
- # @return [Linkage::Dataset]
128
- attr_reader :dataset_2
270
+ attr_reader :dataset_1, :dataset_2, :expectations
271
+ attr_accessor :linkage_type, :results_uri, :results_uri_options
129
272
 
130
273
  def initialize(dataset_1, dataset_2)
131
- @dataset_1 = dataset_1.clone
132
- @dataset_2 = dataset_2.clone
274
+ @dataset_1 = dataset_1
275
+ @dataset_2 = dataset_2
133
276
  @expectations = []
134
277
  @linkage_type = dataset_1 == dataset_2 ? :self : :dual
135
- @lhs_filters = []
136
- @rhs_filters = []
137
- end
138
-
139
- def lhs
140
- @lhs ||= DatasetWrapper.new(@dataset_1, :lhs, self)
141
278
  end
142
279
 
143
- def rhs
144
- @rhs ||= DatasetWrapper.new(@dataset_2, :rhs, self)
280
+ def configure(&block)
281
+ DSL.new(self, &block)
145
282
  end
146
283
 
147
- # @private
148
- def add_expectation(expectation, side = nil)
149
- # If the expectation created turns the linkage type from a self to a
150
- # cross, then the dataset gets a new id. This is so that
151
- # Expectation#apply does the right thing.
152
-
153
- @expectations << expectation
154
- if @linkage_type == :self
155
- cross = false
156
-
157
- case expectation.kind
158
- when :cross
159
- cross = true
160
- when :filter
161
- # If there are different filters on both 'sides' of a self-linkage,
162
- # it turns into a cross linkage.
163
- these_filters, other_filters =
164
- case side
165
- when :lhs
166
- [@lhs_filters, @rhs_filters]
167
- when :rhs
168
- [@rhs_filters, @lhs_filters]
169
- end
170
-
171
- if !other_filters.empty? && !other_filters.include?(expectation)
172
- cross = true
173
- else
174
- these_filters << expectation
175
- end
176
- end
177
-
178
- if cross
179
- @linkage_type = :cross
180
- @dataset_2.send(:set_new_id)
181
- end
182
- end
183
- end
184
-
185
- # @private
186
284
  def groups_table_schema
187
285
  schema = []
188
286
 
@@ -193,26 +291,16 @@ module Linkage
193
291
  @expectations.each do |exp|
194
292
  next if exp.kind == :filter
195
293
 
196
- merged_type = exp.merged_field.ruby_type
197
- schema << [exp.name, merged_type[:type], merged_type[:opts] || {}]
294
+ merged_field = exp.merged_field
295
+ merged_type = merged_field.ruby_type
296
+ schema << [merged_field.name, merged_type[:type], merged_type[:opts] || {}]
198
297
  end
199
298
 
200
299
  schema
201
300
  end
202
301
 
203
- # @private
204
- def inspect
205
- to_s
206
- end
207
-
208
- # For handling functions
209
- def method_missing(name, *args, &block)
210
- klass = Function[name.to_s]
211
- if klass
212
- FunctionWrapper.new(klass, args, self)
213
- else
214
- super
215
- end
302
+ def result_set
303
+ @result_set ||= ResultSet.new(self)
216
304
  end
217
305
  end
218
306
  end
data/lib/linkage/data.rb CHANGED
@@ -28,10 +28,6 @@ module Linkage
28
28
  raise NotImplementedError
29
29
  end
30
30
 
31
- def dataset
32
- raise NotImplementedError
33
- end
34
-
35
31
  def to_expr
36
32
  raise NotImplementedError
37
33
  end
@@ -126,26 +122,6 @@ module Linkage
126
122
  Field.new(name, nil, result)
127
123
  end
128
124
 
129
- # Returns true if this data's name and dataset match the other's name
130
- # and dataset (using {Dataset#==})
131
- def ==(other)
132
- if !other.is_a?(self.class)
133
- super
134
- elsif equal?(other)
135
- true
136
- else
137
- self.name == other.name && self.dataset == other.dataset
138
- end
139
- end
140
-
141
- # Returns true if this data source's dataset is equal to the given dataset
142
- # (using Dataset#id).
143
- #
144
- # @param [Linkage::Dataset]
145
- def belongs_to?(dataset)
146
- self.dataset.id == dataset.id
147
- end
148
-
149
125
  private
150
126
 
151
127
  def first_common_type(type_1, type_2)