linkage 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,205 @@
1
module Linkage
  # Wrapper for a Sequel dataset.
  #
  # A Dataset remembers a database URI, a table name and the connection
  # options. It reads the table's schema once at construction time and opens
  # a new connection (through Sequel.connect) whenever rows are iterated.
  class Dataset
    @@next_id = 1 # Internal ID used for expectations
    @@next_id_mutex = Mutex.new

    # Hand out the next internal dataset ID. The counter is read and
    # incremented inside a mutex so that two callers never get the same ID.
    #
    # @private
    # @return [Integer]
    def self.next_id
      @@next_id_mutex.synchronize do
        id = @@next_id
        @@next_id += 1
        id
      end
    end

    # @return [Linkage::Field, nil] The first field whose column schema is
    #   flagged as :primary_key (nil if the schema has none)
    attr_reader :primary_key

    # @return [Array] Schema information for this dataset, as returned by
    #   the database's #schema method for the table
    attr_reader :schema

    # @return [String] Database URI
    attr_reader :uri

    # @return [Symbol] Database table name
    attr_reader :table

    # @return [Hash{Symbol => Linkage::Field}] {Linkage::Field}'s keyed by
    #   column name
    attr_reader :fields

    # @private
    attr_reader :id

    # @param [String] uri Sequel-style database URI
    # @param [String, Symbol] table Database table name
    # @param [Hash] options Options to pass to Sequel.connect
    # @see http://sequel.rubyforge.org/rdoc/files/doc/opening_databases_rdoc.html Sequel: Connecting to a database
    def initialize(uri, table, options = {})
      @id = self.class.next_id
      @uri = uri
      @table = table.to_sym
      @options = options
      @schema = nil
      database { |db| @schema = db.schema(@table) }
      @order = []
      @select = []
      @filter = []
      create_fields
    end

    # Setup a linkage with another dataset. The block, when given, is
    # evaluated in the context of the new configuration.
    #
    # @param [Linkage::Dataset] dataset
    # @return [Linkage::Configuration]
    def link_with(dataset, &block)
      conf = Configuration.new(self, dataset)
      # instance_eval(&nil) raises an unhelpful ArgumentError, so only
      # evaluate when a block was actually supplied.
      conf.instance_eval(&block) if block
      conf
    end

    # Compare URI and database table name
    #
    # @return [Boolean]
    def ==(other)
      return super unless other.is_a?(Dataset)

      uri == other.uri && table == other.table
    end

    # Create a copy of this instance of Dataset, using {Dataset#initialize}.
    # The copy re-reads the schema from the database, receives a fresh
    # internal ID, and connects with a copy of the same options that were
    # given to this dataset.
    #
    # @return [Linkage::Dataset]
    def dup
      self.class.new(uri, table, @options.dup)
    end

    # Clone the dataset and its associated {Linkage::Field}'s (without hitting
    # the database). The clone keeps this dataset's ID and gets its own
    # copies of the order, select, filter and option collections.
    #
    # @return [Linkage::Dataset]
    def clone
      copy = self.class.allocate
      copy.send(:initialize_copy, self, {
        :order => @order.clone, :select => @select.clone,
        :filter => @filter.clone, :options => @options.clone
      })
    end

    # Add a field to use for ordering the dataset. Adding the same ordering
    # expression twice has no effect.
    #
    # @param [Linkage::Field] field
    # @param [nil, Symbol] desc nil or :desc (for descending order)
    def add_order(field, desc = nil)
      expr = desc == :desc ? field.name.desc : field.name
      @order << expr unless @order.include?(expr)
    end

    # Add a field to be selected on the dataset. If you don't add any
    # selects, all fields will be selected. The primary key is always
    # selected in either case.
    #
    # @param [Linkage::Field] field
    # @param [Symbol] as Optional field alias
    def add_select(field, as = nil)
      expr = as ? field.name.as(as) : field.name
      @select << expr unless @select.include?(expr)
    end

    # Add a filter (SQL WHERE) condition to the dataset.
    #
    # @param [Linkage::Field] field
    # @param [Symbol] operator
    # @param [Linkage::Field, Object] other
    def add_filter(field, operator, other)
      arg1 = field.name
      arg2 = other.is_a?(Field) ? other.name : other
      expr =
        case operator
        when :==
          { arg1 => arg2 }
        when :'!='
          ~{ arg1 => arg2 }
        else
          # Any other operator is handed to Sequel as an explicit boolean
          # expression; symbols on either side are wrapped as identifiers.
          left = Sequel::SQL::Identifier.new(arg1)
          right = arg2.is_a?(Symbol) ? Sequel::SQL::Identifier.new(arg2) : arg2
          Sequel::SQL::BooleanExpression.new(operator, left, right)
        end
      @filter << expr
    end

    # Yield each row of the dataset in a block. Returns an Enumerator when
    # called without a block.
    #
    # @yield [row] A Hash of two elements, :pk and :values, where row[:pk] is
    #   the row's primary key value, and row[:values] is the row Hash with
    #   the primary key entry removed.
    def each
      return enum_for(:each) unless block_given?

      database do |db|
        ds = db[@table]

        pk = @primary_key.name
        ds = ds.select(pk, *@select) unless @select.empty?
        ds = ds.order(*@order) unless @order.empty?
        ds = ds.filter(*@filter) unless @filter.empty?
        ds.each do |row|
          yield({:pk => row.delete(pk), :values => row})
        end
      end
    end

    private

    # Populate a freshly allocated instance from +dataset+. Every field is
    # cloned and re-pointed at the new instance; the order/select/filter/
    # options collections come from the +options+ hash built by {#clone}.
    #
    # @return [Linkage::Dataset] self
    def initialize_copy(dataset, options = {})
      @id = dataset.id
      @uri = dataset.uri
      @table = dataset.table
      @schema = dataset.schema
      @options = options[:options]
      @order = options[:order]
      @select = options[:select]
      @filter = options[:filter]
      @fields = dataset.fields.each_with_object({}) do |(name, field), copies|
        new_field = field.clone
        new_field.dataset = self
        copies[name] = new_field
      end
      @primary_key = @fields[dataset.primary_key.name]
      self
    end

    # Open a connection with Sequel.connect, passing the block through.
    def database(&block)
      Sequel.connect(uri, @options, &block)
    end

    # Build one Field per schema column and remember the first column that
    # is flagged as the primary key.
    def create_fields
      @fields = {}
      @schema.each do |(name, column_schema)|
        f = Field.new(name, column_schema)
        f.dataset = self
        @fields[name] = f

        @primary_key = f if @primary_key.nil? && column_schema[:primary_key]
      end
    end

    # Replace this dataset's internal ID with a fresh one.
    def set_new_id
      @id = self.class.next_id
    end
  end
end
@@ -0,0 +1,138 @@
1
module Linkage
  # A rule relating two operands, at least one of which is a
  # {Linkage::Field}. An expectation is either a filter (a condition applied
  # to a single dataset) or an equality match between two fields.
  class Expectation
    VALID_OPERATORS = [:==, :>, :<, :>=, :<=, :'!='].freeze

    # Look up an expectation class by type.
    #
    # @param [Symbol] type :must or :must_not
    # @return [Class, nil]
    def self.get(type)
      TYPES[type]
    end

    attr_reader :operator, :field_1, :field_2

    # @param [Symbol] operator One of VALID_OPERATORS; anything other than
    #   :== is only accepted for :filter expectations
    # @param [Linkage::Field, Object] field_1
    # @param [Linkage::Field, Object] field_2
    # @param [Symbol] force_kind Manually set type of expectation (useful for
    #   a filter between two fields)
    # @raise [ArgumentError] if neither operand is a Field, if the operator
    #   is unknown, or if an inequality is used outside of a filter
    def initialize(operator, field_1, field_2, force_kind = nil)
      unless field_1.is_a?(Field) || field_2.is_a?(Field)
        raise ArgumentError, "You must have at least one Linkage::Field"
      end

      unless VALID_OPERATORS.include?(operator)
        raise ArgumentError, "Invalid operator: #{operator.inspect}"
      end

      @operator = operator
      @field_1 = field_1
      @field_2 = field_2
      @kind = force_kind

      if kind == :filter
        # The Field operand is the one being filtered; the other operand is
        # the value it is compared against.
        if @field_1.is_a?(Field)
          @filter_field = @field_1
          @filter_value = @field_2
        else
          @filter_field = @field_2
          @filter_value = @field_1
        end
      elsif @operator != :==
        raise ArgumentError, "Inequality operators are not allowed for non-filter expectations"
      end
    end

    # Two expectations are equal when their operator and both operands match.
    def ==(other)
      return super unless other.is_a?(Expectation)

      @operator == other.operator && @field_1 == other.field_1 &&
        @field_2 == other.field_2
    end

    # Classify the expectation (memoized; a forced kind wins).
    #
    # @return [Symbol] :self, :dual, :cross, or :filter
    def kind
      @kind ||=
        if !(@field_1.is_a?(Field) && @field_2.is_a?(Field))
          :filter
        elsif @field_1 == @field_2
          :self
        elsif @field_1.dataset == @field_2.dataset
          :cross
        else
          :dual
        end
    end

    # @return [Symbol] name of the merged field type
    def name
      merged_field.name
    end

    # @return [Linkage::Field] result of Field#merge between the two fields
    def merged_field
      @merged_field ||= @field_1.merge(@field_2)
    end

    # @return [Boolean] Whether or not this expectation involves a field in
    #   the given dataset (Only useful for :filter expressions)
    def applies_to?(dataset)
      if kind == :filter
        @filter_field.belongs_to?(dataset)
      else
        @field_1.belongs_to?(dataset) || @field_2.belongs_to?(dataset)
      end
    end

    # Apply changes to a dataset based on the expectation, such as calling
    # {Dataset#add_order}, {Dataset#add_select}, and {Dataset#add_filter}
    # with the appropriate arguments.
    def apply_to(dataset)
      if kind == :filter
        if @filter_field.belongs_to?(dataset)
          dataset.add_filter(@filter_field, @operator, @filter_value)
        end
      else
        # Alias the selected column to the merged name, unless the
        # expectation is a self-match or the merged name is already the
        # name of the first field.
        as = kind != :self && name != @field_1.name ? name : nil

        if @field_1.belongs_to?(dataset)
          dataset.add_order(@field_1)
          dataset.add_select(@field_1, as)
        end
        if @field_2.belongs_to?(dataset)
          dataset.add_order(@field_2)
          dataset.add_select(@field_2, as)
        end
      end
    end
  end

  # Expectation whose operator is used exactly as given.
  class MustExpectation < Expectation
  end

  # Expectation whose operator is replaced by its logical opposite.
  class MustNotExpectation < Expectation
    OPERATOR_OPPOSITES = {
      :== => :'!=',
      :'!=' => :==,
      :> => :<=,
      :<= => :>,
      :< => :>=,
      :>= => :<
    }.freeze

    # Same as Expectation, except it negates the operator.
    #
    # An operator without a known opposite is forwarded unchanged so that the
    # base class rejects it and the error message names the operator that was
    # actually passed in (rather than nil).
    def initialize(operator, field_1, field_2, force_kind = nil)
      super(OPERATOR_OPPOSITES.fetch(operator, operator), field_1, field_2, force_kind)
    end
  end

  Expectation::TYPES = {
    :must => MustExpectation,
    :must_not => MustNotExpectation
  }.freeze
end
@@ -0,0 +1,227 @@
1
module Linkage
  # This class is for holding information about a particular field in a
  # dataset.
  class Field
    # Class Ruby uses for integers beyond machine-word range. It is the same
    # class as Integer on Ruby 2.4 and later; the legacy Bignum constant is
    # deliberately not referenced because it no longer exists on Ruby 3.2+.
    BIG_INTEGER = (2**64).class

    # A "tree" used to find compatible types. Each key maps to the types it
    # can be directly converted to (nil when there are none).
    TYPE_CONVERSION_TREE = {
      TrueClass => [Integer],
      Integer => [Float],
      Float => [BigDecimal],
      BigDecimal => [String],
      String => nil,
      DateTime => nil,
      Date => nil,
      Time => nil,
      File => nil
    }
    # Only add the separate big-integer step when it really is a distinct
    # class; otherwise it would overwrite the Integer entry above.
    unless BIG_INTEGER == Integer
      TYPE_CONVERSION_TREE[Integer] = [BIG_INTEGER, Float]
      TYPE_CONVERSION_TREE[BIG_INTEGER] = [BigDecimal]
    end

    # @return [Symbol] This field's name
    attr_reader :name

    # @return [Hash, nil] This field's schema information (nil for fields
    #   produced by {#merge})
    attr_reader :schema

    # @attr [Linkage::Dataset] This field's associated dataset
    attr_accessor :dataset

    # Create a new instance of Field.
    #
    # @param [Symbol] name The field's name
    # @param [Hash] schema The field's schema information
    # @param [Hash] ruby_type The field's ruby type (when given, it is used
    #   as-is instead of being derived from the schema)
    def initialize(name, schema, ruby_type = nil)
      @name = name
      @schema = schema
      @ruby_type = ruby_type
    end

    # Convert the column schema information to a hash of column options, one of
    # which must be :type. The other options added should modify that type
    # (e.g. :size). If a database type is not recognized, return it as a String
    # type.
    #
    # @note This method comes more or less straight from Sequel
    #   (lib/sequel/extensions/schema_dumper.rb).
    # @return [Hash] {:type => Class} plus an :opts Hash when there are
    #   modifiers such as :size, :text, :fixed or :only_time
    def ruby_type
      @ruby_type ||= begin
        column = column_options(@schema[:db_type].downcase)
        column.delete_if { |_key, value| value.nil? }
        described = {:type => column.delete(:type)}
        described[:opts] = column unless column.empty?
        described
      end
    end

    # Create a field that can hold data from two other fields. If the fields
    # have different types, the resulting type is determined via a
    # type-conversion tree.
    #
    # @param [Linkage::Field] other
    # @param [String, Symbol, nil] new_name Name for the merged field. When
    #   omitted, the shared name is used if both names match, otherwise the
    #   two names joined with an underscore.
    # @return [Linkage::Field]
    def merge(other, new_name = nil)
      schema_1 = ruby_type
      schema_2 = other.ruby_type
      if schema_1 == schema_2
        result = schema_1
      else
        type_1 = schema_1[:type]
        opts_1 = schema_1[:opts] || {}
        type_2 = schema_2[:type]
        opts_2 = schema_2[:opts] || {}
        result_type = type_1 == type_2 ? type_1 : first_common_type(type_1, type_2)
        result_opts = opts_1.dup

        # text
        if opts_1[:text] != opts_2[:text]
          # This can only be of type String.
          result_opts[:text] = true
          result_opts.delete(:size)
        end

        # size
        if !result_opts[:text] && opts_1[:size] != opts_2[:size]
          result_opts[:size] = merged_size(type_1, opts_1, type_2, opts_2)
        end

        # fixed
        if opts_1[:fixed] != opts_2[:fixed]
          # This can only be of type String.
          result_opts[:fixed] = true
        end

        result = {:type => result_type}
        result[:opts] = result_opts unless result_opts.empty?
      end

      merged_name =
        if new_name
          new_name.to_sym
        elsif name == other.name
          name
        else
          :"#{name}_#{other.name}"
        end
      Field.new(merged_name, nil, result)
    end

    # Returns true if this field's name and dataset match the other's name
    # and dataset (using {Dataset#==})
    def ==(other)
      return super unless other.is_a?(Field)

      name == other.name && dataset == other.dataset
    end

    # Returns true if this field's dataset is equal to the given dataset
    # (using Dataset#id).
    #
    # @param [Linkage::Dataset]
    def belongs_to?(dataset)
      self.dataset.id == dataset.id
    end

    # @return [Object] truthy when the schema flags this field as the
    #   primary key; nil when the field has no schema
    def primary_key?
      schema && schema[:primary_key]
    end

    private

    # Map a lower-cased database type string to a Hash holding :type and any
    # modifiers. Entries with nil values are stripped by the caller.
    def column_options(db_type)
      case db_type
      when /\A(?:medium|small)?int(?:eger)?(?:\((?:\d+)\))?(?: unsigned)?\z/
        {:type => Integer}
      when /\Atinyint(?:\((\d+)\))?(?: unsigned)?\z/
        {:type => @schema[:type] == :boolean ? TrueClass : Integer}
      when /\Abigint(?:\((?:\d+)\))?(?: unsigned)?\z/
        {:type => BIG_INTEGER}
      when /\A(?:real|float|double(?: precision)?)\z/
        {:type => Float}
      when 'boolean'
        {:type => TrueClass}
      when /\A(?:(?:tiny|medium|long|n)?text|clob)\z/
        {:type => String, :text => true}
      when 'date'
        {:type => Date}
      when /\A(?:small)?datetime\z/
        {:type => DateTime}
      when /\Atimestamp(?:\((\d+)\))?(?: with(?:out)? time zone)?\z/
        {:type => DateTime, :size => ($1.to_i if $1)}
      when /\Atime(?: with(?:out)? time zone)?\z/
        {:type => Time, :only_time => true}
      when /\An?char(?:acter)?(?:\((\d+)\))?\z/
        {:type => String, :size => ($1.to_i if $1), :fixed => true}
      when /\A(?:n?varchar|character varying|bpchar|string)(?:\((\d+)\))?\z/
        {:type => String, :size => ($1.to_i if $1)}
      when /\A(?:small)?money\z/
        {:type => BigDecimal, :size => [19, 2]}
      when /\A(?:decimal|numeric|number)(?:\((\d+)(?:,\s*(\d+))?\))?\z/
        s = [($1.to_i if $1), ($2.to_i if $2)].compact
        {:type => BigDecimal, :size => (s.empty? ? nil : s)}
      when /\A(?:bytea|(?:tiny|medium|long)?blob|(?:var)?binary)(?:\((\d+)\))?\z/
        {:type => File, :size => ($1.to_i if $1)}
      when 'year'
        {:type => Integer}
      else
        {:type => String}
      end
    end

    # Work out the :size for a merged field whose two sources disagree on
    # size. Only called when the two :size values differ, so at least one
    # side has one.
    def merged_size(type_1, opts_1, type_2, opts_2)
      types = [type_1, type_2].uniq
      has_1 = opts_1.has_key?(:size)
      has_2 = opts_2.has_key?(:size)
      size_1 = opts_1[:size]
      size_2 = opts_2[:size]

      if types == [BigDecimal]
        # Two decimals: take the larger precision and the larger scale.
        return has_1 ? size_1 : size_2 unless has_1 && has_2

        precision = size_1[0] > size_2[0] ? size_1[0] : size_2[0]
        scale =
          if size_1[1] && size_2[1]
            size_1[1] > size_2[1] ? size_1[1] : size_2[1]
          else
            size_1[1] || size_2[1]
          end
        [precision, scale]
      elsif types.include?(String) && types.include?(BigDecimal)
        widths = []
        widths << string_width(size_1) if has_1
        widths << string_width(size_2) if has_2
        widths.max
      elsif has_1 && has_2
        # Treat as two strings
        size_1 > size_2 ? size_1 : size_2
      else
        has_1 ? size_1 : size_2
      end
    end

    # Width needed to hold a value of the given size as a string. A decimal
    # size ([precision, scale]) needs one extra character for the dot.
    def string_width(size)
      size.is_a?(Array) ? size[0] + 1 : size
    end

    # First type, in conversion order, that both given types can become.
    # Returns nil when the two types share no conversion target.
    def first_common_type(type_1, type_2)
      types_1 = [type_1] + get_types(type_1)
      types_2 = [type_2] + get_types(type_2)
      (types_1 & types_2).first
    end

    # Get all types that the specified type can be converted to. Order
    # matters.
    def get_types(type)
      targets = TYPE_CONVERSION_TREE[type]
      return [] unless targets

      targets.inject(targets.dup) { |found, target| found | get_types(target) }
    end
  end
end