data_miner-ruby19 0.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,194 @@
module DataMiner
  # Collects the ordered steps (schema, process, tap, import) declared for one
  # resource class and runs them in sequence.
  class Base
    include Blockenspiel::DSL

    attr_accessor :resource, :steps, :step_counter, :attributes

    # resource - the class being mined. This class calls name, table_name,
    #            column_names, table_exists?, delete_all and
    #            reset_column_information on it.
    def initialize(resource)
      @steps = []
      @resource = resource
      @step_counter = 0
      @attributes = HashWithIndifferentAccess.new
    end

    # Appends a DataMiner::Schema step configured by the given block.
    #
    # create_table_options - Hash handed to DataMiner::Schema.new.
    def schema(create_table_options = {}, &block)
      step = DataMiner::Schema.new self, step_counter, create_table_options
      Blockenspiel.invoke block, step
      steps << step
      self.step_counter += 1
    end

    # Appends a DataMiner::Process step. The argument is a method name when no
    # block is given, and a description of the block otherwise.
    def process(method_name_or_block_description, &block)
      steps << DataMiner::Process.new(self, step_counter, method_name_or_block_description, &block)
      self.step_counter += 1
    end

    # Appends a DataMiner::Tap step.
    # NOTE(review): this shadows Object#tap on instances of this class.
    def tap(description, source, options = {})
      steps << DataMiner::Tap.new(self, step_counter, description, source, options)
      self.step_counter += 1
    end

    # Appends a DataMiner::Import step configured by the given block.
    #
    # Accepted call shapes:
    #   import(options)
    #   import(description, options)
    #   import(description)   # options default to {}
    #   import                # no description, no options
    #
    # A trailing Hash is always taken as the options; a missing description
    # becomes '(no description)'. Previously a call without a trailing Hash
    # passed nil (or the description string) to Import.new as its options.
    def import(*args, &block)
      options = args.last.is_a?(Hash) ? args.pop : {}
      description = args.first || '(no description)'

      step = DataMiner::Import.new self, step_counter, description, options
      Blockenspiel.invoke block, step
      steps << step
      self.step_counter += 1
    end

    # Mine data for this class.
    #
    # Options (keys are symbolized in place):
    # * <tt>:from_scratch</tt>: call resource.delete_all before running the steps
    # * <tt>:preserve_call_stack_between_runs</tt>: do not clear the shared call
    #   stack when the outermost run finishes
    #
    # Returns nil. Does nothing if this resource is already on the call stack.
    def run(options = {})
      options.symbolize_keys!

      return if DataMiner::Base.call_stack.include? resource.name
      DataMiner::Base.call_stack.push resource.name

      finished = false
      skipped = false
      if DataMiner::Run.table_exists?
        run_record = DataMiner::Run.create! :started_at => Time.now, :resource_name => resource.name, :killed => true
      else
        run_record = nil
        DataMiner.log_info "Not logging individual runs. Please run DataMiner::Run.create_tables if you want to enable this."
      end
      begin
        # Inside the begin block so that a failure here still goes through the
        # clean-up in the ensure clause below.
        resource.delete_all if options[:from_scratch]
        steps.each do |step|
          step.run run_record
          resource.reset_column_information
        end
        finished = true
      rescue DataMiner::Finish
        finished = true
      rescue DataMiner::Skip
        skipped = true
      ensure
        if DataMiner::Base.call_stack.first == resource.name && !options[:preserve_call_stack_between_runs]
          DataMiner::Base.call_stack.clear
        end
        # Only touch the record we actually created: the runs table may have
        # been created while the steps were running, in which case there is no
        # record to update.
        if run_record
          run_record.update_attributes! :terminated_at => Time.now, :finished => finished, :skipped => skipped, :killed => false
        end
      end
      nil
    end

    # Returns the steps that are Import instances.
    def import_steps
      steps.select { |step| step.is_a? Import }
    end

    # Intentionally empty.
    # NOTE(review): presumably a hook called before the DSL block is evaluated - confirm against the caller.
    def before_invoke

    end

    # Sanity-checks the declared import attributes once the resource's table exists.
    # NOTE(review): presumably a hook called after the DSL block is evaluated - confirm against the caller.
    def after_invoke
      return unless resource.table_exists?
      make_sure_unit_definitions_make_sense
      suggest_missing_column_migrations
    end

    # Each entry lists attribute options that together form a complete unit definition.
    COMPLETE_UNIT_DEFINITIONS = [
      [:units],
      [:from_units, :to_units],
      [:units_field_name],
      [:units_field_name, :to_units],
      [:units_field_number],
      [:units_field_number, :to_units]
    ]

    # Complains (via DataMiner.log_or_raise) about every import attribute that
    # has unit-related options which do not satisfy any entry of
    # COMPLETE_UNIT_DEFINITIONS.
    def make_sure_unit_definitions_make_sense
      import_steps.each do |step|
        step.attributes.each do |_, attribute|
          unit_keys = attribute.options.keys.select { |k| k.to_s =~ /unit/ }
          next if unit_keys.empty?
          next if COMPLETE_UNIT_DEFINITIONS.any? { |complete_definition| complete_definition.all? { |required_option| attribute.options[required_option].present? } }
          DataMiner.log_or_raise %{

================================

You don't have a valid unit definition for #{resource.name}##{attribute.name}.

You supplied #{unit_keys.map(&:to_sym).inspect }.

You need to supply one of #{COMPLETE_UNIT_DEFINITIONS.map(&:inspect).to_sentence}.

================================
}
        end
      end
    end

    # Logs a ready-to-paste migration for import attributes (and their
    # "<name>_units" companions) that have no matching column on the resource.
    # Attribute names ending in _units are reported as reserved.
    def suggest_missing_column_migrations
      missing_columns = []

      import_steps.each do |step|
        step.attributes.each do |_, attribute|
          DataMiner.log_or_raise "You can't have an attribute column that ends in _units (reserved): #{resource.table_name}.#{attribute.name}" if attribute.name.end_with? '_units'
          unless resource.column_names.include? attribute.name
            missing_columns << attribute.name
          end
          if attribute.wants_units? && !resource.column_names.include?(units_column = "#{attribute.name}_units")
            missing_columns << units_column
          end
        end
      end
      missing_columns.uniq!
      if missing_columns.any?
        DataMiner.log_debug %{

================================

On #{resource}, it looks like you're missing some columns...

Please run this...

./script/generate migration AddMissingColumnsTo#{resource.name}

and **replace** the resulting file with this:

class AddMissingColumnsTo#{resource.name} < ActiveRecord::Migration
def self.up
#{missing_columns.map { |column_name| " add_column :#{resource.table_name}, :#{column_name}, :#{column_name.end_with?('_units') ? 'string' : 'FIXME_WHAT_COLUMN_TYPE_AM_I' }" }.join("\n") }
end

def self.down
#{missing_columns.map { |column_name| " remove_column :#{resource.table_name}, :#{column_name}" }.join("\n") }
end
end

On the other hand, if you're working directly with create_table, this might be helpful:

#{missing_columns.map { |column_name| "t.#{column_name.end_with?('_units') ? 'string' : 'FIXME_WHAT_COLUMN_TYPE_AM_I' } '#{column_name}'" }.join("\n") }

================================
}
      end
    end

    # Names of the resource classes that the class-level run iterates over.
    cattr_accessor :resource_names
    self.resource_names = []

    # Names of resources currently being run; used by #run to avoid re-entering a resource.
    cattr_accessor :call_stack
    self.call_stack = []
    class << self
      # Mine data. Defaults to all resource_names touched by DataMiner.
      #
      # Options
      # * <tt>:resource_names</tt>: array of resource (class) names to mine
      #
      # RemoteTable.cleanup is always called afterwards, even on error.
      def run(options = {})
        options.symbolize_keys!

        resource_names.each do |resource_name|
          if options[:resource_names].blank? || options[:resource_names].include?(resource_name)
            resource_name.constantize.data_miner_base.run options
          end
        end
      ensure
        RemoteTable.cleanup
      end
    end
  end
end
@@ -0,0 +1,36 @@
module DataMiner
  # Translates a key into a value by scanning the rows of a RemoteTable.
  class Dictionary
    attr_accessor :key_name, :value_name, :sprintf, :table

    # Options
    # * <tt>:input</tt>: name of the row field holding the key
    # * <tt>:output</tt>: name of the row field holding the value
    # * <tt>:sprintf</tt>: format applied to both sides before comparing (default '%s')
    # * <tt>:url</tt>: passed to RemoteTable.new
    def initialize(options = {})
      @key_name = options[:input]
      @value_name = options[:output]
      @sprintf = options[:sprintf] || '%s'
      @table = RemoteTable.new(:url => options[:url])
    end

    # Looks +key+ up using the field names and format given at construction.
    # Returns the matching value as a String, or nil when no row matches.
    def lookup(key)
      find(self.key_name, key, self.value_name, :sprintf => self.sprintf)
    end

    # Returns row[value_name] (as a String) of the first row whose
    # row[key_name] equals +key+ after normalization, or nil if there is none.
    #
    # Options
    # * <tt>:sprintf</tt>: format used to normalize both sides for this call
    def find(key_name, key, value_name, options = {})
      # The wanted key does not change per row, so normalize it once.
      wanted = normalize_for_comparison(key, options)
      match = table.rows.detect { |row| normalize_for_comparison(row[key_name], options) == wanted }
      match[value_name].to_s if match
    end

    private

    # Coerces +string+ to a float or integer when the format asks for one,
    # applies the format, and strips surrounding whitespace.
    #
    # The format comes from options[:sprintf]. It must not be read from the
    # instance-level #sprintf accessor (which also shadows Kernel#sprintf
    # here), otherwise a per-call format passed to #find is only half applied.
    def normalize_for_comparison(string, options = {})
      format = options[:sprintf]
      if format
        if /\%[0-9\.]*f/.match(format)
          string = string.to_f
        elsif /\%[0-9\.]*d/.match(format)
          string = string.to_i
        end
        string = format % string
      end
      string.to_s.strip
    end
  end
end
@@ -0,0 +1,70 @@
module DataMiner
  # A run step that reads rows from a table and stores the declared
  # attributes on records of the resource.
  class Import
    include Blockenspiel::DSL

    # Number of rows processed before a DUMP=true run is stopped.
    DUMP_ROW_LIMIT = 5

    attr_reader :attributes
    attr_accessor :base, :position_in_run, :table
    attr_accessor :description
    delegate :resource, :to => :base

    # base            - the owning DataMiner::Base
    # position_in_run - index of this step among the base's steps
    # description     - free-text label used in #inspect
    # table_options   - Hash; keys are symbolized in place.
    #   * <tt>:table</tt>: a ready-made table object to read from (exclusive with :url)
    #   * <tt>:errata</tt>: when a String, replaced by Errata.new(:url => that_string, :responder => resource)
    #   * without :table, the whole hash is handed to RemoteTable.new
    def initialize(base, position_in_run, description, table_options = {})
      table_options.symbolize_keys!

      @attributes = ActiveSupport::OrderedHash.new
      @base = base
      @position_in_run = position_in_run
      @description = description

      if table_options[:errata].is_a?(String)
        table_options[:errata] = Errata.new :url => table_options[:errata], :responder => resource
      end

      if table_options[:table].present?
        DataMiner.log_or_raise "You should specify :table or :url, but not both" if table_options[:url].present?
        @table = table_options[:table]
      else
        @table = RemoteTable.new table_options
      end
    end

    def inspect
      "Import(#{resource}) position #{position_in_run} (#{description})"
    end

    # True if an attribute called +attr_name+ has been declared with #store or #key.
    def stores?(attr_name)
      attributes.has_key? attr_name
    end

    # Declares an attribute to be filled from each row.
    def store(attr_name, attr_options = {})
      DataMiner.log_or_raise "You should only call store or key once for #{resource.name}##{attr_name}" if attributes.has_key? attr_name
      attributes[attr_name] = Attribute.new self, attr_name, attr_options
    end

    # Declares the attribute used to find or initialize records, and stores it
    # like any other attribute.
    def key(attr_name, attr_options = {})
      # Checked here as well so that @key is not overwritten before store complains.
      DataMiner.log_or_raise "You should only call store or key once for #{resource.name}##{attr_name}" if attributes.has_key? attr_name
      @key = attr_name
      store attr_name, attr_options
    end

    # Imports every row of the table: finds or initializes a record by the key
    # attribute, sets every declared attribute from the row, and saves the
    # record when its primary key is present.
    #
    # With ENV['DUMP'] == 'true', each row is also logged (input and computed
    # output) and the import raises once DUMP_ROW_LIMIT rows have been handled.
    #
    # run - the current run record (unused here).
    def run(run)
      primary_key = resource.primary_key
      dumped_rows = 0

      table.each_row do |row|
        if ENV['DUMP'] == 'true'
          # >= so that exactly DUMP_ROW_LIMIT rows are processed, as the message says.
          raise "[data_miner gem] Stopping after #{DUMP_ROW_LIMIT} rows because DUMP=true" if dumped_rows >= DUMP_ROW_LIMIT
          dumped_rows += 1
          DataMiner.log_info %{Row #{dumped_rows}
IN: #{row.inspect}
OUT: #{attributes.inject(Hash.new) { |memo, (attr_name, attr)| memo[attr_name] = attr.value_from_row(row); memo }.inspect}
}
        end

        record = resource.send "find_or_initialize_by_#{@key}", attributes[@key].value_from_row(row)
        attributes.each { |_, attr| attr.set_record_from_row record, row }
        record.save! if record.send(primary_key).present?
      end
      DataMiner.log_info "performed #{inspect}"
    end
  end
end
@@ -0,0 +1,37 @@
module DataMiner
  # A run step that either calls a named method on the resource or invokes a
  # stored block.
  class Process
    attr_accessor :base, :position_in_run, :method_name, :block_description, :block
    delegate :resource, :to => :base

    # When a block is supplied, the third argument describes that block;
    # otherwise it names the method to send to the resource.
    def initialize(base, position_in_run, method_name_or_block_description, &block)
      @base = base
      @position_in_run = position_in_run
      if block
        @block_description, @block = method_name_or_block_description, block
      else
        @method_name = method_name_or_block_description
      end
    end

    def inspect
      action = block ? " ran block (#{block_description})" : " called :#{method_name}"
      "Process(#{resource}) position #{position_in_run}" + action
    end

    # Executes the step and logs it.
    #
    # run - the current run record (unused here).
    def run(run)
      block ? block.call : resource.send(method_name)
      DataMiner.log_info "ran #{inspect}"
    end
  end
end
@@ -0,0 +1,26 @@
module DataMiner
  # ActiveRecord model, stored in data_miner_runs, recording one run of a resource.
  class Run < ActiveRecord::Base
    set_table_name 'data_miner_runs'

    # The class named by resource_name.
    def resource
      resource_name.constantize
    end

    # Creates the data_miner_runs table.
    #
    # Does nothing when the table already exists and has a 'skipped' column;
    # otherwise the table is (re)created with :force => true, dropping any
    # existing one.
    def self.create_tables
      up_to_date = table_exists? && column_names.include?('skipped')
      return if up_to_date

      connection.create_table('data_miner_runs', :force => true) do |t|
        t.string 'resource_name'
        %w{ killed skipped finished }.each { |flag| t.boolean flag }
        %w{ started_at terminated_at created_at updated_at }.each { |stamp| t.datetime stamp }
      end
      reset_column_information
    end
  end
end
+ end
@@ -0,0 +1,244 @@
module DataMiner
  # A run step that brings the resource's table in line with the columns and
  # indexes declared through this object's DSL methods.
  class Schema
    include Blockenspiel::DSL

    MAX_INDEX_NAME_LENGTH = 50
    # Properties compared by #index_equivalent?.
    INDEX_PROPERTIES = %w{ name columns }
    # Columns added to the ideal table unless they were declared explicitly.
    EXTRA_COLUMNS = {
      :updated_at => :datetime,
      :created_at => :datetime
    }

    attr_reader :base, :position_in_run, :create_table_options
    delegate :resource, :to => :base

    # create_table_options - Hash later passed to connection.create_table. Keys
    #                        are symbolized in place and :id is always forced
    #                        to false; :id => true and :primary_key are reported
    #                        through DataMiner.log_or_raise.
    def initialize(base, position_in_run, create_table_options)
      @base = base
      @position_in_run = position_in_run
      @create_table_options = create_table_options
      @create_table_options.symbolize_keys!
      if @create_table_options[:id] === true
        DataMiner.log_or_raise ":id => true is not allowed in create_table_options."
      end
      if @create_table_options.has_key?(:primary_key)
        DataMiner.log_or_raise ":primary_key is not allowed in create_table_options. Use set_primary_key instead."
      end
      @create_table_options[:id] = false # always
    end

    def connection
      ActiveRecord::Base.connection
    end

    def table_name
      resource.table_name
    end

    # Memoized table definition that collects the declared columns.
    def ideal_table
      @ideal_table ||= ActiveRecord::ConnectionAdapters::TableDefinition.new(connection)
    end

    # Memoized list of declared index definitions.
    def ideal_indexes
      @ideal_indexes ||= []
    end

    # Indexes the connection currently reports for the table.
    def actual_indexes
      connection.indexes(table_name)
    end

    def description
      "Define a table called #{table_name} with primary key #{ideal_primary_key_name}"
    end

    def inspect
      "Schema(#{resource}): #{description}"
    end

    # One DSL method per column type, e.g. string(*names, options): declares
    # each name as a column of that type on the ideal table.
    %w( string text integer float decimal datetime timestamp time date binary boolean ).each do |column_type|
      define_method(column_type) do |*args|
        options = args.extract_options!
        args.each { |name| ideal_table.column(name, column_type, options) }
      end
    end

    # Declares a column by passing the arguments straight to the ideal table.
    def column(*args)
      ideal_table.column(*args)
    end

    # Declares an index on +columns+.
    #
    # Options (keys are symbolized in place)
    # * <tt>:name</tt>: explicit index name; otherwise the connection's default
    #   name is used, shortened with a CRC32 suffix when it is not shorter
    #   than MAX_INDEX_NAME_LENGTH
    # * <tt>:unique</tt>: recorded on the index definition (defaults to true)
    #
    # NOTE(review): #place_index does not pass the unique flag to add_index - confirm whether that is intended.
    def index(columns, options = {})
      options.symbolize_keys!
      columns = Array.wrap(columns)
      name = options[:name]
      unless name
        default_name = connection.index_name(table_name, options.merge(:column => columns))
        if default_name.length < MAX_INDEX_NAME_LENGTH
          name = default_name
        else
          name = default_name[0..MAX_INDEX_NAME_LENGTH-11] + Zlib.crc32(default_name).to_s
        end
      end
      index_unique = options.fetch(:unique, true)
      ideal_indexes.push ActiveRecord::ConnectionAdapters::IndexDefinition.new(table_name, name, index_unique, columns)
    end

    def ideal_primary_key_name
      resource.primary_key.to_s
    end

    def actual_primary_key_name
      connection.primary_key(table_name).to_s
    end

    # True when both indexes are present and agree, as strings, on every
    # property listed in INDEX_PROPERTIES.
    def index_equivalent?(a, b)
      return false unless a && b
      INDEX_PROPERTIES.all? do |property|
        left = a.send(property)
        right = b.send(property)
        DataMiner.log_debug "...comparing #{left.inspect}.to_s <-> #{right.inspect}.to_s"
        left.to_s == right.to_s
      end
    end

    # True when both columns are present and have the same name and type,
    # treating a 'primary_key' type as 'integer'.
    # FIXME mysql only (assume integer primary keys)
    def column_equivalent?(a, b)
      return false unless a && b
      a_type, b_type = [a, b].map do |col|
        col.type.to_s == 'primary_key' ? 'integer' : col.type.to_s
      end
      a_type == b_type && a.name.to_s == b.name.to_s
    end

    # Defines column_needs_to_be_placed?, column_needs_to_be_removed?,
    # index_needs_to_be_placed? and index_needs_to_be_removed?.
    %w{ column index }.each do |kind|
      # True when the actual item is missing or differs from the ideal one.
      define_method("#{kind}_needs_to_be_placed?") do |name|
        actual = send("actual_#{kind}", name)
        if actual
          ideal = send("ideal_#{kind}", name)
          !send("#{kind}_equivalent?", actual, ideal)
        else
          true
        end
      end

      # True when no ideal item of that name was declared.
      define_method("#{kind}_needs_to_be_removed?") do |name|
        send("ideal_#{kind}", name).nil?
      end
    end

    def ideal_column(name)
      ideal_table[name.to_s]
    end

    def actual_column(name)
      resource.columns_hash[name.to_s]
    end

    def ideal_index(name)
      wanted = name.to_s
      ideal_indexes.detect { |ideal| ideal.name == wanted }
    end

    def actual_index(name)
      wanted = name.to_s
      actual_indexes.detect { |actual| actual.name == wanted }
    end

    # Drops the existing column of that name (if any) and adds it again with
    # the ideal column's type. Only the type is passed to add_column.
    def place_column(name)
      remove_column(name) if actual_column(name)
      ideal = ideal_column(name)
      DataMiner.log_debug "ADDING COLUMN #{name}"
      connection.add_column(table_name, name, ideal.type.to_sym) # symbol type!
      resource.reset_column_information
    end

    def remove_column(name)
      DataMiner.log_debug "REMOVING COLUMN #{name}"
      connection.remove_column(table_name, name)
      resource.reset_column_information
    end

    # Drops the existing index of that name (if any) and adds the ideal one.
    def place_index(name)
      remove_index(name) if actual_index(name)
      ideal = ideal_index(name)
      DataMiner.log_debug "ADDING INDEX #{name}"
      connection.add_index(table_name, ideal.columns, :name => ideal.name)
      resource.reset_column_information
    end

    def remove_index(name)
      DataMiner.log_debug "REMOVING INDEX #{name}"
      connection.remove_index(table_name, :name => name)
      resource.reset_column_information
    end

    # Applies the schema. The order of the sub-steps matters: the table and
    # its primary key are settled before columns and indexes are reconciled.
    #
    # run - the current run record (unused here).
    def run(run)
      _add_extra_columns
      _create_table
      _set_primary_key
      _remove_columns
      _add_columns
      _remove_indexes
      _add_indexes
      DataMiner.log_debug "ran #{inspect}"
    end

    # Declares each of EXTRA_COLUMNS on the ideal table unless already declared.
    def _add_extra_columns
      EXTRA_COLUMNS.each do |extra_name, extra_type|
        next if ideal_column(extra_name)
        send extra_type, extra_name
      end
    end

    # Creates the table, with a single placeholder column, if it does not exist yet.
    def _create_table
      return if resource.table_exists?
      DataMiner.log_debug "CREATING TABLE #{table_name} with #{create_table_options.inspect}"
      connection.create_table(table_name, create_table_options) do |t|
        t.integer :data_miner_placeholder
      end
      resource.reset_column_information
    end

    # Makes the table's primary key match the resource's. When an existing
    # primary key column differs from the ideal one, the table is dropped and
    # re-created from scratch.
    # FIXME mysql only
    def _set_primary_key
      if ideal_primary_key_name == 'id' && !ideal_column('id')
        DataMiner.log_debug "no special primary key set on #{table_name}, so using 'id'"
        column 'id', :primary_key
      end
      actual = actual_column(actual_primary_key_name)
      ideal = ideal_column(ideal_primary_key_name)
      unless column_equivalent?(actual, ideal)
        DataMiner.log_debug "looks like #{table_name} has a bad (or missing) primary key"
        if actual
          DataMiner.log_debug "looks like primary key needs to change from #{actual_primary_key_name} to #{ideal_primary_key_name}, re-creating #{table_name} from scratch"
          connection.drop_table table_name
          resource.reset_column_information
          _create_table
        end
        place_column ideal_primary_key_name
        if ideal.type.to_s != 'primary_key'
          DataMiner.log_debug "SETTING #{ideal_primary_key_name} AS PRIMARY KEY"
          on_sqlite = ActiveRecord::Base.connection.adapter_name.downcase == 'sqlite'
          if on_sqlite
            connection.execute "CREATE UNIQUE INDEX IDX_#{table_name}_#{ideal_primary_key_name} ON #{table_name} (#{ideal_primary_key_name} ASC)"
          else
            connection.execute "ALTER TABLE `#{table_name}` ADD PRIMARY KEY (`#{ideal_primary_key_name}`)"
          end
        end
      end
      resource.reset_column_information
    end

    # Removes every actual column that has no ideal counterpart.
    def _remove_columns
      resource.columns_hash.values.each do |actual|
        next unless column_needs_to_be_removed?(actual.name)
        remove_column actual.name
      end
    end

    # (Re)places every ideal column that is missing or differs.
    def _add_columns
      ideal_table.columns.each do |ideal|
        next unless column_needs_to_be_placed?(ideal.name)
        place_column ideal.name
      end
    end

    # Removes every actual index that has no ideal counterpart.
    def _remove_indexes
      actual_indexes.each do |actual|
        next unless index_needs_to_be_removed?(actual.name)
        remove_index actual.name
      end
    end

    # (Re)places every ideal index that is missing or differs.
    def _add_indexes
      ideal_indexes.each do |ideal|
        next if ideal.name == ideal_primary_key_name # this should already have been taken care of
        next unless index_needs_to_be_placed?(ideal.name)
        place_index ideal.name
      end
    end
  end
end