data_miner-ruby19 0.5.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,194 @@
1
module DataMiner
  # Collects the ordered steps (schema, process, tap, import) declared for one
  # resource class and runs them in sequence.
  class Base
    include Blockenspiel::DSL

    attr_accessor :resource, :steps, :step_counter, :attributes

    # resource: the class whose data is mined by this object's steps.
    def initialize(resource)
      @steps = []
      @resource = resource
      @step_counter = 0
      @attributes = HashWithIndifferentAccess.new
    end

    # Declares a schema step; the block is evaluated against a DataMiner::Schema.
    def schema(create_table_options = {}, &block)
      step = DataMiner::Schema.new self, step_counter, create_table_options
      Blockenspiel.invoke block, step
      steps << step
      self.step_counter += 1
    end

    # Declares a process step: either a method name to send to the resource,
    # or a description plus a block to call.
    def process(method_name_or_block_description, &block)
      steps << DataMiner::Process.new(self, step_counter, method_name_or_block_description, &block)
      self.step_counter += 1
    end

    # Declares a tap step. Note that this shadows Object#tap on instances of
    # this class.
    def tap(description, source, options = {})
      steps << DataMiner::Tap.new(self, step_counter, description, source, options)
      self.step_counter += 1
    end

    # Declares an import step; the block is evaluated against a DataMiner::Import.
    #
    # Accepted forms:
    #   import options_hash
    #   import description, options_hash
    #
    # A lone non-Hash argument is treated as the description (with empty
    # options) instead of being passed along as the options hash, and calling
    # with no arguments no longer hands nil options to DataMiner::Import.
    def import(*args, &block)
      if args.length >= 2
        description = args.first
        options = args.last
      elsif args.first.is_a?(Hash)
        description = '(no description)'
        options = args.first
      else
        description = args.empty? ? '(no description)' : args.first
        options = {}
      end

      step = DataMiner::Import.new self, step_counter, description, options
      Blockenspiel.invoke block, step
      steps << step
      self.step_counter += 1
    end

    # Mine data for this class.
    #
    # Options
    # * <tt>:from_scratch</tt>: call resource.delete_all before running the steps
    # * <tt>:preserve_call_stack_between_runs</tt>: don't clear the shared call stack afterwards
    #
    # Returns nil. Does nothing if this resource is already on the call stack.
    def run(options = {})
      options.symbolize_keys!

      return if DataMiner::Base.call_stack.include? resource.name
      DataMiner::Base.call_stack.push resource.name

      finished = false
      skipped = false
      run_record = nil
      begin
        # Everything after the push happens inside begin/ensure so that a
        # failure while creating the run record or deleting rows cannot leave
        # this resource stuck on the call stack.
        if DataMiner::Run.table_exists?
          run_record = DataMiner::Run.create! :started_at => Time.now, :resource_name => resource.name, :killed => true
        else
          DataMiner.log_info "Not logging individual runs. Please run DataMiner::Run.create_tables if you want to enable this."
        end
        resource.delete_all if options[:from_scratch]
        steps.each do |step|
          step.run run_record
          resource.reset_column_information
        end
        finished = true
      rescue DataMiner::Finish
        finished = true
      rescue DataMiner::Skip
        skipped = true
      ensure
        begin
          # Only touch the record we actually created; re-checking
          # table_exists? here would call a method on nil if the runs table
          # was created while the steps were running.
          if run_record
            run_record.update_attributes! :terminated_at => Time.now, :finished => finished, :skipped => skipped, :killed => false
          end
        ensure
          if DataMiner::Base.call_stack.first == resource.name && !options[:preserve_call_stack_between_runs]
            DataMiner::Base.call_stack.clear
          end
        end
      end
      nil
    end

    # The subset of steps that are Import steps.
    def import_steps
      steps.select { |step| step.is_a? Import }
    end

    def before_invoke

    end

    # Runs the sanity checks below, but only once the resource's table exists.
    def after_invoke
      return unless resource.table_exists?
      make_sure_unit_definitions_make_sense
      suggest_missing_column_migrations
    end

    # Each inner array is a set of attribute options that together form a
    # complete unit definition.
    COMPLETE_UNIT_DEFINITIONS = [
      [:units],
      [:from_units, :to_units],
      [:units_field_name],
      [:units_field_name, :to_units],
      [:units_field_number],
      [:units_field_number, :to_units]
    ]

    # Complains (via DataMiner.log_or_raise) about any import attribute that
    # mentions units in its options without satisfying one of
    # COMPLETE_UNIT_DEFINITIONS.
    def make_sure_unit_definitions_make_sense
      import_steps.each do |step|
        step.attributes.each do |_, attribute|
          mentions_units = attribute.options.any? { |k, _| k.to_s =~ /unit/ }
          complete = COMPLETE_UNIT_DEFINITIONS.any? do |complete_definition|
            complete_definition.all? { |required_option| attribute.options[required_option].present? }
          end
          next unless mentions_units && !complete
          DataMiner.log_or_raise %{

================================

You don't have a valid unit definition for #{resource.name}##{attribute.name}.

You supplied #{attribute.options.keys.select { |k, _| k.to_s =~ /unit/ }.map(&:to_sym).inspect }.

You need to supply one of #{COMPLETE_UNIT_DEFINITIONS.map(&:inspect).to_sentence}.

================================
          }
        end
      end
    end

    # Logs a ready-to-paste migration for any columns that the import steps
    # expect but the resource's table does not have.
    def suggest_missing_column_migrations
      missing_columns = []

      import_steps.each do |step|
        step.attributes.each do |_, attribute|
          DataMiner.log_or_raise "You can't have an attribute column that ends in _units (reserved): #{resource.table_name}.#{attribute.name}" if attribute.name.end_with? '_units'
          unless resource.column_names.include? attribute.name
            missing_columns << attribute.name
          end
          if attribute.wants_units? && !resource.column_names.include?(units_column = "#{attribute.name}_units")
            missing_columns << units_column
          end
        end
      end
      missing_columns.uniq!
      if missing_columns.any?
        DataMiner.log_debug %{

================================

On #{resource}, it looks like you're missing some columns...

Please run this...

  ./script/generate migration AddMissingColumnsTo#{resource.name}

and **replace** the resulting file with this:

  class AddMissingColumnsTo#{resource.name} < ActiveRecord::Migration
    def self.up
#{missing_columns.map { |column_name| "      add_column :#{resource.table_name}, :#{column_name}, :#{column_name.end_with?('_units') ? 'string' : 'FIXME_WHAT_COLUMN_TYPE_AM_I' }" }.join("\n") }
    end

    def self.down
#{missing_columns.map { |column_name| "      remove_column :#{resource.table_name}, :#{column_name}" }.join("\n") }
    end
  end

On the other hand, if you're working directly with create_table, this might be helpful:

#{missing_columns.map { |column_name| "t.#{column_name.end_with?('_units') ? 'string' : 'FIXME_WHAT_COLUMN_TYPE_AM_I' } '#{column_name}'" }.join("\n") }

================================
        }
      end
    end

    cattr_accessor :resource_names
    self.resource_names = []

    # Names of resources currently being mined; used by #run to avoid
    # re-entering a resource that is already running.
    cattr_accessor :call_stack
    self.call_stack = []
    class << self
      # Mine data. Defaults to all resource_names touched by DataMiner.
      #
      # Options
      # * <tt>:resource_names</tt>: array of resource (class) names to mine
      #
      # RemoteTable.cleanup is always called afterwards, even on failure.
      def run(options = {})
        options.symbolize_keys!

        resource_names.each do |resource_name|
          if options[:resource_names].blank? || options[:resource_names].include?(resource_name)
            resource_name.constantize.data_miner_base.run options
          end
        end
      ensure
        RemoteTable.cleanup
      end
    end
  end
end
@@ -0,0 +1,36 @@
1
module DataMiner
  # Translates keys to values using the rows of a remote table.
  class Dictionary
    attr_accessor :key_name, :value_name, :sprintf, :table

    # Options read here:
    # * <tt>:input</tt>: name of the column holding keys
    # * <tt>:output</tt>: name of the column holding values
    # * <tt>:sprintf</tt>: format applied to both sides before comparing (default '%s')
    # * <tt>:url</tt>: passed to RemoteTable.new
    def initialize(options = {})
      @key_name = options[:input]
      @value_name = options[:output]
      @sprintf = options[:sprintf] || '%s'
      @table = RemoteTable.new(:url => options[:url])
    end

    # Looks +key+ up using the column names and format this dictionary was
    # configured with.
    def lookup(key)
      find(self.key_name, key, self.value_name, :sprintf => self.sprintf)
    end

    # Returns the +value_name+ field (as a String) of the first row whose
    # +key_name+ field matches +key+ after normalization, or nil if no row
    # matches.
    #
    # Options
    # * <tt>:sprintf</tt>: format string applied to both sides before comparing
    def find(key_name, key, value_name, options = {})
      # The wanted key does not change from row to row, so normalize it once.
      wanted = normalize_for_comparison(key, options)
      match = table.rows.detect { |row| wanted == normalize_for_comparison(row[key_name], options) }
      match[value_name].to_s if match
    end

    private

    # Formats +string+ with options[:sprintf] (coercing to Float or Integer
    # first when the format asks for %f or %d) and strips whitespace.
    #
    # The format used is the one passed in +options+; a bare +sprintf+ here
    # would resolve to this object's own accessor and ignore the caller's format.
    def normalize_for_comparison(string, options = {})
      format_string = options[:sprintf]
      if format_string
        if /\%[0-9\.]*f/.match(format_string)
          string = string.to_f
        elsif /\%[0-9\.]*d/.match(format_string)
          string = string.to_i
        end
        string = format_string % string
      end
      string.to_s.strip
    end
  end
end
@@ -0,0 +1,70 @@
1
module DataMiner
  # One import step: reads rows from a table and stores them on the resource.
  class Import
    include Blockenspiel::DSL

    # Number of rows processed before #run aborts when ENV['DUMP'] is 'true'.
    DUMP_ROW_LIMIT = 5

    attr_reader :attributes
    attr_accessor :base, :position_in_run, :table
    attr_accessor :description
    delegate :resource, :to => :base

    # table_options is handed to RemoteTable.new unless it contains :table,
    # in which case that object is used as the table directly. A String
    # :errata is wrapped in an Errata with the resource as responder.
    def initialize(base, position_in_run, description, table_options = {})
      # Work on a symbolized copy so the caller's hash is left untouched.
      table_options = table_options.symbolize_keys

      @attributes = ActiveSupport::OrderedHash.new
      @base = base
      @position_in_run = position_in_run
      @description = description

      if table_options[:errata].is_a?(String)
        table_options[:errata] = Errata.new :url => table_options[:errata], :responder => resource
      end

      if table_options[:table].present?
        DataMiner.log_or_raise "You should specify :table or :url, but not both" if table_options[:url].present?
        @table = table_options[:table]
      else
        @table = RemoteTable.new table_options
      end
    end

    def inspect
      "Import(#{resource}) position #{position_in_run} (#{description})"
    end

    # True if store or key has been called for attr_name.
    def stores?(attr_name)
      attributes.has_key? attr_name
    end

    # Declares an attribute to be stored. Complains if the attribute was
    # already declared. Returns the new Attribute.
    def store(attr_name, attr_options = {})
      DataMiner.log_or_raise "You should only call store or key once for #{resource.name}##{attr_name}" if attributes.has_key? attr_name
      attributes[attr_name] = Attribute.new self, attr_name, attr_options
    end

    # Declares the attribute used to find existing records, and stores it.
    # The duplicate check lives in #store, so it is reported once rather
    # than twice.
    def key(attr_name, attr_options = {})
      attribute = store attr_name, attr_options
      @key = attr_name
      attribute
    end

    # Finds or initializes one record per table row (by the key attribute),
    # sets every declared attribute from the row, and saves the record if its
    # primary key is present.
    #
    # When ENV['DUMP'] is 'true', each row is logged and the import raises
    # once DUMP_ROW_LIMIT rows have been processed.
    def run(run)
      primary_key = resource.primary_key
      dumped_rows = 0

      table.each_row do |row|
        if ENV['DUMP'] == 'true'
          raise "[data_miner gem] Stopping after #{DUMP_ROW_LIMIT} rows because DUMP=true" if dumped_rows >= DUMP_ROW_LIMIT
          dumped_rows += 1
          DataMiner.log_info %{Row #{dumped_rows}
IN: #{row.inspect}
OUT: #{attributes.inject(Hash.new) { |memo, v| attr_name, attr = v; memo[attr_name] = attr.value_from_row(row); memo }.inspect}
          }
        end

        record = resource.send "find_or_initialize_by_#{@key}", attributes[@key].value_from_row(row)
        attributes.each { |_, attr| attr.set_record_from_row record, row }
        record.save! if record.send(primary_key).present?
      end
      DataMiner.log_info "performed #{inspect}"
    end
  end
end
@@ -0,0 +1,37 @@
1
module DataMiner
  # One process step: either sends a method to the resource or calls a block.
  class Process
    attr_accessor :base, :position_in_run
    attr_accessor :method_name
    attr_accessor :block_description, :block
    delegate :resource, :to => :base

    # With a block, the third argument is stored as the block's description;
    # without one, it is stored as the name of the method to send to the resource.
    def initialize(base, position_in_run, method_name_or_block_description, &block)
      @base, @position_in_run = base, position_in_run
      if block
        @block = block
        @block_description = method_name_or_block_description
      else
        @method_name = method_name_or_block_description
      end
    end

    def inspect
      summary = "Process(#{resource}) position #{position_in_run}"
      summary << (block ? " ran block (#{block_description})" : " called :#{method_name}")
    end

    # Calls the stored block if there is one, otherwise sends method_name to
    # the resource; then logs what was run.
    def run(run)
      block ? block.call : resource.send(method_name)
      DataMiner.log_info "ran #{inspect}"
    end
  end
end
@@ -0,0 +1,26 @@
1
module DataMiner
  # ActiveRecord model for one row of the data_miner_runs table.
  class Run < ActiveRecord::Base
    set_table_name 'data_miner_runs'

    # The class named by resource_name.
    def resource
      resource_name.constantize
    end

    # (Re)creates the data_miner_runs table. Does nothing when the table
    # already exists and has a 'skipped' column; otherwise the table is
    # created with :force => true.
    def self.create_tables
      up_to_date = table_exists? && column_names.include?('skipped')
      unless up_to_date
        connection.create_table 'data_miner_runs', :force => true do |t|
          t.string 'resource_name'
          %w{ killed skipped finished }.each { |flag| t.boolean flag }
          %w{ started_at terminated_at created_at updated_at }.each { |stamp| t.datetime stamp }
        end
        reset_column_information
      end
    end
  end
end
@@ -0,0 +1,244 @@
1
module DataMiner
  # One schema step: describes the table a resource should have (columns,
  # indexes, primary key) and, when run, alters the actual table to match.
  class Schema
    include Blockenspiel::DSL

    attr_reader :base
    attr_reader :position_in_run
    attr_reader :create_table_options
    delegate :resource, :to => :base

    # create_table_options is symbolized in place and always ends up with
    # :id => false; :id => true and :primary_key are rejected.
    def initialize(base, position_in_run, create_table_options)
      @base = base
      @position_in_run = position_in_run
      @create_table_options = create_table_options
      @create_table_options.symbolize_keys!
      DataMiner.log_or_raise ":id => true is not allowed in create_table_options." if @create_table_options[:id] === true
      DataMiner.log_or_raise ":primary_key is not allowed in create_table_options. Use set_primary_key instead." if @create_table_options.has_key?(:primary_key)
      @create_table_options[:id] = false # always
    end

    def connection
      ActiveRecord::Base.connection
    end

    def table_name
      resource.table_name
    end

    # The table definition accumulated from the DSL calls below.
    def ideal_table
      @ideal_table ||= ActiveRecord::ConnectionAdapters::TableDefinition.new(connection)
    end

    # The index definitions accumulated from calls to #index.
    def ideal_indexes
      @ideal_indexes ||= Array.new
    end

    def actual_indexes
      connection.indexes table_name
    end

    def description
      "Define a table called #{table_name} with primary key #{ideal_primary_key_name}"
    end

    def inspect
      "Schema(#{resource}): #{description}"
    end

    # One DSL method per column type, e.g. string(*names, options), each
    # adding the named columns to ideal_table. Modelled on the shortcut
    # methods of ActiveRecord's TableDefinition.
    %w( string text integer float decimal datetime timestamp time date binary boolean ).each do |column_type|
      define_method(column_type) do |*args|
        options = args.extract_options!
        args.each { |name| ideal_table.column(name, column_type, options) }
      end
    end

    def column(*args)
      ideal_table.column(*args)
    end

    MAX_INDEX_NAME_LENGTH = 50

    # Records an index on +columns+. Unless :name is given, the connection's
    # default index name is used, shortened (with a CRC32 suffix) when it is
    # MAX_INDEX_NAME_LENGTH characters or longer. :unique defaults to true.
    def index(columns, options = {})
      options.symbolize_keys!
      columns = Array.wrap columns
      unless name = options[:name]
        default_name = connection.index_name(table_name, options.merge(:column => columns))
        name = default_name.length < MAX_INDEX_NAME_LENGTH ? default_name : default_name[0..MAX_INDEX_NAME_LENGTH-11] + Zlib.crc32(default_name).to_s
      end
      index_unique = options.has_key?(:unique) ? options[:unique] : true
      ideal_indexes.push ActiveRecord::ConnectionAdapters::IndexDefinition.new(table_name, name, index_unique, columns)
    end

    def ideal_primary_key_name
      resource.primary_key.to_s
    end

    def actual_primary_key_name
      connection.primary_key(table_name).to_s
    end

    INDEX_PROPERTIES = %w{ name columns }

    # Two indexes are equivalent when their name and columns match as strings.
    def index_equivalent?(a, b)
      return false unless a && b
      INDEX_PROPERTIES.all? do |property|
        DataMiner.log_debug "...comparing #{a.send(property).inspect}.to_s <-> #{b.send(property).inspect}.to_s"
        a.send(property).to_s == b.send(property).to_s
      end
    end

    # FIXME mysql only (assume integer primary keys)
    # Two columns are equivalent when their names and types match, treating
    # a primary_key type as integer.
    def column_equivalent?(a, b)
      return false unless a && b
      a_type = a.type.to_s == 'primary_key' ? 'integer' : a.type.to_s
      b_type = b.type.to_s == 'primary_key' ? 'integer' : b.type.to_s
      a_type == b_type && a.name.to_s == b.name.to_s
    end

    # Defines column_needs_to_be_placed?, column_needs_to_be_removed?,
    # index_needs_to_be_placed? and index_needs_to_be_removed?.
    %w{ column index }.each do |kind|
      # True when the actual column/index is missing or differs from the ideal.
      define_method("#{kind}_needs_to_be_placed?") do |name|
        actual = send("actual_#{kind}", name)
        if actual
          !send("#{kind}_equivalent?", actual, send("ideal_#{kind}", name))
        else
          true
        end
      end

      # True when no ideal column/index of that name has been declared.
      define_method("#{kind}_needs_to_be_removed?") do |name|
        send("ideal_#{kind}", name).nil?
      end
    end

    def ideal_column(name)
      ideal_table[name.to_s]
    end

    def actual_column(name)
      resource.columns_hash[name.to_s]
    end

    def ideal_index(name)
      ideal_indexes.detect { |ideal| ideal.name == name.to_s }
    end

    def actual_index(name)
      actual_indexes.detect { |actual| actual.name == name.to_s }
    end

    # Drops the actual column (if any) and adds it again with the ideal type.
    # NOTE(review): only the type is forwarded to add_column; any other
    # options recorded on the ideal column are not passed along here.
    def place_column(name)
      remove_column name if actual_column name
      ideal = ideal_column name
      DataMiner.log_debug "ADDING COLUMN #{name}"
      connection.add_column table_name, name, ideal.type.to_sym # symbol type!
      resource.reset_column_information
    end

    def remove_column(name)
      DataMiner.log_debug "REMOVING COLUMN #{name}"
      connection.remove_column table_name, name
      resource.reset_column_information
    end

    # Drops the actual index (if any) and adds it again from the ideal definition.
    # NOTE(review): #index records a uniqueness flag (defaulting to true) on
    # the ideal definition, but it is not forwarded to add_index here, so
    # indexes are created non-unique. Left as-is because forwarding it would
    # change behaviour for existing tables - confirm the intent.
    def place_index(name)
      remove_index name if actual_index name
      ideal = ideal_index name
      DataMiner.log_debug "ADDING INDEX #{name}"
      connection.add_index table_name, ideal.columns, :name => ideal.name
      resource.reset_column_information
    end

    def remove_index(name)
      DataMiner.log_debug "REMOVING INDEX #{name}"
      connection.remove_index table_name, :name => name
      resource.reset_column_information
    end

    # Brings the actual table in line with the ideal one. The order of the
    # sub-steps matters: the table must exist and have its primary key before
    # columns and indexes are reconciled.
    def run(run)
      _add_extra_columns
      _create_table
      _set_primary_key
      _remove_columns
      _add_columns
      _remove_indexes
      _add_indexes
      DataMiner.log_debug "ran #{inspect}"
    end

    # Columns added to every schema unless already declared.
    EXTRA_COLUMNS = {
      :updated_at => :datetime,
      :created_at => :datetime
    }

    def _add_extra_columns
      EXTRA_COLUMNS.each do |extra_name, extra_type|
        send extra_type, extra_name unless ideal_column extra_name
      end
    end

    # Creates the table with a single placeholder column if it doesn't exist.
    def _create_table
      unless resource.table_exists?
        DataMiner.log_debug "CREATING TABLE #{table_name} with #{create_table_options.inspect}"
        connection.create_table table_name, create_table_options do |t|
          t.integer :data_miner_placeholder
        end
        resource.reset_column_information
      end
    end

    # Makes sure the table's primary key column matches the ideal one,
    # dropping and re-creating the table if an existing primary key differs.
    #
    # Identifiers in the raw SQL are quoted by the connection, so the
    # statement is no longer tied to MySQL's backtick quoting. The
    # integer-primary-key assumption in column_equivalent? still applies.
    def _set_primary_key
      if ideal_primary_key_name == 'id' && !ideal_column('id')
        DataMiner.log_debug "no special primary key set on #{table_name}, so using 'id'"
        column 'id', :primary_key
      end
      actual = actual_column actual_primary_key_name
      ideal = ideal_column ideal_primary_key_name
      unless column_equivalent? actual, ideal
        DataMiner.log_debug "looks like #{table_name} has a bad (or missing) primary key"
        if actual
          DataMiner.log_debug "looks like primary key needs to change from #{actual_primary_key_name} to #{ideal_primary_key_name}, re-creating #{table_name} from scratch"
          connection.drop_table table_name
          resource.reset_column_information
          _create_table
        end
        place_column ideal_primary_key_name
        unless ideal.type.to_s == 'primary_key'
          DataMiner.log_debug "SETTING #{ideal_primary_key_name} AS PRIMARY KEY"
          quoted_table = connection.quote_table_name table_name
          quoted_key = connection.quote_column_name ideal_primary_key_name
          if connection.adapter_name.downcase == 'sqlite'
            connection.execute "CREATE UNIQUE INDEX IDX_#{table_name}_#{ideal_primary_key_name} ON #{quoted_table} (#{quoted_key} ASC)"
          else
            connection.execute "ALTER TABLE #{quoted_table} ADD PRIMARY KEY (#{quoted_key})"
          end
        end
      end
      resource.reset_column_information
    end

    def _remove_columns
      resource.columns_hash.values.each do |actual|
        remove_column actual.name if column_needs_to_be_removed? actual.name
      end
    end

    def _add_columns
      ideal_table.columns.each do |ideal|
        place_column ideal.name if column_needs_to_be_placed? ideal.name
      end
    end

    def _remove_indexes
      actual_indexes.each do |actual|
        remove_index actual.name if index_needs_to_be_removed? actual.name
      end
    end

    def _add_indexes
      ideal_indexes.each do |ideal|
        next if ideal.name == ideal_primary_key_name # this should already have been taken care of
        place_index ideal.name if index_needs_to_be_placed? ideal.name
      end
    end
  end
end