data_miner 0.4.30 → 0.4.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.4.30
1
+ 0.4.31
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{data_miner}
8
- s.version = "0.4.30"
8
+ s.version = "0.4.31"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Seamus Abshere", "Andy Rossmeissl"]
12
- s.date = %q{2010-05-18}
12
+ s.date = %q{2010-05-19}
13
13
  s.description = %q{Mine remote data into your ActiveRecord models. You can also perform associations and convert units.}
14
14
  s.email = %q{seamus@abshere.net}
15
15
  s.extra_rdoc_files = [
@@ -32,6 +32,7 @@ Gem::Specification.new do |s|
32
32
  "lib/data_miner/import.rb",
33
33
  "lib/data_miner/process.rb",
34
34
  "lib/data_miner/run.rb",
35
+ "lib/data_miner/schema.rb",
35
36
  "lib/data_miner/tap.rb",
36
37
  "test/data_miner_test.rb",
37
38
  "test/test_helper.rb"
@@ -25,6 +25,7 @@ require 'data_miner/import'
25
25
  require 'data_miner/tap'
26
26
  require 'data_miner/process'
27
27
  require 'data_miner/run'
28
+ require 'data_miner/schema'
28
29
 
29
30
  module DataMiner
30
31
  class MissingHashColumn < RuntimeError; end
@@ -62,6 +63,10 @@ module DataMiner
62
63
  logger.info "[data_miner gem] #{message}"
63
64
  end
64
65
 
66
# Sends +message+ to the gem's logger at debug level, prefixed with the
# gem tag so data_miner output is easy to pick out of a shared log.
def self.log_debug(message)
  tagged_message = "[data_miner gem] #{message}"
  logger.debug(tagged_message)
end
69
+
65
70
  def self.run(options = {})
66
71
  DataMiner::Configuration.run options
67
72
  end
@@ -11,6 +11,13 @@ module DataMiner
11
11
  @attributes = HashWithIndifferentAccess.new
12
12
  end
13
13
 
14
# Adds a schema step to this configuration.
#
# A DataMiner::Schema is built for the current step position, the given
# block is evaluated against it through Blockenspiel (so the block can
# declare columns and indexes), and the step is appended to +steps+.
#
# create_table_options:: options hash handed to the Schema step
# Returns the incremented step counter.
def schema(create_table_options = {}, &block)
  schema_step = DataMiner::Schema.new(self, step_counter, create_table_options)
  Blockenspiel.invoke(block, schema_step)
  steps.push(schema_step)
  self.step_counter = step_counter + 1
end
20
+
14
21
  def process(method_name_or_block_description, &block)
15
22
  steps << DataMiner::Process.new(self, step_counter, method_name_or_block_description, &block)
16
23
  self.step_counter += 1
@@ -0,0 +1,217 @@
1
module DataMiner
  # A data_miner step that records an "ideal" table layout (columns and
  # indexes declared through the DSL methods below) and, when run, alters the
  # resource's actual table until it matches.
  #
  # Public methods defined after the Blockenspiel::DSL include are the ones
  # available inside a <tt>schema do ... end</tt> block.
  class Schema
    include Blockenspiel::DSL

    attr_reader :configuration
    attr_reader :position_in_run
    attr_reader :create_table_options
    delegate :resource, :to => :configuration

    # configuration::        the owning configuration (provides +resource+)
    # position_in_run::      this step's position among the configuration's steps
    # create_table_options:: options passed to +create_table+ if the table is missing
    def initialize(configuration, position_in_run, create_table_options)
      @configuration = configuration
      @position_in_run = position_in_run
      @create_table_options = create_table_options
    end

    def connection
      ActiveRecord::Base.connection
    end

    def table_name
      resource.table_name
    end

    # The in-memory table definition that DSL column declarations accumulate into.
    def ideal_table
      @ideal_table ||= ActiveRecord::ConnectionAdapters::TableDefinition.new(connection)
    end

    # The index definitions declared through #index.
    def ideal_indexes
      @ideal_indexes ||= []
    end

    # The indexes the connection currently reports for the table.
    def actual_indexes
      connection.indexes table_name
    end

    def description
      "Define a table called #{table_name} with primary key #{primary_key}"
    end

    def inspect
      "Block(#{resource}): #{description}"
    end

    # Column-type DSL methods (string, integer, ...), modelled on
    # activerecord-3.0.0.beta3/lib/active_record/connection_adapters/abstract/schema_definitions.rb.
    # Each accepts any number of column names plus an optional trailing options
    # hash and records one column of that type per name on the ideal table.
    %w( string text integer float decimal datetime timestamp time date binary boolean ).each do |column_type|
      define_method(column_type) do |*args|
        options = args.extract_options!
        args.each { |name| ideal_table.column(name, column_type, options) }
      end
    end

    # Declares a column directly; arguments are forwarded to the ideal table's +column+.
    def column(*args)
      ideal_table.column(*args)
    end

    # Declares an index on one or more columns.
    #
    # columns:: a column name or an array of column names
    # options:: <tt>:name</tt> sets an explicit index name (otherwise the
    #           connection derives one from the columns); <tt>:unique</tt>
    #           is recorded on the definition and defaults to true.
    #
    # The caller's options hash is not modified. Names and columns are stored
    # as strings so they compare cleanly with what the adapter reports.
    #
    # class IndexDefinition < Struct.new(:table, :name, :unique, :columns)
    def index(columns, options = {})
      options = options.symbolize_keys
      columns = Array.wrap(columns).map { |column_name| column_name.to_s }
      # index_name only understands :column (not :columns) and would ignore
      # :name whenever :column is present, so the explicit name is checked first.
      name = (options[:name] || connection.index_name(table_name, :column => columns)).to_s
      index_unique = options.has_key?(:unique) ? options[:unique] : true
      ideal_indexes.push ActiveRecord::ConnectionAdapters::IndexDefinition.new(table_name, name, index_unique, columns)
    end

    def primary_key
      resource.primary_key.to_s
    end

    # Properties compared (via to_s) between the actual and ideal definitions
    # to decide whether something has to be re-created.
    INDEX_PROPERTIES = %w{ name columns }
    COLUMN_PROPERTIES = %w{ name type }

    # Defines column_needs_to_be_placed?, column_needs_to_be_removed?,
    # index_needs_to_be_placed? and index_needs_to_be_removed?.
    %w{ column index }.each do |kind|
      properties = const_get("#{kind.upcase}_PROPERTIES")

      # True when nothing by that name exists yet, or when any compared
      # property differs between the actual and the ideal definition.
      # NOTE(review): assumes an ideal definition exists for +name+; a nil
      # ideal raises NoMethodError here - confirm callers always declare it.
      define_method("#{kind}_needs_to_be_placed?") do |name|
        actual = send("actual_#{kind}", name)
        if actual
          ideal = send("ideal_#{kind}", name)
          properties.any? do |property|
            DataMiner.log_debug "...comparing #{actual.send(property).inspect}.to_s <-> #{ideal.send(property).inspect}.to_s"
            actual.send(property).to_s != ideal.send(property).to_s
          end
        else
          true
        end
      end

      # True when the schema block declared nothing by that name.
      define_method("#{kind}_needs_to_be_removed?") do |name|
        send("ideal_#{kind}", name).nil?
      end
    end

    def ideal_column(name)
      ideal_table[name.to_s]
    end

    def actual_column(name)
      resource.columns_hash[name.to_s]
    end

    def ideal_index(name)
      ideal_indexes.detect { |ideal| ideal.name == name.to_s }
    end

    def actual_index(name)
      actual_indexes.detect { |actual| actual.name == name.to_s }
    end

    # Drops the column if it already exists, then adds it with the ideal type.
    # NOTE(review): only the type is forwarded to add_column; limit, default,
    # null, precision and scale from the declaration are not - confirm intended.
    def place_column(name)
      remove_column name if actual_column name
      ideal = ideal_column name
      DataMiner.log_info "adding column #{name}"
      connection.add_column table_name, name, ideal.type.to_sym # symbol type!
      resource.reset_column_information
    end

    def remove_column(name)
      DataMiner.log_info "removing column #{name}"
      connection.remove_column table_name, name
      resource.reset_column_information
    end

    # Drops the index if it already exists, then adds it under the ideal name.
    # NOTE(review): the definition's unique flag is not forwarded to add_index -
    # confirm whether that is intentional before relying on :unique.
    def place_index(name)
      remove_index name if actual_index name
      ideal = ideal_index name
      DataMiner.log_info "adding index #{name}"
      connection.add_index table_name, ideal.columns, :name => ideal.name
      resource.reset_column_information
    end

    def remove_index(name)
      DataMiner.log_info "removing index #{name}"
      connection.remove_index table_name, :name => name
      resource.reset_column_information
    end

    # Executes the step. The order matters: the table must exist and carry its
    # primary key before columns and indexes are reconciled.
    # The +run+ argument is accepted but not used here.
    def run(run)
      _add_extra_columns
      _create_table
      _set_primary_key
      _remove_columns
      _add_columns
      _remove_indexes
      _add_indexes
      DataMiner.log_info "ran #{inspect}"
    end

    # Bookkeeping columns added to every schema unless already declared.
    EXTRA_COLUMNS = {
      :updated_at => :datetime,
      :created_at => :datetime,
      :data_miner_last_run_id => :integer,
      :data_miner_touch_count => :integer
    }

    # Declares each EXTRA_COLUMNS entry through the matching type method
    # unless the schema block already declared a column with that name.
    def _add_extra_columns
      EXTRA_COLUMNS.each do |extra_name, extra_type|
        send extra_type, extra_name unless ideal_column extra_name
      end
    end

    # Creates the table when it does not exist yet. It is created with a
    # single placeholder column; real columns are added by later steps.
    def _create_table
      return if resource.table_exists?
      DataMiner.log_info "creating table #{table_name} with #{create_table_options.inspect}"
      connection.create_table table_name, create_table_options do |t|
        t.integer :data_miner_placeholder
      end
      resource.reset_column_information
    end

    # Places the primary key column and marks it as the table's primary key.
    # If that fails with an error mentioning "primary", the table is dropped,
    # re-created and the operation retried exactly once.
    #
    # FIXME the ALTER TABLE ... ADD PRIMARY KEY statement is still MySQL-flavoured;
    # identifiers are quoted through the adapter rather than hard-coded backticks.
    def _set_primary_key
      return unless column_needs_to_be_placed?(primary_key)
      retries_allowed = 1
      begin
        place_column primary_key
        DataMiner.log_info "adding primary key #{primary_key}"
        connection.execute "ALTER TABLE #{connection.quote_table_name(table_name)} ADD PRIMARY KEY (#{connection.quote_column_name(primary_key)})"
      rescue => e
        if retries_allowed > 0 && e.message =~ /primary/i
          DataMiner.log_info "looks like primary key changed, re-creating table from scratch"
          connection.drop_table table_name
          resource.reset_column_information
          _create_table
          retries_allowed -= 1
          retry
        else
          raise
        end
      end
      resource.reset_column_information
    end

    # Removes every actual column that the schema does not declare.
    def _remove_columns
      resource.columns_hash.values.each do |actual|
        remove_column actual.name if column_needs_to_be_removed? actual.name
      end
    end

    # Adds (or re-creates) every declared column that is missing or differs.
    def _add_columns
      ideal_table.columns.each do |ideal|
        place_column ideal.name if column_needs_to_be_placed? ideal.name
      end
    end

    # Removes every actual index that the schema does not declare.
    def _remove_indexes
      actual_indexes.each do |actual|
        remove_index actual.name if index_needs_to_be_removed? actual.name
      end
    end

    # Adds (or re-creates) every declared index that is missing or differs.
    def _add_indexes
      ideal_indexes.each do |ideal|
        next if ideal.name == primary_key # this should already have been taken care of
        place_index ideal.name if index_needs_to_be_placed? ideal.name
      end
    end
  end
end
@@ -1091,22 +1091,19 @@ class AutomobileMakeFleetYear < ActiveRecord::Base
1091
1091
  set_primary_key :name
1092
1092
 
1093
1093
  data_miner do
1094
- process "create a table on the fly" do
1095
- create_table "automobile_make_fleet_years", :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
1096
- t.string "name"
1097
- t.string "make_name"
1098
- t.string "fleet"
1099
- t.integer "year"
1100
- t.float "fuel_efficiency"
1101
- t.string "fuel_efficiency_units"
1102
- t.integer "volume"
1103
- t.string "make_year_name"
1104
- t.datetime "created_at"
1105
- t.datetime "updated_at"
1106
- t.integer 'data_miner_touch_count'
1107
- t.integer 'data_miner_last_run_id'
1108
- end
1109
- execute 'ALTER TABLE automobile_make_fleet_years ADD PRIMARY KEY (name)'
1094
+ schema :id => false do
1095
+ string "name"
1096
+ string "make_name"
1097
+ string "fleet"
1098
+ integer "year"
1099
+ float "fuel_efficiency"
1100
+ string "fuel_efficiency_units"
1101
+ integer "volume"
1102
+ string "make_year_name"
1103
+ datetime "created_at"
1104
+ datetime "updated_at"
1105
+ integer 'data_miner_touch_count'
1106
+ integer 'data_miner_last_run_id'
1110
1107
  end
1111
1108
 
1112
1109
  # CAFE data privately emailed to Andy from Terry Anderson at the DOT/NHTSA
@@ -1123,12 +1120,55 @@ class AutomobileMakeFleetYear < ActiveRecord::Base
1123
1120
  end
1124
1121
  end
1125
1122
 
1123
# Test model whose table layout is managed entirely by a data_miner schema
# step: string primary key, three string columns, one integer column and a
# named index on census_region_name.
class CensusDivisionTrois < ActiveRecord::Base
  set_primary_key :number_code

  data_miner do
    schema(:options => 'ENGINE=InnoDB default charset=utf8', :id => false) do
      # the type methods accept several column names at once
      string 'number_code', 'name', 'census_region_name'
      integer 'census_region_number'
      index 'census_region_name', :name => 'homefry'
    end
  end
end
1135
+
1126
1136
  # todo: have somebody properly organize these
1127
1137
  class DataMinerTest < Test::Unit::TestCase
1128
1138
  if ENV['ALL'] == 'true' or ENV['NEW'] == 'true'
1129
1139
  end
1130
1140
 
1131
1141
  if ENV['ALL'] == 'true' or ENV['FAST'] == 'true'
1142
should "eagerly enforce a schema" do
  # Start from a deliberately incomplete table: only name and
  # census_region_name exist; the timestamps, census_region_number and the
  # data_miner bookkeeping columns are left out on purpose.
  ActiveRecord::Base.connection.create_table 'census_division_trois', :force => true, :options => 'ENGINE=InnoDB default charset=utf8' do |t|
    t.string 'name'
    t.string 'census_region_name'
  end
  ActiveRecord::Base.connection.execute 'ALTER TABLE census_division_trois ADD INDEX (census_region_name)'
  CensusDivisionTrois.reset_column_information
  missing_columns = %w{ updated_at created_at census_region_number data_miner_last_run_id data_miner_touch_count }

  column_present = lambda { |column_name| CensusDivisionTrois.column_names.include?(column_name) }
  homefry_present = lambda do
    ActiveRecord::Base.connection.indexes(CensusDivisionTrois.table_name).any? { |existing| existing.name == 'homefry' }
  end

  # sanity check
  missing_columns.each do |column_name|
    assert_equal false, column_present.call(column_name)
  end
  assert_equal false, homefry_present.call

  # running repeatedly must leave the table in the same, correct shape
  3.times do
    CensusDivisionTrois.run_data_miner!
    missing_columns.each do |column_name|
      assert_equal true, column_present.call(column_name)
    end
    assert_equal true, homefry_present.call
    assert_equal :string, CensusDivisionTrois.columns_hash[CensusDivisionTrois.primary_key].type
  end
end
1171
+
1132
1172
  should "allow specifying dictionaries explicitly" do
1133
1173
  CensusDivisionDeux.run_data_miner!
1134
1174
  assert_equal 'South Region', CensusDivisionDeux.find(5).census_region_name
@@ -15,7 +15,7 @@ ActiveRecord::Base.establish_connection(
15
15
  )
16
16
 
17
17
  ActiveSupport::Inflector.inflections do |inflect|
18
- inflect.uncountable %w{ aircraft aircraft_deux census_division_deux }
18
+ inflect.uncountable %w{ aircraft aircraft_deux census_division_deux census_division_trois }
19
19
  end
20
20
 
21
21
  class Test::Unit::TestCase
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 4
8
- - 30
9
- version: 0.4.30
8
+ - 31
9
+ version: 0.4.31
10
10
  platform: ruby
11
11
  authors:
12
12
  - Seamus Abshere
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2010-05-18 00:00:00 -04:00
18
+ date: 2010-05-19 00:00:00 -04:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -183,6 +183,7 @@ files:
183
183
  - lib/data_miner/import.rb
184
184
  - lib/data_miner/process.rb
185
185
  - lib/data_miner/run.rb
186
+ - lib/data_miner/schema.rb
186
187
  - lib/data_miner/tap.rb
187
188
  - test/data_miner_test.rb
188
189
  - test/test_helper.rb