data_miner 0.4.30 → 0.4.31

Sign up to get free protection for your applications and to get access to all the features.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.4.30
1
+ 0.4.31
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{data_miner}
8
- s.version = "0.4.30"
8
+ s.version = "0.4.31"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Seamus Abshere", "Andy Rossmeissl"]
12
- s.date = %q{2010-05-18}
12
+ s.date = %q{2010-05-19}
13
13
  s.description = %q{Mine remote data into your ActiveRecord models. You can also perform associations and convert units.}
14
14
  s.email = %q{seamus@abshere.net}
15
15
  s.extra_rdoc_files = [
@@ -32,6 +32,7 @@ Gem::Specification.new do |s|
32
32
  "lib/data_miner/import.rb",
33
33
  "lib/data_miner/process.rb",
34
34
  "lib/data_miner/run.rb",
35
+ "lib/data_miner/schema.rb",
35
36
  "lib/data_miner/tap.rb",
36
37
  "test/data_miner_test.rb",
37
38
  "test/test_helper.rb"
@@ -25,6 +25,7 @@ require 'data_miner/import'
25
25
  require 'data_miner/tap'
26
26
  require 'data_miner/process'
27
27
  require 'data_miner/run'
28
+ require 'data_miner/schema'
28
29
 
29
30
  module DataMiner
30
31
  class MissingHashColumn < RuntimeError; end
@@ -62,6 +63,10 @@ module DataMiner
62
63
  logger.info "[data_miner gem] #{message}"
63
64
  end
64
65
 
# Write +message+ to the gem's logger at debug level, prefixed with the
# "[data_miner gem]" tag.
def self.log_debug(message)
  logger.debug("[data_miner gem] #{message}")
end
69
+
65
70
  def self.run(options = {})
66
71
  DataMiner::Configuration.run options
67
72
  end
@@ -11,6 +11,13 @@ module DataMiner
11
11
  @attributes = HashWithIndifferentAccess.new
12
12
  end
13
13
 
# Append a schema step to this configuration.
#
# create_table_options - Hash handed unchanged to DataMiner::Schema.new
#                        (defaults to an empty Hash).
# block                - DSL block run against the new step through
#                        Blockenspiel.invoke before the step is queued.
#
# Returns the step counter after it has been incremented.
def schema(create_table_options = {}, &block)
  schema_step = DataMiner::Schema.new(self, step_counter, create_table_options)
  Blockenspiel.invoke(block, schema_step)
  steps << schema_step
  self.step_counter += 1
end
20
+
14
21
  def process(method_name_or_block_description, &block)
15
22
  steps << DataMiner::Process.new(self, step_counter, method_name_or_block_description, &block)
16
23
  self.step_counter += 1
@@ -0,0 +1,217 @@
module DataMiner
  # A run step that reconciles a resource's table with a declared ("ideal")
  # schema. Columns and indexes are declared through the Blockenspiel DSL
  # (string, integer, ..., column, index); #run then creates the table if it
  # is missing, sets the primary key, and adds/removes columns and indexes so
  # the actual table matches the declaration.
  class Schema
    include Blockenspiel::DSL

    # Properties compared (via to_s) to decide whether an existing index or
    # column already matches its ideal definition.
    INDEX_PROPERTIES = %w{ name columns }.freeze
    COLUMN_PROPERTIES = %w{ name type }.freeze

    # Bookkeeping columns that #run declares automatically when the schema
    # block did not declare them.
    EXTRA_COLUMNS = {
      :updated_at => :datetime,
      :created_at => :datetime,
      :data_miner_last_run_id => :integer,
      :data_miner_touch_count => :integer
    }.freeze

    attr_reader :configuration
    attr_reader :position_in_run
    attr_reader :create_table_options
    delegate :resource, :to => :configuration

    # configuration        - the owning configuration; #resource is delegated to it.
    # position_in_run      - this step's position among the configuration's steps.
    # create_table_options - options passed to connection.create_table when the
    #                        table has to be created.
    def initialize(configuration, position_in_run, create_table_options)
      @configuration = configuration
      @position_in_run = position_in_run
      @create_table_options = create_table_options
    end

    # The connection used for every schema statement issued by this step.
    def connection
      ActiveRecord::Base.connection
    end

    # Name of the table backing the resource.
    def table_name
      resource.table_name
    end

    # Memoized table definition that accumulates the declared columns.
    def ideal_table
      @ideal_table ||= ActiveRecord::ConnectionAdapters::TableDefinition.new(connection)
    end

    # Memoized list of declared index definitions.
    def ideal_indexes
      @ideal_indexes ||= []
    end

    # Indexes the connection currently reports for the table (queried on every call).
    def actual_indexes
      connection.indexes table_name
    end

    # Human-readable summary of this step.
    def description
      "Define a table called #{table_name} with primary key #{primary_key}"
    end

    def inspect
      "Block(#{resource}): #{description}"
    end

    # Column-type DSL methods (string, text, integer, ...). Each accepts any
    # number of column names plus an optional trailing options hash and
    # declares every name on the ideal table with that type.
    # lifted straight from activerecord-3.0.0.beta3/lib/active_record/connection_adapters/abstract/schema_definitions.rb
    %w( string text integer float decimal datetime timestamp time date binary boolean ).each do |column_type|
      class_eval <<-EOV, __FILE__, __LINE__ + 1
        def #{column_type}(*args)
          options = args.extract_options!
          column_names = args

          column_names.each { |name| ideal_table.column(name, '#{column_type}', options) }
        end
      EOV
    end

    # Declare a column directly; arguments are forwarded to the ideal table's #column.
    def column(*args)
      ideal_table.column(*args)
    end

    # Declare an index on one or more columns.
    #
    # columns - a column name or an array of column names.
    # options - :name   explicit index name; when absent the name is derived
    #                   from the table and columns via connection.index_name.
    #           :unique recorded on the definition; defaults to true.
    #
    # Returns the list of declared indexes.
    def index(columns, options = {})
      # Work on a copy so the caller's hash is not mutated.
      options = options.symbolize_keys
      columns = Array.wrap columns
      # An explicit :name wins. Otherwise derive the name from the columns,
      # using the :column key (not :columns). The name is stored as a String
      # because #ideal_index looks definitions up by name.to_s.
      name = (options[:name] || connection.index_name(table_name, :column => columns)).to_s
      index_unique = options.has_key?(:unique) ? options[:unique] : true
      # class IndexDefinition < Struct.new(:table, :name, :unique, :columns)
      ideal_indexes.push ActiveRecord::ConnectionAdapters::IndexDefinition.new(table_name, name, index_unique, columns)
    end

    # The resource's primary key name as a String.
    def primary_key
      resource.primary_key.to_s
    end

    # Generates, for both columns and indexes:
    #   <kind>_needs_to_be_placed?(name)  - true when no actual counterpart exists
    #                                       or any compared property differs
    #   <kind>_needs_to_be_removed?(name) - true when nothing ideal has that name
    # NOTE(review): *_needs_to_be_placed? calls methods on the ideal definition
    # without a nil check, so the name is presumably expected to be declared.
    %w{ column index }.each do |i|
      class_eval <<-EOV, __FILE__, __LINE__ + 1
        def #{i}_needs_to_be_placed?(name)
          actual = actual_#{i} name
          return true unless actual
          ideal = ideal_#{i} name
          #{i.upcase}_PROPERTIES.any? do |property|
            DataMiner.log_debug "...comparing \#{actual.send(property).inspect}.to_s <-> \#{ideal.send(property).inspect}.to_s"
            actual.send(property).to_s != ideal.send(property).to_s
          end
        end

        def #{i}_needs_to_be_removed?(name)
          ideal_#{i}(name).nil?
        end
      EOV
    end

    # Declared column definition with the given name, looked up on the ideal table.
    def ideal_column(name)
      ideal_table[name.to_s]
    end

    # Column the resource currently reports under the given name, or nil.
    def actual_column(name)
      resource.columns_hash[name.to_s]
    end

    # Declared index with the given name, or nil.
    def ideal_index(name)
      ideal_indexes.detect { |ideal| ideal.name == name.to_s }
    end

    # Existing index with the given name, or nil.
    def actual_index(name)
      actual_indexes.detect { |actual| actual.name == name.to_s }
    end

    # (Re)create a column from its ideal definition; an existing column of the
    # same name is dropped first.
    # NOTE(review): only the column type is forwarded to add_column; any other
    # options given when the column was declared are not passed -- confirm
    # this is intended.
    def place_column(name)
      remove_column name if actual_column name
      ideal = ideal_column name
      DataMiner.log_info "adding column #{name}"
      connection.add_column table_name, name, ideal.type.to_sym # symbol type!
      resource.reset_column_information
    end

    # Drop a column and refresh the resource's cached column information.
    def remove_column(name)
      DataMiner.log_info "removing column #{name}"
      connection.remove_column table_name, name
      resource.reset_column_information
    end

    # (Re)create an index from its ideal definition; an existing index of the
    # same name is dropped first.
    # NOTE(review): the :unique flag recorded by #index is not forwarded to
    # add_index here -- confirm whether that is intentional.
    def place_index(name)
      remove_index name if actual_index name
      ideal = ideal_index name
      DataMiner.log_info "adding index #{name}"
      connection.add_index table_name, ideal.columns, :name => ideal.name
      resource.reset_column_information
    end

    # Drop an index by name and refresh the resource's cached column information.
    def remove_index(name)
      DataMiner.log_info "removing index #{name}"
      connection.remove_index table_name, :name => name
      resource.reset_column_information
    end

    # Execute the step. The +run+ argument is accepted but not used here.
    # Order matters: the table and primary key are settled before stale
    # columns/indexes are removed and missing ones are added.
    def run(run)
      _add_extra_columns
      _create_table
      _set_primary_key
      _remove_columns
      _add_columns
      _remove_indexes
      _add_indexes
      DataMiner.log_info "ran #{inspect}"
    end

    # Declare each EXTRA_COLUMNS entry through the matching type method,
    # unless the schema block already declared a column with that name.
    def _add_extra_columns
      EXTRA_COLUMNS.each do |extra_name, extra_type|
        send extra_type, extra_name unless ideal_column extra_name
      end
    end

    # Create the table with a single placeholder column when it does not
    # exist yet; the declared columns are added by later steps of #run.
    def _create_table
      return if resource.table_exists?
      DataMiner.log_info "creating table #{table_name} with #{create_table_options.inspect}"
      connection.create_table table_name, create_table_options do |t|
        t.integer :data_miner_placeholder
      end
      resource.reset_column_information
    end

    # Place the primary key column and mark it as the primary key. If that
    # fails with an error mentioning "primary", the table is dropped and
    # re-created once, and the attempt is repeated.
    # FIXME mysql only
    def _set_primary_key
      return unless column_needs_to_be_placed?(primary_key)
      retries_allowed = 1
      begin
        place_column primary_key
        DataMiner.log_info "adding primary key #{primary_key}"
        connection.execute "ALTER TABLE `#{table_name}` ADD PRIMARY KEY (`#{primary_key}`)"
      rescue => e
        # Anything other than a first-time primary-key error is re-raised as is.
        raise unless retries_allowed > 0 && e.message =~ /primary/i
        DataMiner.log_info "looks like primary key changed, re-creating table from scratch"
        connection.drop_table table_name
        resource.reset_column_information
        _create_table
        retries_allowed -= 1
        retry
      end
      resource.reset_column_information
    end

    # Drop every existing column that has no ideal counterpart.
    def _remove_columns
      resource.columns_hash.values.each do |actual|
        remove_column actual.name if column_needs_to_be_removed? actual.name
      end
    end

    # Add (or replace) every declared column that is missing or differs.
    def _add_columns
      ideal_table.columns.each do |ideal|
        place_column ideal.name if column_needs_to_be_placed? ideal.name
      end
    end

    # Drop every existing index that has no ideal counterpart.
    def _remove_indexes
      actual_indexes.each do |actual|
        remove_index actual.name if index_needs_to_be_removed? actual.name
      end
    end

    # Add (or replace) every declared index that is missing or differs.
    def _add_indexes
      ideal_indexes.each do |ideal|
        next if ideal.name == primary_key # this should already have been taken care of
        place_index ideal.name if index_needs_to_be_placed? ideal.name
      end
    end
  end
end
@@ -1091,22 +1091,19 @@ class AutomobileMakeFleetYear < ActiveRecord::Base
1091
1091
  set_primary_key :name
1092
1092
 
1093
1093
  data_miner do
1094
- process "create a table on the fly" do
1095
- create_table "automobile_make_fleet_years", :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
1096
- t.string "name"
1097
- t.string "make_name"
1098
- t.string "fleet"
1099
- t.integer "year"
1100
- t.float "fuel_efficiency"
1101
- t.string "fuel_efficiency_units"
1102
- t.integer "volume"
1103
- t.string "make_year_name"
1104
- t.datetime "created_at"
1105
- t.datetime "updated_at"
1106
- t.integer 'data_miner_touch_count'
1107
- t.integer 'data_miner_last_run_id'
1108
- end
1109
- execute 'ALTER TABLE automobile_make_fleet_years ADD PRIMARY KEY (name)'
1094
+ schema :id => false do
1095
+ string "name"
1096
+ string "make_name"
1097
+ string "fleet"
1098
+ integer "year"
1099
+ float "fuel_efficiency"
1100
+ string "fuel_efficiency_units"
1101
+ integer "volume"
1102
+ string "make_year_name"
1103
+ datetime "created_at"
1104
+ datetime "updated_at"
1105
+ integer 'data_miner_touch_count'
1106
+ integer 'data_miner_last_run_id'
1110
1107
  end
1111
1108
 
1112
1109
  # CAFE data privately emailed to Andy from Terry Anderson at the DOT/NHTSA
@@ -1123,12 +1120,55 @@ class AutomobileMakeFleetYear < ActiveRecord::Base
1123
1120
  end
1124
1121
  end
1125
1122
 
# Fixture model for the schema-enforcement test: keyed on number_code, it
# declares its table layout (and one named index) through the schema DSL.
class CensusDivisionTrois < ActiveRecord::Base
  set_primary_key :number_code

  data_miner do
    schema(:options => 'ENGINE=InnoDB default charset=utf8', :id => false) do
      string 'number_code', 'name', 'census_region_name'
      integer 'census_region_number'
      index 'census_region_name', :name => 'homefry'
    end
  end
end
1135
+
1126
1136
  # todo: have somebody properly organize these
1127
1137
  class DataMinerTest < Test::Unit::TestCase
1128
1138
  if ENV['ALL'] == 'true' or ENV['NEW'] == 'true'
1129
1139
  end
1130
1140
 
1131
1141
  if ENV['ALL'] == 'true' or ENV['FAST'] == 'true'
should "eagerly enforce a schema" do
  # Start from a deliberately incomplete table: only two of the declared
  # columns, none of the bookkeeping columns, and an unnamed index instead
  # of 'homefry'.
  ActiveRecord::Base.connection.create_table 'census_division_trois', :force => true, :options => 'ENGINE=InnoDB default charset=utf8' do |t|
    t.string 'name'
    t.string 'census_region_name'
  end
  ActiveRecord::Base.connection.execute 'ALTER TABLE census_division_trois ADD INDEX (census_region_name)'
  CensusDivisionTrois.reset_column_information

  missing_columns = %w{ updated_at created_at census_region_number data_miner_last_run_id data_miner_touch_count }
  homefry_present = lambda do
    ActiveRecord::Base.connection.indexes(CensusDivisionTrois.table_name).any? { |index| index.name == 'homefry' }
  end

  # sanity check: nothing the schema should add exists yet
  missing_columns.each do |column|
    assert_equal false, CensusDivisionTrois.column_names.include?(column)
  end
  assert_equal false, homefry_present.call

  # Run repeatedly: the outcome must be the same on every pass.
  3.times do
    CensusDivisionTrois.run_data_miner!
    missing_columns.each do |column|
      assert_equal true, CensusDivisionTrois.column_names.include?(column)
    end
    assert_equal true, homefry_present.call
    assert_equal :string, CensusDivisionTrois.columns_hash[CensusDivisionTrois.primary_key].type
  end
end
1171
+
1132
1172
  should "allow specifying dictionaries explicitly" do
1133
1173
  CensusDivisionDeux.run_data_miner!
1134
1174
  assert_equal 'South Region', CensusDivisionDeux.find(5).census_region_name
@@ -15,7 +15,7 @@ ActiveRecord::Base.establish_connection(
15
15
  )
16
16
 
17
17
  ActiveSupport::Inflector.inflections do |inflect|
18
- inflect.uncountable %w{ aircraft aircraft_deux census_division_deux }
18
+ inflect.uncountable %w{ aircraft aircraft_deux census_division_deux census_division_trois }
19
19
  end
20
20
 
21
21
  class Test::Unit::TestCase
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 4
8
- - 30
9
- version: 0.4.30
8
+ - 31
9
+ version: 0.4.31
10
10
  platform: ruby
11
11
  authors:
12
12
  - Seamus Abshere
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2010-05-18 00:00:00 -04:00
18
+ date: 2010-05-19 00:00:00 -04:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -183,6 +183,7 @@ files:
183
183
  - lib/data_miner/import.rb
184
184
  - lib/data_miner/process.rb
185
185
  - lib/data_miner/run.rb
186
+ - lib/data_miner/schema.rb
186
187
  - lib/data_miner/tap.rb
187
188
  - test/data_miner_test.rb
188
189
  - test/test_helper.rb