data_miner 0.4.1 → 0.4.2
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/data_miner.gemspec +1 -1
- data/lib/data_miner/configuration.rb +3 -7
- data/lib/data_miner/import.rb +2 -0
- data/test/data_miner_test.rb +7 -14
- metadata +1 -1
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.4.1
|
+
0.4.2
|
data/data_miner.gemspec
CHANGED
(diff not shown)
data/lib/data_miner/configuration.rb
CHANGED
@@ -11,10 +11,6 @@ module DataMiner
|
|
11
11
|
@attributes = HashWithIndifferentAccess.new
|
12
12
|
end
|
13
13
|
|
14
|
-
def logger
|
15
|
-
DataMiner.logger
|
16
|
-
end
|
17
|
-
|
18
14
|
def process(method_name_or_block_description, &block)
|
19
15
|
self.runnable_counter += 1
|
20
16
|
runnables << DataMiner::Process.new(self, runnable_counter, method_name_or_block_description, &block)
|
@@ -76,7 +72,7 @@ module DataMiner
|
|
76
72
|
import_runnables.each do |runnable|
|
77
73
|
runnable.attributes.each do |_, attribute|
|
78
74
|
if attribute.options.any? { |k, _| k.to_s =~ /unit/ } and COMPLETE_UNIT_DEFINITIONS.none? { |complete_definition| complete_definition.all? { |required_option| attribute.options[required_option].present? } }
|
79
|
-
logger.error %{
|
75
|
+
DataMiner.logger.error %{
|
80
76
|
|
81
77
|
================================
|
82
78
|
|
@@ -97,7 +93,7 @@ You need to supply one of #{COMPLETE_UNIT_DEFINITIONS.map(&:inspect).to_sentence
|
|
97
93
|
missing_columns = Array.new
|
98
94
|
import_runnables.each do |runnable|
|
99
95
|
runnable.attributes.each do |_, attribute|
|
100
|
-
logger.error "[data_miner gem] You can't have an attribute column that ends in _units (reserved): #{resource.table_name}.#{attribute.name}" if attribute.name.ends_with? '_units'
|
96
|
+
DataMiner.logger.error "[data_miner gem] You can't have an attribute column that ends in _units (reserved): #{resource.table_name}.#{attribute.name}" if attribute.name.ends_with? '_units'
|
101
97
|
unless resource.column_names.include? attribute.name
|
102
98
|
missing_columns << attribute.name
|
103
99
|
end
|
@@ -108,7 +104,7 @@ You need to supply one of #{COMPLETE_UNIT_DEFINITIONS.map(&:inspect).to_sentence
|
|
108
104
|
end
|
109
105
|
missing_columns.uniq!
|
110
106
|
if missing_columns.any?
|
111
|
-
logger.error %{
|
107
|
+
DataMiner.logger.error %{
|
112
108
|
|
113
109
|
================================
|
114
110
|
|
data/lib/data_miner/import.rb
CHANGED
@@ -28,10 +28,12 @@ module DataMiner
|
|
28
28
|
end
|
29
29
|
|
30
30
|
def store(attr_name, attr_options = {})
|
31
|
+
DataMiner.logger.error "[data_miner gem] You should only call store or key once for #{resource.name}##{attr_name}" if attributes.has_key? attr_name
|
31
32
|
attributes[attr_name] = Attribute.new self, attr_name, attr_options
|
32
33
|
end
|
33
34
|
|
34
35
|
def key(attr_name, attr_options = {})
|
36
|
+
DataMiner.logger.error "[data_miner gem] You should only call store or key once for #{resource.name}##{attr_name}" if attributes.has_key? attr_name
|
35
37
|
@key = attr_name
|
36
38
|
store attr_name, attr_options
|
37
39
|
end
|
data/test/data_miner_test.rb
CHANGED
@@ -412,14 +412,12 @@ class Country < ActiveRecord::Base
|
|
412
412
|
|
413
413
|
data_miner do
|
414
414
|
import 'The official ISO country list', :url => 'http://www.iso.org/iso/list-en1-semic-3.txt', :skip => 2, :headers => false, :delimiter => ';' do
|
415
|
-
key 'iso_3166'
|
416
|
-
store 'iso_3166', :field_number => 1
|
415
|
+
key 'iso_3166', :field_number => 1
|
417
416
|
store 'name', :field_number => 0
|
418
417
|
end
|
419
418
|
|
420
419
|
import 'A Princeton dataset with better capitalization', :url => 'http://www.cs.princeton.edu/introcs/data/iso3166.csv' do
|
421
|
-
key 'iso_3166'
|
422
|
-
store 'iso_3166', :field_name => 'country code'
|
420
|
+
key 'iso_3166', :field_name => 'country code'
|
423
421
|
store 'name', :field_name => 'country'
|
424
422
|
end
|
425
423
|
end
|
@@ -430,11 +428,10 @@ class Airport < ActiveRecord::Base
|
|
430
428
|
|
431
429
|
data_miner do
|
432
430
|
import :url => 'http://openflights.svn.sourceforge.net/viewvc/openflights/openflights/data/airports.dat', :headers => false, :select => lambda { |row| row[4].present? } do
|
433
|
-
key 'iata_code'
|
431
|
+
key 'iata_code', :field_number => 4
|
434
432
|
store 'name', :field_number => 1
|
435
433
|
store 'city', :field_number => 2
|
436
434
|
store 'country_name', :field_number => 3
|
437
|
-
store 'iata_code', :field_number => 4
|
438
435
|
store 'latitude', :field_number => 6
|
439
436
|
store 'longitude', :field_number => 7
|
440
437
|
end
|
@@ -446,17 +443,15 @@ class CensusRegion < ActiveRecord::Base
|
|
446
443
|
|
447
444
|
data_miner do
|
448
445
|
import :url => 'http://www.census.gov/popest/geographic/codes02.csv', :skip => 9, :select => lambda { |row| row['Region'].to_i > 0 and row['Division'].to_s.strip == 'X'} do
|
449
|
-
key 'number'
|
446
|
+
key 'number', :field_name => 'Region'
|
450
447
|
store 'name', :field_name => 'Name'
|
451
|
-
store 'number', :field_name => 'Region'
|
452
448
|
end
|
453
449
|
|
454
450
|
# pretend this is a different data source
|
455
451
|
# fake! just for testing purposes
|
456
452
|
import :url => 'http://www.census.gov/popest/geographic/codes02.csv', :skip => 9, :select => lambda { |row| row['Region'].to_i > 0 and row['Division'].to_s.strip == 'X'} do
|
457
|
-
key 'number'
|
453
|
+
key 'number', :field_name => 'Region'
|
458
454
|
store 'name', :field_name => 'Name'
|
459
|
-
store 'number', :field_name => 'Region'
|
460
455
|
end
|
461
456
|
end
|
462
457
|
end
|
@@ -467,9 +462,8 @@ class CensusDivision < ActiveRecord::Base
|
|
467
462
|
|
468
463
|
data_miner do
|
469
464
|
import :url => 'http://www.census.gov/popest/geographic/codes02.csv', :skip => 9, :select => lambda { |row| row['Division'].to_s.strip != 'X' and row['FIPS CODE STATE'].to_s.strip == 'X'} do
|
470
|
-
key 'number'
|
465
|
+
key 'number', :field_name => 'Division'
|
471
466
|
store 'name', :field_name => 'Name'
|
472
|
-
store 'number', :field_name => 'Division'
|
473
467
|
store 'census_region_number', :field_name => 'Region'
|
474
468
|
store 'census_region_name', :field_name => 'Region', :dictionary => { :input => 'number', :output => 'name', :url => 'http://data.brighterplanet.com/census_regions.csv' }
|
475
469
|
end
|
@@ -488,8 +482,7 @@ class ResidentialEnergyConsumptionSurveyResponse < ActiveRecord::Base
|
|
488
482
|
# conversions are NOT performed here, since we first have to zero out legitimate skips
|
489
483
|
# otherwise you will get values like "999 pounds = 453.138778 kilograms" (where 999 is really a legit skip)
|
490
484
|
import 'RECs 2005 (but not converting units to metric just yet)', :url => 'http://www.eia.doe.gov/emeu/recs/recspubuse05/datafiles/RECS05alldata.csv', :headers => :upcase do
|
491
|
-
key 'department_of_energy_identifier'
|
492
|
-
store 'department_of_energy_identifier', :field_name => 'DOEID'
|
485
|
+
key 'department_of_energy_identifier', :field_name => 'DOEID'
|
493
486
|
|
494
487
|
store 'residence_class', :field_name => 'TYPEHUQ', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/typehuq/typehuq.csv' }
|
495
488
|
store 'construction_year', :field_name => 'YEARMADE', :dictionary => { :input => 'Code', :sprintf => '%02d', :output => 'Date in the middle (synthetic)', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/yearmade/yearmade.csv' }
|