data_miner 0.4.1 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/data_miner.gemspec +1 -1
- data/lib/data_miner/configuration.rb +3 -7
- data/lib/data_miner/import.rb +2 -0
- data/test/data_miner_test.rb +7 -14
- metadata +1 -1
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.4.1
|
1
|
+
0.4.2
|
data/data_miner.gemspec
CHANGED
data/lib/data_miner/configuration.rb
CHANGED
@@ -11,10 +11,6 @@ module DataMiner
|
|
11
11
|
@attributes = HashWithIndifferentAccess.new
|
12
12
|
end
|
13
13
|
|
14
|
-
def logger
|
15
|
-
DataMiner.logger
|
16
|
-
end
|
17
|
-
|
18
14
|
def process(method_name_or_block_description, &block)
|
19
15
|
self.runnable_counter += 1
|
20
16
|
runnables << DataMiner::Process.new(self, runnable_counter, method_name_or_block_description, &block)
|
@@ -76,7 +72,7 @@ module DataMiner
|
|
76
72
|
import_runnables.each do |runnable|
|
77
73
|
runnable.attributes.each do |_, attribute|
|
78
74
|
if attribute.options.any? { |k, _| k.to_s =~ /unit/ } and COMPLETE_UNIT_DEFINITIONS.none? { |complete_definition| complete_definition.all? { |required_option| attribute.options[required_option].present? } }
|
79
|
-
logger.error %{
|
75
|
+
DataMiner.logger.error %{
|
80
76
|
|
81
77
|
================================
|
82
78
|
|
@@ -97,7 +93,7 @@ You need to supply one of #{COMPLETE_UNIT_DEFINITIONS.map(&:inspect).to_sentence
|
|
97
93
|
missing_columns = Array.new
|
98
94
|
import_runnables.each do |runnable|
|
99
95
|
runnable.attributes.each do |_, attribute|
|
100
|
-
logger.error "[data_miner gem] You can't have an attribute column that ends in _units (reserved): #{resource.table_name}.#{attribute.name}" if attribute.name.ends_with? '_units'
|
96
|
+
DataMiner.logger.error "[data_miner gem] You can't have an attribute column that ends in _units (reserved): #{resource.table_name}.#{attribute.name}" if attribute.name.ends_with? '_units'
|
101
97
|
unless resource.column_names.include? attribute.name
|
102
98
|
missing_columns << attribute.name
|
103
99
|
end
|
@@ -108,7 +104,7 @@ You need to supply one of #{COMPLETE_UNIT_DEFINITIONS.map(&:inspect).to_sentence
|
|
108
104
|
end
|
109
105
|
missing_columns.uniq!
|
110
106
|
if missing_columns.any?
|
111
|
-
logger.error %{
|
107
|
+
DataMiner.logger.error %{
|
112
108
|
|
113
109
|
================================
|
114
110
|
|
data/lib/data_miner/import.rb
CHANGED
@@ -28,10 +28,12 @@ module DataMiner
|
|
28
28
|
end
|
29
29
|
|
30
30
|
def store(attr_name, attr_options = {})
|
31
|
+
DataMiner.logger.error "[data_miner gem] You should only call store or key once for #{resource.name}##{attr_name}" if attributes.has_key? attr_name
|
31
32
|
attributes[attr_name] = Attribute.new self, attr_name, attr_options
|
32
33
|
end
|
33
34
|
|
34
35
|
def key(attr_name, attr_options = {})
|
36
|
+
DataMiner.logger.error "[data_miner gem] You should only call store or key once for #{resource.name}##{attr_name}" if attributes.has_key? attr_name
|
35
37
|
@key = attr_name
|
36
38
|
store attr_name, attr_options
|
37
39
|
end
|
data/test/data_miner_test.rb
CHANGED
@@ -412,14 +412,12 @@ class Country < ActiveRecord::Base
|
|
412
412
|
|
413
413
|
data_miner do
|
414
414
|
import 'The official ISO country list', :url => 'http://www.iso.org/iso/list-en1-semic-3.txt', :skip => 2, :headers => false, :delimiter => ';' do
|
415
|
-
key 'iso_3166'
|
416
|
-
store 'iso_3166', :field_number => 1
|
415
|
+
key 'iso_3166', :field_number => 1
|
417
416
|
store 'name', :field_number => 0
|
418
417
|
end
|
419
418
|
|
420
419
|
import 'A Princeton dataset with better capitalization', :url => 'http://www.cs.princeton.edu/introcs/data/iso3166.csv' do
|
421
|
-
key 'iso_3166'
|
422
|
-
store 'iso_3166', :field_name => 'country code'
|
420
|
+
key 'iso_3166', :field_name => 'country code'
|
423
421
|
store 'name', :field_name => 'country'
|
424
422
|
end
|
425
423
|
end
|
@@ -430,11 +428,10 @@ class Airport < ActiveRecord::Base
|
|
430
428
|
|
431
429
|
data_miner do
|
432
430
|
import :url => 'http://openflights.svn.sourceforge.net/viewvc/openflights/openflights/data/airports.dat', :headers => false, :select => lambda { |row| row[4].present? } do
|
433
|
-
key 'iata_code'
|
431
|
+
key 'iata_code', :field_number => 4
|
434
432
|
store 'name', :field_number => 1
|
435
433
|
store 'city', :field_number => 2
|
436
434
|
store 'country_name', :field_number => 3
|
437
|
-
store 'iata_code', :field_number => 4
|
438
435
|
store 'latitude', :field_number => 6
|
439
436
|
store 'longitude', :field_number => 7
|
440
437
|
end
|
@@ -446,17 +443,15 @@ class CensusRegion < ActiveRecord::Base
|
|
446
443
|
|
447
444
|
data_miner do
|
448
445
|
import :url => 'http://www.census.gov/popest/geographic/codes02.csv', :skip => 9, :select => lambda { |row| row['Region'].to_i > 0 and row['Division'].to_s.strip == 'X'} do
|
449
|
-
key 'number'
|
446
|
+
key 'number', :field_name => 'Region'
|
450
447
|
store 'name', :field_name => 'Name'
|
451
|
-
store 'number', :field_name => 'Region'
|
452
448
|
end
|
453
449
|
|
454
450
|
# pretend this is a different data source
|
455
451
|
# fake! just for testing purposes
|
456
452
|
import :url => 'http://www.census.gov/popest/geographic/codes02.csv', :skip => 9, :select => lambda { |row| row['Region'].to_i > 0 and row['Division'].to_s.strip == 'X'} do
|
457
|
-
key 'number'
|
453
|
+
key 'number', :field_name => 'Region'
|
458
454
|
store 'name', :field_name => 'Name'
|
459
|
-
store 'number', :field_name => 'Region'
|
460
455
|
end
|
461
456
|
end
|
462
457
|
end
|
@@ -467,9 +462,8 @@ class CensusDivision < ActiveRecord::Base
|
|
467
462
|
|
468
463
|
data_miner do
|
469
464
|
import :url => 'http://www.census.gov/popest/geographic/codes02.csv', :skip => 9, :select => lambda { |row| row['Division'].to_s.strip != 'X' and row['FIPS CODE STATE'].to_s.strip == 'X'} do
|
470
|
-
key 'number'
|
465
|
+
key 'number', :field_name => 'Division'
|
471
466
|
store 'name', :field_name => 'Name'
|
472
|
-
store 'number', :field_name => 'Division'
|
473
467
|
store 'census_region_number', :field_name => 'Region'
|
474
468
|
store 'census_region_name', :field_name => 'Region', :dictionary => { :input => 'number', :output => 'name', :url => 'http://data.brighterplanet.com/census_regions.csv' }
|
475
469
|
end
|
@@ -488,8 +482,7 @@ class ResidentialEnergyConsumptionSurveyResponse < ActiveRecord::Base
|
|
488
482
|
# conversions are NOT performed here, since we first have to zero out legitimate skips
|
489
483
|
# otherwise you will get values like "999 pounds = 453.138778 kilograms" (where 999 is really a legit skip)
|
490
484
|
import 'RECs 2005 (but not converting units to metric just yet)', :url => 'http://www.eia.doe.gov/emeu/recs/recspubuse05/datafiles/RECS05alldata.csv', :headers => :upcase do
|
491
|
-
key 'department_of_energy_identifier'
|
492
|
-
store 'department_of_energy_identifier', :field_name => 'DOEID'
|
485
|
+
key 'department_of_energy_identifier', :field_name => 'DOEID'
|
493
486
|
|
494
487
|
store 'residence_class', :field_name => 'TYPEHUQ', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/typehuq/typehuq.csv' }
|
495
488
|
store 'construction_year', :field_name => 'YEARMADE', :dictionary => { :input => 'Code', :sprintf => '%02d', :output => 'Date in the middle (synthetic)', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/yearmade/yearmade.csv' }
|