data_miner 0.4.1 → 0.4.2

Sign up to get free protection for your applications and to get access to all the features.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.4.1
1
+ 0.4.2
data/data_miner.gemspec CHANGED
@@ -5,7 +5,7 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{data_miner}
8
- s.version = "0.4.1"
8
+ s.version = "0.4.2"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Seamus Abshere", "Andy Rossmeissl"]
@@ -11,10 +11,6 @@ module DataMiner
11
11
  @attributes = HashWithIndifferentAccess.new
12
12
  end
13
13
 
14
- def logger
15
- DataMiner.logger
16
- end
17
-
18
14
  def process(method_name_or_block_description, &block)
19
15
  self.runnable_counter += 1
20
16
  runnables << DataMiner::Process.new(self, runnable_counter, method_name_or_block_description, &block)
@@ -76,7 +72,7 @@ module DataMiner
76
72
  import_runnables.each do |runnable|
77
73
  runnable.attributes.each do |_, attribute|
78
74
  if attribute.options.any? { |k, _| k.to_s =~ /unit/ } and COMPLETE_UNIT_DEFINITIONS.none? { |complete_definition| complete_definition.all? { |required_option| attribute.options[required_option].present? } }
79
- logger.error %{
75
+ DataMiner.logger.error %{
80
76
 
81
77
  ================================
82
78
 
@@ -97,7 +93,7 @@ You need to supply one of #{COMPLETE_UNIT_DEFINITIONS.map(&:inspect).to_sentence
97
93
  missing_columns = Array.new
98
94
  import_runnables.each do |runnable|
99
95
  runnable.attributes.each do |_, attribute|
100
- logger.error "[data_miner gem] You can't have an attribute column that ends in _units (reserved): #{resource.table_name}.#{attribute.name}" if attribute.name.ends_with? '_units'
96
+ DataMiner.logger.error "[data_miner gem] You can't have an attribute column that ends in _units (reserved): #{resource.table_name}.#{attribute.name}" if attribute.name.ends_with? '_units'
101
97
  unless resource.column_names.include? attribute.name
102
98
  missing_columns << attribute.name
103
99
  end
@@ -108,7 +104,7 @@ You need to supply one of #{COMPLETE_UNIT_DEFINITIONS.map(&:inspect).to_sentence
108
104
  end
109
105
  missing_columns.uniq!
110
106
  if missing_columns.any?
111
- logger.error %{
107
+ DataMiner.logger.error %{
112
108
 
113
109
  ================================
114
110
 
@@ -28,10 +28,12 @@ module DataMiner
28
28
  end
29
29
 
30
30
  def store(attr_name, attr_options = {})
31
+ DataMiner.logger.error "[data_miner gem] You should only call store or key once for #{resource.name}##{attr_name}" if attributes.has_key? attr_name
31
32
  attributes[attr_name] = Attribute.new self, attr_name, attr_options
32
33
  end
33
34
 
34
35
  def key(attr_name, attr_options = {})
36
+ DataMiner.logger.error "[data_miner gem] You should only call store or key once for #{resource.name}##{attr_name}" if attributes.has_key? attr_name
35
37
  @key = attr_name
36
38
  store attr_name, attr_options
37
39
  end
@@ -412,14 +412,12 @@ class Country < ActiveRecord::Base
412
412
 
413
413
  data_miner do
414
414
  import 'The official ISO country list', :url => 'http://www.iso.org/iso/list-en1-semic-3.txt', :skip => 2, :headers => false, :delimiter => ';' do
415
- key 'iso_3166'
416
- store 'iso_3166', :field_number => 1
415
+ key 'iso_3166', :field_number => 1
417
416
  store 'name', :field_number => 0
418
417
  end
419
418
 
420
419
  import 'A Princeton dataset with better capitalization', :url => 'http://www.cs.princeton.edu/introcs/data/iso3166.csv' do
421
- key 'iso_3166'
422
- store 'iso_3166', :field_name => 'country code'
420
+ key 'iso_3166', :field_name => 'country code'
423
421
  store 'name', :field_name => 'country'
424
422
  end
425
423
  end
@@ -430,11 +428,10 @@ class Airport < ActiveRecord::Base
430
428
 
431
429
  data_miner do
432
430
  import :url => 'http://openflights.svn.sourceforge.net/viewvc/openflights/openflights/data/airports.dat', :headers => false, :select => lambda { |row| row[4].present? } do
433
- key 'iata_code'
431
+ key 'iata_code', :field_number => 4
434
432
  store 'name', :field_number => 1
435
433
  store 'city', :field_number => 2
436
434
  store 'country_name', :field_number => 3
437
- store 'iata_code', :field_number => 4
438
435
  store 'latitude', :field_number => 6
439
436
  store 'longitude', :field_number => 7
440
437
  end
@@ -446,17 +443,15 @@ class CensusRegion < ActiveRecord::Base
446
443
 
447
444
  data_miner do
448
445
  import :url => 'http://www.census.gov/popest/geographic/codes02.csv', :skip => 9, :select => lambda { |row| row['Region'].to_i > 0 and row['Division'].to_s.strip == 'X'} do
449
- key 'number'
446
+ key 'number', :field_name => 'Region'
450
447
  store 'name', :field_name => 'Name'
451
- store 'number', :field_name => 'Region'
452
448
  end
453
449
 
454
450
  # pretend this is a different data source
455
451
  # fake! just for testing purposes
456
452
  import :url => 'http://www.census.gov/popest/geographic/codes02.csv', :skip => 9, :select => lambda { |row| row['Region'].to_i > 0 and row['Division'].to_s.strip == 'X'} do
457
- key 'number'
453
+ key 'number', :field_name => 'Region'
458
454
  store 'name', :field_name => 'Name'
459
- store 'number', :field_name => 'Region'
460
455
  end
461
456
  end
462
457
  end
@@ -467,9 +462,8 @@ class CensusDivision < ActiveRecord::Base
467
462
 
468
463
  data_miner do
469
464
  import :url => 'http://www.census.gov/popest/geographic/codes02.csv', :skip => 9, :select => lambda { |row| row['Division'].to_s.strip != 'X' and row['FIPS CODE STATE'].to_s.strip == 'X'} do
470
- key 'number'
465
+ key 'number', :field_name => 'Division'
471
466
  store 'name', :field_name => 'Name'
472
- store 'number', :field_name => 'Division'
473
467
  store 'census_region_number', :field_name => 'Region'
474
468
  store 'census_region_name', :field_name => 'Region', :dictionary => { :input => 'number', :output => 'name', :url => 'http://data.brighterplanet.com/census_regions.csv' }
475
469
  end
@@ -488,8 +482,7 @@ class ResidentialEnergyConsumptionSurveyResponse < ActiveRecord::Base
488
482
  # conversions are NOT performed here, since we first have to zero out legitimate skips
489
483
  # otherwise you will get values like "999 pounds = 453.138778 kilograms" (where 999 is really a legit skip)
490
484
  import 'RECs 2005 (but not converting units to metric just yet)', :url => 'http://www.eia.doe.gov/emeu/recs/recspubuse05/datafiles/RECS05alldata.csv', :headers => :upcase do
491
- key 'department_of_energy_identifier'
492
- store 'department_of_energy_identifier', :field_name => 'DOEID'
485
+ key 'department_of_energy_identifier', :field_name => 'DOEID'
493
486
 
494
487
  store 'residence_class', :field_name => 'TYPEHUQ', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/typehuq/typehuq.csv' }
495
488
  store 'construction_year', :field_name => 'YEARMADE', :dictionary => { :input => 'Code', :sprintf => '%02d', :output => 'Date in the middle (synthetic)', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/yearmade/yearmade.csv' }
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data_miner
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.1
4
+ version: 0.4.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Seamus Abshere