data_miner 0.4.1 → 0.4.2

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.4.1
1
+ 0.4.2
data/data_miner.gemspec CHANGED
@@ -5,7 +5,7 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{data_miner}
8
- s.version = "0.4.1"
8
+ s.version = "0.4.2"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Seamus Abshere", "Andy Rossmeissl"]
@@ -11,10 +11,6 @@ module DataMiner
11
11
  @attributes = HashWithIndifferentAccess.new
12
12
  end
13
13
 
14
- def logger
15
- DataMiner.logger
16
- end
17
-
18
14
  def process(method_name_or_block_description, &block)
19
15
  self.runnable_counter += 1
20
16
  runnables << DataMiner::Process.new(self, runnable_counter, method_name_or_block_description, &block)
@@ -76,7 +72,7 @@ module DataMiner
76
72
  import_runnables.each do |runnable|
77
73
  runnable.attributes.each do |_, attribute|
78
74
  if attribute.options.any? { |k, _| k.to_s =~ /unit/ } and COMPLETE_UNIT_DEFINITIONS.none? { |complete_definition| complete_definition.all? { |required_option| attribute.options[required_option].present? } }
79
- logger.error %{
75
+ DataMiner.logger.error %{
80
76
 
81
77
  ================================
82
78
 
@@ -97,7 +93,7 @@ You need to supply one of #{COMPLETE_UNIT_DEFINITIONS.map(&:inspect).to_sentence
97
93
  missing_columns = Array.new
98
94
  import_runnables.each do |runnable|
99
95
  runnable.attributes.each do |_, attribute|
100
- logger.error "[data_miner gem] You can't have an attribute column that ends in _units (reserved): #{resource.table_name}.#{attribute.name}" if attribute.name.ends_with? '_units'
96
+ DataMiner.logger.error "[data_miner gem] You can't have an attribute column that ends in _units (reserved): #{resource.table_name}.#{attribute.name}" if attribute.name.ends_with? '_units'
101
97
  unless resource.column_names.include? attribute.name
102
98
  missing_columns << attribute.name
103
99
  end
@@ -108,7 +104,7 @@ You need to supply one of #{COMPLETE_UNIT_DEFINITIONS.map(&:inspect).to_sentence
108
104
  end
109
105
  missing_columns.uniq!
110
106
  if missing_columns.any?
111
- logger.error %{
107
+ DataMiner.logger.error %{
112
108
 
113
109
  ================================
114
110
 
@@ -28,10 +28,12 @@ module DataMiner
28
28
  end
29
29
 
30
30
  def store(attr_name, attr_options = {})
31
+ DataMiner.logger.error "[data_miner gem] You should only call store or key once for #{resource.name}##{attr_name}" if attributes.has_key? attr_name
31
32
  attributes[attr_name] = Attribute.new self, attr_name, attr_options
32
33
  end
33
34
 
34
35
  def key(attr_name, attr_options = {})
36
+ DataMiner.logger.error "[data_miner gem] You should only call store or key once for #{resource.name}##{attr_name}" if attributes.has_key? attr_name
35
37
  @key = attr_name
36
38
  store attr_name, attr_options
37
39
  end
@@ -412,14 +412,12 @@ class Country < ActiveRecord::Base
412
412
 
413
413
  data_miner do
414
414
  import 'The official ISO country list', :url => 'http://www.iso.org/iso/list-en1-semic-3.txt', :skip => 2, :headers => false, :delimiter => ';' do
415
- key 'iso_3166'
416
- store 'iso_3166', :field_number => 1
415
+ key 'iso_3166', :field_number => 1
417
416
  store 'name', :field_number => 0
418
417
  end
419
418
 
420
419
  import 'A Princeton dataset with better capitalization', :url => 'http://www.cs.princeton.edu/introcs/data/iso3166.csv' do
421
- key 'iso_3166'
422
- store 'iso_3166', :field_name => 'country code'
420
+ key 'iso_3166', :field_name => 'country code'
423
421
  store 'name', :field_name => 'country'
424
422
  end
425
423
  end
@@ -430,11 +428,10 @@ class Airport < ActiveRecord::Base
430
428
 
431
429
  data_miner do
432
430
  import :url => 'http://openflights.svn.sourceforge.net/viewvc/openflights/openflights/data/airports.dat', :headers => false, :select => lambda { |row| row[4].present? } do
433
- key 'iata_code'
431
+ key 'iata_code', :field_number => 4
434
432
  store 'name', :field_number => 1
435
433
  store 'city', :field_number => 2
436
434
  store 'country_name', :field_number => 3
437
- store 'iata_code', :field_number => 4
438
435
  store 'latitude', :field_number => 6
439
436
  store 'longitude', :field_number => 7
440
437
  end
@@ -446,17 +443,15 @@ class CensusRegion < ActiveRecord::Base
446
443
 
447
444
  data_miner do
448
445
  import :url => 'http://www.census.gov/popest/geographic/codes02.csv', :skip => 9, :select => lambda { |row| row['Region'].to_i > 0 and row['Division'].to_s.strip == 'X'} do
449
- key 'number'
446
+ key 'number', :field_name => 'Region'
450
447
  store 'name', :field_name => 'Name'
451
- store 'number', :field_name => 'Region'
452
448
  end
453
449
 
454
450
  # pretend this is a different data source
455
451
  # fake! just for testing purposes
456
452
  import :url => 'http://www.census.gov/popest/geographic/codes02.csv', :skip => 9, :select => lambda { |row| row['Region'].to_i > 0 and row['Division'].to_s.strip == 'X'} do
457
- key 'number'
453
+ key 'number', :field_name => 'Region'
458
454
  store 'name', :field_name => 'Name'
459
- store 'number', :field_name => 'Region'
460
455
  end
461
456
  end
462
457
  end
@@ -467,9 +462,8 @@ class CensusDivision < ActiveRecord::Base
467
462
 
468
463
  data_miner do
469
464
  import :url => 'http://www.census.gov/popest/geographic/codes02.csv', :skip => 9, :select => lambda { |row| row['Division'].to_s.strip != 'X' and row['FIPS CODE STATE'].to_s.strip == 'X'} do
470
- key 'number'
465
+ key 'number', :field_name => 'Division'
471
466
  store 'name', :field_name => 'Name'
472
- store 'number', :field_name => 'Division'
473
467
  store 'census_region_number', :field_name => 'Region'
474
468
  store 'census_region_name', :field_name => 'Region', :dictionary => { :input => 'number', :output => 'name', :url => 'http://data.brighterplanet.com/census_regions.csv' }
475
469
  end
@@ -488,8 +482,7 @@ class ResidentialEnergyConsumptionSurveyResponse < ActiveRecord::Base
488
482
  # conversions are NOT performed here, since we first have to zero out legitimate skips
489
483
  # otherwise you will get values like "999 pounds = 453.138778 kilograms" (where 999 is really a legit skip)
490
484
  import 'RECs 2005 (but not converting units to metric just yet)', :url => 'http://www.eia.doe.gov/emeu/recs/recspubuse05/datafiles/RECS05alldata.csv', :headers => :upcase do
491
- key 'department_of_energy_identifier'
492
- store 'department_of_energy_identifier', :field_name => 'DOEID'
485
+ key 'department_of_energy_identifier', :field_name => 'DOEID'
493
486
 
494
487
  store 'residence_class', :field_name => 'TYPEHUQ', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/typehuq/typehuq.csv' }
495
488
  store 'construction_year', :field_name => 'YEARMADE', :dictionary => { :input => 'Code', :sprintf => '%02d', :output => 'Date in the middle (synthetic)', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/yearmade/yearmade.csv' }
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data_miner
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.1
4
+ version: 0.4.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Seamus Abshere