data_miner 1.3.0 → 1.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +52 -59
- data/data_miner.gemspec +9 -9
- data/lib/data_miner.rb +1 -18
- data/lib/data_miner/attribute.rb +8 -4
- data/lib/data_miner/config.rb +2 -35
- data/lib/data_miner/import.rb +3 -3
- data/lib/data_miner/tap.rb +14 -35
- data/lib/data_miner/version.rb +1 -1
- data/test/helper.rb +2 -1
- data/test/support/test_database.rb +2 -0
- data/test/test_data_miner_attribute.rb +53 -0
- data/test/test_old_syntax.rb +78 -156
- data/test/test_tap.rb +21 -0
- metadata +48 -46
data/README.rdoc
CHANGED
@@ -8,21 +8,21 @@ Programmatically import useful data into your ActiveRecord models.
|
|
8
8
|
|
9
9
|
You define <tt>data_miner</tt> blocks in your ActiveRecord models. For example, in <tt>app/models/country.rb</tt>:
|
10
10
|
|
11
|
-
|
12
|
-
|
11
|
+
class Country < ActiveRecord::Base
|
12
|
+
set_primary_key :iso_3166_code
|
13
13
|
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
14
|
+
data_miner do
|
15
|
+
import 'the official ISO country list',
|
16
|
+
:url => 'http://www.iso.org/iso/list-en1-semic-3.txt',
|
17
|
+
:skip => 2,
|
18
|
+
:headers => false,
|
19
|
+
:delimiter => ';',
|
20
|
+
:encoding => 'ISO-8859-1' do
|
21
|
+
key :iso_3166_code, :field_number => 1
|
22
|
+
store :name, :field_number => 0
|
23
|
+
end
|
23
24
|
end
|
24
25
|
end
|
25
|
-
end
|
26
26
|
|
27
27
|
Now you can run:
|
28
28
|
|
@@ -31,33 +31,28 @@ Now you can run:
|
|
31
31
|
|
32
32
|
== Creating tables from scratch (changed in 1.2)
|
33
33
|
|
34
|
-
We recommend using the <tt>
|
34
|
+
We recommend using the <tt>mini_record-compat</tt> gem (https://github.com/seamusabshere/mini_record)
|
35
35
|
|
36
36
|
This replaces the <tt>schema</tt> method that was available before. It didn't make sense for <tt>data_miner</tt> to provide this natively.
|
37
37
|
|
38
|
-
class Car < ActiveRecord::Base
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
force_schema do
|
43
|
-
string :make
|
44
|
-
string :model
|
45
|
-
end
|
38
|
+
class Car < ActiveRecord::Base
|
39
|
+
# the mini_record way
|
40
|
+
col :make
|
41
|
+
col :model
|
46
42
|
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
43
|
+
data_miner do
|
44
|
+
# DEPRECATED - see above
|
45
|
+
# schema do
|
46
|
+
# string :make
|
47
|
+
# string :model
|
48
|
+
# end
|
53
49
|
|
54
|
-
|
55
|
-
|
56
|
-
end
|
50
|
+
# the mini_record way
|
51
|
+
process :auto_upgrade!
|
57
52
|
|
58
|
-
|
59
|
-
|
60
|
-
end
|
53
|
+
# [... other data mining steps]
|
54
|
+
end
|
55
|
+
end
|
61
56
|
|
62
57
|
==Advanced usage
|
63
58
|
|
@@ -69,35 +64,33 @@ This is how we linked together (http://data.brighterplanet.com/aircraft) the FAA
|
|
69
64
|
# table without breaking associations.
|
70
65
|
set_primary_key :icao_code
|
71
66
|
|
72
|
-
# Use the
|
67
|
+
# Use the mini_record-compat gem to define the database schema in-line.
|
73
68
|
# It will destructively and automatically add/remove columns.
|
74
69
|
# This is "OK" because you can always just re-run the import script to get the data back.
|
75
70
|
# PS. If you're using DataMapper, you don't need this
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
index 'bts_aircraft_type_code'
|
100
|
-
end
|
71
|
+
col :icao_code
|
72
|
+
col :manufacturer_name
|
73
|
+
col :name
|
74
|
+
col :bts_name
|
75
|
+
col :bts_aircraft_type_code
|
76
|
+
col :brighter_planet_aircraft_class_code
|
77
|
+
col :fuel_use_aircraft_name
|
78
|
+
col :m3, :type => :float
|
79
|
+
col :m3_units
|
80
|
+
col :m2, :type => :float
|
81
|
+
col :m2_units
|
82
|
+
col :m1, :type => :float
|
83
|
+
col :m1_units
|
84
|
+
col :endpoint_fuel, :type => :float
|
85
|
+
col :endpoint_fuel_units
|
86
|
+
col :seats, :type => :float
|
87
|
+
col :distance, :type => :float
|
88
|
+
col :distance_units
|
89
|
+
col :load_factor, :type => :float
|
90
|
+
col :freight_share, :type => :float
|
91
|
+
col :payload, :type => :float
|
92
|
+
col :weighting, :type => :float
|
93
|
+
col :bts_aircraft_type_code, :type => :index
|
101
94
|
|
102
95
|
# A dictionary between BTS aircraft type codes and ICAO aircraft
|
103
96
|
# codes that uses string similarity instead of exact matching.
|
data/data_miner.gemspec
CHANGED
@@ -19,15 +19,15 @@ Gem::Specification.new do |s|
|
|
19
19
|
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
20
20
|
s.require_paths = ["lib"]
|
21
21
|
|
22
|
-
s.
|
23
|
-
s.
|
24
|
-
s.
|
25
|
-
s.
|
26
|
-
s.
|
27
|
-
s.
|
28
|
-
s.
|
29
|
-
s.
|
30
|
-
s.add_development_dependency '
|
22
|
+
s.add_runtime_dependency 'remote_table', '>=1.2.2'
|
23
|
+
s.add_runtime_dependency 'activerecord', '>=2.3.4'
|
24
|
+
s.add_runtime_dependency 'activesupport', '>=2.3.4'
|
25
|
+
s.add_runtime_dependency 'conversions', '>=1.4.4'
|
26
|
+
s.add_runtime_dependency 'blockenspiel', '>=0.3.2'
|
27
|
+
s.add_runtime_dependency 'errata', '>=1.0.1'
|
28
|
+
s.add_runtime_dependency 'posix-spawn'
|
29
|
+
s.add_runtime_dependency 'taps'
|
30
|
+
s.add_development_dependency 'mini_record-compat'
|
31
31
|
s.add_development_dependency 'loose_tight_dictionary', ">=0.0.5"
|
32
32
|
s.add_development_dependency 'test-unit'
|
33
33
|
s.add_development_dependency 'shoulda'
|
data/lib/data_miner.rb
CHANGED
@@ -32,24 +32,7 @@ class DataMiner
|
|
32
32
|
delegate :run, :to => :instance
|
33
33
|
delegate :resource_names, :to => :instance
|
34
34
|
end
|
35
|
-
|
36
|
-
# TODO this should probably live somewhere else
|
37
|
-
def self.backtick_with_reporting(cmd)
|
38
|
-
cmd = cmd.gsub /[ ]*\n[ ]*/m, ' '
|
39
|
-
output = `#{cmd}`
|
40
|
-
if not $?.success?
|
41
|
-
raise %{
|
42
|
-
From the data_miner gem...
|
43
|
-
|
44
|
-
Command failed:
|
45
|
-
#{cmd}
|
46
|
-
|
47
|
-
Output:
|
48
|
-
#{output}
|
49
|
-
}
|
50
|
-
end
|
51
|
-
end
|
52
|
-
|
35
|
+
|
53
36
|
# http://avdi.org/devblog/2009/07/14/recursively-symbolize-keys/
|
54
37
|
def self.recursively_stringify_keys(hash)
|
55
38
|
hash.inject(::Hash.new) do |result, (key, value)|
|
data/lib/data_miner/attribute.rb
CHANGED
@@ -70,6 +70,7 @@ class DataMiner
|
|
70
70
|
value = do_split(value) if wants_split?
|
71
71
|
value.gsub! /[ ]+/, ' '
|
72
72
|
value.strip!
|
73
|
+
return nil if value.blank? and wants_nullification?
|
73
74
|
value.upcase! if wants_upcase?
|
74
75
|
value = do_convert row, value if wants_conversion?
|
75
76
|
value = do_sprintf value if wants_sprintf?
|
@@ -86,16 +87,19 @@ class DataMiner
|
|
86
87
|
return value if value.is_a? ::ActiveRecord::Base # carry through trapdoor
|
87
88
|
value = value_in_dictionary value if wants_dictionary?
|
88
89
|
value = synthesize.call(row) if wants_synthesize?
|
89
|
-
value = nil if value.blank? and wants_nullification?
|
90
90
|
value
|
91
91
|
end
|
92
|
-
|
92
|
+
|
93
93
|
def set_record_from_row(record, row)
|
94
94
|
return false if !wants_overwriting? and !record.send(name).nil?
|
95
95
|
record.send "#{name}=", value_from_row(row)
|
96
|
-
|
96
|
+
if wants_units?
|
97
|
+
unit = (to_units || unit_from_source(row)).to_s
|
98
|
+
unit = nil if unit.blank? and wants_nullification?
|
99
|
+
record.send "#{name}_units=", unit
|
100
|
+
end
|
97
101
|
end
|
98
|
-
|
102
|
+
|
99
103
|
def unit_from_source(row)
|
100
104
|
row[units_field_name || units_field_number].to_s.strip.underscore.to_sym
|
101
105
|
end
|
data/lib/data_miner/config.rb
CHANGED
@@ -52,11 +52,8 @@ class DataMiner
|
|
52
52
|
|
53
53
|
finished = false
|
54
54
|
skipped = false
|
55
|
-
if Run.table_exists?
|
56
|
-
|
57
|
-
else
|
58
|
-
run = nil
|
59
|
-
::DataMiner.logger.info "Not logging individual runs. Please run DataMiner::Run.create_tables if you want to enable this."
|
55
|
+
run = if Run.table_exists?
|
56
|
+
Run.create! :started_at => ::Time.now, :resource_name => resource.name, :killed => true
|
60
57
|
end
|
61
58
|
resource.delete_all if options['from_scratch']
|
62
59
|
begin
|
@@ -92,7 +89,6 @@ class DataMiner
|
|
92
89
|
def after_invoke
|
93
90
|
return unless resource.table_exists?
|
94
91
|
make_sure_unit_definitions_make_sense
|
95
|
-
suggest_missing_column_migrations
|
96
92
|
end
|
97
93
|
|
98
94
|
COMPLETE_UNIT_DEFINITIONS = [
|
@@ -124,34 +120,5 @@ You need to supply one of #{COMPLETE_UNIT_DEFINITIONS.map(&:inspect).to_sentence
|
|
124
120
|
end
|
125
121
|
end
|
126
122
|
end
|
127
|
-
|
128
|
-
def suggest_missing_column_migrations
|
129
|
-
missing_columns = []
|
130
|
-
|
131
|
-
import_steps.each do |step|
|
132
|
-
step.attributes.each do |_, attribute|
|
133
|
-
raise "You can't have an attribute column that ends in _units (reserved): #{resource.table_name}.#{attribute.name}" if attribute.name.end_with? '_units'
|
134
|
-
unless resource.column_names.include? attribute.name
|
135
|
-
missing_columns << attribute.name
|
136
|
-
end
|
137
|
-
if attribute.wants_units? and !resource.column_names.include?(units_column = "#{attribute.name}_units")
|
138
|
-
missing_columns << units_column
|
139
|
-
end
|
140
|
-
end
|
141
|
-
end
|
142
|
-
missing_columns.uniq!
|
143
|
-
if missing_columns.any?
|
144
|
-
::DataMiner.logger.debug %{
|
145
|
-
|
146
|
-
================================
|
147
|
-
|
148
|
-
On #{resource}, it looks like you're missing some columns...
|
149
|
-
|
150
|
-
#{missing_columns.map { |column_name| "#{column_name.end_with?('_units') ? 'string' : 'FIXME_WHAT_COLUMN_TYPE_AM_I' } '#{column_name}'" }.join("\n") }
|
151
|
-
|
152
|
-
================================
|
153
|
-
}
|
154
|
-
end
|
155
|
-
end
|
156
123
|
end
|
157
124
|
end
|
data/lib/data_miner/import.rb
CHANGED
@@ -80,10 +80,10 @@ class DataMiner
|
|
80
80
|
table.each do |row|
|
81
81
|
record = resource.send "find_or_initialize_by_#{@_key}", attributes[@_key].value_from_row(row)
|
82
82
|
attributes.each { |_, attr| attr.set_record_from_row record, row }
|
83
|
-
|
83
|
+
begin
|
84
84
|
record.save!
|
85
|
-
|
86
|
-
::DataMiner.logger.
|
85
|
+
rescue
|
86
|
+
::DataMiner.logger.warn "[data_miner] Got #{$!.inspect} when trying to save #{row}"
|
87
87
|
end
|
88
88
|
end
|
89
89
|
free
|
data/lib/data_miner/tap.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
require '
|
1
|
+
require 'posix/spawn'
|
2
2
|
class DataMiner
|
3
3
|
class Tap
|
4
4
|
attr_reader :config
|
@@ -28,7 +28,7 @@ class DataMiner
|
|
28
28
|
connection.drop_table possible_obstacle
|
29
29
|
end
|
30
30
|
end
|
31
|
-
|
31
|
+
taps_pull
|
32
32
|
if needs_table_rename?
|
33
33
|
connection.rename_table source_table_name, resource.table_name
|
34
34
|
end
|
@@ -55,6 +55,8 @@ class DataMiner
|
|
55
55
|
|
56
56
|
def adapter
|
57
57
|
case connection.adapter_name
|
58
|
+
when /mysql2/i
|
59
|
+
'mysql2'
|
58
60
|
when /mysql/i
|
59
61
|
'mysql'
|
60
62
|
when /postgres/i
|
@@ -71,11 +73,13 @@ class DataMiner
|
|
71
73
|
|
72
74
|
DEFAULT_PORTS = {
|
73
75
|
'mysql' => 3306,
|
76
|
+
'mysql2' => 3306,
|
74
77
|
'postgres' => 5432
|
75
78
|
}
|
76
79
|
|
77
80
|
DEFAULT_USERNAMES = {
|
78
81
|
'mysql' => 'root',
|
82
|
+
'mysql2' => 'root',
|
79
83
|
'postgres' => ''
|
80
84
|
}
|
81
85
|
|
@@ -95,16 +99,15 @@ class DataMiner
|
|
95
99
|
|
96
100
|
def db_locator
|
97
101
|
case adapter
|
98
|
-
when 'mysql', 'postgres'
|
99
|
-
"#{username}:#{password}@#{host}:#{port}/#{database}"
|
100
102
|
when 'sqlite'
|
101
103
|
database
|
104
|
+
else
|
105
|
+
"#{username}:#{password}@#{host}:#{port}/#{database}"
|
102
106
|
end
|
103
107
|
end
|
104
108
|
|
105
|
-
|
106
|
-
|
107
|
-
::Escape.shell_command [
|
109
|
+
def taps_pull
|
110
|
+
args = [
|
108
111
|
'taps',
|
109
112
|
'pull',
|
110
113
|
"#{adapter}://#{db_locator}",
|
@@ -113,34 +116,10 @@ class DataMiner
|
|
113
116
|
'--tables',
|
114
117
|
source_table_name
|
115
118
|
]
|
116
|
-
|
119
|
+
child = ::POSIX::Spawn::Child.new *args
|
120
|
+
unless child.success?
|
121
|
+
raise %{[data_miner gem] Got "#{child.err}" back when tried to run "#{args.join(' ')}"}
|
122
|
+
end
|
117
123
|
end
|
118
|
-
|
119
|
-
# 2.3.5 mysql
|
120
|
-
# * <tt>:host</tt> - Defaults to "localhost".
|
121
|
-
# * <tt>:port</tt> - Defaults to 3306.
|
122
|
-
# * <tt>:socket</tt> - Defaults to "/tmp/mysql.sock".
|
123
|
-
# * <tt>:username</tt> - Defaults to "root"
|
124
|
-
# * <tt>:password</tt> - Defaults to nothing.
|
125
|
-
# * <tt>:database</tt> - The name of the database. No default, must be provided.
|
126
|
-
# * <tt>:encoding</tt> - (Optional) Sets the client encoding by executing "SET NAMES <encoding>" after connection.
|
127
|
-
# * <tt>:reconnect</tt> - Defaults to false (See MySQL documentation: http://dev.mysql.com/doc/refman/5.0/en/auto-reconnect.html).
|
128
|
-
# * <tt>:sslca</tt> - Necessary to use MySQL with an SSL connection.
|
129
|
-
# * <tt>:sslkey</tt> - Necessary to use MySQL with an SSL connection.
|
130
|
-
# * <tt>:sslcert</tt> - Necessary to use MySQL with an SSL connection.
|
131
|
-
# * <tt>:sslcapath</tt> - Necessary to use MySQL with an SSL connection.
|
132
|
-
# * <tt>:sslcipher</tt> - Necessary to use MySQL with an SSL connection.
|
133
|
-
# 2.3.5 mysql
|
134
|
-
# * <tt>:host</tt> - Defaults to "localhost".
|
135
|
-
# * <tt>:port</tt> - Defaults to 5432.
|
136
|
-
# * <tt>:username</tt> - Defaults to nothing.
|
137
|
-
# * <tt>:password</tt> - Defaults to nothing.
|
138
|
-
# * <tt>:database</tt> - The name of the database. No default, must be provided.
|
139
|
-
# * <tt>:schema_search_path</tt> - An optional schema search path for the connection given as a string of comma-separated schema names. This is backward-compatible with the <tt>:schema_order</tt> option.
|
140
|
-
# * <tt>:encoding</tt> - An optional client encoding that is used in a <tt>SET client_encoding TO <encoding></tt> call on the connection.
|
141
|
-
# * <tt>:min_messages</tt> - An optional client min messages that is used in a <tt>SET client_min_messages TO <min_messages></tt> call on the connection.
|
142
|
-
# * <tt>:allow_concurrency</tt> - If true, use async query methods so Ruby threads don't deadlock; otherwise, use blocking query methods.
|
143
|
-
# 2.3.5 sqlite[3]
|
144
|
-
# * <tt>:database</tt> - Path to the database file.
|
145
124
|
end
|
146
125
|
end
|
data/lib/data_miner/version.rb
CHANGED
data/test/helper.rb
CHANGED
@@ -3,7 +3,8 @@ require 'bundler'
|
|
3
3
|
Bundler.setup
|
4
4
|
require 'test/unit'
|
5
5
|
require 'shoulda'
|
6
|
-
require '
|
6
|
+
require 'mini_record'
|
7
|
+
require 'logger'
|
7
8
|
# require 'ruby-debug'
|
8
9
|
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
9
10
|
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
@@ -181,7 +181,9 @@ module TestDatabase
|
|
181
181
|
t.datetime "created_at"
|
182
182
|
t.datetime "updated_at"
|
183
183
|
t.float "emission_factor"
|
184
|
+
t.string "emission_factor_units"
|
184
185
|
t.float "annual_distance"
|
186
|
+
t.string "annual_distance_units"
|
185
187
|
t.string "code"
|
186
188
|
end
|
187
189
|
execute "ALTER TABLE automobile_fuel_types ADD PRIMARY KEY (code);"
|
@@ -55,4 +55,57 @@ class TestDataMinerAttribute < Test::Unit::TestCase
|
|
55
55
|
end
|
56
56
|
end
|
57
57
|
end
|
58
|
+
|
59
|
+
context '#set_record_from_row' do
|
60
|
+
setup do
|
61
|
+
@automobile_fuel_type = AutomobileFuelType.new
|
62
|
+
end
|
63
|
+
context 'nullify is true, wants units' do
|
64
|
+
setup do
|
65
|
+
@attribute = DataMiner::Attribute.new @automobile_fuel_type, 'annual_distance', :nullify => true, :units_field_name => 'annual_distance_units'
|
66
|
+
end
|
67
|
+
should 'set value and units to nil if field is blank' do
|
68
|
+
@attribute.set_record_from_row(@automobile_fuel_type,
|
69
|
+
'name' => 'electricity',
|
70
|
+
'annual_distance' => '',
|
71
|
+
'annual_distance_units' => ''
|
72
|
+
)
|
73
|
+
assert_nil @automobile_fuel_type.annual_distance
|
74
|
+
assert_nil @automobile_fuel_type.annual_distance_units
|
75
|
+
end
|
76
|
+
should 'set value and units if field is not blank' do
|
77
|
+
@attribute.set_record_from_row(@automobile_fuel_type,
|
78
|
+
'name' => 'electricity',
|
79
|
+
'annual_distance' => '100.0',
|
80
|
+
'annual_distance_units' => 'kilometres'
|
81
|
+
)
|
82
|
+
assert_equal 100.0, @automobile_fuel_type.annual_distance
|
83
|
+
assert_equal 'kilometres', @automobile_fuel_type.annual_distance_units
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
87
|
+
context 'nullify is false, wants units' do
|
88
|
+
setup do
|
89
|
+
@attribute = DataMiner::Attribute.new @automobile_fuel_type, 'annual_distance', :units_field_name => 'annual_distance_units'
|
90
|
+
end
|
91
|
+
should 'set value and units to blank if field is blank' do
|
92
|
+
@attribute.set_record_from_row(@automobile_fuel_type,
|
93
|
+
'name' => 'electricity',
|
94
|
+
'annual_distance' => '',
|
95
|
+
'annual_distance_units' => ''
|
96
|
+
)
|
97
|
+
assert_equal 0.0, @automobile_fuel_type.annual_distance
|
98
|
+
assert_equal '', @automobile_fuel_type.annual_distance_units
|
99
|
+
end
|
100
|
+
should 'set value and units if field is not blank' do
|
101
|
+
@attribute.set_record_from_row(@automobile_fuel_type,
|
102
|
+
'name' => 'electricity',
|
103
|
+
'annual_distance' => '100.0',
|
104
|
+
'annual_distance_units' => 'kilometres'
|
105
|
+
)
|
106
|
+
assert_equal 100.0, @automobile_fuel_type.annual_distance
|
107
|
+
assert_equal 'kilometres', @automobile_fuel_type.annual_distance_units
|
108
|
+
end
|
109
|
+
end
|
110
|
+
end
|
58
111
|
end
|
data/test/test_old_syntax.rb
CHANGED
@@ -3,24 +3,15 @@ require 'helper'
|
|
3
3
|
|
4
4
|
TestDatabase.load_models
|
5
5
|
|
6
|
-
class TappedAirport < ActiveRecord::Base
|
7
|
-
set_primary_key :iata_code
|
8
|
-
|
9
|
-
data_miner do
|
10
|
-
tap "Brighter Planet's sanitized airports table", "http://carbon:neutral@data.brighterplanet.com:5001", :source_table_name => 'airports'
|
11
|
-
# tap "Brighter Planet's sanitized airports table", "http://carbon:neutral@localhost:5000", :source_table_name => 'airports'
|
12
|
-
end
|
13
|
-
end
|
14
|
-
|
15
6
|
class CensusRegion < ActiveRecord::Base
|
16
7
|
set_primary_key :number
|
17
|
-
|
8
|
+
|
18
9
|
data_miner do
|
19
10
|
import :url => 'http://www.census.gov/popest/geographic/codes02.csv', :skip => 9, :select => lambda { |row| row['Region'].to_i > 0 and row['Division'].to_s.strip == 'X'} do
|
20
11
|
key 'number', :field_name => 'Region'
|
21
12
|
store 'name', :field_name => 'Name'
|
22
13
|
end
|
23
|
-
|
14
|
+
|
24
15
|
# pretend this is a different data source
|
25
16
|
# fake! just for testing purposes
|
26
17
|
import :url => 'http://www.census.gov/popest/geographic/codes02.csv', :skip => 9, :select => lambda { |row| row['Region'].to_i > 0 and row['Division'].to_s.strip == 'X'} do
|
@@ -33,7 +24,7 @@ end
|
|
33
24
|
# smaller than a region
|
34
25
|
class CensusDivision < ActiveRecord::Base
|
35
26
|
set_primary_key :number
|
36
|
-
|
27
|
+
|
37
28
|
data_miner do
|
38
29
|
import :url => 'http://www.census.gov/popest/geographic/codes02.csv', :skip => 9, :select => lambda { |row| row['Division'].to_s.strip != 'X' and row['FIPS CODE STATE'].to_s.strip == 'X'} do
|
39
30
|
key 'number', :field_name => 'Division'
|
@@ -46,7 +37,7 @@ end
|
|
46
37
|
|
47
38
|
class CensusDivisionDeux < ActiveRecord::Base
|
48
39
|
set_primary_key :number
|
49
|
-
|
40
|
+
|
50
41
|
data_miner do
|
51
42
|
import :url => 'http://www.census.gov/popest/geographic/codes02.csv', :skip => 9, :select => lambda { |row| row['Division'].to_s.strip != 'X' and row['FIPS CODE STATE'].to_s.strip == 'X'} do
|
52
43
|
key 'number', :field_name => 'Division'
|
@@ -59,9 +50,9 @@ end
|
|
59
50
|
|
60
51
|
class CrosscallingCensusRegion < ActiveRecord::Base
|
61
52
|
set_primary_key :number
|
62
|
-
|
53
|
+
|
63
54
|
has_many :crosscalling_census_divisions
|
64
|
-
|
55
|
+
|
65
56
|
data_miner do
|
66
57
|
process "derive ourselves from the census divisions table (i.e., cross call census divisions)" do
|
67
58
|
CrosscallingCensusDivision.run_data_miner!
|
@@ -80,9 +71,9 @@ end
|
|
80
71
|
|
81
72
|
class CrosscallingCensusDivision < ActiveRecord::Base
|
82
73
|
set_primary_key :number
|
83
|
-
|
74
|
+
|
84
75
|
belongs_to :crosscalling_census_regions, :foreign_key => 'census_region_number'
|
85
|
-
|
76
|
+
|
86
77
|
data_miner do
|
87
78
|
import "get a list of census divisions and their regions", :url => 'http://www.census.gov/popest/geographic/codes02.csv', :skip => 9, :select => lambda { |row| row['Division'].to_s.strip != 'X' and row['FIPS CODE STATE'].to_s.strip == 'X'} do
|
88
79
|
key 'number', :field_name => 'Division'
|
@@ -90,7 +81,7 @@ class CrosscallingCensusDivision < ActiveRecord::Base
|
|
90
81
|
store 'census_region_number', :field_name => 'Region'
|
91
82
|
store 'census_region_name', :field_name => 'Region', :dictionary => { :input => 'number', :output => 'name', :url => 'http://data.brighterplanet.com/census_regions.csv' }
|
92
83
|
end
|
93
|
-
|
84
|
+
|
94
85
|
process "make sure my parent object is set up (i.e., cross-call it)" do
|
95
86
|
CrosscallingCensusRegion.run_data_miner!
|
96
87
|
end
|
@@ -99,18 +90,18 @@ end
|
|
99
90
|
|
100
91
|
class ResidentialEnergyConsumptionSurveyResponse < ActiveRecord::Base
|
101
92
|
set_primary_key :department_of_energy_identifier
|
102
|
-
|
93
|
+
|
103
94
|
data_miner do
|
104
95
|
process 'Define some unit conversions' do
|
105
96
|
Conversions.register :kbtus, :joules, 1_000.0 * 1_055.05585
|
106
97
|
Conversions.register :square_feet, :square_metres, 0.09290304
|
107
98
|
end
|
108
|
-
|
99
|
+
|
109
100
|
# conversions are NOT performed here, since we first have to zero out legitimate skips
|
110
101
|
# otherwise you will get values like "999 pounds = 453.138778 kilograms" (where 999 is really a legit skip)
|
111
102
|
import 'RECs 2005 (but not converting units to metric just yet)', :url => 'http://www.eia.doe.gov/emeu/recs/recspubuse05/datafiles/RECS05alldata.csv' do
|
112
103
|
key 'department_of_energy_identifier', :field_name => 'DOEID'
|
113
|
-
|
104
|
+
|
114
105
|
store 'residence_class', :field_name => 'TYPEHUQ', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/typehuq/typehuq.csv' }
|
115
106
|
store 'construction_year', :field_name => 'YEARMADE', :dictionary => { :input => 'Code', :sprintf => '%02d', :output => 'Date in the middle (synthetic)', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/yearmade/yearmade.csv' }
|
116
107
|
store 'construction_period', :field_name => 'YEARMADE', :dictionary => { :input => 'Code', :sprintf => '%02d', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/yearmade/yearmade.csv' }
|
@@ -120,12 +111,12 @@ class ResidentialEnergyConsumptionSurveyResponse < ActiveRecord::Base
|
|
120
111
|
store 'window_ac_use', :field_name => 'USEWWAC', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/usewwac/usewwac.csv' }
|
121
112
|
store 'clothes_washer_use', :field_name => 'WASHLOAD', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/washload/washload.csv' }
|
122
113
|
store 'clothes_dryer_use', :field_name => 'DRYRUSE', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/dryruse/dryruse.csv' }
|
123
|
-
|
114
|
+
|
124
115
|
store 'census_division_number', :field_name => 'DIVISION'
|
125
116
|
store 'census_division_name', :field_name => 'DIVISION', :dictionary => { :input => 'number', :output => 'name', :url => 'http://data.brighterplanet.com/census_divisions.csv' }
|
126
117
|
store 'census_region_number', :field_name => 'DIVISION', :dictionary => { :input => 'number', :output => 'census_region_number', :url => 'http://data.brighterplanet.com/census_divisions.csv' }
|
127
118
|
store 'census_region_name', :field_name => 'DIVISION', :dictionary => { :input => 'number', :output => 'census_region_name', :url => 'http://data.brighterplanet.com/census_divisions.csv' }
|
128
|
-
|
119
|
+
|
129
120
|
store 'floorspace', :field_name => 'TOTSQFT'
|
130
121
|
store 'residents', :field_name => 'NHSLDMEM'
|
131
122
|
store 'ownership', :field_name => 'KOWNRENT'
|
@@ -248,15 +239,15 @@ class ResidentialEnergyConsumptionSurveyResponse < ActiveRecord::Base
|
|
248
239
|
update_all "#{attr_name} = #{attr_name} * #{Conversions::Unit.exchange_rate from_units, to_units}"
|
249
240
|
end
|
250
241
|
end
|
251
|
-
|
242
|
+
|
252
243
|
process 'Add a new field "rooms" that estimates how many rooms are in the house' do
|
253
244
|
update_all 'rooms = total_rooms + bathrooms/2 + halfbaths/4 + heated_garage*(attached_1car_garage + detached_1car_garage + 2*(attached_2car_garage + detached_2car_garage) + 3*(attached_3car_garage + detached_3car_garage))'
|
254
245
|
end
|
255
|
-
|
246
|
+
|
256
247
|
process 'Add a new field "lighting_use" that estimates how many hours light bulbs are turned on in the house' do
|
257
248
|
update_all 'lighting_use = 2*(lights_on_1_to_4_hours + efficient_lights_on_1_to_4_hours) + 8*(lights_on_4_to_12_hours + efficient_lights_on_4_to_12_hours) + 16*(lights_on_over_12_hours + efficient_lights_on_over_12_hours) + 12*(outdoor_all_night_lights + outdoor_all_night_gas_lights)'
|
258
249
|
end
|
259
|
-
|
250
|
+
|
260
251
|
process 'Add a new field "lighting_efficiency" that estimates what percentage of light bulbs in a house are energy-efficient' do
|
261
252
|
update_all 'lighting_efficiency = (2*efficient_lights_on_1_to_4_hours + 8*efficient_lights_on_4_to_12_hours + 16*efficient_lights_on_over_12_hours) / lighting_use'
|
262
253
|
end
|
@@ -494,7 +485,7 @@ class T100FlightSegment < ActiveRecord::Base
|
|
494
485
|
store 'data_source', :field_name => 'DATA_SOURCE'
|
495
486
|
end
|
496
487
|
end
|
497
|
-
|
488
|
+
|
498
489
|
process 'Derive freight share as a fraction of payload' do
|
499
490
|
update_all 'freight_share = (freight + mail) / payload', 'payload > 0'
|
500
491
|
end
|
@@ -502,7 +493,7 @@ class T100FlightSegment < ActiveRecord::Base
|
|
502
493
|
process 'Derive load factor, which is passengers divided by the total seats available' do
|
503
494
|
update_all 'load_factor = passengers / seats', 'passengers <= seats'
|
504
495
|
end
|
505
|
-
|
496
|
+
|
506
497
|
process 'Derive average seats per departure' do
|
507
498
|
update_all 'seats_per_departure = seats / departures_performed', 'departures_performed > 0'
|
508
499
|
end
|
@@ -512,38 +503,38 @@ end
|
|
512
503
|
# note that this depends on stuff in Aircraft
|
513
504
|
class AircraftDeux < ActiveRecord::Base
|
514
505
|
set_primary_key :icao_code
|
515
|
-
|
506
|
+
|
516
507
|
# defined on the class because we defined the errata with a shorthand
|
517
508
|
class << self
|
518
509
|
def is_not_attributed_to_aerospatiale?(row)
|
519
510
|
not row['Manufacturer'] =~ /AEROSPATIALE/i
|
520
511
|
end
|
521
|
-
|
512
|
+
|
522
513
|
def is_not_attributed_to_cessna?(row)
|
523
514
|
not row['Manufacturer'] =~ /CESSNA/i
|
524
515
|
end
|
525
|
-
|
516
|
+
|
526
517
|
def is_not_attributed_to_learjet?(row)
|
527
518
|
not row['Manufacturer'] =~ /LEAR/i
|
528
519
|
end
|
529
|
-
|
520
|
+
|
530
521
|
def is_not_attributed_to_dehavilland?(row)
|
531
522
|
not row['Manufacturer'] =~ /DE ?HAVILLAND/i
|
532
523
|
end
|
533
|
-
|
524
|
+
|
534
525
|
def is_not_attributed_to_mcdonnell_douglas?(row)
|
535
526
|
not row['Manufacturer'] =~ /MCDONNELL DOUGLAS/i
|
536
527
|
end
|
537
|
-
|
528
|
+
|
538
529
|
def is_not_a_dc_plane?(row)
|
539
530
|
not row['Model'] =~ /DC/i
|
540
531
|
end
|
541
|
-
|
532
|
+
|
542
533
|
def is_a_crj_900?(row)
|
543
534
|
row['Designator'].downcase == 'crj9'
|
544
535
|
end
|
545
536
|
end
|
546
|
-
|
537
|
+
|
547
538
|
data_miner do
|
548
539
|
# ('A'..'Z').each do |letter|
|
549
540
|
# Note: for the purposes of testing, only importing "D"
|
@@ -567,28 +558,24 @@ end
|
|
567
558
|
class AutomobileMakeFleetYear < ActiveRecord::Base
|
568
559
|
set_primary_key :name
|
569
560
|
|
570
|
-
|
571
|
-
|
572
|
-
|
573
|
-
|
574
|
-
|
575
|
-
|
576
|
-
|
577
|
-
|
578
|
-
|
579
|
-
|
580
|
-
datetime "updated_at"
|
581
|
-
end
|
561
|
+
col :name
|
562
|
+
col :make_name
|
563
|
+
col :fleet
|
564
|
+
col :year, :type => :integer
|
565
|
+
col :fuel_efficiency, :type => :float
|
566
|
+
col :fuel_efficiency_units
|
567
|
+
col :volume, :type => :integer
|
568
|
+
col :make_year_name
|
569
|
+
col :created_at, :type => :datetime
|
570
|
+
col :updated_at, :type => :datetime
|
582
571
|
|
583
572
|
data_miner do
|
584
|
-
process
|
585
|
-
|
586
|
-
end
|
587
|
-
|
573
|
+
process :auto_upgrade!
|
574
|
+
|
588
575
|
process "finish if i tell you to" do
|
589
576
|
raise DataMiner::Finish if $force_finish
|
590
577
|
end
|
591
|
-
|
578
|
+
|
592
579
|
process "skip if i tell you to" do
|
593
580
|
raise DataMiner::Skip if $force_skip
|
594
581
|
end
|
@@ -609,36 +596,28 @@ end
|
|
609
596
|
|
610
597
|
class CensusDivisionTrois < ActiveRecord::Base
|
611
598
|
set_primary_key :number_code
|
612
|
-
|
613
|
-
|
614
|
-
|
615
|
-
|
616
|
-
|
617
|
-
|
618
|
-
|
619
|
-
|
620
|
-
end
|
621
|
-
|
599
|
+
|
600
|
+
col :number_code
|
601
|
+
col :name
|
602
|
+
col :census_region_name
|
603
|
+
col :census_region_number, :type => :integer
|
604
|
+
add_index 'census_region_name', :name => 'homefry'
|
605
|
+
add_index ['number_code', 'name', 'census_region_name', 'census_region_number']
|
606
|
+
|
622
607
|
data_miner do
|
623
|
-
process
|
624
|
-
force_schema!
|
625
|
-
end
|
608
|
+
process :auto_upgrade!
|
626
609
|
end
|
627
610
|
end
|
628
611
|
|
629
612
|
class CensusDivisionFour < ActiveRecord::Base
|
630
|
-
|
631
|
-
|
632
|
-
|
633
|
-
|
634
|
-
|
635
|
-
index 'census_region_name', :name => 'homefry'
|
636
|
-
end
|
613
|
+
col :number_code
|
614
|
+
col :name
|
615
|
+
col :census_region_name
|
616
|
+
col :census_region_number, :type => :integer
|
617
|
+
add_index 'census_region_name', :name => 'homefry'
|
637
618
|
|
638
619
|
data_miner do
|
639
|
-
process
|
640
|
-
force_schema!
|
641
|
-
end
|
620
|
+
process :auto_upgrade!
|
642
621
|
end
|
643
622
|
end
|
644
623
|
|
@@ -661,11 +640,11 @@ class TestOldSyntax < Test::Unit::TestCase
|
|
661
640
|
if AutomobileMakeFleetYear.table_exists?
|
662
641
|
ActiveRecord::Base.connection.execute 'DROP TABLE automobile_make_fleet_years;'
|
663
642
|
end
|
664
|
-
AutomobileMakeFleetYear.
|
643
|
+
AutomobileMakeFleetYear.auto_upgrade!
|
665
644
|
assert AutomobileMakeFleetYear.table_exists?
|
666
645
|
end
|
667
646
|
end
|
668
|
-
|
647
|
+
|
669
648
|
if ENV['ALL'] == 'true' or ENV['FAST'] == 'true'
|
670
649
|
should 'append to an existing config' do
|
671
650
|
AutomobileFuelType.class_eval do
|
@@ -685,7 +664,7 @@ class TestOldSyntax < Test::Unit::TestCase
|
|
685
664
|
assert_equal 'http://example1.com', AutomobileFuelType.data_miner_config.steps[-2].table.url
|
686
665
|
assert_equal 'http://example2.com', AutomobileFuelType.data_miner_config.steps[-1].table.url
|
687
666
|
end
|
688
|
-
|
667
|
+
|
689
668
|
should 'override an existing data_miner configuration' do
|
690
669
|
AutomobileFuelType.class_eval do
|
691
670
|
data_miner do
|
@@ -710,7 +689,7 @@ class TestOldSyntax < Test::Unit::TestCase
|
|
710
689
|
AutomobileMakeFleetYear.run_data_miner!
|
711
690
|
assert AutomobileMakeFleetYear.exists?(:name => 'Alfa Romeo IP 1978')
|
712
691
|
end
|
713
|
-
|
692
|
+
|
714
693
|
should "stop and register skipped if it gets a DataMiner::Skip" do
|
715
694
|
AutomobileMakeFleetYear.delete_all
|
716
695
|
AutomobileMakeFleetYear.data_miner_runs.delete_all
|
@@ -723,124 +702,67 @@ class TestOldSyntax < Test::Unit::TestCase
|
|
723
702
|
AutomobileMakeFleetYear.run_data_miner!
|
724
703
|
assert AutomobileMakeFleetYear.exists?(:name => 'Alfa Romeo IP 1978')
|
725
704
|
end
|
726
|
-
|
727
|
-
should "eagerly enforce a schema" do
|
728
|
-
ActiveRecord::Base.connection.create_table 'census_division_trois', :force => true, :options => 'ENGINE=InnoDB default charset=utf8' do |t|
|
729
|
-
t.string 'name'
|
730
|
-
t.string 'census_region_name'
|
731
|
-
# t.integer 'census_region_number'
|
732
|
-
end
|
733
|
-
ActiveRecord::Base.connection.execute 'ALTER TABLE census_division_trois ADD INDEX (census_region_name)'
|
734
|
-
CensusDivisionTrois.reset_column_information
|
735
|
-
missing_columns = %w{ census_region_number }
|
736
705
|
|
737
|
-
# sanity check
|
738
|
-
missing_columns.each do |column|
|
739
|
-
assert_false CensusDivisionTrois.column_names.include?(column)
|
740
|
-
end
|
741
|
-
assert_false ActiveRecord::Base.connection.indexes(CensusDivisionTrois.table_name).any? { |index| index.name == 'homefry' }
|
742
|
-
|
743
|
-
3.times do
|
744
|
-
CensusDivisionTrois.run_data_miner!
|
745
|
-
missing_columns.each do |column|
|
746
|
-
assert CensusDivisionTrois.column_names.include?(column)
|
747
|
-
end
|
748
|
-
assert ActiveRecord::Base.connection.indexes(CensusDivisionTrois.table_name).any? { |index| index.name == 'homefry' }
|
749
|
-
assert_equal :string, CensusDivisionTrois.columns_hash[CensusDivisionTrois.primary_key].type
|
750
|
-
end
|
751
|
-
end
|
752
|
-
|
753
|
-
should "let schemas work with default id primary keys" do
|
754
|
-
ActiveRecord::Base.connection.create_table 'census_division_fours', :force => true, :options => 'ENGINE=InnoDB default charset=utf8' do |t|
|
755
|
-
t.string 'name'
|
756
|
-
t.string 'census_region_name'
|
757
|
-
# t.integer 'census_region_number'
|
758
|
-
end
|
759
|
-
ActiveRecord::Base.connection.execute 'ALTER TABLE census_division_fours ADD INDEX (census_region_name)'
|
760
|
-
CensusDivisionFour.reset_column_information
|
761
|
-
missing_columns = %w{ census_region_number }
|
762
|
-
|
763
|
-
# sanity check
|
764
|
-
missing_columns.each do |column|
|
765
|
-
assert_false CensusDivisionFour.column_names.include?(column)
|
766
|
-
end
|
767
|
-
assert_false ActiveRecord::Base.connection.indexes(CensusDivisionFour.table_name).any? { |index| index.name == 'homefry' }
|
768
|
-
|
769
|
-
3.times do
|
770
|
-
CensusDivisionFour.run_data_miner!
|
771
|
-
missing_columns.each do |column|
|
772
|
-
assert CensusDivisionFour.column_names.include?(column)
|
773
|
-
end
|
774
|
-
assert ActiveRecord::Base.connection.indexes(CensusDivisionFour.table_name).any? { |index| index.name == 'homefry' }
|
775
|
-
assert_equal :integer, CensusDivisionFour.columns_hash[CensusDivisionFour.primary_key].type
|
776
|
-
end
|
777
|
-
end
|
778
|
-
|
779
706
|
should "allow specifying dictionaries explicitly" do
|
780
707
|
CensusDivisionDeux.run_data_miner!
|
781
708
|
assert_equal 'South Region', CensusDivisionDeux.find(5).census_region_name
|
782
709
|
end
|
783
|
-
|
710
|
+
|
784
711
|
should "be able to key on things other than the primary key" do
|
785
712
|
Aircraft.run_data_miner!
|
786
713
|
assert_equal 'SP', Aircraft.find('DHC6').brighter_planet_aircraft_class_code
|
787
714
|
end
|
788
|
-
|
715
|
+
|
789
716
|
should "be able to synthesize rows without using a full parser class" do
|
790
717
|
AutomobileMakeFleetYear.run_data_miner!
|
791
718
|
assert AutomobileMakeFleetYear.exists?(:name => 'Alfa Romeo IP 1978')
|
792
719
|
end
|
793
|
-
|
720
|
+
|
794
721
|
should "keep a call stack so that you can call run_data_miner! on a child" do
|
795
722
|
CrosscallingCensusDivision.run_data_miner!
|
796
723
|
assert CrosscallingCensusDivision.exists? :name => 'Mountain Division', :number => 8, :census_region_number => 4, :census_region_name => 'West Region'
|
797
724
|
assert CrosscallingCensusRegion.exists? :name => 'West Region', :number => 4
|
798
725
|
end
|
799
|
-
|
726
|
+
|
800
727
|
should "keep a call stack so that you can call run_data_miner! on a parent" do
|
801
728
|
CrosscallingCensusRegion.run_data_miner!
|
802
729
|
assert CrosscallingCensusDivision.exists? :name => 'Mountain Division', :number => 8, :census_region_number => 4, :census_region_name => 'West Region'
|
803
730
|
assert CrosscallingCensusRegion.exists? :name => 'West Region', :number => 4
|
804
731
|
end
|
805
|
-
|
732
|
+
|
806
733
|
should "import airports" do
|
807
734
|
Airport.run_data_miner!
|
808
735
|
assert Airport.count > 0
|
809
736
|
end
|
810
|
-
|
811
|
-
should "tap airports" do
|
812
|
-
TappedAirport.run_data_miner!
|
813
|
-
assert TappedAirport.count > 0
|
814
|
-
end
|
815
|
-
|
737
|
+
|
816
738
|
should "pull in census divisions using a data.brighterplanet.com dictionary" do
|
817
739
|
CensusDivision.run_data_miner!
|
818
740
|
assert CensusDivision.count > 0
|
819
741
|
end
|
820
|
-
|
742
|
+
|
821
743
|
should "have a way to queue up runs that works with delated_job's send_later" do
|
822
744
|
assert AutomobileVariant.respond_to?(:run_data_miner!)
|
823
745
|
end
|
824
|
-
|
746
|
+
|
825
747
|
should "be idempotent" do
|
826
748
|
Country.data_miner_config.run
|
827
749
|
a = Country.count
|
828
750
|
Country.data_miner_config.run
|
829
751
|
b = Country.count
|
830
752
|
assert_equal a, b
|
831
|
-
|
753
|
+
|
832
754
|
CensusRegion.data_miner_config.run
|
833
755
|
a = CensusRegion.count
|
834
756
|
CensusRegion.data_miner_config.run
|
835
757
|
b = CensusRegion.count
|
836
758
|
assert_equal a, b
|
837
759
|
end
|
838
|
-
|
760
|
+
|
839
761
|
should "hash things" do
|
840
762
|
AutomobileVariant.data_miner_config.steps[0].run
|
841
763
|
assert AutomobileVariant.first.row_hash.present?
|
842
764
|
end
|
843
|
-
|
765
|
+
|
844
766
|
should "process a callback block instead of a method" do
|
845
767
|
AutomobileVariant.delete_all
|
846
768
|
AutomobileVariant.data_miner_config.steps[0].run
|
@@ -848,7 +770,7 @@ class TestOldSyntax < Test::Unit::TestCase
|
|
848
770
|
AutomobileVariant.data_miner_config.steps.last.run
|
849
771
|
assert AutomobileVariant.first.fuel_efficiency_city.present?
|
850
772
|
end
|
851
|
-
|
773
|
+
|
852
774
|
should "keep a log when it does a run" do
|
853
775
|
approx_started_at = Time.now
|
854
776
|
DataMiner.run :resource_names => %w{ Country }
|
@@ -857,7 +779,7 @@ class TestOldSyntax < Test::Unit::TestCase
|
|
857
779
|
assert (last_run.started_at - approx_started_at).abs < 5 # seconds
|
858
780
|
assert (last_run.terminated_at - approx_terminated_at).abs < 5 # seconds
|
859
781
|
end
|
860
|
-
|
782
|
+
|
861
783
|
should "request a re-import from scratch" do
|
862
784
|
c = Country.new
|
863
785
|
c.iso_3166 = 'JUNK'
|
@@ -866,35 +788,35 @@ class TestOldSyntax < Test::Unit::TestCase
|
|
866
788
|
DataMiner.run :resource_names => %w{ Country }, :from_scratch => true
|
867
789
|
assert !Country.exists?(:iso_3166 => 'JUNK')
|
868
790
|
end
|
869
|
-
|
791
|
+
|
870
792
|
should "know what runs were on a resource" do
|
871
793
|
DataMiner.run :resource_names => %w{ Country }
|
872
794
|
DataMiner.run :resource_names => %w{ Country }
|
873
795
|
assert Country.data_miner_runs.count > 0
|
874
796
|
end
|
875
797
|
end
|
876
|
-
|
798
|
+
|
877
799
|
if ENV['ALL'] == 'true' or ENV['SLOW'] == 'true'
|
878
800
|
should "allow errata to be specified with a shorthand, assuming the responder is the resource class itself" do
|
879
801
|
AircraftDeux.run_data_miner!
|
880
802
|
assert AircraftDeux.exists? :icao_code => 'DC91', :bts_aircraft_type_code => '630'
|
881
803
|
end
|
882
|
-
|
804
|
+
|
883
805
|
should "mine aircraft" do
|
884
806
|
Aircraft.run_data_miner!
|
885
807
|
assert Aircraft.exists? :icao_code => 'DC91', :bts_aircraft_type_code => '630'
|
886
808
|
end
|
887
|
-
|
809
|
+
|
888
810
|
should "mine automobile variants" do
|
889
811
|
AutomobileVariant.run_data_miner!
|
890
812
|
assert AutomobileVariant.count('make_name LIKE "%tesla"') > 0
|
891
813
|
end
|
892
|
-
|
814
|
+
|
893
815
|
should "mine T100 flight segments" do
|
894
816
|
T100FlightSegment.run_data_miner!
|
895
817
|
assert T100FlightSegment.count('dest_country_name LIKE "%United States"') > 0
|
896
818
|
end
|
897
|
-
|
819
|
+
|
898
820
|
should "mine residence survey responses" do
|
899
821
|
ResidentialEnergyConsumptionSurveyResponse.run_data_miner!
|
900
822
|
assert ResidentialEnergyConsumptionSurveyResponse.find(6).residence_class.start_with?('Single-family detached house')
|
data/test/test_tap.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
$:.push File.dirname(__FILE__)
|
2
|
+
require 'helper'
|
3
|
+
|
4
|
+
TestDatabase.load_models
|
5
|
+
|
6
|
+
class TappedAirport < ActiveRecord::Base
|
7
|
+
set_primary_key :iata_code
|
8
|
+
|
9
|
+
data_miner do
|
10
|
+
tap "Brighter Planet's sanitized airports table", "http://carbon:neutral@data.brighterplanet.com:5001", :source_table_name => 'airports'
|
11
|
+
# tap "Brighter Planet's sanitized airports table", "http://carbon:neutral@localhost:5000", :source_table_name => 'airports'
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
|
16
|
+
class TestTap < Test::Unit::TestCase
|
17
|
+
should "tap airports" do
|
18
|
+
TappedAirport.run_data_miner!
|
19
|
+
assert TappedAirport.count > 0
|
20
|
+
end
|
21
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_miner
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.3.0
|
4
|
+
version: 1.3.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -11,11 +11,11 @@ authors:
|
|
11
11
|
autorequire:
|
12
12
|
bindir: bin
|
13
13
|
cert_chain: []
|
14
|
-
date: 2011-09
|
14
|
+
date: 2011-12-09 00:00:00.000000000Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
17
|
name: remote_table
|
18
|
-
requirement: &
|
18
|
+
requirement: &2163341440 !ruby/object:Gem::Requirement
|
19
19
|
none: false
|
20
20
|
requirements:
|
21
21
|
- - ! '>='
|
@@ -23,21 +23,10 @@ dependencies:
|
|
23
23
|
version: 1.2.2
|
24
24
|
type: :runtime
|
25
25
|
prerelease: false
|
26
|
-
version_requirements: *
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: escape
|
29
|
-
requirement: &2166719520 !ruby/object:Gem::Requirement
|
30
|
-
none: false
|
31
|
-
requirements:
|
32
|
-
- - ! '>='
|
33
|
-
- !ruby/object:Gem::Version
|
34
|
-
version: 0.0.4
|
35
|
-
type: :runtime
|
36
|
-
prerelease: false
|
37
|
-
version_requirements: *2166719520
|
26
|
+
version_requirements: *2163341440
|
38
27
|
- !ruby/object:Gem::Dependency
|
39
28
|
name: activerecord
|
40
|
-
requirement: &
|
29
|
+
requirement: &2163340680 !ruby/object:Gem::Requirement
|
41
30
|
none: false
|
42
31
|
requirements:
|
43
32
|
- - ! '>='
|
@@ -45,10 +34,10 @@ dependencies:
|
|
45
34
|
version: 2.3.4
|
46
35
|
type: :runtime
|
47
36
|
prerelease: false
|
48
|
-
version_requirements: *
|
37
|
+
version_requirements: *2163340680
|
49
38
|
- !ruby/object:Gem::Dependency
|
50
39
|
name: activesupport
|
51
|
-
requirement: &
|
40
|
+
requirement: &2163340220 !ruby/object:Gem::Requirement
|
52
41
|
none: false
|
53
42
|
requirements:
|
54
43
|
- - ! '>='
|
@@ -56,10 +45,10 @@ dependencies:
|
|
56
45
|
version: 2.3.4
|
57
46
|
type: :runtime
|
58
47
|
prerelease: false
|
59
|
-
version_requirements: *
|
48
|
+
version_requirements: *2163340220
|
60
49
|
- !ruby/object:Gem::Dependency
|
61
50
|
name: conversions
|
62
|
-
requirement: &
|
51
|
+
requirement: &2163339760 !ruby/object:Gem::Requirement
|
63
52
|
none: false
|
64
53
|
requirements:
|
65
54
|
- - ! '>='
|
@@ -67,10 +56,10 @@ dependencies:
|
|
67
56
|
version: 1.4.4
|
68
57
|
type: :runtime
|
69
58
|
prerelease: false
|
70
|
-
version_requirements: *
|
59
|
+
version_requirements: *2163339760
|
71
60
|
- !ruby/object:Gem::Dependency
|
72
61
|
name: blockenspiel
|
73
|
-
requirement: &
|
62
|
+
requirement: &2163339260 !ruby/object:Gem::Requirement
|
74
63
|
none: false
|
75
64
|
requirements:
|
76
65
|
- - ! '>='
|
@@ -78,43 +67,54 @@ dependencies:
|
|
78
67
|
version: 0.3.2
|
79
68
|
type: :runtime
|
80
69
|
prerelease: false
|
81
|
-
version_requirements: *
|
70
|
+
version_requirements: *2163339260
|
82
71
|
- !ruby/object:Gem::Dependency
|
83
|
-
name:
|
84
|
-
requirement: &
|
72
|
+
name: errata
|
73
|
+
requirement: &2163338800 !ruby/object:Gem::Requirement
|
85
74
|
none: false
|
86
75
|
requirements:
|
87
76
|
- - ! '>='
|
88
77
|
- !ruby/object:Gem::Version
|
89
|
-
version: 0.
|
78
|
+
version: 1.0.1
|
90
79
|
type: :runtime
|
91
80
|
prerelease: false
|
92
|
-
version_requirements: *
|
81
|
+
version_requirements: *2163338800
|
93
82
|
- !ruby/object:Gem::Dependency
|
94
|
-
name:
|
95
|
-
requirement: &
|
83
|
+
name: posix-spawn
|
84
|
+
requirement: &2163338420 !ruby/object:Gem::Requirement
|
96
85
|
none: false
|
97
86
|
requirements:
|
98
87
|
- - ! '>='
|
99
88
|
- !ruby/object:Gem::Version
|
100
|
-
version:
|
89
|
+
version: '0'
|
101
90
|
type: :runtime
|
102
91
|
prerelease: false
|
103
|
-
version_requirements: *
|
92
|
+
version_requirements: *2163338420
|
104
93
|
- !ruby/object:Gem::Dependency
|
105
|
-
name:
|
106
|
-
requirement: &
|
94
|
+
name: taps
|
95
|
+
requirement: &2163337960 !ruby/object:Gem::Requirement
|
107
96
|
none: false
|
108
97
|
requirements:
|
109
98
|
- - ! '>='
|
110
99
|
- !ruby/object:Gem::Version
|
111
|
-
version: 0
|
100
|
+
version: '0'
|
101
|
+
type: :runtime
|
102
|
+
prerelease: false
|
103
|
+
version_requirements: *2163337960
|
104
|
+
- !ruby/object:Gem::Dependency
|
105
|
+
name: mini_record-compat
|
106
|
+
requirement: &2163337540 !ruby/object:Gem::Requirement
|
107
|
+
none: false
|
108
|
+
requirements:
|
109
|
+
- - ! '>='
|
110
|
+
- !ruby/object:Gem::Version
|
111
|
+
version: '0'
|
112
112
|
type: :development
|
113
113
|
prerelease: false
|
114
|
-
version_requirements: *
|
114
|
+
version_requirements: *2163337540
|
115
115
|
- !ruby/object:Gem::Dependency
|
116
116
|
name: loose_tight_dictionary
|
117
|
-
requirement: &
|
117
|
+
requirement: &2163337040 !ruby/object:Gem::Requirement
|
118
118
|
none: false
|
119
119
|
requirements:
|
120
120
|
- - ! '>='
|
@@ -122,10 +122,10 @@ dependencies:
|
|
122
122
|
version: 0.0.5
|
123
123
|
type: :development
|
124
124
|
prerelease: false
|
125
|
-
version_requirements: *
|
125
|
+
version_requirements: *2163337040
|
126
126
|
- !ruby/object:Gem::Dependency
|
127
127
|
name: test-unit
|
128
|
-
requirement: &
|
128
|
+
requirement: &2163336620 !ruby/object:Gem::Requirement
|
129
129
|
none: false
|
130
130
|
requirements:
|
131
131
|
- - ! '>='
|
@@ -133,10 +133,10 @@ dependencies:
|
|
133
133
|
version: '0'
|
134
134
|
type: :development
|
135
135
|
prerelease: false
|
136
|
-
version_requirements: *
|
136
|
+
version_requirements: *2163336620
|
137
137
|
- !ruby/object:Gem::Dependency
|
138
138
|
name: shoulda
|
139
|
-
requirement: &
|
139
|
+
requirement: &2163336100 !ruby/object:Gem::Requirement
|
140
140
|
none: false
|
141
141
|
requirements:
|
142
142
|
- - ! '>='
|
@@ -144,10 +144,10 @@ dependencies:
|
|
144
144
|
version: '0'
|
145
145
|
type: :development
|
146
146
|
prerelease: false
|
147
|
-
version_requirements: *
|
147
|
+
version_requirements: *2163336100
|
148
148
|
- !ruby/object:Gem::Dependency
|
149
149
|
name: mysql
|
150
|
-
requirement: &
|
150
|
+
requirement: &2163335620 !ruby/object:Gem::Requirement
|
151
151
|
none: false
|
152
152
|
requirements:
|
153
153
|
- - ! '>='
|
@@ -155,10 +155,10 @@ dependencies:
|
|
155
155
|
version: '0'
|
156
156
|
type: :development
|
157
157
|
prerelease: false
|
158
|
-
version_requirements: *
|
158
|
+
version_requirements: *2163335620
|
159
159
|
- !ruby/object:Gem::Dependency
|
160
160
|
name: rake
|
161
|
-
requirement: &
|
161
|
+
requirement: &2163335040 !ruby/object:Gem::Requirement
|
162
162
|
none: false
|
163
163
|
requirements:
|
164
164
|
- - ! '>='
|
@@ -166,7 +166,7 @@ dependencies:
|
|
166
166
|
version: '0'
|
167
167
|
type: :development
|
168
168
|
prerelease: false
|
169
|
-
version_requirements: *
|
169
|
+
version_requirements: *2163335040
|
170
170
|
description: Mine remote data into your ActiveRecord models. You can also convert
|
171
171
|
units.
|
172
172
|
email:
|
@@ -203,6 +203,7 @@ files:
|
|
203
203
|
- test/test_data_miner_attribute.rb
|
204
204
|
- test/test_data_miner_process.rb
|
205
205
|
- test/test_old_syntax.rb
|
206
|
+
- test/test_tap.rb
|
206
207
|
homepage: https://github.com/seamusabshere/data_miner
|
207
208
|
licenses: []
|
208
209
|
post_install_message:
|
@@ -223,7 +224,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
223
224
|
version: '0'
|
224
225
|
requirements: []
|
225
226
|
rubyforge_project: data_miner
|
226
|
-
rubygems_version: 1.8.
|
227
|
+
rubygems_version: 1.8.10
|
227
228
|
signing_key:
|
228
229
|
specification_version: 3
|
229
230
|
summary: Mine remote data into your ActiveRecord models.
|
@@ -238,3 +239,4 @@ test_files:
|
|
238
239
|
- test/test_data_miner_attribute.rb
|
239
240
|
- test/test_data_miner_process.rb
|
240
241
|
- test/test_old_syntax.rb
|
242
|
+
- test/test_tap.rb
|