data_miner 1.3.0 → 1.3.3
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +52 -59
- data/data_miner.gemspec +9 -9
- data/lib/data_miner.rb +1 -18
- data/lib/data_miner/attribute.rb +8 -4
- data/lib/data_miner/config.rb +2 -35
- data/lib/data_miner/import.rb +3 -3
- data/lib/data_miner/tap.rb +14 -35
- data/lib/data_miner/version.rb +1 -1
- data/test/helper.rb +2 -1
- data/test/support/test_database.rb +2 -0
- data/test/test_data_miner_attribute.rb +53 -0
- data/test/test_old_syntax.rb +78 -156
- data/test/test_tap.rb +21 -0
- metadata +48 -46
data/README.rdoc
CHANGED
@@ -8,21 +8,21 @@ Programmatically import useful data into your ActiveRecord models.
|
|
8
8
|
|
9
9
|
You define <tt>data_miner</tt> blocks in your ActiveRecord models. For example, in <tt>app/models/country.rb</tt>:
|
10
10
|
|
11
|
-
|
12
|
-
|
11
|
+
class Country < ActiveRecord::Base
|
12
|
+
set_primary_key :iso_3166_code
|
13
13
|
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
14
|
+
data_miner do
|
15
|
+
import 'the official ISO country list',
|
16
|
+
:url => 'http://www.iso.org/iso/list-en1-semic-3.txt',
|
17
|
+
:skip => 2,
|
18
|
+
:headers => false,
|
19
|
+
:delimiter => ';',
|
20
|
+
:encoding => 'ISO-8859-1' do
|
21
|
+
key :iso_3166_code, :field_number => 1
|
22
|
+
store :name, :field_number => 0
|
23
|
+
end
|
23
24
|
end
|
24
25
|
end
|
25
|
-
end
|
26
26
|
|
27
27
|
Now you can run:
|
28
28
|
|
@@ -31,33 +31,28 @@ Now you can run:
|
|
31
31
|
|
32
32
|
== Creating tables from scratch (changed in 1.2)
|
33
33
|
|
34
|
-
We recommend using the <tt>
|
34
|
+
We recommend using the <tt>mini_record-compat</tt> gem (https://github.com/seamusabshere/mini_record)
|
35
35
|
|
36
36
|
This replaces the <tt>schema</tt> method that was available before. It didn't make sense for <tt>data_miner</tt> to provide this natively.
|
37
37
|
|
38
|
-
class Car < ActiveRecord::Base
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
force_schema do
|
43
|
-
string :make
|
44
|
-
string :model
|
45
|
-
end
|
38
|
+
class Car < ActiveRecord::Base
|
39
|
+
# the mini_record way
|
40
|
+
col :make
|
41
|
+
col :model
|
46
42
|
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
43
|
+
data_miner do
|
44
|
+
# DEPRECATED - see above
|
45
|
+
# schema do
|
46
|
+
# string :make
|
47
|
+
# string :model
|
48
|
+
# end
|
53
49
|
|
54
|
-
|
55
|
-
|
56
|
-
end
|
50
|
+
# the mini_record way
|
51
|
+
process :auto_upgrade!
|
57
52
|
|
58
|
-
|
59
|
-
|
60
|
-
end
|
53
|
+
# [... other data mining steps]
|
54
|
+
end
|
55
|
+
end
|
61
56
|
|
62
57
|
==Advanced usage
|
63
58
|
|
@@ -69,35 +64,33 @@ This is how we linked together (http://data.brighterplanet.com/aircraft) the FAA
|
|
69
64
|
# table without breaking associations.
|
70
65
|
set_primary_key :icao_code
|
71
66
|
|
72
|
-
# Use the
|
67
|
+
# Use the mini_record-compat gem to define the database schema in-line.
|
73
68
|
# It will destructively and automatically add/remove columns.
|
74
69
|
# This is "OK" because you can always just re-run the import script to get the data back.
|
75
70
|
# PS. If you're using DataMapper, you don't need this
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
index 'bts_aircraft_type_code'
|
100
|
-
end
|
71
|
+
col :icao_code
|
72
|
+
col :manufacturer_name
|
73
|
+
col :name
|
74
|
+
col :bts_name
|
75
|
+
col :bts_aircraft_type_code
|
76
|
+
col :brighter_planet_aircraft_class_code
|
77
|
+
col :fuel_use_aircraft_name
|
78
|
+
col :m3, :type => :float
|
79
|
+
col :m3_units
|
80
|
+
col :m2, :type => :float
|
81
|
+
col :m2_units
|
82
|
+
col :m1, :type => :float
|
83
|
+
col :m1_units
|
84
|
+
col :endpoint_fuel, :type => :float
|
85
|
+
col :endpoint_fuel_units
|
86
|
+
col :seats, :type => :float
|
87
|
+
col :distance, :type => :float
|
88
|
+
col :distance_units
|
89
|
+
col :load_factor, :type => :float
|
90
|
+
col :freight_share, :type => :float
|
91
|
+
col :payload, :type => :float
|
92
|
+
col :weighting, :type => :float
|
93
|
+
col :bts_aircraft_type_code, :type => :index
|
101
94
|
|
102
95
|
# A dictionary between BTS aircraft type codes and ICAO aircraft
|
103
96
|
# codes that uses string similarity instead of exact matching.
|
data/data_miner.gemspec
CHANGED
@@ -19,15 +19,15 @@ Gem::Specification.new do |s|
|
|
19
19
|
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
20
20
|
s.require_paths = ["lib"]
|
21
21
|
|
22
|
-
s.
|
23
|
-
s.
|
24
|
-
s.
|
25
|
-
s.
|
26
|
-
s.
|
27
|
-
s.
|
28
|
-
s.
|
29
|
-
s.
|
30
|
-
s.add_development_dependency '
|
22
|
+
s.add_runtime_dependency 'remote_table', '>=1.2.2'
|
23
|
+
s.add_runtime_dependency 'activerecord', '>=2.3.4'
|
24
|
+
s.add_runtime_dependency 'activesupport', '>=2.3.4'
|
25
|
+
s.add_runtime_dependency 'conversions', '>=1.4.4'
|
26
|
+
s.add_runtime_dependency 'blockenspiel', '>=0.3.2'
|
27
|
+
s.add_runtime_dependency 'errata', '>=1.0.1'
|
28
|
+
s.add_runtime_dependency 'posix-spawn'
|
29
|
+
s.add_runtime_dependency 'taps'
|
30
|
+
s.add_development_dependency 'mini_record-compat'
|
31
31
|
s.add_development_dependency 'loose_tight_dictionary', ">=0.0.5"
|
32
32
|
s.add_development_dependency 'test-unit'
|
33
33
|
s.add_development_dependency 'shoulda'
|
data/lib/data_miner.rb
CHANGED
@@ -32,24 +32,7 @@ class DataMiner
|
|
32
32
|
delegate :run, :to => :instance
|
33
33
|
delegate :resource_names, :to => :instance
|
34
34
|
end
|
35
|
-
|
36
|
-
# TODO this should probably live somewhere else
|
37
|
-
def self.backtick_with_reporting(cmd)
|
38
|
-
cmd = cmd.gsub /[ ]*\n[ ]*/m, ' '
|
39
|
-
output = `#{cmd}`
|
40
|
-
if not $?.success?
|
41
|
-
raise %{
|
42
|
-
From the data_miner gem...
|
43
|
-
|
44
|
-
Command failed:
|
45
|
-
#{cmd}
|
46
|
-
|
47
|
-
Output:
|
48
|
-
#{output}
|
49
|
-
}
|
50
|
-
end
|
51
|
-
end
|
52
|
-
|
35
|
+
|
53
36
|
# http://avdi.org/devblog/2009/07/14/recursively-symbolize-keys/
|
54
37
|
def self.recursively_stringify_keys(hash)
|
55
38
|
hash.inject(::Hash.new) do |result, (key, value)|
|
data/lib/data_miner/attribute.rb
CHANGED
@@ -70,6 +70,7 @@ class DataMiner
|
|
70
70
|
value = do_split(value) if wants_split?
|
71
71
|
value.gsub! /[ ]+/, ' '
|
72
72
|
value.strip!
|
73
|
+
return nil if value.blank? and wants_nullification?
|
73
74
|
value.upcase! if wants_upcase?
|
74
75
|
value = do_convert row, value if wants_conversion?
|
75
76
|
value = do_sprintf value if wants_sprintf?
|
@@ -86,16 +87,19 @@ class DataMiner
|
|
86
87
|
return value if value.is_a? ::ActiveRecord::Base # carry through trapdoor
|
87
88
|
value = value_in_dictionary value if wants_dictionary?
|
88
89
|
value = synthesize.call(row) if wants_synthesize?
|
89
|
-
value = nil if value.blank? and wants_nullification?
|
90
90
|
value
|
91
91
|
end
|
92
|
-
|
92
|
+
|
93
93
|
def set_record_from_row(record, row)
|
94
94
|
return false if !wants_overwriting? and !record.send(name).nil?
|
95
95
|
record.send "#{name}=", value_from_row(row)
|
96
|
-
|
96
|
+
if wants_units?
|
97
|
+
unit = (to_units || unit_from_source(row)).to_s
|
98
|
+
unit = nil if unit.blank? and wants_nullification?
|
99
|
+
record.send "#{name}_units=", unit
|
100
|
+
end
|
97
101
|
end
|
98
|
-
|
102
|
+
|
99
103
|
def unit_from_source(row)
|
100
104
|
row[units_field_name || units_field_number].to_s.strip.underscore.to_sym
|
101
105
|
end
|
data/lib/data_miner/config.rb
CHANGED
@@ -52,11 +52,8 @@ class DataMiner
|
|
52
52
|
|
53
53
|
finished = false
|
54
54
|
skipped = false
|
55
|
-
if Run.table_exists?
|
56
|
-
|
57
|
-
else
|
58
|
-
run = nil
|
59
|
-
::DataMiner.logger.info "Not logging individual runs. Please run DataMiner::Run.create_tables if you want to enable this."
|
55
|
+
run = if Run.table_exists?
|
56
|
+
Run.create! :started_at => ::Time.now, :resource_name => resource.name, :killed => true
|
60
57
|
end
|
61
58
|
resource.delete_all if options['from_scratch']
|
62
59
|
begin
|
@@ -92,7 +89,6 @@ class DataMiner
|
|
92
89
|
def after_invoke
|
93
90
|
return unless resource.table_exists?
|
94
91
|
make_sure_unit_definitions_make_sense
|
95
|
-
suggest_missing_column_migrations
|
96
92
|
end
|
97
93
|
|
98
94
|
COMPLETE_UNIT_DEFINITIONS = [
|
@@ -124,34 +120,5 @@ You need to supply one of #{COMPLETE_UNIT_DEFINITIONS.map(&:inspect).to_sentence
|
|
124
120
|
end
|
125
121
|
end
|
126
122
|
end
|
127
|
-
|
128
|
-
def suggest_missing_column_migrations
|
129
|
-
missing_columns = []
|
130
|
-
|
131
|
-
import_steps.each do |step|
|
132
|
-
step.attributes.each do |_, attribute|
|
133
|
-
raise "You can't have an attribute column that ends in _units (reserved): #{resource.table_name}.#{attribute.name}" if attribute.name.end_with? '_units'
|
134
|
-
unless resource.column_names.include? attribute.name
|
135
|
-
missing_columns << attribute.name
|
136
|
-
end
|
137
|
-
if attribute.wants_units? and !resource.column_names.include?(units_column = "#{attribute.name}_units")
|
138
|
-
missing_columns << units_column
|
139
|
-
end
|
140
|
-
end
|
141
|
-
end
|
142
|
-
missing_columns.uniq!
|
143
|
-
if missing_columns.any?
|
144
|
-
::DataMiner.logger.debug %{
|
145
|
-
|
146
|
-
================================
|
147
|
-
|
148
|
-
On #{resource}, it looks like you're missing some columns...
|
149
|
-
|
150
|
-
#{missing_columns.map { |column_name| "#{column_name.end_with?('_units') ? 'string' : 'FIXME_WHAT_COLUMN_TYPE_AM_I' } '#{column_name}'" }.join("\n") }
|
151
|
-
|
152
|
-
================================
|
153
|
-
}
|
154
|
-
end
|
155
|
-
end
|
156
123
|
end
|
157
124
|
end
|
data/lib/data_miner/import.rb
CHANGED
@@ -80,10 +80,10 @@ class DataMiner
|
|
80
80
|
table.each do |row|
|
81
81
|
record = resource.send "find_or_initialize_by_#{@_key}", attributes[@_key].value_from_row(row)
|
82
82
|
attributes.each { |_, attr| attr.set_record_from_row record, row }
|
83
|
-
|
83
|
+
begin
|
84
84
|
record.save!
|
85
|
-
|
86
|
-
::DataMiner.logger.
|
85
|
+
rescue
|
86
|
+
::DataMiner.logger.warn "[data_miner] Got #{$!.inspect} when trying to save #{row}"
|
87
87
|
end
|
88
88
|
end
|
89
89
|
free
|
data/lib/data_miner/tap.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
require '
|
1
|
+
require 'posix/spawn'
|
2
2
|
class DataMiner
|
3
3
|
class Tap
|
4
4
|
attr_reader :config
|
@@ -28,7 +28,7 @@ class DataMiner
|
|
28
28
|
connection.drop_table possible_obstacle
|
29
29
|
end
|
30
30
|
end
|
31
|
-
|
31
|
+
taps_pull
|
32
32
|
if needs_table_rename?
|
33
33
|
connection.rename_table source_table_name, resource.table_name
|
34
34
|
end
|
@@ -55,6 +55,8 @@ class DataMiner
|
|
55
55
|
|
56
56
|
def adapter
|
57
57
|
case connection.adapter_name
|
58
|
+
when /mysql2/i
|
59
|
+
'mysql2'
|
58
60
|
when /mysql/i
|
59
61
|
'mysql'
|
60
62
|
when /postgres/i
|
@@ -71,11 +73,13 @@ class DataMiner
|
|
71
73
|
|
72
74
|
DEFAULT_PORTS = {
|
73
75
|
'mysql' => 3306,
|
76
|
+
'mysql2' => 3306,
|
74
77
|
'postgres' => 5432
|
75
78
|
}
|
76
79
|
|
77
80
|
DEFAULT_USERNAMES = {
|
78
81
|
'mysql' => 'root',
|
82
|
+
'mysql2' => 'root',
|
79
83
|
'postgres' => ''
|
80
84
|
}
|
81
85
|
|
@@ -95,16 +99,15 @@ class DataMiner
|
|
95
99
|
|
96
100
|
def db_locator
|
97
101
|
case adapter
|
98
|
-
when 'mysql', 'postgres'
|
99
|
-
"#{username}:#{password}@#{host}:#{port}/#{database}"
|
100
102
|
when 'sqlite'
|
101
103
|
database
|
104
|
+
else
|
105
|
+
"#{username}:#{password}@#{host}:#{port}/#{database}"
|
102
106
|
end
|
103
107
|
end
|
104
108
|
|
105
|
-
|
106
|
-
|
107
|
-
::Escape.shell_command [
|
109
|
+
def taps_pull
|
110
|
+
args = [
|
108
111
|
'taps',
|
109
112
|
'pull',
|
110
113
|
"#{adapter}://#{db_locator}",
|
@@ -113,34 +116,10 @@ class DataMiner
|
|
113
116
|
'--tables',
|
114
117
|
source_table_name
|
115
118
|
]
|
116
|
-
|
119
|
+
child = ::POSIX::Spawn::Child.new *args
|
120
|
+
unless child.success?
|
121
|
+
raise %{[data_miner gem] Got "#{child.err}" back when tried to run "#{args.join(' ')}"}
|
122
|
+
end
|
117
123
|
end
|
118
|
-
|
119
|
-
# 2.3.5 mysql
|
120
|
-
# * <tt>:host</tt> - Defaults to "localhost".
|
121
|
-
# * <tt>:port</tt> - Defaults to 3306.
|
122
|
-
# * <tt>:socket</tt> - Defaults to "/tmp/mysql.sock".
|
123
|
-
# * <tt>:username</tt> - Defaults to "root"
|
124
|
-
# * <tt>:password</tt> - Defaults to nothing.
|
125
|
-
# * <tt>:database</tt> - The name of the database. No default, must be provided.
|
126
|
-
# * <tt>:encoding</tt> - (Optional) Sets the client encoding by executing "SET NAMES <encoding>" after connection.
|
127
|
-
# * <tt>:reconnect</tt> - Defaults to false (See MySQL documentation: http://dev.mysql.com/doc/refman/5.0/en/auto-reconnect.html).
|
128
|
-
# * <tt>:sslca</tt> - Necessary to use MySQL with an SSL connection.
|
129
|
-
# * <tt>:sslkey</tt> - Necessary to use MySQL with an SSL connection.
|
130
|
-
# * <tt>:sslcert</tt> - Necessary to use MySQL with an SSL connection.
|
131
|
-
# * <tt>:sslcapath</tt> - Necessary to use MySQL with an SSL connection.
|
132
|
-
# * <tt>:sslcipher</tt> - Necessary to use MySQL with an SSL connection.
|
133
|
-
# 2.3.5 mysql
|
134
|
-
# * <tt>:host</tt> - Defaults to "localhost".
|
135
|
-
# * <tt>:port</tt> - Defaults to 5432.
|
136
|
-
# * <tt>:username</tt> - Defaults to nothing.
|
137
|
-
# * <tt>:password</tt> - Defaults to nothing.
|
138
|
-
# * <tt>:database</tt> - The name of the database. No default, must be provided.
|
139
|
-
# * <tt>:schema_search_path</tt> - An optional schema search path for the connection given as a string of comma-separated schema names. This is backward-compatible with the <tt>:schema_order</tt> option.
|
140
|
-
# * <tt>:encoding</tt> - An optional client encoding that is used in a <tt>SET client_encoding TO <encoding></tt> call on the connection.
|
141
|
-
# * <tt>:min_messages</tt> - An optional client min messages that is used in a <tt>SET client_min_messages TO <min_messages></tt> call on the connection.
|
142
|
-
# * <tt>:allow_concurrency</tt> - If true, use async query methods so Ruby threads don't deadlock; otherwise, use blocking query methods.
|
143
|
-
# 2.3.5 sqlite[3]
|
144
|
-
# * <tt>:database</tt> - Path to the database file.
|
145
124
|
end
|
146
125
|
end
|
data/lib/data_miner/version.rb
CHANGED
data/test/helper.rb
CHANGED
@@ -3,7 +3,8 @@ require 'bundler'
|
|
3
3
|
Bundler.setup
|
4
4
|
require 'test/unit'
|
5
5
|
require 'shoulda'
|
6
|
-
require '
|
6
|
+
require 'mini_record'
|
7
|
+
require 'logger'
|
7
8
|
# require 'ruby-debug'
|
8
9
|
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
9
10
|
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
@@ -181,7 +181,9 @@ module TestDatabase
|
|
181
181
|
t.datetime "created_at"
|
182
182
|
t.datetime "updated_at"
|
183
183
|
t.float "emission_factor"
|
184
|
+
t.string "emission_factor_units"
|
184
185
|
t.float "annual_distance"
|
186
|
+
t.string "annual_distance_units"
|
185
187
|
t.string "code"
|
186
188
|
end
|
187
189
|
execute "ALTER TABLE automobile_fuel_types ADD PRIMARY KEY (code);"
|
@@ -55,4 +55,57 @@ class TestDataMinerAttribute < Test::Unit::TestCase
|
|
55
55
|
end
|
56
56
|
end
|
57
57
|
end
|
58
|
+
|
59
|
+
context '#set_record_from_row' do
|
60
|
+
setup do
|
61
|
+
@automobile_fuel_type = AutomobileFuelType.new
|
62
|
+
end
|
63
|
+
context 'nullify is true, wants units' do
|
64
|
+
setup do
|
65
|
+
@attribute = DataMiner::Attribute.new @automobile_fuel_type, 'annual_distance', :nullify => true, :units_field_name => 'annual_distance_units'
|
66
|
+
end
|
67
|
+
should 'set value and units to nil if field is blank' do
|
68
|
+
@attribute.set_record_from_row(@automobile_fuel_type,
|
69
|
+
'name' => 'electricity',
|
70
|
+
'annual_distance' => '',
|
71
|
+
'annual_distance_units' => ''
|
72
|
+
)
|
73
|
+
assert_nil @automobile_fuel_type.annual_distance
|
74
|
+
assert_nil @automobile_fuel_type.annual_distance_units
|
75
|
+
end
|
76
|
+
should 'set value and units if field is not blank' do
|
77
|
+
@attribute.set_record_from_row(@automobile_fuel_type,
|
78
|
+
'name' => 'electricity',
|
79
|
+
'annual_distance' => '100.0',
|
80
|
+
'annual_distance_units' => 'kilometres'
|
81
|
+
)
|
82
|
+
assert_equal 100.0, @automobile_fuel_type.annual_distance
|
83
|
+
assert_equal 'kilometres', @automobile_fuel_type.annual_distance_units
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
87
|
+
context 'nullify is false, wants units' do
|
88
|
+
setup do
|
89
|
+
@attribute = DataMiner::Attribute.new @automobile_fuel_type, 'annual_distance', :units_field_name => 'annual_distance_units'
|
90
|
+
end
|
91
|
+
should 'set value and units to blank if field is blank' do
|
92
|
+
@attribute.set_record_from_row(@automobile_fuel_type,
|
93
|
+
'name' => 'electricity',
|
94
|
+
'annual_distance' => '',
|
95
|
+
'annual_distance_units' => ''
|
96
|
+
)
|
97
|
+
assert_equal 0.0, @automobile_fuel_type.annual_distance
|
98
|
+
assert_equal '', @automobile_fuel_type.annual_distance_units
|
99
|
+
end
|
100
|
+
should 'set value and units if field is not blank' do
|
101
|
+
@attribute.set_record_from_row(@automobile_fuel_type,
|
102
|
+
'name' => 'electricity',
|
103
|
+
'annual_distance' => '100.0',
|
104
|
+
'annual_distance_units' => 'kilometres'
|
105
|
+
)
|
106
|
+
assert_equal 100.0, @automobile_fuel_type.annual_distance
|
107
|
+
assert_equal 'kilometres', @automobile_fuel_type.annual_distance_units
|
108
|
+
end
|
109
|
+
end
|
110
|
+
end
|
58
111
|
end
|
data/test/test_old_syntax.rb
CHANGED
@@ -3,24 +3,15 @@ require 'helper'
|
|
3
3
|
|
4
4
|
TestDatabase.load_models
|
5
5
|
|
6
|
-
class TappedAirport < ActiveRecord::Base
|
7
|
-
set_primary_key :iata_code
|
8
|
-
|
9
|
-
data_miner do
|
10
|
-
tap "Brighter Planet's sanitized airports table", "http://carbon:neutral@data.brighterplanet.com:5001", :source_table_name => 'airports'
|
11
|
-
# tap "Brighter Planet's sanitized airports table", "http://carbon:neutral@localhost:5000", :source_table_name => 'airports'
|
12
|
-
end
|
13
|
-
end
|
14
|
-
|
15
6
|
class CensusRegion < ActiveRecord::Base
|
16
7
|
set_primary_key :number
|
17
|
-
|
8
|
+
|
18
9
|
data_miner do
|
19
10
|
import :url => 'http://www.census.gov/popest/geographic/codes02.csv', :skip => 9, :select => lambda { |row| row['Region'].to_i > 0 and row['Division'].to_s.strip == 'X'} do
|
20
11
|
key 'number', :field_name => 'Region'
|
21
12
|
store 'name', :field_name => 'Name'
|
22
13
|
end
|
23
|
-
|
14
|
+
|
24
15
|
# pretend this is a different data source
|
25
16
|
# fake! just for testing purposes
|
26
17
|
import :url => 'http://www.census.gov/popest/geographic/codes02.csv', :skip => 9, :select => lambda { |row| row['Region'].to_i > 0 and row['Division'].to_s.strip == 'X'} do
|
@@ -33,7 +24,7 @@ end
|
|
33
24
|
# smaller than a region
|
34
25
|
class CensusDivision < ActiveRecord::Base
|
35
26
|
set_primary_key :number
|
36
|
-
|
27
|
+
|
37
28
|
data_miner do
|
38
29
|
import :url => 'http://www.census.gov/popest/geographic/codes02.csv', :skip => 9, :select => lambda { |row| row['Division'].to_s.strip != 'X' and row['FIPS CODE STATE'].to_s.strip == 'X'} do
|
39
30
|
key 'number', :field_name => 'Division'
|
@@ -46,7 +37,7 @@ end
|
|
46
37
|
|
47
38
|
class CensusDivisionDeux < ActiveRecord::Base
|
48
39
|
set_primary_key :number
|
49
|
-
|
40
|
+
|
50
41
|
data_miner do
|
51
42
|
import :url => 'http://www.census.gov/popest/geographic/codes02.csv', :skip => 9, :select => lambda { |row| row['Division'].to_s.strip != 'X' and row['FIPS CODE STATE'].to_s.strip == 'X'} do
|
52
43
|
key 'number', :field_name => 'Division'
|
@@ -59,9 +50,9 @@ end
|
|
59
50
|
|
60
51
|
class CrosscallingCensusRegion < ActiveRecord::Base
|
61
52
|
set_primary_key :number
|
62
|
-
|
53
|
+
|
63
54
|
has_many :crosscalling_census_divisions
|
64
|
-
|
55
|
+
|
65
56
|
data_miner do
|
66
57
|
process "derive ourselves from the census divisions table (i.e., cross call census divisions)" do
|
67
58
|
CrosscallingCensusDivision.run_data_miner!
|
@@ -80,9 +71,9 @@ end
|
|
80
71
|
|
81
72
|
class CrosscallingCensusDivision < ActiveRecord::Base
|
82
73
|
set_primary_key :number
|
83
|
-
|
74
|
+
|
84
75
|
belongs_to :crosscalling_census_regions, :foreign_key => 'census_region_number'
|
85
|
-
|
76
|
+
|
86
77
|
data_miner do
|
87
78
|
import "get a list of census divisions and their regions", :url => 'http://www.census.gov/popest/geographic/codes02.csv', :skip => 9, :select => lambda { |row| row['Division'].to_s.strip != 'X' and row['FIPS CODE STATE'].to_s.strip == 'X'} do
|
88
79
|
key 'number', :field_name => 'Division'
|
@@ -90,7 +81,7 @@ class CrosscallingCensusDivision < ActiveRecord::Base
|
|
90
81
|
store 'census_region_number', :field_name => 'Region'
|
91
82
|
store 'census_region_name', :field_name => 'Region', :dictionary => { :input => 'number', :output => 'name', :url => 'http://data.brighterplanet.com/census_regions.csv' }
|
92
83
|
end
|
93
|
-
|
84
|
+
|
94
85
|
process "make sure my parent object is set up (i.e., cross-call it)" do
|
95
86
|
CrosscallingCensusRegion.run_data_miner!
|
96
87
|
end
|
@@ -99,18 +90,18 @@ end
|
|
99
90
|
|
100
91
|
class ResidentialEnergyConsumptionSurveyResponse < ActiveRecord::Base
|
101
92
|
set_primary_key :department_of_energy_identifier
|
102
|
-
|
93
|
+
|
103
94
|
data_miner do
|
104
95
|
process 'Define some unit conversions' do
|
105
96
|
Conversions.register :kbtus, :joules, 1_000.0 * 1_055.05585
|
106
97
|
Conversions.register :square_feet, :square_metres, 0.09290304
|
107
98
|
end
|
108
|
-
|
99
|
+
|
109
100
|
# conversions are NOT performed here, since we first have to zero out legitimate skips
|
110
101
|
# otherwise you will get values like "999 pounds = 453.138778 kilograms" (where 999 is really a legit skip)
|
111
102
|
import 'RECs 2005 (but not converting units to metric just yet)', :url => 'http://www.eia.doe.gov/emeu/recs/recspubuse05/datafiles/RECS05alldata.csv' do
|
112
103
|
key 'department_of_energy_identifier', :field_name => 'DOEID'
|
113
|
-
|
104
|
+
|
114
105
|
store 'residence_class', :field_name => 'TYPEHUQ', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/typehuq/typehuq.csv' }
|
115
106
|
store 'construction_year', :field_name => 'YEARMADE', :dictionary => { :input => 'Code', :sprintf => '%02d', :output => 'Date in the middle (synthetic)', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/yearmade/yearmade.csv' }
|
116
107
|
store 'construction_period', :field_name => 'YEARMADE', :dictionary => { :input => 'Code', :sprintf => '%02d', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/yearmade/yearmade.csv' }
|
@@ -120,12 +111,12 @@ class ResidentialEnergyConsumptionSurveyResponse < ActiveRecord::Base
|
|
120
111
|
store 'window_ac_use', :field_name => 'USEWWAC', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/usewwac/usewwac.csv' }
|
121
112
|
store 'clothes_washer_use', :field_name => 'WASHLOAD', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/washload/washload.csv' }
|
122
113
|
store 'clothes_dryer_use', :field_name => 'DRYRUSE', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/dryruse/dryruse.csv' }
|
123
|
-
|
114
|
+
|
124
115
|
store 'census_division_number', :field_name => 'DIVISION'
|
125
116
|
store 'census_division_name', :field_name => 'DIVISION', :dictionary => { :input => 'number', :output => 'name', :url => 'http://data.brighterplanet.com/census_divisions.csv' }
|
126
117
|
store 'census_region_number', :field_name => 'DIVISION', :dictionary => { :input => 'number', :output => 'census_region_number', :url => 'http://data.brighterplanet.com/census_divisions.csv' }
|
127
118
|
store 'census_region_name', :field_name => 'DIVISION', :dictionary => { :input => 'number', :output => 'census_region_name', :url => 'http://data.brighterplanet.com/census_divisions.csv' }
|
128
|
-
|
119
|
+
|
129
120
|
store 'floorspace', :field_name => 'TOTSQFT'
|
130
121
|
store 'residents', :field_name => 'NHSLDMEM'
|
131
122
|
store 'ownership', :field_name => 'KOWNRENT'
|
@@ -248,15 +239,15 @@ class ResidentialEnergyConsumptionSurveyResponse < ActiveRecord::Base
|
|
248
239
|
update_all "#{attr_name} = #{attr_name} * #{Conversions::Unit.exchange_rate from_units, to_units}"
|
249
240
|
end
|
250
241
|
end
|
251
|
-
|
242
|
+
|
252
243
|
process 'Add a new field "rooms" that estimates how many rooms are in the house' do
|
253
244
|
update_all 'rooms = total_rooms + bathrooms/2 + halfbaths/4 + heated_garage*(attached_1car_garage + detached_1car_garage + 2*(attached_2car_garage + detached_2car_garage) + 3*(attached_3car_garage + detached_3car_garage))'
|
254
245
|
end
|
255
|
-
|
246
|
+
|
256
247
|
process 'Add a new field "lighting_use" that estimates how many hours light bulbs are turned on in the house' do
|
257
248
|
update_all 'lighting_use = 2*(lights_on_1_to_4_hours + efficient_lights_on_1_to_4_hours) + 8*(lights_on_4_to_12_hours + efficient_lights_on_4_to_12_hours) + 16*(lights_on_over_12_hours + efficient_lights_on_over_12_hours) + 12*(outdoor_all_night_lights + outdoor_all_night_gas_lights)'
|
258
249
|
end
|
259
|
-
|
250
|
+
|
260
251
|
process 'Add a new field "lighting_efficiency" that estimates what percentage of light bulbs in a house are energy-efficient' do
|
261
252
|
update_all 'lighting_efficiency = (2*efficient_lights_on_1_to_4_hours + 8*efficient_lights_on_4_to_12_hours + 16*efficient_lights_on_over_12_hours) / lighting_use'
|
262
253
|
end
|
@@ -494,7 +485,7 @@ class T100FlightSegment < ActiveRecord::Base
|
|
494
485
|
store 'data_source', :field_name => 'DATA_SOURCE'
|
495
486
|
end
|
496
487
|
end
|
497
|
-
|
488
|
+
|
498
489
|
process 'Derive freight share as a fraction of payload' do
|
499
490
|
update_all 'freight_share = (freight + mail) / payload', 'payload > 0'
|
500
491
|
end
|
@@ -502,7 +493,7 @@ class T100FlightSegment < ActiveRecord::Base
|
|
502
493
|
process 'Derive load factor, which is passengers divided by the total seats available' do
|
503
494
|
update_all 'load_factor = passengers / seats', 'passengers <= seats'
|
504
495
|
end
|
505
|
-
|
496
|
+
|
506
497
|
process 'Derive average seats per departure' do
|
507
498
|
update_all 'seats_per_departure = seats / departures_performed', 'departures_performed > 0'
|
508
499
|
end
|
@@ -512,38 +503,38 @@ end
|
|
512
503
|
# note that this depends on stuff in Aircraft
|
513
504
|
class AircraftDeux < ActiveRecord::Base
|
514
505
|
set_primary_key :icao_code
|
515
|
-
|
506
|
+
|
516
507
|
# defined on the class because we defined the errata with a shorthand
|
517
508
|
class << self
|
518
509
|
def is_not_attributed_to_aerospatiale?(row)
|
519
510
|
not row['Manufacturer'] =~ /AEROSPATIALE/i
|
520
511
|
end
|
521
|
-
|
512
|
+
|
522
513
|
def is_not_attributed_to_cessna?(row)
|
523
514
|
not row['Manufacturer'] =~ /CESSNA/i
|
524
515
|
end
|
525
|
-
|
516
|
+
|
526
517
|
def is_not_attributed_to_learjet?(row)
|
527
518
|
not row['Manufacturer'] =~ /LEAR/i
|
528
519
|
end
|
529
|
-
|
520
|
+
|
530
521
|
def is_not_attributed_to_dehavilland?(row)
|
531
522
|
not row['Manufacturer'] =~ /DE ?HAVILLAND/i
|
532
523
|
end
|
533
|
-
|
524
|
+
|
534
525
|
def is_not_attributed_to_mcdonnell_douglas?(row)
|
535
526
|
not row['Manufacturer'] =~ /MCDONNELL DOUGLAS/i
|
536
527
|
end
|
537
|
-
|
528
|
+
|
538
529
|
def is_not_a_dc_plane?(row)
|
539
530
|
not row['Model'] =~ /DC/i
|
540
531
|
end
|
541
|
-
|
532
|
+
|
542
533
|
def is_a_crj_900?(row)
|
543
534
|
row['Designator'].downcase == 'crj9'
|
544
535
|
end
|
545
536
|
end
|
546
|
-
|
537
|
+
|
547
538
|
data_miner do
|
548
539
|
# ('A'..'Z').each do |letter|
|
549
540
|
# Note: for the purposes of testing, only importing "D"
|
@@ -567,28 +558,24 @@ end
|
|
567
558
|
class AutomobileMakeFleetYear < ActiveRecord::Base
|
568
559
|
set_primary_key :name
|
569
560
|
|
570
|
-
|
571
|
-
|
572
|
-
|
573
|
-
|
574
|
-
|
575
|
-
|
576
|
-
|
577
|
-
|
578
|
-
|
579
|
-
|
580
|
-
datetime "updated_at"
|
581
|
-
end
|
561
|
+
col :name
|
562
|
+
col :make_name
|
563
|
+
col :fleet
|
564
|
+
col :year, :type => :integer
|
565
|
+
col :fuel_efficiency, :type => :float
|
566
|
+
col :fuel_efficiency_units
|
567
|
+
col :volume, :type => :integer
|
568
|
+
col :make_year_name
|
569
|
+
col :created_at, :type => :datetime
|
570
|
+
col :updated_at, :type => :datetime
|
582
571
|
|
583
572
|
data_miner do
|
584
|
-
process
|
585
|
-
|
586
|
-
end
|
587
|
-
|
573
|
+
process :auto_upgrade!
|
574
|
+
|
588
575
|
process "finish if i tell you to" do
|
589
576
|
raise DataMiner::Finish if $force_finish
|
590
577
|
end
|
591
|
-
|
578
|
+
|
592
579
|
process "skip if i tell you to" do
|
593
580
|
raise DataMiner::Skip if $force_skip
|
594
581
|
end
|
@@ -609,36 +596,28 @@ end
|
|
609
596
|
|
610
597
|
class CensusDivisionTrois < ActiveRecord::Base
|
611
598
|
set_primary_key :number_code
|
612
|
-
|
613
|
-
|
614
|
-
|
615
|
-
|
616
|
-
|
617
|
-
|
618
|
-
|
619
|
-
|
620
|
-
end
|
621
|
-
|
599
|
+
|
600
|
+
col :number_code
|
601
|
+
col :name
|
602
|
+
col :census_region_name
|
603
|
+
col :census_region_number, :type => :integer
|
604
|
+
add_index 'census_region_name', :name => 'homefry'
|
605
|
+
add_index ['number_code', 'name', 'census_region_name', 'census_region_number']
|
606
|
+
|
622
607
|
data_miner do
|
623
|
-
process
|
624
|
-
force_schema!
|
625
|
-
end
|
608
|
+
process :auto_upgrade!
|
626
609
|
end
|
627
610
|
end
|
628
611
|
|
629
612
|
class CensusDivisionFour < ActiveRecord::Base
|
630
|
-
|
631
|
-
|
632
|
-
|
633
|
-
|
634
|
-
|
635
|
-
index 'census_region_name', :name => 'homefry'
|
636
|
-
end
|
613
|
+
col :number_code
|
614
|
+
col :name
|
615
|
+
col :census_region_name
|
616
|
+
col :census_region_number, :type => :integer
|
617
|
+
add_index 'census_region_name', :name => 'homefry'
|
637
618
|
|
638
619
|
data_miner do
|
639
|
-
process
|
640
|
-
force_schema!
|
641
|
-
end
|
620
|
+
process :auto_upgrade!
|
642
621
|
end
|
643
622
|
end
|
644
623
|
|
@@ -661,11 +640,11 @@ class TestOldSyntax < Test::Unit::TestCase
|
|
661
640
|
if AutomobileMakeFleetYear.table_exists?
|
662
641
|
ActiveRecord::Base.connection.execute 'DROP TABLE automobile_make_fleet_years;'
|
663
642
|
end
|
664
|
-
AutomobileMakeFleetYear.
|
643
|
+
AutomobileMakeFleetYear.auto_upgrade!
|
665
644
|
assert AutomobileMakeFleetYear.table_exists?
|
666
645
|
end
|
667
646
|
end
|
668
|
-
|
647
|
+
|
669
648
|
if ENV['ALL'] == 'true' or ENV['FAST'] == 'true'
|
670
649
|
should 'append to an existing config' do
|
671
650
|
AutomobileFuelType.class_eval do
|
@@ -685,7 +664,7 @@ class TestOldSyntax < Test::Unit::TestCase
|
|
685
664
|
assert_equal 'http://example1.com', AutomobileFuelType.data_miner_config.steps[-2].table.url
|
686
665
|
assert_equal 'http://example2.com', AutomobileFuelType.data_miner_config.steps[-1].table.url
|
687
666
|
end
|
688
|
-
|
667
|
+
|
689
668
|
should 'override an existing data_miner configuration' do
|
690
669
|
AutomobileFuelType.class_eval do
|
691
670
|
data_miner do
|
@@ -710,7 +689,7 @@ class TestOldSyntax < Test::Unit::TestCase
|
|
710
689
|
AutomobileMakeFleetYear.run_data_miner!
|
711
690
|
assert AutomobileMakeFleetYear.exists?(:name => 'Alfa Romeo IP 1978')
|
712
691
|
end
|
713
|
-
|
692
|
+
|
714
693
|
should "stop and register skipped if it gets a DataMiner::Skip" do
|
715
694
|
AutomobileMakeFleetYear.delete_all
|
716
695
|
AutomobileMakeFleetYear.data_miner_runs.delete_all
|
@@ -723,124 +702,67 @@ class TestOldSyntax < Test::Unit::TestCase
|
|
723
702
|
AutomobileMakeFleetYear.run_data_miner!
|
724
703
|
assert AutomobileMakeFleetYear.exists?(:name => 'Alfa Romeo IP 1978')
|
725
704
|
end
|
726
|
-
|
727
|
-
should "eagerly enforce a schema" do
|
728
|
-
ActiveRecord::Base.connection.create_table 'census_division_trois', :force => true, :options => 'ENGINE=InnoDB default charset=utf8' do |t|
|
729
|
-
t.string 'name'
|
730
|
-
t.string 'census_region_name'
|
731
|
-
# t.integer 'census_region_number'
|
732
|
-
end
|
733
|
-
ActiveRecord::Base.connection.execute 'ALTER TABLE census_division_trois ADD INDEX (census_region_name)'
|
734
|
-
CensusDivisionTrois.reset_column_information
|
735
|
-
missing_columns = %w{ census_region_number }
|
736
705
|
|
737
|
-
# sanity check
|
738
|
-
missing_columns.each do |column|
|
739
|
-
assert_false CensusDivisionTrois.column_names.include?(column)
|
740
|
-
end
|
741
|
-
assert_false ActiveRecord::Base.connection.indexes(CensusDivisionTrois.table_name).any? { |index| index.name == 'homefry' }
|
742
|
-
|
743
|
-
3.times do
|
744
|
-
CensusDivisionTrois.run_data_miner!
|
745
|
-
missing_columns.each do |column|
|
746
|
-
assert CensusDivisionTrois.column_names.include?(column)
|
747
|
-
end
|
748
|
-
assert ActiveRecord::Base.connection.indexes(CensusDivisionTrois.table_name).any? { |index| index.name == 'homefry' }
|
749
|
-
assert_equal :string, CensusDivisionTrois.columns_hash[CensusDivisionTrois.primary_key].type
|
750
|
-
end
|
751
|
-
end
|
752
|
-
|
753
|
-
should "let schemas work with default id primary keys" do
|
754
|
-
ActiveRecord::Base.connection.create_table 'census_division_fours', :force => true, :options => 'ENGINE=InnoDB default charset=utf8' do |t|
|
755
|
-
t.string 'name'
|
756
|
-
t.string 'census_region_name'
|
757
|
-
# t.integer 'census_region_number'
|
758
|
-
end
|
759
|
-
ActiveRecord::Base.connection.execute 'ALTER TABLE census_division_fours ADD INDEX (census_region_name)'
|
760
|
-
CensusDivisionFour.reset_column_information
|
761
|
-
missing_columns = %w{ census_region_number }
|
762
|
-
|
763
|
-
# sanity check
|
764
|
-
missing_columns.each do |column|
|
765
|
-
assert_false CensusDivisionFour.column_names.include?(column)
|
766
|
-
end
|
767
|
-
assert_false ActiveRecord::Base.connection.indexes(CensusDivisionFour.table_name).any? { |index| index.name == 'homefry' }
|
768
|
-
|
769
|
-
3.times do
|
770
|
-
CensusDivisionFour.run_data_miner!
|
771
|
-
missing_columns.each do |column|
|
772
|
-
assert CensusDivisionFour.column_names.include?(column)
|
773
|
-
end
|
774
|
-
assert ActiveRecord::Base.connection.indexes(CensusDivisionFour.table_name).any? { |index| index.name == 'homefry' }
|
775
|
-
assert_equal :integer, CensusDivisionFour.columns_hash[CensusDivisionFour.primary_key].type
|
776
|
-
end
|
777
|
-
end
|
778
|
-
|
779
706
|
should "allow specifying dictionaries explicitly" do
|
780
707
|
CensusDivisionDeux.run_data_miner!
|
781
708
|
assert_equal 'South Region', CensusDivisionDeux.find(5).census_region_name
|
782
709
|
end
|
783
|
-
|
710
|
+
|
784
711
|
should "be able to key on things other than the primary key" do
|
785
712
|
Aircraft.run_data_miner!
|
786
713
|
assert_equal 'SP', Aircraft.find('DHC6').brighter_planet_aircraft_class_code
|
787
714
|
end
|
788
|
-
|
715
|
+
|
789
716
|
should "be able to synthesize rows without using a full parser class" do
|
790
717
|
AutomobileMakeFleetYear.run_data_miner!
|
791
718
|
assert AutomobileMakeFleetYear.exists?(:name => 'Alfa Romeo IP 1978')
|
792
719
|
end
|
793
|
-
|
720
|
+
|
794
721
|
should "keep a call stack so that you can call run_data_miner! on a child" do
|
795
722
|
CrosscallingCensusDivision.run_data_miner!
|
796
723
|
assert CrosscallingCensusDivision.exists? :name => 'Mountain Division', :number => 8, :census_region_number => 4, :census_region_name => 'West Region'
|
797
724
|
assert CrosscallingCensusRegion.exists? :name => 'West Region', :number => 4
|
798
725
|
end
|
799
|
-
|
726
|
+
|
800
727
|
should "keep a call stack so that you can call run_data_miner! on a parent" do
|
801
728
|
CrosscallingCensusRegion.run_data_miner!
|
802
729
|
assert CrosscallingCensusDivision.exists? :name => 'Mountain Division', :number => 8, :census_region_number => 4, :census_region_name => 'West Region'
|
803
730
|
assert CrosscallingCensusRegion.exists? :name => 'West Region', :number => 4
|
804
731
|
end
|
805
|
-
|
732
|
+
|
806
733
|
should "import airports" do
|
807
734
|
Airport.run_data_miner!
|
808
735
|
assert Airport.count > 0
|
809
736
|
end
|
810
|
-
|
811
|
-
should "tap airports" do
|
812
|
-
TappedAirport.run_data_miner!
|
813
|
-
assert TappedAirport.count > 0
|
814
|
-
end
|
815
|
-
|
737
|
+
|
816
738
|
should "pull in census divisions using a data.brighterplanet.com dictionary" do
|
817
739
|
CensusDivision.run_data_miner!
|
818
740
|
assert CensusDivision.count > 0
|
819
741
|
end
|
820
|
-
|
742
|
+
|
821
743
|
should "have a way to queue up runs that works with delated_job's send_later" do
|
822
744
|
assert AutomobileVariant.respond_to?(:run_data_miner!)
|
823
745
|
end
|
824
|
-
|
746
|
+
|
825
747
|
should "be idempotent" do
|
826
748
|
Country.data_miner_config.run
|
827
749
|
a = Country.count
|
828
750
|
Country.data_miner_config.run
|
829
751
|
b = Country.count
|
830
752
|
assert_equal a, b
|
831
|
-
|
753
|
+
|
832
754
|
CensusRegion.data_miner_config.run
|
833
755
|
a = CensusRegion.count
|
834
756
|
CensusRegion.data_miner_config.run
|
835
757
|
b = CensusRegion.count
|
836
758
|
assert_equal a, b
|
837
759
|
end
|
838
|
-
|
760
|
+
|
839
761
|
should "hash things" do
|
840
762
|
AutomobileVariant.data_miner_config.steps[0].run
|
841
763
|
assert AutomobileVariant.first.row_hash.present?
|
842
764
|
end
|
843
|
-
|
765
|
+
|
844
766
|
should "process a callback block instead of a method" do
|
845
767
|
AutomobileVariant.delete_all
|
846
768
|
AutomobileVariant.data_miner_config.steps[0].run
|
@@ -848,7 +770,7 @@ class TestOldSyntax < Test::Unit::TestCase
|
|
848
770
|
AutomobileVariant.data_miner_config.steps.last.run
|
849
771
|
assert AutomobileVariant.first.fuel_efficiency_city.present?
|
850
772
|
end
|
851
|
-
|
773
|
+
|
852
774
|
should "keep a log when it does a run" do
|
853
775
|
approx_started_at = Time.now
|
854
776
|
DataMiner.run :resource_names => %w{ Country }
|
@@ -857,7 +779,7 @@ class TestOldSyntax < Test::Unit::TestCase
|
|
857
779
|
assert (last_run.started_at - approx_started_at).abs < 5 # seconds
|
858
780
|
assert (last_run.terminated_at - approx_terminated_at).abs < 5 # seconds
|
859
781
|
end
|
860
|
-
|
782
|
+
|
861
783
|
should "request a re-import from scratch" do
|
862
784
|
c = Country.new
|
863
785
|
c.iso_3166 = 'JUNK'
|
@@ -866,35 +788,35 @@ class TestOldSyntax < Test::Unit::TestCase
|
|
866
788
|
DataMiner.run :resource_names => %w{ Country }, :from_scratch => true
|
867
789
|
assert !Country.exists?(:iso_3166 => 'JUNK')
|
868
790
|
end
|
869
|
-
|
791
|
+
|
870
792
|
should "know what runs were on a resource" do
|
871
793
|
DataMiner.run :resource_names => %w{ Country }
|
872
794
|
DataMiner.run :resource_names => %w{ Country }
|
873
795
|
assert Country.data_miner_runs.count > 0
|
874
796
|
end
|
875
797
|
end
|
876
|
-
|
798
|
+
|
877
799
|
if ENV['ALL'] == 'true' or ENV['SLOW'] == 'true'
|
878
800
|
should "allow errata to be specified with a shorthand, assuming the responder is the resource class itself" do
|
879
801
|
AircraftDeux.run_data_miner!
|
880
802
|
assert AircraftDeux.exists? :icao_code => 'DC91', :bts_aircraft_type_code => '630'
|
881
803
|
end
|
882
|
-
|
804
|
+
|
883
805
|
should "mine aircraft" do
|
884
806
|
Aircraft.run_data_miner!
|
885
807
|
assert Aircraft.exists? :icao_code => 'DC91', :bts_aircraft_type_code => '630'
|
886
808
|
end
|
887
|
-
|
809
|
+
|
888
810
|
should "mine automobile variants" do
|
889
811
|
AutomobileVariant.run_data_miner!
|
890
812
|
assert AutomobileVariant.count('make_name LIKE "%tesla"') > 0
|
891
813
|
end
|
892
|
-
|
814
|
+
|
893
815
|
should "mine T100 flight segments" do
|
894
816
|
T100FlightSegment.run_data_miner!
|
895
817
|
assert T100FlightSegment.count('dest_country_name LIKE "%United States"') > 0
|
896
818
|
end
|
897
|
-
|
819
|
+
|
898
820
|
should "mine residence survey responses" do
|
899
821
|
ResidentialEnergyConsumptionSurveyResponse.run_data_miner!
|
900
822
|
assert ResidentialEnergyConsumptionSurveyResponse.find(6).residence_class.start_with?('Single-family detached house')
|
data/test/test_tap.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
$:.push File.dirname(__FILE__)
|
2
|
+
require 'helper'
|
3
|
+
|
4
|
+
TestDatabase.load_models
|
5
|
+
|
6
|
+
class TappedAirport < ActiveRecord::Base
|
7
|
+
set_primary_key :iata_code
|
8
|
+
|
9
|
+
data_miner do
|
10
|
+
tap "Brighter Planet's sanitized airports table", "http://carbon:neutral@data.brighterplanet.com:5001", :source_table_name => 'airports'
|
11
|
+
# tap "Brighter Planet's sanitized airports table", "http://carbon:neutral@localhost:5000", :source_table_name => 'airports'
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
|
16
|
+
class TestTap < Test::Unit::TestCase
|
17
|
+
should "tap airports" do
|
18
|
+
TappedAirport.run_data_miner!
|
19
|
+
assert TappedAirport.count > 0
|
20
|
+
end
|
21
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_miner
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.3.
|
4
|
+
version: 1.3.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -11,11 +11,11 @@ authors:
|
|
11
11
|
autorequire:
|
12
12
|
bindir: bin
|
13
13
|
cert_chain: []
|
14
|
-
date: 2011-09
|
14
|
+
date: 2011-12-09 00:00:00.000000000Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
17
|
name: remote_table
|
18
|
-
requirement: &
|
18
|
+
requirement: &2163341440 !ruby/object:Gem::Requirement
|
19
19
|
none: false
|
20
20
|
requirements:
|
21
21
|
- - ! '>='
|
@@ -23,21 +23,10 @@ dependencies:
|
|
23
23
|
version: 1.2.2
|
24
24
|
type: :runtime
|
25
25
|
prerelease: false
|
26
|
-
version_requirements: *
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: escape
|
29
|
-
requirement: &2166719520 !ruby/object:Gem::Requirement
|
30
|
-
none: false
|
31
|
-
requirements:
|
32
|
-
- - ! '>='
|
33
|
-
- !ruby/object:Gem::Version
|
34
|
-
version: 0.0.4
|
35
|
-
type: :runtime
|
36
|
-
prerelease: false
|
37
|
-
version_requirements: *2166719520
|
26
|
+
version_requirements: *2163341440
|
38
27
|
- !ruby/object:Gem::Dependency
|
39
28
|
name: activerecord
|
40
|
-
requirement: &
|
29
|
+
requirement: &2163340680 !ruby/object:Gem::Requirement
|
41
30
|
none: false
|
42
31
|
requirements:
|
43
32
|
- - ! '>='
|
@@ -45,10 +34,10 @@ dependencies:
|
|
45
34
|
version: 2.3.4
|
46
35
|
type: :runtime
|
47
36
|
prerelease: false
|
48
|
-
version_requirements: *
|
37
|
+
version_requirements: *2163340680
|
49
38
|
- !ruby/object:Gem::Dependency
|
50
39
|
name: activesupport
|
51
|
-
requirement: &
|
40
|
+
requirement: &2163340220 !ruby/object:Gem::Requirement
|
52
41
|
none: false
|
53
42
|
requirements:
|
54
43
|
- - ! '>='
|
@@ -56,10 +45,10 @@ dependencies:
|
|
56
45
|
version: 2.3.4
|
57
46
|
type: :runtime
|
58
47
|
prerelease: false
|
59
|
-
version_requirements: *
|
48
|
+
version_requirements: *2163340220
|
60
49
|
- !ruby/object:Gem::Dependency
|
61
50
|
name: conversions
|
62
|
-
requirement: &
|
51
|
+
requirement: &2163339760 !ruby/object:Gem::Requirement
|
63
52
|
none: false
|
64
53
|
requirements:
|
65
54
|
- - ! '>='
|
@@ -67,10 +56,10 @@ dependencies:
|
|
67
56
|
version: 1.4.4
|
68
57
|
type: :runtime
|
69
58
|
prerelease: false
|
70
|
-
version_requirements: *
|
59
|
+
version_requirements: *2163339760
|
71
60
|
- !ruby/object:Gem::Dependency
|
72
61
|
name: blockenspiel
|
73
|
-
requirement: &
|
62
|
+
requirement: &2163339260 !ruby/object:Gem::Requirement
|
74
63
|
none: false
|
75
64
|
requirements:
|
76
65
|
- - ! '>='
|
@@ -78,43 +67,54 @@ dependencies:
|
|
78
67
|
version: 0.3.2
|
79
68
|
type: :runtime
|
80
69
|
prerelease: false
|
81
|
-
version_requirements: *
|
70
|
+
version_requirements: *2163339260
|
82
71
|
- !ruby/object:Gem::Dependency
|
83
|
-
name:
|
84
|
-
requirement: &
|
72
|
+
name: errata
|
73
|
+
requirement: &2163338800 !ruby/object:Gem::Requirement
|
85
74
|
none: false
|
86
75
|
requirements:
|
87
76
|
- - ! '>='
|
88
77
|
- !ruby/object:Gem::Version
|
89
|
-
version: 0.
|
78
|
+
version: 1.0.1
|
90
79
|
type: :runtime
|
91
80
|
prerelease: false
|
92
|
-
version_requirements: *
|
81
|
+
version_requirements: *2163338800
|
93
82
|
- !ruby/object:Gem::Dependency
|
94
|
-
name:
|
95
|
-
requirement: &
|
83
|
+
name: posix-spawn
|
84
|
+
requirement: &2163338420 !ruby/object:Gem::Requirement
|
96
85
|
none: false
|
97
86
|
requirements:
|
98
87
|
- - ! '>='
|
99
88
|
- !ruby/object:Gem::Version
|
100
|
-
version:
|
89
|
+
version: '0'
|
101
90
|
type: :runtime
|
102
91
|
prerelease: false
|
103
|
-
version_requirements: *
|
92
|
+
version_requirements: *2163338420
|
104
93
|
- !ruby/object:Gem::Dependency
|
105
|
-
name:
|
106
|
-
requirement: &
|
94
|
+
name: taps
|
95
|
+
requirement: &2163337960 !ruby/object:Gem::Requirement
|
107
96
|
none: false
|
108
97
|
requirements:
|
109
98
|
- - ! '>='
|
110
99
|
- !ruby/object:Gem::Version
|
111
|
-
version: 0
|
100
|
+
version: '0'
|
101
|
+
type: :runtime
|
102
|
+
prerelease: false
|
103
|
+
version_requirements: *2163337960
|
104
|
+
- !ruby/object:Gem::Dependency
|
105
|
+
name: mini_record-compat
|
106
|
+
requirement: &2163337540 !ruby/object:Gem::Requirement
|
107
|
+
none: false
|
108
|
+
requirements:
|
109
|
+
- - ! '>='
|
110
|
+
- !ruby/object:Gem::Version
|
111
|
+
version: '0'
|
112
112
|
type: :development
|
113
113
|
prerelease: false
|
114
|
-
version_requirements: *
|
114
|
+
version_requirements: *2163337540
|
115
115
|
- !ruby/object:Gem::Dependency
|
116
116
|
name: loose_tight_dictionary
|
117
|
-
requirement: &
|
117
|
+
requirement: &2163337040 !ruby/object:Gem::Requirement
|
118
118
|
none: false
|
119
119
|
requirements:
|
120
120
|
- - ! '>='
|
@@ -122,10 +122,10 @@ dependencies:
|
|
122
122
|
version: 0.0.5
|
123
123
|
type: :development
|
124
124
|
prerelease: false
|
125
|
-
version_requirements: *
|
125
|
+
version_requirements: *2163337040
|
126
126
|
- !ruby/object:Gem::Dependency
|
127
127
|
name: test-unit
|
128
|
-
requirement: &
|
128
|
+
requirement: &2163336620 !ruby/object:Gem::Requirement
|
129
129
|
none: false
|
130
130
|
requirements:
|
131
131
|
- - ! '>='
|
@@ -133,10 +133,10 @@ dependencies:
|
|
133
133
|
version: '0'
|
134
134
|
type: :development
|
135
135
|
prerelease: false
|
136
|
-
version_requirements: *
|
136
|
+
version_requirements: *2163336620
|
137
137
|
- !ruby/object:Gem::Dependency
|
138
138
|
name: shoulda
|
139
|
-
requirement: &
|
139
|
+
requirement: &2163336100 !ruby/object:Gem::Requirement
|
140
140
|
none: false
|
141
141
|
requirements:
|
142
142
|
- - ! '>='
|
@@ -144,10 +144,10 @@ dependencies:
|
|
144
144
|
version: '0'
|
145
145
|
type: :development
|
146
146
|
prerelease: false
|
147
|
-
version_requirements: *
|
147
|
+
version_requirements: *2163336100
|
148
148
|
- !ruby/object:Gem::Dependency
|
149
149
|
name: mysql
|
150
|
-
requirement: &
|
150
|
+
requirement: &2163335620 !ruby/object:Gem::Requirement
|
151
151
|
none: false
|
152
152
|
requirements:
|
153
153
|
- - ! '>='
|
@@ -155,10 +155,10 @@ dependencies:
|
|
155
155
|
version: '0'
|
156
156
|
type: :development
|
157
157
|
prerelease: false
|
158
|
-
version_requirements: *
|
158
|
+
version_requirements: *2163335620
|
159
159
|
- !ruby/object:Gem::Dependency
|
160
160
|
name: rake
|
161
|
-
requirement: &
|
161
|
+
requirement: &2163335040 !ruby/object:Gem::Requirement
|
162
162
|
none: false
|
163
163
|
requirements:
|
164
164
|
- - ! '>='
|
@@ -166,7 +166,7 @@ dependencies:
|
|
166
166
|
version: '0'
|
167
167
|
type: :development
|
168
168
|
prerelease: false
|
169
|
-
version_requirements: *
|
169
|
+
version_requirements: *2163335040
|
170
170
|
description: Mine remote data into your ActiveRecord models. You can also convert
|
171
171
|
units.
|
172
172
|
email:
|
@@ -203,6 +203,7 @@ files:
|
|
203
203
|
- test/test_data_miner_attribute.rb
|
204
204
|
- test/test_data_miner_process.rb
|
205
205
|
- test/test_old_syntax.rb
|
206
|
+
- test/test_tap.rb
|
206
207
|
homepage: https://github.com/seamusabshere/data_miner
|
207
208
|
licenses: []
|
208
209
|
post_install_message:
|
@@ -223,7 +224,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
223
224
|
version: '0'
|
224
225
|
requirements: []
|
225
226
|
rubyforge_project: data_miner
|
226
|
-
rubygems_version: 1.8.
|
227
|
+
rubygems_version: 1.8.10
|
227
228
|
signing_key:
|
228
229
|
specification_version: 3
|
229
230
|
summary: Mine remote data into your ActiveRecord models.
|
@@ -238,3 +239,4 @@ test_files:
|
|
238
239
|
- test/test_data_miner_attribute.rb
|
239
240
|
- test/test_data_miner_process.rb
|
240
241
|
- test/test_old_syntax.rb
|
242
|
+
- test/test_tap.rb
|