data_miner 2.4.1 → 2.5.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +15 -0
- data/LICENSE +1 -1
- data/README.markdown +3 -3
- data/data_miner.gemspec +6 -5
- data/lib/data_miner/step/import.rb +31 -22
- data/lib/data_miner/step/sql.rb +9 -10
- data/lib/data_miner/unit_converter/alchemist.rb +1 -1
- data/lib/data_miner/version.rb +1 -1
- data/lib/data_miner.rb +1 -1
- data/test/data_miner/step/test_import.rb +3 -1
- data/test/data_miner/unit_converter/test_alchemist.rb +8 -1
- data/test/data_miner/unit_converter/test_conversions.rb +9 -4
- data/test/helper.rb +1 -2
- data/test/support/breed.rb +1 -1
- data/test/test_data_miner.rb +36 -0
- metadata +35 -13
data/CHANGELOG
CHANGED
@@ -1,3 +1,18 @@
|
|
1
|
+
2.5.0 / 2013-03-15
|
2
|
+
|
3
|
+
* Breaking changes
|
4
|
+
|
5
|
+
* Lock to roo 1.10.1 and activerecord > 3
|
6
|
+
|
7
|
+
* Enhancements
|
8
|
+
|
9
|
+
* If you don't define a `key` in a data_miner block, then just keep appending records - thanks @towerhe
|
10
|
+
* Got rid of ridiculous "install unicode-utils" warning
|
11
|
+
|
12
|
+
* Bug fixes
|
13
|
+
|
14
|
+
* Don't die because an Alchemist::NumericConversion is passed to a function that expects a plain Float - thanks @towerhe
|
15
|
+
|
1
16
|
2.4.1 / 2012-07-26
|
2
17
|
|
3
18
|
* Enhancements
|
data/LICENSE
CHANGED
data/README.markdown
CHANGED
@@ -18,7 +18,7 @@ The killer combination for us is:
|
|
18
18
|
1. [`active_record_inline_schema`](https://github.com/seamusabshere/active_record_inline_schema) - define table structure
|
19
19
|
2. [`remote_table`](https://github.com/seamusabshere/remote_table) - download data and parse it
|
20
20
|
3. [`errata`](https://github.com/seamusabshere/errata) - apply corrections in a transparent way
|
21
|
-
4. [`data_miner`](https://github.com/seamusabshere/
|
21
|
+
4. [`data_miner`](https://github.com/seamusabshere/data_miner) (this library!) - import data idempotently
|
22
22
|
|
23
23
|
## Documentation
|
24
24
|
|
@@ -115,13 +115,13 @@ And many more - look for the `data_miner.rb` file that corresponds to each model
|
|
115
115
|
* Andy Rossmeissl <andy@rossmeissl.net>
|
116
116
|
* Derek Kastner <dkastner@gmail.com>
|
117
117
|
* Ian Hough <ijhough@gmail.com>
|
118
|
+
* Tower He <towerhe@gmail.com>
|
118
119
|
|
119
120
|
## Wishlist
|
120
121
|
|
121
122
|
* Make the tests real unit tests
|
122
123
|
* sql steps shouldn't shell out if binaries are missing
|
123
|
-
* csv import step that uses pg_restore, mysqlimport, etc.
|
124
124
|
|
125
125
|
## Copyright
|
126
126
|
|
127
|
-
Copyright (c)
|
127
|
+
Copyright (c) 2013 Seamus Abshere
|
data/data_miner.gemspec
CHANGED
@@ -4,8 +4,8 @@ require File.expand_path("../lib/data_miner/version", __FILE__)
|
|
4
4
|
Gem::Specification.new do |s|
|
5
5
|
s.name = "data_miner"
|
6
6
|
s.version = DataMiner::VERSION
|
7
|
-
s.authors = ["Seamus Abshere", "Andy Rossmeissl", "Derek Kastner"]
|
8
|
-
s.email = ["seamus@abshere.net"]
|
7
|
+
s.authors = ["Seamus Abshere", "Andy Rossmeissl", "Derek Kastner", "Ian Hough", "Tower He"]
|
8
|
+
s.email = ["seamus@abshere.net", "rossmeissl@gmail.com", "dkastner@gmail.com", "ijhough@gmail.com", "towerhe@gmail.com"]
|
9
9
|
s.homepage = "https://github.com/seamusabshere/data_miner"
|
10
10
|
s.summary = %{Download, pull out of a ZIP/TAR/GZ/BZ2 archive, parse, correct, and import XLS, ODS, XML, CSV, HTML, etc. into your ActiveRecord models.}
|
11
11
|
s.description = %q{Download, pull out of a ZIP/TAR/GZ/BZ2 archive, parse, correct, and import XLS, ODS, XML, CSV, HTML, etc. into your ActiveRecord models. You can also convert units.}
|
@@ -18,14 +18,15 @@ Gem::Specification.new do |s|
|
|
18
18
|
s.require_paths = ["lib"]
|
19
19
|
|
20
20
|
s.add_runtime_dependency 'aasm'
|
21
|
-
s.add_runtime_dependency 'active_record_inline_schema', '>=0.
|
22
|
-
s.add_runtime_dependency 'activerecord', '
|
23
|
-
s.add_runtime_dependency 'activesupport', '
|
21
|
+
s.add_runtime_dependency 'active_record_inline_schema', '>=0.6.1'
|
22
|
+
s.add_runtime_dependency 'activerecord', '> 3'
|
23
|
+
s.add_runtime_dependency 'activesupport', '> 3'
|
24
24
|
s.add_runtime_dependency 'errata', '>=1.0.1'
|
25
25
|
s.add_runtime_dependency 'remote_table', '>=2.0.2'
|
26
26
|
s.add_runtime_dependency 'upsert', '>=0.3.1'
|
27
27
|
s.add_runtime_dependency 'posix-spawn'
|
28
28
|
s.add_runtime_dependency 'unix_utils'
|
29
|
+
s.add_runtime_dependency 'roo', '1.10.1'
|
29
30
|
|
30
31
|
s.add_development_dependency 'dkastner-alchemist'
|
31
32
|
s.add_development_dependency 'conversions'
|
@@ -19,7 +19,7 @@ class DataMiner
|
|
19
19
|
# Description of what this step does.
|
20
20
|
# @return [String]
|
21
21
|
attr_reader :description
|
22
|
-
|
22
|
+
|
23
23
|
# @private
|
24
24
|
def initialize(script, description, settings, &blk)
|
25
25
|
settings = settings.symbolize_keys
|
@@ -83,27 +83,9 @@ class DataMiner
|
|
83
83
|
|
84
84
|
# @private
|
85
85
|
def start
|
86
|
-
|
87
|
-
c = ActiveRecord::Base.connection_pool.checkout
|
88
|
-
Upsert.stream(c, model.table_name) do |upsert|
|
89
|
-
table.each do |row|
|
90
|
-
selector = { @key => attributes[@key].read(row) }
|
91
|
-
document = attributes.except(@key).inject({}) do |memo, (_, attr)|
|
92
|
-
memo.merge! attr.updates(row)
|
93
|
-
memo
|
94
|
-
end
|
95
|
-
upsert.row selector, document
|
96
|
-
end
|
97
|
-
end
|
98
|
-
ActiveRecord::Base.connection_pool.checkin c
|
99
|
-
else
|
100
|
-
table.each do |row|
|
101
|
-
record = model.send "find_or_initialize_by_#{@key}", attributes[@key].read(row)
|
102
|
-
attributes.each { |_, attr| attr.set_from_row record, row }
|
103
|
-
record.save!
|
104
|
-
end
|
105
|
-
end
|
86
|
+
upsert_enabled? ? save_with_upsert : save_with_activerecord
|
106
87
|
refresh
|
88
|
+
|
107
89
|
nil
|
108
90
|
end
|
109
91
|
|
@@ -115,6 +97,33 @@ class DataMiner
|
|
115
97
|
|
116
98
|
private
|
117
99
|
|
100
|
+
def upsert_enabled?
|
101
|
+
(not validate?) and (storing_primary_key? or table_has_autoincrementing_primary_key?)
|
102
|
+
end
|
103
|
+
|
104
|
+
def save_with_upsert
|
105
|
+
c = ActiveRecord::Base.connection_pool.checkout
|
106
|
+
Upsert.stream(c, model.table_name) do |upsert|
|
107
|
+
table.each do |row|
|
108
|
+
selector = @key ? { @key => attributes[@key].read(row) } : { model.primary_key => nil }
|
109
|
+
document = attributes.except(@key).inject({}) do |memo, (_, attr)|
|
110
|
+
memo.merge! attr.updates(row)
|
111
|
+
memo
|
112
|
+
end
|
113
|
+
upsert.row selector, document
|
114
|
+
end
|
115
|
+
end
|
116
|
+
ActiveRecord::Base.connection_pool.checkin c
|
117
|
+
end
|
118
|
+
|
119
|
+
def save_with_activerecord
|
120
|
+
table.each do |row|
|
121
|
+
record = @key ? model.send("find_or_initialize_by_#{@key}", attributes[@key].read(row)) : model.new
|
122
|
+
attributes.each { |_, attr| attr.set_from_row record, row }
|
123
|
+
record.save!
|
124
|
+
end
|
125
|
+
end
|
126
|
+
|
118
127
|
def table_has_autoincrementing_primary_key?
|
119
128
|
return @table_has_autoincrementing_primary_key_query.first if @table_has_autoincrementing_primary_key_query.is_a?(Array)
|
120
129
|
c = ActiveRecord::Base.connection_pool.checkout
|
@@ -140,7 +149,7 @@ class DataMiner
|
|
140
149
|
|
141
150
|
def storing_primary_key?
|
142
151
|
return @storing_primary_key_query.first if @storing_primary_key_query.is_a?(Array)
|
143
|
-
@storing_primary_key_query = [attributes.has_key?(model.primary_key.to_sym)]
|
152
|
+
@storing_primary_key_query = [model.primary_key && attributes.has_key?(model.primary_key.to_sym)]
|
144
153
|
@storing_primary_key_query.first
|
145
154
|
end
|
146
155
|
|
data/lib/data_miner/step/sql.rb
CHANGED
@@ -53,7 +53,7 @@ class DataMiner
|
|
53
53
|
private
|
54
54
|
|
55
55
|
def config
|
56
|
-
|
56
|
+
if ActiveRecord::Base.respond_to?(:connection_config)
|
57
57
|
ActiveRecord::Base.connection_config
|
58
58
|
else
|
59
59
|
ActiveRecord::Base.connection_pool.spec.config
|
@@ -82,7 +82,7 @@ class DataMiner
|
|
82
82
|
::Process.waitpid pid
|
83
83
|
end
|
84
84
|
unless $?.success?
|
85
|
-
raise RuntimeError, "[data_miner] Failed: #{argv.join(' ').inspect}"
|
85
|
+
raise RuntimeError, "[data_miner] Failed: ARGV #{argv.join(' ').inspect}"
|
86
86
|
end
|
87
87
|
nil
|
88
88
|
end
|
@@ -90,25 +90,24 @@ class DataMiner
|
|
90
90
|
alias :mysql2 :mysql
|
91
91
|
|
92
92
|
def postgresql(path)
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
93
|
+
env = {}
|
94
|
+
env['PGHOST'] = config['host'] if config['host']
|
95
|
+
env['PGPORT'] = config['port'].to_s if config['port']
|
96
|
+
env['PGPASSWORD'] = config['password'].to_s if config['password']
|
97
|
+
env['PGUSER'] = config['username'].to_s if config['username']
|
98
98
|
|
99
99
|
argv = [
|
100
100
|
'psql',
|
101
|
-
connect,
|
102
101
|
'--quiet',
|
103
102
|
'--dbname', config[:database],
|
104
103
|
'--file', path
|
105
104
|
].flatten
|
106
105
|
|
107
|
-
child = POSIX::Spawn::Child.new(*argv)
|
106
|
+
child = POSIX::Spawn::Child.new(*([env]+argv))
|
108
107
|
$stderr.puts child.out
|
109
108
|
$stderr.puts child.err
|
110
109
|
unless child.success?
|
111
|
-
raise RuntimeError, "[data_miner] Failed: #{argv.join(' ').inspect} (#{child.err.inspect})"
|
110
|
+
raise RuntimeError, "[data_miner] Failed: ENV #{env.inspect} ARGV #{argv.join(' ').inspect} (#{child.err.inspect})"
|
112
111
|
end
|
113
112
|
nil
|
114
113
|
end
|
data/lib/data_miner/version.rb
CHANGED
data/lib/data_miner.rb
CHANGED
@@ -2,7 +2,9 @@ require 'helper'
|
|
2
2
|
init_database
|
3
3
|
require 'earth'
|
4
4
|
|
5
|
-
|
5
|
+
require 'earth/residence'
|
6
|
+
require 'earth/electricity'
|
7
|
+
require 'earth/hospitality'
|
6
8
|
|
7
9
|
class PetBlue < ActiveRecord::Base
|
8
10
|
data_miner do
|
@@ -2,12 +2,19 @@ require 'helper'
|
|
2
2
|
|
3
3
|
describe 'DataMiner::UnitConverter::Alchemist' do
|
4
4
|
before do
|
5
|
+
@original_converter = DataMiner.unit_converter
|
5
6
|
DataMiner.unit_converter = :alchemist
|
6
7
|
end
|
7
8
|
|
9
|
+
after do
|
10
|
+
DataMiner.unit_converter = @original_converter
|
11
|
+
end
|
12
|
+
|
8
13
|
describe '#convert' do
|
9
14
|
it 'converts a value from one unit to another' do
|
10
|
-
DataMiner.unit_converter.convert 3.5, :kilograms, :pounds
|
15
|
+
value = DataMiner.unit_converter.convert 3.5, :kilograms, :pounds
|
16
|
+
assert value.is_a?(Float)
|
17
|
+
value.must_be_close_to 7.71617918
|
11
18
|
end
|
12
19
|
end
|
13
20
|
end
|
@@ -2,14 +2,19 @@ require 'helper'
|
|
2
2
|
|
3
3
|
describe 'DataMiner::UnitConverter::Conversions' do
|
4
4
|
before do
|
5
|
-
|
5
|
+
@original_converter = DataMiner.unit_converter
|
6
|
+
DataMiner.unit_converter = :conversions
|
7
|
+
end
|
8
|
+
|
9
|
+
after do
|
10
|
+
DataMiner.unit_converter = @original_converter
|
6
11
|
end
|
7
12
|
|
8
13
|
describe '#convert' do
|
9
14
|
it 'converts a value from one unit to another' do
|
10
|
-
|
11
|
-
|
12
|
-
|
15
|
+
value = DataMiner.unit_converter.convert 3.5, :kilograms, :pounds
|
16
|
+
assert value.is_a?(Float)
|
17
|
+
value.must_be_close_to 7.71617918
|
13
18
|
end
|
14
19
|
end
|
15
20
|
end
|
data/test/helper.rb
CHANGED
@@ -10,8 +10,7 @@ end
|
|
10
10
|
require 'minitest/spec'
|
11
11
|
require 'minitest/autorun'
|
12
12
|
require 'minitest/reporters'
|
13
|
-
MiniTest::
|
14
|
-
MiniTest::Unit.runner.reporters << MiniTest::Reporters::SpecReporter.new
|
13
|
+
MiniTest::Reporters.use!
|
15
14
|
|
16
15
|
require 'active_record'
|
17
16
|
require 'logger'
|
data/test/support/breed.rb
CHANGED
@@ -5,7 +5,7 @@ class Breed < ActiveRecord::Base
|
|
5
5
|
def update_average_age!
|
6
6
|
# make sure pet is populated
|
7
7
|
Pet.run_data_miner!
|
8
|
-
update_all %{
|
8
|
+
update_all %{average_age = (SELECT AVG(pets.age) FROM pets WHERE pets.breed_id = breeds.name)}
|
9
9
|
end
|
10
10
|
end
|
11
11
|
self.primary_key = "name"
|
data/test/test_data_miner.rb
CHANGED
@@ -80,6 +80,10 @@ describe DataMiner do
|
|
80
80
|
Pet.run_data_miner!
|
81
81
|
Pet.find('Pierre').weight.must_be_close_to 1.9958 # 4.4 pounds in kilograms
|
82
82
|
end
|
83
|
+
it "doesn't convert nil to 0 when converting units" do
|
84
|
+
Pet.run_data_miner!
|
85
|
+
Pet.find('Nemo').age.must_be_nil
|
86
|
+
end
|
83
87
|
it "sets units" do
|
84
88
|
Pet.run_data_miner!
|
85
89
|
Pet.find('Pierre').age_units.must_equal 'years'
|
@@ -119,6 +123,38 @@ describe DataMiner do
|
|
119
123
|
Pet3.run_data_miner!
|
120
124
|
end.must_raise RuntimeError, /exist/i
|
121
125
|
end
|
126
|
+
end
|
127
|
+
|
128
|
+
describe 'when the key attribute is not defined' do
|
129
|
+
class PetFunny < ActiveRecord::Base
|
130
|
+
self.primary_key = false
|
131
|
+
col :name
|
132
|
+
col :breed
|
133
|
+
col :color
|
134
|
+
|
135
|
+
data_miner do
|
136
|
+
import 'without a key', url: "file://#{PETS_FUNNY}" do
|
137
|
+
store :name
|
138
|
+
store :breed
|
139
|
+
store :color
|
140
|
+
end
|
141
|
+
end
|
142
|
+
end
|
143
|
+
PetFunny.auto_upgrade!
|
144
|
+
|
145
|
+
before { PetFunny.delete_all }
|
146
|
+
|
147
|
+
it 'imports the example data' do
|
148
|
+
PetFunny.run_data_miner!
|
149
|
+
PetFunny.must_be :exists?
|
150
|
+
end
|
151
|
+
|
152
|
+
it 'imports new example data for each run' do
|
153
|
+
PetFunny.run_data_miner!
|
154
|
+
first_count = PetFunny.count
|
122
155
|
|
156
|
+
PetFunny.run_data_miner!
|
157
|
+
PetFunny.count.must_equal first_count * 2
|
158
|
+
end
|
123
159
|
end
|
124
160
|
end
|
metadata
CHANGED
@@ -1,17 +1,19 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_miner
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.
|
4
|
+
version: 2.5.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Seamus Abshere
|
9
9
|
- Andy Rossmeissl
|
10
10
|
- Derek Kastner
|
11
|
+
- Ian Hough
|
12
|
+
- Tower He
|
11
13
|
autorequire:
|
12
14
|
bindir: bin
|
13
15
|
cert_chain: []
|
14
|
-
date:
|
16
|
+
date: 2013-03-15 00:00:00.000000000 Z
|
15
17
|
dependencies:
|
16
18
|
- !ruby/object:Gem::Dependency
|
17
19
|
name: aasm
|
@@ -36,7 +38,7 @@ dependencies:
|
|
36
38
|
requirements:
|
37
39
|
- - ! '>='
|
38
40
|
- !ruby/object:Gem::Version
|
39
|
-
version: 0.
|
41
|
+
version: 0.6.1
|
40
42
|
type: :runtime
|
41
43
|
prerelease: false
|
42
44
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -44,39 +46,39 @@ dependencies:
|
|
44
46
|
requirements:
|
45
47
|
- - ! '>='
|
46
48
|
- !ruby/object:Gem::Version
|
47
|
-
version: 0.
|
49
|
+
version: 0.6.1
|
48
50
|
- !ruby/object:Gem::Dependency
|
49
51
|
name: activerecord
|
50
52
|
requirement: !ruby/object:Gem::Requirement
|
51
53
|
none: false
|
52
54
|
requirements:
|
53
|
-
- - ! '
|
55
|
+
- - ! '>'
|
54
56
|
- !ruby/object:Gem::Version
|
55
|
-
version:
|
57
|
+
version: '3'
|
56
58
|
type: :runtime
|
57
59
|
prerelease: false
|
58
60
|
version_requirements: !ruby/object:Gem::Requirement
|
59
61
|
none: false
|
60
62
|
requirements:
|
61
|
-
- - ! '
|
63
|
+
- - ! '>'
|
62
64
|
- !ruby/object:Gem::Version
|
63
|
-
version:
|
65
|
+
version: '3'
|
64
66
|
- !ruby/object:Gem::Dependency
|
65
67
|
name: activesupport
|
66
68
|
requirement: !ruby/object:Gem::Requirement
|
67
69
|
none: false
|
68
70
|
requirements:
|
69
|
-
- - ! '
|
71
|
+
- - ! '>'
|
70
72
|
- !ruby/object:Gem::Version
|
71
|
-
version:
|
73
|
+
version: '3'
|
72
74
|
type: :runtime
|
73
75
|
prerelease: false
|
74
76
|
version_requirements: !ruby/object:Gem::Requirement
|
75
77
|
none: false
|
76
78
|
requirements:
|
77
|
-
- - ! '
|
79
|
+
- - ! '>'
|
78
80
|
- !ruby/object:Gem::Version
|
79
|
-
version:
|
81
|
+
version: '3'
|
80
82
|
- !ruby/object:Gem::Dependency
|
81
83
|
name: errata
|
82
84
|
requirement: !ruby/object:Gem::Requirement
|
@@ -157,6 +159,22 @@ dependencies:
|
|
157
159
|
- - ! '>='
|
158
160
|
- !ruby/object:Gem::Version
|
159
161
|
version: '0'
|
162
|
+
- !ruby/object:Gem::Dependency
|
163
|
+
name: roo
|
164
|
+
requirement: !ruby/object:Gem::Requirement
|
165
|
+
none: false
|
166
|
+
requirements:
|
167
|
+
- - '='
|
168
|
+
- !ruby/object:Gem::Version
|
169
|
+
version: 1.10.1
|
170
|
+
type: :runtime
|
171
|
+
prerelease: false
|
172
|
+
version_requirements: !ruby/object:Gem::Requirement
|
173
|
+
none: false
|
174
|
+
requirements:
|
175
|
+
- - '='
|
176
|
+
- !ruby/object:Gem::Version
|
177
|
+
version: 1.10.1
|
160
178
|
- !ruby/object:Gem::Dependency
|
161
179
|
name: dkastner-alchemist
|
162
180
|
requirement: !ruby/object:Gem::Requirement
|
@@ -370,6 +388,10 @@ description: Download, pull out of a ZIP/TAR/GZ/BZ2 archive, parse, correct, and
|
|
370
388
|
units.
|
371
389
|
email:
|
372
390
|
- seamus@abshere.net
|
391
|
+
- rossmeissl@gmail.com
|
392
|
+
- dkastner@gmail.com
|
393
|
+
- ijhough@gmail.com
|
394
|
+
- towerhe@gmail.com
|
373
395
|
executables: []
|
374
396
|
extensions: []
|
375
397
|
extra_rdoc_files: []
|
@@ -441,7 +463,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
441
463
|
version: '0'
|
442
464
|
requirements: []
|
443
465
|
rubyforge_project: data_miner
|
444
|
-
rubygems_version: 1.8.
|
466
|
+
rubygems_version: 1.8.25
|
445
467
|
signing_key:
|
446
468
|
specification_version: 3
|
447
469
|
summary: Download, pull out of a ZIP/TAR/GZ/BZ2 archive, parse, correct, and import
|