data_miner 2.4.1 → 2.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +15 -0
- data/LICENSE +1 -1
- data/README.markdown +3 -3
- data/data_miner.gemspec +6 -5
- data/lib/data_miner/step/import.rb +31 -22
- data/lib/data_miner/step/sql.rb +9 -10
- data/lib/data_miner/unit_converter/alchemist.rb +1 -1
- data/lib/data_miner/version.rb +1 -1
- data/lib/data_miner.rb +1 -1
- data/test/data_miner/step/test_import.rb +3 -1
- data/test/data_miner/unit_converter/test_alchemist.rb +8 -1
- data/test/data_miner/unit_converter/test_conversions.rb +9 -4
- data/test/helper.rb +1 -2
- data/test/support/breed.rb +1 -1
- data/test/test_data_miner.rb +36 -0
- metadata +35 -13
data/CHANGELOG
CHANGED
@@ -1,3 +1,18 @@
|
|
1
|
+
2.5.0 / 2013-03-15
|
2
|
+
|
3
|
+
* Breaking changes
|
4
|
+
|
5
|
+
* Lock to roo 1.10.1 and activerecord > 3
|
6
|
+
|
7
|
+
* Enhancements
|
8
|
+
|
9
|
+
* If you don't define a `key` in a data_miner block, then just keep appending records - thanks @towerhe
|
10
|
+
* Got rid of ridiculous "install unicode-utils" warning
|
11
|
+
|
12
|
+
* Bug fixes
|
13
|
+
|
14
|
+
* Don't die because an Alchemist::NumericConversion is passed to a function that expects a plain Float - thanks @towerhe
|
15
|
+
|
1
16
|
2.4.1 / 2012-07-26
|
2
17
|
|
3
18
|
* Enhancements
|
data/LICENSE
CHANGED
data/README.markdown
CHANGED
@@ -18,7 +18,7 @@ The killer combination for us is:
|
|
18
18
|
1. [`active_record_inline_schema`](https://github.com/seamusabshere/active_record_inline_schema) - define table structure
|
19
19
|
2. [`remote_table`](https://github.com/seamusabshere/remote_table) - download data and parse it
|
20
20
|
3. [`errata`](https://github.com/seamusabshere/errata) - apply corrections in a transparent way
|
21
|
-
4. [`data_miner`](https://github.com/seamusabshere/
|
21
|
+
4. [`data_miner`](https://github.com/seamusabshere/data_miner) (this library!) - import data idempotently
|
22
22
|
|
23
23
|
## Documentation
|
24
24
|
|
@@ -115,13 +115,13 @@ And many more - look for the `data_miner.rb` file that corresponds to each model
|
|
115
115
|
* Andy Rossmeissl <andy@rossmeissl.net>
|
116
116
|
* Derek Kastner <dkastner@gmail.com>
|
117
117
|
* Ian Hough <ijhough@gmail.com>
|
118
|
+
* Tower He <towerhe@gmail.com>
|
118
119
|
|
119
120
|
## Wishlist
|
120
121
|
|
121
122
|
* Make the tests real unit tests
|
122
123
|
* sql steps shouldn't shell out if binaries are missing
|
123
|
-
* csv import step that uses pg_restore, mysqlimport, etc.
|
124
124
|
|
125
125
|
## Copyright
|
126
126
|
|
127
|
-
Copyright (c)
|
127
|
+
Copyright (c) 2013 Seamus Abshere
|
data/data_miner.gemspec
CHANGED
@@ -4,8 +4,8 @@ require File.expand_path("../lib/data_miner/version", __FILE__)
|
|
4
4
|
Gem::Specification.new do |s|
|
5
5
|
s.name = "data_miner"
|
6
6
|
s.version = DataMiner::VERSION
|
7
|
-
s.authors = ["Seamus Abshere", "Andy Rossmeissl", "Derek Kastner"]
|
8
|
-
s.email = ["seamus@abshere.net"]
|
7
|
+
s.authors = ["Seamus Abshere", "Andy Rossmeissl", "Derek Kastner", "Ian Hough", "Tower He"]
|
8
|
+
s.email = ["seamus@abshere.net", "rossmeissl@gmail.com", "dkastner@gmail.com", "ijhough@gmail.com", "towerhe@gmail.com"]
|
9
9
|
s.homepage = "https://github.com/seamusabshere/data_miner"
|
10
10
|
s.summary = %{Download, pull out of a ZIP/TAR/GZ/BZ2 archive, parse, correct, and import XLS, ODS, XML, CSV, HTML, etc. into your ActiveRecord models.}
|
11
11
|
s.description = %q{Download, pull out of a ZIP/TAR/GZ/BZ2 archive, parse, correct, and import XLS, ODS, XML, CSV, HTML, etc. into your ActiveRecord models. You can also convert units.}
|
@@ -18,14 +18,15 @@ Gem::Specification.new do |s|
|
|
18
18
|
s.require_paths = ["lib"]
|
19
19
|
|
20
20
|
s.add_runtime_dependency 'aasm'
|
21
|
-
s.add_runtime_dependency 'active_record_inline_schema', '>=0.
|
22
|
-
s.add_runtime_dependency 'activerecord', '
|
23
|
-
s.add_runtime_dependency 'activesupport', '
|
21
|
+
s.add_runtime_dependency 'active_record_inline_schema', '>=0.6.1'
|
22
|
+
s.add_runtime_dependency 'activerecord', '> 3'
|
23
|
+
s.add_runtime_dependency 'activesupport', '> 3'
|
24
24
|
s.add_runtime_dependency 'errata', '>=1.0.1'
|
25
25
|
s.add_runtime_dependency 'remote_table', '>=2.0.2'
|
26
26
|
s.add_runtime_dependency 'upsert', '>=0.3.1'
|
27
27
|
s.add_runtime_dependency 'posix-spawn'
|
28
28
|
s.add_runtime_dependency 'unix_utils'
|
29
|
+
s.add_runtime_dependency 'roo', '1.10.1'
|
29
30
|
|
30
31
|
s.add_development_dependency 'dkastner-alchemist'
|
31
32
|
s.add_development_dependency 'conversions'
|
data/lib/data_miner/step/import.rb
CHANGED
@@ -19,7 +19,7 @@ class DataMiner
|
|
19
19
|
# Description of what this step does.
|
20
20
|
# @return [String]
|
21
21
|
attr_reader :description
|
22
|
-
|
22
|
+
|
23
23
|
# @private
|
24
24
|
def initialize(script, description, settings, &blk)
|
25
25
|
settings = settings.symbolize_keys
|
@@ -83,27 +83,9 @@ class DataMiner
|
|
83
83
|
|
84
84
|
# @private
|
85
85
|
def start
|
86
|
-
|
87
|
-
c = ActiveRecord::Base.connection_pool.checkout
|
88
|
-
Upsert.stream(c, model.table_name) do |upsert|
|
89
|
-
table.each do |row|
|
90
|
-
selector = { @key => attributes[@key].read(row) }
|
91
|
-
document = attributes.except(@key).inject({}) do |memo, (_, attr)|
|
92
|
-
memo.merge! attr.updates(row)
|
93
|
-
memo
|
94
|
-
end
|
95
|
-
upsert.row selector, document
|
96
|
-
end
|
97
|
-
end
|
98
|
-
ActiveRecord::Base.connection_pool.checkin c
|
99
|
-
else
|
100
|
-
table.each do |row|
|
101
|
-
record = model.send "find_or_initialize_by_#{@key}", attributes[@key].read(row)
|
102
|
-
attributes.each { |_, attr| attr.set_from_row record, row }
|
103
|
-
record.save!
|
104
|
-
end
|
105
|
-
end
|
86
|
+
upsert_enabled? ? save_with_upsert : save_with_activerecord
|
106
87
|
refresh
|
88
|
+
|
107
89
|
nil
|
108
90
|
end
|
109
91
|
|
@@ -115,6 +97,33 @@ class DataMiner
|
|
115
97
|
|
116
98
|
private
|
117
99
|
|
100
|
+
def upsert_enabled?
|
101
|
+
(not validate?) and (storing_primary_key? or table_has_autoincrementing_primary_key?)
|
102
|
+
end
|
103
|
+
|
104
|
+
def save_with_upsert
|
105
|
+
c = ActiveRecord::Base.connection_pool.checkout
|
106
|
+
Upsert.stream(c, model.table_name) do |upsert|
|
107
|
+
table.each do |row|
|
108
|
+
selector = @key ? { @key => attributes[@key].read(row) } : { model.primary_key => nil }
|
109
|
+
document = attributes.except(@key).inject({}) do |memo, (_, attr)|
|
110
|
+
memo.merge! attr.updates(row)
|
111
|
+
memo
|
112
|
+
end
|
113
|
+
upsert.row selector, document
|
114
|
+
end
|
115
|
+
end
|
116
|
+
ActiveRecord::Base.connection_pool.checkin c
|
117
|
+
end
|
118
|
+
|
119
|
+
def save_with_activerecord
|
120
|
+
table.each do |row|
|
121
|
+
record = @key ? model.send("find_or_initialize_by_#{@key}", attributes[@key].read(row)) : model.new
|
122
|
+
attributes.each { |_, attr| attr.set_from_row record, row }
|
123
|
+
record.save!
|
124
|
+
end
|
125
|
+
end
|
126
|
+
|
118
127
|
def table_has_autoincrementing_primary_key?
|
119
128
|
return @table_has_autoincrementing_primary_key_query.first if @table_has_autoincrementing_primary_key_query.is_a?(Array)
|
120
129
|
c = ActiveRecord::Base.connection_pool.checkout
|
@@ -140,7 +149,7 @@ class DataMiner
|
|
140
149
|
|
141
150
|
def storing_primary_key?
|
142
151
|
return @storing_primary_key_query.first if @storing_primary_key_query.is_a?(Array)
|
143
|
-
@storing_primary_key_query = [attributes.has_key?(model.primary_key.to_sym)]
|
152
|
+
@storing_primary_key_query = [model.primary_key && attributes.has_key?(model.primary_key.to_sym)]
|
144
153
|
@storing_primary_key_query.first
|
145
154
|
end
|
146
155
|
|
data/lib/data_miner/step/sql.rb
CHANGED
@@ -53,7 +53,7 @@ class DataMiner
|
|
53
53
|
private
|
54
54
|
|
55
55
|
def config
|
56
|
-
|
56
|
+
if ActiveRecord::Base.respond_to?(:connection_config)
|
57
57
|
ActiveRecord::Base.connection_config
|
58
58
|
else
|
59
59
|
ActiveRecord::Base.connection_pool.spec.config
|
@@ -82,7 +82,7 @@ class DataMiner
|
|
82
82
|
::Process.waitpid pid
|
83
83
|
end
|
84
84
|
unless $?.success?
|
85
|
-
raise RuntimeError, "[data_miner] Failed: #{argv.join(' ').inspect}"
|
85
|
+
raise RuntimeError, "[data_miner] Failed: ARGV #{argv.join(' ').inspect}"
|
86
86
|
end
|
87
87
|
nil
|
88
88
|
end
|
@@ -90,25 +90,24 @@ class DataMiner
|
|
90
90
|
alias :mysql2 :mysql
|
91
91
|
|
92
92
|
def postgresql(path)
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
93
|
+
env = {}
|
94
|
+
env['PGHOST'] = config['host'] if config['host']
|
95
|
+
env['PGPORT'] = config['port'].to_s if config['port']
|
96
|
+
env['PGPASSWORD'] = config['password'].to_s if config['password']
|
97
|
+
env['PGUSER'] = config['username'].to_s if config['username']
|
98
98
|
|
99
99
|
argv = [
|
100
100
|
'psql',
|
101
|
-
connect,
|
102
101
|
'--quiet',
|
103
102
|
'--dbname', config[:database],
|
104
103
|
'--file', path
|
105
104
|
].flatten
|
106
105
|
|
107
|
-
child = POSIX::Spawn::Child.new(*argv)
|
106
|
+
child = POSIX::Spawn::Child.new(*([env]+argv))
|
108
107
|
$stderr.puts child.out
|
109
108
|
$stderr.puts child.err
|
110
109
|
unless child.success?
|
111
|
-
raise RuntimeError, "[data_miner] Failed: #{argv.join(' ').inspect} (#{child.err.inspect})"
|
110
|
+
raise RuntimeError, "[data_miner] Failed: ENV #{env.inspect} ARGV #{argv.join(' ').inspect} (#{child.err.inspect})"
|
112
111
|
end
|
113
112
|
nil
|
114
113
|
end
|
data/lib/data_miner/version.rb
CHANGED
data/lib/data_miner.rb
CHANGED
data/test/data_miner/step/test_import.rb
CHANGED
@@ -2,7 +2,9 @@ require 'helper'
|
|
2
2
|
init_database
|
3
3
|
require 'earth'
|
4
4
|
|
5
|
-
|
5
|
+
require 'earth/residence'
|
6
|
+
require 'earth/electricity'
|
7
|
+
require 'earth/hospitality'
|
6
8
|
|
7
9
|
class PetBlue < ActiveRecord::Base
|
8
10
|
data_miner do
|
data/test/data_miner/unit_converter/test_alchemist.rb
CHANGED
@@ -2,12 +2,19 @@ require 'helper'
|
|
2
2
|
|
3
3
|
describe 'DataMiner::UnitConverter::Alchemist' do
|
4
4
|
before do
|
5
|
+
@original_converter = DataMiner.unit_converter
|
5
6
|
DataMiner.unit_converter = :alchemist
|
6
7
|
end
|
7
8
|
|
9
|
+
after do
|
10
|
+
DataMiner.unit_converter = @original_converter
|
11
|
+
end
|
12
|
+
|
8
13
|
describe '#convert' do
|
9
14
|
it 'converts a value from one unit to another' do
|
10
|
-
DataMiner.unit_converter.convert 3.5, :kilograms, :pounds
|
15
|
+
value = DataMiner.unit_converter.convert 3.5, :kilograms, :pounds
|
16
|
+
assert value.is_a?(Float)
|
17
|
+
value.must_be_close_to 7.71617918
|
11
18
|
end
|
12
19
|
end
|
13
20
|
end
|
data/test/data_miner/unit_converter/test_conversions.rb
CHANGED
@@ -2,14 +2,19 @@ require 'helper'
|
|
2
2
|
|
3
3
|
describe 'DataMiner::UnitConverter::Conversions' do
|
4
4
|
before do
|
5
|
-
|
5
|
+
@original_converter = DataMiner.unit_converter
|
6
|
+
DataMiner.unit_converter = :conversions
|
7
|
+
end
|
8
|
+
|
9
|
+
after do
|
10
|
+
DataMiner.unit_converter = @original_converter
|
6
11
|
end
|
7
12
|
|
8
13
|
describe '#convert' do
|
9
14
|
it 'converts a value from one unit to another' do
|
10
|
-
|
11
|
-
|
12
|
-
|
15
|
+
value = DataMiner.unit_converter.convert 3.5, :kilograms, :pounds
|
16
|
+
assert value.is_a?(Float)
|
17
|
+
value.must_be_close_to 7.71617918
|
13
18
|
end
|
14
19
|
end
|
15
20
|
end
|
data/test/helper.rb
CHANGED
@@ -10,8 +10,7 @@ end
|
|
10
10
|
require 'minitest/spec'
|
11
11
|
require 'minitest/autorun'
|
12
12
|
require 'minitest/reporters'
|
13
|
-
MiniTest::
|
14
|
-
MiniTest::Unit.runner.reporters << MiniTest::Reporters::SpecReporter.new
|
13
|
+
MiniTest::Reporters.use!
|
15
14
|
|
16
15
|
require 'active_record'
|
17
16
|
require 'logger'
|
data/test/support/breed.rb
CHANGED
@@ -5,7 +5,7 @@ class Breed < ActiveRecord::Base
|
|
5
5
|
def update_average_age!
|
6
6
|
# make sure pet is populated
|
7
7
|
Pet.run_data_miner!
|
8
|
-
update_all %{
|
8
|
+
update_all %{average_age = (SELECT AVG(pets.age) FROM pets WHERE pets.breed_id = breeds.name)}
|
9
9
|
end
|
10
10
|
end
|
11
11
|
self.primary_key = "name"
|
data/test/test_data_miner.rb
CHANGED
@@ -80,6 +80,10 @@ describe DataMiner do
|
|
80
80
|
Pet.run_data_miner!
|
81
81
|
Pet.find('Pierre').weight.must_be_close_to 1.9958 # 4.4 pounds in kilograms
|
82
82
|
end
|
83
|
+
it "doesn't convert nil to 0 when converting units" do
|
84
|
+
Pet.run_data_miner!
|
85
|
+
Pet.find('Nemo').age.must_be_nil
|
86
|
+
end
|
83
87
|
it "sets units" do
|
84
88
|
Pet.run_data_miner!
|
85
89
|
Pet.find('Pierre').age_units.must_equal 'years'
|
@@ -119,6 +123,38 @@ describe DataMiner do
|
|
119
123
|
Pet3.run_data_miner!
|
120
124
|
end.must_raise RuntimeError, /exist/i
|
121
125
|
end
|
126
|
+
end
|
127
|
+
|
128
|
+
describe 'when the key attribute is not defined' do
|
129
|
+
class PetFunny < ActiveRecord::Base
|
130
|
+
self.primary_key = false
|
131
|
+
col :name
|
132
|
+
col :breed
|
133
|
+
col :color
|
134
|
+
|
135
|
+
data_miner do
|
136
|
+
import 'without a key', url: "file://#{PETS_FUNNY}" do
|
137
|
+
store :name
|
138
|
+
store :breed
|
139
|
+
store :color
|
140
|
+
end
|
141
|
+
end
|
142
|
+
end
|
143
|
+
PetFunny.auto_upgrade!
|
144
|
+
|
145
|
+
before { PetFunny.delete_all }
|
146
|
+
|
147
|
+
it 'imports the example data' do
|
148
|
+
PetFunny.run_data_miner!
|
149
|
+
PetFunny.must_be :exists?
|
150
|
+
end
|
151
|
+
|
152
|
+
it 'imports new example data for each run' do
|
153
|
+
PetFunny.run_data_miner!
|
154
|
+
first_count = PetFunny.count
|
122
155
|
|
156
|
+
PetFunny.run_data_miner!
|
157
|
+
PetFunny.count.must_equal first_count * 2
|
158
|
+
end
|
123
159
|
end
|
124
160
|
end
|
metadata
CHANGED
@@ -1,17 +1,19 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_miner
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.4.1
|
4
|
+
version: 2.5.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Seamus Abshere
|
9
9
|
- Andy Rossmeissl
|
10
10
|
- Derek Kastner
|
11
|
+
- Ian Hough
|
12
|
+
- Tower He
|
11
13
|
autorequire:
|
12
14
|
bindir: bin
|
13
15
|
cert_chain: []
|
14
|
-
date:
|
16
|
+
date: 2013-03-15 00:00:00.000000000 Z
|
15
17
|
dependencies:
|
16
18
|
- !ruby/object:Gem::Dependency
|
17
19
|
name: aasm
|
@@ -36,7 +38,7 @@ dependencies:
|
|
36
38
|
requirements:
|
37
39
|
- - ! '>='
|
38
40
|
- !ruby/object:Gem::Version
|
39
|
-
version: 0.
|
41
|
+
version: 0.6.1
|
40
42
|
type: :runtime
|
41
43
|
prerelease: false
|
42
44
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -44,39 +46,39 @@ dependencies:
|
|
44
46
|
requirements:
|
45
47
|
- - ! '>='
|
46
48
|
- !ruby/object:Gem::Version
|
47
|
-
version: 0.
|
49
|
+
version: 0.6.1
|
48
50
|
- !ruby/object:Gem::Dependency
|
49
51
|
name: activerecord
|
50
52
|
requirement: !ruby/object:Gem::Requirement
|
51
53
|
none: false
|
52
54
|
requirements:
|
53
|
-
- - ! '
|
55
|
+
- - ! '>'
|
54
56
|
- !ruby/object:Gem::Version
|
55
|
-
version:
|
57
|
+
version: '3'
|
56
58
|
type: :runtime
|
57
59
|
prerelease: false
|
58
60
|
version_requirements: !ruby/object:Gem::Requirement
|
59
61
|
none: false
|
60
62
|
requirements:
|
61
|
-
- - ! '
|
63
|
+
- - ! '>'
|
62
64
|
- !ruby/object:Gem::Version
|
63
|
-
version:
|
65
|
+
version: '3'
|
64
66
|
- !ruby/object:Gem::Dependency
|
65
67
|
name: activesupport
|
66
68
|
requirement: !ruby/object:Gem::Requirement
|
67
69
|
none: false
|
68
70
|
requirements:
|
69
|
-
- - ! '
|
71
|
+
- - ! '>'
|
70
72
|
- !ruby/object:Gem::Version
|
71
|
-
version:
|
73
|
+
version: '3'
|
72
74
|
type: :runtime
|
73
75
|
prerelease: false
|
74
76
|
version_requirements: !ruby/object:Gem::Requirement
|
75
77
|
none: false
|
76
78
|
requirements:
|
77
|
-
- - ! '
|
79
|
+
- - ! '>'
|
78
80
|
- !ruby/object:Gem::Version
|
79
|
-
version:
|
81
|
+
version: '3'
|
80
82
|
- !ruby/object:Gem::Dependency
|
81
83
|
name: errata
|
82
84
|
requirement: !ruby/object:Gem::Requirement
|
@@ -157,6 +159,22 @@ dependencies:
|
|
157
159
|
- - ! '>='
|
158
160
|
- !ruby/object:Gem::Version
|
159
161
|
version: '0'
|
162
|
+
- !ruby/object:Gem::Dependency
|
163
|
+
name: roo
|
164
|
+
requirement: !ruby/object:Gem::Requirement
|
165
|
+
none: false
|
166
|
+
requirements:
|
167
|
+
- - '='
|
168
|
+
- !ruby/object:Gem::Version
|
169
|
+
version: 1.10.1
|
170
|
+
type: :runtime
|
171
|
+
prerelease: false
|
172
|
+
version_requirements: !ruby/object:Gem::Requirement
|
173
|
+
none: false
|
174
|
+
requirements:
|
175
|
+
- - '='
|
176
|
+
- !ruby/object:Gem::Version
|
177
|
+
version: 1.10.1
|
160
178
|
- !ruby/object:Gem::Dependency
|
161
179
|
name: dkastner-alchemist
|
162
180
|
requirement: !ruby/object:Gem::Requirement
|
@@ -370,6 +388,10 @@ description: Download, pull out of a ZIP/TAR/GZ/BZ2 archive, parse, correct, and
|
|
370
388
|
units.
|
371
389
|
email:
|
372
390
|
- seamus@abshere.net
|
391
|
+
- rossmeissl@gmail.com
|
392
|
+
- dkastner@gmail.com
|
393
|
+
- ijhough@gmail.com
|
394
|
+
- towerhe@gmail.com
|
373
395
|
executables: []
|
374
396
|
extensions: []
|
375
397
|
extra_rdoc_files: []
|
@@ -441,7 +463,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
441
463
|
version: '0'
|
442
464
|
requirements: []
|
443
465
|
rubyforge_project: data_miner
|
444
|
-
rubygems_version: 1.8.
|
466
|
+
rubygems_version: 1.8.25
|
445
467
|
signing_key:
|
446
468
|
specification_version: 3
|
447
469
|
summary: Download, pull out of a ZIP/TAR/GZ/BZ2 archive, parse, correct, and import
|