data_miner 2.4.1 → 2.5.0

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
data/CHANGELOG CHANGED
@@ -1,3 +1,18 @@
1
+ 2.5.0 / 2013-03-15
2
+
3
+ * Breaking changes
4
+
5
+ * Lock to roo 1.10.1 and activerecord > 3
6
+
7
+ * Enhancements
8
+
9
+ * If you don't define a `key` in a data_miner block, then just keep appending records - thanks @towerhe
10
+ * Got rid of ridiculous "install unicode-utils" warning
11
+
12
+ * Bug fixes
13
+
14
+ * Don't die because an Alchemist::NumericConversion is passed to a function that expects a plain Float - thanks @towerhe
15
+
1
16
  2.4.1 / 2012-07-26
2
17
 
3
18
  * Enhancements
data/LICENSE CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2012 Brighter Planet
1
+ Copyright (c) 2013 Seamus Abshere
2
2
 
3
3
  Permission is hereby granted, free of charge, to any person obtaining
4
4
  a copy of this software and associated documentation files (the
data/README.markdown CHANGED
@@ -18,7 +18,7 @@ The killer combination for us is:
18
18
  1. [`active_record_inline_schema`](https://github.com/seamusabshere/active_record_inline_schema) - define table structure
19
19
  2. [`remote_table`](https://github.com/seamusabshere/remote_table) - download data and parse it
20
20
  3. [`errata`](https://github.com/seamusabshere/errata) - apply corrections in a transparent way
21
- 4. [`data_miner`](https://github.com/seamusabshere/remote_table) (this library!) - import data idempotently
21
+ 4. [`data_miner`](https://github.com/seamusabshere/data_miner) (this library!) - import data idempotently
22
22
 
23
23
  ## Documentation
24
24
 
@@ -115,13 +115,13 @@ And many more - look for the `data_miner.rb` file that corresponds to each model
115
115
  * Andy Rossmeissl <andy@rossmeissl.net>
116
116
  * Derek Kastner <dkastner@gmail.com>
117
117
  * Ian Hough <ijhough@gmail.com>
118
+ * Tower He <towerhe@gmail.com>
118
119
 
119
120
  ## Wishlist
120
121
 
121
122
  * Make the tests real unit tests
122
123
  * sql steps shouldn't shell out if binaries are missing
123
- * csv import step that uses pg_restore, mysqlimport, etc.
124
124
 
125
125
  ## Copyright
126
126
 
127
- Copyright (c) 2012 Brighter Planet. See LICENSE for details.
127
+ Copyright (c) 2013 Seamus Abshere
data/data_miner.gemspec CHANGED
@@ -4,8 +4,8 @@ require File.expand_path("../lib/data_miner/version", __FILE__)
4
4
  Gem::Specification.new do |s|
5
5
  s.name = "data_miner"
6
6
  s.version = DataMiner::VERSION
7
- s.authors = ["Seamus Abshere", "Andy Rossmeissl", "Derek Kastner"]
8
- s.email = ["seamus@abshere.net"]
7
+ s.authors = ["Seamus Abshere", "Andy Rossmeissl", "Derek Kastner", "Ian Hough", "Tower He"]
8
+ s.email = ["seamus@abshere.net", "rossmeissl@gmail.com", "dkastner@gmail.com", "ijhough@gmail.com", "towerhe@gmail.com"]
9
9
  s.homepage = "https://github.com/seamusabshere/data_miner"
10
10
  s.summary = %{Download, pull out of a ZIP/TAR/GZ/BZ2 archive, parse, correct, and import XLS, ODS, XML, CSV, HTML, etc. into your ActiveRecord models.}
11
11
  s.description = %q{Download, pull out of a ZIP/TAR/GZ/BZ2 archive, parse, correct, and import XLS, ODS, XML, CSV, HTML, etc. into your ActiveRecord models. You can also convert units.}
@@ -18,14 +18,15 @@ Gem::Specification.new do |s|
18
18
  s.require_paths = ["lib"]
19
19
 
20
20
  s.add_runtime_dependency 'aasm'
21
- s.add_runtime_dependency 'active_record_inline_schema', '>=0.5.6'
22
- s.add_runtime_dependency 'activerecord', '>=2.3.4'
23
- s.add_runtime_dependency 'activesupport', '>=2.3.4'
21
+ s.add_runtime_dependency 'active_record_inline_schema', '>=0.6.1'
22
+ s.add_runtime_dependency 'activerecord', '> 3'
23
+ s.add_runtime_dependency 'activesupport', '> 3'
24
24
  s.add_runtime_dependency 'errata', '>=1.0.1'
25
25
  s.add_runtime_dependency 'remote_table', '>=2.0.2'
26
26
  s.add_runtime_dependency 'upsert', '>=0.3.1'
27
27
  s.add_runtime_dependency 'posix-spawn'
28
28
  s.add_runtime_dependency 'unix_utils'
29
+ s.add_runtime_dependency 'roo', '1.10.1'
29
30
 
30
31
  s.add_development_dependency 'dkastner-alchemist'
31
32
  s.add_development_dependency 'conversions'
@@ -19,7 +19,7 @@ class DataMiner
19
19
  # Description of what this step does.
20
20
  # @return [String]
21
21
  attr_reader :description
22
-
22
+
23
23
  # @private
24
24
  def initialize(script, description, settings, &blk)
25
25
  settings = settings.symbolize_keys
@@ -83,27 +83,9 @@ class DataMiner
83
83
 
84
84
  # @private
85
85
  def start
86
- if not validate? and (storing_primary_key? or table_has_autoincrementing_primary_key?)
87
- c = ActiveRecord::Base.connection_pool.checkout
88
- Upsert.stream(c, model.table_name) do |upsert|
89
- table.each do |row|
90
- selector = { @key => attributes[@key].read(row) }
91
- document = attributes.except(@key).inject({}) do |memo, (_, attr)|
92
- memo.merge! attr.updates(row)
93
- memo
94
- end
95
- upsert.row selector, document
96
- end
97
- end
98
- ActiveRecord::Base.connection_pool.checkin c
99
- else
100
- table.each do |row|
101
- record = model.send "find_or_initialize_by_#{@key}", attributes[@key].read(row)
102
- attributes.each { |_, attr| attr.set_from_row record, row }
103
- record.save!
104
- end
105
- end
86
+ upsert_enabled? ? save_with_upsert : save_with_activerecord
106
87
  refresh
88
+
107
89
  nil
108
90
  end
109
91
 
@@ -115,6 +97,33 @@ class DataMiner
115
97
 
116
98
  private
117
99
 
100
+ def upsert_enabled?
101
+ (not validate?) and (storing_primary_key? or table_has_autoincrementing_primary_key?)
102
+ end
103
+
104
+ def save_with_upsert
105
+ c = ActiveRecord::Base.connection_pool.checkout
106
+ Upsert.stream(c, model.table_name) do |upsert|
107
+ table.each do |row|
108
+ selector = @key ? { @key => attributes[@key].read(row) } : { model.primary_key => nil }
109
+ document = attributes.except(@key).inject({}) do |memo, (_, attr)|
110
+ memo.merge! attr.updates(row)
111
+ memo
112
+ end
113
+ upsert.row selector, document
114
+ end
115
+ end
116
+ ActiveRecord::Base.connection_pool.checkin c
117
+ end
118
+
119
+ def save_with_activerecord
120
+ table.each do |row|
121
+ record = @key ? model.send("find_or_initialize_by_#{@key}", attributes[@key].read(row)) : model.new
122
+ attributes.each { |_, attr| attr.set_from_row record, row }
123
+ record.save!
124
+ end
125
+ end
126
+
118
127
  def table_has_autoincrementing_primary_key?
119
128
  return @table_has_autoincrementing_primary_key_query.first if @table_has_autoincrementing_primary_key_query.is_a?(Array)
120
129
  c = ActiveRecord::Base.connection_pool.checkout
@@ -140,7 +149,7 @@ class DataMiner
140
149
 
141
150
  def storing_primary_key?
142
151
  return @storing_primary_key_query.first if @storing_primary_key_query.is_a?(Array)
143
- @storing_primary_key_query = [attributes.has_key?(model.primary_key.to_sym)]
152
+ @storing_primary_key_query = [model.primary_key && attributes.has_key?(model.primary_key.to_sym)]
144
153
  @storing_primary_key_query.first
145
154
  end
146
155
 
@@ -53,7 +53,7 @@ class DataMiner
53
53
  private
54
54
 
55
55
  def config
56
- @config ||= if ActiveRecord::Base.respond_to?(:connection_config)
56
+ if ActiveRecord::Base.respond_to?(:connection_config)
57
57
  ActiveRecord::Base.connection_config
58
58
  else
59
59
  ActiveRecord::Base.connection_pool.spec.config
@@ -82,7 +82,7 @@ class DataMiner
82
82
  ::Process.waitpid pid
83
83
  end
84
84
  unless $?.success?
85
- raise RuntimeError, "[data_miner] Failed: #{argv.join(' ').inspect}"
85
+ raise RuntimeError, "[data_miner] Failed: ARGV #{argv.join(' ').inspect}"
86
86
  end
87
87
  nil
88
88
  end
@@ -90,25 +90,24 @@ class DataMiner
90
90
  alias :mysql2 :mysql
91
91
 
92
92
  def postgresql(path)
93
- connect = []
94
- connect << ['--username', config[:username]] if config[:username]
95
- connect << ['--password', config[:password]] if config[:password]
96
- connect << ['--host', config[:host]] if config[:host]
97
- connect << ['--port', config[:port]] if config[:port]
93
+ env = {}
94
+ env['PGHOST'] = config['host'] if config['host']
95
+ env['PGPORT'] = config['port'].to_s if config['port']
96
+ env['PGPASSWORD'] = config['password'].to_s if config['password']
97
+ env['PGUSER'] = config['username'].to_s if config['username']
98
98
 
99
99
  argv = [
100
100
  'psql',
101
- connect,
102
101
  '--quiet',
103
102
  '--dbname', config[:database],
104
103
  '--file', path
105
104
  ].flatten
106
105
 
107
- child = POSIX::Spawn::Child.new(*argv)
106
+ child = POSIX::Spawn::Child.new(*([env]+argv))
108
107
  $stderr.puts child.out
109
108
  $stderr.puts child.err
110
109
  unless child.success?
111
- raise RuntimeError, "[data_miner] Failed: #{argv.join(' ').inspect} (#{child.err.inspect})"
110
+ raise RuntimeError, "[data_miner] Failed: ENV #{env.inspect} ARGV #{argv.join(' ').inspect} (#{child.err.inspect})"
112
111
  end
113
112
  nil
114
113
  end
@@ -4,7 +4,7 @@ class DataMiner
4
4
  class UnitConverter
5
5
  class Alchemist < UnitConverter
6
6
  def convert(value, from, to)
7
- value.to_f.send(from).to.send(to)
7
+ value.to_f.send(from).to.send(to).to_f
8
8
  end
9
9
  end
10
10
  end
@@ -1,3 +1,3 @@
1
1
  class DataMiner
2
- VERSION = '2.4.1'
2
+ VERSION = '2.5.0'
3
3
  end
data/lib/data_miner.rb CHANGED
@@ -10,7 +10,7 @@ if RUBY_VERSION >= '1.9'
10
10
  begin
11
11
  require 'unicode_utils/downcase'
12
12
  rescue LoadError
13
- Kernel.warn '[data_miner] You may wish to include unicode_utils in your Gemfile to improve accuracy of downcasing'
13
+ # oh well.
14
14
  end
15
15
  end
16
16
 
@@ -2,7 +2,9 @@ require 'helper'
2
2
  init_database
3
3
  require 'earth'
4
4
 
5
- Earth.init :residence, :electricity, :hospitality, :load_data_miner => true, :apply_schemas => true
5
+ require 'earth/residence'
6
+ require 'earth/electricity'
7
+ require 'earth/hospitality'
6
8
 
7
9
  class PetBlue < ActiveRecord::Base
8
10
  data_miner do
@@ -2,12 +2,19 @@ require 'helper'
2
2
 
3
3
  describe 'DataMiner::UnitConverter::Alchemist' do
4
4
  before do
5
+ @original_converter = DataMiner.unit_converter
5
6
  DataMiner.unit_converter = :alchemist
6
7
  end
7
8
 
9
+ after do
10
+ DataMiner.unit_converter = @original_converter
11
+ end
12
+
8
13
  describe '#convert' do
9
14
  it 'converts a value from one unit to another' do
10
- DataMiner.unit_converter.convert 3.5, :kilograms, :pounds
15
+ value = DataMiner.unit_converter.convert 3.5, :kilograms, :pounds
16
+ assert value.is_a?(Float)
17
+ value.must_be_close_to 7.71617918
11
18
  end
12
19
  end
13
20
  end
@@ -2,14 +2,19 @@ require 'helper'
2
2
 
3
3
  describe 'DataMiner::UnitConverter::Conversions' do
4
4
  before do
5
- #DataMiner.unit_converter = :conversions
5
+ @original_converter = DataMiner.unit_converter
6
+ DataMiner.unit_converter = :conversions
7
+ end
8
+
9
+ after do
10
+ DataMiner.unit_converter = @original_converter
6
11
  end
7
12
 
8
13
  describe '#convert' do
9
14
  it 'converts a value from one unit to another' do
10
- # can't load both alchemist and conversions in same test run
11
- # see test/test_unit_conversion for coverage of this adapter
12
- #DataMiner.unit_converter.convert 3.5, :kilograms, :pounds
15
+ value = DataMiner.unit_converter.convert 3.5, :kilograms, :pounds
16
+ assert value.is_a?(Float)
17
+ value.must_be_close_to 7.71617918
13
18
  end
14
19
  end
15
20
  end
data/test/helper.rb CHANGED
@@ -10,8 +10,7 @@ end
10
10
  require 'minitest/spec'
11
11
  require 'minitest/autorun'
12
12
  require 'minitest/reporters'
13
- MiniTest::Unit.runner = MiniTest::SuiteRunner.new
14
- MiniTest::Unit.runner.reporters << MiniTest::Reporters::SpecReporter.new
13
+ MiniTest::Reporters.use!
15
14
 
16
15
  require 'active_record'
17
16
  require 'logger'
@@ -5,7 +5,7 @@ class Breed < ActiveRecord::Base
5
5
  def update_average_age!
6
6
  # make sure pet is populated
7
7
  Pet.run_data_miner!
8
- update_all %{"average_age" = (SELECT AVG("pets"."age") FROM "pets" WHERE "pets"."breed_id" = "breeds"."name")}
8
+ update_all %{average_age = (SELECT AVG(pets.age) FROM pets WHERE pets.breed_id = breeds.name)}
9
9
  end
10
10
  end
11
11
  self.primary_key = "name"
@@ -80,6 +80,10 @@ describe DataMiner do
80
80
  Pet.run_data_miner!
81
81
  Pet.find('Pierre').weight.must_be_close_to 1.9958 # 4.4 pounds in kilograms
82
82
  end
83
+ it "doesn't convert nil to 0 when converting units" do
84
+ Pet.run_data_miner!
85
+ Pet.find('Nemo').age.must_be_nil
86
+ end
83
87
  it "sets units" do
84
88
  Pet.run_data_miner!
85
89
  Pet.find('Pierre').age_units.must_equal 'years'
@@ -119,6 +123,38 @@ describe DataMiner do
119
123
  Pet3.run_data_miner!
120
124
  end.must_raise RuntimeError, /exist/i
121
125
  end
126
+ end
127
+
128
+ describe 'when the key attribute is not defined' do
129
+ class PetFunny < ActiveRecord::Base
130
+ self.primary_key = false
131
+ col :name
132
+ col :breed
133
+ col :color
134
+
135
+ data_miner do
136
+ import 'without a key', url: "file://#{PETS_FUNNY}" do
137
+ store :name
138
+ store :breed
139
+ store :color
140
+ end
141
+ end
142
+ end
143
+ PetFunny.auto_upgrade!
144
+
145
+ before { PetFunny.delete_all }
146
+
147
+ it 'imports the example data' do
148
+ PetFunny.run_data_miner!
149
+ PetFunny.must_be :exists?
150
+ end
151
+
152
+ it 'imports new example data for each run' do
153
+ PetFunny.run_data_miner!
154
+ first_count = PetFunny.count
122
155
 
156
+ PetFunny.run_data_miner!
157
+ PetFunny.count.must_equal first_count * 2
158
+ end
123
159
  end
124
160
  end
metadata CHANGED
@@ -1,17 +1,19 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data_miner
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.4.1
4
+ version: 2.5.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
8
8
  - Seamus Abshere
9
9
  - Andy Rossmeissl
10
10
  - Derek Kastner
11
+ - Ian Hough
12
+ - Tower He
11
13
  autorequire:
12
14
  bindir: bin
13
15
  cert_chain: []
14
- date: 2012-07-27 00:00:00.000000000 Z
16
+ date: 2013-03-15 00:00:00.000000000 Z
15
17
  dependencies:
16
18
  - !ruby/object:Gem::Dependency
17
19
  name: aasm
@@ -36,7 +38,7 @@ dependencies:
36
38
  requirements:
37
39
  - - ! '>='
38
40
  - !ruby/object:Gem::Version
39
- version: 0.5.6
41
+ version: 0.6.1
40
42
  type: :runtime
41
43
  prerelease: false
42
44
  version_requirements: !ruby/object:Gem::Requirement
@@ -44,39 +46,39 @@ dependencies:
44
46
  requirements:
45
47
  - - ! '>='
46
48
  - !ruby/object:Gem::Version
47
- version: 0.5.6
49
+ version: 0.6.1
48
50
  - !ruby/object:Gem::Dependency
49
51
  name: activerecord
50
52
  requirement: !ruby/object:Gem::Requirement
51
53
  none: false
52
54
  requirements:
53
- - - ! '>='
55
+ - - ! '>'
54
56
  - !ruby/object:Gem::Version
55
- version: 2.3.4
57
+ version: '3'
56
58
  type: :runtime
57
59
  prerelease: false
58
60
  version_requirements: !ruby/object:Gem::Requirement
59
61
  none: false
60
62
  requirements:
61
- - - ! '>='
63
+ - - ! '>'
62
64
  - !ruby/object:Gem::Version
63
- version: 2.3.4
65
+ version: '3'
64
66
  - !ruby/object:Gem::Dependency
65
67
  name: activesupport
66
68
  requirement: !ruby/object:Gem::Requirement
67
69
  none: false
68
70
  requirements:
69
- - - ! '>='
71
+ - - ! '>'
70
72
  - !ruby/object:Gem::Version
71
- version: 2.3.4
73
+ version: '3'
72
74
  type: :runtime
73
75
  prerelease: false
74
76
  version_requirements: !ruby/object:Gem::Requirement
75
77
  none: false
76
78
  requirements:
77
- - - ! '>='
79
+ - - ! '>'
78
80
  - !ruby/object:Gem::Version
79
- version: 2.3.4
81
+ version: '3'
80
82
  - !ruby/object:Gem::Dependency
81
83
  name: errata
82
84
  requirement: !ruby/object:Gem::Requirement
@@ -157,6 +159,22 @@ dependencies:
157
159
  - - ! '>='
158
160
  - !ruby/object:Gem::Version
159
161
  version: '0'
162
+ - !ruby/object:Gem::Dependency
163
+ name: roo
164
+ requirement: !ruby/object:Gem::Requirement
165
+ none: false
166
+ requirements:
167
+ - - '='
168
+ - !ruby/object:Gem::Version
169
+ version: 1.10.1
170
+ type: :runtime
171
+ prerelease: false
172
+ version_requirements: !ruby/object:Gem::Requirement
173
+ none: false
174
+ requirements:
175
+ - - '='
176
+ - !ruby/object:Gem::Version
177
+ version: 1.10.1
160
178
  - !ruby/object:Gem::Dependency
161
179
  name: dkastner-alchemist
162
180
  requirement: !ruby/object:Gem::Requirement
@@ -370,6 +388,10 @@ description: Download, pull out of a ZIP/TAR/GZ/BZ2 archive, parse, correct, and
370
388
  units.
371
389
  email:
372
390
  - seamus@abshere.net
391
+ - rossmeissl@gmail.com
392
+ - dkastner@gmail.com
393
+ - ijhough@gmail.com
394
+ - towerhe@gmail.com
373
395
  executables: []
374
396
  extensions: []
375
397
  extra_rdoc_files: []
@@ -441,7 +463,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
441
463
  version: '0'
442
464
  requirements: []
443
465
  rubyforge_project: data_miner
444
- rubygems_version: 1.8.24
466
+ rubygems_version: 1.8.25
445
467
  signing_key:
446
468
  specification_version: 3
447
469
  summary: Download, pull out of a ZIP/TAR/GZ/BZ2 archive, parse, correct, and import