data_miner 1.3.8 → 2.0.1

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (42)
  1. data/CHANGELOG +42 -0
  2. data/Gemfile +19 -3
  3. data/README.rdoc +3 -3
  4. data/Rakefile +13 -15
  5. data/data_miner.gemspec +4 -15
  6. data/lib/data_miner.rb +69 -70
  7. data/lib/data_miner/active_record_extensions.rb +17 -22
  8. data/lib/data_miner/attribute.rb +176 -179
  9. data/lib/data_miner/dictionary.rb +38 -31
  10. data/lib/data_miner/run.rb +49 -18
  11. data/lib/data_miner/script.rb +116 -0
  12. data/lib/data_miner/step.rb +5 -0
  13. data/lib/data_miner/step/import.rb +74 -0
  14. data/lib/data_miner/step/process.rb +34 -0
  15. data/lib/data_miner/step/tap.rb +134 -0
  16. data/lib/data_miner/version.rb +1 -1
  17. data/test/helper.rb +26 -24
  18. data/test/support/breeds.xls +0 -0
  19. data/test/support/pet_color_dictionary.en.csv +5 -0
  20. data/test/support/pet_color_dictionary.es.csv +5 -0
  21. data/test/support/pets.csv +5 -0
  22. data/test/support/pets_funny.csv +4 -0
  23. data/test/test_data_miner.rb +103 -0
  24. data/test/test_earth_import.rb +25 -0
  25. data/test/test_earth_tap.rb +25 -0
  26. data/test/test_safety.rb +43 -0
  27. metadata +72 -78
  28. data/.document +0 -5
  29. data/lib/data_miner/config.rb +0 -124
  30. data/lib/data_miner/import.rb +0 -93
  31. data/lib/data_miner/process.rb +0 -38
  32. data/lib/data_miner/tap.rb +0 -143
  33. data/test/support/aircraft.rb +0 -102
  34. data/test/support/airport.rb +0 -16
  35. data/test/support/automobile_fuel_type.rb +0 -40
  36. data/test/support/automobile_variant.rb +0 -362
  37. data/test/support/country.rb +0 -15
  38. data/test/support/test_database.rb +0 -311
  39. data/test/test_data_miner_attribute.rb +0 -111
  40. data/test/test_data_miner_process.rb +0 -18
  41. data/test/test_old_syntax.rb +0 -825
  42. data/test/test_tap.rb +0 -21
@@ -1,3 +1,3 @@
1
1
  class DataMiner
2
- VERSION = '1.3.8'
2
+ VERSION = '2.0.1'
3
3
  end
@@ -1,30 +1,32 @@
1
1
  require 'rubygems'
2
- require 'bundler'
3
- Bundler.setup
4
- require 'test/unit'
5
- require 'shoulda'
6
- require 'mini_record'
7
- require 'logger'
8
- # require 'ruby-debug'
9
- $LOAD_PATH.unshift(File.dirname(__FILE__))
10
- $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
11
- require 'data_miner'
12
- class Test::Unit::TestCase
13
- end
14
-
15
- test_log = File.open('test.log', 'w')
16
- test_log.sync = true
17
- DataMiner.logger = Logger.new test_log
2
+ require 'bundler/setup'
18
3
 
19
- # because some of the test files reference it
20
- require 'errata'
4
+ if Bundler.definition.specs['ruby-debug19'].first or Bundler.definition.specs['ruby-debug'].first
5
+ require 'ruby-debug'
6
+ end
21
7
 
22
- ENV['WIP'] = 'true' if ENV['ALL'] == 'true'
8
+ require 'minitest/spec'
9
+ require 'minitest/autorun'
10
+ require 'minitest/reporters'
11
+ MiniTest::Unit.runner = MiniTest::SuiteRunner.new
12
+ MiniTest::Unit.runner.reporters << MiniTest::Reporters::SpecReporter.new
23
13
 
24
- $:.push File.dirname(__FILE__)
25
- require 'support/test_database'
14
+ cmd = %{mysql -u root -ppassword -e "drop database data_miner_test; create database data_miner_test charset utf8"}
15
+ $stderr.puts "Running `#{cmd}`..."
16
+ system cmd
17
+ $stderr.puts "Done."
26
18
 
27
- ActiveSupport::Inflector.inflections do |inflect|
28
- inflect.uncountable %w{ aircraft aircraft_deux census_division_deux census_division_trois }
29
- end
19
+ require 'active_record'
20
+ require 'logger'
21
+ ActiveRecord::Base.logger = Logger.new $stderr
22
+ ActiveRecord::Base.logger.level = Logger::INFO
23
+ # ActiveRecord::Base.logger.level = Logger::DEBUG
24
+ ActiveRecord::Base.establish_connection(
25
+ 'adapter' => 'mysql2',
26
+ 'database' => 'data_miner_test',
27
+ 'username' => 'root',
28
+ 'password' => 'password'
29
+ )
30
30
 
31
+ require 'data_miner'
32
+ DataMiner::Run.auto_upgrade!
Binary file
@@ -0,0 +1,5 @@
1
+ input,output
2
+ WH/RD,white/red
3
+ BR/BL,brown/black
4
+ GR/BU,green/blue
5
+ GO,gold
@@ -0,0 +1,5 @@
1
+ input,output
2
+ WH/RD,blanco/rojo
3
+ BR/BL,morron/negro
4
+ GR/BU,verde/azul
5
+ GO,oro
@@ -0,0 +1,5 @@
1
+ name,breed,color,age
2
+ Pierre,Tabby,GO,4
3
+ Jerry,Beagle,BR/BL,5
4
+ Amigo,Spanish Lizarto,GR/BU,17
5
+ Johnny,Beagle,BR/BL,2
@@ -0,0 +1,4 @@
1
+ name,breed,color
2
+ Pierre,Rhino,GO
3
+ Jerry,Badger,BR/BL
4
+ Amigo,Dinosaur,GR/BU
@@ -0,0 +1,103 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require 'helper'
3
+
4
+ PETS = File.expand_path('../support/pets.csv', __FILE__)
5
+ PETS_FUNNY = File.expand_path('../support/pets_funny.csv', __FILE__)
6
+ COLOR_DICTIONARY_ENGLISH = File.expand_path('../support/pet_color_dictionary.en.csv', __FILE__)
7
+ COLOR_DICTIONARY_SPANISH = File.expand_path('../support/pet_color_dictionary.es.csv', __FILE__)
8
+ BREEDS = File.expand_path('../support/breeds.xls', __FILE__)
9
+
10
+ class Pet < ActiveRecord::Base
11
+ self.primary_key = "name"
12
+ col :name
13
+ col :breed_id
14
+ col :color_id
15
+ col :age, :type => :integer
16
+ belongs_to :breed
17
+ data_miner do
18
+ process :auto_upgrade!
19
+ process :run_data_miner_on_parent_associations!
20
+ import("A list of pets", :url => "file://#{PETS}") do
21
+ key :name
22
+ store :age
23
+ store :breed_id, :field_name => :breed
24
+ store :color_id, :field_name => :color, :dictionary => { :url => "file://#{COLOR_DICTIONARY_ENGLISH}", :input => :input, :output => :output }
25
+ end
26
+ end
27
+ end
28
+
29
+ class Breed < ActiveRecord::Base
30
+ class << self
31
+ def update_average_age!
32
+ # make sure pet is populated
33
+ Pet.run_data_miner!
34
+ update_all %{breeds.average_age = (SELECT AVG(pets.age) FROM pets WHERE pets.breed_id = breeds.name)}
35
+ end
36
+ end
37
+ self.primary_key = "name"
38
+ col :name
39
+ col :average_age, :type => :float
40
+ data_miner do
41
+ process :auto_upgrade!
42
+ import("A list of breeds", :url => "file://#{BREEDS}") do
43
+ key :name, :field_name => 'Breed name'
44
+ end
45
+ process :update_average_age!
46
+ end
47
+ end
48
+
49
+ Pet.auto_upgrade!
50
+
51
+ describe DataMiner do
52
+ describe "when used to import example data about pets" do
53
+ before do
54
+ Pet.delete_all
55
+ end
56
+ it "is idempotent given a key" do
57
+ Pet.run_data_miner!
58
+ first_count = Pet.count
59
+ Pet.run_data_miner!
60
+ Pet.count.must_equal first_count
61
+ end
62
+ it "can map fields in the source file to columns in the database" do
63
+ Pet.run_data_miner!
64
+ Pet.find('Jerry').breed_id.must_equal 'Beagle'
65
+ end
66
+ it "can use a dictionary to translate source data" do
67
+ Pet.run_data_miner!
68
+ Pet.find('Jerry').color_id.must_equal 'brown/black'
69
+ end
70
+ it "refreshes the dictionary for every run" do
71
+ Pet.run_data_miner!
72
+ Pet.find('Jerry').color_id.must_equal 'brown/black'
73
+ begin
74
+ FileUtils.mv COLOR_DICTIONARY_ENGLISH, "#{COLOR_DICTIONARY_ENGLISH}.bak"
75
+ FileUtils.cp COLOR_DICTIONARY_SPANISH, COLOR_DICTIONARY_ENGLISH # oops! somebody swapped in a spanish dictionary
76
+ Pet.run_data_miner!
77
+ Pet.find('Jerry').color_id.must_equal 'morron/negro'
78
+ ensure
79
+ FileUtils.mv "#{COLOR_DICTIONARY_ENGLISH}.bak", COLOR_DICTIONARY_ENGLISH
80
+ end
81
+ end
82
+ it "refreshes the data source for every run" do
83
+ Pet.run_data_miner!
84
+ Pet.find('Jerry').breed_id.must_equal 'Beagle'
85
+ begin
86
+ FileUtils.mv PETS, "#{PETS}.bak"
87
+ FileUtils.cp PETS_FUNNY, PETS # oops! somebody swapped in a funny data source
88
+ Pet.run_data_miner!
89
+ Pet.find('Jerry').breed_id.must_equal 'Badger'
90
+ ensure
91
+ FileUtils.mv "#{PETS}.bak", PETS
92
+ end
93
+ end
94
+ it "provides :run_data_miner_on_parent_associations!" do
95
+ Pet.run_data_miner!
96
+ Pet.find('Jerry').breed.must_equal Breed.find('Beagle')
97
+ end
98
+ it "runs class methods" do
99
+ Breed.run_data_miner!
100
+ Breed.find('Beagle').average_age.must_equal((5+2)/2.0)
101
+ end
102
+ end
103
+ end
@@ -0,0 +1,25 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require 'helper'
3
+ require 'earth'
4
+
5
+ # use earth, which has a plethora of real-world data_miner blocks
6
+ Earth.init :locality, :pet, :load_data_miner => true, :apply_schemas => true
7
+
8
+ describe DataMiner do
9
+ describe "being used by the Earth library's import steps" do
10
+ describe "for pets" do
11
+ it "can pull breed and species" do
12
+ Breed.run_data_miner!
13
+ Breed.find('Golden Retriever').species.must_equal Species.find('dog')
14
+ end
15
+ end
16
+ describe "for localities" do
17
+ it "can handle non-latin characters" do
18
+ Country.run_data_miner!
19
+ Country.find('DE').name.must_equal 'Germany'
20
+ Country.find('AX').name.must_equal 'Åland Islands'
21
+ Country.find('CI').name.must_equal "Côte d'Ivoire"
22
+ end
23
+ end
24
+ end
25
+ end
@@ -0,0 +1,25 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require 'helper'
3
+ require 'earth'
4
+
5
+ # use earth, which has a plethora of real-world data_miner blocks
6
+ Earth.init :locality, :pet, :load_data_miner => false, :apply_schemas => true
7
+
8
+ DataMiner.run %w{Country Breed}
9
+
10
+ describe DataMiner do
11
+ describe "being used by the Earth library's tap steps" do
12
+ describe "for pets" do
13
+ it "can pull breed and species" do
14
+ Breed.find('Golden Retriever').species.must_equal Species.find('dog')
15
+ end
16
+ end
17
+ describe "for localities" do
18
+ it "can handle non-latin characters" do
19
+ Country.find('DE').name.must_equal 'Germany'
20
+ Country.find('AX').name.must_equal 'Åland Islands'
21
+ Country.find('CI').name.must_equal "Côte d'Ivoire"
22
+ end
23
+ end
24
+ end
25
+ end
@@ -0,0 +1,43 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require 'helper'
3
+ require 'earth'
4
+
5
+ # use earth, which has a plethora of real-world data_miner blocks
6
+ Earth.init :locality, :pet, :load_data_miner => true, :apply_schemas => true
7
+
8
+ describe DataMiner do
9
+ describe "when being run in a multi-threaded environment" do
10
+ it "tries not to duplicate data" do
11
+ begin
12
+ old_thread_abort_on_exception = Thread.abort_on_exception
13
+ Thread.abort_on_exception = false
14
+ Breed.delete_all
15
+ Breed.run_data_miner!
16
+ reference_count = Breed.count
17
+ Breed.delete_all
18
+ threads = (0..2).map do |i|
19
+ Thread.new do
20
+ $stderr.write "Thread #{i} starting\n"
21
+ Breed.run_data_miner!
22
+ $stderr.write "Thread #{i} done\n"
23
+ end
24
+ end
25
+ exceptions = []
26
+ threads.each do |t|
27
+ begin
28
+ t.join
29
+ rescue
30
+ exceptions << $!
31
+ end
32
+ end
33
+ exceptions.length.must_equal 2
34
+ exceptions.each do |exception|
35
+ exception.must_be_kind_of LockMethod::Locked
36
+ end
37
+ Breed.count.must_equal reference_count
38
+ ensure
39
+ Thread.abort_on_exception = old_thread_abort_on_exception
40
+ end
41
+ end
42
+ end
43
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data_miner
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.8
4
+ version: 2.0.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -11,11 +11,11 @@ authors:
11
11
  autorequire:
12
12
  bindir: bin
13
13
  cert_chain: []
14
- date: 2012-02-10 00:00:00.000000000 Z
14
+ date: 2012-04-18 00:00:00.000000000 Z
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
17
17
  name: remote_table
18
- requirement: &2164605920 !ruby/object:Gem::Requirement
18
+ requirement: !ruby/object:Gem::Requirement
19
19
  none: false
20
20
  requirements:
21
21
  - - ! '>='
@@ -23,10 +23,15 @@ dependencies:
23
23
  version: 1.2.2
24
24
  type: :runtime
25
25
  prerelease: false
26
- version_requirements: *2164605920
26
+ version_requirements: !ruby/object:Gem::Requirement
27
+ none: false
28
+ requirements:
29
+ - - ! '>='
30
+ - !ruby/object:Gem::Version
31
+ version: 1.2.2
27
32
  - !ruby/object:Gem::Dependency
28
33
  name: activerecord
29
- requirement: &2164605140 !ruby/object:Gem::Requirement
34
+ requirement: !ruby/object:Gem::Requirement
30
35
  none: false
31
36
  requirements:
32
37
  - - ! '>='
@@ -34,10 +39,15 @@ dependencies:
34
39
  version: 2.3.4
35
40
  type: :runtime
36
41
  prerelease: false
37
- version_requirements: *2164605140
42
+ version_requirements: !ruby/object:Gem::Requirement
43
+ none: false
44
+ requirements:
45
+ - - ! '>='
46
+ - !ruby/object:Gem::Version
47
+ version: 2.3.4
38
48
  - !ruby/object:Gem::Dependency
39
49
  name: activesupport
40
- requirement: &2164604520 !ruby/object:Gem::Requirement
50
+ requirement: !ruby/object:Gem::Requirement
41
51
  none: false
42
52
  requirements:
43
53
  - - ! '>='
@@ -45,10 +55,15 @@ dependencies:
45
55
  version: 2.3.4
46
56
  type: :runtime
47
57
  prerelease: false
48
- version_requirements: *2164604520
58
+ version_requirements: !ruby/object:Gem::Requirement
59
+ none: false
60
+ requirements:
61
+ - - ! '>='
62
+ - !ruby/object:Gem::Version
63
+ version: 2.3.4
49
64
  - !ruby/object:Gem::Dependency
50
65
  name: conversions
51
- requirement: &2164619940 !ruby/object:Gem::Requirement
66
+ requirement: !ruby/object:Gem::Requirement
52
67
  none: false
53
68
  requirements:
54
69
  - - ! '>='
@@ -56,21 +71,15 @@ dependencies:
56
71
  version: 1.4.4
57
72
  type: :runtime
58
73
  prerelease: false
59
- version_requirements: *2164619940
60
- - !ruby/object:Gem::Dependency
61
- name: blockenspiel
62
- requirement: &2164619140 !ruby/object:Gem::Requirement
74
+ version_requirements: !ruby/object:Gem::Requirement
63
75
  none: false
64
76
  requirements:
65
77
  - - ! '>='
66
78
  - !ruby/object:Gem::Version
67
- version: 0.3.2
68
- type: :runtime
69
- prerelease: false
70
- version_requirements: *2164619140
79
+ version: 1.4.4
71
80
  - !ruby/object:Gem::Dependency
72
81
  name: errata
73
- requirement: &2164618260 !ruby/object:Gem::Requirement
82
+ requirement: !ruby/object:Gem::Requirement
74
83
  none: false
75
84
  requirements:
76
85
  - - ! '>='
@@ -78,73 +87,60 @@ dependencies:
78
87
  version: 1.0.1
79
88
  type: :runtime
80
89
  prerelease: false
81
- version_requirements: *2164618260
82
- - !ruby/object:Gem::Dependency
83
- name: mini_record-compat
84
- requirement: &2164617640 !ruby/object:Gem::Requirement
90
+ version_requirements: !ruby/object:Gem::Requirement
85
91
  none: false
86
92
  requirements:
87
93
  - - ! '>='
88
94
  - !ruby/object:Gem::Version
89
- version: '0'
90
- type: :development
91
- prerelease: false
92
- version_requirements: *2164617640
95
+ version: 1.0.1
93
96
  - !ruby/object:Gem::Dependency
94
- name: loose_tight_dictionary
95
- requirement: &2164616980 !ruby/object:Gem::Requirement
97
+ name: active_record_inline_schema
98
+ requirement: !ruby/object:Gem::Requirement
96
99
  none: false
97
100
  requirements:
98
101
  - - ! '>='
99
102
  - !ruby/object:Gem::Version
100
- version: 0.0.5
101
- type: :development
103
+ version: '0'
104
+ type: :runtime
102
105
  prerelease: false
103
- version_requirements: *2164616980
104
- - !ruby/object:Gem::Dependency
105
- name: test-unit
106
- requirement: &2164616480 !ruby/object:Gem::Requirement
106
+ version_requirements: !ruby/object:Gem::Requirement
107
107
  none: false
108
108
  requirements:
109
109
  - - ! '>='
110
110
  - !ruby/object:Gem::Version
111
111
  version: '0'
112
- type: :development
113
- prerelease: false
114
- version_requirements: *2164616480
115
112
  - !ruby/object:Gem::Dependency
116
- name: shoulda
117
- requirement: &2164615900 !ruby/object:Gem::Requirement
113
+ name: aasm
114
+ requirement: !ruby/object:Gem::Requirement
118
115
  none: false
119
116
  requirements:
120
117
  - - ! '>='
121
118
  - !ruby/object:Gem::Version
122
119
  version: '0'
123
- type: :development
120
+ type: :runtime
124
121
  prerelease: false
125
- version_requirements: *2164615900
126
- - !ruby/object:Gem::Dependency
127
- name: mysql
128
- requirement: &2164615420 !ruby/object:Gem::Requirement
122
+ version_requirements: !ruby/object:Gem::Requirement
129
123
  none: false
130
124
  requirements:
131
125
  - - ! '>='
132
126
  - !ruby/object:Gem::Version
133
127
  version: '0'
134
- type: :development
135
- prerelease: false
136
- version_requirements: *2164615420
137
128
  - !ruby/object:Gem::Dependency
138
- name: rake
139
- requirement: &2164614880 !ruby/object:Gem::Requirement
129
+ name: lock_method
130
+ requirement: !ruby/object:Gem::Requirement
140
131
  none: false
141
132
  requirements:
142
133
  - - ! '>='
143
134
  - !ruby/object:Gem::Version
144
- version: '0'
145
- type: :development
135
+ version: 0.5.1
136
+ type: :runtime
146
137
  prerelease: false
147
- version_requirements: *2164614880
138
+ version_requirements: !ruby/object:Gem::Requirement
139
+ none: false
140
+ requirements:
141
+ - - ! '>='
142
+ - !ruby/object:Gem::Version
143
+ version: 0.5.1
148
144
  description: Mine remote data into your ActiveRecord models. You can also convert
149
145
  units.
150
146
  email:
@@ -153,7 +149,6 @@ executables: []
153
149
  extensions: []
154
150
  extra_rdoc_files: []
155
151
  files:
156
- - .document
157
152
  - .gitignore
158
153
  - CHANGELOG
159
154
  - Gemfile
@@ -164,24 +159,24 @@ files:
164
159
  - lib/data_miner.rb
165
160
  - lib/data_miner/active_record_extensions.rb
166
161
  - lib/data_miner/attribute.rb
167
- - lib/data_miner/config.rb
168
162
  - lib/data_miner/dictionary.rb
169
- - lib/data_miner/import.rb
170
- - lib/data_miner/process.rb
171
163
  - lib/data_miner/run.rb
172
- - lib/data_miner/tap.rb
164
+ - lib/data_miner/script.rb
165
+ - lib/data_miner/step.rb
166
+ - lib/data_miner/step/import.rb
167
+ - lib/data_miner/step/process.rb
168
+ - lib/data_miner/step/tap.rb
173
169
  - lib/data_miner/version.rb
174
170
  - test/helper.rb
175
- - test/support/aircraft.rb
176
- - test/support/airport.rb
177
- - test/support/automobile_fuel_type.rb
178
- - test/support/automobile_variant.rb
179
- - test/support/country.rb
180
- - test/support/test_database.rb
181
- - test/test_data_miner_attribute.rb
182
- - test/test_data_miner_process.rb
183
- - test/test_old_syntax.rb
184
- - test/test_tap.rb
171
+ - test/support/breeds.xls
172
+ - test/support/pet_color_dictionary.en.csv
173
+ - test/support/pet_color_dictionary.es.csv
174
+ - test/support/pets.csv
175
+ - test/support/pets_funny.csv
176
+ - test/test_data_miner.rb
177
+ - test/test_earth_import.rb
178
+ - test/test_earth_tap.rb
179
+ - test/test_safety.rb
185
180
  homepage: https://github.com/seamusabshere/data_miner
186
181
  licenses: []
187
182
  post_install_message:
@@ -202,20 +197,19 @@ required_rubygems_version: !ruby/object:Gem::Requirement
202
197
  version: '0'
203
198
  requirements: []
204
199
  rubyforge_project: data_miner
205
- rubygems_version: 1.8.15
200
+ rubygems_version: 1.8.21
206
201
  signing_key:
207
202
  specification_version: 3
208
203
  summary: Mine remote data into your ActiveRecord models.
209
204
  test_files:
210
205
  - test/helper.rb
211
- - test/support/aircraft.rb
212
- - test/support/airport.rb
213
- - test/support/automobile_fuel_type.rb
214
- - test/support/automobile_variant.rb
215
- - test/support/country.rb
216
- - test/support/test_database.rb
217
- - test/test_data_miner_attribute.rb
218
- - test/test_data_miner_process.rb
219
- - test/test_old_syntax.rb
220
- - test/test_tap.rb
206
+ - test/support/breeds.xls
207
+ - test/support/pet_color_dictionary.en.csv
208
+ - test/support/pet_color_dictionary.es.csv
209
+ - test/support/pets.csv
210
+ - test/support/pets_funny.csv
211
+ - test/test_data_miner.rb
212
+ - test/test_earth_import.rb
213
+ - test/test_earth_tap.rb
214
+ - test/test_safety.rb
221
215
  has_rdoc: