data_miner 1.3.8 → 2.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42) hide show
  1. data/CHANGELOG +42 -0
  2. data/Gemfile +19 -3
  3. data/README.rdoc +3 -3
  4. data/Rakefile +13 -15
  5. data/data_miner.gemspec +4 -15
  6. data/lib/data_miner.rb +69 -70
  7. data/lib/data_miner/active_record_extensions.rb +17 -22
  8. data/lib/data_miner/attribute.rb +176 -179
  9. data/lib/data_miner/dictionary.rb +38 -31
  10. data/lib/data_miner/run.rb +49 -18
  11. data/lib/data_miner/script.rb +116 -0
  12. data/lib/data_miner/step.rb +5 -0
  13. data/lib/data_miner/step/import.rb +74 -0
  14. data/lib/data_miner/step/process.rb +34 -0
  15. data/lib/data_miner/step/tap.rb +134 -0
  16. data/lib/data_miner/version.rb +1 -1
  17. data/test/helper.rb +26 -24
  18. data/test/support/breeds.xls +0 -0
  19. data/test/support/pet_color_dictionary.en.csv +5 -0
  20. data/test/support/pet_color_dictionary.es.csv +5 -0
  21. data/test/support/pets.csv +5 -0
  22. data/test/support/pets_funny.csv +4 -0
  23. data/test/test_data_miner.rb +103 -0
  24. data/test/test_earth_import.rb +25 -0
  25. data/test/test_earth_tap.rb +25 -0
  26. data/test/test_safety.rb +43 -0
  27. metadata +72 -78
  28. data/.document +0 -5
  29. data/lib/data_miner/config.rb +0 -124
  30. data/lib/data_miner/import.rb +0 -93
  31. data/lib/data_miner/process.rb +0 -38
  32. data/lib/data_miner/tap.rb +0 -143
  33. data/test/support/aircraft.rb +0 -102
  34. data/test/support/airport.rb +0 -16
  35. data/test/support/automobile_fuel_type.rb +0 -40
  36. data/test/support/automobile_variant.rb +0 -362
  37. data/test/support/country.rb +0 -15
  38. data/test/support/test_database.rb +0 -311
  39. data/test/test_data_miner_attribute.rb +0 -111
  40. data/test/test_data_miner_process.rb +0 -18
  41. data/test/test_old_syntax.rb +0 -825
  42. data/test/test_tap.rb +0 -21
@@ -1,3 +1,3 @@
1
1
  class DataMiner
2
- VERSION = '1.3.8'
2
+ VERSION = '2.0.1'
3
3
  end
@@ -1,30 +1,32 @@
1
1
  require 'rubygems'
2
- require 'bundler'
3
- Bundler.setup
4
- require 'test/unit'
5
- require 'shoulda'
6
- require 'mini_record'
7
- require 'logger'
8
- # require 'ruby-debug'
9
- $LOAD_PATH.unshift(File.dirname(__FILE__))
10
- $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
11
- require 'data_miner'
12
- class Test::Unit::TestCase
13
- end
14
-
15
- test_log = File.open('test.log', 'w')
16
- test_log.sync = true
17
- DataMiner.logger = Logger.new test_log
2
+ require 'bundler/setup'
18
3
 
19
- # because some of the test files reference it
20
- require 'errata'
4
+ if Bundler.definition.specs['ruby-debug19'].first or Bundler.definition.specs['ruby-debug'].first
5
+ require 'ruby-debug'
6
+ end
21
7
 
22
- ENV['WIP'] = 'true' if ENV['ALL'] == 'true'
8
+ require 'minitest/spec'
9
+ require 'minitest/autorun'
10
+ require 'minitest/reporters'
11
+ MiniTest::Unit.runner = MiniTest::SuiteRunner.new
12
+ MiniTest::Unit.runner.reporters << MiniTest::Reporters::SpecReporter.new
23
13
 
24
- $:.push File.dirname(__FILE__)
25
- require 'support/test_database'
14
+ cmd = %{mysql -u root -ppassword -e "drop database data_miner_test; create database data_miner_test charset utf8"}
15
+ $stderr.puts "Running `#{cmd}`..."
16
+ system cmd
17
+ $stderr.puts "Done."
26
18
 
27
- ActiveSupport::Inflector.inflections do |inflect|
28
- inflect.uncountable %w{ aircraft aircraft_deux census_division_deux census_division_trois }
29
- end
19
+ require 'active_record'
20
+ require 'logger'
21
+ ActiveRecord::Base.logger = Logger.new $stderr
22
+ ActiveRecord::Base.logger.level = Logger::INFO
23
+ # ActiveRecord::Base.logger.level = Logger::DEBUG
24
+ ActiveRecord::Base.establish_connection(
25
+ 'adapter' => 'mysql2',
26
+ 'database' => 'data_miner_test',
27
+ 'username' => 'root',
28
+ 'password' => 'password'
29
+ )
30
30
 
31
+ require 'data_miner'
32
+ DataMiner::Run.auto_upgrade!
Binary file
@@ -0,0 +1,5 @@
1
+ input,output
2
+ WH/RD,white/red
3
+ BR/BL,brown/black
4
+ GR/BU,green/blue
5
+ GO,gold
@@ -0,0 +1,5 @@
1
+ input,output
2
+ WH/RD,blanco/rojo
3
+ BR/BL,morron/negro
4
+ GR/BU,verde/azul
5
+ GO,oro
@@ -0,0 +1,5 @@
1
+ name,breed,color,age
2
+ Pierre,Tabby,GO,4
3
+ Jerry,Beagle,BR/BL,5
4
+ Amigo,Spanish Lizarto,GR/BU,17
5
+ Johnny,Beagle,BR/BL,2
@@ -0,0 +1,4 @@
1
+ name,breed,color
2
+ Pierre,Rhino,GO
3
+ Jerry,Badger,BR/BL
4
+ Amigo,Dinosaur,GR/BU
@@ -0,0 +1,103 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require 'helper'
3
+
4
+ PETS = File.expand_path('../support/pets.csv', __FILE__)
5
+ PETS_FUNNY = File.expand_path('../support/pets_funny.csv', __FILE__)
6
+ COLOR_DICTIONARY_ENGLISH = File.expand_path('../support/pet_color_dictionary.en.csv', __FILE__)
7
+ COLOR_DICTIONARY_SPANISH = File.expand_path('../support/pet_color_dictionary.es.csv', __FILE__)
8
+ BREEDS = File.expand_path('../support/breeds.xls', __FILE__)
9
+
10
+ class Pet < ActiveRecord::Base
11
+ self.primary_key = "name"
12
+ col :name
13
+ col :breed_id
14
+ col :color_id
15
+ col :age, :type => :integer
16
+ belongs_to :breed
17
+ data_miner do
18
+ process :auto_upgrade!
19
+ process :run_data_miner_on_parent_associations!
20
+ import("A list of pets", :url => "file://#{PETS}") do
21
+ key :name
22
+ store :age
23
+ store :breed_id, :field_name => :breed
24
+ store :color_id, :field_name => :color, :dictionary => { :url => "file://#{COLOR_DICTIONARY_ENGLISH}", :input => :input, :output => :output }
25
+ end
26
+ end
27
+ end
28
+
29
+ class Breed < ActiveRecord::Base
30
+ class << self
31
+ def update_average_age!
32
+ # make sure pet is populated
33
+ Pet.run_data_miner!
34
+ update_all %{breeds.average_age = (SELECT AVG(pets.age) FROM pets WHERE pets.breed_id = breeds.name)}
35
+ end
36
+ end
37
+ self.primary_key = "name"
38
+ col :name
39
+ col :average_age, :type => :float
40
+ data_miner do
41
+ process :auto_upgrade!
42
+ import("A list of breeds", :url => "file://#{BREEDS}") do
43
+ key :name, :field_name => 'Breed name'
44
+ end
45
+ process :update_average_age!
46
+ end
47
+ end
48
+
49
+ Pet.auto_upgrade!
50
+
51
+ describe DataMiner do
52
+ describe "when used to import example data about pets" do
53
+ before do
54
+ Pet.delete_all
55
+ end
56
+ it "is idempotent given a key" do
57
+ Pet.run_data_miner!
58
+ first_count = Pet.count
59
+ Pet.run_data_miner!
60
+ Pet.count.must_equal first_count
61
+ end
62
+ it "can map fields in the source file to columns in the database" do
63
+ Pet.run_data_miner!
64
+ Pet.find('Jerry').breed_id.must_equal 'Beagle'
65
+ end
66
+ it "can use a dictionary to translate source data" do
67
+ Pet.run_data_miner!
68
+ Pet.find('Jerry').color_id.must_equal 'brown/black'
69
+ end
70
+ it "refreshes the dictionary for every run" do
71
+ Pet.run_data_miner!
72
+ Pet.find('Jerry').color_id.must_equal 'brown/black'
73
+ begin
74
+ FileUtils.mv COLOR_DICTIONARY_ENGLISH, "#{COLOR_DICTIONARY_ENGLISH}.bak"
75
+ FileUtils.cp COLOR_DICTIONARY_SPANISH, COLOR_DICTIONARY_ENGLISH # oops! somebody swapped in a spanish dictionary
76
+ Pet.run_data_miner!
77
+ Pet.find('Jerry').color_id.must_equal 'morron/negro'
78
+ ensure
79
+ FileUtils.mv "#{COLOR_DICTIONARY_ENGLISH}.bak", COLOR_DICTIONARY_ENGLISH
80
+ end
81
+ end
82
+ it "refreshes the data source for every run" do
83
+ Pet.run_data_miner!
84
+ Pet.find('Jerry').breed_id.must_equal 'Beagle'
85
+ begin
86
+ FileUtils.mv PETS, "#{PETS}.bak"
87
+ FileUtils.cp PETS_FUNNY, PETS # oops! somebody swapped in a funny data source
88
+ Pet.run_data_miner!
89
+ Pet.find('Jerry').breed_id.must_equal 'Badger'
90
+ ensure
91
+ FileUtils.mv "#{PETS}.bak", PETS
92
+ end
93
+ end
94
+ it "provides :run_data_miner_on_parent_associations!" do
95
+ Pet.run_data_miner!
96
+ Pet.find('Jerry').breed.must_equal Breed.find('Beagle')
97
+ end
98
+ it "runs class methods" do
99
+ Breed.run_data_miner!
100
+ Breed.find('Beagle').average_age.must_equal((5+2)/2.0)
101
+ end
102
+ end
103
+ end
@@ -0,0 +1,25 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require 'helper'
3
+ require 'earth'
4
+
5
+ # use earth, which has a plethora of real-world data_miner blocks
6
+ Earth.init :locality, :pet, :load_data_miner => true, :apply_schemas => true
7
+
8
+ describe DataMiner do
9
+ describe "being used by the Earth library's import steps" do
10
+ describe "for pets" do
11
+ it "can pull breed and species" do
12
+ Breed.run_data_miner!
13
+ Breed.find('Golden Retriever').species.must_equal Species.find('dog')
14
+ end
15
+ end
16
+ describe "for localities" do
17
+ it "can handle non-latin characters" do
18
+ Country.run_data_miner!
19
+ Country.find('DE').name.must_equal 'Germany'
20
+ Country.find('AX').name.must_equal 'Åland Islands'
21
+ Country.find('CI').name.must_equal "Côte d'Ivoire"
22
+ end
23
+ end
24
+ end
25
+ end
@@ -0,0 +1,25 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require 'helper'
3
+ require 'earth'
4
+
5
+ # use earth, which has a plethora of real-world data_miner blocks
6
+ Earth.init :locality, :pet, :load_data_miner => false, :apply_schemas => true
7
+
8
+ DataMiner.run %w{Country Breed}
9
+
10
+ describe DataMiner do
11
+ describe "being used by the Earth library's tap steps" do
12
+ describe "for pets" do
13
+ it "can pull breed and species" do
14
+ Breed.find('Golden Retriever').species.must_equal Species.find('dog')
15
+ end
16
+ end
17
+ describe "for localities" do
18
+ it "can handle non-latin characters" do
19
+ Country.find('DE').name.must_equal 'Germany'
20
+ Country.find('AX').name.must_equal 'Åland Islands'
21
+ Country.find('CI').name.must_equal "Côte d'Ivoire"
22
+ end
23
+ end
24
+ end
25
+ end
@@ -0,0 +1,43 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require 'helper'
3
+ require 'earth'
4
+
5
+ # use earth, which has a plethora of real-world data_miner blocks
6
+ Earth.init :locality, :pet, :load_data_miner => true, :apply_schemas => true
7
+
8
+ describe DataMiner do
9
+ describe "when being run in a multi-threaded environment" do
10
+ it "tries not to duplicate data" do
11
+ begin
12
+ old_thread_abort_on_exception = Thread.abort_on_exception
13
+ Thread.abort_on_exception = false
14
+ Breed.delete_all
15
+ Breed.run_data_miner!
16
+ reference_count = Breed.count
17
+ Breed.delete_all
18
+ threads = (0..2).map do |i|
19
+ Thread.new do
20
+ $stderr.write "Thread #{i} starting\n"
21
+ Breed.run_data_miner!
22
+ $stderr.write "Thread #{i} done\n"
23
+ end
24
+ end
25
+ exceptions = []
26
+ threads.each do |t|
27
+ begin
28
+ t.join
29
+ rescue
30
+ exceptions << $!
31
+ end
32
+ end
33
+ exceptions.length.must_equal 2
34
+ exceptions.each do |exception|
35
+ exception.must_be_kind_of LockMethod::Locked
36
+ end
37
+ Breed.count.must_equal reference_count
38
+ ensure
39
+ Thread.abort_on_exception = old_thread_abort_on_exception
40
+ end
41
+ end
42
+ end
43
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data_miner
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.8
4
+ version: 2.0.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -11,11 +11,11 @@ authors:
11
11
  autorequire:
12
12
  bindir: bin
13
13
  cert_chain: []
14
- date: 2012-02-10 00:00:00.000000000 Z
14
+ date: 2012-04-18 00:00:00.000000000 Z
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
17
17
  name: remote_table
18
- requirement: &2164605920 !ruby/object:Gem::Requirement
18
+ requirement: !ruby/object:Gem::Requirement
19
19
  none: false
20
20
  requirements:
21
21
  - - ! '>='
@@ -23,10 +23,15 @@ dependencies:
23
23
  version: 1.2.2
24
24
  type: :runtime
25
25
  prerelease: false
26
- version_requirements: *2164605920
26
+ version_requirements: !ruby/object:Gem::Requirement
27
+ none: false
28
+ requirements:
29
+ - - ! '>='
30
+ - !ruby/object:Gem::Version
31
+ version: 1.2.2
27
32
  - !ruby/object:Gem::Dependency
28
33
  name: activerecord
29
- requirement: &2164605140 !ruby/object:Gem::Requirement
34
+ requirement: !ruby/object:Gem::Requirement
30
35
  none: false
31
36
  requirements:
32
37
  - - ! '>='
@@ -34,10 +39,15 @@ dependencies:
34
39
  version: 2.3.4
35
40
  type: :runtime
36
41
  prerelease: false
37
- version_requirements: *2164605140
42
+ version_requirements: !ruby/object:Gem::Requirement
43
+ none: false
44
+ requirements:
45
+ - - ! '>='
46
+ - !ruby/object:Gem::Version
47
+ version: 2.3.4
38
48
  - !ruby/object:Gem::Dependency
39
49
  name: activesupport
40
- requirement: &2164604520 !ruby/object:Gem::Requirement
50
+ requirement: !ruby/object:Gem::Requirement
41
51
  none: false
42
52
  requirements:
43
53
  - - ! '>='
@@ -45,10 +55,15 @@ dependencies:
45
55
  version: 2.3.4
46
56
  type: :runtime
47
57
  prerelease: false
48
- version_requirements: *2164604520
58
+ version_requirements: !ruby/object:Gem::Requirement
59
+ none: false
60
+ requirements:
61
+ - - ! '>='
62
+ - !ruby/object:Gem::Version
63
+ version: 2.3.4
49
64
  - !ruby/object:Gem::Dependency
50
65
  name: conversions
51
- requirement: &2164619940 !ruby/object:Gem::Requirement
66
+ requirement: !ruby/object:Gem::Requirement
52
67
  none: false
53
68
  requirements:
54
69
  - - ! '>='
@@ -56,21 +71,15 @@ dependencies:
56
71
  version: 1.4.4
57
72
  type: :runtime
58
73
  prerelease: false
59
- version_requirements: *2164619940
60
- - !ruby/object:Gem::Dependency
61
- name: blockenspiel
62
- requirement: &2164619140 !ruby/object:Gem::Requirement
74
+ version_requirements: !ruby/object:Gem::Requirement
63
75
  none: false
64
76
  requirements:
65
77
  - - ! '>='
66
78
  - !ruby/object:Gem::Version
67
- version: 0.3.2
68
- type: :runtime
69
- prerelease: false
70
- version_requirements: *2164619140
79
+ version: 1.4.4
71
80
  - !ruby/object:Gem::Dependency
72
81
  name: errata
73
- requirement: &2164618260 !ruby/object:Gem::Requirement
82
+ requirement: !ruby/object:Gem::Requirement
74
83
  none: false
75
84
  requirements:
76
85
  - - ! '>='
@@ -78,73 +87,60 @@ dependencies:
78
87
  version: 1.0.1
79
88
  type: :runtime
80
89
  prerelease: false
81
- version_requirements: *2164618260
82
- - !ruby/object:Gem::Dependency
83
- name: mini_record-compat
84
- requirement: &2164617640 !ruby/object:Gem::Requirement
90
+ version_requirements: !ruby/object:Gem::Requirement
85
91
  none: false
86
92
  requirements:
87
93
  - - ! '>='
88
94
  - !ruby/object:Gem::Version
89
- version: '0'
90
- type: :development
91
- prerelease: false
92
- version_requirements: *2164617640
95
+ version: 1.0.1
93
96
  - !ruby/object:Gem::Dependency
94
- name: loose_tight_dictionary
95
- requirement: &2164616980 !ruby/object:Gem::Requirement
97
+ name: active_record_inline_schema
98
+ requirement: !ruby/object:Gem::Requirement
96
99
  none: false
97
100
  requirements:
98
101
  - - ! '>='
99
102
  - !ruby/object:Gem::Version
100
- version: 0.0.5
101
- type: :development
103
+ version: '0'
104
+ type: :runtime
102
105
  prerelease: false
103
- version_requirements: *2164616980
104
- - !ruby/object:Gem::Dependency
105
- name: test-unit
106
- requirement: &2164616480 !ruby/object:Gem::Requirement
106
+ version_requirements: !ruby/object:Gem::Requirement
107
107
  none: false
108
108
  requirements:
109
109
  - - ! '>='
110
110
  - !ruby/object:Gem::Version
111
111
  version: '0'
112
- type: :development
113
- prerelease: false
114
- version_requirements: *2164616480
115
112
  - !ruby/object:Gem::Dependency
116
- name: shoulda
117
- requirement: &2164615900 !ruby/object:Gem::Requirement
113
+ name: aasm
114
+ requirement: !ruby/object:Gem::Requirement
118
115
  none: false
119
116
  requirements:
120
117
  - - ! '>='
121
118
  - !ruby/object:Gem::Version
122
119
  version: '0'
123
- type: :development
120
+ type: :runtime
124
121
  prerelease: false
125
- version_requirements: *2164615900
126
- - !ruby/object:Gem::Dependency
127
- name: mysql
128
- requirement: &2164615420 !ruby/object:Gem::Requirement
122
+ version_requirements: !ruby/object:Gem::Requirement
129
123
  none: false
130
124
  requirements:
131
125
  - - ! '>='
132
126
  - !ruby/object:Gem::Version
133
127
  version: '0'
134
- type: :development
135
- prerelease: false
136
- version_requirements: *2164615420
137
128
  - !ruby/object:Gem::Dependency
138
- name: rake
139
- requirement: &2164614880 !ruby/object:Gem::Requirement
129
+ name: lock_method
130
+ requirement: !ruby/object:Gem::Requirement
140
131
  none: false
141
132
  requirements:
142
133
  - - ! '>='
143
134
  - !ruby/object:Gem::Version
144
- version: '0'
145
- type: :development
135
+ version: 0.5.1
136
+ type: :runtime
146
137
  prerelease: false
147
- version_requirements: *2164614880
138
+ version_requirements: !ruby/object:Gem::Requirement
139
+ none: false
140
+ requirements:
141
+ - - ! '>='
142
+ - !ruby/object:Gem::Version
143
+ version: 0.5.1
148
144
  description: Mine remote data into your ActiveRecord models. You can also convert
149
145
  units.
150
146
  email:
@@ -153,7 +149,6 @@ executables: []
153
149
  extensions: []
154
150
  extra_rdoc_files: []
155
151
  files:
156
- - .document
157
152
  - .gitignore
158
153
  - CHANGELOG
159
154
  - Gemfile
@@ -164,24 +159,24 @@ files:
164
159
  - lib/data_miner.rb
165
160
  - lib/data_miner/active_record_extensions.rb
166
161
  - lib/data_miner/attribute.rb
167
- - lib/data_miner/config.rb
168
162
  - lib/data_miner/dictionary.rb
169
- - lib/data_miner/import.rb
170
- - lib/data_miner/process.rb
171
163
  - lib/data_miner/run.rb
172
- - lib/data_miner/tap.rb
164
+ - lib/data_miner/script.rb
165
+ - lib/data_miner/step.rb
166
+ - lib/data_miner/step/import.rb
167
+ - lib/data_miner/step/process.rb
168
+ - lib/data_miner/step/tap.rb
173
169
  - lib/data_miner/version.rb
174
170
  - test/helper.rb
175
- - test/support/aircraft.rb
176
- - test/support/airport.rb
177
- - test/support/automobile_fuel_type.rb
178
- - test/support/automobile_variant.rb
179
- - test/support/country.rb
180
- - test/support/test_database.rb
181
- - test/test_data_miner_attribute.rb
182
- - test/test_data_miner_process.rb
183
- - test/test_old_syntax.rb
184
- - test/test_tap.rb
171
+ - test/support/breeds.xls
172
+ - test/support/pet_color_dictionary.en.csv
173
+ - test/support/pet_color_dictionary.es.csv
174
+ - test/support/pets.csv
175
+ - test/support/pets_funny.csv
176
+ - test/test_data_miner.rb
177
+ - test/test_earth_import.rb
178
+ - test/test_earth_tap.rb
179
+ - test/test_safety.rb
185
180
  homepage: https://github.com/seamusabshere/data_miner
186
181
  licenses: []
187
182
  post_install_message:
@@ -202,20 +197,19 @@ required_rubygems_version: !ruby/object:Gem::Requirement
202
197
  version: '0'
203
198
  requirements: []
204
199
  rubyforge_project: data_miner
205
- rubygems_version: 1.8.15
200
+ rubygems_version: 1.8.21
206
201
  signing_key:
207
202
  specification_version: 3
208
203
  summary: Mine remote data into your ActiveRecord models.
209
204
  test_files:
210
205
  - test/helper.rb
211
- - test/support/aircraft.rb
212
- - test/support/airport.rb
213
- - test/support/automobile_fuel_type.rb
214
- - test/support/automobile_variant.rb
215
- - test/support/country.rb
216
- - test/support/test_database.rb
217
- - test/test_data_miner_attribute.rb
218
- - test/test_data_miner_process.rb
219
- - test/test_old_syntax.rb
220
- - test/test_tap.rb
206
+ - test/support/breeds.xls
207
+ - test/support/pet_color_dictionary.en.csv
208
+ - test/support/pet_color_dictionary.es.csv
209
+ - test/support/pets.csv
210
+ - test/support/pets_funny.csv
211
+ - test/test_data_miner.rb
212
+ - test/test_earth_import.rb
213
+ - test/test_earth_tap.rb
214
+ - test/test_safety.rb
221
215
  has_rdoc: