data_seeder 0.0.5 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +71 -98
  3. data/app/models/data_seeder/seed_file.rb +54 -14
  4. data/lib/data_seeder.rb +30 -10
  5. data/lib/data_seeder/config.rb +39 -17
  6. data/lib/data_seeder/loader.rb +80 -85
  7. data/lib/data_seeder/loader/csv.rb +2 -1
  8. data/lib/data_seeder/loader/txt.rb +1 -6
  9. data/lib/data_seeder/loader/yaml.rb +1 -1
  10. data/lib/data_seeder/version.rb +1 -1
  11. data/test/dummy/app/models/app_error_data_seeder.rb +10 -8
  12. data/test/dummy/app/models/country.rb +0 -12
  13. data/test/dummy/db/development.sqlite3 +0 -0
  14. data/test/dummy/db/seed.test/{bar.err → bar_err/bar.err} +0 -0
  15. data/test/dummy/db/seed.test/countries_csv/countries.cfg +9 -0
  16. data/test/dummy/db/seed.test/countries_csv/countries.csv +249 -0
  17. data/test/dummy/db/seed.test/countries_txt/countries.cfg +6 -0
  18. data/test/dummy/db/seed.test/{countries.txt → countries_txt/countries.txt} +0 -0
  19. data/test/dummy/db/seed.test/{foo.err → foo_err/foo.err} +0 -0
  20. data/test/dummy/db/seed.test/{states.csv → states_csv/states.csv} +0 -0
  21. data/test/dummy/db/seed.test/states_json/states.cfg +1 -0
  22. data/test/dummy/db/seed.test/{states.json → states_json/states.json} +0 -1
  23. data/test/dummy/db/seed.test/states_txt/states.cfg +6 -0
  24. data/test/dummy/db/seed.test/{states.txt → states_txt/states.txt} +0 -1
  25. data/test/dummy/db/seed.test/states_yml/states.cfg +1 -0
  26. data/test/dummy/db/seed.test/{states.yml → states_yml/states.yml} +0 -1
  27. data/test/dummy/db/seed.test/{zulu.err → zulu_err/zulu.err} +0 -0
  28. data/test/dummy/db/test.sqlite3 +0 -0
  29. data/test/dummy/log/development.log +126 -0
  30. data/test/dummy/log/test.log +90945 -0
  31. data/test/models/data_seeder_test.rb +50 -30
  32. metadata +33 -20
  33. data/lib/data_seeder/logger.rb +0 -15
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 7a93ea7fbedc0caf70fd7e28d3b0c854ab133ecb
4
- data.tar.gz: d4111c1efdf4ab10fc59f12d593b1a7159ca6626
3
+ metadata.gz: e24f096a1fd9c290b8e6046ae8d2a51eedb102da
4
+ data.tar.gz: 5d96f04e6a4fae47c640330a6a15c4f68a05a8a9
5
5
  SHA512:
6
- metadata.gz: b050e33cd2128903baafe04efa16974e08d5abe2505088f1db9e659df91d0568f247bf093c4187f798f479b59e09bb9f30f2d3a843877a4e81c27588c5c94ffa
7
- data.tar.gz: 63c0b2c9fa7200bdea5fc3e9ce605110542b1c2e850a7ea7f8497000d5a9542d5743bb0e3b63d3566a70c04160574732c68a929f8f2ccb2e5451389e8dcf9527
6
+ metadata.gz: 3b2de1af354cb2b5a0977b97f47dd8e3d22f127e777c3cdbdcd0a5f9a81ae6b118f1aecd29647f57a21d81ade2ac5ec5d5a83b069508846c5dea28a9d8cd3324
7
+ data.tar.gz: 72899aa6169debf7eeac35ae8d454c4a0d3befb1297a4ba73ff12935c77c92aa59210db667993a607d2c997708469afa481c7be71c39d9f7cf17827c2c9435ba
data/README.md CHANGED
@@ -38,23 +38,15 @@ the following table:
38
38
 
39
39
  And you have a corresponding db/seed/countries.txt file as follows:
40
40
 
41
- # config: { key_attribute: 'code', line: ->(line) { { code: line[0,2], name: line[3...-1] } } }
42
41
  AD Andorra
43
42
  AE United Arab Emirates
44
43
  AF Afghanistan
45
44
 
46
- The first line in a file can define the config attributes associated with the file. For this seed file,
47
- the key_attribute says that it will use the 'code' attribute to lookup existing records (defaults to 'id')
48
- and the line function
49
- defines how the line is converted to an attribute hash defining the instance.
50
-
51
- Since the first line can get a little busy with config information, you can also store your config in a
52
- separate .cfg file with the same name. This contents of this file should eval to a hash. The above config line would be
53
- equivalent to a db/seed/countries.cfg file with the following:
45
+ And a db/seed/countries.cfg file as follows:
54
46
 
55
47
  {
56
- key_attribute: :'code',
57
- line: ->(attr) {
48
+ key_attribute: 'code',
49
+ line: ->(line) {
58
50
  {
59
51
  code: line[0,2],
60
52
  name: line[3...-1]
@@ -62,6 +54,13 @@ equivalent to a db/seed/countries.cfg file with the following:
62
54
  }
63
55
  }
64
56
 
57
+ The cfg file defines the config attributes associated with the file. The contents of this file
58
+ should eval to a hash. For this seed file,
59
+ the key_attribute says that it will use the 'code' attribute to lookup existing records (defaults to 'id')
60
+ and the line function
61
+ defines how the line is converted to an attribute hash defining the instance.
62
+
63
+
65
64
  Running rake db:seed will result in the following output:
66
65
 
67
66
  # rake db:seed
@@ -97,11 +96,11 @@ your own custom loaders.
97
96
  For instance, suppose you had the following tables:
98
97
 
99
98
  ```ruby
100
- create_table "apps", force: :cascade do |t|
99
+ create_table "apps" do |t|
101
100
  t.string "name"
102
101
  end
103
102
 
104
- create_table "app_errors", force: :cascade do |t|
103
+ create_table "app_errors" do |t|
105
104
  t.integer "app_id"
106
105
  t.string "code"
107
106
  t.string "message"
@@ -122,69 +121,14 @@ And you wanted to load up separate error messages for each app such as the follo
122
121
  B1 Error message for B1
123
122
 
124
123
 
125
- You could create your own custom loader that might look as follows:
126
-
127
- ```ruby
128
- require 'data_seeder'
129
-
130
- class AppErrorDataSeeder
131
- include ::DataSeeder::Loader
132
-
133
- def setup
134
- @app = App.find_or_initialize_by(name: self.path_minus_ext)
135
- @existing_errors = {}
136
- if @app.new_record?
137
- logger.info "Loading errors for new App: #{@app.name}"
138
- @app.save!
139
- else
140
- logger.info "Loading errors for existing App: #{@app.name}"
141
- @app.app_errors.each do |app_error|
142
- @existing_errors[app_error.code] = app_error
143
- end
144
- end
145
- end
146
-
147
- def teardown
148
- unless @existing_errors.empty?
149
- logger.info { " The following are begin removed:" }
150
- @existing_errors.each do |code, app_error|
151
- logger.info " #{code}: #{app_error.message}"
152
- app_error.destroy
153
- end
154
- end
155
- end
156
-
157
- def load(io)
158
- io.each_line do |line|
159
- line.strip!
160
- next if line.blank? || line[0] == ?#
161
- space_i = line.index(' ')
162
- raise "Invalid line: #{line}" unless space_i
163
- code = line[0,space_i].strip
164
- message = line[space_i+1..-1].strip
165
- app_error = @existing_errors[code]
166
- if app_error
167
- @existing_errors.delete(code)
168
- app_error.message = message
169
- unless app_error.changes.empty?
170
- logger.info { " Changing #{code}: #{app_error.changes}" }
171
- app_error.save!
172
- end
173
- else
174
- logger.info { " Creating #{code}: #{message}" }
175
- @app.app_errors.create!(code: code, message: message)
176
- end
177
- end
178
- end
179
- end
180
- ```
124
+ Look [here](test/dummy/app/models/app_error_data_seeder.rb) for an example of creating your own custom loader.
181
125
 
182
- To add the seeder, you would create the following config/initializers/data_seeder.rb:
126
+ To add this seeder, you would create the following config/initializers/data_seeder.rb:
183
127
 
184
128
  ```ruby
185
129
  MyApp::Application.config.after_initialize do
186
130
  DataSeeder.configure do |config|
187
- config.add_loader('err', AppErrorDataSeeder.new)
131
+ config.add_loader('err', AppErrorDataSeeder)
188
132
  end
189
133
  end
190
134
  ```
@@ -200,43 +144,72 @@ Executing DataSeeder.run would result in the following:
200
144
  Creating 2: We are seriously foobared
201
145
  Creating 3: We are less seriously foobared
202
146
 
203
- TODO
204
- ----
147
+ ## Configurable values
148
+
149
+ #### depends
205
150
 
206
- Ability to specify more than 1 directory for Rails.env overrides. Could potentially be used if you have that
207
- x Gigabyte seed file that you don't want to check into source control and only want run on production?
151
+ A value or array of values naming the seeds this model depends on; those seeds must be loaded first. Example:
208
152
 
209
- YAML should allow loading as either array or hash. (currently only does hash)
153
+ {
154
+ depends: ['countries','states']
155
+ }
210
156
 
211
- CSV should have options such as only: and except: for using/skipping the specified header columns.
157
+ #### key_attribute
212
158
 
213
- The structure.sql caching within rails uses the file timestamp to determine whether to prepare the test database. This
214
- is error prone and forces you to do a 'touch db/structure.sql' to get around the not getting reloaded problem. Should
215
- I add a utility to override this rails implementation with a sha-based one like the seed files use? (or am I the only
216
- one who has to 'touch db/structure.sql' everytime I switch branches?)
159
+ The attribute used to define uniqueness within the model. Can be a single attribute or an array. Defaults to 'id'.
217
160
 
218
- Add 'sql' loader (with disclaimer that it will temporarily truncate the table)
161
+ #### klass
219
162
 
220
- Ability to stop early when loading up a large seed file for a given environment, i.e., stop after processing the
221
- first 10 lines when Rails.env.test?
163
+ Defines the ActiveRecord Class if it can't be inferred from the seed file.
222
164
 
223
- I want to allow different seeding for different environments. For instance development might have a bunch of dummy
224
- data useful for getting an environment up and running. I'm thinking either the seed_dir similar to like a PATH
225
- environment variable where the first one found would override the others, or maybe make it automatic based on the
226
- directory names and the environment (seed.development/state.yml would override seed/state.yml).
165
+ #### line
227
166
 
228
- The test environment will be the one that will constantly being seeded after migrations or branch changes. Some of
229
- the seed files might be large and take a long time to seed. The above
230
- strategy using seed.test might be useful but it might also be useful to have a preprocessor type such as .sh so for
231
- instance you might have seed.test/table_with_lotsa_rows.csv.sh which might consist of the line
232
- 'head -20 ../seed/table_with_lotsa_rows.csv'
167
+ Proc used for converting a line to attributes (txt files only).
233
168
 
234
- Caching of long-running stuff via pg_dump, mysqldump, or other? This belongs with discussion of the environment-specific
235
- seeding above.
169
+ #### postprocess
236
170
 
237
- Allow config-driven initialization so that we could require: false in the Gemfile and only load as needed.
171
+ Modify the attributes from the seed file before applying them to the model.
172
+
173
+ Example:
174
+
175
+ {
176
+ key_attribute: 'code',
177
+ postprocess: ->(attrs) {
178
+ {
179
+ code: attrs['country_code'],
180
+ name: attrs['country']
181
+ }
182
+ }
183
+ }
184
+
185
+ #### purge
186
+
187
+ Destroys rows that no longer exist in the seed file.
188
+
189
+ #### update_display_method
190
+
191
+ Model method used for displaying updates to a model.
192
+
193
+ #### use_line_number_as_id
194
+
195
+ Use the line number of the seed file as the id
196
+
197
+ ## Incompatibilities from 0.0.x version
198
+
199
+ Custom seeders should now be specified as a Class and not an instance (MySeeder instead of MySeeder.new)
200
+
201
+ data_seeder_<config-item> methods within the models are no longer supported.
202
+
203
+ Using the first line of txt, json, and yaml files as the config is no longer supported. Move them to
204
+ a separate .cfg file.
205
+
206
+
207
+ TODO
208
+ ----
209
+
210
+ Add 'sql' loader (with disclaimer that it will temporarily truncate the table)
238
211
 
239
- Add depends_on option.
212
+ Caching of long-running stuff via pg_dump, mysqldump, or other?
240
213
 
241
214
  Document options (key_attribute, line, postprocess, etc)
242
215
 
@@ -258,7 +231,7 @@ Author
258
231
  License
259
232
  -------
260
233
 
261
- Copyright 2015 Brad Pardee
234
+ Copyright 2015-2016 Brad Pardee
262
235
 
263
236
  Licensed under the Apache License, Version 2.0 (the "License");
264
237
  you may not use this file except in compliance with the License.
@@ -3,32 +3,72 @@ require 'digest'
3
3
  module DataSeeder
4
4
  class SeedFile < ActiveRecord::Base
5
5
  def self.file_hash
6
- hash = {}
7
- all.each do |seed_file|
8
- hash[seed_file.path] = seed_file
6
+ @file_hash ||= begin
7
+ hash = Hash.new { |hash, path| hash[path] = new(path: path) }
8
+ all.each do |seed_file|
9
+ hash[seed_file.path] = seed_file
10
+ end
11
+ hash
9
12
  end
10
- hash
11
13
  end
12
14
 
13
15
  def self.load(path)
14
- seed_file = self.file_hash[path] || new(path: path)
15
- seed_file.load
16
+ seed_file = self.file_hash[path]
17
+ return seed_file.load
18
+ end
19
+
20
+ def self.processed_set
21
+ @processed_set ||= Set.new
22
+ end
23
+
24
+ def self.add_processed(path)
25
+ self.processed_set.add(path)
26
+ end
27
+
28
+ def self.processed?(paths)
29
+ self.processed_set.proper_superset?(Array(paths).to_set)
16
30
  end
17
31
 
18
32
  def load
19
33
  new_sha256 = Digest::SHA256.file(path).hexdigest
34
+ dot_index = path.rindex('.')
35
+ path_minus_ext = path[0, dot_index]
20
36
  if self.sha256 != new_sha256
21
- self.sha256 = new_sha256
37
+ cfg_file = "#{path_minus_ext}.cfg"
38
+ config = {}
39
+ if File.exist?(cfg_file)
40
+ config = eval(File.read(cfg_file))
41
+ end
42
+ begin
43
+ config[:klass] ||= path_minus_ext.classify.constantize
44
+ rescue NameError => e
45
+ end
22
46
  ext = File.extname(self.path)[1..-1]
23
- return unless ext
24
- loader = DataSeeder.config.loaders[ext]
25
- unless loader
26
- DataSeeder.logger.info { "Warning: No loader for #{path}"}
27
- return
47
+ return true unless ext
48
+ loader_klass = config[:loader] || DataSeeder.config.loaders[ext]
49
+ unless loader_klass
50
+ DataSeeder.config.logger.warn "Warning: No loader for #{path}"
51
+ return true
52
+ end
53
+ if loader_klass.respond_to?(:default_config)
54
+ config = loader_klass.default_config.merge(config)
55
+ end
56
+ config[:path] = path
57
+ config[:path_minus_ext] = path_minus_ext
58
+ loader = loader_klass.new(config)
59
+ depends = loader.config[:depends]
60
+ return false if depends && !self.class.processed?(depends)
61
+ DataSeeder.config.logger.debug { "Loading #{path}" }
62
+ DataSeeder.config.log_indent do
63
+ File.open(path, 'r') do |io|
64
+ loader.process(io)
65
+ end
66
+ self.sha256 = new_sha256
67
+ save!
28
68
  end
29
- loader.process(path)
30
- save!
31
69
  end
70
+ self.class.add_processed(path_minus_ext)
71
+ return true
32
72
  end
33
73
  end
34
74
  end
data/lib/data_seeder.rb CHANGED
@@ -1,7 +1,6 @@
1
1
  require 'data_seeder/config'
2
2
  require 'data_seeder/engine'
3
3
  require 'data_seeder/loader'
4
- require 'data_seeder/logger'
5
4
 
6
5
  module DataSeeder
7
6
  class << self
@@ -22,24 +21,45 @@ module DataSeeder
22
21
  yield(config)
23
22
  end
24
23
 
25
- def self.logger
26
- config.logger
27
- end
28
-
29
24
  def self.run(new_config={})
30
25
  @@mutex.synchronize do
31
26
  msec = Benchmark.ms do
32
27
  new_config.each do |key, value|
33
28
  self.config.send("#{key}=", value)
34
29
  end
35
- Dir.chdir(config.seed_dir) do
36
- Dir['**/*'].each do |path|
37
- next if path.end_with?('.cfg')
38
- SeedFile.load(path) if File.file?(path)
30
+ # Keep track of the seed files that have dependencies that aren't fulfilled
31
+ pending = []
32
+ config.seed_dirs.each do |seed_dir|
33
+ Dir.chdir(seed_dir) do
34
+ Dir['**/*'].each do |path|
35
+ next if path.end_with?('.cfg')
36
+ if File.file?(path)
37
+ unless SeedFile.load(path)
38
+ pending << [seed_dir, path]
39
+ end
40
+ end
41
+ end
42
+ end
43
+ end
44
+ # Loop thru the ones that couldn't be processed previously because they depended on another seed being loaded first
45
+ until pending.empty?
46
+ new_pending = []
47
+ pending.each do |seed_dir, path|
48
+ Dir.chdir(seed_dir) do
49
+ unless SeedFile.load(path)
50
+ new_pending << [seed_dir, path]
51
+ end
52
+ end
53
+ end
54
+ if pending.size == new_pending.size
55
+ msg = "Error: Circular dependency in DataSeeder, seeds=#{pending.inspect}"
56
+ config.logger.error msg
57
+ raise msg
39
58
  end
59
+ pending = new_pending
40
60
  end
41
61
  end
42
- logger.info { "DataSeeder.run took #{msec.to_i} msec" }
62
+ config.logger.info "DataSeeder.run took #{msec.to_i} msec"
43
63
  end
44
64
  end
45
65
 
@@ -1,28 +1,25 @@
1
1
  module DataSeeder
2
2
  class Config
3
- attr_accessor :seed_dir, :logger, :loaders
3
+ attr_accessor :seed_dirs, :logger, :loaders, :verbose
4
4
 
5
5
  def initialize
6
- @seed_dir = 'db/seed'
7
- @logger = Logger.new
8
- @loaders = default_loaders
9
- end
10
-
11
- def verbose=(verbose)
12
- @logger.verbose = verbose
13
- end
14
-
15
- def verbose
16
- @logger.verbose
6
+ @seed_dirs = ['db/seed'].freeze
7
+ @loaders = default_loaders
8
+ @verbose = true
9
+ @is_default = true
10
+ @logger = Logger.new($stdout)
11
+ @logger.formatter = ->(severity, datetime, progname, msg) { "#{@indent}#{msg}\n" }
12
+ @indent_level = 0
13
+ @indent = ''
17
14
  end
18
15
 
19
16
  def default_loaders
20
17
  {
21
- 'csv' => Loader::CSV.new,
22
- 'json' => Loader::JSON.new,
23
- 'txt' => Loader::Txt.new,
24
- 'yaml' => Loader::YAML.new,
25
- 'yml' => Loader::YAML.new,
18
+ 'csv' => Loader::CSV,
19
+ 'json' => Loader::JSON,
20
+ 'txt' => Loader::Txt,
21
+ 'yaml' => Loader::YAML,
22
+ 'yml' => Loader::YAML,
26
23
  }
27
24
  end
28
25
 
@@ -37,5 +34,30 @@ module DataSeeder
37
34
  def add_loader(ext, loader)
38
35
  @loaders[ext] = loader
39
36
  end
37
+
38
+ def seed_dir=(seed_dir)
39
+ @seed_dirs = [seed_dir]
40
+ end
41
+
42
+ def seed_dir
43
+ @seed_dirs.first
44
+ end
45
+
46
+ def add_seed_dir(seed_dir)
47
+ if @seed_dirs.frozen?
48
+ @seed_dirs = [seed_dir]
49
+ else
50
+ @seed_dirs << seed_dir
51
+ end
52
+ end
53
+
54
+ def log_indent(&block)
55
+ @indent_level += 1
56
+ @indent = ' ' * @indent_level
57
+ yield
58
+ ensure
59
+ @indent_level -= 1
60
+ @indent = ' ' * @indent_level
61
+ end
40
62
  end
41
63
  end