data_seeder 0.0.5 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +71 -98
- data/app/models/data_seeder/seed_file.rb +54 -14
- data/lib/data_seeder.rb +30 -10
- data/lib/data_seeder/config.rb +39 -17
- data/lib/data_seeder/loader.rb +80 -85
- data/lib/data_seeder/loader/csv.rb +2 -1
- data/lib/data_seeder/loader/txt.rb +1 -6
- data/lib/data_seeder/loader/yaml.rb +1 -1
- data/lib/data_seeder/version.rb +1 -1
- data/test/dummy/app/models/app_error_data_seeder.rb +10 -8
- data/test/dummy/app/models/country.rb +0 -12
- data/test/dummy/db/development.sqlite3 +0 -0
- data/test/dummy/db/seed.test/{bar.err → bar_err/bar.err} +0 -0
- data/test/dummy/db/seed.test/countries_csv/countries.cfg +9 -0
- data/test/dummy/db/seed.test/countries_csv/countries.csv +249 -0
- data/test/dummy/db/seed.test/countries_txt/countries.cfg +6 -0
- data/test/dummy/db/seed.test/{countries.txt → countries_txt/countries.txt} +0 -0
- data/test/dummy/db/seed.test/{foo.err → foo_err/foo.err} +0 -0
- data/test/dummy/db/seed.test/{states.csv → states_csv/states.csv} +0 -0
- data/test/dummy/db/seed.test/states_json/states.cfg +1 -0
- data/test/dummy/db/seed.test/{states.json → states_json/states.json} +0 -1
- data/test/dummy/db/seed.test/states_txt/states.cfg +6 -0
- data/test/dummy/db/seed.test/{states.txt → states_txt/states.txt} +0 -1
- data/test/dummy/db/seed.test/states_yml/states.cfg +1 -0
- data/test/dummy/db/seed.test/{states.yml → states_yml/states.yml} +0 -1
- data/test/dummy/db/seed.test/{zulu.err → zulu_err/zulu.err} +0 -0
- data/test/dummy/db/test.sqlite3 +0 -0
- data/test/dummy/log/development.log +126 -0
- data/test/dummy/log/test.log +90945 -0
- data/test/models/data_seeder_test.rb +50 -30
- metadata +33 -20
- data/lib/data_seeder/logger.rb +0 -15
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e24f096a1fd9c290b8e6046ae8d2a51eedb102da
|
4
|
+
data.tar.gz: 5d96f04e6a4fae47c640330a6a15c4f68a05a8a9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3b2de1af354cb2b5a0977b97f47dd8e3d22f127e777c3cdbdcd0a5f9a81ae6b118f1aecd29647f57a21d81ade2ac5ec5d5a83b069508846c5dea28a9d8cd3324
|
7
|
+
data.tar.gz: 72899aa6169debf7eeac35ae8d454c4a0d3befb1297a4ba73ff12935c77c92aa59210db667993a607d2c997708469afa481c7be71c39d9f7cf17827c2c9435ba
|
data/README.md
CHANGED
@@ -38,23 +38,15 @@ the following table:
|
|
38
38
|
|
39
39
|
And you have a corresponding db/seed/countries.txt file as follows:
|
40
40
|
|
41
|
-
# config: { key_attribute: 'code', line: ->(line) { { code: line[0,2], name: line[3...-1] } } }
|
42
41
|
AD Andorra
|
43
42
|
AE United Arab Emirates
|
44
43
|
AF Afghanistan
|
45
44
|
|
46
|
-
|
47
|
-
the key_attribute says that it will use the 'code' attribute to lookup existing records (defaults to 'id')
|
48
|
-
and the line function
|
49
|
-
defines how the line is converted to an attribute hash defining the instance.
|
50
|
-
|
51
|
-
Since the first line can get a little busy with config information, you can also store your config in a
|
52
|
-
separate .cfg file with the same name. This contents of this file should eval to a hash. The above config line would be
|
53
|
-
equivalent to a db/seed/countries.cfg file with the following:
|
45
|
+
And a db/seed/countries.cfg file as follows:
|
54
46
|
|
55
47
|
{
|
56
|
-
key_attribute:
|
57
|
-
line: ->(
|
48
|
+
key_attribute: 'code',
|
49
|
+
line: ->(line) {
|
58
50
|
{
|
59
51
|
code: line[0,2],
|
60
52
|
name: line[3...-1]
|
@@ -62,6 +54,13 @@ equivalent to a db/seed/countries.cfg file with the following:
|
|
62
54
|
}
|
63
55
|
}
|
64
56
|
|
57
|
+
The cfg file defines the config attributes associated with the file. The contents of this file
|
58
|
+
should eval to a hash. For this seed file,
|
59
|
+
the key_attribute says that it will use the 'code' attribute to lookup existing records (defaults to 'id')
|
60
|
+
and the line function
|
61
|
+
defines how the line is converted to an attribute hash defining the instance.
|
62
|
+
|
63
|
+
|
65
64
|
Running rake db:seed will result in the following output:
|
66
65
|
|
67
66
|
# rake db:seed
|
@@ -97,11 +96,11 @@ your own custom loaders.
|
|
97
96
|
For instance, suppose you had the following tables:
|
98
97
|
|
99
98
|
```ruby
|
100
|
-
create_table "apps"
|
99
|
+
create_table "apps" do |t|
|
101
100
|
t.string "name"
|
102
101
|
end
|
103
102
|
|
104
|
-
create_table "app_errors"
|
103
|
+
create_table "app_errors" do |t|
|
105
104
|
t.integer "app_id"
|
106
105
|
t.string "code"
|
107
106
|
t.string "message"
|
@@ -122,69 +121,14 @@ And you wanted to load up separate error messages for each app such as the follo
|
|
122
121
|
B1 Error message for B1
|
123
122
|
|
124
123
|
|
125
|
-
|
126
|
-
|
127
|
-
```ruby
|
128
|
-
require 'data_seeder'
|
129
|
-
|
130
|
-
class AppErrorDataSeeder
|
131
|
-
include ::DataSeeder::Loader
|
132
|
-
|
133
|
-
def setup
|
134
|
-
@app = App.find_or_initialize_by(name: self.path_minus_ext)
|
135
|
-
@existing_errors = {}
|
136
|
-
if @app.new_record?
|
137
|
-
logger.info "Loading errors for new App: #{@app.name}"
|
138
|
-
@app.save!
|
139
|
-
else
|
140
|
-
logger.info "Loading errors for existing App: #{@app.name}"
|
141
|
-
@app.app_errors.each do |app_error|
|
142
|
-
@existing_errors[app_error.code] = app_error
|
143
|
-
end
|
144
|
-
end
|
145
|
-
end
|
146
|
-
|
147
|
-
def teardown
|
148
|
-
unless @existing_errors.empty?
|
149
|
-
logger.info { " The following are begin removed:" }
|
150
|
-
@existing_errors.each do |code, app_error|
|
151
|
-
logger.info " #{code}: #{app_error.message}"
|
152
|
-
app_error.destroy
|
153
|
-
end
|
154
|
-
end
|
155
|
-
end
|
156
|
-
|
157
|
-
def load(io)
|
158
|
-
io.each_line do |line|
|
159
|
-
line.strip!
|
160
|
-
next if line.blank? || line[0] == ?#
|
161
|
-
space_i = line.index(' ')
|
162
|
-
raise "Invalid line: #{line}" unless space_i
|
163
|
-
code = line[0,space_i].strip
|
164
|
-
message = line[space_i+1..-1].strip
|
165
|
-
app_error = @existing_errors[code]
|
166
|
-
if app_error
|
167
|
-
@existing_errors.delete(code)
|
168
|
-
app_error.message = message
|
169
|
-
unless app_error.changes.empty?
|
170
|
-
logger.info { " Changing #{code}: #{app_error.changes}" }
|
171
|
-
app_error.save!
|
172
|
-
end
|
173
|
-
else
|
174
|
-
logger.info { " Creating #{code}: #{message}" }
|
175
|
-
@app.app_errors.create!(code: code, message: message)
|
176
|
-
end
|
177
|
-
end
|
178
|
-
end
|
179
|
-
end
|
180
|
-
```
|
124
|
+
Look [here](test/dummy/app/models/app_error_data_seeder.rb) for an example of creating your own custom loader.
|
181
125
|
|
182
|
-
To add
|
126
|
+
To add this seeder, you would create the following config/initializers/data_seeder.rb:
|
183
127
|
|
184
128
|
```ruby
|
185
129
|
MyApp::Application.config.after_initialize do
|
186
130
|
DataSeeder.configure do |config|
|
187
|
-
config.add_loader('err', AppErrorDataSeeder
|
131
|
+
config.add_loader('err', AppErrorDataSeeder)
|
188
132
|
end
|
189
133
|
end
|
190
134
|
```
|
@@ -200,43 +144,72 @@ Executing DataSeeder.run would result in the following:
|
|
200
144
|
Creating 2: We are seriously foobared
|
201
145
|
Creating 3: We are less seriously foobared
|
202
146
|
|
203
|
-
|
204
|
-
|
147
|
+
## Configurable values
|
148
|
+
|
149
|
+
#### depends
|
205
150
|
|
206
|
-
|
207
|
-
x Gigabyte seed file that you don't want to check into source control and only want run on production?
|
151
|
+
Value or array that this model depends on such that they must be seeded first. Examples:
|
208
152
|
|
209
|
-
|
153
|
+
{
|
154
|
+
depends: ['countries','states']
|
155
|
+
}
|
210
156
|
|
211
|
-
|
157
|
+
#### key_attribute
|
212
158
|
|
213
|
-
The
|
214
|
-
is error prone and forces you to do a 'touch db/structure.sql' to get around the not getting reloaded problem. Should
|
215
|
-
I add a utility to override this rails implementation with a sha-based one like the seed files use? (or am I the only
|
216
|
-
one who has to 'touch db/structure.sql' everytime I switch branches?)
|
159
|
+
The attribute used to define uniqueness within the model. Can be a single attribute or an array. Defaults to 'id'
|
217
160
|
|
218
|
-
|
161
|
+
#### klass
|
219
162
|
|
220
|
-
|
221
|
-
first 10 lines when Rails.env.test?
|
163
|
+
Defines the ActiveRecord Class if it can't be inferred from the seed file.
|
222
164
|
|
223
|
-
|
224
|
-
data useful for getting an environment up and running. I'm thinking either the seed_dir similar to like a PATH
|
225
|
-
environment variable where the first one found would override the others, or maybe make it automatic based on the
|
226
|
-
directory names and the environment (seed.development/state.yml would override seed/state.yml).
|
165
|
+
#### line
|
227
166
|
|
228
|
-
|
229
|
-
the seed files might be large and take a long time to seed. The above
|
230
|
-
strategy using seed.test might be useful but it might also be useful to have a preprocessor type such as .sh so for
|
231
|
-
instance you might have seed.test/table_with_lotsa_rows.csv.sh which might consist of the line
|
232
|
-
'head -20 ../seed/table_with_lotsa_rows.csv'
|
167
|
+
Proc used for converting a line to attributes (txt files only).
|
233
168
|
|
234
|
-
|
235
|
-
seeding above.
|
169
|
+
#### postprocess
|
236
170
|
|
237
|
-
|
171
|
+
Modify the attributes from the seed file before applying them to the model.
|
172
|
+
|
173
|
+
Example:
|
174
|
+
|
175
|
+
{
|
176
|
+
key_attribute: 'code',
|
177
|
+
postprocess: ->(attrs) {
|
178
|
+
{
|
179
|
+
code: attrs['country_code'],
|
180
|
+
name: attrs['country']
|
181
|
+
}
|
182
|
+
}
|
183
|
+
}
|
184
|
+
|
185
|
+
#### purge
|
186
|
+
|
187
|
+
Destroys rows that no longer exist in the seed file.
|
188
|
+
|
189
|
+
#### update_display_method
|
190
|
+
|
191
|
+
Model method used for displaying updates to a model.
|
192
|
+
|
193
|
+
#### use_line_number_as_id
|
194
|
+
|
195
|
+
Use the line number of the seed file as the id
|
196
|
+
|
197
|
+
## Incompatibilities from 0.0.x version
|
198
|
+
|
199
|
+
Custom seeders should now be specified as a Class and not an instance (MySeeder instead of MySeeder.new)
|
200
|
+
|
201
|
+
data_seeder_<config-item> methods within the models are no longer supported.
|
202
|
+
|
203
|
+
Using the first line of txt, json, and yaml files as the config is no longer supported. Move them to
|
204
|
+
a separate .cfg file.
|
205
|
+
|
206
|
+
|
207
|
+
TODO
|
208
|
+
----
|
209
|
+
|
210
|
+
Add 'sql' loader (with disclaimer that it will temporarily truncate the table)
|
238
211
|
|
239
|
-
|
212
|
+
Caching of long-running stuff via pg_dump, mysqldump, or other?
|
240
213
|
|
241
214
|
Document options (key_attribute, line, postprocess, etc)
|
242
215
|
|
@@ -258,7 +231,7 @@ Author
|
|
258
231
|
License
|
259
232
|
-------
|
260
233
|
|
261
|
-
Copyright 2015 Brad Pardee
|
234
|
+
Copyright 2015-2016 Brad Pardee
|
262
235
|
|
263
236
|
Licensed under the Apache License, Version 2.0 (the "License");
|
264
237
|
you may not use this file except in compliance with the License.
|
@@ -3,32 +3,72 @@ require 'digest'
|
|
3
3
|
module DataSeeder
|
4
4
|
class SeedFile < ActiveRecord::Base
|
5
5
|
def self.file_hash
|
6
|
-
|
7
|
-
|
8
|
-
|
6
|
+
@file_hash ||= begin
|
7
|
+
hash = Hash.new { |hash, path| hash[path] = new(path: path) }
|
8
|
+
all.each do |seed_file|
|
9
|
+
hash[seed_file.path] = seed_file
|
10
|
+
end
|
11
|
+
hash
|
9
12
|
end
|
10
|
-
hash
|
11
13
|
end
|
12
14
|
|
13
15
|
def self.load(path)
|
14
|
-
seed_file = self.file_hash[path]
|
15
|
-
seed_file.load
|
16
|
+
seed_file = self.file_hash[path]
|
17
|
+
return seed_file.load
|
18
|
+
end
|
19
|
+
|
20
|
+
def self.processed_set
|
21
|
+
@processed_set ||= Set.new
|
22
|
+
end
|
23
|
+
|
24
|
+
def self.add_processed(path)
|
25
|
+
self.processed_set.add(path)
|
26
|
+
end
|
27
|
+
|
28
|
+
def self.processed?(paths)
|
29
|
+
self.processed_set.proper_superset?(Array(paths).to_set)
|
16
30
|
end
|
17
31
|
|
18
32
|
def load
|
19
33
|
new_sha256 = Digest::SHA256.file(path).hexdigest
|
34
|
+
dot_index = path.rindex('.')
|
35
|
+
path_minus_ext = path[0, dot_index]
|
20
36
|
if self.sha256 != new_sha256
|
21
|
-
|
37
|
+
cfg_file = "#{path_minus_ext}.cfg"
|
38
|
+
config = {}
|
39
|
+
if File.exist?(cfg_file)
|
40
|
+
config = eval(File.read(cfg_file))
|
41
|
+
end
|
42
|
+
begin
|
43
|
+
config[:klass] ||= path_minus_ext.classify.constantize
|
44
|
+
rescue NameError => e
|
45
|
+
end
|
22
46
|
ext = File.extname(self.path)[1..-1]
|
23
|
-
return unless ext
|
24
|
-
|
25
|
-
unless
|
26
|
-
DataSeeder.logger.
|
27
|
-
return
|
47
|
+
return true unless ext
|
48
|
+
loader_klass = config[:loader] || DataSeeder.config.loaders[ext]
|
49
|
+
unless loader_klass
|
50
|
+
DataSeeder.config.logger.warn "Warning: No loader for #{path}"
|
51
|
+
return true
|
52
|
+
end
|
53
|
+
if loader_klass.respond_to?(:default_config)
|
54
|
+
config = loader_klass.default_config.merge(config)
|
55
|
+
end
|
56
|
+
config[:path] = path
|
57
|
+
config[:path_minus_ext] = path_minus_ext
|
58
|
+
loader = loader_klass.new(config)
|
59
|
+
depends = loader.config[:depends]
|
60
|
+
return false if depends && !self.class.processed?(depends)
|
61
|
+
DataSeeder.config.logger.debug { "Loading #{path}" }
|
62
|
+
DataSeeder.config.log_indent do
|
63
|
+
File.open(path, 'r') do |io|
|
64
|
+
loader.process(io)
|
65
|
+
end
|
66
|
+
self.sha256 = new_sha256
|
67
|
+
save!
|
28
68
|
end
|
29
|
-
loader.process(path)
|
30
|
-
save!
|
31
69
|
end
|
70
|
+
self.class.add_processed(path_minus_ext)
|
71
|
+
return true
|
32
72
|
end
|
33
73
|
end
|
34
74
|
end
|
data/lib/data_seeder.rb
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
require 'data_seeder/config'
|
2
2
|
require 'data_seeder/engine'
|
3
3
|
require 'data_seeder/loader'
|
4
|
-
require 'data_seeder/logger'
|
5
4
|
|
6
5
|
module DataSeeder
|
7
6
|
class << self
|
@@ -22,24 +21,45 @@ module DataSeeder
|
|
22
21
|
yield(config)
|
23
22
|
end
|
24
23
|
|
25
|
-
def self.logger
|
26
|
-
config.logger
|
27
|
-
end
|
28
|
-
|
29
24
|
def self.run(new_config={})
|
30
25
|
@@mutex.synchronize do
|
31
26
|
msec = Benchmark.ms do
|
32
27
|
new_config.each do |key, value|
|
33
28
|
self.config.send("#{key}=", value)
|
34
29
|
end
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
30
|
+
# Keep track of the seed files that have dependencies that aren't fulfilled
|
31
|
+
pending = []
|
32
|
+
config.seed_dirs.each do |seed_dir|
|
33
|
+
Dir.chdir(seed_dir) do
|
34
|
+
Dir['**/*'].each do |path|
|
35
|
+
next if path.end_with?('.cfg')
|
36
|
+
if File.file?(path)
|
37
|
+
unless SeedFile.load(path)
|
38
|
+
pending << [seed_dir, path]
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
44
|
+
# Loop through the ones that couldn't be processed previously because they depended on another seed being loaded first
|
45
|
+
until pending.empty?
|
46
|
+
new_pending = []
|
47
|
+
pending.each do |seed_dir, path|
|
48
|
+
Dir.chdir(seed_dir) do
|
49
|
+
unless SeedFile.load(path)
|
50
|
+
new_pending << [seed_dir, path]
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
54
|
+
if pending.size == new_pending.size
|
55
|
+
msg = "Error: Circular dependency in DataSeeder, seeds=#{pending.inspect}"
|
56
|
+
config.logger.error msg
|
57
|
+
raise msg
|
39
58
|
end
|
59
|
+
pending = new_pending
|
40
60
|
end
|
41
61
|
end
|
42
|
-
logger.info
|
62
|
+
config.logger.info "DataSeeder.run took #{msec.to_i} msec"
|
43
63
|
end
|
44
64
|
end
|
45
65
|
|
data/lib/data_seeder/config.rb
CHANGED
@@ -1,28 +1,25 @@
|
|
1
1
|
module DataSeeder
|
2
2
|
class Config
|
3
|
-
attr_accessor :
|
3
|
+
attr_accessor :seed_dirs, :logger, :loaders, :verbose
|
4
4
|
|
5
5
|
def initialize
|
6
|
-
@
|
7
|
-
@
|
8
|
-
@
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
@
|
13
|
-
|
14
|
-
|
15
|
-
def verbose
|
16
|
-
@logger.verbose
|
6
|
+
@seed_dirs = ['db/seed'].freeze
|
7
|
+
@loaders = default_loaders
|
8
|
+
@verbose = true
|
9
|
+
@is_default = true
|
10
|
+
@logger = Logger.new($stdout)
|
11
|
+
@logger.formatter = ->(severity, datetime, progname, msg) { "#{@indent}#{msg}\n" }
|
12
|
+
@indent_level = 0
|
13
|
+
@indent = ''
|
17
14
|
end
|
18
15
|
|
19
16
|
def default_loaders
|
20
17
|
{
|
21
|
-
'csv' => Loader::CSV
|
22
|
-
'json' => Loader::JSON
|
23
|
-
'txt' => Loader::Txt
|
24
|
-
'yaml' => Loader::YAML
|
25
|
-
'yml' => Loader::YAML
|
18
|
+
'csv' => Loader::CSV,
|
19
|
+
'json' => Loader::JSON,
|
20
|
+
'txt' => Loader::Txt,
|
21
|
+
'yaml' => Loader::YAML,
|
22
|
+
'yml' => Loader::YAML,
|
26
23
|
}
|
27
24
|
end
|
28
25
|
|
@@ -37,5 +34,30 @@ module DataSeeder
|
|
37
34
|
def add_loader(ext, loader)
|
38
35
|
@loaders[ext] = loader
|
39
36
|
end
|
37
|
+
|
38
|
+
def seed_dir=(seed_dir)
|
39
|
+
@seed_dirs = [seed_dir]
|
40
|
+
end
|
41
|
+
|
42
|
+
def seed_dir
|
43
|
+
@seed_dirs.first
|
44
|
+
end
|
45
|
+
|
46
|
+
def add_seed_dir(seed_dir)
|
47
|
+
if @seed_dirs.frozen?
|
48
|
+
@seed_dirs = [seed_dir]
|
49
|
+
else
|
50
|
+
@seed_dirs << seed_dir
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
def log_indent(&block)
|
55
|
+
@indent_level += 1
|
56
|
+
@indent = ' ' * @indent_level
|
57
|
+
yield
|
58
|
+
ensure
|
59
|
+
@indent_level -= 1
|
60
|
+
@indent = ' ' * @indent_level
|
61
|
+
end
|
40
62
|
end
|
41
63
|
end
|