data_seeder 0.0.5 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +71 -98
- data/app/models/data_seeder/seed_file.rb +54 -14
- data/lib/data_seeder.rb +30 -10
- data/lib/data_seeder/config.rb +39 -17
- data/lib/data_seeder/loader.rb +80 -85
- data/lib/data_seeder/loader/csv.rb +2 -1
- data/lib/data_seeder/loader/txt.rb +1 -6
- data/lib/data_seeder/loader/yaml.rb +1 -1
- data/lib/data_seeder/version.rb +1 -1
- data/test/dummy/app/models/app_error_data_seeder.rb +10 -8
- data/test/dummy/app/models/country.rb +0 -12
- data/test/dummy/db/development.sqlite3 +0 -0
- data/test/dummy/db/seed.test/{bar.err → bar_err/bar.err} +0 -0
- data/test/dummy/db/seed.test/countries_csv/countries.cfg +9 -0
- data/test/dummy/db/seed.test/countries_csv/countries.csv +249 -0
- data/test/dummy/db/seed.test/countries_txt/countries.cfg +6 -0
- data/test/dummy/db/seed.test/{countries.txt → countries_txt/countries.txt} +0 -0
- data/test/dummy/db/seed.test/{foo.err → foo_err/foo.err} +0 -0
- data/test/dummy/db/seed.test/{states.csv → states_csv/states.csv} +0 -0
- data/test/dummy/db/seed.test/states_json/states.cfg +1 -0
- data/test/dummy/db/seed.test/{states.json → states_json/states.json} +0 -1
- data/test/dummy/db/seed.test/states_txt/states.cfg +6 -0
- data/test/dummy/db/seed.test/{states.txt → states_txt/states.txt} +0 -1
- data/test/dummy/db/seed.test/states_yml/states.cfg +1 -0
- data/test/dummy/db/seed.test/{states.yml → states_yml/states.yml} +0 -1
- data/test/dummy/db/seed.test/{zulu.err → zulu_err/zulu.err} +0 -0
- data/test/dummy/db/test.sqlite3 +0 -0
- data/test/dummy/log/development.log +126 -0
- data/test/dummy/log/test.log +90945 -0
- data/test/models/data_seeder_test.rb +50 -30
- metadata +33 -20
- data/lib/data_seeder/logger.rb +0 -15
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e24f096a1fd9c290b8e6046ae8d2a51eedb102da
|
4
|
+
data.tar.gz: 5d96f04e6a4fae47c640330a6a15c4f68a05a8a9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3b2de1af354cb2b5a0977b97f47dd8e3d22f127e777c3cdbdcd0a5f9a81ae6b118f1aecd29647f57a21d81ade2ac5ec5d5a83b069508846c5dea28a9d8cd3324
|
7
|
+
data.tar.gz: 72899aa6169debf7eeac35ae8d454c4a0d3befb1297a4ba73ff12935c77c92aa59210db667993a607d2c997708469afa481c7be71c39d9f7cf17827c2c9435ba
|
data/README.md
CHANGED
@@ -38,23 +38,15 @@ the following table:
|
|
38
38
|
|
39
39
|
And you have a corresponding db/seed/countries.txt file as follows:
|
40
40
|
|
41
|
-
# config: { key_attribute: 'code', line: ->(line) { { code: line[0,2], name: line[3...-1] } } }
|
42
41
|
AD Andorra
|
43
42
|
AE United Arab Emirates
|
44
43
|
AF Afghanistan
|
45
44
|
|
46
|
-
|
47
|
-
the key_attribute says that it will use the 'code' attribute to lookup existing records (defaults to 'id')
|
48
|
-
and the line function
|
49
|
-
defines how the line is converted to an attribute hash defining the instance.
|
50
|
-
|
51
|
-
Since the first line can get a little busy with config information, you can also store your config in a
|
52
|
-
separate .cfg file with the same name. This contents of this file should eval to a hash. The above config line would be
|
53
|
-
equivalent to a db/seed/countries.cfg file with the following:
|
45
|
+
And a db/seed/countries.cfg file as follows:
|
54
46
|
|
55
47
|
{
|
56
|
-
key_attribute:
|
57
|
-
line: ->(
|
48
|
+
key_attribute: 'code',
|
49
|
+
line: ->(line) {
|
58
50
|
{
|
59
51
|
code: line[0,2],
|
60
52
|
name: line[3...-1]
|
@@ -62,6 +54,13 @@ equivalent to a db/seed/countries.cfg file with the following:
|
|
62
54
|
}
|
63
55
|
}
|
64
56
|
|
57
|
+
The cfg file defines the config attributes associated with the file. The contents of this file
|
58
|
+
should eval to a hash. For this seed file,
|
59
|
+
the key_attribute says that it will use the 'code' attribute to lookup existing records (defaults to 'id')
|
60
|
+
and the line function
|
61
|
+
defines how the line is converted to an attribute hash defining the instance.
|
62
|
+
|
63
|
+
|
65
64
|
Running rake db:seed will result in the following output:
|
66
65
|
|
67
66
|
# rake db:seed
|
@@ -97,11 +96,11 @@ your own custom loaders.
|
|
97
96
|
For instance, suppose you had the following tables:
|
98
97
|
|
99
98
|
```ruby
|
100
|
-
create_table "apps"
|
99
|
+
create_table "apps" do |t|
|
101
100
|
t.string "name"
|
102
101
|
end
|
103
102
|
|
104
|
-
create_table "app_errors"
|
103
|
+
create_table "app_errors" do |t|
|
105
104
|
t.integer "app_id"
|
106
105
|
t.string "code"
|
107
106
|
t.string "message"
|
@@ -122,69 +121,14 @@ And you wanted to load up separate error messages for each app such as the follo
|
|
122
121
|
B1 Error message for B1
|
123
122
|
|
124
123
|
|
125
|
-
|
126
|
-
|
127
|
-
```ruby
|
128
|
-
require 'data_seeder'
|
129
|
-
|
130
|
-
class AppErrorDataSeeder
|
131
|
-
include ::DataSeeder::Loader
|
132
|
-
|
133
|
-
def setup
|
134
|
-
@app = App.find_or_initialize_by(name: self.path_minus_ext)
|
135
|
-
@existing_errors = {}
|
136
|
-
if @app.new_record?
|
137
|
-
logger.info "Loading errors for new App: #{@app.name}"
|
138
|
-
@app.save!
|
139
|
-
else
|
140
|
-
logger.info "Loading errors for existing App: #{@app.name}"
|
141
|
-
@app.app_errors.each do |app_error|
|
142
|
-
@existing_errors[app_error.code] = app_error
|
143
|
-
end
|
144
|
-
end
|
145
|
-
end
|
146
|
-
|
147
|
-
def teardown
|
148
|
-
unless @existing_errors.empty?
|
149
|
-
logger.info { " The following are begin removed:" }
|
150
|
-
@existing_errors.each do |code, app_error|
|
151
|
-
logger.info " #{code}: #{app_error.message}"
|
152
|
-
app_error.destroy
|
153
|
-
end
|
154
|
-
end
|
155
|
-
end
|
156
|
-
|
157
|
-
def load(io)
|
158
|
-
io.each_line do |line|
|
159
|
-
line.strip!
|
160
|
-
next if line.blank? || line[0] == ?#
|
161
|
-
space_i = line.index(' ')
|
162
|
-
raise "Invalid line: #{line}" unless space_i
|
163
|
-
code = line[0,space_i].strip
|
164
|
-
message = line[space_i+1..-1].strip
|
165
|
-
app_error = @existing_errors[code]
|
166
|
-
if app_error
|
167
|
-
@existing_errors.delete(code)
|
168
|
-
app_error.message = message
|
169
|
-
unless app_error.changes.empty?
|
170
|
-
logger.info { " Changing #{code}: #{app_error.changes}" }
|
171
|
-
app_error.save!
|
172
|
-
end
|
173
|
-
else
|
174
|
-
logger.info { " Creating #{code}: #{message}" }
|
175
|
-
@app.app_errors.create!(code: code, message: message)
|
176
|
-
end
|
177
|
-
end
|
178
|
-
end
|
179
|
-
end
|
180
|
-
```
|
124
|
+
Look [here](test/dummy/app/models/app_error_data_seeder.rb) for an example of creating your own custom loader.
|
181
125
|
|
182
|
-
To add
|
126
|
+
To add this seeder, you would create the following config/initializers/data_seeder.rb:
|
183
127
|
|
184
128
|
```ruby
|
185
129
|
MyApp::Application.config.after_initialize do
|
186
130
|
DataSeeder.configure do |config|
|
187
|
-
config.add_loader('err', AppErrorDataSeeder
|
131
|
+
config.add_loader('err', AppErrorDataSeeder)
|
188
132
|
end
|
189
133
|
end
|
190
134
|
```
|
@@ -200,43 +144,72 @@ Executing DataSeeder.run would result in the following:
|
|
200
144
|
Creating 2: We are seriously foobared
|
201
145
|
Creating 3: We are less seriously foobared
|
202
146
|
|
203
|
-
|
204
|
-
|
147
|
+
## Configurable values
|
148
|
+
|
149
|
+
#### depends
|
205
150
|
|
206
|
-
|
207
|
-
x Gigabyte seed file that you don't want to check into source control and only want run on production?
|
151
|
+
A seed name, or an array of seed names, that this model depends on; those seeds must be loaded first. Example:
|
208
152
|
|
209
|
-
|
153
|
+
{
|
154
|
+
depends: ['countries','states']
|
155
|
+
}
|
210
156
|
|
211
|
-
|
157
|
+
#### key_attribute
|
212
158
|
|
213
|
-
The
|
214
|
-
is error prone and forces you to do a 'touch db/structure.sql' to get around the not getting reloaded problem. Should
|
215
|
-
I add a utility to override this rails implementation with a sha-based one like the seed files use? (or am I the only
|
216
|
-
one who has to 'touch db/structure.sql' everytime I switch branches?)
|
159
|
+
The attribute used to define uniqueness within the model. Can be a single attribute or an array. Defaults to 'id'
|
217
160
|
|
218
|
-
|
161
|
+
#### klass
|
219
162
|
|
220
|
-
|
221
|
-
first 10 lines when Rails.env.test?
|
163
|
+
Defines the ActiveRecord Class if it can't be inferred from the seed file.
|
222
164
|
|
223
|
-
|
224
|
-
data useful for getting an environment up and running. I'm thinking either the seed_dir similar to like a PATH
|
225
|
-
environment variable where the first one found would override the others, or maybe make it automatic based on the
|
226
|
-
directory names and the environment (seed.development/state.yml would override seed/state.yml).
|
165
|
+
#### line
|
227
166
|
|
228
|
-
|
229
|
-
the seed files might be large and take a long time to seed. The above
|
230
|
-
strategy using seed.test might be useful but it might also be useful to have a preprocessor type such as .sh so for
|
231
|
-
instance you might have seed.test/table_with_lotsa_rows.csv.sh which might consist of the line
|
232
|
-
'head -20 ../seed/table_with_lotsa_rows.csv'
|
167
|
+
Proc used for converting a line to attributes (txt files only).
|
233
168
|
|
234
|
-
|
235
|
-
seeding above.
|
169
|
+
#### postprocess
|
236
170
|
|
237
|
-
|
171
|
+
Modify the attributes from the seed file before applying them to the model.
|
172
|
+
|
173
|
+
Example:
|
174
|
+
|
175
|
+
{
|
176
|
+
key_attribute: 'code',
|
177
|
+
postprocess: ->(attrs) {
|
178
|
+
{
|
179
|
+
code: attrs['country_code'],
|
180
|
+
name: attrs['country']
|
181
|
+
}
|
182
|
+
}
|
183
|
+
}
|
184
|
+
|
185
|
+
#### purge
|
186
|
+
|
187
|
+
Destroys rows that no longer exist in the seed file.
|
188
|
+
|
189
|
+
#### update_display_method
|
190
|
+
|
191
|
+
Model method used for displaying updates to a model.
|
192
|
+
|
193
|
+
#### use_line_number_as_id
|
194
|
+
|
195
|
+
Use the line number of the seed file as the id
|
196
|
+
|
197
|
+
## Incompatibilities from 0.0.x version
|
198
|
+
|
199
|
+
Custom seeders should now be specified as a Class and not an instance (MySeeder instead of MySeeder.new)
|
200
|
+
|
201
|
+
data_seeder_<config-item> methods within the models are no longer supported.
|
202
|
+
|
203
|
+
Using the first line of txt, json, and yaml files as the config is no longer supported. Move that config to
|
204
|
+
a separate .cfg file.
|
205
|
+
|
206
|
+
|
207
|
+
TODO
|
208
|
+
----
|
209
|
+
|
210
|
+
Add 'sql' loader (with disclaimer that it will temporarily truncate the table)
|
238
211
|
|
239
|
-
|
212
|
+
Caching of long-running stuff via pg_dump, mysqldump, or other?
|
240
213
|
|
241
214
|
Document options (key_attribute, line, postprocess, etc)
|
242
215
|
|
@@ -258,7 +231,7 @@ Author
|
|
258
231
|
License
|
259
232
|
-------
|
260
233
|
|
261
|
-
Copyright 2015 Brad Pardee
|
234
|
+
Copyright 2015-2016 Brad Pardee
|
262
235
|
|
263
236
|
Licensed under the Apache License, Version 2.0 (the "License");
|
264
237
|
you may not use this file except in compliance with the License.
|
@@ -3,32 +3,72 @@ require 'digest'
|
|
3
3
|
module DataSeeder
|
4
4
|
class SeedFile < ActiveRecord::Base
|
5
5
|
def self.file_hash
|
6
|
-
|
7
|
-
|
8
|
-
|
6
|
+
@file_hash ||= begin
|
7
|
+
hash = Hash.new { |hash, path| hash[path] = new(path: path) }
|
8
|
+
all.each do |seed_file|
|
9
|
+
hash[seed_file.path] = seed_file
|
10
|
+
end
|
11
|
+
hash
|
9
12
|
end
|
10
|
-
hash
|
11
13
|
end
|
12
14
|
|
13
15
|
def self.load(path)
|
14
|
-
seed_file = self.file_hash[path]
|
15
|
-
seed_file.load
|
16
|
+
seed_file = self.file_hash[path]
|
17
|
+
return seed_file.load
|
18
|
+
end
|
19
|
+
|
20
|
+
def self.processed_set
|
21
|
+
@processed_set ||= Set.new
|
22
|
+
end
|
23
|
+
|
24
|
+
def self.add_processed(path)
|
25
|
+
self.processed_set.add(path)
|
26
|
+
end
|
27
|
+
|
28
|
+
def self.processed?(paths)
|
29
|
+
self.processed_set.proper_superset?(Array(paths).to_set)
|
16
30
|
end
|
17
31
|
|
18
32
|
def load
|
19
33
|
new_sha256 = Digest::SHA256.file(path).hexdigest
|
34
|
+
dot_index = path.rindex('.')
|
35
|
+
path_minus_ext = path[0, dot_index]
|
20
36
|
if self.sha256 != new_sha256
|
21
|
-
|
37
|
+
cfg_file = "#{path_minus_ext}.cfg"
|
38
|
+
config = {}
|
39
|
+
if File.exist?(cfg_file)
|
40
|
+
config = eval(File.read(cfg_file))
|
41
|
+
end
|
42
|
+
begin
|
43
|
+
config[:klass] ||= path_minus_ext.classify.constantize
|
44
|
+
rescue NameError => e
|
45
|
+
end
|
22
46
|
ext = File.extname(self.path)[1..-1]
|
23
|
-
return unless ext
|
24
|
-
|
25
|
-
unless
|
26
|
-
DataSeeder.logger.
|
27
|
-
return
|
47
|
+
return true unless ext
|
48
|
+
loader_klass = config[:loader] || DataSeeder.config.loaders[ext]
|
49
|
+
unless loader_klass
|
50
|
+
DataSeeder.config.logger.warn "Warning: No loader for #{path}"
|
51
|
+
return true
|
52
|
+
end
|
53
|
+
if loader_klass.respond_to?(:default_config)
|
54
|
+
config = loader_klass.default_config.merge(config)
|
55
|
+
end
|
56
|
+
config[:path] = path
|
57
|
+
config[:path_minus_ext] = path_minus_ext
|
58
|
+
loader = loader_klass.new(config)
|
59
|
+
depends = loader.config[:depends]
|
60
|
+
return false if depends && !self.class.processed?(depends)
|
61
|
+
DataSeeder.config.logger.debug { "Loading #{path}" }
|
62
|
+
DataSeeder.config.log_indent do
|
63
|
+
File.open(path, 'r') do |io|
|
64
|
+
loader.process(io)
|
65
|
+
end
|
66
|
+
self.sha256 = new_sha256
|
67
|
+
save!
|
28
68
|
end
|
29
|
-
loader.process(path)
|
30
|
-
save!
|
31
69
|
end
|
70
|
+
self.class.add_processed(path_minus_ext)
|
71
|
+
return true
|
32
72
|
end
|
33
73
|
end
|
34
74
|
end
|
data/lib/data_seeder.rb
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
require 'data_seeder/config'
|
2
2
|
require 'data_seeder/engine'
|
3
3
|
require 'data_seeder/loader'
|
4
|
-
require 'data_seeder/logger'
|
5
4
|
|
6
5
|
module DataSeeder
|
7
6
|
class << self
|
@@ -22,24 +21,45 @@ module DataSeeder
|
|
22
21
|
yield(config)
|
23
22
|
end
|
24
23
|
|
25
|
-
def self.logger
|
26
|
-
config.logger
|
27
|
-
end
|
28
|
-
|
29
24
|
def self.run(new_config={})
|
30
25
|
@@mutex.synchronize do
|
31
26
|
msec = Benchmark.ms do
|
32
27
|
new_config.each do |key, value|
|
33
28
|
self.config.send("#{key}=", value)
|
34
29
|
end
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
30
|
+
# Keep track of the seed files that have dependencies that aren't fulfilled
|
31
|
+
pending = []
|
32
|
+
config.seed_dirs.each do |seed_dir|
|
33
|
+
Dir.chdir(seed_dir) do
|
34
|
+
Dir['**/*'].each do |path|
|
35
|
+
next if path.end_with?('.cfg')
|
36
|
+
if File.file?(path)
|
37
|
+
unless SeedFile.load(path)
|
38
|
+
pending << [seed_dir, path]
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
44
|
+
# Loop thru the ones that couldn't be processed previously because they depended on another seed being loaded first
|
45
|
+
until pending.empty?
|
46
|
+
new_pending = []
|
47
|
+
pending.each do |seed_dir, path|
|
48
|
+
Dir.chdir(seed_dir) do
|
49
|
+
unless SeedFile.load(path)
|
50
|
+
new_pending << [seed_dir, path]
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
54
|
+
if pending.size == new_pending.size
|
55
|
+
msg = "Error: Circular dependency in DataSeeder, seeds=#{pending.inspect}"
|
56
|
+
config.logger.error msg
|
57
|
+
raise msg
|
39
58
|
end
|
59
|
+
pending = new_pending
|
40
60
|
end
|
41
61
|
end
|
42
|
-
logger.info
|
62
|
+
config.logger.info "DataSeeder.run took #{msec.to_i} msec"
|
43
63
|
end
|
44
64
|
end
|
45
65
|
|
data/lib/data_seeder/config.rb
CHANGED
@@ -1,28 +1,25 @@
|
|
1
1
|
module DataSeeder
|
2
2
|
class Config
|
3
|
-
attr_accessor :
|
3
|
+
attr_accessor :seed_dirs, :logger, :loaders, :verbose
|
4
4
|
|
5
5
|
def initialize
|
6
|
-
@
|
7
|
-
@
|
8
|
-
@
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
@
|
13
|
-
|
14
|
-
|
15
|
-
def verbose
|
16
|
-
@logger.verbose
|
6
|
+
@seed_dirs = ['db/seed'].freeze
|
7
|
+
@loaders = default_loaders
|
8
|
+
@verbose = true
|
9
|
+
@is_default = true
|
10
|
+
@logger = Logger.new($stdout)
|
11
|
+
@logger.formatter = ->(severity, datetime, progname, msg) { "#{@indent}#{msg}\n" }
|
12
|
+
@indent_level = 0
|
13
|
+
@indent = ''
|
17
14
|
end
|
18
15
|
|
19
16
|
def default_loaders
|
20
17
|
{
|
21
|
-
'csv' => Loader::CSV
|
22
|
-
'json' => Loader::JSON
|
23
|
-
'txt' => Loader::Txt
|
24
|
-
'yaml' => Loader::YAML
|
25
|
-
'yml' => Loader::YAML
|
18
|
+
'csv' => Loader::CSV,
|
19
|
+
'json' => Loader::JSON,
|
20
|
+
'txt' => Loader::Txt,
|
21
|
+
'yaml' => Loader::YAML,
|
22
|
+
'yml' => Loader::YAML,
|
26
23
|
}
|
27
24
|
end
|
28
25
|
|
@@ -37,5 +34,30 @@ module DataSeeder
|
|
37
34
|
def add_loader(ext, loader)
|
38
35
|
@loaders[ext] = loader
|
39
36
|
end
|
37
|
+
|
38
|
+
def seed_dir=(seed_dir)
|
39
|
+
@seed_dirs = [seed_dir]
|
40
|
+
end
|
41
|
+
|
42
|
+
def seed_dir
|
43
|
+
@seed_dirs.first
|
44
|
+
end
|
45
|
+
|
46
|
+
def add_seed_dir(seed_dir)
|
47
|
+
if @seed_dirs.frozen?
|
48
|
+
@seed_dirs = [seed_dir]
|
49
|
+
else
|
50
|
+
@seed_dirs << seed_dir
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
def log_indent(&block)
|
55
|
+
@indent_level += 1
|
56
|
+
@indent = ' ' * @indent_level
|
57
|
+
yield
|
58
|
+
ensure
|
59
|
+
@indent_level -= 1
|
60
|
+
@indent = ' ' * @indent_level
|
61
|
+
end
|
40
62
|
end
|
41
63
|
end
|