data_seeder 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/README.md +256 -0
- data/Rakefile +34 -0
- data/app/models/data_seeder/seed_file.rb +34 -0
- data/db/migrate/20150306195118_create_data_seeder_seed_files.rb +9 -0
- data/lib/data_seeder.rb +68 -0
- data/lib/data_seeder/config.rb +41 -0
- data/lib/data_seeder/engine.rb +5 -0
- data/lib/data_seeder/loader.rb +122 -0
- data/lib/data_seeder/loader/csv.rb +15 -0
- data/lib/data_seeder/loader/json.rb +20 -0
- data/lib/data_seeder/loader/txt.rb +23 -0
- data/lib/data_seeder/loader/yaml.rb +23 -0
- data/lib/data_seeder/logger.rb +15 -0
- data/lib/data_seeder/version.rb +3 -0
- data/test/dummy/Rakefile +6 -0
- data/test/dummy/app/models/app.rb +3 -0
- data/test/dummy/app/models/app_error.rb +3 -0
- data/test/dummy/app/models/app_error_data_seeder.rb +52 -0
- data/test/dummy/app/models/country.rb +14 -0
- data/test/dummy/app/models/state.rb +2 -0
- data/test/dummy/bin/bundle +3 -0
- data/test/dummy/bin/rails +4 -0
- data/test/dummy/bin/rake +4 -0
- data/test/dummy/config.ru +4 -0
- data/test/dummy/config/application.rb +26 -0
- data/test/dummy/config/boot.rb +5 -0
- data/test/dummy/config/database.yml +12 -0
- data/test/dummy/config/environment.rb +5 -0
- data/test/dummy/config/environments/development.rb +37 -0
- data/test/dummy/config/environments/test.rb +42 -0
- data/test/dummy/config/initializers/backtrace_silencers.rb +7 -0
- data/test/dummy/config/initializers/inflections.rb +16 -0
- data/test/dummy/db/migrate/20150313022149_create_countries.rb +8 -0
- data/test/dummy/db/migrate/20150313022228_create_states.rb +8 -0
- data/test/dummy/db/migrate/20150313172634_create_apps.rb +7 -0
- data/test/dummy/db/migrate/20150313172719_create_app_errors.rb +10 -0
- data/test/dummy/db/schema.rb +45 -0
- data/test/dummy/db/seed.test/bar.err +3 -0
- data/test/dummy/db/seed.test/countries.txt +249 -0
- data/test/dummy/db/seed.test/foo.err +3 -0
- data/test/dummy/db/seed.test/states.csv +51 -0
- data/test/dummy/db/seed.test/states.json +153 -0
- data/test/dummy/db/seed.test/states.txt +51 -0
- data/test/dummy/db/seed.test/states.yml +101 -0
- data/test/dummy/db/seed.test/zulu.err +2 -0
- data/test/dummy/db/test.sqlite3 +0 -0
- data/test/dummy/log/development.log +39 -0
- data/test/dummy/log/test.log +68768 -0
- data/test/models/data_seeder_test.rb +147 -0
- data/test/test_helper.rb +12 -0
- metadata +159 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 9201eccfce12d45bbe01559b668fb2fa32c704f1
|
4
|
+
data.tar.gz: a0d02f94fad7dd14f62c05d4ef33223669dc1665
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 0933f7b3628701f8ffbef5f5d6247ffed17c997ec2099beefbf2e32d83ee0a84f39256aff8eefaeea1d1e7d29ec959cb6f13b70fc3b3df5c46d2d6d8becb4501
|
7
|
+
data.tar.gz: 5327311bdba8e1e880d87db269eb2ec69fe834139276dc6398b9fba35f2e07c185fa66fae61c05f0ba67a2451377bd48125cd8b23ccb22c7b33b459cd346cb0f
|
data/README.md
ADDED
@@ -0,0 +1,256 @@
|
|
1
|
+
data_seeder [![Build Status](https://secure.travis-ci.org/bpardee/data_seeder.png?branch=master)](http://travis-ci.org/bpardee/data_seeder)
|
2
|
+
====================
|
3
|
+
|
4
|
+
* http://github.com/bpardee/data_seeder
|
5
|
+
|
6
|
+
## Introduction
|
7
|
+
|
8
|
+
This gem provides a simple methodology for seeding your database. Seed files in your
|
9
|
+
seeds directory are loaded in the database and the checksum is stored away so that the
|
10
|
+
file will only be re-applied when it is changed. Each row instance within a file is
|
11
|
+
converted to an attribute hash and the updates are applied idempotently such that unchanged
|
12
|
+
rows aren't touched; only changed rows are updated, and any insertions and deletions
|
13
|
+
are performed. The extension of the seed file determines how it is loaded. Extensions that
|
14
|
+
are supported by default are json, yaml, csv, and txt but homegrown loaders can be defined
|
15
|
+
as necessary.
|
16
|
+
|
17
|
+
## Usage
|
18
|
+
|
19
|
+
Add this line to your application's Gemfile and run bundler:
|
20
|
+
|
21
|
+
gem 'data_seeder'
|
22
|
+
|
23
|
+
Execute the following and migrate your database:
|
24
|
+
|
25
|
+
rake data_seeder:install:migrations
|
26
|
+
|
27
|
+
Add the following to your db/seeds.rb file
|
28
|
+
|
29
|
+
DataSeeder.run
|
30
|
+
|
31
|
+
Add seed files to the db/seed directory as necessary. For instance, suppose you have
|
32
|
+
the following table:
|
33
|
+
|
34
|
+
create_table :countries do |t|
|
35
|
+
t.column :code, 'CHAR(2)', null: false
|
36
|
+
t.string :name, null: false
|
37
|
+
end
|
38
|
+
|
39
|
+
And you have a corresponding db/seed/countries.txt file as follows:
|
40
|
+
|
41
|
+
# config: { key_attribute: 'code', line: ->(line) { { code: line[0,2], name: line[3...-1] } } }
|
42
|
+
AD Andorra
|
43
|
+
AE United Arab Emirates
|
44
|
+
AF Arghanistan
|
45
|
+
|
46
|
+
The first line in a file can define the config attributes associated with the file. For this seed file,
|
47
|
+
the key_attribute says that it will use the 'code' attribute to lookup existing records (defaults to 'id')
|
48
|
+
and the line function
|
49
|
+
defines how the line is converted to an attribute hash defining the instance.
|
50
|
+
|
51
|
+
Running rake db:seed will result in the following output:
|
52
|
+
|
53
|
+
# rake db:seed
|
54
|
+
Loading countries
|
55
|
+
Saving #<Country id: 1, code: "AD", name: "Andorra">
|
56
|
+
Saving #<Country id: 2, code: "AE", name: "United Arab Emirates">
|
57
|
+
Saving #<Country id: 3, code: "AF", name: "Arghanistan">
|
58
|
+
...
|
59
|
+
DataSeeder.run took 560 msec
|
60
|
+
|
61
|
+
Repeating the command will not attempt to reload the countries file since it is unchanged:
|
62
|
+
|
63
|
+
# rake db:seed
|
64
|
+
DataSeeder.run took 21 msec
|
65
|
+
|
66
|
+
Then you notice that you have a typo in Arghanistan so you fix it and repeat the command:
|
67
|
+
|
68
|
+
# rake db:seed
|
69
|
+
Loading countries
|
70
|
+
Updating AF: {"name"=>["Arghanistan", "Afghanistan"]}
|
71
|
+
DataSeeder.run took 231 msec
|
72
|
+
|
73
|
+
You will probably want your test environment seeded also. Adding the following to test/test_helper.rb
|
74
|
+
will seed your database prior to running tests but will redirect the output to the Rails.logger instead
|
75
|
+
of stdout.
|
76
|
+
|
77
|
+
DataSeeder.test_run
|
78
|
+
|
79
|
+
## Loaders
|
80
|
+
|
81
|
+
data_seeder has default loaders for txt, csv, json and yml extensions but you can also create
|
82
|
+
your own custom loaders.
|
83
|
+
For instance, suppose you had the following tables:
|
84
|
+
|
85
|
+
create_table "app_errors", force: :cascade do |t|
|
86
|
+
t.integer "app_id"
|
87
|
+
t.string "code"
|
88
|
+
t.string "message"
|
89
|
+
end
|
90
|
+
add_index "app_errors", ["app_id"], name: "index_app_errors_on_app_id"
|
91
|
+
|
92
|
+
create_table "apps", force: :cascade do |t|
|
93
|
+
t.string "name"
|
94
|
+
end
|
95
|
+
|
96
|
+
And you wanted to load up separate error messages for each app such as the following 2 files:
|
97
|
+
|
98
|
+
# foo.err
|
99
|
+
1 Something went wrong
|
100
|
+
2 We are seriously foobared
|
101
|
+
3 We are less seriously foobared
|
102
|
+
|
103
|
+
# bar.err
|
104
|
+
A1 Error message for A1
|
105
|
+
A2 Error message for A2
|
106
|
+
B1 Error message for B1
|
107
|
+
|
108
|
+
|
109
|
+
You could create your own custom loader that might look as follows:
|
110
|
+
|
111
|
+
```ruby
|
112
|
+
require 'data_seeder'
|
113
|
+
|
114
|
+
class AppErrorDataSeeder
|
115
|
+
include ::DataSeeder::Loader
|
116
|
+
|
117
|
+
def setup
|
118
|
+
@app = App.find_or_initialize_by(name: self.path_minus_ext)
|
119
|
+
@existing_errors = {}
|
120
|
+
if @app.new_record?
|
121
|
+
logger.info "Loading errors for new App: #{@app.name}"
|
122
|
+
@app.save!
|
123
|
+
else
|
124
|
+
logger.info "Loading errors for existing App: #{@app.name}"
|
125
|
+
@app.app_errors.each do |app_error|
|
126
|
+
@existing_errors[app_error.code] = app_error
|
127
|
+
end
|
128
|
+
end
|
129
|
+
end
|
130
|
+
|
131
|
+
def teardown
|
132
|
+
unless @existing_errors.empty?
|
133
|
+
logger.info { " The following are being removed:" }
|
134
|
+
@existing_errors.each do |code, app_error|
|
135
|
+
logger.info " #{code}: #{app_error.message}"
|
136
|
+
app_error.destroy
|
137
|
+
end
|
138
|
+
end
|
139
|
+
end
|
140
|
+
|
141
|
+
def load(io)
|
142
|
+
io.each_line do |line|
|
143
|
+
line.strip!
|
144
|
+
next if line.blank? || line[0] == ?#
|
145
|
+
space_i = line.index(' ')
|
146
|
+
raise "Invalid line: #{line}" unless space_i
|
147
|
+
code = line[0,space_i].strip
|
148
|
+
message = line[space_i+1..-1].strip
|
149
|
+
app_error = @existing_errors[code]
|
150
|
+
if app_error
|
151
|
+
@existing_errors.delete(code)
|
152
|
+
app_error.message = message
|
153
|
+
unless app_error.changes.empty?
|
154
|
+
logger.info { " Changing #{code}: #{app_error.changes}" }
|
155
|
+
app_error.save!
|
156
|
+
end
|
157
|
+
else
|
158
|
+
logger.info { " Creating #{code}: #{message}" }
|
159
|
+
@app.app_errors.create!(code: code, message: message)
|
160
|
+
end
|
161
|
+
end
|
162
|
+
end
|
163
|
+
end
|
164
|
+
```
|
165
|
+
|
166
|
+
To add the seeder, you would create the following config/initializers/data_seeder.rb:
|
167
|
+
|
168
|
+
```ruby
|
169
|
+
MyApp::Application.config.after_initialize do
|
170
|
+
DataSeeder.configure do |config|
|
171
|
+
config.add_loader('err', AppErrorDataSeeder.new)
|
172
|
+
end
|
173
|
+
end
|
174
|
+
```
|
175
|
+
|
176
|
+
Executing DataSeeder.run would result in the following:
|
177
|
+
|
178
|
+
Loading errors for new App: bar
|
179
|
+
Creating A1: Error message for A1
|
180
|
+
Creating A2: Error message for A2
|
181
|
+
Creating B1: Error message for B1
|
182
|
+
Loading errors for new App: foo
|
183
|
+
Creating 1: Something went wrong
|
184
|
+
Creating 2: We are seriously foobared
|
185
|
+
Creating 3: We are less seriously foobared
|
186
|
+
|
187
|
+
TODO
|
188
|
+
----
|
189
|
+
|
190
|
+
Ability to specify more than 1 directory for Rails.env overrides. Could potentially be used if you have that
|
191
|
+
x Gigabyte seed file that you don't want to check into source control and only want to run on production?
|
192
|
+
|
193
|
+
YAML should allow loading as either array or hash. (currently only does hash)
|
194
|
+
|
195
|
+
CSV should have options such as only: and except: for using/skipping the specified header columns.
|
196
|
+
|
197
|
+
Allow multi-line config statement in seed file header? Would somehow need to mark it as such via end-of-line mark or
|
198
|
+
beginning-of-line mark or maybe use '#-' or '#%' for all command-type lines?
|
199
|
+
|
200
|
+
The structure.sql caching within rails uses the file timestamp to determine whether to prepare the test database. This
|
201
|
+
is error prone and forces you to do a 'touch db/structure.sql' to get around the not getting reloaded problem. Should
|
202
|
+
I add a utility to override this rails implementation with a sha-based one like the seed files use? (or am I the only
|
203
|
+
one who has to 'touch db/structure.sql' every time I switch branches?)
|
204
|
+
|
205
|
+
Add 'sql' loader (with disclaimer that it will temporarily truncate the table)
|
206
|
+
|
207
|
+
Ability to stop early when loading up a large seed file for a given environment, i.e., stop after processing the
|
208
|
+
first 10 lines when Rails.env.test?
|
209
|
+
|
210
|
+
I want to allow different seeding for different environments. For instance development might have a bunch of dummy
|
211
|
+
data useful for getting an environment up and running. I'm thinking of either making seed_dir work like a PATH
|
212
|
+
environment variable where the first one found would override the others, or maybe make it automatic based on the
|
213
|
+
directory names and the environment (seed.development/state.yml would override seed/state.yml).
|
214
|
+
|
215
|
+
The test environment will be the one that is constantly being seeded after migrations or branch changes. Some of
|
216
|
+
the seed files might be large and take a long time to seed. The above
|
217
|
+
strategy using seed.test might be useful but it might also be useful to have a preprocessor type such as .sh so for
|
218
|
+
instance you might have seed.test/table_with_lotsa_rows.csv.sh which might consist of the line
|
219
|
+
'head -20 ../seed/table_with_lotsa_rows.csv'
|
220
|
+
|
221
|
+
Caching of long-running stuff via pg_dump, mysqldump, or other? This belongs with discussion of the environment-specific
|
222
|
+
seeding above.
|
223
|
+
|
224
|
+
Allow config-driven initialization so that we could require: false in the Gemfile and only load as needed.
|
225
|
+
|
226
|
+
Meta
|
227
|
+
----
|
228
|
+
|
229
|
+
* Code: `git clone git://github.com/bpardee/data_seeder.git`
|
230
|
+
* Home: <https://github.com/bpardee/data_seeder>
|
231
|
+
* Issues: <http://github.com/bpardee/data_seeder/issues>
|
232
|
+
* Gems: <http://rubygems.org/gems/data_seeder>
|
233
|
+
|
234
|
+
This project uses [Semantic Versioning](http://semver.org/).
|
235
|
+
|
236
|
+
Author
|
237
|
+
------
|
238
|
+
|
239
|
+
[Brad Pardee](https://github.com/bpardee)
|
240
|
+
|
241
|
+
License
|
242
|
+
-------
|
243
|
+
|
244
|
+
Copyright 2015 Brad Pardee
|
245
|
+
|
246
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
247
|
+
you may not use this file except in compliance with the License.
|
248
|
+
You may obtain a copy of the License at
|
249
|
+
|
250
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
251
|
+
|
252
|
+
Unless required by applicable law or agreed to in writing, software
|
253
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
254
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
255
|
+
See the License for the specific language governing permissions and
|
256
|
+
limitations under the License.
|
data/Rakefile
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
# Rakefile for the data_seeder engine gem. It wires up RDoc generation, the
# Rails engine tasks, the gem packaging tasks and the test suite.
begin
  require 'bundler/setup'
rescue LoadError
  puts 'You must `gem install bundler` and `bundle install` to run rake tasks'
end

require 'rdoc/task'

RDoc::Task.new(:rdoc) do |rdoc|
  rdoc.rdoc_dir = 'rdoc'
  rdoc.title    = 'DataSeeder'
  rdoc.options << '--line-numbers'
  # The gem's README is README.md; the previous 'README.rdoc' pattern matched
  # no file, so the README was silently left out of the generated docs.
  rdoc.rdoc_files.include('README.md')
  rdoc.rdoc_files.include('lib/**/*.rb')
end

# Path to the dummy application's Rakefile, defined before the engine tasks
# are loaded.
APP_RAKEFILE = File.expand_path("../test/dummy/Rakefile", __FILE__)
load 'rails/tasks/engine.rake'

Bundler::GemHelper.install_tasks

require 'rake/testtask'

Rake::TestTask.new(:test) do |t|
  t.libs << 'lib'
  t.libs << 'test'
  t.pattern = 'test/**/*_test.rb'
  t.verbose = false
end

task default: :test
|
@@ -0,0 +1,34 @@
|
|
1
|
+
require 'digest'
|
2
|
+
|
3
|
+
module DataSeeder
  # Record of a seed file that has been processed: its path and the SHA-256
  # digest of its contents when it was last loaded. The digest is what lets an
  # unchanged file be skipped on subsequent runs.
  class SeedFile < ActiveRecord::Base
    # Returns a Hash of every stored SeedFile keyed by its path. If several
    # rows share a path, the last one iterated wins.
    def self.file_hash
      all.index_by(&:path)
    end

    # Loads the seed file at +path+, reusing the stored record for that path
    # when there is one.
    #
    # Only the row for +path+ is looked up, rather than materialising every
    # SeedFile row once per seed file as the hash-based lookup did.
    def self.load(path)
      find_or_initialize_by(path: path).load
    end

    # Processes the file through the loader registered for its extension, but
    # only when its SHA-256 digest differs from the stored one. The new digest
    # is persisted only after the loader has run, so a failing or missing
    # loader leaves the file eligible for the next run.
    #
    # Returns the result of +save!+ when the file was processed, otherwise nil.
    def load
      new_sha256 = Digest::SHA256.file(path).hexdigest
      return if sha256 == new_sha256

      self.sha256 = new_sha256
      # File.extname includes the leading dot; an empty extension yields nil here.
      ext = File.extname(path)[1..-1]
      return unless ext

      loader = DataSeeder.config.loaders[ext]
      unless loader
        DataSeeder.logger.info { "Warning: No loader for #{path}" }
        return
      end

      loader.process(path)
      save!
    end
  end
end
|
data/lib/data_seeder.rb
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
require 'data_seeder/config'
|
2
|
+
require 'data_seeder/engine'
|
3
|
+
require 'data_seeder/loader'
|
4
|
+
require 'data_seeder/logger'
|
5
|
+
|
6
|
+
# Top-level namespace and entry points for the data_seeder gem.
module DataSeeder
  # Constants backing to_id. The encoding is base 37: '0'-'9' map to 0-9,
  # 'A'-'Z' map to 10-35, and 36 pads positions past the end of the string.
  ID_RADIX         = 37
  ID_PAD_DIGIT     = 36
  ID_LETTER_OFFSET = 10
  ID_ZERO_ORD      = '0'.ord
  ID_A_ORD         = 'A'.ord
  ID_NUMERIC_ORDS  = (ID_ZERO_ORD..'9'.ord).freeze
  private_constant :ID_RADIX, :ID_PAD_DIGIT, :ID_LETTER_OFFSET,
                   :ID_ZERO_ORD, :ID_A_ORD, :ID_NUMERIC_ORDS

  class << self
    attr_writer :config
  end

  # Returns the current Config, creating a default one on first use.
  def self.config
    @config ||= Config.new
  end

  # Discards the current configuration and replaces it with a fresh default.
  def self.reset
    @config = Config.new
  end

  # Yields the current Config so callers can adjust it in a block.
  def self.configure
    yield(config)
  end

  # Returns the logger held by the current Config.
  def self.logger
    config.logger
  end

  # Loads every regular file found under config.seed_dir (recursively) through
  # SeedFile.load, then logs the elapsed time.
  #
  # new_config - Hash of Config attribute names to values; each is applied via
  #              the matching public "name=" setter before seeding starts.
  def self.run(new_config = {})
    # Monotonic clock: immune to wall-clock adjustments and needs no
    # Benchmark.ms (an ActiveSupport extension, not part of stdlib Benchmark).
    started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)

    new_config.each do |key, value|
      config.public_send("#{key}=", value)
    end

    Dir.chdir(config.seed_dir) do
      # Sort explicitly so seed files load in the same order on every Ruby
      # version and filesystem.
      Dir['**/*'].sort.each do |path|
        SeedFile.load(path) if File.file?(path)
      end
    end

    msec = (Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at) * 1000
    logger.info { "DataSeeder.run took #{msec.to_i} msec" }
  end

  # Same as run, but first points the configured logger at Rails.logger.
  def self.test_run(new_config = {})
    config.logger = Rails.logger
    run(new_config)
  end

  # Derives a deterministic Integer from the first +len+ alphanumeric
  # characters of +str+.
  #
  # The string is upcased and stripped of everything but A-Z and 0-9; the
  # result is read as a base-37 number of exactly +len+ digits, padding with
  # the digit 36 when the cleaned string is shorter than +len+.
  #
  #   DataSeeder.to_id(2, 'AB') # => 381  (10 * 37 + 11)
  #   DataSeeder.to_id(2, 'a')  # => 406  (10 * 37 + 36)
  def self.to_id(len, str)
    cleaned = str.upcase.gsub(/[^A-Z0-9]/, '')
    len.times.reduce(0) do |id, i|
      id * ID_RADIX + id_digit(cleaned[i])
    end
  end

  # Maps one cleaned character (or nil for a missing position) to its base-37
  # digit value.
  def self.id_digit(char)
    return ID_PAD_DIGIT unless char

    ord = char.ord
    if ID_NUMERIC_ORDS.cover?(ord)
      ord - ID_ZERO_ORD
    else
      ord - ID_A_ORD + ID_LETTER_OFFSET
    end
  end
  private_class_method :id_digit
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
module DataSeeder
  # Holds the gem's settings: the seed directory, the logger, and the mapping
  # from file extension to loader instance.
  class Config
    attr_accessor :seed_dir, :logger
    # Only a reader here: the writer is defined explicitly below, so using
    # attr_accessor would just produce a method-redefinition warning.
    attr_reader :loaders

    # Starts with 'db/seed' as the seed directory, the gem's own Logger, and
    # the default loader set.
    def initialize
      @seed_dir = 'db/seed'
      @logger   = Logger.new
      @loaders  = default_loaders
    end

    # Forwards the verbose flag to the logger. The logger can be swapped for
    # one that has no such setting (DataSeeder.test_run assigns Rails.logger),
    # in which case the flag is ignored instead of raising NoMethodError.
    def verbose=(verbose)
      @logger.verbose = verbose if @logger.respond_to?(:verbose=)
    end

    # Returns the logger's verbose flag, or nil when the logger has none.
    def verbose
      @logger.verbose if @logger.respond_to?(:verbose)
    end

    # Builds a fresh Hash of extension => loader instance for the built-in
    # formats.
    def default_loaders
      {
        'csv'  => Loader::CSV.new,
        'json' => Loader::JSON.new,
        'txt'  => Loader::Txt.new,
        'yaml' => Loader::YAML.new,
        'yml'  => Loader::YAML.new,
      }
    end

    # Replaces the loader map with the defaults overlaid by +loaders+, so the
    # built-in extensions stay available unless explicitly overridden.
    def loaders=(loaders)
      @loaders = default_loaders.merge(loaders)
    end

    # Merges +loaders+ (extension => loader) into the current loader map.
    def add_loaders(loaders)
      @loaders = @loaders.merge(loaders)
    end

    # Registers +loader+ for the single extension +ext+ (without the dot).
    def add_loader(ext, loader)
      @loaders[ext] = loader
    end
  end
end
|