data_seeder 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +256 -0
- data/Rakefile +34 -0
- data/app/models/data_seeder/seed_file.rb +34 -0
- data/db/migrate/20150306195118_create_data_seeder_seed_files.rb +9 -0
- data/lib/data_seeder.rb +68 -0
- data/lib/data_seeder/config.rb +41 -0
- data/lib/data_seeder/engine.rb +5 -0
- data/lib/data_seeder/loader.rb +122 -0
- data/lib/data_seeder/loader/csv.rb +15 -0
- data/lib/data_seeder/loader/json.rb +20 -0
- data/lib/data_seeder/loader/txt.rb +23 -0
- data/lib/data_seeder/loader/yaml.rb +23 -0
- data/lib/data_seeder/logger.rb +15 -0
- data/lib/data_seeder/version.rb +3 -0
- data/test/dummy/Rakefile +6 -0
- data/test/dummy/app/models/app.rb +3 -0
- data/test/dummy/app/models/app_error.rb +3 -0
- data/test/dummy/app/models/app_error_data_seeder.rb +52 -0
- data/test/dummy/app/models/country.rb +14 -0
- data/test/dummy/app/models/state.rb +2 -0
- data/test/dummy/bin/bundle +3 -0
- data/test/dummy/bin/rails +4 -0
- data/test/dummy/bin/rake +4 -0
- data/test/dummy/config.ru +4 -0
- data/test/dummy/config/application.rb +26 -0
- data/test/dummy/config/boot.rb +5 -0
- data/test/dummy/config/database.yml +12 -0
- data/test/dummy/config/environment.rb +5 -0
- data/test/dummy/config/environments/development.rb +37 -0
- data/test/dummy/config/environments/test.rb +42 -0
- data/test/dummy/config/initializers/backtrace_silencers.rb +7 -0
- data/test/dummy/config/initializers/inflections.rb +16 -0
- data/test/dummy/db/migrate/20150313022149_create_countries.rb +8 -0
- data/test/dummy/db/migrate/20150313022228_create_states.rb +8 -0
- data/test/dummy/db/migrate/20150313172634_create_apps.rb +7 -0
- data/test/dummy/db/migrate/20150313172719_create_app_errors.rb +10 -0
- data/test/dummy/db/schema.rb +45 -0
- data/test/dummy/db/seed.test/bar.err +3 -0
- data/test/dummy/db/seed.test/countries.txt +249 -0
- data/test/dummy/db/seed.test/foo.err +3 -0
- data/test/dummy/db/seed.test/states.csv +51 -0
- data/test/dummy/db/seed.test/states.json +153 -0
- data/test/dummy/db/seed.test/states.txt +51 -0
- data/test/dummy/db/seed.test/states.yml +101 -0
- data/test/dummy/db/seed.test/zulu.err +2 -0
- data/test/dummy/db/test.sqlite3 +0 -0
- data/test/dummy/log/development.log +39 -0
- data/test/dummy/log/test.log +68768 -0
- data/test/models/data_seeder_test.rb +147 -0
- data/test/test_helper.rb +12 -0
- metadata +159 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 9201eccfce12d45bbe01559b668fb2fa32c704f1
|
4
|
+
data.tar.gz: a0d02f94fad7dd14f62c05d4ef33223669dc1665
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 0933f7b3628701f8ffbef5f5d6247ffed17c997ec2099beefbf2e32d83ee0a84f39256aff8eefaeea1d1e7d29ec959cb6f13b70fc3b3df5c46d2d6d8becb4501
|
7
|
+
data.tar.gz: 5327311bdba8e1e880d87db269eb2ec69fe834139276dc6398b9fba35f2e07c185fa66fae61c05f0ba67a2451377bd48125cd8b23ccb22c7b33b459cd346cb0f
|
data/README.md
ADDED
@@ -0,0 +1,256 @@
|
|
1
|
+
data_seeder [](http://travis-ci.org/bpardee/data_seeder)
|
2
|
+
====================
|
3
|
+
|
4
|
+
* http://github.com/bpardee/data_seeder
|
5
|
+
|
6
|
+
## Introduction
|
7
|
+
|
8
|
+
This gem provides a simple methodology for seeding your database. Seed files in your
|
9
|
+
seeds directory are loaded in the database and the checksum is stored away so that the
|
10
|
+
file will only be re-applied when it is changed. Each row instance within a file is
|
11
|
+
converted to an attribute hash and the updates are applied idempotently such that unchanged
|
12
|
+
rows aren't touched, only those rows that have changes as well as insertions and deletions
|
13
|
+
are performed. The extension of the seed file determines how it is loaded. Extensions that
|
14
|
+
are supported by default are json, yaml, csv, and txt but homegrown loaders can be defined
|
15
|
+
as necessary.
|
16
|
+
|
17
|
+
## Usage
|
18
|
+
|
19
|
+
Add this line to your application's Gemfile and run bundler:
|
20
|
+
|
21
|
+
gem 'data_seeder'
|
22
|
+
|
23
|
+
Execute the following and migrate your database:
|
24
|
+
|
25
|
+
rake data_seeder:install:migrations
|
26
|
+
|
27
|
+
Add the following to your db/seeds.rb file:
|
28
|
+
|
29
|
+
DataSeeder.run
|
30
|
+
|
31
|
+
Add seed files to the db/seed directory as necessary. For instance, suppose you have
|
32
|
+
the following table:
|
33
|
+
|
34
|
+
create_table :countries do |t|
|
35
|
+
t.column :code, 'CHAR(2)', null: false
|
36
|
+
t.string :name, null: false
|
37
|
+
end
|
38
|
+
|
39
|
+
And you have a corresponding db/seed/countries.txt file as follows:
|
40
|
+
|
41
|
+
# config: { key_attribute: 'code', line: ->(line) { { code: line[0,2], name: line[3...-1] } } }
|
42
|
+
AD Andorra
|
43
|
+
AE United Arab Emirates
|
44
|
+
AF Arghanistan
|
45
|
+
|
46
|
+
The first line in a file can define the config attributes associated with the file. For this seed file,
|
47
|
+
the key_attribute says that it will use the 'code' attribute to lookup existing records (defaults to 'id')
|
48
|
+
and the line function
|
49
|
+
defines how the line is converted to an attribute hash defining the instance.
|
50
|
+
|
51
|
+
Running rake db:seed will result in the following output:
|
52
|
+
|
53
|
+
# rake db:seed
|
54
|
+
Loading countries
|
55
|
+
Saving #<Country id: 1, code: "AD", name: "Andorra">
|
56
|
+
Saving #<Country id: 2, code: "AE", name: "United Arab Emirates">
|
57
|
+
Saving #<Country id: 3, code: "AF", name: "Arghanistan">
|
58
|
+
...
|
59
|
+
DataSeeder.run took 560 msec
|
60
|
+
|
61
|
+
Repeating the command will not attempt to reload the countries file since it is unchanged:
|
62
|
+
|
63
|
+
# rake db:seed
|
64
|
+
DataSeeder.run took 21 msec
|
65
|
+
|
66
|
+
Then you notice that you have a typo in Arghanistan so you fix it and repeat the command:
|
67
|
+
|
68
|
+
# rake db:seed
|
69
|
+
Loading countries
|
70
|
+
Updating AF: {"name"=>["Arghanistan", "Afghanistan"]}
|
71
|
+
DataSeeder.run took 231 msec
|
72
|
+
|
73
|
+
You will probably want your test environment seeded also. Adding the following to test/test_helper.rb
|
74
|
+
will seed your database prior to running tests but will redirect the output to the Rails.logger instead
|
75
|
+
of stdout.
|
76
|
+
|
77
|
+
DataSeeder.test_run
|
78
|
+
|
79
|
+
## Loaders
|
80
|
+
|
81
|
+
data_seeder has default loaders for txt, csv, json and yml extensions but you can also create
|
82
|
+
your own custom loaders.
|
83
|
+
For instance, suppose you had the following tables:
|
84
|
+
|
85
|
+
create_table "app_errors", force: :cascade do |t|
|
86
|
+
t.integer "app_id"
|
87
|
+
t.string "code"
|
88
|
+
t.string "message"
|
89
|
+
end
|
90
|
+
add_index "app_errors", ["app_id"], name: "index_app_errors_on_app_id"
|
91
|
+
|
92
|
+
create_table "apps", force: :cascade do |t|
|
93
|
+
t.string "name"
|
94
|
+
end
|
95
|
+
|
96
|
+
And you wanted to load up separate error messages for each app such as the following 2 files:
|
97
|
+
|
98
|
+
# foo.err
|
99
|
+
1 Something went wrong
|
100
|
+
2 We are seriously foobared
|
101
|
+
3 We are less seriously foobared
|
102
|
+
|
103
|
+
# bar.err
|
104
|
+
A1 Error message for A1
|
105
|
+
A2 Error message for A2
|
106
|
+
B1 Error message for B1
|
107
|
+
|
108
|
+
|
109
|
+
You could create your own custom loader that might look as follows:
|
110
|
+
|
111
|
+
```ruby
|
112
|
+
require 'data_seeder'
|
113
|
+
|
114
|
+
class AppErrorDataSeeder
|
115
|
+
include ::DataSeeder::Loader
|
116
|
+
|
117
|
+
def setup
|
118
|
+
@app = App.find_or_initialize_by(name: self.path_minus_ext)
|
119
|
+
@existing_errors = {}
|
120
|
+
if @app.new_record?
|
121
|
+
logger.info "Loading errors for new App: #{@app.name}"
|
122
|
+
@app.save!
|
123
|
+
else
|
124
|
+
logger.info "Loading errors for existing App: #{@app.name}"
|
125
|
+
@app.app_errors.each do |app_error|
|
126
|
+
@existing_errors[app_error.code] = app_error
|
127
|
+
end
|
128
|
+
end
|
129
|
+
end
|
130
|
+
|
131
|
+
def teardown
|
132
|
+
unless @existing_errors.empty?
|
133
|
+
logger.info { " The following are being removed:" }
|
134
|
+
@existing_errors.each do |code, app_error|
|
135
|
+
logger.info " #{code}: #{app_error.message}"
|
136
|
+
app_error.destroy
|
137
|
+
end
|
138
|
+
end
|
139
|
+
end
|
140
|
+
|
141
|
+
def load(io)
|
142
|
+
io.each_line do |line|
|
143
|
+
line.strip!
|
144
|
+
next if line.blank? || line[0] == ?#
|
145
|
+
space_i = line.index(' ')
|
146
|
+
raise "Invalid line: #{line}" unless space_i
|
147
|
+
code = line[0,space_i].strip
|
148
|
+
message = line[space_i+1..-1].strip
|
149
|
+
app_error = @existing_errors[code]
|
150
|
+
if app_error
|
151
|
+
@existing_errors.delete(code)
|
152
|
+
app_error.message = message
|
153
|
+
unless app_error.changes.empty?
|
154
|
+
logger.info { " Changing #{code}: #{app_error.changes}" }
|
155
|
+
app_error.save!
|
156
|
+
end
|
157
|
+
else
|
158
|
+
logger.info { " Creating #{code}: #{message}" }
|
159
|
+
@app.app_errors.create!(code: code, message: message)
|
160
|
+
end
|
161
|
+
end
|
162
|
+
end
|
163
|
+
end
|
164
|
+
```
|
165
|
+
|
166
|
+
To add the seeder, you would create the following config/initializers/data_seeder.rb:
|
167
|
+
|
168
|
+
```ruby
|
169
|
+
MyApp::Application.config.after_initialize do
|
170
|
+
DataSeeder.configure do |config|
|
171
|
+
config.add_loader('err', AppErrorDataSeeder.new)
|
172
|
+
end
|
173
|
+
end
|
174
|
+
```
|
175
|
+
|
176
|
+
Executing DataSeeder.run would result in the following:
|
177
|
+
|
178
|
+
Loading errors for new App: bar
|
179
|
+
Creating A1: Error message for A1
|
180
|
+
Creating A2: Error message for A2
|
181
|
+
Creating B1: Error message for B1
|
182
|
+
Loading errors for new App: foo
|
183
|
+
Creating 1: Something went wrong
|
184
|
+
Creating 2: We are seriously foobared
|
185
|
+
Creating 3: We are less seriously foobared
|
186
|
+
|
187
|
+
TODO
|
188
|
+
----
|
189
|
+
|
190
|
+
Ability to specify more than 1 directory for Rails.env overrides. Could potentially be used if you have that
|
191
|
+
x Gigabyte seed file that you don't want to check into source control and only want to run on production?
|
192
|
+
|
193
|
+
YAML should allow loading as either array or hash. (currently only does hash)
|
194
|
+
|
195
|
+
CSV should have options such as only: and except: for using/skipping the specified header columns.
|
196
|
+
|
197
|
+
Allow multi-line config statement in seed file header? Would somehow need to mark it as such via end-of-line mark or
|
198
|
+
beginning-of-line mark or maybe use '#-' or '#%' for all command-type lines?
|
199
|
+
|
200
|
+
The structure.sql caching within rails uses the file timestamp to determine whether to prepare the test database. This
|
201
|
+
is error prone and forces you to do a 'touch db/structure.sql' to get around the not getting reloaded problem. Should
|
202
|
+
I add a utility to override this rails implementation with a sha-based one like the seed files use? (or am I the only
|
203
|
+
one who has to 'touch db/structure.sql' every time I switch branches?)
|
204
|
+
|
205
|
+
Add 'sql' loader (with disclaimer that it will temporarily truncate the table)
|
206
|
+
|
207
|
+
Ability to stop early when loading up a large seed file for a given environment, i.e., stop after processing the
|
208
|
+
first 10 lines when Rails.env.test?
|
209
|
+
|
210
|
+
I want to allow different seeding for different environments. For instance development might have a bunch of dummy
|
211
|
+
data useful for getting an environment up and running. I'm thinking either making the seed_dir similar to a PATH
|
212
|
+
environment variable where the first one found would override the others, or maybe make it automatic based on the
|
213
|
+
directory names and the environment (seed.development/state.yml would override seed/state.yml).
|
214
|
+
|
215
|
+
The test environment will be the one that is constantly being seeded after migrations or branch changes. Some of
|
216
|
+
the seed files might be large and take a long time to seed. The above
|
217
|
+
strategy using seed.test might be useful but it might also be useful to have a preprocessor type such as .sh so for
|
218
|
+
instance you might have seed.test/table_with_lotsa_rows.csv.sh which might consist of the line
|
219
|
+
'head -20 ../seed/table_with_lotsa_rows.csv'
|
220
|
+
|
221
|
+
Caching of long-running stuff via pg_dump, mysqldump, or other? This belongs with discussion of the environment-specific
|
222
|
+
seeding above.
|
223
|
+
|
224
|
+
Allow config-driven initialization so that we could require: false in the Gemfile and only load as needed.
|
225
|
+
|
226
|
+
Meta
|
227
|
+
----
|
228
|
+
|
229
|
+
* Code: `git clone https://github.com/bpardee/data_seeder.git`
|
230
|
+
* Home: <https://github.com/bpardee/data_seeder>
|
231
|
+
* Issues: <http://github.com/bpardee/data_seeder/issues>
|
232
|
+
* Gems: <http://rubygems.org/gems/data_seeder>
|
233
|
+
|
234
|
+
This project uses [Semantic Versioning](http://semver.org/).
|
235
|
+
|
236
|
+
Author
|
237
|
+
------
|
238
|
+
|
239
|
+
[Brad Pardee](https://github.com/bpardee)
|
240
|
+
|
241
|
+
License
|
242
|
+
-------
|
243
|
+
|
244
|
+
Copyright 2015 Brad Pardee
|
245
|
+
|
246
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
247
|
+
you may not use this file except in compliance with the License.
|
248
|
+
You may obtain a copy of the License at
|
249
|
+
|
250
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
251
|
+
|
252
|
+
Unless required by applicable law or agreed to in writing, software
|
253
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
254
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
255
|
+
See the License for the specific language governing permissions and
|
256
|
+
limitations under the License.
|
data/Rakefile
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
# Rake tasks for developing the data_seeder engine: RDoc generation, the
# engine tasks driven by the dummy app's Rakefile, gem packaging and tests.
begin
  require 'bundler/setup'
rescue LoadError
  puts 'You must `gem install bundler` and `bundle install` to run rake tasks'
end

require 'rdoc/task'

RDoc::Task.new(:rdoc) do |rdoc|
  rdoc.rdoc_dir = 'rdoc'
  rdoc.title    = 'DataSeeder'
  rdoc.options << '--line-numbers'
  # The package ships README.md (there is no README.rdoc), so include that file.
  rdoc.rdoc_files.include('README.md')
  rdoc.rdoc_files.include('lib/**/*.rb')
end

# engine.rake reads APP_RAKEFILE to locate the dummy application's Rakefile.
APP_RAKEFILE = File.expand_path("../test/dummy/Rakefile", __FILE__)
load 'rails/tasks/engine.rake'

Bundler::GemHelper.install_tasks

require 'rake/testtask'

Rake::TestTask.new(:test) do |t|
  t.libs << 'lib'
  t.libs << 'test'
  t.pattern = 'test/**/*_test.rb'
  t.verbose = false
end

task default: :test
|
@@ -0,0 +1,34 @@
|
|
1
|
+
require 'digest'
|
2
|
+
|
3
|
+
module DataSeeder
  # Record of a seed file that has been applied. Stores the file's path and
  # the SHA-256 digest of its contents so that an unchanged file is not
  # processed again.
  class SeedFile < ActiveRecord::Base
    # Returns a Hash mapping each stored path to its SeedFile record.
    def self.file_hash
      all.each_with_object({}) do |seed_file, by_path|
        by_path[seed_file.path] = seed_file
      end
    end

    # Loads the seed file at +path+, reusing the stored record when one exists.
    #
    # Looks up only the row for +path+ rather than building a hash of every
    # stored seed file on each call.
    def self.load(path)
      find_or_initialize_by(path: path).load
    end

    # Processes the file with the loader registered for its extension when
    # its digest differs from the stored one, then persists the new digest.
    #
    # Returns nil without saving when the file is unchanged, has no
    # extension, or has no registered loader (a warning is logged in the
    # last case).
    def load
      new_sha256 = Digest::SHA256.file(path).hexdigest
      return if sha256 == new_sha256

      ext = File.extname(path)[1..-1]
      return unless ext

      loader = DataSeeder.config.loaders[ext]
      unless loader
        DataSeeder.logger.info { "Warning: No loader for #{path}"}
        return
      end

      loader.process(path)
      # Record the digest only after the loader has run, so the record is
      # never left holding a digest for content that was not processed.
      self.sha256 = new_sha256
      save!
    end
  end
end
|
data/lib/data_seeder.rb
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
require 'data_seeder/config'
|
2
|
+
require 'data_seeder/engine'
|
3
|
+
require 'data_seeder/loader'
|
4
|
+
require 'data_seeder/logger'
|
5
|
+
|
6
|
+
# Entry points for seeding: configuration access, the run/test_run drivers
# and the to_id helper.
module DataSeeder
  class << self
    attr_writer :config
  end

  # Returns the current Config, creating a default one on first use.
  def self.config
    @config ||= Config.new
  end

  # Replaces the current configuration with a fresh default Config.
  def self.reset
    @config = Config.new
  end

  # Yields the current Config so callers can adjust it.
  def self.configure
    yield(config)
  end

  # Returns the logger held by the current configuration.
  def self.logger
    config.logger
  end

  # Applies +new_config+ (attribute name => value) to the configuration, then
  # loads every regular file found under config.seed_dir and logs the elapsed
  # time in milliseconds.
  def self.run(new_config={})
    msec = Benchmark.ms do
      new_config.each do |key, value|
        # public_send so a caller-supplied key can only reach public writers.
        config.public_send("#{key}=", value)
      end
      Dir.chdir(config.seed_dir) do
        # Sort so files are applied in the same order on every platform and
        # Ruby version (glob order is not guaranteed on older Rubies).
        Dir['**/*'].sort.each do |path|
          SeedFile.load(path) if File.file?(path)
        end
      end
    end
    logger.info { "DataSeeder.run took #{msec.to_i} msec" }
  end

  # Same as run, but first points the configuration's logger at Rails.logger.
  def self.test_run(new_config={})
    config.logger = Rails.logger
    run(new_config)
  end

  # Builds an Integer from the first +len+ alphanumeric characters of +str+.
  #
  # +str+ is upcased and stripped of everything except A-Z and 0-9. Each of
  # the +len+ positions contributes one base-37 digit: 0-9 map to 0..9, A-Z
  # map to 10..35, and a position past the end of the cleaned string maps
  # to 36.
  def self.to_id(len, str)
    cleaned = str.upcase.gsub(/[^A-Z0-9]/, '')
    (0...len).reduce(0) do |id, i|
      char = cleaned[i]
      # String#to_i(36) yields 0..9 for digits and 10..35 for A..Z.
      id * 37 + (char ? char.to_i(36) : 36)
    end
  end
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
module DataSeeder
  # Holds the seeding settings: the seed directory, the logger and the map of
  # file extension => loader.
  class Config
    attr_accessor :seed_dir, :logger
    # Reader only: the writer is defined explicitly below, so generating one
    # here would just be redefined (and warn under `ruby -w`).
    attr_reader :loaders

    def initialize
      @seed_dir = 'db/seed'
      @logger   = Logger.new
      @loaders  = default_loaders
    end

    # Forwards the verbose flag to the current logger.
    def verbose=(verbose)
      @logger.verbose = verbose
    end

    # Returns the current logger's verbose flag.
    def verbose
      @logger.verbose
    end

    # Returns a new Hash of the built-in loaders keyed by file extension.
    def default_loaders
      {
        'csv'  => Loader::CSV.new,
        'json' => Loader::JSON.new,
        'txt'  => Loader::Txt.new,
        'yaml' => Loader::YAML.new,
        'yml'  => Loader::YAML.new,
      }
    end

    # Replaces the loader map with the defaults overlaid by +loaders+;
    # previously added custom loaders are discarded.
    def loaders=(loaders)
      @loaders = default_loaders.merge(loaders)
    end

    # Overlays +loaders+ on top of the current loader map.
    def add_loaders(loaders)
      @loaders = @loaders.merge(loaders)
    end

    # Registers +loader+ for the extension +ext+ (given without the dot).
    def add_loader(ext, loader)
      @loaders[ext] = loader
    end
  end
end
|