data_seeder 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +256 -0
  3. data/Rakefile +34 -0
  4. data/app/models/data_seeder/seed_file.rb +34 -0
  5. data/db/migrate/20150306195118_create_data_seeder_seed_files.rb +9 -0
  6. data/lib/data_seeder.rb +68 -0
  7. data/lib/data_seeder/config.rb +41 -0
  8. data/lib/data_seeder/engine.rb +5 -0
  9. data/lib/data_seeder/loader.rb +122 -0
  10. data/lib/data_seeder/loader/csv.rb +15 -0
  11. data/lib/data_seeder/loader/json.rb +20 -0
  12. data/lib/data_seeder/loader/txt.rb +23 -0
  13. data/lib/data_seeder/loader/yaml.rb +23 -0
  14. data/lib/data_seeder/logger.rb +15 -0
  15. data/lib/data_seeder/version.rb +3 -0
  16. data/test/dummy/Rakefile +6 -0
  17. data/test/dummy/app/models/app.rb +3 -0
  18. data/test/dummy/app/models/app_error.rb +3 -0
  19. data/test/dummy/app/models/app_error_data_seeder.rb +52 -0
  20. data/test/dummy/app/models/country.rb +14 -0
  21. data/test/dummy/app/models/state.rb +2 -0
  22. data/test/dummy/bin/bundle +3 -0
  23. data/test/dummy/bin/rails +4 -0
  24. data/test/dummy/bin/rake +4 -0
  25. data/test/dummy/config.ru +4 -0
  26. data/test/dummy/config/application.rb +26 -0
  27. data/test/dummy/config/boot.rb +5 -0
  28. data/test/dummy/config/database.yml +12 -0
  29. data/test/dummy/config/environment.rb +5 -0
  30. data/test/dummy/config/environments/development.rb +37 -0
  31. data/test/dummy/config/environments/test.rb +42 -0
  32. data/test/dummy/config/initializers/backtrace_silencers.rb +7 -0
  33. data/test/dummy/config/initializers/inflections.rb +16 -0
  34. data/test/dummy/db/migrate/20150313022149_create_countries.rb +8 -0
  35. data/test/dummy/db/migrate/20150313022228_create_states.rb +8 -0
  36. data/test/dummy/db/migrate/20150313172634_create_apps.rb +7 -0
  37. data/test/dummy/db/migrate/20150313172719_create_app_errors.rb +10 -0
  38. data/test/dummy/db/schema.rb +45 -0
  39. data/test/dummy/db/seed.test/bar.err +3 -0
  40. data/test/dummy/db/seed.test/countries.txt +249 -0
  41. data/test/dummy/db/seed.test/foo.err +3 -0
  42. data/test/dummy/db/seed.test/states.csv +51 -0
  43. data/test/dummy/db/seed.test/states.json +153 -0
  44. data/test/dummy/db/seed.test/states.txt +51 -0
  45. data/test/dummy/db/seed.test/states.yml +101 -0
  46. data/test/dummy/db/seed.test/zulu.err +2 -0
  47. data/test/dummy/db/test.sqlite3 +0 -0
  48. data/test/dummy/log/development.log +39 -0
  49. data/test/dummy/log/test.log +68768 -0
  50. data/test/models/data_seeder_test.rb +147 -0
  51. data/test/test_helper.rb +12 -0
  52. metadata +159 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 9201eccfce12d45bbe01559b668fb2fa32c704f1
4
+ data.tar.gz: a0d02f94fad7dd14f62c05d4ef33223669dc1665
5
+ SHA512:
6
+ metadata.gz: 0933f7b3628701f8ffbef5f5d6247ffed17c997ec2099beefbf2e32d83ee0a84f39256aff8eefaeea1d1e7d29ec959cb6f13b70fc3b3df5c46d2d6d8becb4501
7
+ data.tar.gz: 5327311bdba8e1e880d87db269eb2ec69fe834139276dc6398b9fba35f2e07c185fa66fae61c05f0ba67a2451377bd48125cd8b23ccb22c7b33b459cd346cb0f
data/README.md ADDED
@@ -0,0 +1,256 @@
1
+ data_seeder [![Build Status](https://secure.travis-ci.org/bpardee/data_seeder.png?branch=master)](http://travis-ci.org/bpardee/data_seeder)
2
+ ====================
3
+
4
+ * http://github.com/bpardee/data_seeder
5
+
6
+ ## Introduction
7
+
8
+ This gem provides a simple methodology for seeding your database. Seed files in your
9
+ seeds directory are loaded in the database and the checksum is stored away so that the
10
+ file will only be re-applied when it is changed. Each row instance within a file is
11
+ converted to an attribute hash and the updates are applied idempotently such that unchanged
12
+ rows aren't touched; only updates to changed rows, as well as insertions and deletions,
13
+ are performed. The extension of the seed file determines how it is loaded. Extensions that
14
+ are supported by default are json, yaml, csv, and txt but homegrown loaders can be defined
15
+ as necessary.
16
+
17
+ ## Usage
18
+
19
+ Add this line to your application's Gemfile and run bundler:
20
+
21
+ gem 'data_seeder'
22
+
23
+ Execute the following and migrate your database:
24
+
25
+ rake data_seeder:install:migrations
26
+
27
+ Add the following to your db/seeds.rb file:
28
+
29
+ DataSeeder.run
30
+
31
+ Add seed files to the db/seed directory as necessary. For instance, suppose you have
32
+ the following table:
33
+
34
+ create_table :countries do |t|
35
+ t.column :code, 'CHAR(2)', null: false
36
+ t.string :name, null: false
37
+ end
38
+
39
+ And you have a corresponding db/seed/countries.txt file as follows:
40
+
41
+ # config: { key_attribute: 'code', line: ->(line) { { code: line[0,2], name: line[3...-1] } } }
42
+ AD Andorra
43
+ AE United Arab Emirates
44
+ AF Arghanistan
45
+
46
+ The first line in a file can define the config attributes associated with the file. For this seed file,
47
+ the key_attribute says that it will use the 'code' attribute to lookup existing records (defaults to 'id')
48
+ and the line function
49
+ defines how the line is converted to an attribute hash defining the instance.
50
+
51
+ Running rake db:seed will result in the following output:
52
+
53
+ # rake db:seed
54
+ Loading countries
55
+ Saving #<Country id: 1, code: "AD", name: "Andorra">
56
+ Saving #<Country id: 2, code: "AE", name: "United Arab Emirates">
57
+ Saving #<Country id: 3, code: "AF", name: "Arghanistan">
58
+ ...
59
+ DataSeeder.run took 560 msec
60
+
61
+ Repeating the command will not attempt to reload the countries file since it is unchanged:
62
+
63
+ # rake db:seed
64
+ DataSeeder.run took 21 msec
65
+
66
+ Then you notice that you have a typo in Arghanistan so you fix it and repeat the command:
67
+
68
+ # rake db:seed
69
+ Loading countries
70
+ Updating AF: {"name"=>["Arghanistan", "Afghanistan"]}
71
+ DataSeeder.run took 231 msec
72
+
73
+ You will probably want your test environment seeded also. Adding the following to test/test_helper.rb
74
+ will seed your database prior to running tests but will redirect the output to the Rails.logger instead
75
+ of stdout.
76
+
77
+ DataSeeder.test_run
78
+
79
+ ## Loaders
80
+
81
+ data_seeder has default loaders for txt, csv, json and yml extensions but you can also create
82
+ your own custom loaders.
83
+ For instance, suppose you had the following tables:
84
+
85
+ create_table "app_errors", force: :cascade do |t|
86
+ t.integer "app_id"
87
+ t.string "code"
88
+ t.string "message"
89
+ end
90
+ add_index "app_errors", ["app_id"], name: "index_app_errors_on_app_id"
91
+
92
+ create_table "apps", force: :cascade do |t|
93
+ t.string "name"
94
+ end
95
+
96
+ And you wanted to load up separate error messages for each app such as the following 2 files:
97
+
98
+ # foo.err
99
+ 1 Something went wrong
100
+ 2 We are seriously foobared
101
+ 3 We are less seriously foobared
102
+
103
+ # bar.err
104
+ A1 Error message for A1
105
+ A2 Error message for A2
106
+ B1 Error message for B1
107
+
108
+
109
+ You could create your own custom loader that might look as follows:
110
+
111
+ ```ruby
112
+ require 'data_seeder'
113
+
114
+ class AppErrorDataSeeder
115
+ include ::DataSeeder::Loader
116
+
117
+ def setup
118
+ @app = App.find_or_initialize_by(name: self.path_minus_ext)
119
+ @existing_errors = {}
120
+ if @app.new_record?
121
+ logger.info "Loading errors for new App: #{@app.name}"
122
+ @app.save!
123
+ else
124
+ logger.info "Loading errors for existing App: #{@app.name}"
125
+ @app.app_errors.each do |app_error|
126
+ @existing_errors[app_error.code] = app_error
127
+ end
128
+ end
129
+ end
130
+
131
+ def teardown
132
+ unless @existing_errors.empty?
133
+ logger.info { " The following are begin removed:" }
134
+ @existing_errors.each do |code, app_error|
135
+ logger.info " #{code}: #{app_error.message}"
136
+ app_error.destroy
137
+ end
138
+ end
139
+ end
140
+
141
+ def load(io)
142
+ io.each_line do |line|
143
+ line.strip!
144
+ next if line.blank? || line[0] == ?#
145
+ space_i = line.index(' ')
146
+ raise "Invalid line: #{line}" unless space_i
147
+ code = line[0,space_i].strip
148
+ message = line[space_i+1..-1].strip
149
+ app_error = @existing_errors[code]
150
+ if app_error
151
+ @existing_errors.delete(code)
152
+ app_error.message = message
153
+ unless app_error.changes.empty?
154
+ logger.info { " Changing #{code}: #{app_error.changes}" }
155
+ app_error.save!
156
+ end
157
+ else
158
+ logger.info { " Creating #{code}: #{message}" }
159
+ @app.app_errors.create!(code: code, message: message)
160
+ end
161
+ end
162
+ end
163
+ end
164
+ ```
165
+
166
+ To add the seeder, you would create the following config/initializers/data_seeder.rb:
167
+
168
+ ```ruby
169
+ MyApp::Application.config.after_initialize do
170
+ DataSeeder.configure do |config|
171
+ config.add_loader('err', AppErrorDataSeeder.new)
172
+ end
173
+ end
174
+ ```
175
+
176
+ Executing DataSeeder.run would result in the following:
177
+
178
+ Loading errors for new App: bar
179
+ Creating A1: Error message for A1
180
+ Creating A2: Error message for A2
181
+ Creating B1: Error message for B1
182
+ Loading errors for new App: foo
183
+ Creating 1: Something went wrong
184
+ Creating 2: We are seriously foobared
185
+ Creating 3: We are less seriously foobared
186
+
187
+ TODO
188
+ ----
189
+
190
+ Ability to specify more than 1 directory for Rails.env overrides. Could potentially be used if you have that
191
+ x Gigabyte seed file that you don't want to check into source control and only want to run on production?
192
+
193
+ YAML should allow loading as either array or hash. (currently only does hash)
194
+
195
+ CSV should have options such as only: and except: for using/skipping the specified header columns.
196
+
197
+ Allow multi-line config statement in seed file header? Would somehow need to mark it as such via end-of-line mark or
198
+ beginning-of-line mark or maybe use '#-' or '#%' for all command-type lines?
199
+
200
+ The structure.sql caching within rails uses the file timestamp to determine whether to prepare the test database. This
201
+ is error prone and forces you to do a 'touch db/structure.sql' to work around the file not being reloaded. Should
202
+ I add a utility to override this rails implementation with a sha-based one like the seed files use? (or am I the only
203
+ one who has to 'touch db/structure.sql' every time I switch branches?)
204
+
205
+ Add 'sql' loader (with disclaimer that it will temporarily truncate the table)
206
+
207
+ Ability to stop early when loading up a large seed file for a given environment, i.e., stop after processing the
208
+ first 10 lines when Rails.env.test?
209
+
210
+ I want to allow different seeding for different environments. For instance development might have a bunch of dummy
211
+ data useful for getting an environment up and running. I'm thinking either make the seed_dir similar to a PATH
212
+ environment variable where the first one found would override the others, or maybe make it automatic based on the
213
+ directory names and the environment (seed.development/state.yml would override seed/state.yml).
214
+
215
+ The test environment will be the one that will constantly be seeded after migrations or branch changes. Some of
216
+ the seed files might be large and take a long time to seed. The above
217
+ strategy using seed.test might be useful but it might also be useful to have a preprocessor type such as .sh so for
218
+ instance you might have seed.test/table_with_lotsa_rows.csv.sh which might consist of the line
219
+ 'head -20 ../seed/table_with_lotsa_rows.csv'
220
+
221
+ Caching of long-running stuff via pg_dump, mysqldump, or other? This belongs with discussion of the environment-specific
222
+ seeding above.
223
+
224
+ Allow config-driven initialization so that we could require: false in the Gemfile and only load as needed.
225
+
226
+ Meta
227
+ ----
228
+
229
+ * Code: `git clone git://github.com/bpardee/data_seeder.git`
230
+ * Home: <https://github.com/bpardee/data_seeder>
231
+ * Issues: <http://github.com/bpardee/data_seeder/issues>
232
+ * Gems: <http://rubygems.org/gems/data_seeder>
233
+
234
+ This project uses [Semantic Versioning](http://semver.org/).
235
+
236
+ Author
237
+ ------
238
+
239
+ [Brad Pardee](https://github.com/bpardee)
240
+
241
+ License
242
+ -------
243
+
244
+ Copyright 2015 Brad Pardee
245
+
246
+ Licensed under the Apache License, Version 2.0 (the "License");
247
+ you may not use this file except in compliance with the License.
248
+ You may obtain a copy of the License at
249
+
250
+ http://www.apache.org/licenses/LICENSE-2.0
251
+
252
+ Unless required by applicable law or agreed to in writing, software
253
+ distributed under the License is distributed on an "AS IS" BASIS,
254
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
255
+ See the License for the specific language governing permissions and
256
+ limitations under the License.
data/Rakefile ADDED
@@ -0,0 +1,34 @@
1
# Rake tasks for developing the gem: API docs, the dummy-app engine tasks,
# gem packaging helpers and the test suite (which is the default task).
begin
  require 'bundler/setup'
rescue LoadError
  puts 'You must `gem install bundler` and `bundle install` to run rake tasks'
end

require 'rdoc/task'

# `rake rdoc` writes the generated documentation into ./rdoc.
RDoc::Task.new(:rdoc) do |rdoc|
  rdoc.rdoc_dir = 'rdoc'
  rdoc.title    = 'DataSeeder'
  rdoc.options << '--line-numbers'
  # The gem ships README.md (there is no README.rdoc), so include the file
  # that actually exists; otherwise the README is silently left out.
  rdoc.rdoc_files.include('README.md')
  rdoc.rdoc_files.include('lib/**/*.rb')
end

# Point the engine tasks at the dummy Rails application used by the tests.
APP_RAKEFILE = File.expand_path("../test/dummy/Rakefile", __FILE__)
load 'rails/tasks/engine.rake'

# Adds the gem build / install / release tasks.
Bundler::GemHelper.install_tasks

require 'rake/testtask'

# `rake test` runs every test/**/*_test.rb file with lib and test on the
# load path.
Rake::TestTask.new(:test) do |t|
  t.libs << 'lib'
  t.libs << 'test'
  t.pattern = 'test/**/*_test.rb'
  t.verbose = false
end

task default: :test
@@ -0,0 +1,34 @@
1
+ require 'digest'
2
+
3
module DataSeeder
  # Database record of a seed file that has been applied: its path and the
  # SHA-256 digest of the content that was last loaded. The digest is what
  # lets unchanged files be skipped on later runs.
  class SeedFile < ActiveRecord::Base
    # Builds a lookup of every stored record, keyed by its path.
    #
    # @return [Hash] path => SeedFile
    def self.file_hash
      all.each_with_object({}) do |seed_file, by_path|
        by_path[seed_file.path] = seed_file
      end
    end

    # Loads the seed file at +path+ if its content changed since the last
    # recorded load.
    #
    # Looks up the single matching row instead of materialising the whole
    # table through file_hash for every file that is processed.
    #
    # @param path [String] path of the seed file
    # @return [true, nil] true when the file was processed and recorded,
    #   nil when it was skipped
    def self.load(path)
      find_or_initialize_by(path: path).load
    end

    # Processes this seed file with the loader registered for its extension
    # and stores the new digest, unless the digest is unchanged.
    #
    # Files without an extension are skipped silently; files whose extension
    # has no registered loader are skipped with a logged warning. In both
    # cases nothing is saved.
    #
    # @return [true, nil] true when the file was processed and saved,
    #   nil otherwise
    def load
      new_sha256 = Digest::SHA256.file(path).hexdigest
      return if sha256 == new_sha256

      self.sha256 = new_sha256
      ext = File.extname(path)[1..-1]
      return unless ext

      loader = DataSeeder.config.loaders[ext]
      unless loader
        DataSeeder.logger.info { "Warning: No loader for #{path}" }
        return
      end

      # Persist the digest only after the loader finished, so a failed load
      # is retried on the next run.
      loader.process(path)
      save!
    end
  end
end
@@ -0,0 +1,9 @@
1
# Creates the table that records, per seed file path, the SHA-256 digest
# stored for that file. Paths are unique.
class CreateDataSeederSeedFiles < ActiveRecord::Migration
  def change
    create_table(:data_seeder_seed_files) do |table|
      table.string(:path, null: false)
      table.string(:sha256, null: false)
    end

    # One row per path.
    add_index(:data_seeder_seed_files, :path, unique: true)
  end
end
@@ -0,0 +1,68 @@
1
+ require 'data_seeder/config'
2
+ require 'data_seeder/engine'
3
+ require 'data_seeder/loader'
4
+ require 'data_seeder/logger'
5
+
6
# Entry point of the gem: holds the global configuration and loads every
# file found under the configured seed directory.
module DataSeeder
  # Symbols understood by .to_id, ordered by value: '0'-'9' are 0-9 and
  # 'A'-'Z' are 10-35.
  ID_SYMBOLS = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ'.freeze
  # Value used for positions past the end of the cleaned-up string (36).
  ID_PADDING = ID_SYMBOLS.length
  # Radix of the generated ids: the 36 symbols plus the padding value (37).
  ID_RADIX = ID_PADDING + 1

  class << self
    attr_writer :config
  end

  # @return [Config] the current configuration, created on first use
  def self.config
    @config ||= Config.new
  end

  # Replaces the current configuration with a fresh default one.
  #
  # @return [Config] the new configuration
  def self.reset
    @config = Config.new
  end

  # Yields the current configuration so it can be modified in place.
  def self.configure
    yield(config)
  end

  # @return the logger of the current configuration
  def self.logger
    config.logger
  end

  # Applies +new_config+ to the current configuration, then hands every
  # regular file below the seed directory to SeedFile.load and logs the
  # elapsed time.
  #
  # Paths are processed in sorted order: Dir[] only guarantees an order on
  # Ruby 3.0 and later, and without sorting the sequence depends on the
  # filesystem.
  #
  # @param new_config [Hash] setting name => value; each pair is applied
  #   through the matching public writer on Config
  def self.run(new_config = {})
    # Monotonic clock: immune to wall-clock adjustments and needs no
    # Benchmark extension to be loaded.
    started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)

    new_config.each do |key, value|
      config.public_send("#{key}=", value)
    end

    Dir.chdir(config.seed_dir) do
      Dir['**/*'].sort.each do |path|
        SeedFile.load(path) if File.file?(path)
      end
    end

    msec = (Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at) * 1000
    logger.info { "DataSeeder.run took #{msec.to_i} msec" }
  end

  # Same as .run, but first redirects output to Rails.logger.
  #
  # @param new_config [Hash] see .run
  def self.test_run(new_config = {})
    config.logger = Rails.logger
    run(new_config)
  end

  # Derives an integer from the first +len+ alphanumeric characters of
  # +str+, read as a base-37 number.
  #
  # The string is upcased and everything except A-Z and 0-9 is dropped.
  # Digits count as 0-9, letters as 10-35, and every position beyond the end
  # of the cleaned string counts as 36.
  #
  # @param len [Integer] number of positions to encode
  # @param str [String] text to derive the id from
  # @return [Integer]
  def self.to_id(len, str)
    cleaned = str.upcase.gsub(/[^A-Z0-9]/, '')
    len.times.reduce(0) do |id, i|
      char = cleaned[i]
      id * ID_RADIX + (char ? ID_SYMBOLS.index(char) : ID_PADDING)
    end
  end
end
@@ -0,0 +1,41 @@
1
module DataSeeder
  # Settings for a seeding run: the directory holding the seed files, the
  # logger that receives progress output and the loaders registered per
  # file extension.
  class Config
    attr_accessor :seed_dir, :logger
    attr_reader :loaders

    # Starts with 'db/seed' as the seed directory, a new Logger and the
    # default loaders.
    def initialize
      @seed_dir = 'db/seed'
      @logger = Logger.new
      @loaders = default_loaders
    end

    # Forwards the verbose flag to the logger.
    #
    # The logger can be replaced through #logger= by one that has no verbose
    # attribute; in that case the flag is ignored instead of raising
    # NoMethodError.
    def verbose=(verbose)
      @logger.verbose = verbose if @logger.respond_to?(:verbose=)
    end

    # @return the logger's verbose flag, or nil when the current logger has
    #   no such attribute
    def verbose
      @logger.verbose if @logger.respond_to?(:verbose)
    end

    # @return [Hash] a new extension => loader hash holding a fresh instance
    #   of each built-in loader
    def default_loaders
      {
        'csv'  => Loader::CSV.new,
        'json' => Loader::JSON.new,
        'txt'  => Loader::Txt.new,
        'yaml' => Loader::YAML.new,
        'yml'  => Loader::YAML.new,
      }
    end

    # Replaces the registered loaders with the defaults overlaid by
    # +loaders+; loaders added earlier are dropped.
    #
    # @param loaders [Hash] extension => loader
    def loaders=(loaders)
      @loaders = default_loaders.merge(loaders)
    end

    # Registers several loaders on top of the ones already present.
    #
    # @param loaders [Hash] extension => loader
    def add_loaders(loaders)
      @loaders = @loaders.merge(loaders)
    end

    # Registers +loader+ for files with the extension +ext+.
    #
    # @param ext [String] extension without the leading dot
    # @param loader [Object] the loader to use for that extension
    def add_loader(ext, loader)
      @loaders[ext] = loader
    end
  end
end