data_migration_for_rails 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +17 -0
  3. data/README.md +196 -0
  4. data/Rakefile +8 -0
  5. data/app/assets/config/manifest.js +2 -0
  6. data/app/assets/stylesheets/application.css +15 -0
  7. data/app/channels/application_cable/channel.rb +6 -0
  8. data/app/channels/application_cable/connection.rb +6 -0
  9. data/app/controllers/concerns/data_migration/pundit_authorization.rb +12 -0
  10. data/app/controllers/data_migration/application_controller.rb +63 -0
  11. data/app/controllers/data_migration/exports_controller.rb +68 -0
  12. data/app/controllers/data_migration/imports_controller.rb +78 -0
  13. data/app/controllers/data_migration/migration_executions_controller.rb +75 -0
  14. data/app/controllers/data_migration/migration_plans_controller.rb +103 -0
  15. data/app/controllers/data_migration/migration_steps_controller.rb +164 -0
  16. data/app/controllers/data_migration/users_controller.rb +71 -0
  17. data/app/controllers/users/sessions_controller.rb +30 -0
  18. data/app/helpers/data_migration/application_helper.rb +24 -0
  19. data/app/jobs/application_job.rb +9 -0
  20. data/app/jobs/export_job.rb +27 -0
  21. data/app/jobs/import_job.rb +28 -0
  22. data/app/mailers/application_mailer.rb +6 -0
  23. data/app/models/application_record.rb +5 -0
  24. data/app/models/data_migration_user.rb +43 -0
  25. data/app/models/migration_execution.rb +93 -0
  26. data/app/models/migration_plan.rb +23 -0
  27. data/app/models/migration_record.rb +60 -0
  28. data/app/models/migration_step.rb +150 -0
  29. data/app/policies/application_policy.rb +53 -0
  30. data/app/policies/data_migration/user_policy.rb +27 -0
  31. data/app/policies/data_migration_user_policy.rb +37 -0
  32. data/app/policies/migration_execution_policy.rb +33 -0
  33. data/app/policies/migration_plan_policy.rb +41 -0
  34. data/app/policies/migration_step_policy.rb +29 -0
  35. data/app/services/data_migration/model_registry.rb +95 -0
  36. data/app/services/exports/generator_service.rb +444 -0
  37. data/app/services/imports/processor_service.rb +457 -0
  38. data/app/services/migration_plans/export_config_service.rb +41 -0
  39. data/app/services/migration_plans/import_config_service.rb +158 -0
  40. data/app/views/data_migration/devise/registrations/edit.html.erb +41 -0
  41. data/app/views/data_migration/devise/sessions/new.html.erb +35 -0
  42. data/app/views/data_migration/devise/shared/_error_messages.html.erb +13 -0
  43. data/app/views/data_migration/devise/shared/_links.html.erb +21 -0
  44. data/app/views/data_migration/exports/new.html.erb +85 -0
  45. data/app/views/data_migration/imports/new.html.erb +70 -0
  46. data/app/views/data_migration/migration_executions/index.html.erb +78 -0
  47. data/app/views/data_migration/migration_executions/show.html.erb +338 -0
  48. data/app/views/data_migration/migration_plans/_form.html.erb +28 -0
  49. data/app/views/data_migration/migration_plans/edit.html.erb +12 -0
  50. data/app/views/data_migration/migration_plans/index.html.erb +118 -0
  51. data/app/views/data_migration/migration_plans/new.html.erb +9 -0
  52. data/app/views/data_migration/migration_plans/show.html.erb +105 -0
  53. data/app/views/data_migration/migration_steps/_form.html.erb +473 -0
  54. data/app/views/data_migration/migration_steps/edit.html.erb +12 -0
  55. data/app/views/data_migration/migration_steps/new.html.erb +9 -0
  56. data/app/views/data_migration/users/_form.html.erb +49 -0
  57. data/app/views/data_migration/users/edit.html.erb +2 -0
  58. data/app/views/data_migration/users/index.html.erb +41 -0
  59. data/app/views/data_migration/users/new.html.erb +2 -0
  60. data/app/views/data_migration/users/show.html.erb +133 -0
  61. data/app/views/layouts/_navbar.html.erb +38 -0
  62. data/app/views/layouts/data_migration.html.erb +37 -0
  63. data/app/views/layouts/mailer.html.erb +13 -0
  64. data/app/views/layouts/mailer.text.erb +1 -0
  65. data/app/views/users/registrations/edit.html.erb +41 -0
  66. data/app/views/users/sessions/new.html.erb +35 -0
  67. data/app/views/users/shared/_error_messages.html.erb +13 -0
  68. data/app/views/users/shared/_links.html.erb +21 -0
  69. data/config/initializers/assets.rb +14 -0
  70. data/config/initializers/content_security_policy.rb +27 -0
  71. data/config/initializers/devise.rb +313 -0
  72. data/config/initializers/filter_parameter_logging.rb +10 -0
  73. data/config/initializers/inflections.rb +18 -0
  74. data/config/initializers/permissions_policy.rb +15 -0
  75. data/config/initializers/warden.rb +14 -0
  76. data/config/locales/devise.en.yml +65 -0
  77. data/config/locales/en.yml +31 -0
  78. data/config/routes.rb +62 -0
  79. data/db/migrate/20251102121659_create_migration_plans.rb +13 -0
  80. data/db/migrate/20251102122012_create_migration_steps.rb +24 -0
  81. data/db/migrate/20251105215702_create_migration_executions.rb +23 -0
  82. data/db/migrate/20251105215853_create_migration_records.rb +16 -0
  83. data/db/migrate/20251115154000_remove_unused_attributes.rb +17 -0
  84. data/db/migrate/20251116120000_add_filter_params_to_migration_executions.rb +7 -0
  85. data/db/migrate/20251118140000_create_data_migration_users.rb +27 -0
  86. data/db/migrate/20251118200641_add_user_foreign_keys.rb +15 -0
  87. data/db/migrate/20251124140000_add_attachment_export_mode_to_migration_steps.rb +9 -0
  88. data/db/schema.rb +102 -0
  89. data/db/seeds.rb +19 -0
  90. data/lib/data_migration/engine.rb +28 -0
  91. data/lib/data_migration/version.rb +5 -0
  92. data/lib/data_migration.rb +8 -0
  93. data/lib/tasks/data_migration_tasks.rake +40 -0
  94. metadata +279 -0
@@ -0,0 +1,444 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'csv'
4
+ require 'fileutils'
5
+ require 'zlib'
6
+ require 'rubygems/package'
7
+
8
+ module Exports
9
+ class GeneratorService
10
+ attr_reader :migration_plan, :execution
11
+
12
# Prepares an export run for a single migration plan.
#
# @param migration_plan [Object] plan whose +migration_steps+ will be exported
# @param execution [Object] execution record that receives status and stats updates
def initialize(migration_plan, execution)
  @migration_plan = migration_plan
  @execution = execution
  @temp_dir = nil
  # Cache format: { step_id => { 'column_name' => [values] } }
  @exported_ids_cache = {}

  # Running counters; every numeric counter starts at zero.
  @stats = { total_steps: migration_plan.migration_steps.count }
  %i[
    completed_steps
    total_records
    processed_records
    total_attachments
    processed_attachments
  ].each { |counter| @stats[counter] = 0 }
  @stats[:errors] = []
end
27
+
28
# Runs the complete export.
#
# Marks the execution as running, exports every step into a temporary
# directory (Dir.mktmpdir removes it when the block exits), packs the
# directory into an archive and records the outcome. Any StandardError
# raised while exporting, archiving or finalizing is handed to
# +finalize_failure+ instead of propagating.
#
# @return [Object] whatever +finalize_success+ / +finalize_failure+ returns
def call
  execution.update!(status: :running, started_at: Time.current)

  Dir.mktmpdir do |workdir|
    begin
      export_all_steps(workdir)
      finalize_success(create_archive(workdir))
    rescue StandardError => failure
      finalize_failure(failure)
    end
  end
end
39
+
40
+ private
41
+
42
# Exports every step of the plan, in ascending +sequence+ order.
#
# Remembers the working directory in @temp_dir (read later when attachment
# files are written) and publishes progress after each finished step.
#
# @param temp_dir [String] directory that receives the generated files
def export_all_steps(temp_dir)
  @temp_dir = temp_dir

  ordered_steps = migration_plan.migration_steps.order(:sequence)
  ordered_steps.each do |current_step|
    export_step(current_step, temp_dir)
    @stats[:completed_steps] += 1
    update_progress
  end
end
50
+
51
# Writes one step's records to "<source_model_name>_export.csv" in temp_dir.
#
# The record set is counted up front for the progress totals, then streamed
# row by row; progress is published after every 100 processed records.
# Failures are appended to @stats[:errors] and re-raised.
#
# @param step [Object] migration step being exported
# @param temp_dir [String] directory that receives the CSV file
def export_step(step, temp_dir)
  model = step.source_model_name.constantize
  rows = get_records_for_step(step, model)

  @stats[:total_records] += rows.count
  update_progress

  # Prepare the value cache that steps depending on this one will read
  initialize_cache_for_step(step, model)

  # Arrays are walked with #each; anything else is iterated with #find_each
  walker = rows.is_a?(Array) ? :each : :find_each
  target = File.join(temp_dir, "#{step.source_model_name}_export.csv")

  CSV.open(target, 'wb') do |sheet|
    sheet << headers_for_step(step, model)

    rows.public_send(walker) do |item|
      sheet << row_data_for_record(item, step)
      cache_record_values(step, item)
      @stats[:processed_records] += 1
      update_progress if (@stats[:processed_records] % 100).zero?
    end
  end
rescue StandardError => failure
  @stats[:errors] << { step: step.source_model_name, error: failure.message }
  raise
end
87
+
88
# Builds the record set for a step.
#
# When the step has a filter_query, placeholders are substituted and the
# text is evaluated against the model class; otherwise all records are used.
# The result is then narrowed by the dependee filter.
#
# SECURITY(review): filter_query is executed as Ruby code via instance_eval,
# with execution.filter_params values spliced into the text beforehand.
# Anyone able to edit a step or supply filter params can run arbitrary code;
# this must stay restricted to fully trusted users.
#
# @param step [Object] migration step (reads +filter_query+)
# @param model_class [Class] model the query is evaluated against
# @return [Object] result of the filter query (or +model_class.all+) after
#   dependee filtering
def get_records_for_step(step, model_class)
  base_query =
    if step.filter_query.present?
      query = substitute_filter_params(step.filter_query.strip)
      # Drop one leading dot ('.where(...)' becomes 'where(...)'). The match is
      # anchored with \A rather than ^: ^ also matches at the start of later
      # lines, which would strip the dot from a chained call on its own line
      # ("where(...)\n.order(...)") and silently discard the earlier conditions.
      model_class.instance_eval(query.sub(/\A\./, ''))
    else
      model_class.all
    end

  # Apply dependee filtering if this step depends on another
  apply_dependee_filter(step, base_query, model_class)
end
105
+
106
# Replaces every {{key}} placeholder in a filter query with the matching
# value from execution.filter_params.
#
# The placeholder should sit inside quotes in the query template,
# e.g. where("created_at < ?", "{{cutoff_date}}").
#
# NOTE(review): values are spliced into text that is later evaluated as Ruby;
# they are not escaped here, so filter params must come from trusted users.
#
# @param query [String] filter query template
# @return [String] a copy of the query with all placeholders substituted
# @raise [RuntimeError] when placeholders remain after substitution
def substitute_filter_params(query)
  result = query.dup

  unless execution.filter_params.blank?
    execution.filter_params.each do |key, value|
      # Block form on purpose: a plain replacement string has its backslash
      # sequences (\0, \1, \\) interpreted by gsub, which corrupts values
      # such as Windows paths or regex fragments.
      result.gsub!("{{#{key}}}") { value.to_s }
    end
  end

  # Check for any remaining unsubstituted placeholders
  remaining_placeholders = result.scan(/\{\{(\w+)\}\}/).flatten.uniq
  if remaining_placeholders.any?
    raise "Filter query contains unsubstituted placeholders: #{remaining_placeholders.join(', ')}. " \
          'Please provide values for these parameters before starting the export.'
  end

  result
end
128
+
129
# Builds the CSV header row for a step.
#
# Order: the model's own columns, then one "association.attribute" column per
# column_overrides entry, then the attachment columns. Attachment columns are
# omitted when the step ignores attachments: "<name>_url" in url mode,
# "<name>_path/_filename/_content_type/_size" in raw_data mode.
#
# @param step [Object] migration step (reads +column_overrides+ and the mode predicates)
# @param model_class [Class] model being exported
# @return [Array<String>] header names
def headers_for_step(step, model_class)
  columns = model_class.column_names.dup

  if step.column_overrides.present?
    step.column_overrides.each do |association, attributes|
      columns.concat(Array(attributes).map { |attr| "#{association}.#{attr}" })
    end
  end

  return columns if step.ignore?

  suffixes =
    if step.url?
      %w[url]
    elsif step.raw_data?
      %w[path filename content_type size]
    else
      []
    end

  get_attachment_names(model_class).each do |attachment_name|
    suffixes.each { |suffix| columns << "#{attachment_name}_#{suffix}" }
  end

  columns
end
158
+
159
# Builds the CSV row for one record, in the same column order as
# headers_for_step: model columns, column_overrides association attributes,
# then attachment cells (skipped entirely when the step ignores attachments).
#
# @param record [Object] record being exported
# @param step [Object] migration step (reads +column_overrides+ and the mode predicates)
# @return [Array] cell values for the row
def row_data_for_record(record, step)
  model_class = record.class

  # Regular column values
  row = model_class.column_names.map { |column| record.send(column) }

  # Association attribute values; a missing association yields nil cells
  if step.column_overrides.present?
    step.column_overrides.each do |association, attributes|
      association_obj = record.send(association)
      Array(attributes).each { |attr| row << association_obj&.send(attr) }
    end
  end

  return row if step.ignore?

  get_attachment_names(model_class).each do |attachment_name|
    attachment = record.send(attachment_name)

    if step.url?
      row << (attachment.attached? ? attachment_url(attachment) : nil)
    elsif step.raw_data?
      row.concat(raw_attachment_cells(record, attachment, attachment_name, step))
    end
  end

  row
end

# Returns the four raw_data cells (path, filename, content type, size) for one
# attachment and keeps the attachment counters accurate: every attached file
# counts towards total_attachments, but only files that were actually written
# (non-nil path) count as processed.
def raw_attachment_cells(record, attachment, attachment_name, step)
  return [nil, nil, nil, nil] unless attachment.attached?

  @stats[:total_attachments] += 1
  file_path = export_attachment_file(record, attachment, attachment_name, step)
  @stats[:processed_attachments] += 1 if file_path

  [file_path, attachment.filename.to_s, attachment.content_type, attachment.byte_size]
end
213
+
214
# Packs everything under temp_dir into a gzip-compressed tar archive stored
# in <Rails.root>/tmp/exports, named "<parameterized plan name>_export_<timestamp>.tar.gz".
#
# @param temp_dir [String] directory whose contents are archived
# @return [String] absolute path of the archive that was written
def create_archive(temp_dir)
  timestamp = Time.current.strftime('%Y%m%d_%H%M%S')
  archive_name = "#{migration_plan.name.parameterize}_export_#{timestamp}.tar.gz"
  archive_path = Rails.root.join('tmp', 'exports', archive_name)

  FileUtils.mkdir_p(File.dirname(archive_path))

  # GzipWriter.open closes the gzip stream (writing the gzip footer) and the
  # underlying file when the block exits. TarWriter only flushes its IO and
  # never closes it, so without this the archive is left without a footer
  # and the file handle leaks.
  Zlib::GzipWriter.open(archive_path.to_s) do |gzip|
    Gem::Package::TarWriter.new(gzip) do |tar|
      # Recursively add all files and directories
      add_directory_to_tar(tar, temp_dir, temp_dir)
    end
  end

  archive_path.to_s
end
228
+
229
# Recursively adds every file below dir_path to the tar archive, including
# dotfiles. Entry names are paths relative to base_path; directories get no
# entries of their own, only their files are stored.
#
# @param tar [Gem::Package::TarWriter] archive being written
# @param dir_path [String] directory to walk
# @param base_path [String] root that entry names are made relative to
def add_directory_to_tar(tar, dir_path, base_path)
  prefix = "#{base_path.to_s.chomp('/')}/"

  # Dir.children lists names literally (and omits '.' and '..'), so paths that
  # contain glob metacharacters such as [ ] { } * ? are walked correctly.
  # Sorted for a stable entry order.
  Dir.children(dir_path).sort.each do |name|
    entry = File.join(dir_path, name)

    if File.directory?(entry)
      add_directory_to_tar(tar, entry, base_path)
    else
      relative_path = entry.delete_prefix(prefix)
      tar.add_file_simple(relative_path, File.stat(entry).mode, File.size(entry)) do |tar_file|
        # Stream in chunks instead of reading a whole (possibly large) file into memory
        File.open(entry, 'rb') { |source| IO.copy_stream(source, tar_file) }
      end
    end
  end
end
248
+
249
# Saves the current counters on the execution record, then pushes them to
# subscribers through broadcast_progress.
def update_progress
  snapshot = @stats
  execution.update!(stats: snapshot)
  broadcast_progress
end
253
+
254
# Publishes a 'progress' message on the "execution_<id>" stream containing
# the raw counters, the completion percentage and a human-readable summary.
def broadcast_progress
  stream = "execution_#{execution.id}"
  payload = {
    type: 'progress',
    stats: @stats,
    percentage: calculate_percentage,
    message: progress_message
  }

  ActionCable.server.broadcast(stream, payload)
end
265
+
266
# Share of counted records that have been processed so far.
#
# @return [Integer, Float] 0 when no records have been counted yet,
#   otherwise the percentage rounded to two decimals
def calculate_percentage
  total = @stats[:total_records]
  return 0 if total.zero?

  done = @stats[:processed_records]
  (done.to_f / total * 100).round(2)
end
271
+
272
# Human-readable progress line built from the step and record counters.
#
# @return [String]
def progress_message
  steps_done, steps_total, records_done, records_total =
    @stats.values_at(:completed_steps, :total_steps, :processed_records, :total_records)

  "Processing step #{steps_done}/#{steps_total} - " \
    "#{records_done}/#{records_total} records exported"
end
276
+
277
# Marks the execution as completed, stores the archive location together with
# the final counters, and announces completion to subscribers.
#
# @param archive_path [String] path of the generated archive
def finalize_success(archive_path)
  outcome = {
    status: :completed,
    completed_at: Time.current,
    file_path: archive_path,
    stats: @stats
  }
  execution.update!(outcome)

  broadcast_completion('Export completed successfully')
end
287
+
288
# Records a failed run: appends the error to the stats, marks the execution
# as failed with the full error text, and announces the failure.
#
# @param error [Exception] the error that aborted the export
def finalize_failure(error)
  @stats[:errors] << { general: error.message }

  execution.update!(
    status: :failed,
    completed_at: Time.current,
    # highlight: false keeps ANSI colour escapes out of the stored log;
    # the default follows $stderr.tty? and so varies with how the process
    # was started.
    error_log: error.full_message(highlight: false),
    stats: @stats
  )

  broadcast_completion("Export failed: #{error.message}")
end
300
+
301
# Publishes a 'completion' message on the "execution_<id>" stream with the
# execution's current status, the given text and the final counters.
#
# @param message [String] text shown to subscribers
def broadcast_completion(message)
  stream = "execution_#{execution.id}"
  payload = {
    type: 'completion',
    status: execution.status,
    message: message,
    stats: @stats
  }

  ActionCable.server.broadcast(stream, payload)
end
312
+
313
# Prepares the value cache for a step based on what the steps depending on it
# will need.
#
# Collects the dependee-side column names from every dependent step's
# dependee_attribute_mapping (format: { "company_id" => "id", "manager_id" => "email" },
# where the hash values name columns of this step) and creates one empty list
# per column in @exported_ids_cache[step.id]. Nothing is stored when no
# dependent step needs a column.
#
# @param step [Object] step about to be exported
# @param _model_class [Class] unused
def initialize_cache_for_step(step, _model_class)
  dependents = migration_plan.migration_steps.where(dependee_id: step.id)

  wanted_columns = Set.new
  dependents.each do |dependent|
    mapping = dependent.dependee_attribute_mapping
    wanted_columns.merge(mapping.values) unless mapping.blank?
  end

  return if wanted_columns.none?

  @exported_ids_cache[step.id] =
    wanted_columns.each_with_object({}) { |column, slots| slots[column] = [] }
end
339
+
340
# Appends this record's values for every cached column of the step.
# Does nothing when the step has no cache entry; blank values are skipped.
#
# @param step [Object] step the record belongs to
# @param record [Object] record that was just exported
def cache_record_values(step, record)
  column_buckets = @exported_ids_cache[step.id]
  return if column_buckets.blank?

  column_buckets.each do |column_name, bucket|
    cell = record.send(column_name)
    bucket << cell if cell.present?
  end
end
349
+
350
# Restricts a step's records to those that reference records exported by the
# step it depends on.
#
# dependee_attribute_mapping format: { "company_id" => "id" } filters this
# step's company_id by the "id" values cached while the dependee was exported.
#
# The query is returned untouched when the step has no dependee, the dependee
# cannot be found in the plan, no mapping is configured, or nothing was cached
# for the dependee. A mapped column that was never cached is skipped with a
# warning. A column that was cached but collected no values still filters, so
# an empty parent export produces an empty child export rather than the whole
# table.
#
# @param step [Object] step being exported
# @param base_query [Object] relation (responds to +where+) or Array of records
# @param _model_class [Class] unused
# @return [Object] the filtered relation or Array, or base_query unchanged
def apply_dependee_filter(step, base_query, _model_class)
  return base_query unless step.dependee_id.present?

  dependee_step = migration_plan.migration_steps.find_by(id: step.dependee_id)
  return base_query unless dependee_step.present?
  return base_query if step.dependee_attribute_mapping.blank?

  cached_values = @exported_ids_cache[dependee_step.id]
  return base_query unless cached_values.present?

  conditions = {}
  step.dependee_attribute_mapping.each do |local_column, dependee_column|
    values = cached_values[dependee_column]

    if values.blank?
      Rails.logger.warn "No cached values found for #{dependee_step.source_model_name}.#{dependee_column}"
    end

    # nil means the column was never cached (likely a configuration problem),
    # so it cannot be filtered on. An empty list is a real result and must
    # still narrow the query to nothing.
    conditions[local_column] = values unless values.nil?
  end

  return base_query if conditions.empty?

  if base_query.respond_to?(:where)
    base_query.where(conditions)
  else
    # Filter queries may evaluate to a plain Array; filter it in memory
    base_query.select do |record|
      conditions.all? { |column, allowed| allowed.include?(record.send(column)) }
    end
  end
end
389
+
390
# Lists the attachment names declared on a model class.
#
# @param model_class [Class] model to inspect
# @return [Array] names from +reflect_on_all_attachments+, or an empty array
#   when the class does not respond to that method
def get_attachment_names(model_class)
  if model_class.respond_to?(:reflect_on_all_attachments)
    model_class.reflect_on_all_attachments.map { |reflection| reflection.name }
  else
    []
  end
end
396
+
397
# Builds a full URL for an attachment (url export mode) with the
# rails_blob_url route helper.
#
# @param attachment [Object] attachment proxy responding to +attached?+
# @return [String, nil] the URL; nil when nothing is attached or when URL
#   generation raises (the error is logged)
def attachment_url(attachment)
  if attachment.attached?
    helpers = Rails.application.routes.url_helpers
    helpers.rails_blob_url(attachment, only_path: false)
  end
rescue StandardError => failure
  Rails.logger.error "Failed to generate URL for attachment: #{failure.message}"
  nil
end
408
+
409
# Copies an attachment's file into the export directory (raw_data mode).
#
# Files are stored under "<@temp_dir>/attachments/<source_model_name>/" and
# named "<record id>_<attachment name>_<sanitized original filename>".
#
# @param record [Object] owner of the attachment (reads +id+)
# @param attachment [Object] attachment to copy (reads +filename+ and +blob+)
# @param attachment_name [Symbol, String] name of the attachment on the model
# @param step [Object] migration step (reads +source_model_name+)
# @return [String, nil] path relative to the export root for use in the CSV;
#   nil when anything fails (the failure is logged and added to @stats[:errors])
def export_attachment_file(record, attachment, attachment_name, step)
  model_name = step.source_model_name
  target_dir = File.join(@temp_dir, 'attachments', model_name)
  FileUtils.mkdir_p(target_dir)

  stored_name = "#{record.id}_#{attachment_name}_#{sanitize_filename(attachment.filename.to_s)}"
  destination = File.join(target_dir, stored_name)

  # blob.open yields a local file handle; copy it to its final location
  attachment.blob.open { |downloaded| FileUtils.cp(downloaded.path, destination) }

  "attachments/#{model_name}/#{stored_name}"
rescue StandardError => failure
  Rails.logger.error "Failed to export attachment #{attachment_name} for record #{record.id}: #{failure.message}"
  @stats[:errors] << {
    step: step.source_model_name,
    record_id: record.id,
    attachment: attachment_name,
    error: failure.message
  }
  nil
end
437
+
438
# Makes a filename safe to use as a single path component by replacing path
# separators and other characters that are problematic on common filesystems
# ( / \ : * ? " < > | ) with underscores.
#
# @param filename [String] original filename
# @return [String] a new string with each unsafe character replaced by "_"
def sanitize_filename(filename)
  unsafe_characters = Regexp.union(%w[/ \\ : * ? " < > |])
  filename.gsub(unsafe_characters, '_')
end
443
+ end
444
+ end