grainery 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13,6 +13,7 @@ module Grainery
13
13
  @grainery_path = load_grainery_path
14
14
  @database_configs = load_database_connections
15
15
  @lookup_tables = load_lookup_tables
16
+ @anonymize_fields = load_anonymize_fields
16
17
  end
17
18
 
18
19
  def load_config
@@ -35,11 +36,63 @@ module Grainery
35
36
  # Detect databases and model base classes dynamically
36
37
  detected_databases = detect_databases_and_models
37
38
 
39
+ # Detect anonymizable fields from actual database schema
40
+ detected_fields = detect_anonymizable_fields(detected_databases)
41
+
42
+ # Build default anonymize_fields hash
43
+ default_anonymize_fields = {
44
+ 'email' => 'email',
45
+ 'first_name' => 'first_name',
46
+ 'last_name' => 'last_name',
47
+ 'name' => 'name',
48
+ 'phone' => 'phone_number',
49
+ 'phone_number' => 'phone_number',
50
+ 'address' => 'address',
51
+ 'street_address' => 'street_address',
52
+ 'city' => 'city',
53
+ 'state' => 'state',
54
+ 'zip' => 'zip_code',
55
+ 'zip_code' => 'zip_code',
56
+ 'postal_code' => 'zip_code',
57
+ 'ssn' => 'ssn',
58
+ 'credit_card' => 'credit_card_number',
59
+ 'password' => 'password',
60
+ 'token' => 'token',
61
+ 'api_key' => 'api_key',
62
+ 'secret' => 'secret',
63
+ 'iban' => 'iban',
64
+ 'vat_number' => 'greek_vat',
65
+ 'afm' => 'greek_vat',
66
+ 'identity_number' => 'identity_number',
67
+ 'id_number' => 'identity_number',
68
+ 'national_id' => 'identity_number',
69
+ 'amka' => 'greek_amka',
70
+ 'social_security_number' => 'greek_amka',
71
+ 'ssn_greek' => 'greek_amka',
72
+ 'personal_number' => 'greek_personal_number',
73
+ 'personal_id' => 'greek_personal_number',
74
+ 'afm_extended' => 'greek_personal_number',
75
+ 'ada' => 'greek_ada',
76
+ 'diavgeia_id' => 'greek_ada',
77
+ 'decision_number' => 'greek_ada',
78
+ 'adam' => 'greek_adam',
79
+ 'adam_number' => 'greek_adam',
80
+ 'procurement_id' => 'greek_adam',
81
+ 'date_of_birth' => 'date_of_birth',
82
+ 'birth_date' => 'date_of_birth',
83
+ 'dob' => 'date_of_birth',
84
+ 'birthdate' => 'date_of_birth'
85
+ }
86
+
87
+ # Merge detected fields with defaults (detected fields take precedence)
88
+ anonymize_fields = default_anonymize_fields.merge(detected_fields)
89
+
38
90
  # Build configuration hash
39
91
  config = {
40
92
  'database_connections' => detected_databases,
41
93
  'grainery_path' => 'db/grainery',
42
94
  'lookup_tables' => [],
95
+ 'anonymize_fields' => anonymize_fields,
43
96
  'last_updated' => Time.now.to_s
44
97
  }
45
98
 
@@ -47,6 +100,120 @@ module Grainery
47
100
  write_config_file(config_path, config)
48
101
 
49
102
  puts " ✓ Created config/grainery.yml with #{detected_databases.size} detected databases"
103
+ puts " ✓ Detected #{detected_fields.size} anonymizable fields in database schema"
104
+ end
105
+
106
# Scans the columns of every concrete model under each detected database and
# maps column names that look sensitive to an anonymization method name.
#
# A column is matched against an ordered list of name patterns; the first
# pattern that matches decides the method. Columns named id, created_at and
# updated_at are ignored.
#
# Each (database, table, column) combination is recorded once, even when
# several model classes share one table (e.g. single-table inheritance), so a
# shared table is not mistaken for "the field appears in multiple tables".
#
# @param detected_databases [Hash] database name => config hash; the
#   'model_base_class' entry is resolved with String#constantize
# @return [Hash{String=>String}] config key => method name. The key is the bare
#   column name when the column was found in exactly one table; otherwise it is
#   "db.table.column" (several tables in the same database) or "table.column".
def detect_anonymizable_fields(detected_databases)
  puts " → Detecting anonymizable fields from database schema..."

  # Field name patterns mapped to anonymization methods (order matters:
  # the first match wins). \A / \z anchor the whole column name.
  field_patterns = {
    /email/i => 'email',
    /first_name/i => 'first_name',
    /last_name/i => 'last_name',
    /\Aname\z/i => 'name',
    /phone/i => 'phone_number',
    /mobile/i => 'phone_number',
    /address/i => 'address',
    /street/i => 'street_address',
    /city/i => 'city',
    /state/i => 'state',
    /zip/i => 'zip_code',
    /postal/i => 'zip_code',
    /ssn/i => 'ssn',
    /credit_card/i => 'credit_card_number',
    /password/i => 'password',
    /token/i => 'token',
    /api_key/i => 'api_key',
    /secret/i => 'secret',
    /iban/i => 'iban',
    /vat_number/i => 'greek_vat',
    /afm/i => 'greek_vat',
    /amka/i => 'greek_amka',
    /social_security_number/i => 'greek_amka',
    /personal_number/i => 'greek_personal_number',
    /personal_id/i => 'greek_personal_number',
    /ada\z/i => 'greek_ada',
    /diavgeia/i => 'greek_ada',
    /adam/i => 'greek_adam',
    /procurement_id/i => 'greek_adam',
    /(date_of_)?birth/i => 'date_of_birth',
    /dob\z/i => 'date_of_birth',
    /identity_number/i => 'identity_number',
    /national_id/i => 'identity_number'
  }

  # column name => list of { db:, table:, model:, method: } where it was found
  field_locations = Hash.new { |hash, key| hash[key] = [] }

  detected_databases.each do |db_name, db_config|
    begin
      base_class = db_config['model_base_class'].constantize

      # Concrete (non-abstract) classes inheriting from the base class.
      # Classes that raise while being inspected are left out.
      models = ObjectSpace.each_object(Class).select do |klass|
        klass < base_class && !klass.abstract_class?
      rescue
        false
      end

      models.each do |model|
        begin
          table_name = model.table_name

          model.columns.each do |column|
            column_name = column.name

            # Skip internal Rails columns
            next if %w[id created_at updated_at].include?(column_name)

            _pattern, anonymizer = field_patterns.find { |pattern, _| column_name.match?(pattern) }
            next unless anonymizer

            locations = field_locations[column_name]

            # Several models can map to one table; count that table once.
            next if locations.any? { |loc| loc[:db] == db_name && loc[:table] == table_name }

            locations << { db: db_name, table: table_name, model: model.name, method: anonymizer }
            puts " ✓ Found: #{model.name}.#{column_name} → #{anonymizer}"
          end
        rescue
          # Best effort: skip models that can't be analyzed
          next
        end
      end
    rescue => e
      puts " ⚠ Warning: Could not analyze #{db_name}: #{e.message}"
    end
  end

  # Build the result, scoping keys only when a column lives in several tables
  detected_fields = {}

  field_locations.each do |field_name, locations|
    if locations.size == 1
      detected_fields[field_name] = locations.first[:method]
      next
    end

    puts " ℹ Field '#{field_name}' appears in multiple tables - using scoped configuration"
    locations.each do |loc|
      same_db_count = locations.count { |other| other[:db] == loc[:db] }
      scoped_key =
        if same_db_count > 1
          # Multiple tables in same database - use db.table.field
          "#{loc[:db]}.#{loc[:table]}.#{field_name}"
        else
          # Single table per database - use table.field
          "#{loc[:table]}.#{field_name}"
        end

      detected_fields[scoped_key] = loc[:method]
      puts " → #{scoped_key}"
    end
  end

  detected_fields
end
51
218
 
52
219
  def detect_databases_and_models
@@ -149,6 +316,21 @@ module Grainery
149
316
  content << "# Lookup tables (harvest all records)"
150
317
  content << "lookup_tables: #{config['lookup_tables'].inspect}"
151
318
  content << ""
319
+ content << "# Field anonymization (column_name => faker_method)"
320
+ content << "# Set to empty hash {} to disable anonymization"
321
+ content << "# Faker methods: email, first_name, last_name, name, phone_number, address,"
322
+ content << "# street_address, city, state, zip_code, ssn, credit_card_number,"
323
+ content << "# password, token, api_key, secret, iban, greek_vat, greek_amka,"
324
+ content << "# greek_personal_number, greek_ada, identity_number"
325
+ content << "anonymize_fields:"
326
+ if config['anonymize_fields']&.any?
327
+ config['anonymize_fields'].each do |field, faker_method|
328
+ content << " #{field}: #{faker_method}"
329
+ end
330
+ else
331
+ content << " {}"
332
+ end
333
+ content << ""
152
334
  content << "# Metadata"
153
335
  content << "last_updated: #{config['last_updated']}"
154
336
  content << ""
@@ -205,6 +387,14 @@ module Grainery
205
387
  Set.new
206
388
  end
207
389
 
390
# Reads the 'anonymize_fields' section from the loaded configuration.
#
# @return [Hash] the configured mapping, or an empty hash when the section is
#   missing, is not a Hash, or reading it raises (a warning is printed then)
def load_anonymize_fields
  configured = @config['anonymize_fields']
  return {} unless configured.is_a?(Hash)

  configured
rescue => error
  puts " Warning: Could not load anonymize_fields: #{error.message}"
  {}
end
397
+
208
398
  # Security: Safe constant resolution with whitelist
209
399
  def safe_const_get(class_name)
210
400
  unless class_name.match?(ALLOWED_BASE_CLASS_PATTERN)
@@ -251,7 +441,7 @@ module Grainery
251
441
  :primary
252
442
  end
253
443
 
254
- def harvest_all(limit: nil)
444
+ def harvest_all(limit: nil, dump_schema: false, anonymize: true)
255
445
  all_models = get_all_models
256
446
 
257
447
  models_to_harvest = all_models.reject do |model|
@@ -260,18 +450,31 @@ module Grainery
260
450
  model.table_name.nil?
261
451
  end
262
452
 
263
- harvest_models(models_to_harvest, limit: limit)
453
+ harvest_models(models_to_harvest, limit: limit, dump_schema: dump_schema, anonymize: anonymize)
264
454
  end
265
455
 
266
- def harvest_models(models, limit: nil)
456
+ def harvest_models(models, limit: nil, dump_schema: false, anonymize: true)
267
457
  models = Array(models)
268
458
  return if models.empty?
269
459
 
460
+ # Require faker if anonymization is enabled
461
+ if anonymize && @anonymize_fields.any?
462
+ begin
463
+ require 'faker'
464
+ rescue LoadError
465
+ puts " ⚠ Warning: Faker gem not found. Anonymization disabled."
466
+ puts " Install with: gem install faker"
467
+ anonymize = false
468
+ end
469
+ end
470
+
270
471
  puts "\n" + "="*80
271
472
  puts "Grainer - Extracting Database Seeds"
272
473
  puts "="*80
273
474
  puts "Total models: #{models.size}"
274
475
  puts "Limit per table: #{limit || 'ALL RECORDS'}"
476
+ puts "Schema dump: #{dump_schema ? 'YES' : 'NO'}"
477
+ puts "Anonymization: #{anonymize && @anonymize_fields.any? ? "YES (#{@anonymize_fields.size} fields)" : 'NO'}"
275
478
  puts "="*80 + "\n"
276
479
 
277
480
  # Group by database
@@ -287,13 +490,23 @@ module Grainery
287
490
  FileUtils.mkdir_p(db_dir)
288
491
  end
289
492
 
493
+ # Dump schemas if requested
494
+ if dump_schema
495
+ puts "\n" + "-"*80
496
+ puts "Dumping Database Schemas"
497
+ puts "-"*80
498
+ grouped_models.each do |db_name, _|
499
+ dump_database_schema(db_name)
500
+ end
501
+ end
502
+
290
503
  # Harvest in dependency order
291
504
  load_order.each do |model|
292
505
  next unless models.include?(model)
293
506
 
294
507
  begin
295
508
  db_name = detect_database(model)
296
- harvest_table(model, db_name, limit: limit)
509
+ harvest_table(model, db_name, limit: limit, anonymize: anonymize)
297
510
  rescue => e
298
511
  puts " ✗ Error harvesting #{model.name}: #{e.message}"
299
512
  end
@@ -355,7 +568,7 @@ module Grainery
355
568
  sorted
356
569
  end
357
570
 
358
- def harvest_table(model, db_name, limit: nil)
571
+ def harvest_table(model, db_name, limit: nil, anonymize: true)
359
572
  table_name = model.table_name
360
573
  is_lookup = @lookup_tables.include?(table_name)
361
574
 
@@ -374,13 +587,14 @@ module Grainery
374
587
  end
375
588
 
376
589
  # Generate seed file
377
- seed_content = generate_seed_content(model, records, db_name)
590
+ seed_content = generate_seed_content(model, records, db_name, anonymize: anonymize)
378
591
  seed_path = get_seed_path(model, db_name)
379
592
 
380
593
  File.write(seed_path, seed_content)
381
594
 
595
+ anonymize_suffix = anonymize && @anonymize_fields.any? ? " [anonymized]" : ""
382
596
  record_info = is_lookup ? " (lookup: #{records.size} records)" : " (#{records.size} records)"
383
- puts " ✓ #{model.name.ljust(50)} → #{table_name}.rb#{record_info}"
597
+ puts " ✓ #{model.name.ljust(50)} → #{table_name}.rb#{record_info}#{anonymize_suffix}"
384
598
  end
385
599
 
386
600
  def get_seed_path(model, db_name)
@@ -389,7 +603,7 @@ module Grainery
389
603
  Rails.root.join(db_dir, "#{model.table_name}.rb")
390
604
  end
391
605
 
392
- def generate_seed_content(model, records, db_name)
606
+ def generate_seed_content(model, records, db_name, anonymize: true)
393
607
  table_name = model.table_name
394
608
 
395
609
  # Get columns to export (exclude id, timestamps)
@@ -401,6 +615,7 @@ module Grainery
401
615
  content << "# Harvested from #{db_name} database: #{table_name}"
402
616
  content << "# Records: #{records.size}"
403
617
  content << "# Generated: #{Time.now}"
618
+ content << "# Anonymized: #{anonymize && @anonymize_fields.any? ? 'YES' : 'NO'}"
404
619
  content << ""
405
620
  content << "#{model.name}.create!("
406
621
 
@@ -411,6 +626,18 @@ module Grainery
411
626
 
412
627
  columns.each_with_index do |col, col_idx|
413
628
  value = record.send(col.name)
629
+
630
+ # Anonymize if enabled and field is configured
631
+ if anonymize
632
+ faker_method = get_anonymization_method(col.name, table_name, db_name)
633
+ if faker_method
634
+ # Skip anonymization if explicitly set to "skip"
635
+ unless faker_method.to_s == 'skip'
636
+ value = anonymize_value(col.name, faker_method, col, value)
637
+ end
638
+ end
639
+ end
640
+
414
641
  formatted_value = format_seed_value(value, col)
415
642
  comma = col_idx < columns.size - 1 ? ',' : ''
416
643
  content << " #{col.name}: #{formatted_value}#{comma}"
@@ -424,6 +651,341 @@ module Grainery
424
651
  content.join("\n")
425
652
  end
426
653
 
654
# Finds the anonymization method configured for a column.
#
# Keys are tried from most to least specific and the first truthy value wins:
#   1. "db.table.field"
#   2. "table.field"
#   3. "field"
#
# @param field_name [String] column name
# @param table_name [String] table the column belongs to
# @param db_name [String, Symbol] database name used in scoped keys
# @return [Object, nil] the configured value, or whatever the bare field key
#   holds (nil when absent) if no scoped key matched
def get_anonymization_method(field_name, table_name, db_name)
  table_scoped = "#{table_name}.#{field_name}"
  database_scoped = "#{db_name}.#{table_scoped}"

  [database_scoped, table_scoped].each do |key|
    configured = @anonymize_fields[key]
    return configured if configured
  end

  @anonymize_fields[field_name]
end
667
+
668
# Produces a fake replacement value for one column.
#
# Known method names are dispatched to Faker or to the generate_* helpers of
# this class. Any other name is sent to the Faker module itself when Faker
# responds to it. String results are cut to column.limit when a limit is set.
#
# @param field_name [String] column name, used only in the warning message
# @param faker_method [String, Symbol, nil] configured anonymization method
# @param column [#limit] column metadata
# @param original_value [Object, nil] current value; only the date_of_birth
#   generator receives it
# @return [Object, nil] the fake value; nil when faker_method is nil, when the
#   name is unknown, or when generation raised (a warning is printed then)
def anonymize_value(field_name, faker_method, column, original_value = nil)
  return nil if faker_method.nil?

  replacement =
    case faker_method.to_s
    when 'email' then Faker::Internet.email
    when 'first_name' then Faker::Name.first_name
    when 'last_name' then Faker::Name.last_name
    when 'name' then Faker::Name.name
    when 'phone_number' then Faker::PhoneNumber.phone_number
    when 'address' then Faker::Address.full_address
    when 'street_address' then Faker::Address.street_address
    when 'city' then Faker::Address.city
    when 'state' then Faker::Address.state
    when 'zip_code' then Faker::Address.zip_code
    when 'ssn' then Faker::IDNumber.valid
    when 'credit_card_number' then Faker::Finance.credit_card
    when 'password' then Faker::Internet.password
    when 'token' then generate_token(column)
    when 'api_key' then generate_api_key(column)
    when 'secret' then generate_secret(column)
    when 'iban' then generate_fake_iban(column)
    when 'greek_vat' then generate_fake_greek_vat(column)
    when 'greek_amka' then generate_fake_greek_amka(column)
    when 'greek_personal_number' then generate_fake_greek_personal_number(column)
    when 'greek_ada' then generate_fake_greek_ada(column)
    when 'greek_adam' then generate_fake_greek_adam(column)
    when 'date_of_birth' then generate_fake_date_of_birth(original_value, column)
    when 'identity_number' then generate_fake_identity_number(column)
    else
      # Unknown name: fall back to a method on the Faker module, if it has one
      Faker.send(faker_method) if Faker.respond_to?(faker_method)
    end

  # Only strings are truncated, and only when the column declares a limit
  must_truncate = replacement.is_a?(String) && column.limit
  must_truncate ? replacement[0...column.limit] : replacement
rescue => error
  # A failed generator yields nil rather than leaking the original value
  puts " ⚠ Warning: Could not anonymize #{field_name} with #{faker_method}: #{error.message}"
  nil
end
743
+
744
# Builds a random alphanumeric token of at most 32 characters, shorter when
# the column declares a smaller limit.
#
# @param column [#limit] column metadata
# @return [String] value produced by Faker::Alphanumeric.alphanumeric
def generate_token(column)
  max_length = 32
  length = [column.limit || max_length, max_length].min
  Faker::Alphanumeric.alphanumeric(number: length)
end
749
+
750
# Builds a random alphanumeric API key of at most 40 characters, shorter when
# the column declares a smaller limit.
#
# @param column [#limit] column metadata
# @return [String] value produced by Faker::Alphanumeric.alphanumeric
def generate_api_key(column)
  max_length = 40
  length = [column.limit || max_length, max_length].min
  Faker::Alphanumeric.alphanumeric(number: length)
end
755
+
756
# Builds a random alphanumeric secret of at most 64 characters, shorter when
# the column declares a smaller limit.
#
# @param column [#limit] column metadata
# @return [String] value produced by Faker::Alphanumeric.alphanumeric
def generate_secret(column)
  max_length = 64
  length = [column.limit || max_length, max_length].min
  Faker::Alphanumeric.alphanumeric(number: length)
end
761
+
762
# Generates a fake Greek IBAN (27 characters).
#
# Format: "GR" + 2 check digits + 7-digit bank code + 16-digit account number.
# The check digits are computed with the ISO 13616 mod-97 rule, so the full
# value passes standard IBAN checksum validation. When the column limit is
# shorter than the IBAN, the value is cut to the limit (and is then no longer
# a valid IBAN).
#
# @param column [#limit] column metadata
# @return [String] the fake IBAN, possibly truncated to column.limit
def generate_fake_iban(column)
  bank_code = rand(1000000..9999999).to_s
  account_number = rand(1000000000000000..9999999999999999).to_s
  bban = "#{bank_code}#{account_number}"

  # ISO 13616: append the country code as numbers (G=16, R=27) plus "00",
  # then check digits = 98 - (that number mod 97).
  check_digits = format('%02d', 98 - ("#{bban}162700".to_i % 97))
  iban = "GR#{check_digits}#{bban}"

  limit = column.limit
  limit && iban.length > limit ? iban[0...limit] : iban
end
783
+
784
# Generates a fake Greek VAT number (AFM): a random 9-digit number.
#
# With a column limit between 1 and 8 a random number of exactly that many
# digits is produced instead; with a limit of 0 or less the 9-digit value is
# sliced with that limit.
#
# @param column [#limit] column metadata
# @return [String] digits only
def generate_fake_greek_vat(column)
  full_number = rand(100000000..999999999).to_s
  width = column.limit

  # No limit, or room for all nine digits
  return full_number unless width && width < 9
  return full_number[0...width] unless width > 0

  smallest = 10 ** (width - 1)
  largest = (10 ** width) - 1
  rand(smallest..largest).to_s
end
805
+
806
# Generates a fake Greek AMKA (social security number), always 11 digits.
#
# Format: DDMMYY followed by a 5-digit sequence, e.g. 01011990001.
# The birth year is drawn from 1950..2005 and only its last two digits are
# emitted, so years 2000..2005 become "00".."05".
#
# @param column [#limit] column metadata
# @return [String] the AMKA, cut to column.limit when the limit is shorter
def generate_fake_greek_amka(column)
  day = format('%02d', rand(1..28)) # 28 keeps every month valid
  month = format('%02d', rand(1..12))
  # 50..105 stands for 1950..2005; modulo 100 keeps the field at two digits
  year = format('%02d', rand(50..105) % 100)
  sequence = format('%05d', rand(0..99999))

  amka = "#{day}#{month}#{year}#{sequence}"

  limit = column.limit
  limit && limit < amka.length ? amka[0...limit] : amka
end
833
+
834
# Generates a fake Greek Personal Number (12 characters).
#
# Format: 2 digits + 1 uppercase letter + 9-digit AFM, e.g. 12A123456789.
# Column limits: 12 or more keeps the full value; 3..11 keeps a prefix of it;
# 1..2 yields a random number with that many digits; 0 or less slices the
# full value with that limit.
#
# @param column [#limit] column metadata
# @return [String]
def generate_fake_greek_personal_number(column)
  leading_digits = rand(10..99).to_s
  leading_letter = ('A'..'Z').to_a.sample
  afm_part = rand(100000000..999999999).to_s
  full_value = "#{leading_digits}#{leading_letter}#{afm_part}"

  limit = column.limit
  return full_value unless limit && limit < 12

  if limit > 0 && limit < 3
    # Too short for the digit/letter prefix: fall back to plain digits
    rand(10 ** (limit - 1)...10 ** limit).to_s
  else
    full_value[0...limit]
  end
end
871
+
872
# Generates a fake Greek ADA (Diavgeia decision number).
#
# Format: 4 Greek capitals + 2 digits + 4 Greek capitals + "-" + 1 digit +
# 2 Greek capitals, e.g. ΨΚΕΘ46ΜΤΛΠ-7ΗΠ. The value is cut to column.limit
# when the limit is below 15.
#
# @param column [#limit] column metadata
# @return [String]
def generate_fake_greek_ada(column)
  alphabet = %w[Α Β Γ Δ Ε Ζ Η Θ Ι Κ Λ Μ Ν Ξ Ο Π Ρ Σ Τ Υ Φ Χ Ψ Ω]
  random_letters = ->(count) { Array.new(count) { alphabet.sample }.join }

  leading = random_letters.call(4)
  serial = Array.new(2) { rand(0..9) }.join
  middle = random_letters.call(4)
  suffix_digit = rand(0..9).to_s
  trailing = random_letters.call(2)

  ada = "#{leading}#{serial}#{middle}-#{suffix_digit}#{trailing}"

  limit = column.limit
  return ada unless limit && limit < 15

  ada[0...limit]
end
903
+
904
# Generates a fake Greek ADAM number (public procurement identifier).
#
# Format: 2 digits + "PROC" or "REQ" + 9 digits,
# e.g. 21PROC009041696 or 21REQ008902853. The value is cut to column.limit
# when the limit is shorter than the generated string.
#
# @param column [#limit] column metadata
# @return [String]
def generate_fake_greek_adam(column)
  random_digits = ->(count) { Array.new(count) { rand(0..9) }.join }

  year_digits = random_digits.call(2)
  kind = %w[PROC REQ].sample
  serial = random_digits.call(9)

  adam = "#{year_digits}#{kind}#{serial}"

  limit = column.limit
  return adam unless limit && limit < adam.length

  adam[0...limit]
end
927
+
928
# Generates a fake date of birth that keeps the person's approximate age.
#
# The original value's age in whole years is computed and a birthday is
# requested from Faker for an age within two years of it, with the lower
# bound never below 18. A nil value, an unsupported type, or any error while
# parsing or computing falls back to a birthday for an age between 18 and 80.
#
# @param original_value [Date, Time, DateTime, String, nil] current value
# @param column [Object] column metadata (not used by this generator)
# @return [Object] whatever Faker::Date.birthday returns
def generate_fake_date_of_birth(original_value, column)
  any_adult_birthday = -> { Faker::Date.birthday(min_age: 18, max_age: 80) }

  return any_adult_birthday.call if original_value.nil?

  begin
    born_on =
      case original_value
      when Date then original_value
      when Time, DateTime then original_value.to_date
      when String then Date.parse(original_value)
      else return any_adult_birthday.call
      end

    # Whole years elapsed; one less if this year's birthday is still ahead
    current_day = Date.today
    years_old = current_day.year - born_on.year
    years_old -= 1 if current_day < born_on + years_old.years

    youngest = [years_old - 2, 18].max # Preserve adulthood (minimum 18)
    oldest = years_old + 2

    Faker::Date.birthday(min_age: youngest, max_age: oldest)
  rescue
    # Fallback if parsing fails
    any_adult_birthday.call
  end
end
962
+
963
# Generates a fake identity number in a generic ID-card style.
#
# Default format: 2 uppercase letters + 6 digits. With a column limit of 2..7
# the value is letters followed by digits filling exactly the limit (at most
# two letters, at most half the limit); with a limit below 2 it is that many
# letters.
#
# @param column [#limit] column metadata
# @return [String]
def generate_fake_identity_number(column)
  alphabet = ('A'..'Z').to_a
  limit = column.limit

  # No limit, or room for the full 8-character format
  if !limit || limit >= 8
    return "#{alphabet.sample}#{alphabet.sample}#{rand(100000..999999)}"
  end

  # Very small limit, just use letters
  return limit.times.map { alphabet.sample }.join if limit < 2

  letter_count = [2, limit / 2].min
  digit_count = limit - letter_count
  letters_part = letter_count.times.map { alphabet.sample }.join
  digits_part = digit_count > 0 ? rand(10 ** (digit_count - 1)...10 ** digit_count).to_s : ''
  "#{letters_part}#{digits_part}"
end
988
+
427
989
  def format_seed_value(value, column)
428
990
  return 'nil' if value.nil?
429
991
 
@@ -447,6 +1009,79 @@ module Grainery
447
1009
  end
448
1010
  end
449
1011
 
1012
# Writes a schema.rb for one configured database into its grainery directory.
#
# The file contains an ActiveRecord::Schema.define block with a create_table
# (force: :cascade) per table and an add_index per index. The tables
# schema_migrations and ar_internal_metadata are left out, as is each table's
# "id" column. Table, column and index names are written with String#inspect,
# so the generated file remains valid Ruby when it is loaded later, whatever
# characters the names contain.
#
# Does nothing when db_name has no entry in @database_configs. Any error is
# reported with puts and not re-raised.
#
# @param db_name [String, Symbol] key into @database_configs
# @return [void]
def dump_database_schema(db_name)
  db_config = @database_configs[db_name]
  return unless db_config

  schema_path = Rails.root.join(@grainery_path, db_name.to_s, 'schema.rb')

  begin
    connection = safe_const_get(db_config[:model_base_class]).connection

    schema_content = [
      "# Schema dump for #{db_name} database",
      "# Generated: #{Time.now}",
      "# Adapter: #{db_config[:adapter]}",
      "",
      "ActiveRecord::Schema.define do",
      ""
    ]

    internal_tables = %w[schema_migrations ar_internal_metadata]

    connection.tables.sort.each do |table_name|
      # Skip internal Rails tables
      next if internal_tables.include?(table_name)

      schema_content << " create_table #{table_name.to_s.inspect}, force: :cascade do |t|"

      connection.columns(table_name).each do |column|
        next if column.name == 'id' # Primary key is handled by create_table

        schema_content << schema_column_line(column)
      end

      schema_content << " end"
      schema_content << ""

      # Query the indexes once and reuse the result for the trailing blank line
      indexes = connection.indexes(table_name)
      indexes.each { |index| schema_content << schema_index_line(table_name, index) }
      schema_content << "" if indexes.any?
    end

    schema_content << "end"
    schema_content << ""

    # The directory may not exist yet when this runs on its own
    FileUtils.mkdir_p(File.dirname(schema_path.to_s))
    File.write(schema_path, schema_content.join("\n"))
    puts " ✓ Schema dumped for #{db_name} → schema.rb"
  rescue => e
    puts " ✗ Error dumping schema for #{db_name}: #{e.message}"
  end
end

# Builds the `t.<type> "<name>", ...` line for one column.
def schema_column_line(column)
  attrs = []
  attrs << "limit: #{column.limit}" if column.limit
  attrs << "precision: #{column.precision}" if column.precision
  attrs << "scale: #{column.scale}" if column.scale
  attrs << "null: false" unless column.null
  attrs << "default: #{column.default.inspect}" unless column.default.nil?

  attrs_str = attrs.any? ? ", #{attrs.join(', ')}" : ""
  " t.#{column.type} #{column.name.to_s.inspect}#{attrs_str}"
end

# Builds the `add_index "<table>", <columns>, ...` line for one index.
def schema_index_line(table_name, index)
  columns = Array(index.columns).map { |name| name.to_s.inspect }
  columns_str = columns.size == 1 ? columns.first : "[#{columns.join(', ')}]"

  options = []
  options << "name: #{index.name.to_s.inspect}" if index.name
  options << "unique: true" if index.unique
  options_str = options.any? ? ", #{options.join(', ')}" : ""

  " add_index #{table_name.to_s.inspect}, #{columns_str}#{options_str}"
end
1084
+
450
1085
  def create_load_order_file(load_order, models)
451
1086
  order_path = Rails.root.join(@grainery_path, 'load_order.txt')
452
1087
 
@@ -469,7 +1104,7 @@ module Grainery
469
1104
  puts "\n ✓ Load order written to #{@grainery_path}/load_order.txt"
470
1105
  end
471
1106
 
472
- def load_seeds
1107
+ def load_seeds(load_schema: false)
473
1108
  order_file = Rails.root.join(@grainery_path, 'load_order.txt')
474
1109
 
475
1110
  unless File.exist?(order_file)
@@ -480,8 +1115,35 @@ module Grainery
480
1115
  puts "\n" + "="*80
481
1116
  puts "Loading Harvested Seeds"
482
1117
  puts "="*80
1118
+ puts "Load schema: #{load_schema ? 'YES' : 'NO'}"
1119
+ puts "="*80
1120
+
1121
+ # Load schemas first if requested
1122
+ if load_schema
1123
+ puts "\n" + "-"*80
1124
+ puts "Loading Database Schemas"
1125
+ puts "-"*80
1126
+
1127
+ @database_configs.each do |db_name, _|
1128
+ schema_file = Rails.root.join(@grainery_path, db_name.to_s, 'schema.rb')
1129
+ if File.exist?(schema_file)
1130
+ puts " → Loading schema for #{db_name}..."
1131
+ begin
1132
+ load schema_file
1133
+ rescue => e
1134
+ puts " ✗ Error loading schema for #{db_name}: #{e.message}"
1135
+ end
1136
+ else
1137
+ puts " ⚠ No schema file found for #{db_name}"
1138
+ end
1139
+ end
1140
+ end
483
1141
 
484
1142
  # Load harvested seeds in dependency order
1143
+ puts "\n" + "-"*80
1144
+ puts "Loading Seed Data"
1145
+ puts "-"*80
1146
+
485
1147
  File.readlines(order_file).each do |line|
486
1148
  line = line.strip
487
1149
  next if line.empty? || line.start_with?('#')