grainery 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +64 -0
- data/README.md +321 -19
- data/lib/grainery/grainer.rb +671 -9
- data/lib/grainery/version.rb +1 -1
- data/lib/tasks/grainery_tasks.rake +60 -3
- data/lib/tasks/test_db_tasks.rake +12 -0
- metadata +27 -7
data/lib/grainery/grainer.rb
CHANGED
|
@@ -13,6 +13,7 @@ module Grainery
|
|
|
13
13
|
@grainery_path = load_grainery_path
|
|
14
14
|
@database_configs = load_database_connections
|
|
15
15
|
@lookup_tables = load_lookup_tables
|
|
16
|
+
@anonymize_fields = load_anonymize_fields
|
|
16
17
|
end
|
|
17
18
|
|
|
18
19
|
def load_config
|
|
@@ -35,11 +36,63 @@ module Grainery
|
|
|
35
36
|
# Detect databases and model base classes dynamically
|
|
36
37
|
detected_databases = detect_databases_and_models
|
|
37
38
|
|
|
39
|
+
# Detect anonymizable fields from actual database schema
|
|
40
|
+
detected_fields = detect_anonymizable_fields(detected_databases)
|
|
41
|
+
|
|
42
|
+
# Build default anonymize_fields hash
|
|
43
|
+
default_anonymize_fields = {
|
|
44
|
+
'email' => 'email',
|
|
45
|
+
'first_name' => 'first_name',
|
|
46
|
+
'last_name' => 'last_name',
|
|
47
|
+
'name' => 'name',
|
|
48
|
+
'phone' => 'phone_number',
|
|
49
|
+
'phone_number' => 'phone_number',
|
|
50
|
+
'address' => 'address',
|
|
51
|
+
'street_address' => 'street_address',
|
|
52
|
+
'city' => 'city',
|
|
53
|
+
'state' => 'state',
|
|
54
|
+
'zip' => 'zip_code',
|
|
55
|
+
'zip_code' => 'zip_code',
|
|
56
|
+
'postal_code' => 'zip_code',
|
|
57
|
+
'ssn' => 'ssn',
|
|
58
|
+
'credit_card' => 'credit_card_number',
|
|
59
|
+
'password' => 'password',
|
|
60
|
+
'token' => 'token',
|
|
61
|
+
'api_key' => 'api_key',
|
|
62
|
+
'secret' => 'secret',
|
|
63
|
+
'iban' => 'iban',
|
|
64
|
+
'vat_number' => 'greek_vat',
|
|
65
|
+
'afm' => 'greek_vat',
|
|
66
|
+
'identity_number' => 'identity_number',
|
|
67
|
+
'id_number' => 'identity_number',
|
|
68
|
+
'national_id' => 'identity_number',
|
|
69
|
+
'amka' => 'greek_amka',
|
|
70
|
+
'social_security_number' => 'greek_amka',
|
|
71
|
+
'ssn_greek' => 'greek_amka',
|
|
72
|
+
'personal_number' => 'greek_personal_number',
|
|
73
|
+
'personal_id' => 'greek_personal_number',
|
|
74
|
+
'afm_extended' => 'greek_personal_number',
|
|
75
|
+
'ada' => 'greek_ada',
|
|
76
|
+
'diavgeia_id' => 'greek_ada',
|
|
77
|
+
'decision_number' => 'greek_ada',
|
|
78
|
+
'adam' => 'greek_adam',
|
|
79
|
+
'adam_number' => 'greek_adam',
|
|
80
|
+
'procurement_id' => 'greek_adam',
|
|
81
|
+
'date_of_birth' => 'date_of_birth',
|
|
82
|
+
'birth_date' => 'date_of_birth',
|
|
83
|
+
'dob' => 'date_of_birth',
|
|
84
|
+
'birthdate' => 'date_of_birth'
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
# Merge detected fields with defaults (detected fields take precedence)
|
|
88
|
+
anonymize_fields = default_anonymize_fields.merge(detected_fields)
|
|
89
|
+
|
|
38
90
|
# Build configuration hash
|
|
39
91
|
config = {
|
|
40
92
|
'database_connections' => detected_databases,
|
|
41
93
|
'grainery_path' => 'db/grainery',
|
|
42
94
|
'lookup_tables' => [],
|
|
95
|
+
'anonymize_fields' => anonymize_fields,
|
|
43
96
|
'last_updated' => Time.now.to_s
|
|
44
97
|
}
|
|
45
98
|
|
|
@@ -47,6 +100,120 @@ module Grainery
|
|
|
47
100
|
write_config_file(config_path, config)
|
|
48
101
|
|
|
49
102
|
puts " ✓ Created config/grainery.yml with #{detected_databases.size} detected databases"
|
|
103
|
+
puts " ✓ Detected #{detected_fields.size} anonymizable fields in database schema"
|
|
104
|
+
end
|
|
105
|
+
|
|
106
|
+
# Scans the columns of every concrete model under each configured base class
# and maps sensitive-looking column names to anonymization method names.
#
# A column found in exactly one (database, table) pair is keyed by its plain
# name. A column found in several is keyed per location: "db.table.field" when
# that database holds it in more than one table, otherwise "table.field".
#
# @param detected_databases [Hash] database name => config hash; each config's
#   'model_base_class' entry is resolved with String#constantize
# @return [Hash{String=>String}] anonymization key => method name
def detect_anonymizable_fields(detected_databases)
  puts " → Detecting anonymizable fields from database schema..."

  # Ordered table: the first pattern that matches a column name wins.
  field_patterns = {
    /email/i => 'email',
    /first_name/i => 'first_name',
    /last_name/i => 'last_name',
    /^name$/i => 'name',
    /phone/i => 'phone_number',
    /mobile/i => 'phone_number',
    /address/i => 'address',
    /street/i => 'street_address',
    /city/i => 'city',
    /state/i => 'state',
    /zip/i => 'zip_code',
    /postal/i => 'zip_code',
    /ssn/i => 'ssn',
    /credit_card/i => 'credit_card_number',
    /password/i => 'password',
    /token/i => 'token',
    /api_key/i => 'api_key',
    /secret/i => 'secret',
    /iban/i => 'iban',
    /vat_number/i => 'greek_vat',
    /afm/i => 'greek_vat',
    /amka/i => 'greek_amka',
    /social_security_number/i => 'greek_amka',
    /personal_number/i => 'greek_personal_number',
    /personal_id/i => 'greek_personal_number',
    /ada$/i => 'greek_ada',
    /diavgeia/i => 'greek_ada',
    /adam/i => 'greek_adam',
    /procurement_id/i => 'greek_adam',
    /(date_of_)?birth/i => 'date_of_birth',
    /dob$/i => 'date_of_birth',
    /identity_number/i => 'identity_number',
    /national_id/i => 'identity_number'
  }

  # column name => distinct (db, table) locations where it was found
  field_locations = Hash.new { |h, k| h[k] = [] }

  detected_databases.each do |db_name, db_config|
    begin
      base_class = db_config['model_base_class'].constantize

      # Concrete descendants of the base class; classes that cannot answer are ignored.
      models = ObjectSpace.each_object(Class).select do |klass|
        klass < base_class && !klass.abstract_class?
      rescue
        false
      end

      models.each do |model|
        begin
          table_name = model.table_name

          model.columns.each do |column|
            column_name = column.name

            # Skip internal Rails columns
            next if %w[id created_at updated_at].include?(column_name)

            _pattern, method = field_patterns.find { |pattern, _| column_name.match?(pattern) }
            next unless method

            locations = field_locations[column_name]

            # Several models can share one table (e.g. single-table inheritance).
            # Record each physical column once so it is not mistaken for a
            # field that lives in multiple tables.
            next if locations.any? { |loc| loc[:db] == db_name && loc[:table] == table_name }

            locations << { db: db_name, table: table_name, model: model.name, method: method }
            puts " ✓ Found: #{model.name}.#{column_name} → #{method}"
          end
        rescue
          # Skip models that can't be analyzed
          next
        end
      end
    rescue => e
      puts " ⚠ Warning: Could not analyze #{db_name}: #{e.message}"
    end
  end

  detected_fields = {}

  field_locations.each do |field_name, locations|
    if locations.size == 1
      # Single occurrence - use simple field name
      detected_fields[field_name] = locations.first[:method]
      next
    end

    puts " ℹ Field '#{field_name}' appears in multiple tables - using scoped configuration"

    tables_per_db = locations.each_with_object(Hash.new(0)) { |loc, counts| counts[loc[:db]] += 1 }

    locations.each do |loc|
      scoped_key =
        if tables_per_db[loc[:db]] > 1
          # Multiple tables in same database - use db.table.field
          "#{loc[:db]}.#{loc[:table]}.#{field_name}"
        else
          # Single table per database - use table.field
          "#{loc[:table]}.#{field_name}"
        end

      detected_fields[scoped_key] = loc[:method]
      puts " → #{scoped_key}"
    end
  end

  detected_fields
end
|
|
51
218
|
|
|
52
219
|
def detect_databases_and_models
|
|
@@ -149,6 +316,21 @@ module Grainery
|
|
|
149
316
|
content << "# Lookup tables (harvest all records)"
|
|
150
317
|
content << "lookup_tables: #{config['lookup_tables'].inspect}"
|
|
151
318
|
content << ""
|
|
319
|
+
content << "# Field anonymization (column_name => faker_method)"
|
|
320
|
+
content << "# Set to empty hash {} to disable anonymization"
|
|
321
|
+
content << "# Faker methods: email, first_name, last_name, name, phone_number, address,"
|
|
322
|
+
content << "# street_address, city, state, zip_code, ssn, credit_card_number,"
|
|
323
|
+
content << "# password, token, api_key, secret, iban, greek_vat, greek_amka,"
|
|
324
|
+
content << "# greek_personal_number, greek_ada, identity_number"
|
|
325
|
+
content << "anonymize_fields:"
|
|
326
|
+
if config['anonymize_fields']&.any?
|
|
327
|
+
config['anonymize_fields'].each do |field, faker_method|
|
|
328
|
+
content << " #{field}: #{faker_method}"
|
|
329
|
+
end
|
|
330
|
+
else
|
|
331
|
+
content << " {}"
|
|
332
|
+
end
|
|
333
|
+
content << ""
|
|
152
334
|
content << "# Metadata"
|
|
153
335
|
content << "last_updated: #{config['last_updated']}"
|
|
154
336
|
content << ""
|
|
@@ -205,6 +387,14 @@ module Grainery
|
|
|
205
387
|
Set.new
|
|
206
388
|
end
|
|
207
389
|
|
|
390
|
+
# Reads the 'anonymize_fields' entry from the loaded config.
#
# @return [Hash] the configured mapping, or an empty hash when the entry is
#   missing, is not a Hash, or reading the config raised
def load_anonymize_fields
  configured = @config['anonymize_fields']
  return configured if configured.is_a?(Hash)

  {}
rescue => e
  puts " Warning: Could not load anonymize_fields: #{e.message}"
  {}
end
|
|
397
|
+
|
|
208
398
|
# Security: Safe constant resolution with whitelist
|
|
209
399
|
def safe_const_get(class_name)
|
|
210
400
|
unless class_name.match?(ALLOWED_BASE_CLASS_PATTERN)
|
|
@@ -251,7 +441,7 @@ module Grainery
|
|
|
251
441
|
:primary
|
|
252
442
|
end
|
|
253
443
|
|
|
254
|
-
def harvest_all(limit: nil)
|
|
444
|
+
def harvest_all(limit: nil, dump_schema: false, anonymize: true)
|
|
255
445
|
all_models = get_all_models
|
|
256
446
|
|
|
257
447
|
models_to_harvest = all_models.reject do |model|
|
|
@@ -260,18 +450,31 @@ module Grainery
|
|
|
260
450
|
model.table_name.nil?
|
|
261
451
|
end
|
|
262
452
|
|
|
263
|
-
harvest_models(models_to_harvest, limit: limit)
|
|
453
|
+
harvest_models(models_to_harvest, limit: limit, dump_schema: dump_schema, anonymize: anonymize)
|
|
264
454
|
end
|
|
265
455
|
|
|
266
|
-
def harvest_models(models, limit: nil)
|
|
456
|
+
def harvest_models(models, limit: nil, dump_schema: false, anonymize: true)
|
|
267
457
|
models = Array(models)
|
|
268
458
|
return if models.empty?
|
|
269
459
|
|
|
460
|
+
# Require faker if anonymization is enabled
|
|
461
|
+
if anonymize && @anonymize_fields.any?
|
|
462
|
+
begin
|
|
463
|
+
require 'faker'
|
|
464
|
+
rescue LoadError
|
|
465
|
+
puts " ⚠ Warning: Faker gem not found. Anonymization disabled."
|
|
466
|
+
puts " Install with: gem install faker"
|
|
467
|
+
anonymize = false
|
|
468
|
+
end
|
|
469
|
+
end
|
|
470
|
+
|
|
270
471
|
puts "\n" + "="*80
|
|
271
472
|
puts "Grainer - Extracting Database Seeds"
|
|
272
473
|
puts "="*80
|
|
273
474
|
puts "Total models: #{models.size}"
|
|
274
475
|
puts "Limit per table: #{limit || 'ALL RECORDS'}"
|
|
476
|
+
puts "Schema dump: #{dump_schema ? 'YES' : 'NO'}"
|
|
477
|
+
puts "Anonymization: #{anonymize && @anonymize_fields.any? ? "YES (#{@anonymize_fields.size} fields)" : 'NO'}"
|
|
275
478
|
puts "="*80 + "\n"
|
|
276
479
|
|
|
277
480
|
# Group by database
|
|
@@ -287,13 +490,23 @@ module Grainery
|
|
|
287
490
|
FileUtils.mkdir_p(db_dir)
|
|
288
491
|
end
|
|
289
492
|
|
|
493
|
+
# Dump schemas if requested
|
|
494
|
+
if dump_schema
|
|
495
|
+
puts "\n" + "-"*80
|
|
496
|
+
puts "Dumping Database Schemas"
|
|
497
|
+
puts "-"*80
|
|
498
|
+
grouped_models.each do |db_name, _|
|
|
499
|
+
dump_database_schema(db_name)
|
|
500
|
+
end
|
|
501
|
+
end
|
|
502
|
+
|
|
290
503
|
# Harvest in dependency order
|
|
291
504
|
load_order.each do |model|
|
|
292
505
|
next unless models.include?(model)
|
|
293
506
|
|
|
294
507
|
begin
|
|
295
508
|
db_name = detect_database(model)
|
|
296
|
-
harvest_table(model, db_name, limit: limit)
|
|
509
|
+
harvest_table(model, db_name, limit: limit, anonymize: anonymize)
|
|
297
510
|
rescue => e
|
|
298
511
|
puts " ✗ Error harvesting #{model.name}: #{e.message}"
|
|
299
512
|
end
|
|
@@ -355,7 +568,7 @@ module Grainery
|
|
|
355
568
|
sorted
|
|
356
569
|
end
|
|
357
570
|
|
|
358
|
-
def harvest_table(model, db_name, limit: nil)
|
|
571
|
+
def harvest_table(model, db_name, limit: nil, anonymize: true)
|
|
359
572
|
table_name = model.table_name
|
|
360
573
|
is_lookup = @lookup_tables.include?(table_name)
|
|
361
574
|
|
|
@@ -374,13 +587,14 @@ module Grainery
|
|
|
374
587
|
end
|
|
375
588
|
|
|
376
589
|
# Generate seed file
|
|
377
|
-
seed_content = generate_seed_content(model, records, db_name)
|
|
590
|
+
seed_content = generate_seed_content(model, records, db_name, anonymize: anonymize)
|
|
378
591
|
seed_path = get_seed_path(model, db_name)
|
|
379
592
|
|
|
380
593
|
File.write(seed_path, seed_content)
|
|
381
594
|
|
|
595
|
+
anonymize_suffix = anonymize && @anonymize_fields.any? ? " [anonymized]" : ""
|
|
382
596
|
record_info = is_lookup ? " (lookup: #{records.size} records)" : " (#{records.size} records)"
|
|
383
|
-
puts " ✓ #{model.name.ljust(50)} → #{table_name}.rb#{record_info}"
|
|
597
|
+
puts " ✓ #{model.name.ljust(50)} → #{table_name}.rb#{record_info}#{anonymize_suffix}"
|
|
384
598
|
end
|
|
385
599
|
|
|
386
600
|
def get_seed_path(model, db_name)
|
|
@@ -389,7 +603,7 @@ module Grainery
|
|
|
389
603
|
Rails.root.join(db_dir, "#{model.table_name}.rb")
|
|
390
604
|
end
|
|
391
605
|
|
|
392
|
-
def generate_seed_content(model, records, db_name)
|
|
606
|
+
def generate_seed_content(model, records, db_name, anonymize: true)
|
|
393
607
|
table_name = model.table_name
|
|
394
608
|
|
|
395
609
|
# Get columns to export (exclude id, timestamps)
|
|
@@ -401,6 +615,7 @@ module Grainery
|
|
|
401
615
|
content << "# Harvested from #{db_name} database: #{table_name}"
|
|
402
616
|
content << "# Records: #{records.size}"
|
|
403
617
|
content << "# Generated: #{Time.now}"
|
|
618
|
+
content << "# Anonymized: #{anonymize && @anonymize_fields.any? ? 'YES' : 'NO'}"
|
|
404
619
|
content << ""
|
|
405
620
|
content << "#{model.name}.create!("
|
|
406
621
|
|
|
@@ -411,6 +626,18 @@ module Grainery
|
|
|
411
626
|
|
|
412
627
|
columns.each_with_index do |col, col_idx|
|
|
413
628
|
value = record.send(col.name)
|
|
629
|
+
|
|
630
|
+
# Anonymize if enabled and field is configured
|
|
631
|
+
if anonymize
|
|
632
|
+
faker_method = get_anonymization_method(col.name, table_name, db_name)
|
|
633
|
+
if faker_method
|
|
634
|
+
# Skip anonymization if explicitly set to "skip"
|
|
635
|
+
unless faker_method.to_s == 'skip'
|
|
636
|
+
value = anonymize_value(col.name, faker_method, col, value)
|
|
637
|
+
end
|
|
638
|
+
end
|
|
639
|
+
end
|
|
640
|
+
|
|
414
641
|
formatted_value = format_seed_value(value, col)
|
|
415
642
|
comma = col_idx < columns.size - 1 ? ',' : ''
|
|
416
643
|
content << " #{col.name}: #{formatted_value}#{comma}"
|
|
@@ -424,6 +651,341 @@ module Grainery
|
|
|
424
651
|
content.join("\n")
|
|
425
652
|
end
|
|
426
653
|
|
|
654
|
+
# Looks up the anonymization method configured for a column, most specific
# key first: "db.table.field", then "table.field", then the bare field name.
#
# @param field_name [String] column name
# @param table_name [String] table the column belongs to
# @param db_name [String, Symbol] database name
# @return [Object, nil] the configured value, or whatever the bare-name lookup
#   yields when no scoped entry is set
def get_anonymization_method(field_name, table_name, db_name)
  table_scoped = "#{table_name}.#{field_name}"
  scoped_keys = ["#{db_name}.#{table_scoped}", table_scoped]

  scoped_keys.each do |key|
    configured = @anonymize_fields[key]
    return configured if configured
  end

  @anonymize_fields[field_name]
end
|
|
667
|
+
|
|
668
|
+
# Produces a fake replacement for one column value.
#
# @param field_name [String] column name; used only in the warning message
# @param faker_method [String, Symbol, nil] anonymization method name from the config
# @param column [#limit] column metadata; a non-nil limit caps the length of String results
# @param original_value [Object, nil] current value; only the 'date_of_birth' strategy reads it
# @return [Object, nil] the fake value; nil when faker_method is nil, when the
#   name is not recognised, or when generation raised (a warning is printed)
def anonymize_value(field_name, faker_method, column, original_value = nil)
  return nil if faker_method.nil?

  fake_value =
    case faker_method.to_s
    when 'email' then Faker::Internet.email
    when 'first_name' then Faker::Name.first_name
    when 'last_name' then Faker::Name.last_name
    when 'name' then Faker::Name.name
    when 'phone_number' then Faker::PhoneNumber.phone_number
    when 'address' then Faker::Address.full_address
    when 'street_address' then Faker::Address.street_address
    when 'city' then Faker::Address.city
    when 'state' then Faker::Address.state
    when 'zip_code' then Faker::Address.zip_code
    when 'ssn' then Faker::IDNumber.valid
    when 'credit_card_number' then Faker::Finance.credit_card
    when 'password' then Faker::Internet.password
    when 'token' then generate_token(column)
    when 'api_key' then generate_api_key(column)
    when 'secret' then generate_secret(column)
    when 'iban' then generate_fake_iban(column)
    when 'greek_vat' then generate_fake_greek_vat(column)
    when 'greek_amka' then generate_fake_greek_amka(column)
    when 'greek_personal_number' then generate_fake_greek_personal_number(column)
    when 'greek_ada' then generate_fake_greek_ada(column)
    when 'greek_adam' then generate_fake_greek_adam(column)
    when 'date_of_birth' then generate_fake_date_of_birth(original_value, column)
    when 'identity_number' then generate_fake_identity_number(column)
    else
      # Unrecognised name: fall back to a public method on Faker itself.
      # public_send (not send) keeps a config-supplied name from ever
      # reaching a private method.
      Faker.public_send(faker_method) if Faker.respond_to?(faker_method)
    end

  # Truncate to column limit if necessary
  if fake_value.is_a?(String) && column.limit
    fake_value = fake_value[0...column.limit]
  end

  fake_value
rescue => e
  # Generation is best-effort: report and fall back to nil.
  puts " ⚠ Warning: Could not anonymize #{field_name} with #{faker_method}: #{e.message}"
  nil
end
|
|
743
|
+
|
|
744
|
+
# Builds a random alphanumeric token of at most 32 characters, shortened to
# the column limit when the column declares a smaller one.
#
# @param column [#limit] column metadata
# @return [String] whatever Faker::Alphanumeric.alphanumeric returns for that length
def generate_token(column)
  token_length = [column.limit || 32, 32].min
  Faker::Alphanumeric.alphanumeric(number: token_length)
end
|
|
749
|
+
|
|
750
|
+
# Builds a random alphanumeric API key of at most 40 characters, shortened to
# the column limit when the column declares a smaller one.
#
# @param column [#limit] column metadata
# @return [String] whatever Faker::Alphanumeric.alphanumeric returns for that length
def generate_api_key(column)
  key_length = [column.limit || 40, 40].min
  Faker::Alphanumeric.alphanumeric(number: key_length)
end
|
|
755
|
+
|
|
756
|
+
# Builds a random alphanumeric secret of at most 64 characters, shortened to
# the column limit when the column declares a smaller one.
#
# @param column [#limit] column metadata
# @return [String] whatever Faker::Alphanumeric.alphanumeric returns for that length
def generate_secret(column)
  secret_length = [column.limit || 64, 64].min
  Faker::Alphanumeric.alphanumeric(number: secret_length)
end
|
|
761
|
+
|
|
762
|
+
# Generates a fake Greek-style IBAN: "GR" + 2 digits + 7 digits + 16 digits
# (27 characters). The check digits are random, not computed, so the result
# is not a checksum-valid IBAN.
#
# @param column [#limit] column metadata; when limit is shorter than the
#   IBAN the value is cut to the first `limit` characters
# @return [String]
def generate_fake_iban(column)
  country_code = 'GR'
  check_digits = rand(10..99)
  bank_code = rand(1000000..9999999)
  account_number = rand(1000000000000000..9999999999999999)
  iban = "#{country_code}#{check_digits}#{bank_code}#{account_number}"

  # Plain prefix truncation; for limits of 2 or more this keeps the country code.
  limit = column.limit
  limit && iban.length > limit ? iban[0...limit] : iban
end
|
|
783
|
+
|
|
784
|
+
# Generates a fake Greek VAT number (AFM): a random 9-digit string with a
# non-zero first digit. No check digit is computed.
#
# @param column [#limit] column metadata; a positive limit below 9 yields a
#   random number with exactly that many digits instead
# @return [String]
def generate_fake_greek_vat(column)
  full_vat = rand(100000000..999999999).to_s
  width = column.limit

  # No limit, or room for all nine digits.
  return full_vat unless width && width < 9

  # Non-positive limit: plain slice of the full number.
  return full_vat[0...width] unless width > 0

  # Shorter column: draw a fresh number with exactly `width` digits.
  lowest = 10 ** (width - 1)
  highest = (10 ** width) - 1
  rand(lowest..highest).to_s
end
|
|
805
|
+
|
|
806
|
+
# Generates a fake Greek AMKA (social security number): 11 digits laid out as
# DDMMYY followed by a 5-digit sequence, e.g. 01019000123.
#
# The birth year is drawn from 1950..2005 and reduced to its last two digits.
# Without the modulo, years 2000..2005 would render as "100".."105" and
# produce a 12-digit value.
#
# @param column [#limit] column metadata; when limit is below 11 the value is
#   cut to the first `limit` characters
# @return [String]
def generate_fake_greek_amka(column)
  day = format('%02d', rand(1..28)) # 1..28 is valid in every month
  month = format('%02d', rand(1..12))
  year = format('%02d', rand(1950..2005) % 100)
  sequence = format('%05d', rand(0..99999))

  amka = "#{day}#{month}#{year}#{sequence}"

  limit = column.limit
  limit && limit < amka.length ? amka[0...limit] : amka
end
|
|
833
|
+
|
|
834
|
+
# Generates a fake Greek Personal Number: 2 digits + 1 uppercase Latin letter
# + 9 digits (12 characters), e.g. 12A123456789.
#
# @param column [#limit] column metadata. Limits of 12 or more (or none) give
#   the full value; limits of 1 or 2 give a random number with that many
#   digits; any other limit gives the first `limit` characters.
# @return [String]
def generate_fake_greek_personal_number(column)
  leading_digits = rand(10..99).to_s
  letter = ('A'..'Z').to_a.sample
  tax_digits = rand(100000000..999999999).to_s
  candidate = "#{leading_digits}#{letter}#{tax_digits}"

  max_len = column.limit
  return candidate unless max_len && max_len < 12

  if max_len > 0 && max_len < 3
    # Too short to hold the digit-digit-letter prefix: digits only.
    rand(10 ** (max_len - 1)...10 ** max_len).to_s
  else
    candidate[0...max_len]
  end
end
|
|
871
|
+
|
|
872
|
+
# Generates a fake Greek ADA (Diavgeia decision number).
# Layout: 4 Greek capitals + 2 digits + 4 Greek capitals + "-" + 1 digit +
# 2 Greek capitals (14 characters), e.g. ΨΚΕΘ46ΜΤΛΠ-7ΗΠ.
#
# @param column [#limit] column metadata; a limit below 15 cuts the value to
#   its first `limit` characters
# @return [String]
def generate_fake_greek_ada(column)
  alphabet = %w[Α Β Γ Δ Ε Ζ Η Θ Ι Κ Λ Μ Ν Ξ Ο Π Ρ Σ Τ Υ Φ Χ Ψ Ω]
  random_letters = ->(count) { count.times.map { alphabet.sample }.join }
  random_digits = ->(count) { count.times.map { rand(0..9) }.join }

  # Segments are drawn left to right, in the order they appear in the result.
  head = random_letters.call(4)
  head_digits = random_digits.call(2)
  middle = random_letters.call(4)
  tail_digit = rand(0..9).to_s
  tail = random_letters.call(2)

  ada = "#{head}#{head_digits}#{middle}-#{tail_digit}#{tail}"

  max_len = column.limit
  return ada unless max_len && max_len < 15

  ada[0...max_len]
end
|
|
903
|
+
|
|
904
|
+
# Generates a fake Greek ADAM number (public procurement identifier).
# Layout: 2 digits + "PROC" or "REQ" + 9 digits,
# e.g. 21PROC009041696 or 21REQ008902853.
#
# @param column [#limit] column metadata; when limit is shorter than the
#   generated value it is cut to the first `limit` characters
# @return [String]
def generate_fake_greek_adam(column)
  random_digits = ->(count) { count.times.map { rand(0..9) }.join }

  # Segments are drawn left to right, in the order they appear in the result.
  two_digit_year = random_digits.call(2)
  kind = %w[PROC REQ].sample
  serial = random_digits.call(9)

  adam = "#{two_digit_year}#{kind}#{serial}"

  max_len = column.limit
  return adam unless max_len && max_len < adam.length

  adam[0...max_len]
end
|
|
927
|
+
|
|
928
|
+
# Generates a fake date of birth that keeps the person's approximate age.
#
# The new age range is the original age +/- 2 years. Adults never drop below
# 18, so adulthood is preserved; minors keep their own +/- 2 window (floored
# at 0) rather than being forced up to 18, which would hand Faker a range
# whose minimum exceeds its maximum.
#
# @param original_value [Date, Time, DateTime, String, nil] the real birth date
# @param column [Object] column metadata (not used by this strategy)
# @return [Object] the result of Faker::Date.birthday; an 18..80 range is used
#   when the original is nil, of an unsupported type, or cannot be parsed
def generate_fake_date_of_birth(original_value, column)
  return Faker::Date.birthday(min_age: 18, max_age: 80) if original_value.nil?

  begin
    original_date =
      case original_value
      when Date then original_value
      when Time, DateTime then original_value.to_date
      when String then Date.parse(original_value)
      else
        return Faker::Date.birthday(min_age: 18, max_age: 80)
      end

    # Completed years of age; Date#>> shifts by whole months (12 per year).
    today = Date.today
    age = today.year - original_date.year
    age -= 1 if today < (original_date >> (12 * age))

    min_age = age >= 18 ? [age - 2, 18].max : [age - 2, 0].max
    max_age = [age + 2, min_age].max # also guards against future-dated originals

    Faker::Date.birthday(min_age: min_age, max_age: max_age)
  rescue
    # Fallback if parsing fails
    Faker::Date.birthday(min_age: 18, max_age: 80)
  end
end
|
|
962
|
+
|
|
963
|
+
# Generates a fake, generic identity-card number: 2 uppercase letters followed
# by 6 digits, shrunk to fit columns that declare a smaller limit.
#
# @param column [#limit] column metadata. No limit or a limit of 8 or more
#   gives the full format; 2..7 gives up to 2 letters plus digits filling the
#   rest; below 2 gives `limit` letters only.
# @return [String]
def generate_fake_identity_number(column)
  alphabet = ('A'..'Z').to_a
  max_len = column.limit

  # Default format: no limit, or enough room for 2 letters + 6 digits.
  if !max_len || max_len >= 8
    return "#{alphabet.sample}#{alphabet.sample}#{rand(100000..999999)}"
  end

  # Very small limit, just use letters
  return max_len.times.map { alphabet.sample }.join if max_len < 2

  # Split the available width between letters (at most 2) and digits.
  letter_count = [2, max_len / 2].min
  digit_count = max_len - letter_count
  letter_part = letter_count.times.map { alphabet.sample }.join
  digit_part = digit_count > 0 ? rand(10 ** (digit_count - 1)...10 ** digit_count).to_s : ''

  "#{letter_part}#{digit_part}"
end
|
|
988
|
+
|
|
427
989
|
def format_seed_value(value, column)
|
|
428
990
|
return 'nil' if value.nil?
|
|
429
991
|
|
|
@@ -447,6 +1009,79 @@ module Grainery
|
|
|
447
1009
|
end
|
|
448
1010
|
end
|
|
449
1011
|
|
|
1012
|
+
# Writes a schema.rb-style dump of every table and index on one configured
# database connection to <grainery_path>/<db_name>/schema.rb.
#
# Errors while dumping are reported with a message and swallowed, so one
# failing database does not abort the caller.
#
# @param db_name [String, Symbol] key into @database_configs
# @return [nil] returns immediately when db_name has no configuration
def dump_database_schema(db_name)
  db_config = @database_configs[db_name]
  return unless db_config

  schema_path = Rails.root.join(@grainery_path, db_name.to_s, 'schema.rb')

  begin
    # NOTE(review): this method read the config with symbol keys while
    # detect_anonymizable_fields reads 'model_base_class' as a string key.
    # Accept both until the key type produced by the loader is confirmed.
    base_class_name = db_config[:model_base_class] || db_config['model_base_class']
    adapter_name = db_config[:adapter] || db_config['adapter']

    base_class = safe_const_get(base_class_name)
    connection = base_class.connection

    # Generate schema dump
    schema_content = []
    schema_content << "# Schema dump for #{db_name} database"
    schema_content << "# Generated: #{Time.now}"
    schema_content << "# Adapter: #{adapter_name}"
    schema_content << ""
    schema_content << "ActiveRecord::Schema.define do"
    schema_content << ""

    # Get all tables for this connection
    tables = connection.tables.sort

    tables.each do |table_name|
      # Skip internal Rails tables
      next if ['schema_migrations', 'ar_internal_metadata'].include?(table_name)

      schema_content << " create_table \"#{table_name}\", force: :cascade do |t|"

      connection.columns(table_name).each do |column|
        next if column.name == 'id' # Primary key is handled by create_table

        type = column.type
        attrs = []
        attrs << "limit: #{column.limit}" if column.limit
        attrs << "precision: #{column.precision}" if column.precision
        attrs << "scale: #{column.scale}" if column.scale
        attrs << "null: false" unless column.null
        attrs << "default: #{column.default.inspect}" if column.default

        attrs_str = attrs.any? ? ", #{attrs.join(', ')}" : ""
        schema_content << " t.#{type} \"#{column.name}\"#{attrs_str}"
      end

      schema_content << " end"
      schema_content << ""

      # Fetch the index list once; it is used for the lines below and for
      # the trailing blank-line decision.
      table_indexes = connection.indexes(table_name)

      table_indexes.each do |index|
        columns = index.columns.is_a?(Array) ? index.columns : [index.columns]
        options = []
        options << "name: \"#{index.name}\"" if index.name
        options << "unique: true" if index.unique

        columns_str = columns.size == 1 ? "\"#{columns.first}\"" : "[#{columns.map { |c| "\"#{c}\"" }.join(', ')}]"
        options_str = options.any? ? ", #{options.join(', ')}" : ""

        schema_content << " add_index \"#{table_name}\", #{columns_str}#{options_str}"
      end

      schema_content << "" if table_indexes.any?
    end

    schema_content << "end"
    schema_content << ""

    File.write(schema_path, schema_content.join("\n"))
    puts " ✓ Schema dumped for #{db_name} → schema.rb"
  rescue => e
    puts " ✗ Error dumping schema for #{db_name}: #{e.message}"
  end
end
|
|
1084
|
+
|
|
450
1085
|
def create_load_order_file(load_order, models)
|
|
451
1086
|
order_path = Rails.root.join(@grainery_path, 'load_order.txt')
|
|
452
1087
|
|
|
@@ -469,7 +1104,7 @@ module Grainery
|
|
|
469
1104
|
puts "\n ✓ Load order written to #{@grainery_path}/load_order.txt"
|
|
470
1105
|
end
|
|
471
1106
|
|
|
472
|
-
def load_seeds
|
|
1107
|
+
def load_seeds(load_schema: false)
|
|
473
1108
|
order_file = Rails.root.join(@grainery_path, 'load_order.txt')
|
|
474
1109
|
|
|
475
1110
|
unless File.exist?(order_file)
|
|
@@ -480,8 +1115,35 @@ module Grainery
|
|
|
480
1115
|
puts "\n" + "="*80
|
|
481
1116
|
puts "Loading Harvested Seeds"
|
|
482
1117
|
puts "="*80
|
|
1118
|
+
puts "Load schema: #{load_schema ? 'YES' : 'NO'}"
|
|
1119
|
+
puts "="*80
|
|
1120
|
+
|
|
1121
|
+
# Load schemas first if requested
|
|
1122
|
+
if load_schema
|
|
1123
|
+
puts "\n" + "-"*80
|
|
1124
|
+
puts "Loading Database Schemas"
|
|
1125
|
+
puts "-"*80
|
|
1126
|
+
|
|
1127
|
+
@database_configs.each do |db_name, _|
|
|
1128
|
+
schema_file = Rails.root.join(@grainery_path, db_name.to_s, 'schema.rb')
|
|
1129
|
+
if File.exist?(schema_file)
|
|
1130
|
+
puts " → Loading schema for #{db_name}..."
|
|
1131
|
+
begin
|
|
1132
|
+
load schema_file
|
|
1133
|
+
rescue => e
|
|
1134
|
+
puts " ✗ Error loading schema for #{db_name}: #{e.message}"
|
|
1135
|
+
end
|
|
1136
|
+
else
|
|
1137
|
+
puts " ⚠ No schema file found for #{db_name}"
|
|
1138
|
+
end
|
|
1139
|
+
end
|
|
1140
|
+
end
|
|
483
1141
|
|
|
484
1142
|
# Load harvested seeds in dependency order
|
|
1143
|
+
puts "\n" + "-"*80
|
|
1144
|
+
puts "Loading Seed Data"
|
|
1145
|
+
puts "-"*80
|
|
1146
|
+
|
|
485
1147
|
File.readlines(order_file).each do |line|
|
|
486
1148
|
line = line.strip
|
|
487
1149
|
next if line.empty? || line.start_with?('#')
|