activerecord-graph-extractor 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,98 @@
1
+ # frozen_string_literal: true
2
+
3
module ActiveRecordGraphExtractor
  # Value object holding the extractor's settings.
  #
  # Every instance starts from the defaults assigned in {#reset!}. A shared,
  # lazily created instance is available through {Configuration.configuration}
  # and can be edited with {Configuration.configure}.
  class Configuration
    # Values accepted by {#primary_key_strategy=}.
    PRIMARY_KEY_STRATEGIES = %i[preserve_original generate_new].freeze

    # Options stored as-is, without validation.
    attr_accessor :progress_enabled, :stream_json,
                  :validate_records, :use_transactions, :handle_circular_references,
                  :skip_missing_models, :included_models, :excluded_models,
                  :included_relationships, :excluded_relationships,
                  :custom_serializers

    # Options whose writers validate their argument (defined below). Only
    # readers are generated here so the hand-written writers do not redefine
    # generated ones.
    attr_reader :max_depth, :batch_size, :primary_key_strategy

    def initialize
      reset!
    end

    # Restores every option on this instance to its default value.
    #
    # @return [void]
    def reset!
      @max_depth = 5
      @batch_size = 1000
      @progress_enabled = true
      @stream_json = false
      @validate_records = true
      @use_transactions = true
      @handle_circular_references = true
      @skip_missing_models = true
      @included_models = []
      @excluded_models = []
      @included_relationships = []
      @excluded_relationships = []
      @custom_serializers = {}
      @primary_key_strategy = :generate_new
    end

    # @param value [Numeric] must be greater than zero
    # @raise [ArgumentError] if +value+ is not a positive number
    def max_depth=(value)
      raise ArgumentError, 'max_depth must be positive' unless positive_number?(value)

      @max_depth = value
    end

    # @param value [Numeric] must be greater than zero
    # @raise [ArgumentError] if +value+ is not a positive number
    def batch_size=(value)
      raise ArgumentError, 'batch_size must be positive' unless positive_number?(value)

      @batch_size = value
    end

    # @param strategy [Symbol] one of {PRIMARY_KEY_STRATEGIES}
    # @raise [ArgumentError] for any other value
    def primary_key_strategy=(strategy)
      unless PRIMARY_KEY_STRATEGIES.include?(strategy)
        raise ArgumentError, 'primary_key_strategy must be :preserve_original or :generate_new'
      end

      @primary_key_strategy = strategy
    end

    # Adds a model to the include list (stored by name, no duplicates).
    #
    # @param model [Class, #to_s]
    def include_model(model)
      model_name = model_name_for(model)
      @included_models << model_name unless @included_models.include?(model_name)
    end

    # Adds a model to the exclude list (stored by name, no duplicates).
    #
    # @param model [Class, #to_s]
    def exclude_model(model)
      model_name = model_name_for(model)
      @excluded_models << model_name unless @excluded_models.include?(model_name)
    end

    # Adds a relationship name to the include list (no duplicates).
    #
    # @param relationship [#to_s]
    def include_relationship(relationship)
      relationship_name = relationship.to_s
      @included_relationships << relationship_name unless @included_relationships.include?(relationship_name)
    end

    # Adds a relationship name to the exclude list (no duplicates).
    #
    # @param relationship [#to_s]
    def exclude_relationship(relationship)
      relationship_name = relationship.to_s
      @excluded_relationships << relationship_name unless @excluded_relationships.include?(relationship_name)
    end

    # Registers a serializer under the model's name. An explicit +serializer+
    # argument takes precedence over a block.
    #
    # @param model [Class, #to_s]
    # @param serializer [Object, nil]
    # @return [Object, nil] the stored serializer
    def add_custom_serializer(model, serializer = nil, &block)
      @custom_serializers[model_name_for(model)] = serializer || block
    end

    # Exclusions always win; an empty include list means "everything else".
    #
    # @param model_name [#to_s]
    # @return [Boolean]
    def model_included?(model_name)
      name = model_name.to_s
      return false if @excluded_models.include?(name)
      return true if @included_models.empty?

      @included_models.include?(name)
    end

    # Exclusions always win; an empty include list means "everything else".
    #
    # @param relationship_name [#to_s]
    # @return [Boolean]
    def relationship_included?(relationship_name)
      name = relationship_name.to_s
      return false if @excluded_relationships.include?(name)
      return true if @included_relationships.empty?

      @included_relationships.include?(name)
    end

    class << self
      # Yields the shared configuration for in-place editing.
      def configure
        yield(configuration)
      end

      # @return [Configuration] the shared instance, created on first use
      def configuration
        @configuration ||= new
      end

      # Replaces the shared instance with a fresh one holding the defaults.
      #
      # @return [Configuration]
      def reset!
        @configuration = new
      end
    end

    private

    # Classes are recorded by their +name+; anything else by its +to_s+.
    def model_name_for(model)
      model.is_a?(Class) ? model.name : model.to_s
    end

    # True only for real numbers greater than zero, so nil and non-numeric
    # input lead to ArgumentError in the writers rather than NoMethodError.
    def positive_number?(value)
      value.is_a?(Numeric) && value.real? && value.positive?
    end
  end
end
@@ -0,0 +1,406 @@
1
+ # frozen_string_literal: true
2
+
3
require 'set' # Set is not autoloaded before Ruby 3.2

module ActiveRecordGraphExtractor
  # Orders models so that each one comes after the models it depends on.
  #
  # Two graph shapes are handled:
  #
  # * simple  - <tt>{ model => [dependency, ...] }</tt>
  # * complex - <tt>{ 'Model' => { 'relationship' => { 'model_class' => 'Dep' } } }</tt>
  #
  # The instance-level graph given to {#initialize} is used by
  # {#resolve_creation_order}, {#resolve_deletion_order},
  # {#validate_dependencies} and {#group_by_dependency_level}. The remaining
  # public methods work on the graph passed to them as an argument.
  class DependencyResolver
    attr_reader :dependency_graph, :resolved_order

    # @param dependency_graph [Hash] simple-format graph (model => dependencies)
    def initialize(dependency_graph)
      @dependency_graph = dependency_graph
      @resolved_order = []
    end

    # Depth-first ordering of the instance graph, dependencies first.
    #
    # The result is rebuilt from scratch on every call, so calling this method
    # (or the methods built on it) repeatedly never yields duplicate entries.
    # Dependencies that are not themselves keys of the graph are included.
    #
    # @return [Array] models in creation order
    # @raise [DependencyError] when a cycle is found
    def resolve_creation_order
      @resolved_order = []
      visited = Set.new
      in_progress = Set.new

      dependency_graph.each_key do |model|
        visit_model(model, dependency_graph, visited, in_progress)
      end

      # visit_model appends a model only after all of its dependencies, so the
      # accumulated list is already in creation order.
      @resolved_order.dup
    end

    # @return [Array] the creation order reversed (dependents first)
    # @raise [DependencyError] when a cycle is found
    def resolve_deletion_order
      resolve_creation_order.reverse
    end

    # Checks +records_data+ against the instance graph.
    #
    # @param records_data [Hash] record collections keyed the same way as the graph
    # @return [Hash] when some model's dependency has no entry in
    #   +records_data+: <tt>{ model => [missing dependency, ...] }</tt>;
    #   otherwise the result of the per-record reference check, which maps a
    #   model to entries of the form
    #   <tt>{ record_id:, field:, references: }</tt> (empty when all resolve)
    def validate_dependencies(records_data)
      missing_dependencies = {}

      records_data.each_key do |model_name|
        dependencies = dependency_graph[model_name]
        next unless dependencies

        absent = dependencies.reject { |dependency| records_data.key?(dependency) }
        missing_dependencies[model_name] = absent unless absent.empty?
      end

      return missing_dependencies if missing_dependencies.any?

      validate_foreign_key_references(records_data)
    end

    # Buckets the instance graph's models by dependency depth: level 0 holds
    # models without dependencies, and every other model sits one level above
    # its deepest dependency.
    #
    # @return [Hash{Integer => Array}]
    # @raise [DependencyError] when a cycle is found
    def group_by_dependency_level
      levels = {}
      level_of = {}

      resolve_creation_order.each do |model_name|
        dependencies = dependency_graph[model_name] || []

        level =
          if dependencies.empty?
            0
          else
            # Dependencies precede their dependents in creation order, so each
            # one already has a level; -1 is a defensive fallback only.
            dependencies.map { |dependency| level_of.fetch(dependency, -1) }.max + 1
          end

        level_of[model_name] = level
        (levels[level] ||= []) << model_name
      end

      levels
    end

    # Resolves the graph passed as argument.
    #
    # @param dependency_graph [Hash] simple or complex format
    # @return [Array] for the simple format: the keys of the graph, each after
    #   its dependencies (ties broken by +name+)
    # @return [Hash] for the complex format: +'creation_order'+ (array of
    #   levels), +'circular_dependencies'+ and +'missing_models'+
    # @raise [CircularDependencyError] for a cyclic simple-format graph
    def resolve(dependency_graph)
      return resolve_complex_graph(dependency_graph) if complex_format?(dependency_graph)

      # Simple format: { TestOrder => [TestUser] }
      return [] if dependency_graph.empty?

      if detect_circular_dependencies(dependency_graph)
        raise CircularDependencyError, "Circular dependency detected in model relationships"
      end

      topological_sort(dependency_graph)
    end

    # @param dependency_graph [Hash] simple or complex format
    # @return [Boolean] for the simple format: whether any cycle exists
    # @return [Array<Array>] for the complex format: the cycles found
    def detect_circular_dependencies(dependency_graph)
      return detect_complex_circular_dependencies(dependency_graph) if complex_format?(dependency_graph)

      visited = Set.new
      rec_stack = Set.new

      dependency_graph.each_key do |node|
        next if visited.include?(node)
        return true if has_cycle?(node, dependency_graph, visited, rec_stack)
      end

      false
    end

    # Pairs each model name with its records, following the order given by
    # {#resolve}; models with records but no place in that order are appended
    # afterwards.
    #
    # @param records_by_model [Array<Hash>, Hash] either records carrying a
    #   +'_model'+ key, or records already grouped by model name
    # @param dependency_graph [Hash] graph whose resolved entries respond to +name+
    # @return [Array<Array>] +[model_name, records]+ pairs
    def build_creation_order(records_by_model, dependency_graph)
      grouped_records = group_records_by_dependencies(records_by_model)
      ordered_records = []
      placed = Set.new

      resolve(dependency_graph).each do |model_class|
        model_name = model_class.name
        next unless grouped_records.key?(model_name)

        ordered_records << [model_name, grouped_records[model_name]]
        placed << model_name
      end

      grouped_records.each do |model_name, records|
        ordered_records << [model_name, records] unless placed.include?(model_name)
      end

      ordered_records
    end

    private

    # A graph counts as complex when its first value is a Hash.
    def complex_format?(graph)
      graph.values.first.is_a?(Hash)
    end

    # Depth-first visit; appends +model+ to @resolved_order after all of its
    # dependencies. +temp_visited+ holds the models on the current path and is
    # what detects cycles.
    def visit_model(model, graph, visited, temp_visited)
      return if visited.include?(model)

      if temp_visited.include?(model)
        raise DependencyError.new(
          "Circular dependency detected involving #{model}",
          model: model
        )
      end

      temp_visited << model

      (graph[model] || []).each do |dependency|
        visit_model(dependency, graph, visited, temp_visited)
      end

      temp_visited.delete(model)
      visited << model
      @resolved_order << model
    end

    # Collects, per model, every relationship whose target table or target
    # record is absent from +records_data+.
    def validate_foreign_key_references(records_data)
      missing_references = {}

      records_data.each do |model_name, records|
        records.each do |record|
          (record[:relationships] || {}).each do |field, reference|
            next if reference_present?(records_data, reference)

            (missing_references[model_name] ||= []) << {
              record_id: record[:original_id],
              field: field,
              references: reference
            }
          end
        end
      end

      missing_references
    end

    # True when +records_data+ has an entry for the referenced table that
    # contains a record with the referenced original id.
    def reference_present?(records_data, reference)
      candidates = records_data[reference[:table]]
      return false if candidates.nil?

      candidates.any? { |candidate| candidate[:original_id] == reference[:original_id] }
    end

    # Kahn-style ordering of the graph's keys. Dependencies that are not keys
    # are ignored, and candidates are picked by +name+ for a stable result.
    def topological_sort(dependency_graph)
      # Keep only dependencies that are nodes, each counted once; otherwise a
      # repeated dependency inflates the in-degree and the node never reaches
      # zero.
      relevant = dependency_graph.each_with_object({}) do |(node, dependencies), acc|
        acc[node] = dependencies.select { |dependency| dependency_graph.key?(dependency) }.uniq
      end

      in_degree = relevant.each_with_object({}) do |(node, dependencies), acc|
        acc[node] = dependencies.size
      end

      queue = in_degree.select { |_, degree| degree.zero? }.keys
      result = []

      until queue.empty?
        current = queue.min_by(&:name)
        queue.delete(current)
        result << current

        relevant.each do |node, dependencies|
          next unless dependencies.include?(current)

          in_degree[node] -= 1
          queue << node if in_degree[node].zero?
        end
      end

      result
    end

    # Recursive cycle check; +rec_stack+ holds the nodes on the current path.
    def has_cycle?(node, graph, visited, rec_stack)
      visited.add(node)
      rec_stack.add(node)

      graph[node]&.each do |neighbor|
        if !visited.include?(neighbor)
          return true if has_cycle?(neighbor, graph, visited, rec_stack)
        elsif rec_stack.include?(neighbor)
          return true
        end
      end

      rec_stack.delete(node)
      false
    end

    # Groups an array of records by their '_model' key; anything that is not
    # an Array is assumed to be grouped already and is returned unchanged.
    def group_records_by_dependencies(records)
      return records unless records.is_a?(Array)

      records.each_with_object({}) do |record, grouped|
        raise InvalidRecordError, "Record missing _model key: #{record.inspect}" unless record.key?('_model')

        (grouped[record['_model']] ||= []) << record
      end
    end

    # Flattens a complex graph to the simple shape and reports creation
    # levels, cycles and models that are referenced but not defined.
    def resolve_complex_graph(dependency_graph)
      simple_graph = {}
      referenced_models = Set.new

      dependency_graph.each do |model_name, relationships|
        dependencies = relationships.each_value.map { |relationship_info| relationship_info['model_class'] }
        simple_graph[model_name] = dependencies
        referenced_models.merge(dependencies)
      end

      # Referenced-but-undefined models join the graph with no dependencies.
      missing_models = referenced_models.reject { |model_name| dependency_graph.key?(model_name) }.to_a
      missing_models.each { |model_name| simple_graph[model_name] = [] }

      {
        'creation_order' => group_models_by_dependency_level(simple_graph),
        'circular_dependencies' => detect_complex_circular_dependencies(dependency_graph),
        'missing_models' => missing_models
      }
    end

    # Returns the distinct cycles of a complex graph, each as the list of
    # models on the cycle. Two cycles over the same set of models count once.
    def detect_complex_circular_dependencies(dependency_graph)
      circular_deps = []
      visited = Set.new

      dependency_graph.each_key do |model_name|
        next if visited.include?(model_name)

        circular_path = find_circular_path(model_name, dependency_graph, visited, Set.new, [])
        next unless circular_path

        # Drop the repeated end node that closes the cycle.
        clean_cycle = circular_path[0..-2]
        already_known = circular_deps.any? { |cycle| cycle.sort == clean_cycle.sort }
        circular_deps << clean_cycle unless already_known
      end

      circular_deps
    end

    # Depth-first search for a cycle reachable from +model_name+.
    #
    # +local_visited+ / +path+ describe the current search path;
    # +global_visited+ collects models whose search completed without finding
    # a cycle. Returns the cycle as a path whose first and last elements are
    # equal, or nil.
    def find_circular_path(model_name, dependency_graph, global_visited, local_visited, path)
      return nil if global_visited.include?(model_name)

      if local_visited.include?(model_name)
        cycle_start = path.index(model_name)
        return nil unless cycle_start

        return path[cycle_start..-1] + [model_name]
      end

      local_visited.add(model_name)
      path << model_name

      cycle = nil
      (dependency_graph[model_name] || {}).each_value do |rel_info|
        dep_model = rel_info['model_class']
        next unless dependency_graph.key?(dep_model)

        cycle = find_circular_path(dep_model, dependency_graph, global_visited, local_visited, path)
        break if cycle
      end

      local_visited.delete(model_name)
      path.pop
      # Only searches that found no cycle mark the model as finished.
      global_visited.add(model_name) unless cycle
      cycle
    end

    # Peels the graph into levels: each level holds the models whose
    # dependencies all sit in earlier levels. Stops early when no model
    # qualifies, which leaves the models of a cycle (and their dependents) out.
    def group_models_by_dependency_level(simple_graph)
      levels = []
      processed = Set.new

      while processed.size < simple_graph.size
        current_level = simple_graph.each_with_object([]) do |(model_name, dependencies), ready|
          next if processed.include?(model_name)

          ready << model_name if dependencies.all? { |dependency| processed.include?(dependency) }
        end

        break if current_level.empty?

        levels << current_level
        processed.merge(current_level)
      end

      levels
    end
  end
end