activerecord-graph-extractor 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/activerecord_graph_extractor/configuration.rb +2 -1
- data/lib/activerecord_graph_extractor/dry_run_analyzer.rb +81 -8
- data/lib/activerecord_graph_extractor/relationship_analyzer.rb +86 -3
- data/lib/activerecord_graph_extractor/version.rb +2 -2
- metadata +2 -3
- data/activerecord-graph-extractor.gemspec +0 -64
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 34558c2d8251545d4a78aa2db20225c01c4118fdd35246d5c27110913b7c70fc
|
4
|
+
data.tar.gz: 12dc7966fc513148aadf1ac79767078d7f05c2b66c56747df4c217385a8f3638
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0ebfe420b66bbdb6eae26124fff14e22d259df0f381bec4faaef8b3d1113701fe540803b77021792a28d82ce0226358002515f3bc54329ef94d6028709335f17
|
7
|
+
data.tar.gz: fd48bb0cc84f0e6917ea10d8be396002dde9e514c36c5b7b830f69cb6e497a4e2b672ee8acd134c22dcf2ae90c164326456340e2e1a8b18f56fd13736aee3452
|
@@ -6,7 +6,7 @@ module ActiveRecordGraphExtractor
|
|
6
6
|
:validate_records, :use_transactions, :handle_circular_references,
|
7
7
|
:skip_missing_models, :included_models, :excluded_models,
|
8
8
|
:included_relationships, :excluded_relationships,
|
9
|
-
:custom_serializers, :primary_key_strategy
|
9
|
+
:custom_serializers, :primary_key_strategy, :skip_non_primary_database_models
|
10
10
|
|
11
11
|
def initialize
|
12
12
|
reset!
|
@@ -21,6 +21,7 @@ module ActiveRecordGraphExtractor
|
|
21
21
|
@use_transactions = true
|
22
22
|
@handle_circular_references = true
|
23
23
|
@skip_missing_models = true
|
24
|
+
@skip_non_primary_database_models = true
|
24
25
|
@included_models = []
|
25
26
|
@excluded_models = []
|
26
27
|
@included_relationships = []
|
@@ -2,11 +2,14 @@
|
|
2
2
|
|
3
3
|
module ActiveRecordGraphExtractor
|
4
4
|
class DryRunAnalyzer
|
5
|
-
attr_reader :config, :relationship_analyzer
|
5
|
+
attr_reader :config, :relationship_analyzer, :model_count_cache, :relationship_count_cache, :sample_record_cache
|
6
6
|
|
7
7
|
def initialize(config = ActiveRecordGraphExtractor.configuration)
|
8
8
|
@config = config
|
9
9
|
@relationship_analyzer = RelationshipAnalyzer.new(config)
|
10
|
+
@model_count_cache = {} # Cache for model.count queries
|
11
|
+
@relationship_count_cache = {} # Cache for relationship count estimates
|
12
|
+
@sample_record_cache = {} # Cache for sample records
|
10
13
|
end
|
11
14
|
|
12
15
|
def analyze(root_objects, options = {})
|
@@ -134,10 +137,61 @@ module ActiveRecordGraphExtractor
|
|
134
137
|
end
|
135
138
|
|
136
139
|
def estimate_relationship_count(model_class, relationship_name, relationship_info)
|
137
|
-
#
|
138
|
-
|
139
|
-
|
140
|
+
# Create cache key for this specific relationship
|
141
|
+
cache_key = "#{model_class.name}##{relationship_name}"
|
142
|
+
|
143
|
+
# Return cached result if available
|
144
|
+
return @relationship_count_cache[cache_key] if @relationship_count_cache.key?(cache_key)
|
145
|
+
|
146
|
+
# Get cached sample record or fetch and cache it
|
147
|
+
sample_record = get_cached_sample_record(model_class)
|
148
|
+
|
149
|
+
result = if sample_record.nil?
|
150
|
+
0
|
151
|
+
else
|
152
|
+
calculate_relationship_estimate(model_class, relationship_name, relationship_info, sample_record)
|
153
|
+
end
|
154
|
+
|
155
|
+
# Cache the result
|
156
|
+
@relationship_count_cache[cache_key] = result
|
157
|
+
result
|
158
|
+
end
|
159
|
+
|
160
|
+
def get_cached_sample_record(model_class)
|
161
|
+
model_name = model_class.name
|
162
|
+
|
163
|
+
# Return cached sample record if available
|
164
|
+
return @sample_record_cache[model_name] if @sample_record_cache.key?(model_name)
|
165
|
+
|
166
|
+
# Fetch and cache sample record (or nil if none exists)
|
167
|
+
sample_record = begin
|
168
|
+
model_class.first
|
169
|
+
rescue StandardError
|
170
|
+
nil
|
171
|
+
end
|
172
|
+
|
173
|
+
@sample_record_cache[model_name] = sample_record
|
174
|
+
sample_record
|
175
|
+
end
|
140
176
|
|
177
|
+
def get_cached_model_count(model_class)
|
178
|
+
model_name = model_class.name
|
179
|
+
|
180
|
+
# Return cached count if available
|
181
|
+
return @model_count_cache[model_name] if @model_count_cache.key?(model_name)
|
182
|
+
|
183
|
+
# Fetch and cache model count
|
184
|
+
count = begin
|
185
|
+
model_class.count
|
186
|
+
rescue StandardError
|
187
|
+
0
|
188
|
+
end
|
189
|
+
|
190
|
+
@model_count_cache[model_name] = count
|
191
|
+
count
|
192
|
+
end
|
193
|
+
|
194
|
+
def calculate_relationship_estimate(model_class, relationship_name, relationship_info, sample_record)
|
141
195
|
begin
|
142
196
|
case relationship_info['type']
|
143
197
|
when 'has_many', 'has_and_belongs_to_many'
|
@@ -145,8 +199,8 @@ module ActiveRecordGraphExtractor
|
|
145
199
|
related_records = sample_record.public_send(relationship_name)
|
146
200
|
if related_records.respond_to?(:count)
|
147
201
|
sample_count = related_records.limit(100).count
|
148
|
-
#
|
149
|
-
total_records = model_class
|
202
|
+
# Use cached total record count
|
203
|
+
total_records = get_cached_model_count(model_class)
|
150
204
|
return 0 if total_records == 0
|
151
205
|
|
152
206
|
# Use sample count as average, but cap at reasonable limits
|
@@ -155,17 +209,36 @@ module ActiveRecordGraphExtractor
|
|
155
209
|
end
|
156
210
|
when 'has_one', 'belongs_to'
|
157
211
|
# For singular relationships, estimate 1 per parent record
|
158
|
-
total_records = model_class
|
212
|
+
total_records = get_cached_model_count(model_class)
|
159
213
|
return (total_records * 0.9).to_i # 90% factor assuming some records might not have the relationship
|
160
214
|
end
|
161
215
|
rescue StandardError
|
162
216
|
# If we can't estimate, return a conservative estimate
|
163
|
-
|
217
|
+
total_records = get_cached_model_count(model_class)
|
218
|
+
return total_records > 0 ? [total_records / 10, 1].max : 0
|
164
219
|
end
|
165
220
|
|
166
221
|
0
|
167
222
|
end
|
168
223
|
|
224
|
+
# Clear all caches (useful for testing or memory management)
|
225
|
+
def clear_cache!
|
226
|
+
@model_count_cache.clear
|
227
|
+
@relationship_count_cache.clear
|
228
|
+
@sample_record_cache.clear
|
229
|
+
end
|
230
|
+
|
231
|
+
# Get cache statistics for debugging
|
232
|
+
def cache_stats
|
233
|
+
{
|
234
|
+
model_count_cache_size: @model_count_cache.size,
|
235
|
+
relationship_count_cache_size: @relationship_count_cache.size,
|
236
|
+
sample_record_cache_size: @sample_record_cache.size,
|
237
|
+
cached_models: @model_count_cache.keys,
|
238
|
+
cached_relationships: @relationship_count_cache.keys
|
239
|
+
}
|
240
|
+
end
|
241
|
+
|
169
242
|
def estimate_file_size(model_counts, relationship_map)
|
170
243
|
total_size = 0
|
171
244
|
|
@@ -8,6 +8,7 @@ module ActiveRecordGraphExtractor
|
|
8
8
|
@config = config
|
9
9
|
@visited_models = Set.new
|
10
10
|
@circular_paths = []
|
11
|
+
@primary_connection = ActiveRecord::Base.connection
|
11
12
|
end
|
12
13
|
|
13
14
|
def analyze_model(model_class)
|
@@ -25,12 +26,21 @@ module ActiveRecordGraphExtractor
|
|
25
26
|
relationship_name = association.name.to_s
|
26
27
|
next unless config.relationship_included?(relationship_name)
|
27
28
|
|
28
|
-
|
29
|
+
# Check if the associated model uses the primary database
|
30
|
+
associated_model = association.klass
|
31
|
+
model_name = associated_model.name
|
32
|
+
|
33
|
+
# Skip if model is excluded
|
29
34
|
next unless config.model_included?(model_name)
|
35
|
+
|
36
|
+
# Skip if the associated model doesn't use the primary database (if enabled)
|
37
|
+
if config.skip_non_primary_database_models
|
38
|
+
next unless uses_primary_database?(associated_model)
|
39
|
+
end
|
30
40
|
|
31
41
|
relationships[relationship_name] = {
|
32
42
|
'type' => association.macro.to_s,
|
33
|
-
'model_class' =>
|
43
|
+
'model_class' => associated_model,
|
34
44
|
'model_name' => model_name,
|
35
45
|
'foreign_key' => association.foreign_key,
|
36
46
|
'polymorphic' => association.options[:polymorphic] || false,
|
@@ -42,6 +52,13 @@ module ActiveRecordGraphExtractor
|
|
42
52
|
else
|
43
53
|
raise e
|
44
54
|
end
|
55
|
+
rescue StandardError => e
|
56
|
+
# Skip associations that cause database connection errors
|
57
|
+
if e.message.include?('Unknown database') || e.message.include?('database connection')
|
58
|
+
next
|
59
|
+
else
|
60
|
+
raise e unless config.skip_missing_models
|
61
|
+
end
|
45
62
|
end
|
46
63
|
end
|
47
64
|
|
@@ -101,7 +118,7 @@ module ActiveRecordGraphExtractor
|
|
101
118
|
end
|
102
119
|
end
|
103
120
|
|
104
|
-
dependency_graph[model_class] = dependencies
|
121
|
+
dependency_graph[model_class] = dependencies.uniq
|
105
122
|
end
|
106
123
|
|
107
124
|
dependency_graph
|
@@ -113,6 +130,61 @@ module ActiveRecordGraphExtractor
|
|
113
130
|
|
114
131
|
private
|
115
132
|
|
133
|
+
def uses_primary_database?(model_class)
|
134
|
+
begin
|
135
|
+
# Check if the model inherits from a non-primary database class
|
136
|
+
return false if inherits_from_non_primary_database?(model_class)
|
137
|
+
|
138
|
+
# Get the model's connection
|
139
|
+
model_connection = model_class.connection
|
140
|
+
|
141
|
+
# Compare connection configurations
|
142
|
+
primary_config = @primary_connection.instance_variable_get(:@config) || {}
|
143
|
+
model_config = model_connection.instance_variable_get(:@config) || {}
|
144
|
+
|
145
|
+
# Check if they're the same connection object
|
146
|
+
return true if model_connection == @primary_connection
|
147
|
+
|
148
|
+
# Check if they have the same database configuration
|
149
|
+
if primary_config.any? && model_config.any?
|
150
|
+
# Compare key connection parameters
|
151
|
+
same_database = primary_config[:database] == model_config[:database]
|
152
|
+
same_host = (primary_config[:host] || 'localhost') == (model_config[:host] || 'localhost')
|
153
|
+
same_port = (primary_config[:port] || 5432) == (model_config[:port] || 5432)
|
154
|
+
|
155
|
+
return same_database && same_host && same_port
|
156
|
+
end
|
157
|
+
|
158
|
+
# If we can't determine, check if the connection pool is the same
|
159
|
+
primary_pool = ActiveRecord::Base.connection_pool
|
160
|
+
model_pool = model_class.connection_pool
|
161
|
+
|
162
|
+
return primary_pool == model_pool
|
163
|
+
|
164
|
+
rescue StandardError => e
|
165
|
+
# If any error occurs (including database connection errors), assume it's not the primary database
|
166
|
+
return false
|
167
|
+
end
|
168
|
+
end
|
169
|
+
|
170
|
+
def inherits_from_non_primary_database?(model_class)
|
171
|
+
# Check if the model inherits from known non-primary database classes
|
172
|
+
non_primary_classes = [
|
173
|
+
'Ops::ReplicaDatabase',
|
174
|
+
'Analytics::Base',
|
175
|
+
'Reporting::Base',
|
176
|
+
'DataWarehouse::Base',
|
177
|
+
'External::Base',
|
178
|
+
'ThirdParty::Base'
|
179
|
+
]
|
180
|
+
|
181
|
+
ancestors = model_class.ancestors.map(&:name).compact
|
182
|
+
non_primary_classes.any? { |non_primary_class| ancestors.include?(non_primary_class) }
|
183
|
+
rescue StandardError
|
184
|
+
# If we can't determine the ancestry, err on the side of caution
|
185
|
+
false
|
186
|
+
end
|
187
|
+
|
116
188
|
def filter_relationships(relationships)
|
117
189
|
relationships.select do |name, info|
|
118
190
|
config.model_included?(info['model_name']) &&
|
@@ -128,6 +200,11 @@ module ActiveRecordGraphExtractor
|
|
128
200
|
|
129
201
|
return false unless config.model_included?(association.klass.name)
|
130
202
|
|
203
|
+
# Check if the associated model uses the primary database (if enabled)
|
204
|
+
if config.skip_non_primary_database_models
|
205
|
+
return false unless uses_primary_database?(association.klass)
|
206
|
+
end
|
207
|
+
|
131
208
|
# Skip polymorphic associations that can't be resolved
|
132
209
|
return false if association.polymorphic? && association.foreign_type.nil?
|
133
210
|
|
@@ -135,6 +212,12 @@ module ActiveRecordGraphExtractor
|
|
135
212
|
rescue NameError
|
136
213
|
# Skip associations that reference non-existent models
|
137
214
|
false
|
215
|
+
rescue StandardError => e
|
216
|
+
# Skip associations that cause database connection errors
|
217
|
+
if e.message.include?('Unknown database') || e.message.include?('database connection')
|
218
|
+
return false
|
219
|
+
end
|
220
|
+
raise e unless config.skip_missing_models
|
138
221
|
end
|
139
222
|
|
140
223
|
def build_relationship_info(association)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: activerecord-graph-extractor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Florian Lorrain
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-06-
|
11
|
+
date: 2025-06-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activerecord
|
@@ -335,7 +335,6 @@ files:
|
|
335
335
|
- LICENSE
|
336
336
|
- README.md
|
337
337
|
- Rakefile
|
338
|
-
- activerecord-graph-extractor.gemspec
|
339
338
|
- docs/dry_run.md
|
340
339
|
- docs/examples.md
|
341
340
|
- docs/s3_integration.md
|
@@ -1,64 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require_relative "lib/activerecord_graph_extractor/version"
|
4
|
-
|
5
|
-
Gem::Specification.new do |spec|
|
6
|
-
spec.name = "activerecord-graph-extractor"
|
7
|
-
spec.version = ActiveRecordGraphExtractor::VERSION
|
8
|
-
spec.authors = ["Florian Lorrain"]
|
9
|
-
spec.email = ["lorrain.florian@gmail.com"]
|
10
|
-
|
11
|
-
spec.summary = "Extract and import complex ActiveRecord object graphs while preserving relationships"
|
12
|
-
spec.description = "A Ruby gem for extracting and importing complex ActiveRecord object graphs with smart dependency resolution, beautiful CLI progress visualization, and memory-efficient streaming. Perfect for data migration, testing, and environment synchronization."
|
13
|
-
spec.homepage = "https://github.com/florrain/activerecord-graph-extractor"
|
14
|
-
spec.license = "MIT"
|
15
|
-
spec.required_ruby_version = ">= 2.7.0"
|
16
|
-
|
17
|
-
spec.metadata["allowed_push_host"] = "https://rubygems.org"
|
18
|
-
spec.metadata["homepage_uri"] = spec.homepage
|
19
|
-
spec.metadata["source_code_uri"] = "https://github.com/florrain/activerecord-graph-extractor"
|
20
|
-
spec.metadata["changelog_uri"] = "https://github.com/florrain/activerecord-graph-extractor/blob/main/CHANGELOG.md"
|
21
|
-
|
22
|
-
# Specify which files should be added to the gem when it is released.
|
23
|
-
spec.files = Dir.chdir(File.expand_path(__dir__)) do
|
24
|
-
`git ls-files -z`.split("\x0").reject do |f|
|
25
|
-
(f == __FILE__) || f.match(%r{\A(?:(?:bin|test|spec|features)/|\.(?:git|travis|circleci)|appveyor)})
|
26
|
-
end
|
27
|
-
end
|
28
|
-
|
29
|
-
spec.bindir = "exe"
|
30
|
-
spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
|
31
|
-
spec.require_paths = ["lib"]
|
32
|
-
|
33
|
-
# Core dependencies
|
34
|
-
spec.add_dependency "activerecord", ">= 6.0"
|
35
|
-
spec.add_dependency "activesupport", ">= 6.0"
|
36
|
-
|
37
|
-
# CLI dependencies
|
38
|
-
spec.add_dependency "thor", "~> 1.2"
|
39
|
-
spec.add_dependency "tty-progressbar", "~> 0.18"
|
40
|
-
spec.add_dependency "tty-spinner", "~> 0.9"
|
41
|
-
spec.add_dependency "tty-tree", "~> 0.4"
|
42
|
-
spec.add_dependency "pastel", "~> 0.8"
|
43
|
-
spec.add_dependency "tty-prompt", "~> 0.23"
|
44
|
-
|
45
|
-
# JSON streaming
|
46
|
-
spec.add_dependency "oj", "~> 3.13"
|
47
|
-
spec.add_dependency "yajl-ruby", ">= 1.3"
|
48
|
-
|
49
|
-
# S3 support
|
50
|
-
spec.add_dependency "aws-sdk-s3", "~> 1.0"
|
51
|
-
|
52
|
-
# Development dependencies
|
53
|
-
spec.add_development_dependency "bundler", "~> 2.0"
|
54
|
-
spec.add_development_dependency "rake", "~> 13.0"
|
55
|
-
spec.add_development_dependency "rspec", "~> 3.12"
|
56
|
-
spec.add_development_dependency "rubocop", "~> 1.57"
|
57
|
-
spec.add_development_dependency "sqlite3", "~> 1.6"
|
58
|
-
spec.add_development_dependency "database_cleaner", "~> 2.0"
|
59
|
-
spec.add_development_dependency "factory_bot", "~> 6.2"
|
60
|
-
spec.add_development_dependency "simplecov", "~> 0.22"
|
61
|
-
spec.add_development_dependency "rubocop-rspec", "~> 2.25"
|
62
|
-
spec.add_development_dependency "pry", "~> 0.14"
|
63
|
-
spec.add_development_dependency "pry-byebug", "~> 3.10"
|
64
|
-
end
|