activerecord-graph-extractor 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f64bcc21e756d5c43f0718c2e582f1f39a71f2a612f1a1c15103690fd6c82989
4
- data.tar.gz: 8b0a59753bf91dab1f7fe04859edeb67c9bd920cbb0f515ac5c422a806a8eeaa
3
+ metadata.gz: 34558c2d8251545d4a78aa2db20225c01c4118fdd35246d5c27110913b7c70fc
4
+ data.tar.gz: 12dc7966fc513148aadf1ac79767078d7f05c2b66c56747df4c217385a8f3638
5
5
  SHA512:
6
- metadata.gz: bf60e230bd435b0fa9b7463181dde2591a85a73711b9928cc7a35649999afaf28f3b0e93c40a34b4ede516be364e4a6557876e8e58760825c38b9aa3f8ba490d
7
- data.tar.gz: 37e91e7e9006a1a133819e98155adfac51ce2d8685f79591103183045390a9cf5796b68ae4fc8264566d4ec949733183f2f7b5db49c1081fd9593b600c9ed7c0
6
+ metadata.gz: 0ebfe420b66bbdb6eae26124fff14e22d259df0f381bec4faaef8b3d1113701fe540803b77021792a28d82ce0226358002515f3bc54329ef94d6028709335f17
7
+ data.tar.gz: fd48bb0cc84f0e6917ea10d8be396002dde9e514c36c5b7b830f69cb6e497a4e2b672ee8acd134c22dcf2ae90c164326456340e2e1a8b18f56fd13736aee3452
@@ -6,7 +6,7 @@ module ActiveRecordGraphExtractor
6
6
  :validate_records, :use_transactions, :handle_circular_references,
7
7
  :skip_missing_models, :included_models, :excluded_models,
8
8
  :included_relationships, :excluded_relationships,
9
- :custom_serializers, :primary_key_strategy
9
+ :custom_serializers, :primary_key_strategy, :skip_non_primary_database_models
10
10
 
11
11
  def initialize
12
12
  reset!
@@ -21,6 +21,7 @@ module ActiveRecordGraphExtractor
21
21
  @use_transactions = true
22
22
  @handle_circular_references = true
23
23
  @skip_missing_models = true
24
+ @skip_non_primary_database_models = true
24
25
  @included_models = []
25
26
  @excluded_models = []
26
27
  @included_relationships = []
@@ -2,11 +2,14 @@
2
2
 
3
3
  module ActiveRecordGraphExtractor
4
4
  class DryRunAnalyzer
5
- attr_reader :config, :relationship_analyzer
5
+ attr_reader :config, :relationship_analyzer, :model_count_cache, :relationship_count_cache, :sample_record_cache
6
6
 
7
7
  def initialize(config = ActiveRecordGraphExtractor.configuration)
8
8
  @config = config
9
9
  @relationship_analyzer = RelationshipAnalyzer.new(config)
10
+ @model_count_cache = {} # Cache for model.count queries
11
+ @relationship_count_cache = {} # Cache for relationship count estimates
12
+ @sample_record_cache = {} # Cache for sample records
10
13
  end
11
14
 
12
15
  def analyze(root_objects, options = {})
@@ -134,10 +137,61 @@ module ActiveRecordGraphExtractor
134
137
  end
135
138
 
136
139
  def estimate_relationship_count(model_class, relationship_name, relationship_info)
137
- # Try to get a sample record to estimate relationship sizes
138
- sample_record = model_class.first
139
- return 0 unless sample_record
140
+ # Create cache key for this specific relationship
141
+ cache_key = "#{model_class.name}##{relationship_name}"
142
+
143
+ # Return cached result if available
144
+ return @relationship_count_cache[cache_key] if @relationship_count_cache.key?(cache_key)
145
+
146
+ # Get cached sample record or fetch and cache it
147
+ sample_record = get_cached_sample_record(model_class)
148
+
149
+ result = if sample_record.nil?
150
+ 0
151
+ else
152
+ calculate_relationship_estimate(model_class, relationship_name, relationship_info, sample_record)
153
+ end
154
+
155
+ # Cache the result
156
+ @relationship_count_cache[cache_key] = result
157
+ result
158
+ end
159
+
160
+ def get_cached_sample_record(model_class)
161
+ model_name = model_class.name
162
+
163
+ # Return cached sample record if available
164
+ return @sample_record_cache[model_name] if @sample_record_cache.key?(model_name)
165
+
166
+ # Fetch and cache sample record (or nil if none exists)
167
+ sample_record = begin
168
+ model_class.first
169
+ rescue StandardError
170
+ nil
171
+ end
172
+
173
+ @sample_record_cache[model_name] = sample_record
174
+ sample_record
175
+ end
140
176
 
177
+ def get_cached_model_count(model_class)
178
+ model_name = model_class.name
179
+
180
+ # Return cached count if available
181
+ return @model_count_cache[model_name] if @model_count_cache.key?(model_name)
182
+
183
+ # Fetch and cache model count
184
+ count = begin
185
+ model_class.count
186
+ rescue StandardError
187
+ 0
188
+ end
189
+
190
+ @model_count_cache[model_name] = count
191
+ count
192
+ end
193
+
194
+ def calculate_relationship_estimate(model_class, relationship_name, relationship_info, sample_record)
141
195
  begin
142
196
  case relationship_info['type']
143
197
  when 'has_many', 'has_and_belongs_to_many'
@@ -145,8 +199,8 @@ module ActiveRecordGraphExtractor
145
199
  related_records = sample_record.public_send(relationship_name)
146
200
  if related_records.respond_to?(:count)
147
201
  sample_count = related_records.limit(100).count
148
- # Estimate total based on sample (with some reasonable assumptions)
149
- total_records = model_class.count
202
+ # Use cached total record count
203
+ total_records = get_cached_model_count(model_class)
150
204
  return 0 if total_records == 0
151
205
 
152
206
  # Use sample count as average, but cap at reasonable limits
@@ -155,17 +209,36 @@ module ActiveRecordGraphExtractor
155
209
  end
156
210
  when 'has_one', 'belongs_to'
157
211
  # For singular relationships, estimate 1 per parent record
158
- total_records = model_class.count
212
+ total_records = get_cached_model_count(model_class)
159
213
  return (total_records * 0.9).to_i # 90% factor assuming some records might not have the relationship
160
214
  end
161
215
  rescue StandardError
162
216
  # If we can't estimate, return a conservative estimate
163
- return model_class.count > 0 ? [model_class.count / 10, 1].max : 0
217
+ total_records = get_cached_model_count(model_class)
218
+ return total_records > 0 ? [total_records / 10, 1].max : 0
164
219
  end
165
220
 
166
221
  0
167
222
  end
168
223
 
224
+ # Clear all caches (useful for testing or memory management)
225
+ def clear_cache!
226
+ @model_count_cache.clear
227
+ @relationship_count_cache.clear
228
+ @sample_record_cache.clear
229
+ end
230
+
231
+ # Get cache statistics for debugging
232
+ def cache_stats
233
+ {
234
+ model_count_cache_size: @model_count_cache.size,
235
+ relationship_count_cache_size: @relationship_count_cache.size,
236
+ sample_record_cache_size: @sample_record_cache.size,
237
+ cached_models: @model_count_cache.keys,
238
+ cached_relationships: @relationship_count_cache.keys
239
+ }
240
+ end
241
+
169
242
  def estimate_file_size(model_counts, relationship_map)
170
243
  total_size = 0
171
244
 
@@ -8,6 +8,7 @@ module ActiveRecordGraphExtractor
8
8
  @config = config
9
9
  @visited_models = Set.new
10
10
  @circular_paths = []
11
+ @primary_connection = ActiveRecord::Base.connection
11
12
  end
12
13
 
13
14
  def analyze_model(model_class)
@@ -25,12 +26,21 @@ module ActiveRecordGraphExtractor
25
26
  relationship_name = association.name.to_s
26
27
  next unless config.relationship_included?(relationship_name)
27
28
 
28
- model_name = association.klass.name
29
+ # Check if the associated model uses the primary database
30
+ associated_model = association.klass
31
+ model_name = associated_model.name
32
+
33
+ # Skip if model is excluded
29
34
  next unless config.model_included?(model_name)
35
+
36
+ # Skip if the associated model doesn't use the primary database (if enabled)
37
+ if config.skip_non_primary_database_models
38
+ next unless uses_primary_database?(associated_model)
39
+ end
30
40
 
31
41
  relationships[relationship_name] = {
32
42
  'type' => association.macro.to_s,
33
- 'model_class' => association.klass,
43
+ 'model_class' => associated_model,
34
44
  'model_name' => model_name,
35
45
  'foreign_key' => association.foreign_key,
36
46
  'polymorphic' => association.options[:polymorphic] || false,
@@ -42,6 +52,13 @@ module ActiveRecordGraphExtractor
42
52
  else
43
53
  raise e
44
54
  end
55
+ rescue StandardError => e
56
+ # Skip associations that cause database connection errors
57
+ if e.message.include?('Unknown database') || e.message.include?('database connection')
58
+ next
59
+ else
60
+ raise e unless config.skip_missing_models
61
+ end
45
62
  end
46
63
  end
47
64
 
@@ -101,7 +118,7 @@ module ActiveRecordGraphExtractor
101
118
  end
102
119
  end
103
120
 
104
- dependency_graph[model_class] = dependencies
121
+ dependency_graph[model_class] = dependencies.uniq
105
122
  end
106
123
 
107
124
  dependency_graph
@@ -113,6 +130,61 @@ module ActiveRecordGraphExtractor
113
130
 
114
131
  private
115
132
 
133
+ def uses_primary_database?(model_class)
134
+ begin
135
+ # Check if the model inherits from a non-primary database class
136
+ return false if inherits_from_non_primary_database?(model_class)
137
+
138
+ # Get the model's connection
139
+ model_connection = model_class.connection
140
+
141
+ # Compare connection configurations
142
+ primary_config = @primary_connection.instance_variable_get(:@config) || {}
143
+ model_config = model_connection.instance_variable_get(:@config) || {}
144
+
145
+ # Check if they're the same connection object
146
+ return true if model_connection == @primary_connection
147
+
148
+ # Check if they have the same database configuration
149
+ if primary_config.any? && model_config.any?
150
+ # Compare key connection parameters
151
+ same_database = primary_config[:database] == model_config[:database]
152
+ same_host = (primary_config[:host] || 'localhost') == (model_config[:host] || 'localhost')
153
+ same_port = (primary_config[:port] || 5432) == (model_config[:port] || 5432)
154
+
155
+ return same_database && same_host && same_port
156
+ end
157
+
158
+ # If we can't determine, check if the connection pool is the same
159
+ primary_pool = ActiveRecord::Base.connection_pool
160
+ model_pool = model_class.connection_pool
161
+
162
+ return primary_pool == model_pool
163
+
164
+ rescue StandardError => e
165
+ # If any error occurs (including database connection errors), assume it's not the primary database
166
+ return false
167
+ end
168
+ end
169
+
170
+ def inherits_from_non_primary_database?(model_class)
171
+ # Check if the model inherits from known non-primary database classes
172
+ non_primary_classes = [
173
+ 'Ops::ReplicaDatabase',
174
+ 'Analytics::Base',
175
+ 'Reporting::Base',
176
+ 'DataWarehouse::Base',
177
+ 'External::Base',
178
+ 'ThirdParty::Base'
179
+ ]
180
+
181
+ ancestors = model_class.ancestors.map(&:name).compact
182
+ non_primary_classes.any? { |non_primary_class| ancestors.include?(non_primary_class) }
183
+ rescue StandardError
184
+ # If we can't determine the ancestry, err on the side of caution
185
+ false
186
+ end
187
+
116
188
  def filter_relationships(relationships)
117
189
  relationships.select do |name, info|
118
190
  config.model_included?(info['model_name']) &&
@@ -128,6 +200,11 @@ module ActiveRecordGraphExtractor
128
200
 
129
201
  return false unless config.model_included?(association.klass.name)
130
202
 
203
+ # Check if the associated model uses the primary database (if enabled)
204
+ if config.skip_non_primary_database_models
205
+ return false unless uses_primary_database?(association.klass)
206
+ end
207
+
131
208
  # Skip polymorphic associations that can't be resolved
132
209
  return false if association.polymorphic? && association.foreign_type.nil?
133
210
 
@@ -135,6 +212,12 @@ module ActiveRecordGraphExtractor
135
212
  rescue NameError
136
213
  # Skip associations that reference non-existent models
137
214
  false
215
+ rescue StandardError => e
216
+ # Skip associations that cause database connection errors
217
+ if e.message.include?('Unknown database') || e.message.include?('database connection')
218
+ return false
219
+ end
220
+ raise e unless config.skip_missing_models
138
221
  end
139
222
 
140
223
  def build_relationship_info(association)
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module ActiveRecordGraphExtractor
4
- VERSION = "0.1.0"
5
- end
4
+ VERSION = "0.2.0"
5
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: activerecord-graph-extractor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Florian Lorrain
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2025-06-14 00:00:00.000000000 Z
11
+ date: 2025-06-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activerecord
@@ -335,7 +335,6 @@ files:
335
335
  - LICENSE
336
336
  - README.md
337
337
  - Rakefile
338
- - activerecord-graph-extractor.gemspec
339
338
  - docs/dry_run.md
340
339
  - docs/examples.md
341
340
  - docs/s3_integration.md
@@ -1,64 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require_relative "lib/activerecord_graph_extractor/version"
4
-
5
- Gem::Specification.new do |spec|
6
- spec.name = "activerecord-graph-extractor"
7
- spec.version = ActiveRecordGraphExtractor::VERSION
8
- spec.authors = ["Florian Lorrain"]
9
- spec.email = ["lorrain.florian@gmail.com"]
10
-
11
- spec.summary = "Extract and import complex ActiveRecord object graphs while preserving relationships"
12
- spec.description = "A Ruby gem for extracting and importing complex ActiveRecord object graphs with smart dependency resolution, beautiful CLI progress visualization, and memory-efficient streaming. Perfect for data migration, testing, and environment synchronization."
13
- spec.homepage = "https://github.com/florrain/activerecord-graph-extractor"
14
- spec.license = "MIT"
15
- spec.required_ruby_version = ">= 2.7.0"
16
-
17
- spec.metadata["allowed_push_host"] = "https://rubygems.org"
18
- spec.metadata["homepage_uri"] = spec.homepage
19
- spec.metadata["source_code_uri"] = "https://github.com/florrain/activerecord-graph-extractor"
20
- spec.metadata["changelog_uri"] = "https://github.com/florrain/activerecord-graph-extractor/blob/main/CHANGELOG.md"
21
-
22
- # Specify which files should be added to the gem when it is released.
23
- spec.files = Dir.chdir(File.expand_path(__dir__)) do
24
- `git ls-files -z`.split("\x0").reject do |f|
25
- (f == __FILE__) || f.match(%r{\A(?:(?:bin|test|spec|features)/|\.(?:git|travis|circleci)|appveyor)})
26
- end
27
- end
28
-
29
- spec.bindir = "exe"
30
- spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
31
- spec.require_paths = ["lib"]
32
-
33
- # Core dependencies
34
- spec.add_dependency "activerecord", ">= 6.0"
35
- spec.add_dependency "activesupport", ">= 6.0"
36
-
37
- # CLI dependencies
38
- spec.add_dependency "thor", "~> 1.2"
39
- spec.add_dependency "tty-progressbar", "~> 0.18"
40
- spec.add_dependency "tty-spinner", "~> 0.9"
41
- spec.add_dependency "tty-tree", "~> 0.4"
42
- spec.add_dependency "pastel", "~> 0.8"
43
- spec.add_dependency "tty-prompt", "~> 0.23"
44
-
45
- # JSON streaming
46
- spec.add_dependency "oj", "~> 3.13"
47
- spec.add_dependency "yajl-ruby", ">= 1.3"
48
-
49
- # S3 support
50
- spec.add_dependency "aws-sdk-s3", "~> 1.0"
51
-
52
- # Development dependencies
53
- spec.add_development_dependency "bundler", "~> 2.0"
54
- spec.add_development_dependency "rake", "~> 13.0"
55
- spec.add_development_dependency "rspec", "~> 3.12"
56
- spec.add_development_dependency "rubocop", "~> 1.57"
57
- spec.add_development_dependency "sqlite3", "~> 1.6"
58
- spec.add_development_dependency "database_cleaner", "~> 2.0"
59
- spec.add_development_dependency "factory_bot", "~> 6.2"
60
- spec.add_development_dependency "simplecov", "~> 0.22"
61
- spec.add_development_dependency "rubocop-rspec", "~> 2.25"
62
- spec.add_development_dependency "pry", "~> 0.14"
63
- spec.add_development_dependency "pry-byebug", "~> 3.10"
64
- end