schema_sherlock 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -1
- data/Gemfile.lock +4 -1
- data/README.md +2 -0
- data/lib/schema_sherlock/analyzers/foreign_key_detector.rb +4 -5
- data/lib/schema_sherlock/analyzers/index_recommendation_detector.rb +44 -0
- data/lib/schema_sherlock/binary_index.rb +113 -0
- data/lib/schema_sherlock/commands/analyze_command.rb +59 -2
- data/lib/schema_sherlock/file_cache.rb +189 -0
- data/lib/schema_sherlock/indexed_usage_tracker.rb +19 -0
- data/lib/schema_sherlock/optimized_scanner.rb +167 -0
- data/lib/schema_sherlock/performance_optimizer.rb +111 -0
- data/lib/schema_sherlock/schema_cache.rb +118 -0
- data/lib/schema_sherlock/usage_tracker.rb +37 -23
- data/lib/schema_sherlock/version.rb +1 -1
- data/schema_sherlock.gemspec +2 -0
- metadata +36 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 5c1ac4a467622a053245b33bb795bd7a08b4cc4408cf99c9a74088f29e46c948
|
|
4
|
+
data.tar.gz: af8c4a4e1700ed94300bc6e95f72d70218477ca93de0049699ac94d08ad2807a
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: fe035c9e89e93ed74870d7f6adbc8d5f1596b8f0f6d5fbe6178a0628aa5f124b56e353501e73c792a4e31b0879777bb0ac0d1703207e52d7cf0932649ab1cb61
|
|
7
|
+
data.tar.gz: f80d18cc2a292c8c85c401dd359980e362dce3666c9bd516e75c0f9acdada5005f0d6845a6734cb7d729da4f490a9b3db6140692a6f1b798aae2badcdd5a85e3
|
data/CHANGELOG.md
CHANGED
|
@@ -16,4 +16,13 @@
|
|
|
16
16
|
- Filters suggestions based on actual usage frequency
|
|
17
17
|
- Provides detailed analysis reports
|
|
18
18
|
- Supports complex foreign key types (integer, bigint, UUID, string)
|
|
19
|
-
- Smart table and model inference
|
|
19
|
+
- Smart table and model inference
|
|
20
|
+
|
|
21
|
+
## [0.1.1] - 2025-01-24
|
|
22
|
+
|
|
23
|
+
### Features
|
|
24
|
+
- Adds recommendations for missing indices for foreign keys
|
|
25
|
+
|
|
26
|
+
### Bugs and improvements
|
|
27
|
+
- Make analysis 80% faster
|
|
28
|
+
- Safely load models to avoid constraint-violation errors
|
data/Gemfile.lock
CHANGED
|
@@ -1,8 +1,10 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
schema_sherlock (0.1.0)
|
|
4
|
+
schema_sherlock (0.1.1)
|
|
5
5
|
activerecord (>= 6.0)
|
|
6
|
+
concurrent-ruby (~> 1.0)
|
|
7
|
+
msgpack (~> 1.0)
|
|
6
8
|
rails (>= 6.0)
|
|
7
9
|
thor (~> 1.0)
|
|
8
10
|
|
|
@@ -113,6 +115,7 @@ GEM
|
|
|
113
115
|
marcel (1.0.4)
|
|
114
116
|
mini_mime (1.1.5)
|
|
115
117
|
minitest (5.25.5)
|
|
118
|
+
msgpack (1.8.0)
|
|
116
119
|
net-imap (0.5.8)
|
|
117
120
|
date
|
|
118
121
|
net-protocol
|
data/README.md
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
# SchemaSherlock
|
|
2
2
|
|
|
3
|
+
[![Gem Version](https://badge.fury.io/rb/schema_sherlock.svg)](https://badge.fury.io/rb/schema_sherlock)
|
|
4
|
+
|
|
3
5
|
Intelligent Rails model analysis and annotation tool that extends beyond traditional schema annotation to provide intelligent analysis and actionable suggestions for Rails model code quality, performance, and maintainability.
|
|
4
6
|
|
|
5
7
|
## Installation
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
require_relative "base_analyzer"
|
|
2
2
|
require_relative "../usage_tracker"
|
|
3
|
+
require_relative "../schema_cache"
|
|
3
4
|
|
|
4
5
|
module SchemaSherlock
|
|
5
6
|
module Analyzers
|
|
@@ -77,7 +78,7 @@ module SchemaSherlock
|
|
|
77
78
|
end
|
|
78
79
|
|
|
79
80
|
def table_exists?(table_name)
|
|
80
|
-
|
|
81
|
+
SchemaCache.table_exists?(table_name)
|
|
81
82
|
end
|
|
82
83
|
|
|
83
84
|
def valid_foreign_key?(column)
|
|
@@ -155,13 +156,11 @@ module SchemaSherlock
|
|
|
155
156
|
# Fallback method when model class is not available
|
|
156
157
|
# Check if the table has an 'id' column with compatible type
|
|
157
158
|
begin
|
|
158
|
-
|
|
159
|
-
primary_key_name = connection.primary_key(table_name)
|
|
159
|
+
primary_key_name = SchemaCache.primary_key(table_name)
|
|
160
160
|
|
|
161
161
|
return false unless primary_key_name
|
|
162
162
|
|
|
163
|
-
|
|
164
|
-
primary_key_column = table_columns.find { |col| col.name == primary_key_name }
|
|
163
|
+
primary_key_column = SchemaCache.column(table_name, primary_key_name)
|
|
165
164
|
|
|
166
165
|
return false unless primary_key_column
|
|
167
166
|
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
require_relative "base_analyzer"
|
|
2
|
+
require_relative "../schema_cache"
|
|
3
|
+
|
|
4
|
+
module SchemaSherlock
  module Analyzers
    # Recommends `add_index` migrations for foreign-key-style columns (names
    # ending in "_id") that no existing index can serve.
    #
    # NOTE(review): `columns`, `table_name` and the `results` reader are not
    # defined in this class; they are presumably provided by BaseAnalyzer --
    # confirm against that class.
    class IndexRecommendationDetector < BaseAnalyzer
      FOREIGN_KEY_SUFFIX = "_id"

      # Runs the analysis and stores the outcome in @results.
      #
      # @return [Hash] `{ missing_foreign_key_indexes: [Hash, ...] }` where each
      #   entry has the keys :column, :table, :migration and :reason
      def analyze
        @results = {
          missing_foreign_key_indexes: find_missing_foreign_key_indexes
        }
      end

      private

      # Foreign-key columns for which no covering index exists, each turned
      # into a recommendation hash.
      def find_missing_foreign_key_indexes
        foreign_key_columns
          .reject { |column| has_index_on_column?(column.name) }
          .map { |column| recommendation_for(column.name) }
      end

      # Builds the recommendation hash reported for one un-indexed column.
      def recommendation_for(column_name)
        {
          column: column_name,
          table: table_name,
          migration: "add_index :#{table_name}, :#{column_name}",
          reason: "Foreign key without index"
        }
      end

      # Columns whose name ends in "_id". A bare "id" column can never match
      # the suffix, so no separate exclusion is needed.
      def foreign_key_columns
        @foreign_key_columns ||= columns.select { |col| col.name.end_with?(FOREIGN_KEY_SUFFIX) }
      end

      # True when an existing index has +column_name+ as its first (leftmost)
      # column. A composite index led by the column serves lookups on that
      # column as well as a single-column index does, so it must not trigger
      # a "missing index" recommendation.
      def has_index_on_column?(column_name)
        existing_indexes.any? do |index|
          # Array() also copes with adapters that report a single String
          # (e.g. an expression index) instead of an Array of column names.
          Array(index.columns).first == column_name
        end
      end

      # Index definitions of the analysed table, memoised per analyzer instance.
      def existing_indexes
        @existing_indexes ||= SchemaCache.indexes(table_name)
      end
    end
  end
end
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
require 'msgpack'
|
|
2
|
+
require 'digest'
|
|
3
|
+
|
|
4
|
+
module SchemaSherlock
  # On-disk, MessagePack-serialised index of the Ruby files below a project
  # root. For every indexed file it records size, mtime and an MD5 checksum,
  # and it maps "<name>_id" column names to the files that appear to
  # reference them (via `.<name>_id` access or `.joins(...)`/`.includes(...)`).
  #
  # The index always has this shape, whether freshly built or loaded:
  #   {
  #     version: String, created_at: Integer,
  #     files: { "path" => { size:, mtime:, checksum: } },
  #     column_references: { "user_id" => ["path", ...] },
  #     file_checksums: {}
  #   }
  class BinaryIndex
    INDEX_VERSION = "1.0"
    INDEX_FILE = "tmp/.schema_sherlock_index"

    SKIPPED_PATH_SEGMENTS = %w[/spec/ /test/ /vendor/ /node_modules/].freeze
    COLUMN_ACCESS_PATTERN = /\.(\w+)_id\b/
    ASSOCIATION_CALL_PATTERN = /\.(?:joins|includes)\s*\(\s*['":]?(\w+)/

    class << self
      # Returns the cached index when it is present, readable and still
      # matches the files on disk; otherwise rebuilds and saves it.
      #
      # @param root_path [String] project root to index
      # @return [Hash, nil] the index, or nil when building failed
      def load_or_build(root_path)
        index_path = File.join(root_path, INDEX_FILE)

        # Load once and reuse the result for both validation and return.
        index = load_index(index_path) if File.exist?(index_path)
        return index if index && index_valid?(index, index_path, root_path)

        build_and_save_index(root_path, index_path)
      end

      # Reads an index file and normalises it to the documented shape.
      #
      # MessagePack stores Symbol keys as strings, so only the structural
      # keys are converted back to Symbols; file paths and column names stay
      # Strings so that lookups such as `index[:column_references]["user_id"]`
      # work identically on built and loaded indexes.
      #
      # @param path [String] index file location
      # @return [Hash, nil] nil when the file cannot be read or decoded
      def load_index(path)
        normalize_index(MessagePack.unpack(File.binread(path)))
      rescue => e
        log_warning("Failed to load index: #{e.message}")
        nil
      end

      # Builds the index and writes it to +index_path+. A failed write is
      # logged and the in-memory index is still returned.
      #
      # @return [Hash, nil] nil only when building itself raised
      def build_and_save_index(root_path, index_path)
        index = build_index(root_path)

        # The target directory (normally "<root>/tmp") may not exist yet.
        directory = File.dirname(index_path)
        Dir.mkdir(directory) unless Dir.exist?(directory)

        File.binwrite(index_path, MessagePack.pack(index))

        index
      rescue => e
        log_warning("Failed to save index: #{e.message}")
        index
      end

      private

      # Restores Symbol keys on the top level and on each file entry only.
      def normalize_index(raw)
        return nil unless raw.is_a?(Hash)

        index = raw.transform_keys(&:to_sym)
        index[:files] = (index[:files] || {}).transform_values do |meta|
          meta.transform_keys(&:to_sym)
        end
        index[:column_references] ||= {}
        index[:file_checksums] ||= {}
        index
      end

      # An index is valid when its version matches, it covers exactly the
      # files that would be indexed now, and none of them was modified after
      # the index file was written.
      def index_valid?(index, index_path, root_path)
        return false unless index[:version] == INDEX_VERSION

        current_files = indexable_files(root_path)
        return false unless current_files.sort == index[:files].keys.sort

        index_time = File.mtime(index_path)
        current_files.none? { |file| File.mtime(file) > index_time }
      rescue SystemCallError
        # A file vanished between globbing and stat: treat the index as stale.
        false
      end

      def build_index(root_path)
        index = {
          version: INDEX_VERSION,
          created_at: Time.now.to_i,
          files: {},
          column_references: {},
          file_checksums: {}
        }

        indexable_files(root_path).each do |file|
          # `scrub` replaces invalid byte sequences; the :invalid/:undef read
          # options only act when transcoding, so they would leave them in
          # place and make the regex scans below raise.
          content = File.read(file, encoding: 'UTF-8').scrub

          index[:files][file] = {
            size: File.size(file),
            mtime: File.mtime(file).to_i,
            checksum: Digest::MD5.hexdigest(content)
          }

          pre_scan_content(content, file, index)
        end

        index
      end

      # All Ruby files below +root_path+ that are not in a skipped directory.
      def indexable_files(root_path)
        Dir.glob(File.join(root_path, "**/*.rb")).reject do |file|
          should_skip_file?(file, root_path)
        end
      end

      # Registers +file+ once under every column it appears to reference.
      # Each file is listed at most once per column, so later per-file
      # counting does not multiply matches.
      def pre_scan_content(content, file, index)
        references = index[:column_references]

        referenced_columns(content).each do |column|
          (references[column] ||= []) << file
        end
      end

      # Column names referenced directly (`.user_id`) or implied by an
      # association call (`.joins(:user)` => "user_id"), without duplicates.
      def referenced_columns(content)
        names = content.scan(COLUMN_ACCESS_PATTERN) + content.scan(ASSOCIATION_CALL_PATTERN)
        names.flatten.map { |name| "#{name}_id" }.uniq
      end

      # Skips spec/test/vendor/node_modules files. When +root_path+ is given,
      # only the part of the path below the root is inspected, so a project
      # that itself lives under e.g. ".../test/" is still indexed.
      def should_skip_file?(file, root_path = nil)
        relative = root_path ? file.delete_prefix(root_path.to_s) : file
        relative = "/#{relative}" unless relative.start_with?("/")

        SKIPPED_PATH_SEGMENTS.any? { |segment| relative.include?(segment) }
      end

      # Logs through Rails when it is loaded and has a logger; otherwise a no-op.
      def log_warning(message)
        return unless defined?(Rails) && Rails.respond_to?(:logger)

        Rails.logger&.warn(message)
      end
    end
  end
end
|
|
@@ -1,5 +1,9 @@
|
|
|
1
1
|
require_relative "base_command"
|
|
2
2
|
require_relative "../analyzers/foreign_key_detector"
|
|
3
|
+
require_relative "../analyzers/index_recommendation_detector"
|
|
4
|
+
require_relative "../schema_cache"
|
|
5
|
+
require_relative "../file_cache"
|
|
6
|
+
require_relative "../binary_index"
|
|
3
7
|
|
|
4
8
|
module SchemaSherlock
|
|
5
9
|
module Commands
|
|
@@ -7,6 +11,7 @@ module SchemaSherlock
|
|
|
7
11
|
desc "analyze [MODEL]", "Analyze models for missing associations and optimization opportunities"
|
|
8
12
|
option :output, type: :string, desc: "Output file for analysis results"
|
|
9
13
|
option :min_usage, type: :numeric, desc: "Minimum usage threshold for suggestions (overrides config)"
|
|
14
|
+
option :use_index, type: :boolean, default: true, desc: "Use binary index for faster analysis (if available)"
|
|
10
15
|
|
|
11
16
|
def analyze(model_name = nil)
|
|
12
17
|
load_rails_environment
|
|
@@ -20,11 +25,41 @@ module SchemaSherlock
|
|
|
20
25
|
models = model_name ? [find_model(model_name)] : all_models
|
|
21
26
|
|
|
22
27
|
puts "Analyzing #{models.length} model(s)..."
|
|
28
|
+
|
|
29
|
+
# Try to load binary index for faster analysis
|
|
30
|
+
@binary_index = nil
|
|
31
|
+
if options[:use_index] && defined?(Rails) && Rails.root
|
|
32
|
+
puts "Loading binary index..."
|
|
33
|
+
@binary_index = BinaryIndex.load_or_build(Rails.root.to_s)
|
|
34
|
+
if @binary_index
|
|
35
|
+
puts " Index loaded with #{@binary_index[:files].size} files indexed"
|
|
36
|
+
end
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
# Preload metadata cache for performance
|
|
40
|
+
puts "Preloading database metadata..."
|
|
41
|
+
cache_stats = SchemaCache.preload_all_metadata
|
|
42
|
+
puts " Cached: #{cache_stats[:tables_cached]} tables, #{cache_stats[:columns_cached]} column sets, #{cache_stats[:indexes_cached]} index sets"
|
|
43
|
+
|
|
44
|
+
# Preload file cache for performance (only if usage tracking is enabled and no index available)
|
|
45
|
+
if SchemaSherlock.configuration.min_usage_threshold && SchemaSherlock.configuration.min_usage_threshold > 0
|
|
46
|
+
if @binary_index
|
|
47
|
+
puts "Using binary index for file analysis (skipping file cache preload)"
|
|
48
|
+
else
|
|
49
|
+
puts "Preloading file cache..."
|
|
50
|
+
file_stats = FileCache.preload_all_files
|
|
51
|
+
puts " Cached: #{file_stats[:files_scanned]} files (#{(file_stats[:total_size] / 1024.0 / 1024.0).round(2)} MB), #{file_stats[:files_failed]} failed"
|
|
52
|
+
end
|
|
53
|
+
end
|
|
23
54
|
|
|
24
55
|
results = {}
|
|
25
56
|
|
|
26
57
|
models.each do |model|
|
|
27
58
|
puts " Analyzing #{model.name}..."
|
|
59
|
+
|
|
60
|
+
# Set binary index for usage tracker
|
|
61
|
+
UsageTracker.binary_index = @binary_index
|
|
62
|
+
|
|
28
63
|
analysis = analyze_model(model)
|
|
29
64
|
|
|
30
65
|
# Only include models with issues in results
|
|
@@ -39,6 +74,10 @@ module SchemaSherlock
|
|
|
39
74
|
say e.message, :red
|
|
40
75
|
exit 1
|
|
41
76
|
ensure
|
|
77
|
+
# Clear caches to free memory
|
|
78
|
+
SchemaCache.clear_cache
|
|
79
|
+
FileCache.clear_cache
|
|
80
|
+
|
|
42
81
|
# Restore original threshold if it was overridden
|
|
43
82
|
if options[:min_usage] && defined?(original_threshold)
|
|
44
83
|
SchemaSherlock.configuration.min_usage_threshold = original_threshold
|
|
@@ -49,7 +88,8 @@ module SchemaSherlock
|
|
|
49
88
|
|
|
50
89
|
def analyze_model(model)
|
|
51
90
|
{
|
|
52
|
-
foreign_key_analysis: run_foreign_key_analysis(model)
|
|
91
|
+
foreign_key_analysis: run_foreign_key_analysis(model),
|
|
92
|
+
index_analysis: run_index_analysis(model)
|
|
53
93
|
}
|
|
54
94
|
end
|
|
55
95
|
|
|
@@ -59,12 +99,21 @@ module SchemaSherlock
|
|
|
59
99
|
analyzer.results
|
|
60
100
|
end
|
|
61
101
|
|
|
102
|
+
def run_index_analysis(model)
|
|
103
|
+
analyzer = SchemaSherlock::Analyzers::IndexRecommendationDetector.new(model)
|
|
104
|
+
analyzer.analyze
|
|
105
|
+
analyzer.results
|
|
106
|
+
end
|
|
107
|
+
|
|
62
108
|
def has_issues?(analysis)
|
|
63
109
|
foreign_key_analysis = analysis[:foreign_key_analysis]
|
|
64
110
|
missing = foreign_key_analysis[:missing_associations]
|
|
65
111
|
orphaned = foreign_key_analysis[:orphaned_foreign_keys]
|
|
66
112
|
|
|
67
|
-
|
|
113
|
+
index_analysis = analysis[:index_analysis]
|
|
114
|
+
missing_indexes = index_analysis[:missing_foreign_key_indexes]
|
|
115
|
+
|
|
116
|
+
missing.any? || orphaned.any? || missing_indexes.any?
|
|
68
117
|
end
|
|
69
118
|
|
|
70
119
|
def display_results(results, total_models)
|
|
@@ -91,6 +140,14 @@ module SchemaSherlock
|
|
|
91
140
|
puts " #{key[:column]} -> #{key[:issue]}"
|
|
92
141
|
end
|
|
93
142
|
end
|
|
143
|
+
|
|
144
|
+
missing_indexes = analysis[:index_analysis][:missing_foreign_key_indexes]
|
|
145
|
+
if missing_indexes.any?
|
|
146
|
+
puts " Missing Indexes:"
|
|
147
|
+
missing_indexes.each do |idx|
|
|
148
|
+
puts " #{idx[:migration]} # #{idx[:reason]}"
|
|
149
|
+
end
|
|
150
|
+
end
|
|
94
151
|
end
|
|
95
152
|
|
|
96
153
|
puts "\n" + "="*50
|
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
require 'concurrent'
|
|
2
|
+
|
|
3
|
+
module SchemaSherlock
  # Process-wide, in-memory cache of Ruby source file contents, keyed by
  # file path, plus counters describing what `preload_all_files` scanned.
  class FileCache
    EMPTY_STATS = { files_scanned: 0, files_failed: 0, total_size: 0 }.freeze
    MAX_FILE_SIZE = 1_048_576 # 1MB
    FILE_READ_TIMEOUT = 10     # seconds to wait for one file's future
    POOL_SHUTDOWN_TIMEOUT = 30 # seconds to wait for the pool to drain
    TEST_FILE_PATTERN = /(_spec|_test|\.spec|\.test)\.rb$/
    # '/migrate/' also covers '/db/migrate/'.
    SKIPPED_PATH_SEGMENTS = %w[/spec/ /test/ /migrate/].freeze
    APP_SUBDIRECTORIES = %w[controllers models services jobs workers queries].freeze

    class << self
      # (Re)creates an empty cache and zeroed statistics.
      def initialize_cache
        @file_contents_cache = {}
        @scan_stats = EMPTY_STATS.dup
      end

      # Drops all cached contents and resets the statistics.
      def clear_cache
        @file_contents_cache&.clear
        @scan_stats = EMPTY_STATS.dup
      end

      # Reads every scannable project file into the cache.
      #
      # @param max_threads [Integer] values above 1 read files through a
      #   thread pool; otherwise files are read sequentially
      # @return [Hash] copy of the scan statistics
      #   (:files_scanned, :files_failed, :total_size in bytes)
      def preload_all_files(max_threads: 4)
        initialize_cache if @file_contents_cache.nil?

        files_to_scan = gather_all_files
        return EMPTY_STATS.dup if files_to_scan.empty?

        if max_threads > 1
          preload_files_parallel(files_to_scan, max_threads)
        else
          preload_files_sequential(files_to_scan)
        end

        @scan_stats.dup
      end

      # Returns the cached content for +file_path+, reading and caching it on
      # a miss.
      #
      # @return [String, nil] nil when the file is unreadable or looks binary
      def get_file_content(file_path)
        initialize_cache if @file_contents_cache.nil?

        return @file_contents_cache[file_path] if @file_contents_cache.key?(file_path)

        content = read_file_safely(file_path)
        @file_contents_cache[file_path] = content if content
        content
      end

      # @return [Integer] number of files currently cached
      def cached_files_count
        @file_contents_cache&.size || 0
      end

      # @return [Hash] `{ cached_files: Integer, scan_stats: Hash }`
      def cache_stats
        {
          cached_files: cached_files_count,
          scan_stats: @scan_stats || EMPTY_STATS.dup
        }
      end

      # Sums matches over all cached files.
      #
      # Without a block, counts occurrences of +pattern+ in each file. With a
      # block, yields (content, file_path, table_name, column_name) and adds
      # the block's return value; a nil return counts as 0.
      #
      # @return [Integer] total number of matches
      def scan_for_pattern_in_all_files(pattern, table_name: nil, column_name: nil)
        initialize_cache if @file_contents_cache.nil?

        @file_contents_cache.sum do |file_path, content|
          next 0 unless content

          if block_given?
            yield(content, file_path, table_name, column_name).to_i
          else
            content.scan(pattern).length
          end
        end
      end

      private

      # Reads files on a fixed-size thread pool; results are stored from the
      # calling thread only, so the cache hash is never written concurrently.
      def preload_files_parallel(files_to_scan, max_threads)
        thread_pool = Concurrent::FixedThreadPool.new(max_threads)

        begin
          futures = files_to_scan.map do |file_path|
            future = Concurrent::Future.execute(executor: thread_pool) do
              read_file_safely(file_path)
            end
            [file_path, future]
          end

          futures.each do |file_path, future|
            content =
              begin
                # Yields nil on timeout or when the read failed.
                future.value(FILE_READ_TIMEOUT)
              rescue Concurrent::TimeoutError, StandardError
                nil
              end
            record_scan_result(file_path, content)
          end
        ensure
          # Always release the worker threads, even if collection raised.
          thread_pool.shutdown
          thread_pool.wait_for_termination(POOL_SHUTDOWN_TIMEOUT)
        end
      end

      # Sequential counterpart of `preload_files_parallel`.
      def preload_files_sequential(files_to_scan)
        files_to_scan.each do |file_path|
          record_scan_result(file_path, read_file_safely(file_path))
        end
      end

      # Stores one read result and updates the statistics; nil content is
      # counted as a failed file.
      def record_scan_result(file_path, content)
        if content
          @file_contents_cache[file_path] = content
          @scan_stats[:files_scanned] += 1
          @scan_stats[:total_size] += content.bytesize
        else
          @scan_stats[:files_failed] += 1
        end
      end

      # Unique, non-skipped Ruby files under the scan directories; empty when
      # Rails (or Rails.root) is unavailable.
      def gather_all_files
        return [] unless defined?(Rails) && Rails.root

        scan_directories
          .select { |dir| Dir.exist?(dir) }
          .flat_map { |dir| Dir.glob(File.join(dir, '**', '*.rb')) }
          .uniq
          .reject { |file| should_skip_file?(file) }
      end

      # Directories searched for application code, as strings.
      def scan_directories
        return [] unless defined?(Rails) && Rails.root

        app_directories = APP_SUBDIRECTORIES.map { |name| Rails.root.join('app', name) }
        (app_directories + [Rails.root.join('lib')]).map(&:to_s)
      end

      # True for test files, migrations, files larger than 1MB and files
      # that cannot be inspected at all.
      def should_skip_file?(file_path)
        return true if File.basename(file_path).match?(TEST_FILE_PATTERN)
        return true if SKIPPED_PATH_SEGMENTS.any? { |segment| file_path.include?(segment) }

        File.size(file_path) > MAX_FILE_SIZE
      rescue StandardError
        true # Skip files we can't access
      end

      # Reads a file as UTF-8 text with invalid byte sequences replaced.
      # `scrub` is required: the :invalid/:undef read options only act when
      # transcoding and would leave invalid bytes that break later regex scans.
      #
      # @return [String, nil] nil for unreadable files and for content
      #   containing NUL bytes (treated as binary)
      def read_file_safely(file_path)
        return nil unless File.readable?(file_path)

        content = File.read(file_path, encoding: 'UTF-8').scrub
        return nil if content.include?("\x00")

        content
      rescue StandardError
        nil
      end
    end
  end
end
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
require_relative 'performance_optimizer'
|
|
2
|
+
|
|
3
|
+
module SchemaSherlock
  # Counts column references by consulting a pre-built binary index to find
  # candidate files, then delegating the actual counting to
  # PerformanceOptimizer.
  class IndexedUsageTracker
    class << self
      # @param index [Hash, nil] index whose :column_references entry maps
      #   column names to arrays of file paths
      # @param table_name [String] passed through to PerformanceOptimizer
      # @param column_name [String, Symbol] column to look up
      # @return [Integer] 0 when there is no index or no candidate file,
      #   otherwise whatever PerformanceOptimizer.process_files_parallel returns
      def count_column_references_with_index(index, table_name, column_name)
        references = index && index[:column_references]
        return 0 unless references

        relevant_files = files_referencing(references, column_name)
        return 0 if relevant_files.empty?

        PerformanceOptimizer.process_files_parallel(relevant_files, table_name, column_name)
      end

      private

      # Candidate files for +column_name+, without duplicates.
      #
      # Both the String and the Symbol form of the name are tried because an
      # index deserialised with symbolised keys stores column names as
      # Symbols while a freshly built one uses Strings. Duplicates are
      # removed so a file listed several times is only counted once.
      def files_referencing(references, column_name)
        name = column_name.to_s

        [name, name.to_sym]
          .flat_map { |key| Array(references[key]) }
          .map(&:to_s)
          .uniq
      end
    end
  end
end
|
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
require 'strscan'
|
|
2
|
+
|
|
3
|
+
module SchemaSherlock
  # Counts textual references to a column in Ruby source: `.where(...)` /
  # `.find_by(...)` on the column, `.joins(...)` / `.includes(...)` on the
  # association derived from it ("user_id" => "user"), and direct
  # `.column_name` access. Matching is case-insensitive.
  #
  # `table_name` is accepted by both entry points but not used by them.
  class OptimizedScanner
    # Pre-compiled patterns stored as constants to avoid recompilation
    BOUNDARY_CHARS = /[\s\(\)\[\]\{\},;:'"]/

    DOT = /\./
    WHERE_CALL = /where\s*\(\s*/
    FIND_BY_CALL = /find_by\s*\(\s*/
    JOINS_CALL = /joins\s*\(\s*/
    INCLUDES_CALL = /includes\s*\(\s*/
    OPENING_QUOTES = /['":]*/
    COLUMN_FOLLOWER = /['":]*\s*[=:]/
    ASSOCIATION_FOLLOWER = /['":]*\s*[\),]/
    WORD_CHAR = /\w/

    COLUMN_CALL_PREFIXES = [".where(", ".where (", ".find_by(", ".find_by ("].freeze
    ASSOCIATION_CALL_PREFIXES = [".joins(", ".joins (", ".includes(", ".includes ("].freeze
    # Characters allowed between a call's "(" and its first argument.
    SKIPPABLE_BEFORE_TARGET = " \t\r\n'\":"

    class << self
      # StringScanner-based single pass over +content+.
      #
      # @param content [String, nil] source text
      # @param table_name [String] unused
      # @param column_name [String] e.g. "user_id"
      # @return [Integer] number of references found (0 for nil/empty content)
      def count_column_references(content, table_name, column_name)
        return 0 if content.nil? || content.empty?

        column = column_name.downcase
        # Built once per call instead of once per "." in the content.
        column_pattern = /#{Regexp.escape(column)}/
        direct_access_pattern = /#{Regexp.escape(column)}\b/
        association_pattern = /#{Regexp.escape(association_name_for(column_name))}/

        count = 0
        scanner = StringScanner.new(content.downcase)

        # Every candidate starts right after a ".".
        while scanner.scan_until(DOT)
          if scanner.skip(WHERE_CALL) && match_column_reference(scanner, column_pattern)
            count += 1
            next
          end

          if scanner.skip(FIND_BY_CALL) && match_column_reference(scanner, column_pattern)
            count += 1
            next
          end

          if scanner.skip(JOINS_CALL)
            if match_association_reference(scanner, association_pattern)
              count += 1
              next
            end
          elsif scanner.skip(INCLUDES_CALL)
            if match_association_reference(scanner, association_pattern)
              count += 1
              next
            end
          end

          # Direct column access such as `record.user_id`.
          count += 1 if scanner.skip(direct_access_pattern)
        end

        count
      end

      # Plain substring-search variant of `count_column_references`.
      #
      # A match only counts when the character after it is not an identifier
      # character (letter, digit or underscore), so `.user_id_was` or
      # `.joins(:user_profile)` are not mistaken for `user_id` / `user`.
      #
      # @param content [String, nil] source text
      # @param table_name [String] unused
      # @param column_name [String] e.g. "user_id"
      # @return [Integer] number of references found (0 for nil/empty content)
      def count_column_references_native(content, table_name, column_name)
        return 0 if content.nil? || content.empty?

        haystack = content.downcase
        column = column_name.downcase
        association = association_name_for(column_name)

        column_calls = COLUMN_CALL_PREFIXES.sum { |prefix| count_pattern_native(haystack, prefix, column) }
        association_calls = ASSOCIATION_CALL_PREFIXES.sum { |prefix| count_pattern_native(haystack, prefix, association) }

        column_calls + association_calls + count_direct_access(haystack, ".#{column}")
      end

      private

      # "user_id" => "user"; names without the suffix are returned downcased.
      def association_name_for(column_name)
        column_name.downcase.sub(/_id\z/, '')
      end

      # After an opening call: optional quote/colon characters, the column
      # name, then something that looks like `:`/`=` (hash key or comparison).
      def match_column_reference(scanner, name_pattern)
        scanner.skip(OPENING_QUOTES)
        return false unless scanner.skip(name_pattern)

        !scanner.match?(COLUMN_FOLLOWER).nil?
      end

      # After an opening call: optional quote/colon characters, the
      # association name, then `)` or `,`.
      def match_association_reference(scanner, name_pattern)
        scanner.skip(OPENING_QUOTES)
        return false unless scanner.skip(name_pattern)

        !scanner.match?(ASSOCIATION_FOLLOWER).nil?
      end

      # Counts occurrences of +prefix+ whose first argument is +target+.
      def count_pattern_native(content, prefix, target)
        return 0 if target.empty?

        count = 0
        search_from = 0

        while (pos = content.index(prefix, search_from))
          cursor = pos + prefix.length

          # Skip whitespace (including newlines), quotes and symbol colons.
          cursor += 1 while cursor < content.length && SKIPPABLE_BEFORE_TARGET.include?(content[cursor])

          if content[cursor, target.length] == target && !word_char?(content[cursor + target.length])
            count += 1
          end

          search_from = pos + 1
        end

        count
      end

      # Counts occurrences of +pattern+ that end on a word boundary.
      def count_direct_access(content, pattern)
        count = 0
        search_from = 0

        while (pos = content.index(pattern, search_from))
          count += 1 unless word_char?(content[pos + pattern.length])
          search_from = pos + 1
        end

        count
      end

      # True when +char+ continues an identifier; nil (end of content) does not.
      def word_char?(char)
        !char.nil? && char.match?(WORD_CHAR)
      end
    end
  end
end
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
module SchemaSherlock
|
|
2
|
+
# Centralized performance optimization for file and pattern processing
|
|
3
|
+
class PerformanceOptimizer
|
|
4
|
+
# File size thresholds for processing strategies
|
|
5
|
+
SMALL_FILE_THRESHOLD = 64 * 1024 # 64KB
|
|
6
|
+
LARGE_FILE_THRESHOLD = 1024 * 1024 # 1MB
|
|
7
|
+
|
|
8
|
+
class << self
|
|
9
|
+
# High-performance file reading with size-based optimization
|
|
10
|
+
def read_file_optimized(file_path)
|
|
11
|
+
return "" unless File.exist?(file_path) && File.readable?(file_path)
|
|
12
|
+
|
|
13
|
+
file_size = File.size(file_path)
|
|
14
|
+
return "" if file_size == 0
|
|
15
|
+
|
|
16
|
+
if file_size < LARGE_FILE_THRESHOLD
|
|
17
|
+
# Small/medium files: direct read
|
|
18
|
+
File.read(file_path, encoding: 'UTF-8', invalid: :replace, undef: :replace)
|
|
19
|
+
else
|
|
20
|
+
# Large files: chunked reading with buffer
|
|
21
|
+
read_large_file_chunked(file_path)
|
|
22
|
+
end
|
|
23
|
+
rescue
|
|
24
|
+
""
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
# Fast pattern matching with pre-filtering
|
|
28
|
+
def count_patterns_optimized(content, table_name, column_name)
|
|
29
|
+
# Early exit if content is empty or too short
|
|
30
|
+
return 0 if content.nil? || content.length < column_name.length
|
|
31
|
+
|
|
32
|
+
# Quick pre-filter: check if column name exists at all
|
|
33
|
+
content_lower = content.downcase
|
|
34
|
+
column_lower = column_name.downcase
|
|
35
|
+
|
|
36
|
+
# If column name doesn't appear anywhere, skip expensive matching
|
|
37
|
+
return 0 unless content_lower.include?(column_lower)
|
|
38
|
+
|
|
39
|
+
# Use optimized scanner
|
|
40
|
+
OptimizedScanner.count_column_references_native(content, table_name, column_name)
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
# Parallel file processing with optimal thread count
|
|
44
|
+
def process_files_parallel(file_paths, table_name, column_name)
|
|
45
|
+
return 0 if file_paths.empty?
|
|
46
|
+
|
|
47
|
+
# Limit threads to avoid overwhelming the system
|
|
48
|
+
max_threads = [Concurrent.processor_count, file_paths.size, 8].min
|
|
49
|
+
|
|
50
|
+
futures = []
|
|
51
|
+
thread_pool = Concurrent::FixedThreadPool.new(max_threads)
|
|
52
|
+
|
|
53
|
+
file_paths.each do |file_path|
|
|
54
|
+
future = Concurrent::Future.execute(executor: thread_pool) do
|
|
55
|
+
content = read_file_optimized(file_path)
|
|
56
|
+
count_patterns_optimized(content, table_name, column_name)
|
|
57
|
+
end
|
|
58
|
+
futures << future
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
# Collect results efficiently
|
|
62
|
+
total_count = futures.sum do |future|
|
|
63
|
+
future.value || 0
|
|
64
|
+
rescue
|
|
65
|
+
0
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
thread_pool.shutdown
|
|
69
|
+
thread_pool.wait_for_termination(5)
|
|
70
|
+
|
|
71
|
+
total_count
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
# Smart file filtering to reduce I/O
|
|
75
|
+
def filter_relevant_files(file_paths, column_name)
|
|
76
|
+
# For very large sets, do a quick filename-based filter first
|
|
77
|
+
if file_paths.size > 1000
|
|
78
|
+
# Filter by filename patterns that are likely to contain the column
|
|
79
|
+
association_name = column_name.gsub(/_id$/, '')
|
|
80
|
+
relevant_patterns = [column_name, association_name, 'model', 'service', 'query']
|
|
81
|
+
|
|
82
|
+
file_paths.select do |path|
|
|
83
|
+
filename = File.basename(path, '.rb').downcase
|
|
84
|
+
relevant_patterns.any? { |pattern| filename.include?(pattern) }
|
|
85
|
+
end
|
|
86
|
+
else
|
|
87
|
+
file_paths
|
|
88
|
+
end
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
private
|
|
92
|
+
|
|
93
|
+
# Reads an entire file in fixed-size binary chunks and returns it as UTF-8
# text. Byte sequences that are invalid in UTF-8 are replaced instead of
# raising.
#
# @param file_path [String] path of the file to read
# @return [String] the file content tagged and cleaned as UTF-8
def read_large_file_chunked(file_path)
  bytes_per_read = 64 * 1024 # 64KB per read call
  buffer = String.new

  File.open(file_path, 'rb') do |io|
    # Hint sequential access to the OS when the IO object supports it
    io.advise(:sequential) if io.respond_to?(:advise)

    loop do
      piece = io.read(bytes_per_read)
      break if piece.nil? # read returns nil once the end of file is reached

      buffer << piece
    end
  end

  # One conversion for the whole content rather than one per chunk
  buffer.force_encoding('UTF-8')
  buffer.encode('UTF-8', invalid: :replace, undef: :replace)
end
|
|
109
|
+
end
|
|
110
|
+
end
|
|
111
|
+
end
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
module SchemaSherlock
  # Class-level memoization of schema metadata (table existence, columns,
  # indexes, primary keys) read through ActiveRecord::Base.connection.
  #
  # The caches live in instance variables of the class object and are keyed by
  # table name as a String, so "users" and :users share one entry.
  class SchemaCache
    class << self
      # Replaces all four caches with new empty hashes.
      def initialize_cache
        @table_exists_cache = {}
        @columns_cache = {}
        @indexes_cache = {}
        @primary_keys_cache = {}
      end

      # Empties every cache that has been created; a no-op for caches that
      # were never initialized.
      def clear_cache
        @table_exists_cache&.clear
        @columns_cache&.clear
        @indexes_cache&.clear
        @primary_keys_cache&.clear
      end

      # @return the connection all metadata is read from
      def connection
        ActiveRecord::Base.connection
      end

      # Loads metadata for every table reported by the connection.
      #
      # @return [Hash{Symbol => Integer}] entry counts per cache, for debugging
      def preload_all_metadata
        initialize_cache if @table_exists_cache.nil?

        # Preload all tables existence
        all_tables = connection.tables
        all_tables.each { |table| @table_exists_cache[table] = true }

        # Preload columns, indexes, and primary keys for all tables
        all_tables.each do |table|
          @columns_cache[table] = connection.columns(table)
          @indexes_cache[table] = connection.indexes(table)
          @primary_keys_cache[table] = connection.primary_key(table)
        end

        {
          tables_cached: @table_exists_cache.size,
          columns_cached: @columns_cache.size,
          indexes_cached: @indexes_cache.size,
          primary_keys_cached: @primary_keys_cache.size
        }
      end

      # @param table_name [String, Symbol]
      # @return whatever connection.table_exists? reported for the table; the
      #   answer (including a negative one) is cached
      def table_exists?(table_name)
        initialize_cache if @table_exists_cache.nil?

        key = table_name.to_s
        return @table_exists_cache[key] if @table_exists_cache.key?(key)

        @table_exists_cache[key] = connection.table_exists?(key)
      end

      # @param table_name [String, Symbol]
      # @return the cached result of connection.columns, or nil when the
      #   table does not exist
      def columns(table_name)
        initialize_cache if @columns_cache.nil?

        fetch_for_table(@columns_cache, table_name, nil) { |key| connection.columns(key) }
      end

      # @param table_name [String, Symbol]
      # @return the cached result of connection.indexes, or [] when the table
      #   does not exist
      def indexes(table_name)
        initialize_cache if @indexes_cache.nil?

        fetch_for_table(@indexes_cache, table_name, []) { |key| connection.indexes(key) }
      end

      # @param table_name [String, Symbol]
      # @return the cached result of connection.primary_key, or nil when the
      #   table does not exist
      def primary_key(table_name)
        initialize_cache if @primary_keys_cache.nil?

        fetch_for_table(@primary_keys_cache, table_name, nil) { |key| connection.primary_key(key) }
      end

      # Helper method to get column by name.
      #
      # @param table_name [String, Symbol]
      # @param column_name [String, Symbol]
      # @return the first column whose name equals column_name, or nil when
      #   the table or the column is unknown
      def column(table_name, column_name)
        # The name is compared as a String so that a Symbol argument can match
        wanted = column_name.to_s
        columns(table_name)&.find { |col| col.name == wanted }
      end

      # @return [Hash{Symbol => Integer}] current size of each cache, 0 for
      #   caches that were never initialized
      def cache_stats
        {
          table_exists_cache_size: @table_exists_cache&.size || 0,
          columns_cache_size: @columns_cache&.size || 0,
          indexes_cache_size: @indexes_cache&.size || 0,
          primary_keys_cache_size: @primary_keys_cache&.size || 0
        }
      end

      private

      # Returns the cached value for the table, or yields the String table
      # name to load and store it. Returns +missing+ without caching when the
      # table does not exist.
      def fetch_for_table(cache, table_name, missing)
        key = table_name.to_s
        return cache[key] if cache.key?(key)
        return missing unless table_exists?(key)

        cache[key] = yield(key)
      end
    end
  end
end
|
|
@@ -1,6 +1,14 @@
|
|
|
1
|
+
require_relative 'file_cache'
|
|
2
|
+
require_relative 'optimized_scanner'
|
|
3
|
+
require_relative 'binary_index'
|
|
4
|
+
require_relative 'indexed_usage_tracker'
|
|
5
|
+
require_relative 'performance_optimizer'
|
|
6
|
+
|
|
1
7
|
module SchemaSherlock
|
|
2
8
|
class UsageTracker
|
|
3
9
|
class << self
|
|
10
|
+
attr_accessor :binary_index
|
|
11
|
+
|
|
4
12
|
def track_foreign_key_usage(model_class)
|
|
5
13
|
return {} unless SchemaSherlock.configuration.min_usage_threshold
|
|
6
14
|
|
|
@@ -27,21 +35,41 @@ module SchemaSherlock
|
|
|
27
35
|
end
|
|
28
36
|
|
|
29
37
|
# Counts usages of a column, preferring the prebuilt binary index when one
# has been assigned and falling back to a file scan otherwise.
#
# @param table_name [String]
# @param column_name [String]
# @return the count produced by whichever strategy was used
def scan_for_column_usage(table_name, column_name)
  # No index available: use performance-optimized scanning
  return scan_with_performance_optimizer(table_name, column_name) unless binary_index

  IndexedUsageTracker.count_column_references_with_index(binary_index, table_name, column_name)
end
|
|
46
|
+
|
|
47
|
+
# Scans the project's files for references to a column: gathers the candidate
# files, lets PerformanceOptimizer narrow them down, then counts in parallel.
#
# @param table_name [String]
# @param column_name [String]
# @return the result of PerformanceOptimizer.process_files_parallel
def scan_with_performance_optimizer(table_name, column_name)
  candidates = PerformanceOptimizer.filter_relevant_files(get_relevant_files, column_name)
  PerformanceOptimizer.process_files_parallel(candidates, table_name, column_name)
end
|
|
31
53
|
|
|
32
|
-
|
|
54
|
+
# Lists every Ruby file under the configured scan directories that is not
# excluded by should_skip_file?. Directories that do not exist are ignored.
#
# @return [Array<String>] paths of the files to scan
def get_relevant_files
  scan_directories
    .select { |dir| Dir.exist?(dir) }
    .flat_map do |dir|
      Dir.glob(File.join(dir, "**/*.rb")).reject { |file| should_skip_file?(file) }
    end
end
|
|
43
66
|
|
|
44
|
-
|
|
67
|
+
# Decides whether a file is left out of the usage scan.
#
# A file is skipped when its path contains a spec, test, vendor or
# node_modules directory segment, when it is larger than 50MB, or when its
# size cannot be read at all.
#
# @param file [String] path of the candidate file
# @return [Boolean] true when the file must not be scanned
def should_skip_file?(file)
  excluded_segments = %w[/spec/ /test/ /vendor/ /node_modules/]
  return true if excluded_segments.any? { |segment| file.include?(segment) }

  File.size(file) > 50 * 1024 * 1024 # Skip files larger than 50MB
rescue SystemCallError
  # File.size raises when the path cannot be stat'ed (for example the file
  # vanished after globbing or is a dangling symlink). Such a file cannot be
  # read either, so skip it instead of aborting the whole scan.
  true
end
|
|
46
74
|
|
|
47
75
|
def scan_directories
|
|
@@ -58,21 +86,7 @@ module SchemaSherlock
|
|
|
58
86
|
|
|
59
87
|
|
|
60
88
|
# Counts references to a column inside already-loaded file content by
# delegating to OptimizedScanner.
#
# @param content [String] source text to search
# @param table_name [String]
# @param column_name [String]
# @return the value returned by OptimizedScanner.count_column_references_native
def count_column_references(content, table_name, column_name)
  OptimizedScanner.count_column_references_native(content, table_name, column_name)
end
|
|
77
91
|
end
|
|
78
92
|
end
|
data/schema_sherlock.gemspec
CHANGED
|
@@ -30,6 +30,8 @@ Gem::Specification.new do |spec|
|
|
|
30
30
|
spec.add_dependency "rails", ">= 6.0"
|
|
31
31
|
spec.add_dependency "thor", "~> 1.0"
|
|
32
32
|
spec.add_dependency "activerecord", ">= 6.0"
|
|
33
|
+
spec.add_dependency "concurrent-ruby", "~> 1.0"
|
|
34
|
+
spec.add_dependency "msgpack", "~> 1.0"
|
|
33
35
|
|
|
34
36
|
# Development dependencies
|
|
35
37
|
spec.add_development_dependency "rspec", "~> 3.0"
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: schema_sherlock
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.0
|
|
4
|
+
version: 0.1.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Prateek Choudhary
|
|
@@ -52,6 +52,34 @@ dependencies:
|
|
|
52
52
|
- - ">="
|
|
53
53
|
- !ruby/object:Gem::Version
|
|
54
54
|
version: '6.0'
|
|
55
|
+
- !ruby/object:Gem::Dependency
|
|
56
|
+
name: concurrent-ruby
|
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
|
58
|
+
requirements:
|
|
59
|
+
- - "~>"
|
|
60
|
+
- !ruby/object:Gem::Version
|
|
61
|
+
version: '1.0'
|
|
62
|
+
type: :runtime
|
|
63
|
+
prerelease: false
|
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
65
|
+
requirements:
|
|
66
|
+
- - "~>"
|
|
67
|
+
- !ruby/object:Gem::Version
|
|
68
|
+
version: '1.0'
|
|
69
|
+
- !ruby/object:Gem::Dependency
|
|
70
|
+
name: msgpack
|
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
|
72
|
+
requirements:
|
|
73
|
+
- - "~>"
|
|
74
|
+
- !ruby/object:Gem::Version
|
|
75
|
+
version: '1.0'
|
|
76
|
+
type: :runtime
|
|
77
|
+
prerelease: false
|
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
79
|
+
requirements:
|
|
80
|
+
- - "~>"
|
|
81
|
+
- !ruby/object:Gem::Version
|
|
82
|
+
version: '1.0'
|
|
55
83
|
- !ruby/object:Gem::Dependency
|
|
56
84
|
name: rspec
|
|
57
85
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -85,10 +113,17 @@ files:
|
|
|
85
113
|
- lib/schema_sherlock.rb
|
|
86
114
|
- lib/schema_sherlock/analyzers/base_analyzer.rb
|
|
87
115
|
- lib/schema_sherlock/analyzers/foreign_key_detector.rb
|
|
116
|
+
- lib/schema_sherlock/analyzers/index_recommendation_detector.rb
|
|
117
|
+
- lib/schema_sherlock/binary_index.rb
|
|
88
118
|
- lib/schema_sherlock/commands/analyze_command.rb
|
|
89
119
|
- lib/schema_sherlock/commands/base_command.rb
|
|
90
120
|
- lib/schema_sherlock/configuration.rb
|
|
121
|
+
- lib/schema_sherlock/file_cache.rb
|
|
122
|
+
- lib/schema_sherlock/indexed_usage_tracker.rb
|
|
91
123
|
- lib/schema_sherlock/model_loader.rb
|
|
124
|
+
- lib/schema_sherlock/optimized_scanner.rb
|
|
125
|
+
- lib/schema_sherlock/performance_optimizer.rb
|
|
126
|
+
- lib/schema_sherlock/schema_cache.rb
|
|
92
127
|
- lib/schema_sherlock/usage_tracker.rb
|
|
93
128
|
- lib/schema_sherlock/version.rb
|
|
94
129
|
- schema_sherlock.gemspec
|