QueryWise 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +45 -0
  3. data/CLOUD_RUN_README.md +263 -0
  4. data/DOCKER_README.md +327 -0
  5. data/Dockerfile +69 -0
  6. data/Dockerfile.cloudrun +76 -0
  7. data/Dockerfile.dev +36 -0
  8. data/GEM_Gemfile +16 -0
  9. data/GEM_README.md +421 -0
  10. data/GEM_Rakefile +10 -0
  11. data/GEM_gitignore +137 -0
  12. data/LICENSE.txt +21 -0
  13. data/PUBLISHING_GUIDE.md +269 -0
  14. data/README.md +392 -0
  15. data/app/controllers/api/v1/analysis_controller.rb +340 -0
  16. data/app/controllers/api/v1/api_keys_controller.rb +83 -0
  17. data/app/controllers/api/v1/base_controller.rb +93 -0
  18. data/app/controllers/api/v1/health_controller.rb +86 -0
  19. data/app/controllers/application_controller.rb +2 -0
  20. data/app/controllers/concerns/.keep +0 -0
  21. data/app/jobs/application_job.rb +7 -0
  22. data/app/mailers/application_mailer.rb +4 -0
  23. data/app/models/app_profile.rb +18 -0
  24. data/app/models/application_record.rb +3 -0
  25. data/app/models/concerns/.keep +0 -0
  26. data/app/models/optimization_suggestion.rb +44 -0
  27. data/app/models/query_analysis.rb +47 -0
  28. data/app/models/query_pattern.rb +55 -0
  29. data/app/services/missing_index_detector_service.rb +244 -0
  30. data/app/services/n_plus_one_detector_service.rb +177 -0
  31. data/app/services/slow_query_analyzer_service.rb +225 -0
  32. data/app/services/sql_parser_service.rb +352 -0
  33. data/app/validators/query_data_validator.rb +96 -0
  34. data/app/views/layouts/mailer.html.erb +13 -0
  35. data/app/views/layouts/mailer.text.erb +1 -0
  36. data/app.yaml +109 -0
  37. data/cloudbuild.yaml +47 -0
  38. data/config/application.rb +32 -0
  39. data/config/boot.rb +4 -0
  40. data/config/cable.yml +17 -0
  41. data/config/cache.yml +16 -0
  42. data/config/credentials.yml.enc +1 -0
  43. data/config/database.yml +69 -0
  44. data/config/deploy.yml +116 -0
  45. data/config/environment.rb +5 -0
  46. data/config/environments/development.rb +70 -0
  47. data/config/environments/production.rb +87 -0
  48. data/config/environments/test.rb +53 -0
  49. data/config/initializers/cors.rb +16 -0
  50. data/config/initializers/filter_parameter_logging.rb +8 -0
  51. data/config/initializers/inflections.rb +16 -0
  52. data/config/locales/en.yml +31 -0
  53. data/config/master.key +1 -0
  54. data/config/puma.rb +41 -0
  55. data/config/puma_cloudrun.rb +48 -0
  56. data/config/queue.yml +18 -0
  57. data/config/recurring.yml +15 -0
  58. data/config/routes.rb +28 -0
  59. data/config/storage.yml +34 -0
  60. data/config.ru +6 -0
  61. data/db/cable_schema.rb +11 -0
  62. data/db/cache_schema.rb +14 -0
  63. data/db/migrate/20250818214709_create_app_profiles.rb +13 -0
  64. data/db/migrate/20250818214731_create_query_analyses.rb +22 -0
  65. data/db/migrate/20250818214740_create_query_patterns.rb +22 -0
  66. data/db/migrate/20250818214805_create_optimization_suggestions.rb +20 -0
  67. data/db/queue_schema.rb +129 -0
  68. data/db/schema.rb +79 -0
  69. data/db/seeds.rb +9 -0
  70. data/init.sql +9 -0
  71. data/lib/query_optimizer_client/client.rb +176 -0
  72. data/lib/query_optimizer_client/configuration.rb +43 -0
  73. data/lib/query_optimizer_client/generators/install_generator.rb +43 -0
  74. data/lib/query_optimizer_client/generators/templates/README +46 -0
  75. data/lib/query_optimizer_client/generators/templates/analysis_job.rb +84 -0
  76. data/lib/query_optimizer_client/generators/templates/initializer.rb +30 -0
  77. data/lib/query_optimizer_client/middleware.rb +126 -0
  78. data/lib/query_optimizer_client/railtie.rb +37 -0
  79. data/lib/query_optimizer_client/tasks.rake +228 -0
  80. data/lib/query_optimizer_client/version.rb +5 -0
  81. data/lib/query_optimizer_client.rb +48 -0
  82. data/lib/tasks/.keep +0 -0
  83. data/public/robots.txt +1 -0
  84. data/query_optimizer_client.gemspec +60 -0
  85. data/script/.keep +0 -0
  86. data/storage/.keep +0 -0
  87. data/storage/development.sqlite3 +0 -0
  88. data/storage/test.sqlite3 +0 -0
  89. data/vendor/.keep +0 -0
  90. metadata +265 -0
@@ -0,0 +1,47 @@
1
# A single captured SQL statement belonging to an app profile.
#
# Before every save the record derives two attributes from +sql_query+:
# +query_hash+ (a SHA-256 digest of a normalized form of the statement, used
# to find structurally similar statements) and +table_name+ (the first table
# referenced after FROM / UPDATE / INTO).
class QueryAnalysis < ApplicationRecord
  # Captures the identifier that follows FROM, UPDATE or INTO.
  #
  # * The leading \b keeps identifiers that merely end in one of the keywords
  #   (e.g. a column called "valid_from") from being mistaken for the keyword.
  # * The optional opening quote character lets quoted identifiers such as
  #   "users", `users` or [users] match; the quote itself is not captured.
  TABLE_NAME_PATTERN = /\b(?:FROM|UPDATE|INTO)\s+["`\[]?([a-zA-Z_][a-zA-Z0-9_]*)/i

  belongs_to :app_profile
  has_many :optimization_suggestions, dependent: :destroy

  validates :sql_query, presence: true
  validates :analyzed_at, presence: true
  validates :query_type, inclusion: { in: %w[SELECT INSERT UPDATE DELETE] }

  before_save :generate_query_hash, :extract_table_name

  # Statements whose duration exceeds +threshold+ milliseconds.
  scope :slow_queries, ->(threshold = 200) { where('duration_ms > ?', threshold) }
  # Statements recorded against the given table.
  scope :by_table, ->(table_name) { where(table_name: table_name) }
  # Statements analyzed within the last 24 hours.
  scope :recent, -> { where('analyzed_at > ?', 24.hours.ago) }

  # Whether this statement took longer than +threshold+ milliseconds.
  #
  # @param threshold [Numeric] duration limit in milliseconds
  # @return [Boolean, nil] nil when no duration was recorded
  def slow_query?(threshold = 200)
    duration_ms && duration_ms > threshold
  end

  # Other analyses of the same app profile that share this record's query hash.
  #
  # @return [ActiveRecord::Relation] empty when this record has no hash yet
  def similar_queries
    return QueryAnalysis.none if query_hash.blank?

    QueryAnalysis.where(query_hash: query_hash)
                 .where.not(id: id)
                 .where(app_profile: app_profile)
  end

  private

  # Stores a digest of the statement with digit runs and single-quoted
  # literals replaced by '?', whitespace collapsed and case folded, so that
  # statements differing only in literal values share one hash.
  #
  # Skipped when there is no SQL text (possible when validations are
  # bypassed), mirroring the guard in #extract_table_name.
  def generate_query_hash
    return if sql_query.blank?

    normalized_query = sql_query.gsub(/\d+/, '?')
                                .gsub(/'[^']*'/, '?')
                                .gsub(/\s+/, ' ')
                                .strip
                                .downcase
    self.query_hash = Digest::SHA256.hexdigest(normalized_query)
  end

  # Stores the lower-cased name of the first table found by
  # TABLE_NAME_PATTERN. Leaves +table_name+ untouched when nothing matches.
  # Simple extraction only - can be enhanced with pg_query.
  def extract_table_name
    return if sql_query.blank?

    matched_name = sql_query[TABLE_NAME_PATTERN, 1]
    self.table_name = matched_name.downcase if matched_name
  end
end
@@ -0,0 +1,55 @@
1
# A recurring query problem (N+1, slow query or missing index) observed on a
# table, identified by a signature digest and counted via +frequency+.
class QueryPattern < ApplicationRecord
  validates :pattern_type,
            presence: true,
            inclusion: { in: %w[n_plus_one slow_query missing_index] }
  validates :table_name, presence: true
  validates :frequency, presence: true, numericality: { greater_than: 0 }
  validates :first_seen, presence: true
  validates :last_seen, presence: true
  validates :pattern_signature, presence: true, uniqueness: true

  # New records always get their signature recomputed from their attributes.
  before_validation :generate_pattern_signature, on: :create

  scope :n_plus_one, -> { where(pattern_type: 'n_plus_one') }
  scope :slow_queries, -> { where(pattern_type: 'slow_query') }
  scope :missing_indexes, -> { where(pattern_type: 'missing_index') }
  # Patterns seen at least +threshold+ times.
  scope :frequent, ->(threshold = 5) { where('frequency >= ?', threshold) }
  # Patterns last seen within the past 24 hours.
  scope :recent, -> { where('last_seen > ?', 24.hours.ago) }

  # Registers one occurrence of a pattern.
  #
  # An existing pattern (matched by signature) has its frequency incremented
  # and +last_seen+ refreshed; +metadata+ is only stored when the pattern is
  # created. Raises whatever +increment!+, +update!+ or +save!+ raise.
  #
  # @param type     pattern type
  # @param table    table name
  # @param column   optional column name
  # @param metadata extra data stored on a newly created pattern
  # @return [QueryPattern] the updated or newly created record
  def self.record_pattern(type:, table:, column: nil, metadata: {})
    record = find_or_initialize_by(
      pattern_signature: generate_signature(type, table, column)
    )

    if record.persisted?
      record.increment!(:frequency)
      record.update!(last_seen: Time.current)
      return record
    end

    record.assign_attributes(
      pattern_type: type,
      table_name: table,
      column_name: column,
      frequency: 1,
      first_seen: Time.current,
      last_seen: Time.current,
      metadata: metadata
    )
    record.save!
    record
  end

  # SHA-256 hex digest of "type:table" or, when a column is present,
  # "type:table:column".
  def self.generate_signature(type, table, column = nil)
    components = column.present? ? [type, table, column] : [type, table]
    Digest::SHA256.hexdigest(components.join(':'))
  end

  private

  # Callback: derives the signature from this record's own attributes.
  def generate_pattern_signature
    computed = self.class.generate_signature(pattern_type, table_name, column_name)
    self.pattern_signature = computed
  end
end
@@ -0,0 +1,244 @@
1
# Suggests database indexes from a batch of captured queries.
#
# Queries (objects responding to +sql_query+) are grouped by the table that
# SqlParserService reports as primary. For each table the service counts how
# often columns appear in WHERE and ORDER BY clauses and emits a suggestion
# hash for every column, or set of columns, whose count reaches
# +frequency_threshold+.
class MissingIndexDetectorService
  # Column names shaped like a foreign key: at least one word character
  # followed by a trailing "_id". \z (not $) so a trailing newline never counts.
  FOREIGN_KEY_COLUMN = /\w_id\z/

  attr_reader :queries, :frequency_threshold

  # @param queries [Array, nil] query objects; nil is treated as an empty batch
  # @param frequency_threshold [Integer] minimum count before a column or
  #   column set is suggested
  def initialize(queries, frequency_threshold: 3)
    @queries = Array(queries)
    @frequency_threshold = frequency_threshold
    # One parser per query object, so each statement is parsed once instead of
    # once per analysis pass.
    @parsers = {}.compare_by_identity
  end

  # Convenience wrapper around +new(...).detect+.
  def self.detect(queries, **options)
    new(queries, **options).detect
  end

  # @return [Array<Hash>] suggestions with keys :type, :table_name, :columns,
  #   :frequency, :priority, :reason, :sql, :impact and :description, unique
  #   per table/column set and ordered by priority then frequency (descending)
  def detect
    suggestions = group_queries_by_table.flat_map do |table_name, table_queries|
      analyze_table_queries(table_name, table_queries)
    end

    deduplicate_and_prioritize(suggestions)
  end

  private

  # Memoized SqlParserService for +query+.
  def parser_for(query)
    @parsers[query] ||= SqlParserService.new(query.sql_query)
  end

  # Groups queries by primary table, dropping those without one.
  def group_queries_by_table
    queries.group_by { |query| parser_for(query).primary_table }
           .reject { |table, _| table.nil? }
  end

  # Runs every analysis for one table and returns the combined suggestions in
  # the order: WHERE, ORDER BY, composite, foreign key.
  def analyze_table_queries(table_name, table_queries)
    generate_where_index_suggestions(table_name, analyze_where_patterns(table_queries)) +
      generate_order_index_suggestions(table_name, analyze_order_patterns(table_queries)) +
      generate_composite_index_suggestions(table_name, analyze_composite_patterns(table_queries)) +
      generate_foreign_key_index_suggestions(table_name, analyze_foreign_key_patterns(table_queries))
  end

  # Counts the keys the block returns for each valid query's parser and keeps
  # only the keys whose total reaches the frequency threshold.
  def tally_over_valid_queries(table_queries)
    counts = Hash.new(0)

    table_queries.each do |query|
      parser = parser_for(query)
      next unless parser.valid?

      yield(parser).each { |key| counts[key] += 1 }
    end

    counts.select { |_, count| count >= frequency_threshold }
  end

  # Column names with any "table." prefix removed; empty names are dropped.
  def bare_columns(columns)
    columns.map { |column| column.to_s.split('.').last }.compact
  end

  # WHERE-clause column => number of occurrences.
  def analyze_where_patterns(table_queries)
    tally_over_valid_queries(table_queries) { |parser| bare_columns(parser.where_columns) }
  end

  # ORDER BY column => number of occurrences.
  def analyze_order_patterns(table_queries)
    tally_over_valid_queries(table_queries) { |parser| bare_columns(parser.order_by_columns) }
  end

  # "col_a,col_b" (sorted) => number of queries filtering on that column set.
  def analyze_composite_patterns(table_queries)
    tally_over_valid_queries(table_queries) do |parser|
      # uniq: stripping table prefixes (users.id / posts.id) or a repeated
      # condition can yield the same column twice, which must not produce a
      # "composite" index on (id, id).
      columns = bare_columns(parser.where_columns).uniq
      columns.length > 1 ? [columns.sort.join(',')] : []
    end
  end

  # Foreign-key-looking WHERE column => number of occurrences.
  def analyze_foreign_key_patterns(table_queries)
    tally_over_valid_queries(table_queries) do |parser|
      bare_columns(parser.where_columns).grep(FOREIGN_KEY_COLUMN)
    end
  end

  # Assembles one suggestion hash; key order is part of the output format.
  def build_suggestion(type:, table_name:, columns:, frequency:, index_name:,
                       reason:, impact:, description:, bonus: 0)
    {
      type: type,
      table_name: table_name,
      columns: columns,
      frequency: frequency,
      priority: calculate_priority(frequency, bonus: bonus),
      reason: reason,
      sql: "CREATE INDEX #{index_name} ON #{table_name}(#{columns.join(', ')});",
      impact: impact,
      description: description
    }
  end

  def generate_where_index_suggestions(table_name, column_frequency)
    column_frequency.map do |column, frequency|
      build_suggestion(
        type: 'single_column_index',
        table_name: table_name,
        columns: [column],
        frequency: frequency,
        index_name: "idx_#{table_name}_#{column}",
        reason: 'Frequently used in WHERE clauses',
        impact: 'high',
        description: "Column '#{column}' is used in WHERE clauses #{frequency} times"
      )
    end
  end

  def generate_order_index_suggestions(table_name, column_frequency)
    column_frequency.map do |column, frequency|
      build_suggestion(
        type: 'order_by_index',
        table_name: table_name,
        columns: [column],
        frequency: frequency,
        index_name: "idx_#{table_name}_#{column}_order",
        reason: 'Frequently used in ORDER BY clauses',
        impact: 'medium',
        description: "Column '#{column}' is used in ORDER BY clauses #{frequency} times"
      )
    end
  end

  def generate_composite_index_suggestions(table_name, pattern_frequency)
    pattern_frequency.map do |pattern, frequency|
      columns = pattern.split(',')

      build_suggestion(
        type: 'composite_index',
        table_name: table_name,
        columns: columns,
        frequency: frequency,
        bonus: 1, # Composite indexes get priority bonus
        index_name: "idx_#{table_name}_#{columns.join('_')}",
        reason: 'Frequently used together in WHERE clauses',
        impact: 'high',
        description: "Columns '#{columns.join(', ')}' are frequently used together #{frequency} times"
      )
    end
  end

  def generate_foreign_key_index_suggestions(table_name, fk_frequency)
    fk_frequency.map do |fk_column, frequency|
      build_suggestion(
        type: 'foreign_key_index',
        table_name: table_name,
        columns: [fk_column],
        frequency: frequency,
        bonus: 2, # Foreign keys get high priority
        index_name: "idx_#{table_name}_#{fk_column}",
        reason: 'Foreign key used in WHERE clauses',
        impact: 'high',
        description: "Foreign key '#{fk_column}' is used in WHERE clauses #{frequency} times"
      )
    end
  end

  # Maps a frequency to a 1..5 priority, adds +bonus+ and caps the result at 5.
  def calculate_priority(frequency, bonus: 0)
    base_priority = case frequency
                    when 0..2 then 1
                    when 3..5 then 2
                    when 6..10 then 3
                    when 11..14 then 4
                    else 5
                    end

    [base_priority + bonus, 5].min # Cap at 5
  end

  # Keeps the highest-priority suggestion per (table, column set) and sorts
  # the survivors by priority, then frequency, both descending.
  def deduplicate_and_prioritize(suggestions)
    unique_suggestions = suggestions
                         .group_by { |s| [s[:table_name], s[:columns].sort] }
                         .map { |_, grouped| grouped.max_by { |s| s[:priority] } }

    unique_suggestions.sort_by { |s| [-s[:priority], -s[:frequency]] }
  end
end
@@ -0,0 +1,177 @@
1
# Detects N+1 query patterns in a batch of captured queries.
#
# Queries (objects responding to +sql_query+ and +analyzed_at+) are grouped by
# the signature SqlParserService reports, split into bursts whose consecutive
# timestamps are at most +time_window+ apart, and every burst of at least
# +threshold+ SELECTs filtering on an id-like column is reported.
class NPlusOneDetectorService
  # A column called "id" or ending in "_id" (case-insensitive).
  ID_COLUMN = /\bid\b|_id\z/i
  # The trailing "_id" of a foreign-key column (same case rule as ID_COLUMN).
  FOREIGN_KEY_SUFFIX = /_id\z/i

  attr_reader :queries, :time_window, :threshold

  # @param queries [Array, nil] query objects; nil is treated as an empty batch
  # @param time_window maximum gap between consecutive queries of one burst
  # @param threshold [Integer] minimum number of queries in a burst
  def initialize(queries, time_window: 5.seconds, threshold: 3)
    @queries = Array(queries).sort_by(&:analyzed_at)
    @time_window = time_window
    @threshold = threshold
    # One parser per query object so statements are not re-parsed on every pass.
    @parsers = {}.compare_by_identity
  end

  # Convenience wrapper around +new(...).detect+.
  def self.detect(queries, **options)
    new(queries, **options).detect
  end

  # @return [Array<Hash>] one hash per detected pattern with keys :type,
  #   :table_name, :column_name, :query_count, :time_span, :first_query,
  #   :sample_queries, :pattern_signature, :severity and :suggestion
  def detect
    n_plus_one_patterns = []

    group_queries_by_signature.each_value do |grouped_queries|
      next if grouped_queries.length < threshold

      # Queries clustered in time indicate an N+1 pattern.
      find_time_clusters(grouped_queries).each do |cluster|
        next if cluster.length < threshold

        pattern = analyze_cluster_for_n_plus_one(cluster)
        n_plus_one_patterns << pattern if pattern
      end
    end

    n_plus_one_patterns
  end

  private

  # Memoized SqlParserService for +query+.
  def parser_for(query)
    @parsers[query] ||= SqlParserService.new(query.sql_query)
  end

  def group_queries_by_signature
    queries.group_by { |query| parser_for(query).query_signature }
  end

  # Splits time-ordered queries into runs where each query follows the
  # previous one within +time_window+; runs shorter than +threshold+ are dropped.
  def find_time_clusters(ordered_queries)
    clusters = []
    current_cluster = []

    ordered_queries.each do |query|
      gap_too_large = !current_cluster.empty? &&
                      (query.analyzed_at - current_cluster.last.analyzed_at) > time_window

      if gap_too_large
        clusters << current_cluster if current_cluster.length >= threshold
        current_cluster = []
      end

      current_cluster << query
    end

    clusters << current_cluster if current_cluster.length >= threshold
    clusters
  end

  # First WHERE condition whose column looks like an id, or nil.
  def find_id_condition(parser)
    parser.where_conditions.find do |condition|
      column = condition[:column]
      column && column.match?(ID_COLUMN)
    end
  end

  # Builds the pattern hash for a burst, or returns nil when the burst is not
  # a valid SELECT flagged by the parser with an id-like WHERE condition.
  def analyze_cluster_for_n_plus_one(cluster)
    # The first query is representative of the whole burst.
    first_query = cluster.first
    parser = parser_for(first_query)

    return nil unless parser.valid?
    return nil unless parser.query_type == 'SELECT'
    return nil unless parser.potential_n_plus_one?
    return nil unless all_queries_similar_structure?(cluster)

    id_condition = find_id_condition(parser)
    return nil unless id_condition

    {
      type: 'n_plus_one',
      table_name: parser.primary_table,
      column_name: id_condition[:column],
      query_count: cluster.length,
      time_span: cluster.last.analyzed_at - cluster.first.analyzed_at,
      first_query: first_query,
      sample_queries: cluster.first(5), # Keep first 5 as samples
      pattern_signature: parser.query_signature,
      severity: calculate_severity(cluster.length),
      suggestion: generate_suggestion(parser, cluster)
    }
  end

  # True when every query in the burst has the first query's signature.
  def all_queries_similar_structure?(cluster)
    return true if cluster.length <= 1

    first_signature = parser_for(cluster.first).query_signature
    cluster.all? { |query| parser_for(query).query_signature == first_signature }
  end

  def calculate_severity(query_count)
    case query_count
    when 0..5 then 'low'
    when 6..15 then 'medium'
    when 16..50 then 'high'
    else 'critical'
    end
  end

  # Human-readable advice for a burst: eager loading for foreign-key lookups,
  # batched lookups for primary-key lookups, generic advice otherwise.
  def generate_suggestion(parser, cluster)
    table_name = parser.primary_table
    id_condition = find_id_condition(parser)

    return generic_suggestion(cluster) unless id_condition

    # Ignore any "table." prefix when inspecting the column name.
    column = id_condition[:column].to_s.split('.').last.to_s

    if column.match?(FOREIGN_KEY_SUFFIX)
      foreign_key_suggestion(table_name, column, cluster)
    else
      id_lookup_suggestion(table_name, cluster)
    end
  end

  # Advice for "SELECT ... FROM <table> WHERE <parent>_id = ?" repeated per
  # parent: the association to eager-load is the queried table, and the
  # parent model name is the foreign key with only its trailing "_id" removed.
  def foreign_key_suggestion(table_name, foreign_key, cluster)
    parent_name = foreign_key.sub(FOREIGN_KEY_SUFFIX, '').downcase
    parent_collection = parent_name.pluralize
    association_name = table_name

    {
      title: "Use includes() to avoid N+1 queries",
      description: "Detected #{cluster.length} similar queries on #{table_name} table. " \
                   "This appears to be an N+1 query pattern where you're loading #{table_name} " \
                   "records one by one instead of using eager loading.",
      rails_suggestion: "Use `includes(:#{association_name})` or `preload(:#{association_name})` " \
                        "to load all related records in a single query.",
      example_code: "# Instead of:\n" \
                    "#{parent_collection}.each { |#{parent_name}| #{parent_name}.#{association_name}.count }\n\n" \
                    "# Use:\n" \
                    "#{parent_collection}.includes(:#{association_name}).each " \
                    "{ |#{parent_name}| #{parent_name}.#{association_name}.count }",
      sql_suggestion: "Consider using a JOIN or IN clause to fetch all records at once."
    }
  end

  # Advice for repeated primary-key lookups on one table.
  def id_lookup_suggestion(table_name, cluster)
    # to_s: the parser may not have found a primary table.
    model_name = table_name.to_s.classify

    {
      title: "Optimize repeated ID lookups",
      description: "Detected #{cluster.length} similar queries looking up records by ID. " \
                   "Consider batching these lookups.",
      rails_suggestion: "Use `where(id: [id1, id2, id3])` to fetch multiple records at once.",
      example_code: "# Instead of multiple queries:\n" \
                    "ids.each { |id| #{model_name}.find(id) }\n\n" \
                    "# Use:\n" \
                    "#{model_name}.where(id: ids)",
      sql_suggestion: "Use WHERE id IN (...) to fetch multiple records in a single query."
    }
  end

  # Fallback advice when no id-like condition is present.
  def generic_suggestion(cluster)
    {
      title: "Optimize repeated queries",
      description: "Detected #{cluster.length} similar queries that could be optimized.",
      rails_suggestion: "Consider batching these queries or using eager loading.",
      sql_suggestion: "Analyze the query pattern and consider using JOINs or IN clauses."
    }
  end
end