code_to_query 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,250 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'digest'
4
+
5
+ module CodeToQuery
6
+ module Performance
7
# Intelligent caching system for query results and parsed intents.
#
# Values are wrapped with TTL/access metadata and stored either in a plain
# Hash or, when Rails is loaded, behind a thin adapter over Rails.cache.
class Cache
  DEFAULT_TTL = 3600 # seconds (1 hour)
  MAX_CACHE_SIZE = 1000

  # @param config [Hash] options; :max_size caps the number of entries
  #   (defaults to MAX_CACHE_SIZE)
  def initialize(config = {})
    @config = config
    @cache_store = build_cache_store
    @hit_count = 0
    @miss_count = 0
    @size_limit = config[:max_size] || MAX_CACHE_SIZE
  end

  # Fetch the value cached under +key+.
  #
  # With a block, a miss (or expired entry) computes the value, stores it
  # for +ttl+ seconds, and returns it; without a block a miss returns nil.
  #
  # Fixes vs. the original: an expired entry was counted as BOTH a hit and
  # a miss (and had its access metadata bumped), and the expiry check
  # indexed into the entry even when it was not a Hash.
  #
  # @param key [Object] any key; normalized via SHA-256 digest
  # @param ttl [Integer] seconds-to-live for a value computed by the block
  # @return [Object, nil]
  def get(key, ttl: DEFAULT_TTL, &block)
    cache_key = normalize_key(key)

    if (cached_value = @cache_store[cache_key])
      if cached_value.is_a?(Hash) && cached_value[:expires_at] > Time.now
        @hit_count += 1
        # LRU/LFU bookkeeping consumed by evict_if_needed
        cached_value[:access_count] = (cached_value[:access_count] || 0) + 1
        cached_value[:last_access_at] = Time.now
        return cached_value[:data]
      end

      # Expired or malformed entry: discard and treat as a plain miss.
      @cache_store.delete(cache_key)
    end

    @miss_count += 1

    return nil unless block_given?

    # Generate, memoize, and return the new value
    value = block.call
    set(key, value, ttl: ttl)
    value
  end

  # Store +value+ under +key+ for +ttl+ seconds; returns +value+.
  def set(key, value, ttl: DEFAULT_TTL)
    cache_key = normalize_key(key)

    # Make room before inserting so the store stays within the limit.
    evict_if_needed

    @cache_store[cache_key] = {
      data: value,
      created_at: Time.now,
      expires_at: Time.now + ttl,
      access_count: 0
    }

    value
  end

  # Remove the entry for +key+ (no-op when absent).
  def delete(key)
    @cache_store.delete(normalize_key(key))
  end

  # Drop every entry and reset hit/miss counters.
  def clear
    @cache_store.clear
    @hit_count = 0
    @miss_count = 0
  end

  # Snapshot of cache health for observability/logging.
  def stats
    {
      size: @cache_store.size,
      hits: @hit_count,
      misses: @miss_count,
      hit_rate: hit_rate,
      memory_usage: calculate_memory_usage
    }
  end

  # Hit rate as a percentage rounded to two decimals (0.0 before any request).
  def hit_rate
    total_requests = @hit_count + @miss_count
    return 0.0 if total_requests.zero?

    (@hit_count.to_f / total_requests * 100).round(2)
  end

  private

  # Prefer Rails.cache when available; otherwise an in-process Hash.
  def build_cache_store
    if defined?(Rails) && Rails.cache
      RailsCacheAdapter.new(Rails.cache)
    else
      {}
    end
  end

  # Reduce any key to a stable SHA-256 hex digest.
  #
  # Hashes are serialized in sorted-key order so logically equal hashes
  # produce the same cache key regardless of insertion order (the previous
  # `to_json` form was order-sensitive and relied on `json` without
  # requiring it).
  def normalize_key(key)
    material =
      case key
      when String then key
      when Hash then key.sort_by { |k, _| k.to_s }.map { |k, v| "#{k}=#{v.inspect}" }.join('&')
      when Array then key.join('|')
      else key.to_s
      end

    Digest::SHA256.hexdigest(material)
  end

  # Evict ~20% of entries (at least one) once the store reaches the size
  # limit, dropping least-accessed / least-recently-used entries first.
  def evict_if_needed
    return if @cache_store.size < @size_limit
    # Rails-backed stores cannot be enumerated (see RailsCacheAdapter), so
    # eviction is skipped there; Rails.cache applies its own expiry policy.
    # (Guarding on :to_a also fixes the original, which checked :each but
    # then called :to_a.)
    return unless @cache_store.respond_to?(:to_a)

    sorted_entries = @cache_store.to_a.sort_by do |_key, value|
      [value[:access_count] || 0, value[:last_access_at] || Time.at(0)]
    end

    # At least one eviction even for very small size limits (0.2 * small
    # limit truncated to 0 previously made this a no-op).
    evict_count = [(@size_limit * 0.2).to_i, 1].max
    evict_count.times do
      key_to_remove, = sorted_entries.shift
      @cache_store.delete(key_to_remove) if key_to_remove
    end
  end

  # Rough, best-effort estimate of the cache's memory footprint in bytes.
  def calculate_memory_usage
    return 0 unless @cache_store.respond_to?(:each)

    total_size = 0
    @cache_store.each do |key, value|
      total_size += key.bytesize if key.respond_to?(:bytesize)
      total_size += estimate_object_size(value[:data])
    end
    total_size
  end

  # Approximate byte size of an arbitrary cached object. Uses #inspect for
  # hashes (deterministic, no `json` dependency) and recurses into arrays.
  def estimate_object_size(obj)
    case obj
    when String
      obj.bytesize
    when Hash
      obj.inspect.bytesize
    when Array
      obj.sum { |item| estimate_object_size(item) }
    else
      obj.to_s.bytesize
    end
  rescue StandardError
    100 # Fallback estimate when an object resists serialization
  end

  # Minimal Hash-like facade over Rails.cache so Cache can target a single
  # store interface. Rails.cache exposes neither size nor iteration, so
  # those degrade gracefully (size 0, no-op each), which disables LRU
  # eviction and memory estimation for Rails-backed stores.
  class RailsCacheAdapter
    def initialize(rails_cache)
      @rails_cache = rails_cache
    end

    def [](key)
      @rails_cache.read(key)
    end

    def []=(key, value)
      @rails_cache.write(key, value, expires_in: 3600) # 1 hour
    end

    def delete(key)
      @rails_cache.delete(key)
    end

    def clear
      @rails_cache.clear
    end

    def size
      # Rails cache doesn't expose size easily
      0
    end

    def each(&block)
      # Rails cache doesn't support iteration;
      # this limits LRU eviction capability with Rails cache.
    end
  end
end
195
+
196
# Cache specialized for parsed NLP intents.
#
# Entries are keyed on the normalized prompt text plus a schema fingerprint
# and the allowed-table list, and expire after 30 minutes.
class IntentCache < Cache
  # Intents tolerate a smaller store than generic cached values.
  def initialize(config = {})
    super(config.merge(max_size: 500))
  end

  # Store a parsed +intent+ for 30 minutes.
  def cache_intent(prompt, schema_hash, allow_tables, intent)
    set(build_intent_key(prompt, schema_hash, allow_tables), intent, ttl: 1800)
  end

  # Look up a previously parsed intent; nil when absent or expired.
  def get_cached_intent(prompt, schema_hash, allow_tables)
    get(build_intent_key(prompt, schema_hash, allow_tables))
  end

  private

  # Normalize the prompt (trimmed, lowercased) and sort the allow-list so
  # equivalent requests resolve to the same cache entry.
  def build_intent_key(prompt, schema_hash, allow_tables)
    normalized_prompt = prompt.to_s.strip.downcase

    {
      prompt: normalized_prompt,
      schema: schema_hash,
      tables: Array(allow_tables).sort
    }
  end
end
222
+
223
# Cache for query execution results.
#
# Result sets go stale quickly, so entries use a short 5-minute TTL and the
# store is kept small.
class QueryCache < Cache
  def initialize(config = {})
    super(config.merge(max_size: 200)) # Smaller cache for query results
  end

  # Store +result+ under a key derived from the SQL text and bind params.
  def cache_query_result(sql, params, result)
    cache_key = build_query_key(sql, params)
    # Shorter TTL for query results as data changes frequently
    set(cache_key, result, ttl: 300) # 5 minutes
  end

  # Fetch a cached result; nil when absent or expired.
  def get_cached_result(sql, params)
    get(build_query_key(sql, params))
  end

  private

  # Build a deterministic key from the SQL and its parameters.
  #
  # The original `params.sort.to_h` raised on nil params, on positional
  # (non-Hash) bind collections, and on hashes mixing symbol and string
  # keys; normalize defensively instead.
  def build_query_key(sql, params)
    normalized_params =
      case params
      when Hash then params.sort_by { |k, _| k.to_s }.to_h
      when nil then {}
      else params
      end

    {
      sql: sql.to_s.strip,
      params: normalized_params
    }
  end
end
249
+ end
250
+ end
@@ -0,0 +1,396 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'digest'
4
+
5
+ module CodeToQuery
6
+ module Performance
7
# Query optimization and performance analysis.
#
# Heuristic, regex-based SQL inspection: estimates complexity, flags likely
# bottlenecks, suggests indexes, and keeps per-query runtime statistics.
class Optimizer
  # Aggregate functions recognized by the complexity/aggregation checks.
  AGGREGATE_FUNCTIONS = %w[SUM AVG COUNT MAX MIN].freeze

  # @param config [#default_limit] configuration object; default_limit is
  #   used when appending a defensive LIMIT clause
  def initialize(config)
    @config = config
    @query_stats = {}
  end

  # Analyze and (lightly) optimize a query before execution.
  #
  # @param sql [String] the SQL statement
  # @param intent [Hash] parsed intent (currently informational only)
  # @return [Hash] original/optimized SQL, the analysis hash, elapsed
  #   optimization time in seconds, and human-readable recommendations
  def optimize_query(sql, intent = {})
    # Monotonic clock: immune to wall-clock adjustments while timing.
    started = Process.clock_gettime(Process::CLOCK_MONOTONIC)

    analysis = analyze_query(sql, intent)
    optimized_sql = apply_optimizations(sql, analysis)

    {
      original_sql: sql,
      optimized_sql: optimized_sql,
      analysis: analysis,
      optimization_time: Process.clock_gettime(Process::CLOCK_MONOTONIC) - started,
      recommendations: generate_recommendations(analysis)
    }
  end

  # Record an execution of +sql+ and update its running statistics.
  # Emits performance alerts when a query degrades or runs slowly.
  #
  # @return [Hash] the updated stats entry for this query
  def track_query_performance(sql, execution_time, result_count = 0)
    query_hash = Digest::SHA256.hexdigest(sql)

    @query_stats[query_hash] ||= {
      sql: sql,
      execution_count: 0,
      total_time: 0.0,
      min_time: Float::INFINITY,
      max_time: 0.0,
      avg_time: 0.0,
      last_executed: nil,
      result_counts: []
    }

    stats = @query_stats[query_hash]
    prev_count = stats[:execution_count]
    prev_total = stats[:total_time]
    prev_avg = prev_count.positive? ? (prev_total / prev_count) : 0.0

    stats[:execution_count] += 1
    stats[:total_time] += execution_time
    stats[:min_time] = [stats[:min_time], execution_time].min
    stats[:max_time] = [stats[:max_time], execution_time].max
    stats[:avg_time] = stats[:total_time] / stats[:execution_count]
    stats[:last_executed] = Time.now
    stats[:result_counts] << result_count

    # Alert when this run is 3x slower than the previously established
    # average (only once there is enough history to trust the average).
    if prev_count >= 5 && execution_time > (prev_avg * 3)
      message = '[code_to_query] PERFORMANCE ALERT: Query performance degrading'
      CodeToQuery.config.logger.warn(message)
      warn message
    end

    # Additional alerts based on the updated running statistics.
    check_performance_alerts(stats)

    stats
  end

  # Aggregate performance insights across all tracked queries.
  def performance_report
    {
      total_queries: @query_stats.size,
      most_frequent: most_frequent_queries,
      slowest: slowest_queries,
      fastest: fastest_queries,
      recommendations: global_recommendations
    }
  end

  private

  # Build the full heuristic analysis hash for one statement.
  def analyze_query(sql, intent)
    {
      estimated_complexity: estimate_complexity(sql),
      join_count: count_joins(sql),
      has_subqueries: has_subqueries?(sql),
      has_aggregations: has_aggregations?(sql),
      has_order_by: has_order_by?(sql),
      has_group_by: has_group_by?(sql),
      limit_clause: extract_limit(sql),
      table_count: count_tables(sql),
      where_conditions: count_where_conditions(sql),
      potential_bottlenecks: identify_bottlenecks(sql),
      index_recommendations: suggest_indexes(sql, intent)
    }
  end

  # Score structural features of the SQL and bucket the result into
  # :low / :medium / :high / :very_high.
  def estimate_complexity(sql)
    complexity_score = 1 # base complexity

    # Joins add significant complexity
    join_count = count_joins(sql)
    complexity_score += join_count * 3

    # Subqueries add complexity
    complexity_score += sql.scan(/\(\s*SELECT/i).size * 4

    # Aggregations add complexity. Match whole function calls,
    # case-insensitively; the original bare `include?` falsely matched
    # substrings (e.g. "COUNT" inside "DISCOUNT") and missed lowercase SQL,
    # and disagreed with has_aggregations?.
    complexity_score += AGGREGATE_FUNCTIONS.count { |func| sql.match?(/\b#{func}\s*\(/i) } * 2

    # GROUP BY / ORDER BY add complexity
    complexity_score += 3 if has_group_by?(sql)
    complexity_score += 1 if has_order_by?(sql)

    # Each additional referenced table adds a point
    table_count = count_tables(sql)
    complexity_score += (table_count - 1) if table_count > 1

    case complexity_score
    when 0..2 then :low
    when 3..6 then :medium
    when 7..12 then :high
    else :very_high
    end
  end

  def count_joins(sql)
    sql.scan(/\bJOIN\b/i).size
  end

  def has_subqueries?(sql)
    sql.match?(/\(\s*SELECT/i)
  end

  def has_aggregations?(sql)
    AGGREGATE_FUNCTIONS.any? { |func| sql.match?(/\b#{func}\s*\(/i) }
  end

  def has_order_by?(sql)
    sql.match?(/\bORDER\s+BY\b/i)
  end

  def has_group_by?(sql)
    sql.match?(/\bGROUP\s+BY\b/i)
  end

  # @return [Integer, nil] the LIMIT value, or nil when absent
  def extract_limit(sql)
    match = sql.match(/\bLIMIT\s+(\d+)/i)
    match && match[1].to_i
  end

  # Approximate table count from FROM and JOIN clauses (always >= 1).
  def count_tables(sql)
    from_count = sql.scan(/\bFROM\s+\w+/i).size
    [from_count + count_joins(sql), 1].max
  end

  # Count boolean conditions in the WHERE clause (AND/OR operators + 1).
  def count_where_conditions(sql)
    where_match = sql.match(/\bWHERE\s+(.+?)(?:\s+ORDER\s+BY|\s+GROUP\s+BY|\s+LIMIT|\s*$)/i)
    return 0 unless where_match

    where_clause = where_match[1]
    and_count = where_clause.scan(/\bAND\b/i).size
    or_count = where_clause.scan(/\bOR\b/i).size

    [and_count + or_count + 1, 1].max
  end

  # Flag structural traits that commonly hurt performance.
  def identify_bottlenecks(sql)
    bottlenecks = []
    limit = extract_limit(sql) # compute once (was evaluated twice)

    bottlenecks << :missing_limit if limit.nil? || limit > 1000
    bottlenecks << :too_many_joins if count_joins(sql) > 3
    bottlenecks << :complex_where if count_where_conditions(sql) > 5
    bottlenecks << :subqueries if has_subqueries?(sql)
    bottlenecks << :wildcard_select if sql.match?(/SELECT\s+\*/i)
    bottlenecks << :no_indexes if lacks_indexed_columns?(sql)

    bottlenecks
  end

  # Heuristic: a WHERE clause that never touches commonly indexed columns
  # (id / created_at / updated_at) may be missing useful indexes.
  def lacks_indexed_columns?(sql)
    where_match = sql.match(/\bWHERE\s+(.+?)(?:\s+ORDER|\s+GROUP|\s+LIMIT|\s*$)/i)
    return false unless where_match

    where_clause = where_match[1]
    indexed_columns = %w[id created_at updated_at]

    indexed_columns.none? { |col| where_clause.match?(/\b#{col}\b/i) }
  end

  # Suggest single-column indexes for filtered/sorted columns, plus a
  # composite index when several columns are filtered together.
  def suggest_indexes(sql, _intent)
    where_columns = extract_where_columns(sql)
    suggestions = where_columns.map do |column|
      {
        type: :single_column,
        column: column,
        reason: 'Filtered in WHERE clause'
      }
    end

    extract_order_columns(sql).each do |column|
      next if where_columns.include?(column)

      suggestions << {
        type: :single_column,
        column: column,
        reason: 'Used in ORDER BY'
      }
    end

    if where_columns.size > 1
      suggestions << {
        type: :composite,
        columns: where_columns,
        reason: 'Multiple WHERE conditions'
      }
    end

    suggestions
  end

  # Pull candidate column names out of the WHERE clause (comparisons,
  # IN / LIKE / BETWEEN predicates).
  def extract_where_columns(sql)
    where_match = sql.match(/\bWHERE\s+(.+?)(?:\s+ORDER|\s+GROUP|\s+LIMIT|\s*$)/i)
    return [] unless where_match

    where_clause = where_match[1]
    columns = []
    columns += where_clause.scan(/\b([a-zA-Z_][a-zA-Z0-9_]*)\s*[=<>!]=?/).flatten
    columns += where_clause.scan(/\b([a-zA-Z_][a-zA-Z0-9_]*)\s+IN\s*\(/i).flatten
    columns += where_clause.scan(/\b([a-zA-Z_][a-zA-Z0-9_]*)\s+(?:I?LIKE)/i).flatten
    columns += where_clause.scan(/\b([a-zA-Z_][a-zA-Z0-9_]*)\s+BETWEEN/i).flatten

    columns.uniq
  end

  # Pull column names out of the ORDER BY clause (direction keywords removed).
  def extract_order_columns(sql)
    order_match = sql.match(/\bORDER\s+BY\s+([^;]+)/i)
    return [] unless order_match

    columns = order_match[1].scan(/\b([a-zA-Z_][a-zA-Z0-9_]*)\b/)
    columns.flatten.reject { |col| %w[ASC DESC].include?(col.upcase) }
  end

  # Apply safe, mechanical rewrites based on the analysis.
  def apply_optimizations(sql, analysis)
    optimized_sql = sql.dup

    # Append a defensive LIMIT when missing on a non-trivial query. Strip
    # any trailing semicolon/whitespace first so the clause lands inside
    # the statement (the original appended after the ";" and produced
    # invalid SQL).
    if analysis[:limit_clause].nil? && (analysis[:estimated_complexity] != :low || analysis[:join_count].positive?)
      optimized_sql = optimized_sql.sub(/[\s;]+\z/, '')
      optimized_sql += " LIMIT #{@config.default_limit}"
    end

    # Suggest query restructuring for very complex queries
    if analysis[:estimated_complexity] == :very_high
      CodeToQuery.config.logger.warn('[code_to_query] Query complexity is very high, consider breaking into smaller queries')
    end

    optimized_sql
  end

  # Translate the analysis into human-readable advice.
  def generate_recommendations(analysis)
    recommendations = []

    case analysis[:estimated_complexity]
    when :high, :very_high
      recommendations << 'Consider breaking this query into smaller, simpler queries'
      recommendations << 'Review the necessity of all JOIN operations'
    end

    if analysis[:join_count] > 3
      recommendations << 'High number of JOINs detected - ensure proper indexing on join columns'
    end

    recommendations << 'Consider rewriting subqueries as JOINs for better performance' if analysis[:has_subqueries]

    if analysis[:limit_clause].nil?
      recommendations << 'Add LIMIT clause to prevent excessive result sets'
    elsif analysis[:limit_clause] > 1000
      recommendations << 'Consider reducing LIMIT value for better performance'
    end

    analysis[:potential_bottlenecks].each do |bottleneck|
      case bottleneck
      when :wildcard_select
        recommendations << 'Avoid SELECT * - specify only needed columns'
      when :no_indexes
        recommendations << 'Consider adding indexes on filtered columns'
      when :complex_where
        recommendations << 'Simplify WHERE clause conditions'
      end
    end

    recommendations
  end

  # Emit log alerts for slow or degrading queries based on running stats.
  def check_performance_alerts(stats)
    # Alert on slow queries (> 1 second average)
    if stats[:avg_time] > 1.0
      CodeToQuery.config.logger.warn("[code_to_query] PERFORMANCE ALERT: Slow query detected (avg: #{stats[:avg_time].round(3)}s)")
    end

    # Alert on queries whose worst case far exceeds their average
    return unless stats[:execution_count] > 5 && stats[:max_time] > stats[:avg_time] * 3

    CodeToQuery.config.logger.warn('[code_to_query] PERFORMANCE ALERT: Query performance degrading')
  end

  def most_frequent_queries(limit = 5)
    @query_stats.values
                .sort_by { |stats| -stats[:execution_count] }
                .first(limit)
                .map { |stats| format_query_stats(stats) }
  end

  def slowest_queries(limit = 5)
    @query_stats.values
                .sort_by { |stats| -stats[:avg_time] }
                .first(limit)
                .map { |stats| format_query_stats(stats) }
  end

  # Only queries executed more than once qualify (one run is noise).
  def fastest_queries(limit = 5)
    @query_stats.values
                .select { |stats| stats[:execution_count] > 1 }
                .sort_by { |stats| stats[:avg_time] }
                .first(limit)
                .map { |stats| format_query_stats(stats) }
  end

  # Compact, report-friendly view of one stats entry.
  def format_query_stats(stats)
    {
      sql: safe_truncate(stats[:sql], 100),
      executions: stats[:execution_count],
      avg_time: stats[:avg_time].round(3),
      min_time: stats[:min_time].round(3),
      max_time: stats[:max_time].round(3)
    }
  end

  # Truncate +str+ to +length+ characters, appending an ellipsis.
  def safe_truncate(str, length)
    s = str.to_s
    return s if s.length <= length

    "#{s[0, length]}…"
  end

  # Cross-query advice derived from the whole stats table.
  def global_recommendations
    recommendations = []

    if @query_stats.any? { |_hash, stats| stats[:avg_time] > 1.0 }
      recommendations << 'Some queries are running slowly - consider optimization'
    end

    slow_query_count = @query_stats.count { |_hash, stats| stats[:avg_time] > 0.5 }
    if slow_query_count > @query_stats.size * 0.3
      recommendations << 'High percentage of slow queries - review indexing strategy'
    end

    recommendations
  end
end
395
+ end
396
+ end