glancer 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.github/workflows/ci.yml +96 -0
- data/.rubocop.yml +54 -0
- data/CHANGELOG.md +88 -0
- data/CLAUDE.md +115 -0
- data/CODE_OF_CONDUCT.md +132 -0
- data/README.md +354 -0
- data/app/assets/config/glancer_manifest.js +1 -0
- data/app/assets/javascripts/glancer/application.js +15 -0
- data/app/assets/javascripts/glancer/controllers/chat_controller.js +101 -0
- data/app/assets/javascripts/glancer/controllers/message_controller.js +1052 -0
- data/app/assets/javascripts/glancer/controllers/toast_controller.js +63 -0
- data/app/assets/stylesheets/glancer/application.css +350 -0
- data/app/assets/stylesheets/glancer/code-blocks.css +6 -0
- data/app/assets/stylesheets/glancer/list.css +31 -0
- data/app/assets/stylesheets/glancer/scrollbar.css +16 -0
- data/app/assets/stylesheets/glancer/table.css +97 -0
- data/app/controllers/glancer/application_controller.rb +33 -0
- data/app/controllers/glancer/chats_controller.rb +49 -0
- data/app/controllers/glancer/messages_controller.rb +144 -0
- data/app/controllers/glancer/schema_controller.rb +29 -0
- data/app/controllers/glancer/settings_controller.rb +23 -0
- data/app/helpers/glancer/application_helper.rb +17 -0
- data/app/jobs/glancer/application_job.rb +6 -0
- data/app/jobs/glancer/process_message_job.rb +38 -0
- data/app/models/glancer/audit.rb +12 -0
- data/app/models/glancer/chat.rb +8 -0
- data/app/models/glancer/code_version.rb +12 -0
- data/app/models/glancer/embedding.rb +6 -0
- data/app/models/glancer/message.rb +25 -0
- data/app/models/glancer/setting.rb +23 -0
- data/app/models/glancer/sql_version.rb +6 -0
- data/app/views/glancer/_data/_importmap.json.erb +7 -0
- data/app/views/glancer/chats/_chat_sidebar.html.erb +2 -0
- data/app/views/glancer/chats/_show.html.erb +52 -0
- data/app/views/glancer/chats/_sidebar_chat_list.html.erb +30 -0
- data/app/views/glancer/chats/index.html.erb +10 -0
- data/app/views/glancer/chats/show.html.erb +1 -0
- data/app/views/glancer/messages/_data_table.html.erb +268 -0
- data/app/views/glancer/messages/_execution_error.html.erb +26 -0
- data/app/views/glancer/messages/_form.html.erb +93 -0
- data/app/views/glancer/messages/_message.html.erb +206 -0
- data/app/views/glancer/messages/_message_info.html.erb +176 -0
- data/app/views/glancer/messages/_temp_form.html.erb +100 -0
- data/app/views/glancer/messages/create.turbo_stream.erb +25 -0
- data/app/views/glancer/schema/show.html.erb +123 -0
- data/app/views/glancer/settings/show.html.erb +306 -0
- data/app/views/glancer/shared/_icons.html.erb +126 -0
- data/app/views/layouts/glancer/application.html.erb +234 -0
- data/config/locales/glancer.en.yml +90 -0
- data/config/locales/glancer.es.yml +90 -0
- data/config/locales/glancer.pt-BR.yml +90 -0
- data/config/routes.rb +20 -0
- data/db/migrate/20250629212642_create_glancer_audits.rb +19 -0
- data/db/migrate/20250629212643_create_glancer_chats.rb +10 -0
- data/db/migrate/20250629212645_create_glancer_embeddings.rb +17 -0
- data/db/migrate/20250629212647_create_glancer_messages.rb +29 -0
- data/db/migrate/20260513204129_add_user_edited_sql_to_glancer_messages.rb +11 -0
- data/db/migrate/20260513210647_create_glancer_sql_versions.rb +18 -0
- data/db/migrate/20260513210648_add_message_id_to_glancer_audits.rb +8 -0
- data/db/migrate/20260513220000_create_glancer_settings.rb +12 -0
- data/db/migrate/20260514083509_add_llm_model_to_glancer_messages.rb +9 -0
- data/db/migrate/20260523120000_rename_code_columns_in_glancer_messages.rb +8 -0
- data/db/migrate/20260523120001_rename_code_column_in_glancer_audits.rb +7 -0
- data/db/migrate/20260523120002_add_code_type_to_glancer_tables.rb +10 -0
- data/db/migrate/20260523120003_rename_glancer_sql_versions_to_code_versions.rb +8 -0
- data/db/migrate/20260523130000_add_enriched_question_to_glancer_messages.rb +7 -0
- data/db/migrate/20260524100000_add_status_to_glancer_messages.rb +9 -0
- data/lib/generators/glancer/install/install_generator.rb +74 -0
- data/lib/generators/glancer/install/templates/glancer.rb +227 -0
- data/lib/generators/glancer/install/templates/llm_context.glancer.md +51 -0
- data/lib/glancer/async_runner.rb +50 -0
- data/lib/glancer/chart_analyzer.rb +230 -0
- data/lib/glancer/configuration.rb +372 -0
- data/lib/glancer/engine.rb +90 -0
- data/lib/glancer/indexer/context_indexer.rb +58 -0
- data/lib/glancer/indexer/model_indexer.rb +64 -0
- data/lib/glancer/indexer/schema_indexer.rb +171 -0
- data/lib/glancer/indexer.rb +50 -0
- data/lib/glancer/retriever.rb +114 -0
- data/lib/glancer/utils/logger.rb +83 -0
- data/lib/glancer/utils/markdown_helper.rb +56 -0
- data/lib/glancer/utils/result_formatter.rb +25 -0
- data/lib/glancer/utils/table_stats.rb +18 -0
- data/lib/glancer/utils/transaction.rb +59 -0
- data/lib/glancer/version.rb +5 -0
- data/lib/glancer/workflow/ar_executor.rb +104 -0
- data/lib/glancer/workflow/ar_extractor.rb +25 -0
- data/lib/glancer/workflow/ar_prompt_builder.rb +64 -0
- data/lib/glancer/workflow/ar_sanitizer.rb +88 -0
- data/lib/glancer/workflow/builder.rb +129 -0
- data/lib/glancer/workflow/cache.rb +55 -0
- data/lib/glancer/workflow/executor.rb +72 -0
- data/lib/glancer/workflow/llm.rb +123 -0
- data/lib/glancer/workflow/prompt_builder.rb +143 -0
- data/lib/glancer/workflow/query_enricher.rb +117 -0
- data/lib/glancer/workflow/sql_extractor.rb +42 -0
- data/lib/glancer/workflow/sql_sanitizer.rb +42 -0
- data/lib/glancer/workflow/sql_validator.rb +67 -0
- data/lib/glancer/workflow.rb +158 -0
- data/lib/glancer.rb +50 -0
- data/lib/tasks/glancer/tailwind.rake +8 -0
- data/lib/tasks/glancer.rake +99 -0
- data/spec/glancer_spec.rb +62 -0
- data/spec/lib/glancer/async_runner_spec.rb +133 -0
- data/spec/lib/glancer/chart_analyzer_spec.rb +296 -0
- data/spec/lib/glancer/configuration_spec.rb +858 -0
- data/spec/lib/glancer/engine_spec.rb +209 -0
- data/spec/lib/glancer/indexer/context_indexer_spec.rb +96 -0
- data/spec/lib/glancer/indexer/model_indexer_spec.rb +103 -0
- data/spec/lib/glancer/indexer/schema_indexer_spec.rb +382 -0
- data/spec/lib/glancer/indexer_spec.rb +95 -0
- data/spec/lib/glancer/retriever_spec.rb +179 -0
- data/spec/lib/glancer/utils/logger_spec.rb +85 -0
- data/spec/lib/glancer/utils/markdown_helper_spec.rb +92 -0
- data/spec/lib/glancer/utils/result_formatter_spec.rb +73 -0
- data/spec/lib/glancer/utils/table_stats_spec.rb +34 -0
- data/spec/lib/glancer/utils/transaction_spec.rb +73 -0
- data/spec/lib/glancer/workflow/ar_executor_spec.rb +155 -0
- data/spec/lib/glancer/workflow/ar_extractor_spec.rb +50 -0
- data/spec/lib/glancer/workflow/ar_prompt_builder_spec.rb +79 -0
- data/spec/lib/glancer/workflow/ar_sanitizer_spec.rb +175 -0
- data/spec/lib/glancer/workflow/builder_spec.rb +204 -0
- data/spec/lib/glancer/workflow/cache_spec.rb +142 -0
- data/spec/lib/glancer/workflow/executor_spec.rb +149 -0
- data/spec/lib/glancer/workflow/llm_spec.rb +124 -0
- data/spec/lib/glancer/workflow/prompt_builder_spec.rb +196 -0
- data/spec/lib/glancer/workflow/query_enricher_spec.rb +184 -0
- data/spec/lib/glancer/workflow/sql_extractor_spec.rb +82 -0
- data/spec/lib/glancer/workflow/sql_sanitizer_spec.rb +98 -0
- data/spec/lib/glancer/workflow/sql_validator_spec.rb +166 -0
- data/spec/lib/glancer/workflow_spec.rb +308 -0
- data/spec/models/glancer/audit_spec.rb +82 -0
- data/spec/models/glancer/chat_spec.rb +60 -0
- data/spec/models/glancer/code_version_spec.rb +71 -0
- data/spec/models/glancer/embedding_spec.rb +73 -0
- data/spec/models/glancer/message_spec.rb +144 -0
- data/spec/models/glancer/setting_spec.rb +88 -0
- data/spec/models/glancer/sql_version_spec.rb +4 -0
- data/spec/spec_helper.rb +128 -0
- data/spec/support/schema.rb +55 -0
- metadata +255 -0
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Glancer
|
|
4
|
+
module Workflow
|
|
5
|
+
class LLM
|
|
6
|
+
def self.humanized_response(question, _data, code, mode: :sql)
|
|
7
|
+
chat = RubyLLM.chat(
|
|
8
|
+
provider: Glancer.configuration.resolved_chat_provider,
|
|
9
|
+
model: Glancer.configuration.resolved_chat_model,
|
|
10
|
+
assume_model_exists: true
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
code_label = mode == :activerecord ? "Ruby/ActiveRecord expression" : "SQL query"
|
|
14
|
+
code_lang = mode == :activerecord ? "ruby" : "sql"
|
|
15
|
+
|
|
16
|
+
context = <<~PROMPT
|
|
17
|
+
You are **Glancer**, a concise database assistant.
|
|
18
|
+
|
|
19
|
+
CRITICAL RULES:
|
|
20
|
+
- **Language Match**: Respond ONLY in the same language as the user's question.
|
|
21
|
+
- **Never say the query "ran", "executed", or "returned"** — the code was GENERATED to answer the user's question.
|
|
22
|
+
The actual results are displayed separately in the UI.
|
|
23
|
+
- **What to explain**: Describe WHAT the code does logically and WHY it answers the question.
|
|
24
|
+
- **Brevity**: 2–4 sentences maximum. No bullet points unless truly necessary.
|
|
25
|
+
- **No code repeat**: The generated code is already shown; do not include it in your response.
|
|
26
|
+
- **No hallucinations**: You have no knowledge of the actual result values. Do not describe or infer data values.
|
|
27
|
+
- **Formatting**: Use Markdown and bold for key terms.
|
|
28
|
+
|
|
29
|
+
#{code_label.upcase} GENERATED to answer the user's question:
|
|
30
|
+
```#{code_lang}
|
|
31
|
+
#{code}
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
USER QUESTION:
|
|
35
|
+
#{question}
|
|
36
|
+
PROMPT
|
|
37
|
+
|
|
38
|
+
custom = Glancer::Setting.get("custom_instructions")
|
|
39
|
+
context += "\n\nADDITIONAL INSTRUCTIONS:\n#{custom}" if custom.present?
|
|
40
|
+
|
|
41
|
+
chat.with_instructions(context)
|
|
42
|
+
response = chat.ask(question)
|
|
43
|
+
|
|
44
|
+
response.content
|
|
45
|
+
rescue StandardError => e
|
|
46
|
+
Glancer::Utils::Logger.error("Workflow::LLM", "Humanized response failed: #{e.message}")
|
|
47
|
+
"I processed the query but failed to generate a humanized explanation. You can still see the raw data below."
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
# Asks the chat model to tell the user, in their own language, that the generated
# query referenced table(s) that are not part of the indexed schema.
#
# @param question [String] the user's original question
# @param error_message [String, nil] validation error text; the table list is read
#   from the part following "Missing table(s) in indexed schema: "
# @return [String] the model's explanation, or a static fallback message when the
#   LLM call fails
def self.explain_missing_tables(question, error_message)
  # Resolved first and nil-safe, so the rescue branch below always has a value to
  # interpolate. One unanchored match is sufficient: it also finds the list when the
  # message carries the "Table validation failed: " prefix.
  missing = error_message.to_s[/Missing table\(s\) in indexed schema: (.+)/, 1] || "desconhecidas"

  prompt = <<~PROMPT
    You are **Glancer**, a helpful SQL assistant.

    The user asked: "#{question}"

    When I tried to generate the SQL query, I referenced table(s) that don't exist in the indexed schema: **#{missing}**.
    This is likely a naming mismatch (e.g., the user said "afiliados" but the actual table is "filiais").

    Please:
    1. Do NOT start with a greeting. Get straight to the point.
    2. Tell the user that the table(s) **#{missing}** could not be found in the indexed schema.
    3. Suggest they check the schema viewer at `/glancer/db-schema` to see all available tables.
    4. Keep it to 2 sentences. Respond in the exact same language as the user's question.
  PROMPT

  chat = RubyLLM.chat(
    provider: Glancer.configuration.resolved_chat_provider,
    model: Glancer.configuration.resolved_chat_model,
    assume_model_exists: true
  )
  chat.ask(prompt).content
rescue StandardError => e
  # Best effort: never let the explanation step raise; return a canned message instead.
  Glancer::Utils::Logger.error("Workflow::LLM", "explain_missing_tables failed: #{e.message}")
  "Não consegui encontrar a(s) tabela(s) **#{missing}** no schema indexado. " \
    "Acesse `/glancer/db-schema` para ver todas as tabelas disponíveis e reformule sua pergunta com o nome correto."
end
|
|
81
|
+
|
|
82
|
+
# Generates a short title for a chat session from its first question.
#
# @param question [String] the question that opened the session
# @return [String] the model's title (stripped, truncated to 50 characters), or the
#   question truncated to 45 characters when the model fails or answers with nothing
def self.generate_title(question)
  chat = RubyLLM.chat(
    provider: Glancer.configuration.resolved_chat_provider,
    model: Glancer.configuration.resolved_chat_model,
    assume_model_exists: true
  )
  prompt = "Generate a concise, descriptive title (max 45 characters, no quotes, no punctuation at end) " \
           "for a database query session starting with this question: #{question}"

  title = chat.ask(prompt).content.to_s.strip
  # An empty or whitespace-only reply would otherwise become a blank chat title.
  title.empty? ? question.to_s.truncate(45) : title.truncate(50)
rescue StandardError => e
  Glancer::Utils::Logger.error("Workflow::LLM", "generate_title failed: #{e.message}")
  # to_s keeps the fallback itself from raising when question is nil.
  question.to_s.truncate(45)
end
|
|
95
|
+
|
|
96
|
+
# Asks the chat model to explain, in the user's language, why code generation failed
# and how the question could be rephrased.
#
# The prompt states that generation "failed after 3 attempts"; that number is part of
# the prompt text and is not derived from any argument.
#
# @param question [String] the user's original question
# @param error_message [String] the last error raised while generating/executing code
# @param code [String] the last code that was attempted
# @param mode [Symbol] :activerecord for Ruby/ActiveRecord expressions, anything else for SQL
# @return [String] the model's explanation, or a static fallback when the LLM call fails
def self.explain_error(question, error_message, code, mode: :sql)
  # Computed before any call that can raise so the rescue branch can use it.
  code_label = mode == :activerecord ? "Ruby/ActiveRecord expression" : "SQL"

  chat = RubyLLM.chat(
    provider: Glancer.configuration.resolved_chat_provider,
    model: Glancer.configuration.resolved_chat_model,
    assume_model_exists: true
  )

  prompt = <<~PROMPT
    You are **Glancer**. The user asked: "#{question}".
    We tried to generate a #{code_label} but failed after 3 attempts.
    Last error: "#{error_message}"
    Last code attempted: "#{code}"

    Your task:
    1. Do NOT start with a greeting or salutation (no "Hi", "Hello", "Olá", "Oi", etc.). Get straight to the point.
    2. Explain briefly what went wrong and why (e.g., "The column 'status' doesn't exist in the 'pages' table").
    3. Suggest how the user could rephrase or what alternative they can try.
    4. Keep it concise — 2–3 sentences max.
    5. Respond in the user's language.
  PROMPT

  chat.ask(prompt).content
rescue StandardError => e
  # Same best-effort contract as the other helpers in this class: an unavailable LLM
  # must not turn an already-failed request into a second, unrelated exception.
  Glancer::Utils::Logger.error("Workflow::LLM", "explain_error failed: #{e.message}")
  "I couldn't generate a working #{code_label} for this question. Last error: #{error_message}"
end
|
|
121
|
+
end
|
|
122
|
+
end
|
|
123
|
+
end
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Glancer
|
|
4
|
+
module Workflow
|
|
5
|
+
class PromptBuilder
|
|
6
|
+
# Renders the "custom_instructions" setting as a prompt section.
#
# @return [String] the section text, or "" when the setting is blank or cannot be read
def self.custom_instructions_block
  rules = Glancer::Setting.get("custom_instructions")
  return "" unless rules.present?

  "CUSTOM RULES — MUST BE FOLLOWED STRICTLY:\n#{rules}\n"
rescue StandardError
  # Reading the setting is best effort; a failure simply omits the section.
  ""
end
|
|
12
|
+
|
|
13
|
+
def self.call(question, embeddings, history: [], few_shot_examples: [])
|
|
14
|
+
Glancer::Utils::Logger.info("Workflow::PromptBuilder", "Building prompt for question: #{question.inspect}")
|
|
15
|
+
|
|
16
|
+
now = Time.current.strftime("%Y-%m-%d %H:%M:%S")
|
|
17
|
+
adapter = Glancer.configuration.resolved_adapter
|
|
18
|
+
db_name = begin
|
|
19
|
+
ActiveRecord::Base.connection.current_database
|
|
20
|
+
rescue StandardError
|
|
21
|
+
"unknown"
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
Glancer::Utils::Logger.debug("Workflow::PromptBuilder", "Current time: #{now}, Adapter: #{adapter}, DB: #{db_name}")
|
|
25
|
+
|
|
26
|
+
history_context = history.map do |msg|
|
|
27
|
+
if msg.role == "assistant" && msg.code.present?
|
|
28
|
+
"ASSISTANT (SQL used): #{msg.code.strip}\nASSISTANT (response): #{msg.content}"
|
|
29
|
+
else
|
|
30
|
+
"#{msg.role.upcase}: #{msg.content}"
|
|
31
|
+
end
|
|
32
|
+
end.join("\n\n")
|
|
33
|
+
|
|
34
|
+
schema_context, fk_context = partition_embeddings(embeddings)
|
|
35
|
+
examples_context = format_few_shot_examples(few_shot_examples)
|
|
36
|
+
|
|
37
|
+
prompt = <<~PROMPT
|
|
38
|
+
Current datetime: #{now}
|
|
39
|
+
Active Database Adapter: #{adapter}
|
|
40
|
+
Database Name: #{db_name}
|
|
41
|
+
|
|
42
|
+
You are a specialized Ruby on Rails SQL expert.
|
|
43
|
+
Your only task is to generate a valid SQL SELECT statement based on the provided DATABASE CONTEXT.
|
|
44
|
+
|
|
45
|
+
STRICT GUIDELINES:
|
|
46
|
+
1. **Output**: Return ONLY the SQL query. No explanation, no reasoning text, no markdown prose — just the SQL.
|
|
47
|
+
2. **No Translations**: NEVER translate table names or column names. Use names EXACTLY as they appear in the schema.
|
|
48
|
+
3. **SELECT Only**: Only generate SELECT or WITH (CTE) statements. Destructive operations are strictly forbidden.
|
|
49
|
+
4. **Joins**: Use the SCHEMA RELATIONSHIPS section below to determine correct JOIN conditions.
|
|
50
|
+
5. **Formatting**: Format SQL with proper indentation and line breaks:
|
|
51
|
+
- Each major clause (SELECT, FROM, WHERE, JOIN, GROUP BY, ORDER BY, HAVING, LIMIT) on its own line.
|
|
52
|
+
- Indent selected columns, JOIN conditions, and WHERE predicates with 2 spaces.
|
|
53
|
+
- Use a new line for each selected column when there are more than 2 columns.
|
|
54
|
+
|
|
55
|
+
Think through the query internally before writing it, but your final response must contain SQL only — no surrounding text.
|
|
56
|
+
|
|
57
|
+
Rules for generation:
|
|
58
|
+
- Use **column aliases (AS ...)** to improve readability.
|
|
59
|
+
- The SQL must be valid and executable for #{adapter.to_s.upcase}.
|
|
60
|
+
- Always qualify column names with the table name (e.g., `orders.created_at`).
|
|
61
|
+
|
|
62
|
+
SCHEMA RELATIONSHIPS:
|
|
63
|
+
#{fk_context.presence || "(no foreign keys indexed)"}
|
|
64
|
+
|
|
65
|
+
#{examples_context.present? ? "EXAMPLE QUERIES (from this database):\n#{examples_context}\n" : ""}
|
|
66
|
+
CONVERSATION HISTORY:
|
|
67
|
+
#{history_context.presence || "(no prior messages)"}
|
|
68
|
+
|
|
69
|
+
DATABASE CONTEXT:
|
|
70
|
+
#{format_embeddings_with_stats(schema_context)}
|
|
71
|
+
|
|
72
|
+
#{custom_instructions_block}
|
|
73
|
+
NEW QUESTION:
|
|
74
|
+
#{question}
|
|
75
|
+
|
|
76
|
+
OUTPUT SQL ONLY:
|
|
77
|
+
PROMPT
|
|
78
|
+
|
|
79
|
+
Glancer::Utils::Logger.debug("Workflow::PromptBuilder", "Prompt constructed successfully")
|
|
80
|
+
|
|
81
|
+
prompt
|
|
82
|
+
rescue StandardError => e
|
|
83
|
+
Glancer::Utils::Logger.error("Workflow::PromptBuilder", "Failed to build prompt: #{e.class} - #{e.message}")
|
|
84
|
+
Glancer::Utils::Logger.debug("Workflow::PromptBuilder", "Backtrace:\n#{e.backtrace.join("\n")}")
|
|
85
|
+
raise Glancer::Error, "Prompt construction failed: #{e.message}"
|
|
86
|
+
end
|
|
87
|
+
|
|
88
|
+
def self.example_sql(adapter)
|
|
89
|
+
case adapter.to_s
|
|
90
|
+
when "mysql", "mysql2"
|
|
91
|
+
<<~SQL
|
|
92
|
+
SELECT
|
|
93
|
+
DATE_FORMAT(created_at, '%Y-%m') AS mes,
|
|
94
|
+
COUNT(*) AS total_vendas
|
|
95
|
+
FROM
|
|
96
|
+
vendas
|
|
97
|
+
WHERE
|
|
98
|
+
YEAR(created_at) = 2025
|
|
99
|
+
GROUP BY
|
|
100
|
+
mes
|
|
101
|
+
ORDER BY
|
|
102
|
+
mes;
|
|
103
|
+
SQL
|
|
104
|
+
when "postgres", "postgresql"
|
|
105
|
+
<<~SQL
|
|
106
|
+
SELECT
|
|
107
|
+
TO_CHAR(created_at, 'YYYY-MM') AS mes,
|
|
108
|
+
COUNT(*) AS total_vendas
|
|
109
|
+
FROM
|
|
110
|
+
vendas
|
|
111
|
+
WHERE
|
|
112
|
+
EXTRACT(YEAR FROM created_at) = 2025
|
|
113
|
+
GROUP BY
|
|
114
|
+
mes
|
|
115
|
+
ORDER BY
|
|
116
|
+
mes;
|
|
117
|
+
SQL
|
|
118
|
+
else
|
|
119
|
+
"-- Example not available for this adapter."
|
|
120
|
+
end
|
|
121
|
+
end
|
|
122
|
+
|
|
123
|
+
# Separates foreign-key embeddings from all other embeddings.
#
# An embedding counts as a foreign-key entry when its source_path ends with
# "#foreign_keys".
#
# @param embeddings [Enumerable] objects responding to #source_path and #content
# @return [Array] two elements: the non-foreign-key embeddings, and the stripped
#   contents of the foreign-key embeddings joined with newlines
def self.partition_embeddings(embeddings)
  foreign_keys, schema_chunks = embeddings.partition do |embedding|
    embedding.source_path.to_s.end_with?("#foreign_keys")
  end

  relationships = foreign_keys.map { |embedding| embedding.content.strip }.join("\n")
  [schema_chunks, relationships]
end
|
|
129
|
+
|
|
130
|
+
# Joins the stripped content of each embedding into one text block, separated by
# blank lines.
#
# @param embeddings [Enumerable] objects responding to #content
# @return [String]
def self.format_embeddings_with_stats(embeddings)
  sections = embeddings.map do |embedding|
    embedding.content.strip
  end
  sections.join("\n\n")
end
|
|
133
|
+
|
|
134
|
+
# Renders question/SQL pairs as numbered examples for the prompt.
#
# @param examples [Enumerable] pairs of (question, sql); may be blank
# @return [String] "Example N:" sections separated by blank lines, or "" when there
#   are no examples
def self.format_few_shot_examples(examples)
  return "" if examples.blank?

  rendered = examples.each.with_index(1).map do |(asked, query), position|
    "Example #{position}:\nQuestion: #{asked}\nSQL: #{query.strip}"
  end
  rendered.join("\n\n")
end
|
|
141
|
+
end
|
|
142
|
+
end
|
|
143
|
+
end
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Glancer
|
|
4
|
+
module Workflow
|
|
5
|
+
# Rewrites a user's natural-language question into a precise technical specification
# (via an LLM call) that the code-generation step consumes.
class QueryEnricher
  # Prompt skeleton filled in with Kernel#format. %<database>s names the database the
  # generated code will run against, so the model is not told "PostgreSQL" on other
  # adapters.
  PROMPT_TEMPLATE = <<~PROMPT
    You are a database query specification expert. Enrich the user's question into a precise technical specification that a code generator will use to produce correct %<adapter>s code.

    ## Available Tables
    %<tables>s

    %<schema_context>s%<history>s## User Question
    "%<question>s"

    ## Your Output
    Write a dense, unambiguous technical specification in English covering ALL applicable points below. Be concrete — use exact names, operators, and formats.

    **Model/Table**: State the exact model class (PascalCase for ActiveRecord, table name for SQL) involved. Resolve @mention shortcuts to their table names (e.g. @pages → Page model, pages table). Map natural-language synonyms ("records", "entries", "items") to the correct table.

    **Conditions**: Translate relative time expressions into explicit date arithmetic (e.g. "last 6 months" → `created_at >= 6.months.ago.beginning_of_month`). Name every column used in filters.

    **Aggregations**: Name the exact aggregate function and column (COUNT, SUM, AVG, GROUP BY). For time-based grouping, specify the format string (e.g. `TO_CHAR(created_at, 'MM/YYYY')` for month/year, `DATE_TRUNC('month', created_at)` for date truncation). %<database>s is the database.

    **Follow-up resolution**: Check RECENT CONVERSATION — if this is a follow-up question, carry forward the same model, columns, and filters from before. Resolve pronouns ("it", "them", "that result") explicitly using context.

    **Output shape**: Describe expected result structure (e.g. "one row per month: columns month as MM/YYYY string, count as integer"). If the result should be sorted, specify direction (chronological ASC unless stated otherwise).

    Output ONLY the specification. English. No code. No preamble. No explanations.
  PROMPT

  # Enriches +question+ into a technical specification.
  #
  # @param question [String] the user's question
  # @param table_names [Array<String>] known table names; when empty the question is
  #   returned untouched and no LLM call is made
  # @param history [Enumerable] prior messages (responding to #role, #content, #code,
  #   #code_type); only the last four are used
  # @param adapter [#to_s, nil] "activerecord" selects the ActiveRecord wording,
  #   anything else selects SQL
  # @return [String] the enriched specification, or the original question when the
  #   model returns nothing or any step fails
  def self.enrich(question, table_names, history: [], adapter: nil)
    return question if table_names.empty?

    prompt = format(
      PROMPT_TEMPLATE,
      adapter: adapter&.to_s == "activerecord" ? "ActiveRecord Ruby" : "SQL",
      database: database_label,
      tables: table_names.join(", "),
      schema_context: schema_context_for(question, table_names),
      history: build_history_block(history),
      question: question
    )

    chat = RubyLLM.chat(
      provider: Glancer.configuration.resolved_enrichment_provider,
      model: Glancer.configuration.resolved_enrichment_model,
      assume_model_exists: true
    )

    enriched = chat.ask(prompt).content.to_s.strip
    enriched.presence || question
  rescue StandardError => e
    # Enrichment is optional: fall back to the raw question rather than failing the request.
    Glancer::Utils::Logger.warn("Workflow::QueryEnricher", "Enrichment failed, using original: #{e.message}")
    question
  end

  # Lists the table names present in the schema embeddings.
  #
  # The name is the part of source_path after the last "#"; the "foreign_keys" entry
  # and fragments containing "/" are skipped.
  #
  # @return [Array<String>] unique table names, or [] when the lookup fails
  def self.known_table_names
    Glancer::Embedding
      .where(source_type: "schema")
      .pluck(:source_path)
      .filter_map do |path|
        fragment = path.to_s.split("#").last
        next if fragment.blank? || fragment == "foreign_keys" || fragment.include?("/")

        fragment
      end
      .uniq
  rescue StandardError
    []
  end

  # Human-readable name of the configured database, derived from
  # Glancer.configuration.resolved_adapter. Falls back to "PostgreSQL" (the wording the
  # prompt used before) when the adapter is blank or cannot be read; unknown adapters
  # are passed through verbatim.
  def self.database_label
    adapter = Glancer.configuration.resolved_adapter.to_s
    case adapter
    when "postgres", "postgresql" then "PostgreSQL"
    when "mysql", "mysql2" then "MySQL"
    when "sqlite", "sqlite3" then "SQLite"
    else adapter.empty? ? "PostgreSQL" : adapter
    end
  rescue StandardError
    "PostgreSQL"
  end
  private_class_method :database_label

  # Formats the last four history messages as a "## Recent Conversation" section.
  # Returns "" for blank history. Long code/content is truncated to keep the prompt small.
  def self.build_history_block(history)
    return "" if history.blank?

    lines = history.last(4).map do |msg|
      # to_s: a message without content must not abort enrichment.
      content = msg.content.to_s
      if msg.role == "assistant" && msg.code.present?
        "#{msg.role.upcase} [#{msg.code_type}: #{msg.code.strip.truncate(120)}]: #{content.truncate(160)}"
      else
        "#{msg.role.upcase}: #{content.truncate(200)}"
      end
    end.join("\n")
    "## Recent Conversation\n#{lines}\n\n"
  end
  private_class_method :build_history_block

  # Fetches schema embedding content for tables explicitly @mentioned in the question,
  # so the specification can use real column names and types. At most three embedding
  # contents are included. Returns "" when nothing is mentioned, nothing matches, or
  # the lookup fails.
  def self.schema_context_for(question, table_names)
    at_mentioned = question.scan(/@([A-Za-z]\w*)/).flatten
    relevant = at_mentioned.select { |mention| table_names.any? { |tbl| tbl.casecmp?(mention) } }
    return "" if relevant.empty?

    rows = Glancer::Embedding
           .where(source_type: "schema")
           .where("source_path LIKE '%#%'")
           .pluck(:source_path, :content)

    matched = relevant.flat_map do |mention|
      rows.filter_map do |(path, content)|
        table = path.split("#").last
        content if table.casecmp?(mention)
      end
    end.first(3)

    return "" if matched.empty?

    "## Referenced Schema\n#{matched.join("\n\n")}\n\n"
  rescue StandardError
    ""
  end
  private_class_method :schema_context_for
end
|
|
116
|
+
end
|
|
117
|
+
end
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Glancer
|
|
4
|
+
module Workflow
|
|
5
|
+
# Pulls the SQL statement out of a free-form LLM response.
class SQLExtractor
  # A line that looks like the beginning of a read-only SQL statement.
  SQL_START = /\A\s*(select|with|explain)\b/i

  # First ``` fenced block. The optional prefix swallows a language tag that sits on
  # the fence line ("sql", "postgresql", "mysql", ...), or an inline "sql" tag. A first
  # word that is itself the start of a statement (SELECT/WITH/EXPLAIN) is never taken
  # as a tag, so untagged blocks beginning right after the fence keep working.
  FENCED_BLOCK = /```(?:[ \t]*(?!(?:select|with|explain)\b)[a-z0-9_+-]*[ \t]*\r?\n|sql\b)?\s*(.*?)\s*```/mi

  # Extracts SQL from +text+.
  #
  # Preference order: the first fenced code block; otherwise everything from the first
  # line that starts like a SQL statement; otherwise all non-empty lines joined by spaces.
  #
  # @param text [String] raw model response
  # @return [String] the extracted SQL
  # @raise [Glancer::Error] when extraction fails (e.g. +text+ is not a String)
  def self.extract(text)
    Glancer::Utils::Logger.info("Workflow::SQLExtractor", "Extracting SQL from text response...")

    fenced = FENCED_BLOCK.match(text)
    sql =
      if fenced
        Glancer::Utils::Logger.debug("Workflow::SQLExtractor", "Extracted SQL from formatted code block.")
        fenced[1].strip
      else
        extract_unfenced(text)
      end

    Glancer::Utils::Logger.debug("Workflow::SQLExtractor", "Final extracted SQL:\n#{sql}")

    sql
  rescue StandardError => e
    Glancer::Utils::Logger.error("Workflow::SQLExtractor", "SQL extraction failed: #{e.class} - #{e.message}")
    Glancer::Utils::Logger.debug("Workflow::SQLExtractor", "Backtrace:\n#{e.backtrace.join("\n")}")
    raise Glancer::Error, "SQL extraction failed: #{e.message}"
  end

  # Fallback for responses without a code fence: take everything from the first line
  # that looks like SQL (dropping leading explanation text), or join all non-empty
  # lines when no such line exists.
  def self.extract_unfenced(text)
    lines = text.lines
    start_idx = lines.index { |line| line.strip.match?(SQL_START) }

    sql = if start_idx
            lines[start_idx..].join.strip
          else
            lines.map(&:strip).reject(&:empty?).join(" ")
          end

    fallback_type = start_idx ? " (SQL found at line #{start_idx})" : " (raw join)"
    Glancer::Utils::Logger.debug("Workflow::SQLExtractor",
                                 "No code block found. Fallback extraction#{fallback_type}.")
    sql
  end
  private_class_method :extract_unfenced
end
|
|
41
|
+
end
|
|
42
|
+
end
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Glancer
|
|
4
|
+
module Workflow
|
|
5
|
+
# Keyword-based guard that rejects SQL containing data- or schema-modifying statements.
class SQLSanitizer
  # Statement keywords that must not appear outside string literals and comments.
  FORBIDDEN_KEYWORDS = %w[
    delete update insert drop truncate alter create replace
  ].freeze

  # A single-quoted string (backslash-escaped quotes allowed), a "--" line comment, or a
  # "/* ... */" block comment. Kept as ONE alternation so the scan runs left to right
  # and whichever construct opens first wins.
  STRINGS_AND_COMMENTS = %r{'(?:\\'|[^'])*'|--[^\n]*|/\*.*?\*/}m

  # Raises unless +sql+ is free of forbidden keywords.
  #
  # @param sql [String] the SQL to inspect
  # @return the result of the final log call (not meaningful to callers)
  # @raise [Glancer::Error] when a forbidden keyword is found or inspection itself
  #   fails; the message is always prefixed with "SQL sanitization failed: "
  def self.ensure_safe!(sql)
    Glancer::Utils::Logger.info("Workflow::SQLSanitizer", "Sanitizing SQL...")

    cleaned = strip_strings_and_comments(sql.downcase)
    Glancer::Utils::Logger.debug("Workflow::SQLSanitizer", "Sanitized SQL for inspection:\n#{cleaned}")

    forbidden = FORBIDDEN_KEYWORDS.find { |kw| cleaned.match?(/\b#{kw}\b/) }

    if forbidden
      Glancer::Utils::Logger.error("Workflow::SQLSanitizer", "Blocked SQL due to forbidden keyword: '#{forbidden}'")
      raise Glancer::Error, "Query blocked due to forbidden keyword: '#{forbidden}' in SQL: #{sql.inspect}"
    end

    Glancer::Utils::Logger.info("Workflow::SQLSanitizer", "SQL passed sanitization check.")
  rescue StandardError => e
    # Note: this also re-wraps the "Query blocked" error raised just above.
    Glancer::Utils::Logger.error("Workflow::SQLSanitizer", "Sanitization failed: #{e.class} - #{e.message}")
    Glancer::Utils::Logger.debug("Workflow::SQLSanitizer", "Backtrace:\n#{e.backtrace.join("\n")}")
    raise Glancer::Error, "SQL sanitization failed: #{e.message}"
  end

  # Removes string literals and comments so the keyword check only sees executable text.
  #
  # Two properties matter for the check's soundness:
  # * strings and comments are recognised in one pass, so a quote inside a comment
  #   cannot open a bogus "string" that hides the statements following it;
  # * a comment is replaced by a space, because it separates tokens in SQL
  #   ("drop/**/table" is DROP TABLE); deleting it would glue the neighbours into a
  #   single word that no longer matches a keyword.
  # String literals are removed without a replacement.
  #
  # @param sql [String]
  # @return [String] +sql+ without string literals and comments
  def self.strip_strings_and_comments(sql)
    sql.gsub(STRINGS_AND_COMMENTS) { |token| token.start_with?("'") ? "" : " " }
  end
end
|
|
41
|
+
end
|
|
42
|
+
end
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Glancer
|
|
4
|
+
module Workflow
|
|
5
|
+
# Checks that every table referenced by a generated SQL statement is part of the
# indexed schema (the Glancer::Embedding rows with source_type "schema").
class SQLValidator
  # Verifies that all tables referenced in +sql+ are indexed.
  #
  # System tables (see .system_schemas) are always accepted. A schema-qualified name
  # such as "public.users" is accepted when either the full name or its last segment
  # is indexed.
  #
  # @param sql [String]
  # @return the result of the final log call (not meaningful to callers)
  # @raise [Glancer::Error] listing the missing tables, or wrapping any other failure;
  #   the message is always prefixed with "Table validation failed: "
  def self.validate_tables_exist!(sql)
    Glancer::Utils::Logger.info("Workflow::SQLValidator", "Validating presence of tables in indexed schema...")

    tables_in_sql = extract_table_names(sql)
    Glancer::Utils::Logger.debug("Workflow::SQLValidator", "Tables found in SQL: #{tables_in_sql.inspect}")

    indexed_tables = indexed_schema_table_names
    Glancer::Utils::Logger.debug("Workflow::SQLValidator",
                                 "Tables available in indexed schema: #{indexed_tables.inspect}")

    missing = tables_in_sql.reject do |table|
      system_table?(table) ||
        indexed_tables.include?(table) ||
        indexed_tables.include?(table.split(".").last)
    end

    if missing.any?
      Glancer::Utils::Logger.error("Workflow::SQLValidator", "Missing table(s): #{missing.join(", ")}")
      raise Glancer::Error, "Missing table(s) in indexed schema: #{missing.join(", ")}"
    end

    Glancer::Utils::Logger.info("Workflow::SQLValidator", "All referenced tables are present in indexed schema.")
  rescue StandardError => e
    # Note: this also re-wraps the "Missing table(s)" error raised just above.
    Glancer::Utils::Logger.error("Workflow::SQLValidator", "Table validation failed: #{e.class} - #{e.message}")
    Glancer::Utils::Logger.debug("Workflow::SQLValidator", "Backtrace:\n#{e.backtrace.join("\n")}")
    raise Glancer::Error, "Table validation failed: #{e.message}"
  end

  # Lists the tables a statement reads from.
  #
  # Names following FROM or JOIN are collected (optionally preceded by ONLY/LATERAL),
  # with double quotes and backticks removed and the result lower-cased. Not treated
  # as tables:
  # * text inside string literals and comments;
  # * the FROM of EXTRACT/SUBSTRING/TRIM/OVERLAY(... FROM ...) and of IS [NOT] DISTINCT FROM;
  # * function calls in table position (a name directly followed by "(");
  # * names the statement itself defines as CTEs (WITH name AS (...)).
  # Additional tables in a comma-separated FROM list ("FROM a, b") are not detected.
  #
  # @param sql [String]
  # @return [Array<String>] unique table names in order of first appearance
  def self.extract_table_names(sql)
    # One left-to-right pass: whichever of string / line comment / block comment opens
    # first is consumed as a whole. Comments become a space so neighbours stay apart.
    text = sql.gsub(%r{'(?:''|[^'])*'|--[^\n]*|/\*.*?\*/}m) { |token| token.start_with?("'") ? "''" : " " }

    # Neutralise the FROM keywords that are part of an expression, not a table source.
    text = text.gsub(/\b(extract|substring|trim|overlay)(\s*\([^()]*?)\bfrom\b/i, '\1\2,')
               .gsub(/\b(is\s+(?:not\s+)?distinct)\s+from\b/i, '\1')

    cte_names = text.scan(
      /(?:\bwith\s+(?:recursive\s+)?|,\s*)["`]?([a-z_]\w*)["`]?\s*(?:\([^()]*\)\s*)?\bas\s*(?:(?:not\s+)?materialized\s*)?\(/i
    ).flatten.map(&:downcase)

    # Atomic groups keep the engine from backtracking into a shorter name (or into the
    # ONLY/LATERAL prefix) just to satisfy the "not followed by (" check.
    references = text.scan(
      /\b(?:from|join)\s+(?>(?:(?:only|lateral)\s+)?)((?>[a-z0-9_."`]+))(?!\s*\()/i
    ).flatten

    references
      .map { |name| name.delete('"`').downcase.strip }
      .reject { |name| name.empty? || name.match?(/\A\d+\z/) || cte_names.include?(name) }
      .uniq
  end

  # Whether +table_name+ belongs to a system schema of the configured adapter. For a
  # qualified name the part before the first "." is compared; an unqualified name is
  # compared as a whole.
  #
  # @param table_name [String]
  # @return [Boolean]
  def self.system_table?(table_name)
    schema = table_name.include?(".") ? table_name.split(".").first.downcase : table_name
    system_schemas.include?(schema)
  end

  # Lower-cased, unique fragments following the first "#" in the source_path of every
  # schema embedding. Paths without "#" are ignored.
  #
  # @return [Array<String>]
  def self.indexed_schema_table_names
    Glancer::Embedding
      .where(source_type: "schema")
      .pluck(:source_path)
      .map { |path| path[/#(.*?)\z/, 1] }
      .compact
      .map(&:downcase)
      .uniq
  end

  # System schema (or, for SQLite, system table) names for the configured adapter.
  #
  # @return [Array<String>] empty for unrecognised adapters
  def self.system_schemas
    case Glancer.configuration.resolved_adapter.to_s
    when "postgres", "postgresql"
      %w[information_schema pg_catalog pg_toast]
    when "mysql", "mysql2"
      %w[information_schema mysql performance_schema sys]
    when "sqlite", "sqlite3"
      %w[sqlite_master]
    else
      []
    end
  end
end
|
|
66
|
+
end
|
|
67
|
+
end
|