text_to_sql_assistant 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +21 -0
- data/README.md +163 -0
- data/lib/text_to_sql_assistant/assistant.rb +127 -0
- data/lib/text_to_sql_assistant/configuration.rb +45 -0
- data/lib/text_to_sql_assistant/providers/anthropic.rb +27 -0
- data/lib/text_to_sql_assistant/providers/base.rb +40 -0
- data/lib/text_to_sql_assistant/providers/gemini.rb +25 -0
- data/lib/text_to_sql_assistant/providers/openai.rb +28 -0
- data/lib/text_to_sql_assistant/query_validator.rb +62 -0
- data/lib/text_to_sql_assistant/schema_reader.rb +29 -0
- data/lib/text_to_sql_assistant/version.rb +5 -0
- data/lib/text_to_sql_assistant.rb +34 -0
- metadata +75 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: 47525eb6324617379c5fe49c4de11c74bed7394d7adbfc265167c9ccbcb82266
|
|
4
|
+
data.tar.gz: d54068932f6cb2de49af6e83a7945c0b641885670d156ad1f867bdde47035ccd
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: cec6b64f8c63340b0034e00360037a1b248ed26d9d5097ed2d6aa152ccc902bfc545bc10a179862a9cfa51024673eea3dad98c9b9e810e6bbf197cdb9eb3dae8
|
|
7
|
+
data.tar.gz: c7cdcf4ffb07fccdc13ed25497f9615619dd12da653e3d2f3efa79c4f5fa87bee95a086232200696703d23670f92a81c24f95888b71c82632c019d75176a0edc
|
data/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Vibol Teav
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
data/README.md
ADDED
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
# TextToSqlAssistant
|
|
2
|
+
|
|
3
|
+
Ask your database questions in plain English. Get SQL queries and human-readable answers.
|
|
4
|
+
|
|
5
|
+
Works with **Anthropic Claude**, **OpenAI**, and **Google Gemini**. Zero dependencies beyond ActiveRecord and Ruby stdlib.
|
|
6
|
+
|
|
7
|
+
## Installation
|
|
8
|
+
|
|
9
|
+
```ruby
|
|
10
|
+
gem "text_to_sql_assistant"
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
## Quick Start
|
|
14
|
+
|
|
15
|
+
```ruby
|
|
16
|
+
TextToSqlAssistant.configure do |c|
|
|
17
|
+
c.provider = :anthropic
|
|
18
|
+
c.api_key = ENV["ANTHROPIC_API_KEY"]
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
assistant = TextToSqlAssistant.new(
|
|
22
|
+
connection: ActiveRecord::Base.connection
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
result = assistant.ask("Who are the top 5 customers by order total?")
|
|
26
|
+
puts result[:answer] # Human-readable explanation
|
|
27
|
+
puts result[:sql] # Generated SELECT query
|
|
28
|
+
puts result[:results] # First 10 rows
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
## How It Works
|
|
32
|
+
|
|
33
|
+
1. Auto-reads your database schema (tables, columns, types)
|
|
34
|
+
2. Sends schema + question to the LLM
|
|
35
|
+
3. LLM generates a SELECT query
|
|
36
|
+
4. Validates the query (SELECT-only, no sensitive columns, row limit)
|
|
37
|
+
5. Executes against your database
|
|
38
|
+
6. Sends results back to LLM for human-readable interpretation
|
|
39
|
+
|
|
40
|
+
Two LLM calls per question. ~$0.002 on Claude Haiku.
|
|
41
|
+
|
|
42
|
+
## Providers
|
|
43
|
+
|
|
44
|
+
### Anthropic Claude (default)
|
|
45
|
+
|
|
46
|
+
```ruby
|
|
47
|
+
TextToSqlAssistant.configure do |c|
|
|
48
|
+
c.provider = :anthropic
|
|
49
|
+
c.api_key = ENV["ANTHROPIC_API_KEY"]
|
|
50
|
+
c.model = "claude-haiku-4-5-20251001" # default, cheapest
|
|
51
|
+
end
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
### OpenAI
|
|
55
|
+
|
|
56
|
+
```ruby
|
|
57
|
+
TextToSqlAssistant.configure do |c|
|
|
58
|
+
c.provider = :openai
|
|
59
|
+
c.api_key = ENV["OPENAI_API_KEY"]
|
|
60
|
+
c.model = "gpt-4o-mini" # default
|
|
61
|
+
end
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
### Google Gemini
|
|
65
|
+
|
|
66
|
+
```ruby
|
|
67
|
+
TextToSqlAssistant.configure do |c|
|
|
68
|
+
c.provider = :gemini
|
|
69
|
+
c.api_key = ENV["GEMINI_API_KEY"]
|
|
70
|
+
c.model = "gemini-2.0-flash" # default
|
|
71
|
+
end
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
### Custom Provider
|
|
75
|
+
|
|
76
|
+
```ruby
|
|
77
|
+
class MyProvider < TextToSqlAssistant::Providers::Base
|
|
78
|
+
def complete(system_prompt, user_message)
|
|
79
|
+
# Call your LLM, return the response text
|
|
80
|
+
end
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
assistant = TextToSqlAssistant.new(provider: MyProvider)
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
## Configuration
|
|
87
|
+
|
|
88
|
+
```ruby
|
|
89
|
+
TextToSqlAssistant.configure do |c|
|
|
90
|
+
c.provider = :anthropic
|
|
91
|
+
c.api_key = ENV["ANTHROPIC_API_KEY"]
|
|
92
|
+
c.model = "claude-haiku-4-5-20251001"
|
|
93
|
+
c.max_rows = 50 # LIMIT enforced on all queries
|
|
94
|
+
c.query_timeout = 5 # seconds
|
|
95
|
+
c.log_queries = true # log to config.logger, falling back to Rails.logger when Rails is loaded
|
|
96
|
+
c.blocked_columns = %w[ # reject queries touching these
|
|
97
|
+
encrypted_password
|
|
98
|
+
reset_password_token
|
|
99
|
+
api_secret
|
|
100
|
+
]
|
|
101
|
+
c.blocked_tables = %w[ # excluded from schema
|
|
102
|
+
information_schema
|
|
103
|
+
]
|
|
104
|
+
c.on_query = ->(question, result) { # audit callback
|
|
105
|
+
AuditLog.create!(query: question, sql: result[:sql])
|
|
106
|
+
}
|
|
107
|
+
end
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
## Custom Schema
|
|
111
|
+
|
|
112
|
+
By default, the gem reads your database schema automatically. You can override this with a custom description for better LLM accuracy:
|
|
113
|
+
|
|
114
|
+
```ruby
|
|
115
|
+
assistant = TextToSqlAssistant.new(
|
|
116
|
+
schema: <<~SCHEMA
|
|
117
|
+
- users: id, email, name, role(admin/member), created_at
|
|
118
|
+
- orders: id, user_id, total_cents, status(pending/paid/refunded), created_at
|
|
119
|
+
- products: id, name, price_cents, category, active(boolean)
|
|
120
|
+
NOTE: orders.total_cents is in cents, divide by 100 for dollars.
|
|
121
|
+
NOTE: users with role='admin' are internal, exclude from customer queries.
|
|
122
|
+
SCHEMA
|
|
123
|
+
)
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
## Security
|
|
127
|
+
|
|
128
|
+
The gem blocks dangerous queries at the application level:
|
|
129
|
+
|
|
130
|
+
- **SELECT only** — rejects INSERT, UPDATE, DELETE, DROP, etc.
|
|
131
|
+
- **Column blocklist** — blocks `encrypted_password`, `reset_password_token`, etc.
|
|
132
|
+
- **Table blocklist** — excludes `information_schema` and system tables
|
|
133
|
+
- **Row limit** — forces LIMIT on all queries (default 50)
|
|
134
|
+
- **Query timeout** — kills slow queries (default 5 seconds; applied via MySQL's `MAX_EXECUTION_TIME` session variable, so other databases are not covered)
|
|
135
|
+
|
|
136
|
+
**Important:** Application-level validation is defense in depth. For production, always use a **read-only database user**:
|
|
137
|
+
|
|
138
|
+
```sql
|
|
139
|
+
CREATE USER 'ai_readonly'@'%' IDENTIFIED BY '...';
|
|
140
|
+
GRANT SELECT ON your_database.* TO 'ai_readonly'@'%';
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
## Response Format
|
|
144
|
+
|
|
145
|
+
```ruby
|
|
146
|
+
result = assistant.ask("How many users signed up this month?")
|
|
147
|
+
|
|
148
|
+
result[:answer] # "47 users signed up this month, up 12% from last month..."
|
|
149
|
+
result[:sql] # "SELECT COUNT(*) FROM users WHERE created_at >= '2026-03-01'"
|
|
150
|
+
result[:results] # [{"count" => 47}] (first 10 rows)
|
|
151
|
+
result[:total_rows] # 1
|
|
152
|
+
result[:duration_ms] # 3241.5
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
## Requirements
|
|
156
|
+
|
|
157
|
+
- Ruby >= 3.1
|
|
158
|
+
- ActiveRecord >= 7.0
|
|
159
|
+
- An API key for Anthropic, OpenAI, or Google Gemini
|
|
160
|
+
|
|
161
|
+
## License
|
|
162
|
+
|
|
163
|
+
MIT
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module TextToSqlAssistant
  # Answers a natural-language question against a database in four steps:
  # ask the provider for SQL, extract and validate it, run it on the
  # connection, then ask the provider to phrase the rows as an answer.
  class Assistant
    SYSTEM_PROMPT = <<~PROMPT
      You are a SQL expert. Given a database schema and a question, generate a SELECT query to answer it.

      Rules:
      1. ONLY generate SELECT queries. Never INSERT, UPDATE, DELETE, DROP, or ALTER.
      2. Always LIMIT results to %{max_rows} rows.
      3. Use JOINs to show human-readable names instead of IDs where possible.
      4. Format dates readably.
      5. Explain your query logic briefly, then provide the SQL in a ```sql block.

      ## Schema
      %{schema}
    PROMPT

    # @param connection [Object, nil] database connection; defaults to
    #   ActiveRecord::Base.connection
    # @param schema [String, nil] schema description; read from the connection
    #   through SchemaReader when omitted
    # @param provider [Symbol, Class, Providers::Base, nil] :anthropic, :openai,
    #   :gemini, a class responding to `new(api_key:, model:)`, or a ready
    #   provider instance; defaults to the configured provider
    # @param api_key [String, nil] overrides the configured API key
    # @param model [String, nil] overrides the model
    # @raise [ConfigurationError] when the provider cannot be resolved
    def initialize(connection: nil, schema: nil, provider: nil, api_key: nil, model: nil)
      @connection = connection || ActiveRecord::Base.connection
      @config = TextToSqlAssistant.configuration
      @provider = build_provider(provider, api_key, model)
      @schema = schema || SchemaReader.new(@connection).read
      @validator = QueryValidator.new(@config)
    end

    # Runs the full question -> SQL -> rows -> answer pipeline.
    #
    # Blocked queries and SQL errors are reported in the returned hash rather
    # than raised. Only successful runs are logged and passed to `on_query`.
    #
    # @param question [String] the question in plain language
    # @return [Hash] with keys :answer, :sql, :results (first 10 rows),
    #   :total_rows and :duration_ms
    def ask(question)
      start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)

      # Step 1: Generate SQL
      prompt = format(SYSTEM_PROMPT, max_rows: @config.max_rows, schema: @schema)
      sql_response = @provider.complete(prompt, question)

      # Step 2: Extract SQL
      sql = extract_sql(sql_response)
      return failure("Could not generate a valid SQL query.", nil, start_time) unless sql

      # Step 3: Validate and execute
      begin
        validated_sql = @validator.validate!(sql)
        set_timeout
        data = @connection.select_all(validated_sql).to_a
      rescue QueryBlockedError => e
        return failure("Query blocked: #{e.message}", sql, start_time)
      rescue ActiveRecord::StatementInvalid => e
        return failure("SQL error: #{e.message.split("\n").first}", sql, start_time)
      end

      # Step 4: Interpret results (only the first 20 rows are sent back to the LLM)
      preview = data.first(20).map(&:to_h).to_json
      answer = @provider.complete(
        prompt,
        "I asked: #{question}\n\nSQL:\n```sql\n#{validated_sql}\n```\n\nResults (#{data.length} rows):\n#{preview}\n\nProvide a clear answer."
      )

      result = {
        answer: answer,
        sql: validated_sql,
        results: data.first(10),
        total_rows: data.length,
        duration_ms: duration_since(start_time)
      }

      log_query(question, result) if @config.log_queries
      @config.on_query&.call(question, result)

      result
    end

    private

    # Result hash returned when no rows could be produced.
    def failure(message, sql, start_time)
      { answer: message, sql: sql, results: [], total_rows: 0,
        duration_ms: duration_since(start_time) }
    end

    # Resolves the provider argument (or the configured provider) to an instance.
    def build_provider(provider_arg, api_key_arg, model_arg)
      # Accept a pre-built provider instance directly
      return provider_arg if provider_arg.is_a?(Providers::Base)

      provider_name = provider_arg || @config.provider
      api_key = api_key_arg || @config.api_key
      provider_class = provider_class_for(provider_name)

      provider_class.new(api_key: api_key, model: model_arg || default_model_for(provider_name))
    end

    # Maps a provider symbol to its class; anything responding to `new` is
    # treated as a custom provider class.
    def provider_class_for(provider_name)
      case provider_name
      when :anthropic then Providers::Anthropic
      when :openai then Providers::OpenAI
      when :gemini then Providers::Gemini
      else
        return provider_name if provider_name.respond_to?(:new)

        raise ConfigurationError, "Unknown provider: #{provider_name}. Use :anthropic, :openai, :gemini, or a custom class."
      end
    end

    # Model used when the caller passes none. A built-in provider chosen per
    # instance gets that provider's own default instead of the default of the
    # globally configured provider. An explicit config.model always wins.
    def default_model_for(provider_name)
      return @config.effective_model unless provider_name.is_a?(Symbol) && provider_name != @config.provider

      scoped = @config.dup
      scoped.provider = provider_name
      scoped.effective_model
    end

    # Pulls the SQL out of an LLM reply: a fenced sql block first (the fence
    # tag is matched case-insensitively), otherwise the first
    # "SELECT ... ;" statement. Returns nil when neither is present.
    def extract_sql(response)
      return nil unless response

      fenced = response.match(/```sql\s*\n?(.*?)\n?```/mi)
      return fenced[1].strip if fenced

      inline = response.match(/(SELECT\s+.+?;)/im)
      inline ? inline[1].strip : nil
    end

    # Best-effort statement timeout using MySQL's MAX_EXECUTION_TIME
    # (milliseconds). Skipped when no timeout is configured or when the
    # adapter reports a non-MySQL name, so a statement that is bound to fail
    # is not sent to other databases. Failures are ignored on purpose.
    #
    # NOTE(review): the session variable is not restored afterwards, so it
    # presumably stays in effect on this connection after `ask` returns —
    # confirm this is acceptable for pooled connections.
    def set_timeout
      timeout = @config.query_timeout
      return unless timeout && mysql_compatible?

      timeout_ms = (timeout * 1000).to_i
      begin
        @connection.execute("SET SESSION MAX_EXECUTION_TIME = #{timeout_ms}")
      rescue StandardError
        nil
      end
    end

    # True when the adapter name looks like MySQL, or cannot be determined.
    def mysql_compatible?
      return true unless @connection.respond_to?(:adapter_name)

      @connection.adapter_name.to_s.match?(/mysql|trilogy/i)
    end

    # Milliseconds elapsed since start_time (monotonic clock), rounded to 2 places.
    def duration_since(start_time)
      ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time) * 1000).round(2)
    end

    # Writes one summary line to config.logger, or Rails.logger when Rails is loaded.
    def log_query(question, result)
      logger = @config.logger || (defined?(Rails) ? Rails.logger : nil)
      logger&.info("[TextToSqlAssistant] Q: #{question} | SQL: #{result[:sql]&.truncate(100)} | Rows: #{result[:total_rows]} | #{result[:duration_ms]}ms")
    end
  end
end
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module TextToSqlAssistant
  # Global settings object. `TextToSqlAssistant.configure` yields an instance
  # of this class; every attribute below is a plain read/write accessor.
  class Configuration
    # provider        - :anthropic, :openai, :gemini, or a custom provider class
    # api_key         - credential passed to the provider
    # model           - model name; nil means "use the provider's default"
    # max_rows        - row cap handed to the prompt and the query validator
    # query_timeout   - seconds
    # blocked_columns - column names that queries may not reference
    # blocked_tables  - table/schema names hidden from the generated schema
    # log_queries     - whether each answered question is logged
    # logger          - logger to use when log_queries is on
    # on_query        - callback proc for audit logging
    attr_accessor :provider, :api_key, :model, :max_rows, :query_timeout,
                  :blocked_columns, :blocked_tables, :log_queries, :logger, :on_query

    # Populates the defaults: Anthropic provider, 50 rows, 5 second timeout,
    # logging off, and blocklists for common credential columns and MySQL
    # system schemas.
    def initialize
      @provider = :anthropic
      @model = nil # auto-detect based on provider
      @max_rows = 50
      @query_timeout = 5
      @blocked_columns = %w[
        encrypted_password reset_password_token confirmation_token
        unconfirmed_email temp_password secret_key api_key api_secret
      ]
      @blocked_tables = %w[information_schema mysql performance_schema sys]
      @log_queries = false
      @logger = nil
      @on_query = nil
    end

    # Default model name for the current provider.
    #
    # @return [String]
    # @raise [ConfigurationError] when the provider has no built-in default
    def default_model
      built_in = {
        anthropic: "claude-haiku-4-5-20251001",
        openai: "gpt-4o-mini",
        gemini: "gemini-2.0-flash"
      }

      built_in.fetch(provider) do
        raise ConfigurationError, "No default model for provider #{provider}. Set config.model explicitly."
      end
    end

    # The explicitly configured model, falling back to #default_model.
    #
    # @return [String]
    def effective_model
      return model if model

      default_model
    end
  end
end
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module TextToSqlAssistant
  module Providers
    # Provider that posts to the Anthropic Messages endpoint.
    class Anthropic < Base
      URL = "https://api.anthropic.com/v1/messages"

      # Sends the system prompt and one user message.
      #
      # @param system_prompt [String]
      # @param user_message [String]
      # @return [String, nil] text of the first content item, or nil when the
      #   response has no such item
      def complete(system_prompt, user_message)
        reply = post_json(URI(URL), request_headers, request_payload(system_prompt, user_message))
        reply.dig("content", 0, "text")
      end

      private

      # Headers sent with every request, including the API key.
      def request_headers
        {
          "x-api-key" => @api_key,
          "anthropic-version" => "2023-06-01",
          "content-type" => "application/json"
        }
      end

      # JSON body: the system prompt goes in its own field, the question is
      # the only message.
      def request_payload(system_prompt, user_message)
        user_turn = { role: "user", content: user_message }

        {
          model: @model,
          max_tokens: 2048,
          system: system_prompt,
          messages: [user_turn]
        }
      end
    end
  end
end
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "net/http"
|
|
4
|
+
require "json"
|
|
5
|
+
require "uri"
|
|
6
|
+
|
|
7
|
+
module TextToSqlAssistant
  module Providers
    # Common superclass for LLM providers. Subclasses implement #complete and
    # may use #post_json for the HTTP round trip.
    class Base
      # @param api_key [String] credential stored for the subclass to send
      # @param model [String] model name stored for the subclass to send
      def initialize(api_key:, model:)
        @api_key = api_key
        @model = model
      end

      # Sends one prompt to the LLM and returns its text reply.
      #
      # @param system_prompt [String]
      # @param user_message [String]
      # @raise [NotImplementedError] always, in this base class
      def complete(system_prompt, user_message)
        raise NotImplementedError
      end

      private

      # POSTs `body` as JSON to `uri` and returns the parsed JSON response.
      #
      # @param uri [URI] target endpoint
      # @param headers [Hash{String => String}] request headers
      # @param body [#to_json] request payload
      # @return [Object] the parsed response body
      # @raise [ProviderError] when the response status is outside 200-299
      def post_json(uri, headers, body)
        request = Net::HTTP::Post.new(uri)
        headers.each { |name, value| request[name] = value }
        request.body = body.to_json

        response = build_http(uri).request(request)

        unless response.code.to_i.between?(200, 299)
          # to_s keeps a body-less error response from raising NoMethodError here
          raise ProviderError, "#{self.class.name} API error #{response.code}: #{response.body.to_s[0..200]}"
        end

        JSON.parse(response.body)
      end

      # Builds the HTTP client for `uri`. TLS is enabled only for https URIs,
      # so subclasses that target a plain http endpoint also work.
      def build_http(uri)
        http = Net::HTTP.new(uri.host, uri.port)
        http.use_ssl = uri.scheme == "https"
        http.read_timeout = 30
        http
      end
    end
  end
end
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module TextToSqlAssistant
  module Providers
    # Provider that posts to the Google Gemini generateContent endpoint.
    class Gemini < Base
      URL = "https://generativelanguage.googleapis.com/v1beta/models/%{model}:generateContent"

      # Sends the system prompt and one user message.
      #
      # The API key is sent in the `x-goog-api-key` header rather than a
      # `key=` query parameter, so it is not part of the request URL and
      # needs no URL escaping.
      #
      # @param system_prompt [String]
      # @param user_message [String]
      # @return [String, nil] text of the first part of the first candidate,
      #   or nil when the response has no such part
      def complete(system_prompt, user_message)
        data = post_json(
          URI(format(URL, model: @model)),
          {
            "Content-Type" => "application/json",
            "x-goog-api-key" => @api_key
          },
          {
            system_instruction: { parts: [{ text: system_prompt }] },
            contents: [{ role: "user", parts: [{ text: user_message }] }],
            generationConfig: { maxOutputTokens: 2048 }
          }
        )
        data.dig("candidates", 0, "content", "parts", 0, "text")
      end
    end
  end
end
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module TextToSqlAssistant
  module Providers
    # Provider that posts to the OpenAI chat completions endpoint.
    class OpenAI < Base
      URL = "https://api.openai.com/v1/chat/completions"

      # Sends the system prompt and one user message.
      #
      # @param system_prompt [String]
      # @param user_message [String]
      # @return [String, nil] content of the first choice's message, or nil
      #   when the response has no such choice
      def complete(system_prompt, user_message)
        conversation = [
          { role: "system", content: system_prompt },
          { role: "user", content: user_message }
        ]
        payload = { model: @model, max_tokens: 2048, messages: conversation }

        reply = post_json(URI(URL), auth_headers, payload)
        reply.dig("choices", 0, "message", "content")
      end

      private

      # Bearer-token authorization plus the JSON content type.
      def auth_headers
        {
          "Authorization" => "Bearer #{@api_key}",
          "Content-Type" => "application/json"
        }
      end
    end
  end
end
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module TextToSqlAssistant
  # Application-level guard for generated SQL. Rejects anything that is not a
  # plain SELECT, that mentions a forbidden keyword, blocked column or blocked
  # pattern, and enforces the configured row limit on the statement it returns.
  class QueryValidator
    DANGEROUS_KEYWORDS = %w[INSERT UPDATE DELETE DROP ALTER TRUNCATE CREATE GRANT REVOKE].freeze
    BLOCKED_PATTERNS = ["INTO OUTFILE", "INTO DUMPFILE", "LOAD_FILE", "BENCHMARK(", "INFORMATION_SCHEMA"].freeze

    # Trailing "LIMIT n", "LIMIT offset, n" or "LIMIT n OFFSET m" clause.
    # Capture 1 is the MySQL-style offset, 2 the row count, 3 the OFFSET part.
    TRAILING_LIMIT = /\bLIMIT\s+(?:(\d+)\s*,\s*)?(\d+)(\s+OFFSET\s+\d+)?\s*\z/i

    # @param config [#blocked_columns, #max_rows] settings to enforce
    def initialize(config)
      @config = config
    end

    # Checks `sql` and returns the statement that is safe to execute.
    #
    # Surrounding whitespace and trailing semicolons are removed first, so an
    # appended LIMIT never lands behind a ";".
    #
    # @param sql [String] candidate statement
    # @return [String] the statement with a row limit no larger than max_rows
    #   when the limit is a trailing numeric LIMIT clause or is missing
    # @raise [QueryBlockedError] when any check fails
    def validate!(sql)
      statement = sql.to_s.strip.sub(/(?:\s*;)+\z/, "")
      normalized = statement.gsub(/\s+/, " ").upcase

      validate_select_only!(normalized)
      validate_no_dangerous_keywords!(normalized)
      validate_no_sensitive_columns!(normalized)
      validate_no_blocked_patterns!(normalized)

      ensure_limit(statement, normalized)
    end

    private

    # The statement must begin with SELECT.
    def validate_select_only!(normalized)
      return if normalized.start_with?("SELECT")

      raise QueryBlockedError, "Only SELECT queries are allowed"
    end

    # Rejects statements containing a forbidden keyword as a standalone token
    # (tokens are separated by whitespace, ";", "," and parentheses).
    def validate_no_dangerous_keywords!(normalized)
      found = DANGEROUS_KEYWORDS & normalized.split(/[\s;,()]+/)
      return if found.empty?

      raise QueryBlockedError, "Query contains forbidden keyword: #{found.join(', ')}"
    end

    # Rejects statements that contain a blocked column name anywhere
    # (case-insensitive substring match).
    def validate_no_sensitive_columns!(normalized)
      found = @config.blocked_columns.map(&:upcase).select { |col| normalized.include?(col) }
      return if found.empty?

      raise QueryBlockedError, "Query references sensitive column: #{found.join(', ')}"
    end

    # Rejects statements that contain any of BLOCKED_PATTERNS.
    def validate_no_blocked_patterns!(normalized)
      found = BLOCKED_PATTERNS.select { |pat| normalized.include?(pat) }
      return if found.empty?

      raise QueryBlockedError, "Query contains blocked pattern: #{found.join(', ')}"
    end

    # Enforces the row limit on an already-trimmed statement:
    # - a trailing numeric LIMIT above max_rows is lowered to max_rows;
    # - a statement with no LIMIT keyword gets "LIMIT max_rows" appended;
    # - any other statement that already uses the LIMIT keyword (for example
    #   only inside a subquery) is returned unchanged.
    # LIMIT is matched as a whole word, so a column such as `rate_limit` does
    # not count as a limit clause.
    def ensure_limit(sql, normalized)
      max_rows = @config.max_rows
      trailing = TRAILING_LIMIT.match(sql)

      if trailing
        return sql if trailing[2].to_i <= max_rows

        offset_prefix = trailing[1] ? "#{trailing[1]}, " : ""
        return "#{trailing.pre_match}LIMIT #{offset_prefix}#{max_rows}#{trailing[3]}"
      end

      return sql if normalized.match?(/\bLIMIT\b/)

      "#{sql} LIMIT #{max_rows}"
    end
  end
end
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module TextToSqlAssistant
  # Auto-reads database schema from an ActiveRecord connection.
  # No manual schema description needed.
  class SchemaReader
    # Bookkeeping tables that are always left out of the schema.
    INTERNAL_TABLES = %w[ar_internal_metadata schema_migrations].freeze

    # @param connection [#tables, #columns] connection to introspect
    def initialize(connection)
      @connection = connection
    end

    # Builds one markdown bullet per visible table, listing each column as
    # `name(type[, NOT NULL][, default: value])`.
    #
    # @return [String] bullets joined by newlines; empty when no table is visible
    def read
      visible = @connection.tables.reject { |table| system_table?(table) }
      visible.map { |table| describe_table(table) }.join("\n")
    end

    private

    # One bullet line for a table and all of its columns.
    def describe_table(table)
      columns = @connection.columns(table).map { |col| describe_column(col) }
      "- **#{table}**: #{columns.join(', ')}"
    end

    # Column name followed by its type, nullability and default in parentheses.
    def describe_column(col)
      details = [col.type]
      details << "NOT NULL" unless col.null
      details << "default: #{col.default}" if col.default
      "#{col.name}(#{details.join(', ')})"
    end

    # True for the internal tables and for any table whose name contains a
    # configured blocked_tables entry. Both sides are downcased, so a
    # mixed-case entry still matches. This is a substring match: an entry
    # "sys" also hides a table named "subsystems".
    def system_table?(name)
      return true if INTERNAL_TABLES.include?(name)

      lowered = name.downcase
      TextToSqlAssistant.configuration.blocked_tables.any? do |blocked|
        lowered.include?(blocked.to_s.downcase)
      end
    end
  end
end
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "active_record"
|
|
4
|
+
|
|
5
|
+
require_relative "text_to_sql_assistant/version"
|
|
6
|
+
require_relative "text_to_sql_assistant/configuration"
|
|
7
|
+
require_relative "text_to_sql_assistant/schema_reader"
|
|
8
|
+
require_relative "text_to_sql_assistant/query_validator"
|
|
9
|
+
require_relative "text_to_sql_assistant/assistant"
|
|
10
|
+
require_relative "text_to_sql_assistant/providers/base"
|
|
11
|
+
require_relative "text_to_sql_assistant/providers/anthropic"
|
|
12
|
+
require_relative "text_to_sql_assistant/providers/openai"
|
|
13
|
+
require_relative "text_to_sql_assistant/providers/gemini"
|
|
14
|
+
|
|
15
|
+
# Entry point of the gem: error hierarchy, global configuration and a
# `new` shortcut that builds an Assistant.
module TextToSqlAssistant
  # Root of every error class defined by this gem.
  class Error < StandardError; end

  class ConfigurationError < Error; end

  class QueryBlockedError < Error; end

  class ProviderError < Error; end

  # The shared Configuration, created on first access.
  #
  # @return [Configuration]
  def self.configuration
    @configuration ||= Configuration.new
  end

  # Yields the shared configuration to the given block.
  #
  # @yieldparam config [Configuration]
  # @return [Object] whatever the block returns
  def self.configure
    yield(configuration)
  end

  # Shortcut for Assistant.new; keyword arguments are forwarded unchanged.
  #
  # @return [Assistant]
  def self.new(**kwargs)
    Assistant.new(**kwargs)
  end
end
|
metadata
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: text_to_sql_assistant
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Vibol Teav
|
|
8
|
+
autorequire:
|
|
9
|
+
bindir: bin
|
|
10
|
+
cert_chain: []
|
|
11
|
+
date: 2026-03-26 00:00:00.000000000 Z
|
|
12
|
+
dependencies:
|
|
13
|
+
- !ruby/object:Gem::Dependency
|
|
14
|
+
name: activerecord
|
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
|
16
|
+
requirements:
|
|
17
|
+
- - ">="
|
|
18
|
+
- !ruby/object:Gem::Version
|
|
19
|
+
version: '7.0'
|
|
20
|
+
type: :runtime
|
|
21
|
+
prerelease: false
|
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
23
|
+
requirements:
|
|
24
|
+
- - ">="
|
|
25
|
+
- !ruby/object:Gem::Version
|
|
26
|
+
version: '7.0'
|
|
27
|
+
description: 'Ask questions in plain English, get SQL queries and human-readable answers.
|
|
28
|
+
Works with any LLM provider (Anthropic Claude, OpenAI, Google Gemini, or custom). Includes
|
|
29
|
+
security guardrails: SELECT-only, column blocklists, query timeouts, audit logging.'
|
|
30
|
+
email:
|
|
31
|
+
- vt@gotabs.net
|
|
32
|
+
executables: []
|
|
33
|
+
extensions: []
|
|
34
|
+
extra_rdoc_files: []
|
|
35
|
+
files:
|
|
36
|
+
- LICENSE
|
|
37
|
+
- README.md
|
|
38
|
+
- lib/text_to_sql_assistant.rb
|
|
39
|
+
- lib/text_to_sql_assistant/assistant.rb
|
|
40
|
+
- lib/text_to_sql_assistant/configuration.rb
|
|
41
|
+
- lib/text_to_sql_assistant/providers/anthropic.rb
|
|
42
|
+
- lib/text_to_sql_assistant/providers/base.rb
|
|
43
|
+
- lib/text_to_sql_assistant/providers/gemini.rb
|
|
44
|
+
- lib/text_to_sql_assistant/providers/openai.rb
|
|
45
|
+
- lib/text_to_sql_assistant/query_validator.rb
|
|
46
|
+
- lib/text_to_sql_assistant/schema_reader.rb
|
|
47
|
+
- lib/text_to_sql_assistant/version.rb
|
|
48
|
+
homepage: https://github.com/tvcam/text_to_sql_assistant
|
|
49
|
+
licenses:
|
|
50
|
+
- MIT
|
|
51
|
+
metadata:
|
|
52
|
+
homepage_uri: https://github.com/tvcam/text_to_sql_assistant
|
|
53
|
+
source_code_uri: https://github.com/tvcam/text_to_sql_assistant
|
|
54
|
+
changelog_uri: https://github.com/tvcam/text_to_sql_assistant/blob/main/CHANGELOG.md
|
|
55
|
+
bug_tracker_uri: https://github.com/tvcam/text_to_sql_assistant/issues
|
|
56
|
+
post_install_message:
|
|
57
|
+
rdoc_options: []
|
|
58
|
+
require_paths:
|
|
59
|
+
- lib
|
|
60
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
61
|
+
requirements:
|
|
62
|
+
- - ">="
|
|
63
|
+
- !ruby/object:Gem::Version
|
|
64
|
+
version: 3.1.0
|
|
65
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
66
|
+
requirements:
|
|
67
|
+
- - ">="
|
|
68
|
+
- !ruby/object:Gem::Version
|
|
69
|
+
version: '0'
|
|
70
|
+
requirements: []
|
|
71
|
+
rubygems_version: 3.4.19
|
|
72
|
+
signing_key:
|
|
73
|
+
specification_version: 4
|
|
74
|
+
summary: Natural language to SQL query assistant for Ruby/Rails apps
|
|
75
|
+
test_files: []
|