rails-nl2sql 0.1.8 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.DS_Store +0 -0
- data/README.md +56 -1
- data/lib/generators/rails/nl2sql/templates/rails_nl2sql.rb +4 -1
- data/lib/generators/rails_nl2sql/install/install_generator.rb +13 -0
- data/lib/generators/rails_nl2sql/install/templates/rails_nl2sql.rb +7 -0
- data/lib/rails/nl2sql/active_record_extension.rb +21 -0
- data/lib/rails/nl2sql/prompts/default.yml.erb +13 -0
- data/lib/rails/nl2sql/providers/anthropic_provider.rb +23 -0
- data/lib/rails/nl2sql/providers/base.rb +13 -0
- data/lib/rails/nl2sql/providers/llama_provider.rb +23 -0
- data/lib/rails/nl2sql/providers/openai_provider.rb +18 -0
- data/lib/rails/nl2sql/query_generator.rb +59 -110
- data/lib/rails/nl2sql/query_validator.rb +8 -1
- data/lib/rails/nl2sql/schema_builder.rb +14 -1
- data/lib/rails/nl2sql/version.rb +1 -1
- data/lib/rails/nl2sql.rb +38 -28
- data/lib/rails-nl2sql.rb +2 -0
- data/rails-nl2sql.gemspec +3 -2
- metadata +30 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2ffb9e53bea983b462fc1c550a4b53623bd9ff2934ba8a38776121833866ea45
|
4
|
+
data.tar.gz: 1cb0973477ea334d95e719e202879334c6f79505a199f08e8a1957b4c344384b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2ca2b1516ffe66ced4783bbe75f96d9b8801e6be6d1d070deba162b3c410f0dc77c643fcb666a30bc137e1395b317a453552a7f7ed5a14b250f008ad3d44d205
|
7
|
+
data.tar.gz: 4bbea3200d74e19cb3d5df7cfd3e5c4ce248c61cf2c5ad53e7b943eb283b89bd24fe5fac3c43e925f05ea1b98f269eb19fef518a4bd5f3f6567ab1ad30433180
|
data/.DS_Store
ADDED
Binary file
|
data/README.md
CHANGED
@@ -28,7 +28,10 @@ This will create an initializer file at `config/initializers/rails_nl2sql.rb`. Y
|
|
28
28
|
# config/initializers/rails_nl2sql.rb
|
29
29
|
Rails::Nl2sql.configure do |config|
|
30
30
|
config.api_key = ENV["OPENAI_API_KEY"] # It's recommended to use an environment variable
|
31
|
-
# config.model = "
|
31
|
+
# config.model = "gpt-3.5-turbo-instruct" # Optional
|
32
|
+
# config.provider = Rails::Nl2sql::Providers::OpenaiProvider.new(api_key: config.api_key)
|
33
|
+
# config.prompt_template_path = Rails::Nl2sql.prompt_template_path
|
34
|
+
# config.max_schema_lines = 200
|
32
35
|
end
|
33
36
|
```
|
34
37
|
|
@@ -42,6 +45,16 @@ To execute a natural language query, you can use the `execute` method:
|
|
42
45
|
results = Rails::Nl2sql::Processor.execute("Show me all the users from California")
|
43
46
|
```
|
44
47
|
|
48
|
+
### Using `from_nl` with ActiveRecord
|
49
|
+
|
50
|
+
You can call the NL2SQL processor directly on your models. The `from_nl` method
|
51
|
+
returns an `ActiveRecord::Relation` so you can chain scopes, pagination and
|
52
|
+
other query modifiers as usual.
|
53
|
+
|
54
|
+
```ruby
|
55
|
+
User.from_nl("all users who signed up last week").limit(10)
|
56
|
+
```
|
57
|
+
|
45
58
|
You can also specify which tables to include or exclude:
|
46
59
|
|
47
60
|
```ruby
|
@@ -60,6 +73,48 @@ Rails::Nl2sql::Processor.get_tables
|
|
60
73
|
Rails::Nl2sql::Processor.get_schema(include: ["users", "orders"])
|
61
74
|
```
|
62
75
|
|
76
|
+
### Schema caching
|
77
|
+
|
78
|
+
For efficiency the gem caches the full database schema on first use. The cached
|
79
|
+
schema is reused for subsequent requests so your application doesn't need to hit
|
80
|
+
the database every time a prompt is generated.
|
81
|
+
|
82
|
+
You can clear the cached schema if your database changes:
|
83
|
+
|
84
|
+
```ruby
|
85
|
+
Rails::Nl2sql::SchemaBuilder.clear_cache!
|
86
|
+
```
|
87
|
+
|
88
|
+
## Pluggable LLM Providers
|
89
|
+
|
90
|
+
Rails NL2SQL ships with a simple adapter system so you can use different large language model providers.
|
91
|
+
By default the gem uses OpenAI, but you can plug in others like Anthropic or a local Llama‑based HTTP endpoint.
|
92
|
+
|
93
|
+
```ruby
|
94
|
+
Rails::Nl2sql.configure do |config|
|
95
|
+
config.provider = Rails::Nl2sql::Providers::AnthropicProvider.new(api_key: ENV['ANTHROPIC_KEY'])
|
96
|
+
end
|
97
|
+
```
|
98
|
+
|
99
|
+
## Prompt Templates
|
100
|
+
|
101
|
+
The prompts used to talk to the LLM are defined in a YAML/ERB template. You can override this template
|
102
|
+
to enforce your own naming conventions or add company specific instructions.
|
103
|
+
|
104
|
+
```yaml
|
105
|
+
system: |
|
106
|
+
Custom system prompt text...
|
107
|
+
user: |
|
108
|
+
Query: <%= input %>
|
109
|
+
```
|
110
|
+
|
111
|
+
Set the path via `config.prompt_template_path`.
|
112
|
+
|
113
|
+
## Context Window Management
|
114
|
+
|
115
|
+
Large schemas can exceed the model context window. Use `config.max_schema_lines` to automatically truncate
|
116
|
+
the schema snippet sent to the model. Only the first N lines are included.
|
117
|
+
|
63
118
|
## Development
|
64
119
|
|
65
120
|
After checking out the repo, run `bin/setup` to install dependencies. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
@@ -1,4 +1,7 @@
|
|
1
1
|
Rails::Nl2sql.configure do |config|
|
2
2
|
config.api_key = "YOUR_API_KEY"
|
3
|
-
# config.model = "
|
3
|
+
# config.model = "gpt-3.5-turbo-instruct"
|
4
|
+
# config.provider = Rails::Nl2sql::Providers::OpenaiProvider.new(api_key: config.api_key)
|
5
|
+
# config.prompt_template_path = Rails::Nl2sql.prompt_template_path
|
6
|
+
# config.max_schema_lines = 200
|
4
7
|
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
require 'rails/generators'
|
2
|
+
|
3
|
+
module RailsNl2sql
|
4
|
+
module Generators
|
5
|
+
class InstallGenerator < Rails::Generators::Base
|
6
|
+
source_root File.expand_path('../templates', __FILE__)
|
7
|
+
|
8
|
+
def copy_initializer
|
9
|
+
template 'rails_nl2sql.rb', 'config/initializers/rails_nl2sql.rb'
|
10
|
+
end
|
11
|
+
end
|
12
|
+
end
|
13
|
+
end
|
@@ -0,0 +1,7 @@
|
|
1
|
+
Rails::Nl2sql.configure do |config|
|
2
|
+
config.api_key = "YOUR_API_KEY"
|
3
|
+
# config.model = "gpt-3.5-turbo-instruct"
|
4
|
+
# config.provider = Rails::Nl2sql::Providers::OpenaiProvider.new(api_key: config.api_key)
|
5
|
+
# config.prompt_template_path = Rails::Nl2sql.prompt_template_path
|
6
|
+
# config.max_schema_lines = 200
|
7
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'active_support/concern'
|
2
|
+
require 'active_support/lazy_load_hooks'
|
3
|
+
|
4
|
+
module Rails
|
5
|
+
module Nl2sql
|
6
|
+
module ActiveRecordExtension
|
7
|
+
extend ActiveSupport::Concern
|
8
|
+
|
9
|
+
class_methods do
|
10
|
+
def from_nl(prompt, options = {})
|
11
|
+
sql = Rails::Nl2sql::Processor.generate_query_only(prompt, options)
|
12
|
+
from(Arel.sql("(#{sql}) AS #{table_name}"))
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
ActiveSupport.on_load(:active_record) do
|
20
|
+
include Rails::Nl2sql::ActiveRecordExtension
|
21
|
+
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
system: |
|
2
|
+
You are an expert SQL assistant specializing in generating dynamic queries based on natural language.
|
3
|
+
Your primary goal is to generate **correct, safe, and executable <%= db_server %> SQL queries** based on user questions.
|
4
|
+
|
5
|
+
---
|
6
|
+
**DATABASE CONTEXT (SCHEMA):**
|
7
|
+
<%= retrieved_context %>
|
8
|
+
---
|
9
|
+
Follow best practices and never generate DML or DDL statements.
|
10
|
+
Respond only with SQL.
|
11
|
+
user: |
|
12
|
+
Here is the USER QUESTION:
|
13
|
+
<%= input %>
|
@@ -0,0 +1,23 @@
|
|
1
|
+
begin
|
2
|
+
require 'anthropic'
|
3
|
+
rescue LoadError
|
4
|
+
warn 'Anthropic gem not installed; AnthropicProvider will not work'
|
5
|
+
end
|
6
|
+
|
7
|
+
module Rails
|
8
|
+
module Nl2sql
|
9
|
+
module Providers
|
10
|
+
class AnthropicProvider < Base
|
11
|
+
def initialize(api_key:, model: 'claude-3-opus-20240229')
|
12
|
+
raise 'anthropic gem missing' unless defined?(::Anthropic::Client)
|
13
|
+
@client = ::Anthropic::Client.new(api_key: api_key)
|
14
|
+
@model = model
|
15
|
+
end
|
16
|
+
|
17
|
+
def complete(prompt:, **params)
|
18
|
+
@client.completions(model: @model, prompt: prompt, **params)
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'net/http'
|
2
|
+
require 'uri'
|
3
|
+
require 'json'
|
4
|
+
|
5
|
+
module Rails
|
6
|
+
module Nl2sql
|
7
|
+
module Providers
|
8
|
+
class LlamaProvider < Base
|
9
|
+
def initialize(endpoint:, model: nil)
|
10
|
+
@uri = URI.parse(endpoint)
|
11
|
+
@model = model
|
12
|
+
end
|
13
|
+
|
14
|
+
def complete(prompt:, **_params)
|
15
|
+
http = Net::HTTP.new(@uri.host, @uri.port)
|
16
|
+
http.use_ssl = @uri.scheme == 'https'
|
17
|
+
response = http.post(@uri.path, {prompt: prompt, model: @model}.to_json, 'Content-Type' => 'application/json')
|
18
|
+
JSON.parse(response.body)
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'openai'
|
2
|
+
|
3
|
+
module Rails
|
4
|
+
module Nl2sql
|
5
|
+
module Providers
|
6
|
+
class OpenaiProvider < Base
|
7
|
+
def initialize(api_key:, model: 'gpt-3.5-turbo-instruct')
|
8
|
+
@client = ::OpenAI::Client.new(access_token: api_key)
|
9
|
+
@model = model
|
10
|
+
end
|
11
|
+
|
12
|
+
def complete(prompt:, **params)
|
13
|
+
@client.completions(parameters: {model: @model, prompt: prompt}.merge(params))
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
@@ -1,179 +1,128 @@
|
|
1
|
-
require "
|
2
|
-
|
1
|
+
require "erb"
|
2
|
+
require "yaml"
|
3
3
|
module Rails
|
4
4
|
module Nl2sql
|
5
5
|
class QueryGenerator
|
6
|
-
|
7
|
-
|
6
|
+
DEFAULT_MODEL = 'gpt-3.5-turbo-instruct'
|
7
|
+
|
8
|
+
def initialize(provider: nil, model: DEFAULT_MODEL)
|
9
|
+
@provider = provider || Rails::Nl2sql.provider || default_provider(model)
|
8
10
|
@model = model
|
9
11
|
end
|
10
12
|
|
11
|
-
def generate_query(prompt, schema, db_server =
|
13
|
+
def generate_query(prompt, schema, db_server = 'PostgreSQL', tables = nil)
|
12
14
|
retrieved_context = build_context(schema, tables)
|
13
|
-
|
14
|
-
system_prompt =
|
15
|
-
user_prompt = build_user_prompt(prompt)
|
16
|
-
|
15
|
+
|
16
|
+
system_prompt, user_prompt = build_prompts(prompt, db_server, retrieved_context)
|
17
17
|
full_prompt = "#{system_prompt}\n\n#{user_prompt}"
|
18
18
|
|
19
|
-
response = @
|
20
|
-
|
21
|
-
|
22
|
-
prompt: full_prompt,
|
23
|
-
max_tokens: 300,
|
24
|
-
temperature: 0.1
|
25
|
-
}
|
26
|
-
)
|
27
|
-
|
28
|
-
generated_query = response.dig("choices", 0, "text")&.strip
|
29
|
-
|
30
|
-
# Clean up the response to remove markdown formatting
|
19
|
+
response = @provider.complete(prompt: full_prompt, max_tokens: 500, temperature: 0.1)
|
20
|
+
generated_query = extract_text(response)
|
21
|
+
|
31
22
|
generated_query = clean_sql_response(generated_query)
|
32
|
-
|
33
|
-
# Safety check
|
34
23
|
validate_query_safety(generated_query)
|
35
|
-
|
24
|
+
|
36
25
|
generated_query
|
37
26
|
end
|
38
27
|
|
39
28
|
private
|
40
29
|
|
30
|
+
def default_provider(model)
|
31
|
+
Providers::OpenaiProvider.new(api_key: Rails::Nl2sql.api_key, model: model)
|
32
|
+
end
|
33
|
+
|
34
|
+
def extract_text(response)
|
35
|
+
if response.is_a?(Hash)
|
36
|
+
response.dig('choices', 0, 'text')&.strip
|
37
|
+
else
|
38
|
+
nil
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
41
42
|
def build_context(schema, tables)
|
42
|
-
if tables&.any?
|
43
|
-
|
44
|
-
filtered_schema = filter_schema_by_tables(schema, tables)
|
45
|
-
filtered_schema
|
43
|
+
context = if tables&.any?
|
44
|
+
filter_schema_by_tables(schema, tables)
|
46
45
|
else
|
47
46
|
schema
|
48
47
|
end
|
48
|
+
apply_context_window(context)
|
49
|
+
end
|
50
|
+
|
51
|
+
def apply_context_window(context)
|
52
|
+
max_lines = Rails::Nl2sql.max_schema_lines
|
53
|
+
return context unless max_lines
|
54
|
+
|
55
|
+
lines = context.split("\n")
|
56
|
+
return context if lines.length <= max_lines
|
57
|
+
|
58
|
+
lines.first(max_lines).join("\n")
|
49
59
|
end
|
50
60
|
|
51
61
|
def filter_schema_by_tables(schema, tables)
|
52
|
-
# Simple filtering - in a real implementation, this would be more sophisticated
|
53
62
|
lines = schema.split("\n")
|
54
63
|
filtered_lines = []
|
55
64
|
current_table = nil
|
56
65
|
include_current = false
|
57
|
-
|
66
|
+
|
58
67
|
lines.each do |line|
|
59
68
|
if line.match(/CREATE TABLE (\w+)/)
|
60
|
-
current_table =
|
69
|
+
current_table = Regexp.last_match(1)
|
61
70
|
include_current = tables.include?(current_table)
|
62
71
|
end
|
63
|
-
|
64
|
-
if include_current || line.strip.empty?
|
65
|
-
filtered_lines << line
|
66
|
-
end
|
72
|
+
|
73
|
+
filtered_lines << line if include_current || line.strip.empty?
|
67
74
|
end
|
68
|
-
|
69
|
-
filtered_lines.join("\n")
|
70
|
-
end
|
71
75
|
|
72
|
-
|
73
|
-
<<~PROMPT
|
74
|
-
You are an expert SQL assistant specializing in generating dynamic queries based on natural language.
|
75
|
-
Your primary goal is to generate **correct, safe, and executable #{db_server} SQL queries** based on user questions.
|
76
|
-
|
77
|
-
---
|
78
|
-
**DATABASE CONTEXT (SCHEMA):**
|
79
|
-
You are provided with relevant schema details from the database, retrieved to help you.
|
80
|
-
**STRICTLY adhere to this provided schema context.** Do not use any tables or columns not explicitly listed here.
|
81
|
-
#{retrieved_context}
|
82
|
-
|
83
|
-
---
|
84
|
-
**SQL GENERATION RULES:**
|
85
|
-
1. **SQL Dialect:** All generated SQL must be valid **#{db_server} syntax**.
|
86
|
-
* For limiting results, use `LIMIT` (e.g., `LIMIT 10`) instead of `TOP`.
|
87
|
-
* Be mindful of #{db_server}'s specific function names (e.g., `COUNT(*)`, `MAX()`) and behaviors.
|
88
|
-
* For subqueries that return a single value to be used in a `WHERE` clause, ensure they are correctly formatted for #{db_server}.
|
89
|
-
2. **Schema Adherence:** Only use table names and column names that are explicitly present in the provided context. Do not invent names.
|
90
|
-
3. **Valid JOIN Paths:** All `JOIN` operations must be based on valid foreign key relationships. The provided schema context explicitly details many of these.
|
91
|
-
4. **Safety First:** Absolutely **DO NOT** generate any DDL (CREATE, ALTER, DROP) or DML (INSERT, UPDATE, DELETE) statements. Only `SELECT` queries are permitted.
|
92
|
-
5. **CRITICAL: Handling Missing/Empty Text Data:**
|
93
|
-
* When a user asks about "missing," "no," "empty," or "null" values for a TEXT column (like 'email', 'phone', 'address', 'company', 'fax'), generate a `WHERE` clause that explicitly checks for **both `IS NULL` and `= ''` (an empty string)**.
|
94
|
-
* **Example:** To find agents with no email, the query should be `SELECT first_name, last_name FROM agents WHERE email IS NULL OR email = '';`
|
95
|
-
* This is essential
|
96
|
-
6. **Ambiguity:** If a user question is ambiguous or requires more information to form a precise SQL query, clearly state that you need clarification and ask for more details. Do not guess.
|
97
|
-
|
98
|
-
**RESPOND WITH ONLY THE SQL QUERY - NO EXPLANATIONS, NO MARKDOWN FORMATTING, NO CODE BLOCKS, NO ADDITIONAL TEXT.**
|
99
|
-
PROMPT
|
76
|
+
filtered_lines.join("\n")
|
100
77
|
end
|
101
78
|
|
102
|
-
def
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
79
|
+
def build_prompts(input, db_server, retrieved_context)
|
80
|
+
template = Rails::Nl2sql.prompt_template
|
81
|
+
system_prompt = ERB.new(template['system']).result(binding)
|
82
|
+
user_prompt = ERB.new(template['user']).result(binding)
|
83
|
+
[system_prompt, user_prompt]
|
107
84
|
end
|
108
85
|
|
109
86
|
def clean_sql_response(query)
|
110
87
|
return query unless query
|
111
88
|
|
112
|
-
# Remove markdown code blocks
|
113
89
|
query = query.gsub(/```sql\n?/, '')
|
114
90
|
query = query.gsub(/```\n?/, '')
|
115
|
-
|
116
|
-
# Remove any leading/trailing whitespace
|
117
91
|
query = query.strip
|
118
|
-
|
119
|
-
# Remove any explanatory text before or after the query
|
120
|
-
# Look for common patterns like "Here's the SQL query:" or "The query is:"
|
121
92
|
query = query.gsub(/^.*?(SELECT|WITH|INSERT|UPDATE|DELETE|CREATE|DROP|ALTER)/i, '\1')
|
122
|
-
|
123
|
-
# Remove any trailing explanatory text after the query
|
124
|
-
# Split by newlines and take only the SQL part
|
125
93
|
lines = query.split("\n")
|
126
94
|
sql_lines = []
|
127
|
-
|
95
|
+
|
128
96
|
lines.each do |line|
|
129
97
|
line = line.strip
|
130
|
-
# Skip empty lines or lines that look like explanations
|
131
98
|
next if line.empty?
|
132
99
|
next if line.match(/^(here|this|the query|explanation|note)/i)
|
133
|
-
|
100
|
+
|
134
101
|
sql_lines << line
|
135
102
|
end
|
136
|
-
|
137
|
-
# Rejoin the SQL lines
|
103
|
+
|
138
104
|
cleaned_query = sql_lines.join("\n").strip
|
139
|
-
|
140
|
-
# Ensure it ends with a semicolon if it's a complete query
|
141
|
-
if cleaned_query.match(/^(SELECT|WITH)/i) && !cleaned_query.end_with?(';')
|
142
|
-
cleaned_query += ';'
|
143
|
-
end
|
144
|
-
|
105
|
+
cleaned_query += ';' if cleaned_query.match(/^(SELECT|WITH)/i) && !cleaned_query.end_with?(';')
|
145
106
|
cleaned_query
|
146
107
|
end
|
147
108
|
|
148
109
|
def validate_query_safety(query)
|
149
110
|
return unless query
|
150
111
|
|
151
|
-
banned_keywords = [
|
152
|
-
"delete", "drop", "truncate", "update", "insert", "alter",
|
153
|
-
"exec", "execute", "create", "merge", "replace", "into"
|
154
|
-
]
|
155
|
-
|
112
|
+
banned_keywords = %w[delete drop truncate update insert alter exec execute create merge replace into]
|
156
113
|
banned_phrases = [
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
114
|
+
'ignore previous instructions', 'pretend you are', 'i am the admin',
|
115
|
+
'you are no longer bound', 'bypass the rules', 'run this instead',
|
116
|
+
'for testing, run', 'no safety constraints', 'show me a dangerous query',
|
117
|
+
'this is a dev environment', 'drop all data', 'delete all users', 'wipe the database'
|
161
118
|
]
|
162
119
|
|
163
120
|
query_lower = query.downcase
|
164
|
-
|
165
|
-
# Check for banned keywords
|
166
121
|
banned_keywords.each do |keyword|
|
167
|
-
if query_lower.include?(keyword)
|
168
|
-
raise Rails::Nl2sql::Error, "Query contains banned keyword: #{keyword}"
|
169
|
-
end
|
122
|
+
raise Rails::Nl2sql::Error, "Query contains banned keyword: #{keyword}" if query_lower.include?(keyword)
|
170
123
|
end
|
171
|
-
|
172
|
-
# Check for banned phrases
|
173
124
|
banned_phrases.each do |phrase|
|
174
|
-
if query_lower.include?(phrase)
|
175
|
-
raise Rails::Nl2sql::Error, "Query contains banned phrase: #{phrase}"
|
176
|
-
end
|
125
|
+
raise Rails::Nl2sql::Error, "Query contains banned phrase: #{phrase}" if query_lower.include?(phrase)
|
177
126
|
end
|
178
127
|
end
|
179
128
|
end
|
@@ -15,11 +15,18 @@ module Rails
|
|
15
15
|
# Basic validation: prevent destructive commands
|
16
16
|
disallowed_keywords = %w(DROP DELETE UPDATE INSERT TRUNCATE ALTER CREATE EXEC EXECUTE MERGE REPLACE)
|
17
17
|
query_upper = query.upcase
|
18
|
-
|
18
|
+
|
19
19
|
if disallowed_keywords.any? { |keyword| query_upper.include?(keyword) }
|
20
20
|
raise Rails::Nl2sql::Error, "Query contains disallowed keywords."
|
21
21
|
end
|
22
22
|
|
23
|
+
# Ensure there is only a single statement
|
24
|
+
cleaned_query = query.rstrip
|
25
|
+
cleaned_query = cleaned_query.chomp(';')
|
26
|
+
if cleaned_query.include?(';')
|
27
|
+
raise Rails::Nl2sql::Error, "Query contains multiple statements."
|
28
|
+
end
|
29
|
+
|
23
30
|
# Ensure it's a SELECT query
|
24
31
|
unless query_upper.strip.start_with?('SELECT', 'WITH')
|
25
32
|
raise Rails::Nl2sql::Error, "Only SELECT queries are allowed."
|
@@ -1,13 +1,26 @@
|
|
1
1
|
module Rails
|
2
2
|
module Nl2sql
|
3
3
|
class SchemaBuilder
|
4
|
+
@@schema_cache = nil
|
5
|
+
|
4
6
|
def self.build_schema(options = {})
|
7
|
+
if options.empty? && @@schema_cache
|
8
|
+
return @@schema_cache
|
9
|
+
end
|
10
|
+
|
5
11
|
tables = get_filtered_tables(options)
|
6
|
-
|
12
|
+
|
7
13
|
schema_text = build_schema_text(tables)
|
14
|
+
|
15
|
+
@@schema_cache = schema_text if options.empty?
|
16
|
+
|
8
17
|
schema_text
|
9
18
|
end
|
10
19
|
|
20
|
+
def self.clear_cache!
|
21
|
+
@@schema_cache = nil
|
22
|
+
end
|
23
|
+
|
11
24
|
def self.get_database_type
|
12
25
|
adapter = ActiveRecord::Base.connection.adapter_name.downcase
|
13
26
|
case adapter
|
data/lib/rails/nl2sql/version.rb
CHANGED
data/lib/rails/nl2sql.rb
CHANGED
@@ -1,8 +1,15 @@
|
|
1
1
|
require "rails/nl2sql/version"
|
2
|
+
require "rails/nl2sql/providers/base"
|
3
|
+
require "rails/nl2sql/providers/openai_provider"
|
4
|
+
require "rails/nl2sql/providers/anthropic_provider"
|
5
|
+
require "rails/nl2sql/providers/llama_provider"
|
2
6
|
require "rails/nl2sql/query_generator"
|
3
7
|
require "rails/nl2sql/schema_builder"
|
4
8
|
require "rails/nl2sql/query_validator"
|
9
|
+
require "rails/nl2sql/active_record_extension"
|
5
10
|
require "rails/nl2sql/railtie" if defined?(Rails)
|
11
|
+
require 'yaml'
|
12
|
+
require 'erb'
|
6
13
|
|
7
14
|
module Rails
|
8
15
|
module Nl2sql
|
@@ -11,62 +18,65 @@ module Rails
|
|
11
18
|
class << self
|
12
19
|
attr_accessor :api_key
|
13
20
|
attr_accessor :model
|
21
|
+
attr_accessor :provider
|
22
|
+
attr_accessor :max_schema_lines
|
23
|
+
|
24
|
+
def prompt_template_path=(path)
|
25
|
+
@prompt_template = nil
|
26
|
+
@prompt_template_path = path
|
27
|
+
end
|
28
|
+
|
29
|
+
def prompt_template_path
|
30
|
+
@prompt_template_path || File.expand_path('nl2sql/prompts/default.yml.erb', __dir__)
|
31
|
+
end
|
14
32
|
end
|
15
|
-
|
33
|
+
|
34
|
+
@model = 'gpt-3.5-turbo-instruct'
|
35
|
+
@max_schema_lines = 200
|
16
36
|
|
17
37
|
def self.configure
|
18
38
|
yield self
|
19
39
|
end
|
20
40
|
|
41
|
+
def self.prompt_template
|
42
|
+
@prompt_template ||= begin
|
43
|
+
erb = ERB.new(File.read(prompt_template_path))
|
44
|
+
YAML.safe_load(erb.result)
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
21
48
|
class Processor
|
22
49
|
def self.execute(natural_language_query, options = {})
|
23
|
-
# Get database type
|
24
50
|
db_server = SchemaBuilder.get_database_type
|
25
|
-
|
26
|
-
# Build schema with optional table filtering
|
27
51
|
schema = SchemaBuilder.build_schema(options)
|
28
|
-
|
29
|
-
# Extract tables for filtering if specified
|
30
52
|
tables = options[:tables]
|
31
|
-
|
32
|
-
|
33
|
-
query_generator = QueryGenerator.new(Rails::Nl2sql.api_key, Rails::Nl2sql.model)
|
53
|
+
|
54
|
+
query_generator = QueryGenerator.new(model: Rails::Nl2sql.model)
|
34
55
|
generated_query = query_generator.generate_query(
|
35
|
-
natural_language_query,
|
36
|
-
schema,
|
37
|
-
db_server,
|
56
|
+
natural_language_query,
|
57
|
+
schema,
|
58
|
+
db_server,
|
38
59
|
tables
|
39
60
|
)
|
40
61
|
|
41
|
-
# Validate the generated query
|
42
62
|
QueryValidator.validate(generated_query)
|
43
|
-
|
44
|
-
# Execute the query
|
45
63
|
ActiveRecord::Base.connection.execute(generated_query)
|
46
64
|
end
|
47
65
|
|
48
66
|
def self.generate_query_only(natural_language_query, options = {})
|
49
|
-
# Get database type
|
50
67
|
db_server = SchemaBuilder.get_database_type
|
51
|
-
|
52
|
-
# Build schema with optional table filtering
|
53
68
|
schema = SchemaBuilder.build_schema(options)
|
54
|
-
|
55
|
-
# Extract tables for filtering if specified
|
56
69
|
tables = options[:tables]
|
57
|
-
|
58
|
-
|
59
|
-
query_generator = QueryGenerator.new(Rails::Nl2sql.api_key, Rails::Nl2sql.model)
|
70
|
+
|
71
|
+
query_generator = QueryGenerator.new(model: Rails::Nl2sql.model)
|
60
72
|
generated_query = query_generator.generate_query(
|
61
|
-
natural_language_query,
|
62
|
-
schema,
|
63
|
-
db_server,
|
73
|
+
natural_language_query,
|
74
|
+
schema,
|
75
|
+
db_server,
|
64
76
|
tables
|
65
77
|
)
|
66
78
|
|
67
|
-
# Validate the generated query
|
68
79
|
QueryValidator.validate(generated_query)
|
69
|
-
|
70
80
|
generated_query
|
71
81
|
end
|
72
82
|
|
data/lib/rails-nl2sql.rb
ADDED
data/rails-nl2sql.gemspec
CHANGED
@@ -37,8 +37,9 @@ Gem::Specification.new do |spec|
|
|
37
37
|
spec.require_paths = ["lib"]
|
38
38
|
|
39
39
|
spec.add_dependency "openai", "~> 0.3"
|
40
|
-
spec.
|
40
|
+
spec.add_dependency "anthropic", "~> 0.1"
|
41
|
+
spec.add_development_dependency "bundler", "~> 2.0"
|
41
42
|
spec.add_development_dependency "rake", "~> 10.0"
|
42
43
|
spec.add_development_dependency "rspec-rails", "~> 6.0"
|
43
|
-
spec.add_dependency "railties", "
|
44
|
+
spec.add_dependency "railties", "~> 6.0"
|
44
45
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rails-nl2sql
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Russell Van Curen
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-07-
|
11
|
+
date: 2025-07-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: openai
|
@@ -24,20 +24,34 @@ dependencies:
|
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0.3'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: anthropic
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0.1'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0.1'
|
27
41
|
- !ruby/object:Gem::Dependency
|
28
42
|
name: bundler
|
29
43
|
requirement: !ruby/object:Gem::Requirement
|
30
44
|
requirements:
|
31
45
|
- - "~>"
|
32
46
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
47
|
+
version: '2.0'
|
34
48
|
type: :development
|
35
49
|
prerelease: false
|
36
50
|
version_requirements: !ruby/object:Gem::Requirement
|
37
51
|
requirements:
|
38
52
|
- - "~>"
|
39
53
|
- !ruby/object:Gem::Version
|
40
|
-
version: '
|
54
|
+
version: '2.0'
|
41
55
|
- !ruby/object:Gem::Dependency
|
42
56
|
name: rake
|
43
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -70,14 +84,14 @@ dependencies:
|
|
70
84
|
name: railties
|
71
85
|
requirement: !ruby/object:Gem::Requirement
|
72
86
|
requirements:
|
73
|
-
- - "
|
87
|
+
- - "~>"
|
74
88
|
- !ruby/object:Gem::Version
|
75
89
|
version: '6.0'
|
76
90
|
type: :runtime
|
77
91
|
prerelease: false
|
78
92
|
version_requirements: !ruby/object:Gem::Requirement
|
79
93
|
requirements:
|
80
|
-
- - "
|
94
|
+
- - "~>"
|
81
95
|
- !ruby/object:Gem::Version
|
82
96
|
version: '6.0'
|
83
97
|
description: This gem provides an easy way to integrate natural language to SQL functionality
|
@@ -89,6 +103,7 @@ executables: []
|
|
89
103
|
extensions: []
|
90
104
|
extra_rdoc_files: []
|
91
105
|
files:
|
106
|
+
- ".DS_Store"
|
92
107
|
- ".gitignore"
|
93
108
|
- Gemfile
|
94
109
|
- Gemfile.lock
|
@@ -99,7 +114,16 @@ files:
|
|
99
114
|
- bin/setup
|
100
115
|
- lib/generators/rails/nl2sql/install_generator.rb
|
101
116
|
- lib/generators/rails/nl2sql/templates/rails_nl2sql.rb
|
117
|
+
- lib/generators/rails_nl2sql/install/install_generator.rb
|
118
|
+
- lib/generators/rails_nl2sql/install/templates/rails_nl2sql.rb
|
119
|
+
- lib/rails-nl2sql.rb
|
102
120
|
- lib/rails/nl2sql.rb
|
121
|
+
- lib/rails/nl2sql/active_record_extension.rb
|
122
|
+
- lib/rails/nl2sql/prompts/default.yml.erb
|
123
|
+
- lib/rails/nl2sql/providers/anthropic_provider.rb
|
124
|
+
- lib/rails/nl2sql/providers/base.rb
|
125
|
+
- lib/rails/nl2sql/providers/llama_provider.rb
|
126
|
+
- lib/rails/nl2sql/providers/openai_provider.rb
|
103
127
|
- lib/rails/nl2sql/query_generator.rb
|
104
128
|
- lib/rails/nl2sql/query_validator.rb
|
105
129
|
- lib/rails/nl2sql/railtie.rb
|