ai_record_finder 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +2 -0
- data/CHANGELOG.md +5 -0
- data/CODE_OF_CONDUCT.md +10 -0
- data/LICENSE.txt +21 -0
- data/README.md +115 -0
- data/Rakefile +8 -0
- data/docs/DEVELOPER_GUIDE.md +144 -0
- data/lib/ai_record_finder/ai_adapter.rb +46 -0
- data/lib/ai_record_finder/client.rb +69 -0
- data/lib/ai_record_finder/configuration.rb +26 -0
- data/lib/ai_record_finder/dsl_parser.rb +158 -0
- data/lib/ai_record_finder/errors.rb +18 -0
- data/lib/ai_record_finder/prompt_builder.rb +51 -0
- data/lib/ai_record_finder/query_builder.rb +104 -0
- data/lib/ai_record_finder/railtie.rb +12 -0
- data/lib/ai_record_finder/safety_guard.rb +54 -0
- data/lib/ai_record_finder/schema_introspector.rb +65 -0
- data/lib/ai_record_finder/version.rb +5 -0
- data/lib/ai_record_finder.rb +66 -0
- data/sig/ai_record_finder.rbs +3 -0
- data/spec/ai_adapter_spec.rb +28 -0
- data/spec/ai_record_finder_spec.rb +151 -0
- data/spec/spec_helper.rb +63 -0
- metadata +136 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: da4695452d3dbf655e818a95f773afca2b366086983122b4b2d75abfe9cc6894
|
|
4
|
+
data.tar.gz: d3d628ed44b129d95a41d3a99057e0c6a78ddae844e6f5752b3c2a3998af6565
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: dba189e5c9cdee52306e027ca8c506b264441b18cc8c4e7a9ccc95a5004f8f4a61ed9541c834b110b745a0750ba9ce9fea8de43cd3ead69fcc5105f1f762afed
|
|
7
|
+
data.tar.gz: 67e99f16ed5e8280aa0af1543a5bb9141884ec96e4a1768659c794fd3f4f84e47d9b14f02480af63a136e3535a21498dbd40cec7c92f464e8e9bc766f340f487
|
data/.rspec
ADDED
data/CHANGELOG.md
ADDED
data/CODE_OF_CONDUCT.md
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
# Code of Conduct
|
|
2
|
+
|
|
3
|
+
`ai_record_finder` follows [The Ruby Community Conduct Guideline](https://www.ruby-lang.org/en/conduct) in all "collaborative spaces", which are defined as community communication channels (such as mailing lists, submitted patches, commit comments, etc.):
|
|
4
|
+
|
|
5
|
+
* Participants will be tolerant of opposing views.
|
|
6
|
+
* Participants must ensure that their language and actions are free of personal attacks and disparaging personal remarks.
|
|
7
|
+
* When interpreting the words and actions of others, participants should always assume good intentions.
|
|
8
|
+
* Behaviour which can be reasonably considered harassment will not be tolerated.
|
|
9
|
+
|
|
10
|
+
If you have any concerns about behaviour within this project, please contact us at [bosejijo@gmail.com](mailto:bosejijo@gmail.com).
|
data/LICENSE.txt
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
The MIT License (MIT)
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Jijo Bose
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
|
13
|
+
all copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
# AIRecordFinder
|
|
2
|
+
|
|
3
|
+
`ai_record_finder` converts natural language prompts into safe `ActiveRecord::Relation` objects.
|
|
4
|
+
|
|
5
|
+
It is designed for B2B Rails applications that need strict query safety, tenant boundaries, and model-level authorization.
|
|
6
|
+
|
|
7
|
+
Documentation: see `docs/DEVELOPER_GUIDE.md` (the guide shipped with this package; `docs/HOME.md` is not included in the released gem).
|
|
8
|
+
|
|
9
|
+
## Installation
|
|
10
|
+
|
|
11
|
+
Add to your Gemfile:
|
|
12
|
+
|
|
13
|
+
```ruby
|
|
14
|
+
gem "ai_record_finder"
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
Then run:
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
bundle install
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
## Configuration
|
|
24
|
+
|
|
25
|
+
Add an initializer at `config/initializers/ai_record_finder.rb`:
|
|
26
|
+
|
|
27
|
+
```ruby
|
|
28
|
+
AIRecordFinder.configure do |config|
|
|
29
|
+
config.api_key = ENV.fetch("OPENAI_API_KEY")
|
|
30
|
+
config.model_name = "gpt-4o-mini"
|
|
31
|
+
config.max_limit = 100
|
|
32
|
+
config.allowed_models = [Invoice, User]
|
|
33
|
+
|
|
34
|
+
# Optional: allow controlled joins by model.
|
|
35
|
+
config.allowed_associations = {
|
|
36
|
+
"Invoice" => ["user"]
|
|
37
|
+
}
|
|
38
|
+
end
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
## Usage
|
|
42
|
+
|
|
43
|
+
```ruby
|
|
44
|
+
relation = AIRecordFinder.query(
|
|
45
|
+
prompt: "Unpaid invoices above 50000 from last quarter",
|
|
46
|
+
model: Invoice
|
|
47
|
+
)
|
|
48
|
+
|
|
49
|
+
# Always ActiveRecord::Relation
|
|
50
|
+
relation.limit(10).pluck(:id)
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
For associated-table constraints, reference fields as `association.column` in natural language intent (for example: "invoices where `user.email` contains `@acme.com`"). The gem will auto-join needed associations, but they must still be whitelisted in `allowed_associations`.
|
|
54
|
+
|
|
55
|
+
## Security Model
|
|
56
|
+
|
|
57
|
+
`ai_record_finder` is fail-closed and built to avoid LLM-to-SQL injection:
|
|
58
|
+
|
|
59
|
+
- AI is forced to return JSON DSL only (no SQL allowed).
|
|
60
|
+
- AI output is sanitized (markdown/code fences stripped) and JSON-parsed safely.
|
|
61
|
+
- Unknown keys/operators/fields are rejected.
|
|
62
|
+
- Fields are validated against model schema introspection.
|
|
63
|
+
- `limit` is strictly validated and hard-capped by configuration.
|
|
64
|
+
- Models must be explicitly whitelisted in `allowed_models`.
|
|
65
|
+
- Optional joins are blocked unless explicitly whitelisted in `allowed_associations`.
|
|
66
|
+
- If model defines `current_tenant_scope`, it is always merged.
|
|
67
|
+
- No `eval`, no destructive operations, no raw SQL execution from AI output.
|
|
68
|
+
|
|
69
|
+
## Architecture Overview
|
|
70
|
+
|
|
71
|
+
Core components:
|
|
72
|
+
|
|
73
|
+
- `AIRecordFinder::Configuration`: runtime safety and API settings.
|
|
74
|
+
- `AIRecordFinder::SchemaIntrospector`: model table/column/association/enum summary.
|
|
75
|
+
- `AIRecordFinder::PromptBuilder`: strict system prompt with schema and DSL contract.
|
|
76
|
+
- `AIRecordFinder::Client`: OpenAI-compatible HTTP transport (Faraday).
|
|
77
|
+
- `AIRecordFinder::AIAdapter`: AI response extraction and JSON parsing.
|
|
78
|
+
- `AIRecordFinder::DSLParser`: validates DSL structure and values.
|
|
79
|
+
- `AIRecordFinder::SafetyGuard`: model authorization, limit policies, join policies, tenant scope.
|
|
80
|
+
- `AIRecordFinder::QueryBuilder`: converts validated DSL into `ActiveRecord::Relation`.
|
|
81
|
+
- `AIRecordFinder::Railtie`: auto-load support in Rails.
|
|
82
|
+
|
|
83
|
+
## Error Types
|
|
84
|
+
|
|
85
|
+
- `AIRecordFinder::InvalidModelError`
|
|
86
|
+
- `AIRecordFinder::InvalidDSL`
|
|
87
|
+
- `AIRecordFinder::AIResponseError`
|
|
88
|
+
- `AIRecordFinder::UnauthorizedModel`
|
|
89
|
+
|
|
90
|
+
## Testing
|
|
91
|
+
|
|
92
|
+
Run:
|
|
93
|
+
|
|
94
|
+
```bash
|
|
95
|
+
bundle exec rspec
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
Included tests cover:
|
|
99
|
+
|
|
100
|
+
- Valid query generation
|
|
101
|
+
- Invalid field rejection
|
|
102
|
+
- Limit overflow
|
|
103
|
+
- Unknown operator
|
|
104
|
+
- Unauthorized model
|
|
105
|
+
- JSON injection attempt
|
|
106
|
+
|
|
107
|
+
## Pro Roadmap
|
|
108
|
+
|
|
109
|
+
Potential Pro features:
|
|
110
|
+
|
|
111
|
+
- Query explain/preview before execution
|
|
112
|
+
- Auditable prompt and DSL logs with redaction controls
|
|
113
|
+
- Policy packs (SOC2/HIPAA presets)
|
|
114
|
+
- Per-tenant usage quotas and rate-limits
|
|
115
|
+
- Multi-model query planning with approval workflows
|
data/Rakefile
ADDED
data/docs/DEVELOPER_GUIDE.md
ADDED
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
# Developer Guide (Basic)
|
|
2
|
+
|
|
3
|
+
This guide explains how to use `ai_record_finder` in a Rails app.
|
|
4
|
+
|
|
5
|
+
## 1. Install
|
|
6
|
+
|
|
7
|
+
Add the gem:
|
|
8
|
+
|
|
9
|
+
```ruby
|
|
10
|
+
# Gemfile
|
|
11
|
+
gem "ai_record_finder"
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
Install dependencies:
|
|
15
|
+
|
|
16
|
+
```bash
|
|
17
|
+
bundle install
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
## 2. Configure
|
|
21
|
+
|
|
22
|
+
Create an initializer:
|
|
23
|
+
|
|
24
|
+
```ruby
|
|
25
|
+
# config/initializers/ai_record_finder.rb
|
|
26
|
+
AIRecordFinder.configure do |config|
|
|
27
|
+
config.api_key = ENV.fetch("OPENAI_API_KEY")
|
|
28
|
+
config.model_name = "gpt-4o-mini"
|
|
29
|
+
config.max_limit = 100
|
|
30
|
+
|
|
31
|
+
# Only models in this list can be queried.
|
|
32
|
+
config.allowed_models = [Invoice, User]
|
|
33
|
+
|
|
34
|
+
# Optional: allow specific joins per model.
|
|
35
|
+
config.allowed_associations = {
|
|
36
|
+
"Invoice" => ["user"]
|
|
37
|
+
}
|
|
38
|
+
end
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
## 3. Run a Query
|
|
42
|
+
|
|
43
|
+
Use natural language + model:
|
|
44
|
+
|
|
45
|
+
```ruby
|
|
46
|
+
relation = AIRecordFinder.query(
|
|
47
|
+
prompt: "Unpaid invoices above 50000 from last quarter",
|
|
48
|
+
model: Invoice
|
|
49
|
+
)
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
For associated-table constraints, use `association.column` semantics in the request intent, such as: "unpaid invoices where `user.email` contains `@acme.com`". The gem will auto-join associations used by these fields, but they must still be whitelisted in `allowed_associations`.
|
|
53
|
+
|
|
54
|
+
The return value is always an `ActiveRecord::Relation`, so you can chain it:
|
|
55
|
+
|
|
56
|
+
```ruby
|
|
57
|
+
relation.limit(20).pluck(:id)
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
## 4. What Happens Internally
|
|
61
|
+
|
|
62
|
+
1. The model is checked against `allowed_models`.
|
|
63
|
+
2. The model schema (columns, associations, enums) is introspected.
|
|
64
|
+
3. A strict AI prompt is built that allows only JSON DSL.
|
|
65
|
+
4. AI output is cleaned and JSON-parsed.
|
|
66
|
+
5. DSL is validated (fields, operators, sort, limit, keys).
|
|
67
|
+
6. A safe ActiveRecord relation is built and returned.
|
|
68
|
+
|
|
69
|
+
## 5. DSL Constraints (Important)
|
|
70
|
+
|
|
71
|
+
The AI can only use these operators:
|
|
72
|
+
|
|
73
|
+
- `eq`
|
|
74
|
+
- `gt`
|
|
75
|
+
- `lt`
|
|
76
|
+
- `gte`
|
|
77
|
+
- `lte`
|
|
78
|
+
- `between`
|
|
79
|
+
- `in`
|
|
80
|
+
- `like`
|
|
81
|
+
|
|
82
|
+
Safety rules:
|
|
83
|
+
|
|
84
|
+
- Unknown fields are rejected.
|
|
85
|
+
- Unknown operators are rejected.
|
|
86
|
+
- Unknown JSON keys are rejected.
|
|
87
|
+
- `limit` must be `<= config.max_limit`.
|
|
88
|
+
- Joins are blocked unless explicitly allowed.
|
|
89
|
+
|
|
90
|
+
## 6. Tenant Safety Hook
|
|
91
|
+
|
|
92
|
+
If your model defines `current_tenant_scope`, it is automatically merged:
|
|
93
|
+
|
|
94
|
+
```ruby
|
|
95
|
+
class Invoice < ApplicationRecord
|
|
96
|
+
def self.current_tenant_scope
|
|
97
|
+
where(account_id: Current.account_id)
|
|
98
|
+
end
|
|
99
|
+
end
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
This helps enforce tenant boundaries by default.
|
|
103
|
+
|
|
104
|
+
## 7. Common Errors
|
|
105
|
+
|
|
106
|
+
- `AIRecordFinder::UnauthorizedModel`
|
|
107
|
+
: model is not in `allowed_models`.
|
|
108
|
+
- `AIRecordFinder::InvalidModelError`
|
|
109
|
+
: model is not an ActiveRecord model.
|
|
110
|
+
- `AIRecordFinder::InvalidDSL`
|
|
111
|
+
: AI returned unsupported fields/operators/keys/limit.
|
|
112
|
+
- `AIRecordFinder::AIResponseError`
|
|
113
|
+
: AI response was invalid or API call failed.
|
|
114
|
+
|
|
115
|
+
## 8. Basic Controller Example
|
|
116
|
+
|
|
117
|
+
```ruby
|
|
118
|
+
class InvoicesController < ApplicationController
|
|
119
|
+
def search
|
|
120
|
+
relation = AIRecordFinder.query(
|
|
121
|
+
prompt: params[:q],
|
|
122
|
+
model: Invoice
|
|
123
|
+
)
|
|
124
|
+
|
|
125
|
+
@invoices = relation.limit(50)
|
|
126
|
+
rescue AIRecordFinder::Error => e
|
|
127
|
+
render json: { error: e.message }, status: :unprocessable_entity
|
|
128
|
+
end
|
|
129
|
+
end
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
## 9. Testing Your Integration
|
|
133
|
+
|
|
134
|
+
Run library tests:
|
|
135
|
+
|
|
136
|
+
```bash
|
|
137
|
+
bundle exec rspec
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
For app-level tests, mock `AIRecordFinder.query` in controller/service specs and assert:
|
|
141
|
+
|
|
142
|
+
- It returns a relation.
|
|
143
|
+
- Unauthorized models are rejected.
|
|
144
|
+
- Tenant constraints remain applied.
|
data/lib/ai_record_finder/ai_adapter.rb
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
|
|
5
|
+
module AIRecordFinder
  # Converts natural language + schema prompt into parsed DSL JSON.
  #
  # The adapter asks the injected client for a chat completion, cuts the
  # outermost JSON object out of the reply text and parses it into a Hash.
  class AIAdapter
    # @param client [#chat_completion] object responding to
    #   `chat_completion(system_prompt:, user_prompt:)` and returning the reply text
    def initialize(client:)
      @client = client
    end

    # Requests a completion and returns the parsed JSON object.
    #
    # @param system_prompt [String] instructions sent with the "system" role
    # @param user_prompt [String] the end user's request
    # @return [Hash] the parsed JSON object, with String keys
    # @raise [AIResponseError] when the reply holds no JSON object, the JSON is
    #   invalid or not an object, or any other StandardError occurs on the way
    def call(system_prompt:, user_prompt:)
      raw_content = @client.chat_completion(system_prompt: system_prompt, user_prompt: user_prompt)
      parse_json(extract_json(raw_content))
    rescue AIResponseError
      # Already the right type: re-raise untouched so the message is not wrapped.
      raise
    rescue StandardError => e
      raise AIResponseError, "Failed to parse AI response: #{e.message}"
    end

    private

    # Returns the substring spanning the first "{" to the last "}" of the reply.
    #
    # @param raw_content [#to_s] reply text; nil is treated as an empty reply
    # @return [String] candidate JSON text
    # @raise [AIResponseError] when no "{" ... "}" pair exists in that order
    def extract_json(raw_content)
      content = raw_content.to_s.strip

      if content.start_with?("```")
        # Both patterns are anchored, so a single substitution each is enough.
        content = content.sub(/\A```(?:json)?\s*/i, "").sub(/\s*```\z/, "")
      end

      first_brace = content.index("{")
      last_brace = content.rindex("}")

      # A closing brace that precedes the opening one (e.g. "} {") is not an
      # object; without the ordering check the slice below would be empty.
      unless first_brace && last_brace && first_brace < last_brace
        raise AIResponseError, "No JSON object found in AI response"
      end

      content[first_brace..last_brace]
    end

    # Parses the candidate text and insists on a JSON object at the top level.
    #
    # @param json_string [String] text produced by {#extract_json}
    # @return [Hash]
    # @raise [AIResponseError] on a parse failure or a non-object result
    def parse_json(json_string)
      parsed = JSON.parse(json_string)
      raise AIResponseError, "AI response must be a JSON object" unless parsed.is_a?(Hash)

      parsed
    rescue JSON::ParserError
      raise AIResponseError, "AI returned invalid JSON"
    end
  end
end
|
data/lib/ai_record_finder/client.rb
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "faraday"
|
|
4
|
+
require "json"
|
|
5
|
+
|
|
6
|
+
module AIRecordFinder
  # HTTP client for OpenAI-compatible chat completion APIs.
  #
  # Sends one system message and one user message to the chat completions
  # endpoint and returns the text of the first choice.
  class Client
    CHAT_COMPLETIONS_PATH = "/chat/completions"

    # @param configuration [#api_key, #api_base_url, #model_name, #temperature, #request_timeout]
    # @raise [AIResponseError] when the API key is nil or blank
    def initialize(configuration:)
      @configuration = configuration
      validate_configuration!
    end

    # Posts the prompts and returns the assistant message content.
    #
    # @param system_prompt [String] content of the "system" message
    # @param user_prompt [String] content of the "user" message
    # @return [Object] the value at choices[0].message.content (normally a String)
    # @raise [AIResponseError] on transport errors, non-success HTTP statuses,
    #   non-JSON bodies, or a payload without choices[0].message.content
    def chat_completion(system_prompt:, user_prompt:)
      response = connection.post(endpoint_path) do |req|
        req.headers["Authorization"] = "Bearer #{@configuration.api_key}"
        req.headers["Content-Type"] = "application/json"
        req.body = JSON.generate(payload(system_prompt, user_prompt))
      end

      ensure_success!(response)
      extract_content(parse_body(response.body))
    rescue Faraday::Error => e
      raise AIResponseError, "AI request failed: #{e.message}"
    end

    private

    # Path handed to Faraday, relative to api_base_url.
    #
    # Faraday resolves a path with a leading slash against the host root, which
    # would drop any path prefix (such as "/v1") carried by api_base_url, so the
    # leading slash of the constant is removed here.
    def endpoint_path
      CHAT_COMPLETIONS_PATH.delete_prefix("/")
    end

    # Memoized Faraday connection; request_timeout is used for both the read
    # and the open timeout.
    def connection
      @connection ||= Faraday.new(url: @configuration.api_base_url) do |f|
        f.options.timeout = @configuration.request_timeout
        f.options.open_timeout = @configuration.request_timeout
        f.adapter Faraday.default_adapter
      end
    end

    # Request body for the chat completions endpoint.
    def payload(system_prompt, user_prompt)
      {
        model: @configuration.model_name,
        temperature: @configuration.temperature,
        messages: [
          { role: "system", content: system_prompt },
          { role: "user", content: user_prompt }
        ]
      }
    end

    # Rejects error responses before their body is interpreted as a completion,
    # so an auth or rate-limit failure is reported as such.
    def ensure_success!(response)
      return if response.success?

      raise AIResponseError, "AI request failed with HTTP status #{response.status}"
    end

    # @param body [#to_s] raw response body; nil is treated as an empty body
    # @return [Object] the decoded JSON document
    # @raise [AIResponseError] when the body is not valid JSON
    def parse_body(body)
      JSON.parse(body.to_s)
    rescue JSON::ParserError, TypeError
      raise AIResponseError, "AI response body is not valid JSON"
    end

    # Walks choices[0].message.content, tolerating any unexpected shape.
    #
    # @param parsed [Object] decoded response document
    # @return [Object] the content value
    # @raise [AIResponseError] when any step of the path is missing or mistyped
    def extract_content(parsed)
      choices = parsed["choices"] if parsed.is_a?(Hash)
      choice = choices.first if choices.is_a?(Array)
      message = choice["message"] if choice.is_a?(Hash)
      content = message["content"] if message.is_a?(Hash)
      return content unless content.nil?

      raise AIResponseError, "AI response missing choices.message.content"
    end

    def validate_configuration!
      return unless @configuration.api_key.to_s.strip.empty?

      raise AIResponseError, "Missing API key. Set AIRecordFinder.configure { |c| c.api_key = ... }"
    end
  end
end
|
data/lib/ai_record_finder/configuration.rb
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module AIRecordFinder
  # Mutable settings object for the gem: API credentials and endpoint, model
  # parameters, the row-limit ceiling, and the model/association allow-lists.
  # Every setting is readable and writable; a new instance starts from the
  # defaults assigned in #initialize.
  class Configuration
    DEFAULT_MODEL_NAME = "gpt-4o-mini"
    DEFAULT_API_BASE_URL = "https://api.openai.com/v1"
    DEFAULT_MAX_LIMIT = 100
    DEFAULT_TIMEOUT = 15

    # API access
    attr_accessor :api_key
    attr_accessor :api_base_url
    attr_accessor :request_timeout

    # Model parameters
    attr_accessor :model_name
    attr_accessor :temperature

    # Query limits and allow-lists
    attr_accessor :max_limit
    attr_accessor :allowed_models
    attr_accessor :allowed_associations

    # Populates every setting with its default. The key is left unset (nil),
    # and both allow-lists start empty and are created fresh per instance.
    def initialize
      self.api_key = nil
      self.model_name = DEFAULT_MODEL_NAME
      self.max_limit = DEFAULT_MAX_LIMIT
      self.allowed_models = []
      self.api_base_url = DEFAULT_API_BASE_URL
      self.request_timeout = DEFAULT_TIMEOUT
      self.temperature = 0.0
      self.allowed_associations = {}
    end
  end
end
|
data/lib/ai_record_finder/dsl_parser.rb
ADDED
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module AIRecordFinder
  # Validates and normalizes the AI-generated query DSL.
  #
  # The parser accepts a Hash with the optional keys "filters", "limit", "sort"
  # and "joins", rejects anything it does not recognise with InvalidDSL, and
  # returns a Hash with exactly those four String keys.
  class DSLParser
    ALLOWED_TOP_LEVEL_KEYS = %w[filters limit sort joins].freeze
    ALLOWED_OPERATORS = %w[eq gt lt gte lte between in like].freeze
    ALLOWED_SORT_DIRECTIONS = %w[asc desc].freeze
    FILTER_KEYS = %w[field operator value].freeze
    SORT_KEYS = %w[field direction].freeze

    # @param model [Object] stored as given; not read by the validations here
    # @param schema [Hash] must contain :columns and :associations (objects
    #   responding to #keys); may contain :association_columns
    # @param dsl [Object] the decoded AI output to validate
    # @param max_limit [Integer] default limit and the ceiling passed to SafetyGuard
    # @raise [KeyError] when schema lacks :columns or :associations
    def initialize(model:, schema:, dsl:, max_limit:)
      @model = model
      @schema = schema
      @dsl = dsl
      @max_limit = max_limit
      # DSL field names are always Strings, so schema keys are normalised once
      # here; otherwise Symbol-keyed schemas would never match.
      @columns = @schema.fetch(:columns).keys.map(&:to_s)
      @associations = @schema.fetch(:associations).keys.map(&:to_s)
      @association_columns = @schema.fetch(:association_columns, {})
    end

    # Validates the DSL and returns its normalized form.
    #
    # @return [Hash] { "filters" => Array<Hash>, "limit" => Integer,
    #   "sort" => Hash or nil, "joins" => Array<String> }
    # @raise [InvalidDSL] on any structural or value violation
    def call
      raise InvalidDSL, "DSL must be a JSON object" unless @dsl.is_a?(Hash)

      dsl = deep_stringify_keys(@dsl)
      validate_top_level_keys!(dsl)

      filters = validate_filters!(dsl.fetch("filters", []))
      limit = validate_limit!(dsl.fetch("limit", @max_limit))
      sort = validate_sort!(dsl["sort"])
      joins = validate_joins!(dsl.fetch("joins", []))

      {
        "filters" => filters,
        "limit" => limit,
        "sort" => sort,
        "joins" => joins
      }
    end

    private

    # Recursively converts Hash keys to Strings; other values are returned as is.
    def deep_stringify_keys(value)
      case value
      when Hash
        value.each_with_object({}) do |(key, val), memo|
          memo[key.to_s] = deep_stringify_keys(val)
        end
      when Array
        value.map { |val| deep_stringify_keys(val) }
      else
        value
      end
    end

    def validate_top_level_keys!(dsl)
      unknown = dsl.keys - ALLOWED_TOP_LEVEL_KEYS
      raise InvalidDSL, "Unknown DSL keys: #{unknown.join(', ')}" if unknown.any?
    end

    # @return [Array<Hash>] filters reduced to "field", "operator" and "value"
    def validate_filters!(filters)
      raise InvalidDSL, "filters must be an array" unless filters.is_a?(Array)

      filters.map do |filter|
        raise InvalidDSL, "each filter must be an object" unless filter.is_a?(Hash)

        unknown = filter.keys - FILTER_KEYS
        raise InvalidDSL, "Unknown filter keys: #{unknown.join(', ')}" if unknown.any?

        field = filter["field"].to_s
        operator = filter["operator"].to_s
        value = filter["value"]

        validate_field!(field)
        validate_operator!(operator)
        validate_operator_value!(operator, value)

        { "field" => field, "operator" => operator, "value" => value }
      end
    end

    # Type-checks the limit here and delegates the range policy to SafetyGuard.
    def validate_limit!(limit)
      raise InvalidDSL, "limit must be an integer" unless limit.is_a?(Integer)

      SafetyGuard.enforce_limit!(limit: limit, max_limit: @max_limit)
      limit
    end

    # @return [Hash, nil] { "field" => String, "direction" => "asc" or "desc" },
    #   or nil when no sort was requested
    def validate_sort!(sort)
      return nil if sort.nil?
      raise InvalidDSL, "sort must be an object" unless sort.is_a?(Hash)

      unknown = sort.keys - SORT_KEYS
      raise InvalidDSL, "Unknown sort keys: #{unknown.join(', ')}" if unknown.any?

      # Report absent keys as InvalidDSL; a bare fetch would raise KeyError,
      # which is outside the gem's error hierarchy.
      missing = SORT_KEYS - sort.keys
      raise InvalidDSL, "sort is missing required keys: #{missing.join(', ')}" if missing.any?

      field = sort["field"].to_s
      direction = sort["direction"].to_s.downcase

      validate_field!(field)
      unless ALLOWED_SORT_DIRECTIONS.include?(direction)
        raise InvalidDSL, "sort direction must be asc or desc"
      end

      { "field" => field, "direction" => direction }
    end

    # @return [Array<String>] association names, each present in the schema
    def validate_joins!(joins)
      raise InvalidDSL, "joins must be an array" unless joins.is_a?(Array)

      joins.map do |association|
        name = association.to_s
        raise InvalidDSL, "Unknown association join: #{name}" unless @associations.include?(name)

        name
      end
    end

    # Accepts a base column name or an "association.column" reference.
    def validate_field!(field)
      return if @columns.include?(field)
      return if valid_association_field?(field)

      raise InvalidDSL, "Unknown field: #{field}"
    end

    # True when field is "association.column", the association is in the schema,
    # and the column is listed for it under :association_columns.
    def valid_association_field?(field)
      association_name, column_name = field.split(".", 2)
      return false if association_name.to_s.empty? || column_name.to_s.empty?
      return false unless @associations.include?(association_name)

      # The name is a known association at this point, so converting it to a
      # Symbol for the fallback lookup cannot create arbitrary symbols.
      entry = @association_columns[association_name] || @association_columns[association_name.to_sym]
      return false unless entry.is_a?(Hash)

      columns = entry[:columns] || entry["columns"]
      return false unless columns.is_a?(Hash)

      columns.keys.map(&:to_s).include?(column_name)
    end

    def validate_operator!(operator)
      return if ALLOWED_OPERATORS.include?(operator)

      raise InvalidDSL, "Unknown operator: #{operator}"
    end

    # Checks that the value has the shape its operator expects. Nested objects
    # are never accepted, so only plain values are handed on to the query layer.
    def validate_operator_value!(operator, value)
      case operator
      when "between"
        unless value.is_a?(Array) && value.length == 2
          raise InvalidDSL, "between operator requires exactly two values"
        end

        validate_scalar_elements!(operator, value)
      when "in"
        raise InvalidDSL, "in operator requires an array" unless value.is_a?(Array)

        validate_scalar_elements!(operator, value)
      else
        raise InvalidDSL, "#{operator} operator requires a scalar value" unless scalar?(value)
      end
    end

    def validate_scalar_elements!(operator, values)
      return if values.all? { |element| scalar?(element) }

      raise InvalidDSL, "#{operator} operator values must be scalars"
    end

    # A scalar is anything that is neither an Array nor a Hash (nil included).
    def scalar?(value)
      !value.is_a?(Array) && !value.is_a?(Hash)
    end
  end
end
|
data/lib/ai_record_finder/errors.rb
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module AIRecordFinder
  # Common ancestor of every exception the gem raises; rescuing it catches
  # all of the classes below.
  class Error < StandardError
  end

  # The supplied model is not an ActiveRecord model.
  class InvalidModelError < Error
  end

  # The AI output does not match the expected DSL.
  class InvalidDSL < Error
  end

  # The AI API response is malformed or cannot be used.
  class AIResponseError < Error
  end

  # The model has not been explicitly whitelisted.
  class UnauthorizedModel < Error
  end
end
|
data/lib/ai_record_finder/prompt_builder.rb
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
|
|
5
|
+
module AIRecordFinder
  # Assembles the two chat messages for a query: a system prompt that pins the
  # model to the JSON DSL and embeds the schema, and the trimmed user request.
  class PromptBuilder
    ALLOWED_OPERATORS = %w[eq gt lt gte lte between in like].freeze

    # @param schema [Hash] schema summary; must expose its columns under :columns
    # @param max_limit [Integer] upper bound quoted in the limit rule
    def initialize(schema:, max_limit:)
      @schema = schema
      @max_limit = max_limit
    end

    # Renders the system prompt: output rules, the JSON format example, field
    # constraints, and the schema serialized with JSON.pretty_generate.
    #
    # @return [String] the prompt text, ending with a newline
    def system_prompt
      limit_ceiling = @max_limit
      operator_list = ALLOWED_OPERATORS.join(', ')
      column_list = @schema[:columns].keys.sort.join(', ')
      schema_dump = JSON.pretty_generate(@schema)

      <<~PROMPT
        You convert user requests into a strict query JSON DSL for ActiveRecord.
        Output rules:
        - Return ONLY JSON. No markdown, no code fences, no commentary.
        - Never output SQL, pseudo-SQL, or Ruby code.
        - Do not include keys not listed below.
        - Use only the provided schema fields.
        - Limit must be an integer between 1 and #{limit_ceiling}.
        - Operators allowed: #{operator_list}

        JSON format:
        {
        "filters": [
        { "field": "status", "operator": "eq", "value": "unpaid" }
        ],
        "limit": 50,
        "sort": { "field": "created_at", "direction": "desc" }
        }

        Field constraints:
        - Allowed base fields are these columns: #{column_list}
        - Associated fields must use "association.column" (example: "user.email")
        - Allowed sort directions: asc, desc
        - For operator "between", value must be an array of two values
        - For operator "in", value must be an array

        Schema summary:
        #{schema_dump}
      PROMPT
    end

    # @param natural_language_prompt [#to_s] the end user's request
    # @return [String] the request converted to a String, without surrounding whitespace
    def user_prompt(natural_language_prompt)
      request_text = natural_language_prompt.to_s
      request_text.strip
    end
  end
end
|