gaard-core 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. gaard_core/__init__.py +0 -0
  2. gaard_core/audit/__init__.py +0 -0
  3. gaard_core/errors.py +79 -0
  4. gaard_core/evaluation/__init__.py +0 -0
  5. gaard_core/execution/__init__.py +0 -0
  6. gaard_core/execution/mock_executor.py +25 -0
  7. gaard_core/investigation/__init__.py +25 -0
  8. gaard_core/investigation/llm_readiness_agent.py +220 -0
  9. gaard_core/investigation/loop.py +83 -0
  10. gaard_core/investigation/mock_readiness_agent.py +20 -0
  11. gaard_core/investigation/models.py +62 -0
  12. gaard_core/json_utils.py +58 -0
  13. gaard_core/llm_output.py +12 -0
  14. gaard_core/policy_engine/__init__.py +0 -0
  15. gaard_core/prompt_compiler/__init__.py +0 -0
  16. gaard_core/prompt_compiler/intent_classification_prompt.py +58 -0
  17. gaard_core/prompt_compiler/investigation_readiness_prompt.py +84 -0
  18. gaard_core/prompt_compiler/models.py +19 -0
  19. gaard_core/prompt_compiler/result_classification_prompt.py +62 -0
  20. gaard_core/prompt_compiler/result_interpretation_prompt.py +73 -0
  21. gaard_core/prompt_compiler/schema_formatter.py +43 -0
  22. gaard_core/prompt_compiler/sql_generation_prompt.py +105 -0
  23. gaard_core/query_intent/__init__.py +1 -0
  24. gaard_core/query_intent/llm_classifier.py +112 -0
  25. gaard_core/query_intent/mock_classifier.py +14 -0
  26. gaard_core/query_pipeline/__init__.py +0 -0
  27. gaard_core/query_pipeline/llm_sql_generator.py +85 -0
  28. gaard_core/query_pipeline/mock_sql_generator.py +33 -0
  29. gaard_core/query_pipeline/models.py +57 -0
  30. gaard_core/query_pipeline/pipeline.py +124 -0
  31. gaard_core/result_classifier/__init__.py +1 -0
  32. gaard_core/result_classifier/llm_classifier.py +87 -0
  33. gaard_core/result_classifier/mock_classifier.py +10 -0
  34. gaard_core/result_interpreter/__init__.py +0 -0
  35. gaard_core/result_interpreter/llm_interpreter.py +66 -0
  36. gaard_core/result_interpreter/mock_interpreter.py +25 -0
  37. gaard_core/schema/__init__.py +0 -0
  38. gaard_core/schema/cache.py +59 -0
  39. gaard_core/schema/context.py +40 -0
  40. gaard_core/schema/models.py +27 -0
  41. gaard_core/security/__init__.py +0 -0
  42. gaard_core/semantic_layer/__init__.py +0 -0
  43. gaard_core/sql_validator/__init__.py +0 -0
  44. gaard_core/sql_validator/select_only.py +37 -0
  45. gaard_core-0.1.0.dist-info/METADATA +23 -0
  46. gaard_core-0.1.0.dist-info/RECORD +48 -0
  47. gaard_core-0.1.0.dist-info/WHEEL +5 -0
  48. gaard_core-0.1.0.dist-info/top_level.txt +1 -0
gaard_core/__init__.py ADDED
File without changes
File without changes
gaard_core/errors.py ADDED
@@ -0,0 +1,79 @@
1
+ from typing import Any
2
+
3
+
4
+ class GaardError(Exception):
5
+ code = "GAARD_ERROR"
6
+ status_code = 500
7
+
8
+ def __init__(self, message: str | None = None) -> None:
9
+ self.message = message or "GAARD error."
10
+ super().__init__(self.message)
11
+
12
+
13
+ class ConfigurationError(GaardError):
14
+ code = "CONFIGURATION_ERROR"
15
+ status_code = 500
16
+
17
+
18
+ class SqlGenerationError(GaardError):
19
+ code = "SQL_GENERATION_ERROR"
20
+ status_code = 502
21
+
22
+
23
+ class SqlValidationError(GaardError):
24
+ code = "SQL_VALIDATION_ERROR"
25
+ status_code = 400
26
+
27
+ def __init__(
28
+ self,
29
+ message: str | None = None,
30
+ sql: str = "",
31
+ error_detail: str = "",
32
+ metadata: dict[str, Any] | None = None,
33
+ ) -> None:
34
+ self.sql = sql
35
+ self.error_detail = error_detail
36
+ self.metadata = metadata or {}
37
+ super().__init__(message)
38
+
39
+
40
+ class QueryExecutionError(GaardError):
41
+ code = "QUERY_EXECUTION_ERROR"
42
+ status_code = 400
43
+
44
+ def __init__(
45
+ self,
46
+ message: str | None = None,
47
+ sql: str = "",
48
+ error_detail: str = "",
49
+ metadata: dict[str, Any] | None = None,
50
+ ) -> None:
51
+ self.sql = sql
52
+ self.error_detail = error_detail
53
+ self.metadata = metadata or {}
54
+ super().__init__(message)
55
+
56
+
57
+ class QueryPipelineStepError(GaardError):
58
+ status_code = 502
59
+
60
+ def __init__(
61
+ self,
62
+ message: str | None = None,
63
+ phase: str = "",
64
+ sql: str = "",
65
+ error_code: str = "QUERY_PIPELINE_STEP_ERROR",
66
+ error_detail: str = "",
67
+ metadata: dict[str, Any] | None = None,
68
+ ) -> None:
69
+ self.code = error_code
70
+ self.phase = phase
71
+ self.sql = sql
72
+ self.error_detail = error_detail
73
+ self.metadata = metadata or {}
74
+ super().__init__(message)
75
+
76
+
77
+ class LlmProviderError(GaardError):
78
+ code = "LLM_PROVIDER_ERROR"
79
+ status_code = 502
File without changes
File without changes
@@ -0,0 +1,25 @@
1
+ from gaard_core.query_pipeline.models import QueryResult
2
+
3
+
4
+ class MockQueryExecutor:
5
+ def execute(self, sql: str) -> QueryResult:
6
+ normalized = sql.lower()
7
+
8
+ if "patients" in normalized:
9
+ return QueryResult(
10
+ columns=["patients_count"],
11
+ rows=[
12
+ {
13
+ "patients_count": 124,
14
+ }
15
+ ],
16
+ )
17
+
18
+ return QueryResult(
19
+ columns=["value"],
20
+ rows=[
21
+ {
22
+ "value": 1,
23
+ }
24
+ ],
25
+ )
@@ -0,0 +1,25 @@
1
+ from gaard_core.investigation.loop import InvestigationLoop
2
+ from gaard_core.investigation.llm_readiness_agent import LlmInvestigationReadinessAgent
3
+ from gaard_core.investigation.mock_readiness_agent import MockInvestigationReadinessAgent
4
+ from gaard_core.investigation.models import (
5
+ InvestigationContext,
6
+ InvestigationIteration,
7
+ InvestigationLoopConfig,
8
+ InvestigationLoopResult,
9
+ InvestigationReadinessDecision,
10
+ InvestigationRoute,
11
+ RequiredAnalysisTask,
12
+ )
13
+
14
+ __all__ = [
15
+ "InvestigationContext",
16
+ "InvestigationIteration",
17
+ "InvestigationLoop",
18
+ "InvestigationLoopConfig",
19
+ "InvestigationLoopResult",
20
+ "InvestigationReadinessDecision",
21
+ "InvestigationRoute",
22
+ "LlmInvestigationReadinessAgent",
23
+ "MockInvestigationReadinessAgent",
24
+ "RequiredAnalysisTask",
25
+ ]
@@ -0,0 +1,220 @@
1
+ import json
2
+ from typing import Any, Protocol
3
+
4
+ from gaard_core.investigation.models import (
5
+ InvestigationContext,
6
+ InvestigationReadinessDecision,
7
+ InvestigationRoute,
8
+ RequiredAnalysisTask,
9
+ )
10
+ from gaard_core.llm_output import remove_thinking_blocks
11
+ from gaard_core.prompt_compiler.investigation_readiness_prompt import (
12
+ InvestigationReadinessPromptCompiler,
13
+ )
14
+ from gaard_core.prompt_compiler.models import CompiledPrompt
15
+ from gaard_llm.openai_compatible.client import OpenAICompatibleClient
16
+ from gaard_llm.providers.models import ChatCompletionRequest, ChatMessage
17
+
18
+
19
+ class InvestigationReadinessPromptCompilerProtocol(Protocol):
20
+ def compile(self, context: InvestigationContext) -> CompiledPrompt:
21
+ pass
22
+
23
+
24
+ class LlmInvestigationReadinessAgent:
25
+ name = "llm_investigation_readiness"
26
+
27
+ def __init__(
28
+ self,
29
+ client: OpenAICompatibleClient,
30
+ model: str,
31
+ extra_body: dict[str, Any] | None = None,
32
+ prompt_compiler: InvestigationReadinessPromptCompilerProtocol | None = None,
33
+ ) -> None:
34
+ self.client = client
35
+ self.model = model
36
+ self.extra_body = extra_body or {}
37
+ self.prompt_compiler = prompt_compiler or InvestigationReadinessPromptCompiler()
38
+
39
+ def assess(self, context: InvestigationContext) -> InvestigationReadinessDecision:
40
+ compiled_prompt = self.prompt_compiler.compile(context=context)
41
+
42
+ response = self.client.create_chat_completion(
43
+ ChatCompletionRequest(
44
+ model=self.model,
45
+ temperature=0.0,
46
+ extra_body=self.extra_body,
47
+ messages=[
48
+ ChatMessage(
49
+ role="system",
50
+ content=compiled_prompt.system_prompt,
51
+ ),
52
+ ChatMessage(
53
+ role="user",
54
+ content=compiled_prompt.user_prompt,
55
+ ),
56
+ ],
57
+ )
58
+ )
59
+
60
+ return parse_investigation_readiness_decision(response.content)
61
+
62
+
63
+ def parse_investigation_readiness_decision(value: str) -> InvestigationReadinessDecision:
64
+ cleaned = remove_thinking_blocks(value).strip()
65
+
66
+ try:
67
+ payload = json.loads(cleaned)
68
+ except json.JSONDecodeError:
69
+ return InvestigationReadinessDecision(
70
+ ready_for_sql=False,
71
+ route=InvestigationRoute.ANALYSIS,
72
+ confidence=0.0,
73
+ reason="Investigation readiness agent returned invalid JSON.",
74
+ missing_information=["valid readiness JSON"],
75
+ required_analysis=["Retry readiness assessment with a valid JSON response."],
76
+ model_response={"raw": cleaned},
77
+ )
78
+
79
+ if not isinstance(payload, dict):
80
+ return InvestigationReadinessDecision(
81
+ ready_for_sql=False,
82
+ route=InvestigationRoute.ANALYSIS,
83
+ confidence=0.0,
84
+ reason="Investigation readiness agent returned a non-object JSON value.",
85
+ missing_information=["valid readiness JSON object"],
86
+ required_analysis=["Retry readiness assessment with a JSON object response."],
87
+ model_response={"raw": payload},
88
+ )
89
+
90
+ ready_for_sql = parse_bool(payload.get("ready_for_sql"))
91
+ route = parse_route(payload.get("route"), ready_for_sql)
92
+
93
+ if route == InvestigationRoute.SQL and not ready_for_sql:
94
+ route = InvestigationRoute.ANALYSIS
95
+
96
+ if route == InvestigationRoute.ANALYSIS:
97
+ ready_for_sql = False
98
+
99
+ missing_information = parse_string_list(payload.get("missing_information"))
100
+ required_analysis = parse_string_list(payload.get("required_analysis"))
101
+
102
+ return InvestigationReadinessDecision(
103
+ ready_for_sql=ready_for_sql,
104
+ route=route,
105
+ confidence=parse_confidence(payload.get("confidence")),
106
+ reason=str(payload.get("reason") or ""),
107
+ missing_information=missing_information,
108
+ required_analysis=required_analysis,
109
+ required_analysis_tasks=parse_required_analysis_tasks(
110
+ payload.get("required_analysis_tasks"),
111
+ missing_information,
112
+ required_analysis,
113
+ ),
114
+ assumptions=parse_string_list(payload.get("assumptions")),
115
+ model_response=payload,
116
+ )
117
+
118
+
119
+ def parse_route(value: object, ready_for_sql: bool) -> InvestigationRoute:
120
+ if isinstance(value, str):
121
+ normalized = value.strip().lower().replace("-", "_").replace(" ", "_")
122
+ if normalized in {"sql", "ready", "ready_for_sql"}:
123
+ return InvestigationRoute.SQL
124
+ if normalized in {"analysis", "analyze", "requires_analysis"}:
125
+ return InvestigationRoute.ANALYSIS
126
+
127
+ return InvestigationRoute.SQL if ready_for_sql else InvestigationRoute.ANALYSIS
128
+
129
+
130
+ def parse_bool(value: object) -> bool:
131
+ if isinstance(value, bool):
132
+ return value
133
+
134
+ if isinstance(value, str):
135
+ return value.strip().lower() in {"true", "yes", "tak", "1"}
136
+
137
+ return False
138
+
139
+
140
+ def parse_confidence(value: object) -> float:
141
+ try:
142
+ confidence = float(value)
143
+ except (TypeError, ValueError):
144
+ return 0.0
145
+
146
+ return max(0.0, min(1.0, confidence))
147
+
148
+
149
+ def parse_string_list(value: object) -> list[str]:
150
+ if not isinstance(value, list):
151
+ return []
152
+
153
+ items: list[str] = []
154
+ for item in value:
155
+ if item is None:
156
+ continue
157
+ text = str(item).strip()
158
+ if text:
159
+ items.append(text)
160
+
161
+ return items
162
+
163
+
164
+ def parse_required_analysis_tasks(
165
+ value: object,
166
+ missing_information: list[str],
167
+ required_analysis: list[str],
168
+ ) -> list[RequiredAnalysisTask]:
169
+ if isinstance(value, list):
170
+ tasks = [
171
+ parse_required_analysis_task(item)
172
+ for item in value
173
+ if isinstance(item, dict)
174
+ ]
175
+ tasks = [task for task in tasks if task.required_analysis]
176
+ if tasks:
177
+ return tasks
178
+
179
+ return required_analysis_tasks_from_lists(missing_information, required_analysis)
180
+
181
+
182
+ def parse_required_analysis_task(value: dict[str, object]) -> RequiredAnalysisTask:
183
+ return RequiredAnalysisTask(
184
+ missing_information=str(value.get("missing_information") or "").strip(),
185
+ required_analysis=str(value.get("required_analysis") or "").strip(),
186
+ category=normalize_analysis_category(value.get("category")),
187
+ expected_output=str(value.get("expected_output") or "").strip(),
188
+ )
189
+
190
+
191
+ def required_analysis_tasks_from_lists(
192
+ missing_information: list[str],
193
+ required_analysis: list[str],
194
+ ) -> list[RequiredAnalysisTask]:
195
+ tasks: list[RequiredAnalysisTask] = []
196
+ for index, analysis_question in enumerate(required_analysis):
197
+ tasks.append(
198
+ RequiredAnalysisTask(
199
+ missing_information=missing_information[index]
200
+ if index < len(missing_information)
201
+ else "",
202
+ required_analysis=analysis_question,
203
+ )
204
+ )
205
+
206
+ return tasks
207
+
208
+
209
+ def normalize_analysis_category(value: object) -> str:
210
+ normalized = str(value or "unknown").strip().lower().replace("-", "_").replace(" ", "_")
211
+ allowed_categories = {
212
+ "dictionary_value",
213
+ "relationship_logic",
214
+ "filter_logic",
215
+ "aggregation_logic",
216
+ "entity_mapping",
217
+ "unknown",
218
+ }
219
+
220
+ return normalized if normalized in allowed_categories else "unknown"
@@ -0,0 +1,83 @@
1
+ from typing import Protocol
2
+
3
+ from gaard_core.investigation.models import (
4
+ InvestigationContext,
5
+ InvestigationIteration,
6
+ InvestigationLoopConfig,
7
+ InvestigationLoopResult,
8
+ InvestigationReadinessDecision,
9
+ InvestigationRoute,
10
+ )
11
+
12
+
13
+ class InvestigationReadinessAgent(Protocol):
14
+ name: str
15
+
16
+ def assess(self, context: InvestigationContext) -> InvestigationReadinessDecision:
17
+ pass
18
+
19
+
20
+ class InvestigationLoop:
21
+ def __init__(
22
+ self,
23
+ readiness_agent: InvestigationReadinessAgent,
24
+ config: InvestigationLoopConfig | None = None,
25
+ ) -> None:
26
+ self.readiness_agent = readiness_agent
27
+ self.config = config or InvestigationLoopConfig()
28
+
29
+ def run(self, context: InvestigationContext) -> InvestigationLoopResult:
30
+ iterations: list[InvestigationIteration] = []
31
+
32
+ for iteration_number in range(1, self.config.max_iterations + 1):
33
+ decision = self.readiness_agent.assess(context)
34
+ normalized_decision = self._normalize_decision(decision)
35
+ iterations.append(
36
+ InvestigationIteration(
37
+ iteration=iteration_number,
38
+ agent=self.readiness_agent.name,
39
+ decision=normalized_decision,
40
+ )
41
+ )
42
+
43
+ if normalized_decision.route == InvestigationRoute.SQL:
44
+ return InvestigationLoopResult(
45
+ route=InvestigationRoute.SQL,
46
+ ready_for_sql=True,
47
+ max_iterations=self.config.max_iterations,
48
+ confidence_threshold=self.config.readiness_confidence_threshold,
49
+ iterations=iterations,
50
+ )
51
+
52
+ return InvestigationLoopResult(
53
+ route=InvestigationRoute.ANALYSIS,
54
+ ready_for_sql=False,
55
+ max_iterations=self.config.max_iterations,
56
+ confidence_threshold=self.config.readiness_confidence_threshold,
57
+ iterations=iterations,
58
+ )
59
+
60
+ return InvestigationLoopResult(
61
+ route=InvestigationRoute.ANALYSIS,
62
+ ready_for_sql=False,
63
+ max_iterations=self.config.max_iterations,
64
+ confidence_threshold=self.config.readiness_confidence_threshold,
65
+ iterations=iterations,
66
+ )
67
+
68
+ def _normalize_decision(
69
+ self,
70
+ decision: InvestigationReadinessDecision,
71
+ ) -> InvestigationReadinessDecision:
72
+ ready = (
73
+ decision.ready_for_sql
74
+ and decision.confidence >= self.config.readiness_confidence_threshold
75
+ )
76
+ route = InvestigationRoute.SQL if ready else InvestigationRoute.ANALYSIS
77
+
78
+ return decision.model_copy(
79
+ update={
80
+ "ready_for_sql": ready,
81
+ "route": route,
82
+ }
83
+ )
@@ -0,0 +1,20 @@
1
+ from gaard_core.investigation.models import (
2
+ InvestigationContext,
3
+ InvestigationReadinessDecision,
4
+ InvestigationRoute,
5
+ )
6
+
7
+
8
+ class MockInvestigationReadinessAgent:
9
+ name = "mock_investigation_readiness"
10
+
11
+ def __init__(self, decision: InvestigationReadinessDecision | None = None) -> None:
12
+ self.decision = decision or InvestigationReadinessDecision(
13
+ ready_for_sql=True,
14
+ route=InvestigationRoute.SQL,
15
+ confidence=1.0,
16
+ reason="Mock readiness agent allows the normal SQL pipeline.",
17
+ )
18
+
19
+ def assess(self, context: InvestigationContext) -> InvestigationReadinessDecision:
20
+ return self.decision
@@ -0,0 +1,62 @@
1
+ from enum import StrEnum
2
+ from typing import Any
3
+
4
+ from pydantic import BaseModel, Field
5
+
6
+
7
+ class InvestigationRoute(StrEnum):
8
+ SQL = "sql"
9
+ ANALYSIS = "analysis"
10
+
11
+
12
+ class InvestigationContext(BaseModel):
13
+ question: str = Field(min_length=1)
14
+ datasource_id: str = "default"
15
+ user_id: str = "local-admin"
16
+ formatted_schema: str = ""
17
+ business_logic: str = ""
18
+
19
+
20
+ class RequiredAnalysisTask(BaseModel):
21
+ missing_information: str = ""
22
+ required_analysis: str = ""
23
+ category: str = "unknown"
24
+ expected_output: str = ""
25
+
26
+
27
+ class InvestigationReadinessDecision(BaseModel):
28
+ ready_for_sql: bool = False
29
+ route: InvestigationRoute = InvestigationRoute.ANALYSIS
30
+ confidence: float = 0.0
31
+ reason: str = ""
32
+ missing_information: list[str] = Field(default_factory=list)
33
+ required_analysis: list[str] = Field(default_factory=list)
34
+ required_analysis_tasks: list[RequiredAnalysisTask] = Field(default_factory=list)
35
+ assumptions: list[str] = Field(default_factory=list)
36
+ model_response: dict[str, Any] = Field(default_factory=dict)
37
+
38
+
39
+ class InvestigationIteration(BaseModel):
40
+ iteration: int
41
+ agent: str
42
+ decision: InvestigationReadinessDecision
43
+
44
+
45
+ class InvestigationLoopConfig(BaseModel):
46
+ max_iterations: int = Field(default=1, ge=1)
47
+ readiness_confidence_threshold: float = Field(default=0.85, ge=0.0, le=1.0)
48
+
49
+
50
+ class InvestigationLoopResult(BaseModel):
51
+ route: InvestigationRoute
52
+ ready_for_sql: bool
53
+ max_iterations: int
54
+ confidence_threshold: float
55
+ iterations: list[InvestigationIteration] = Field(default_factory=list)
56
+
57
+ @property
58
+ def final_decision(self) -> InvestigationReadinessDecision | None:
59
+ if not self.iterations:
60
+ return None
61
+
62
+ return self.iterations[-1].decision
@@ -0,0 +1,58 @@
1
+ import json
2
+ import math
3
+ from collections.abc import Mapping
4
+ from datetime import date, datetime, time
5
+ from decimal import Decimal
6
+ from typing import Any
7
+
8
+
9
+ def to_jsonable(value: Any) -> Any:
10
+ if value is None or isinstance(value, str | int | float | bool):
11
+ return value
12
+
13
+ if isinstance(value, Decimal):
14
+ return _decimal_to_jsonable(value)
15
+
16
+ if isinstance(value, datetime | date | time):
17
+ return value.isoformat()
18
+
19
+ if isinstance(value, bytes | bytearray | memoryview):
20
+ return _bytes_to_jsonable(value)
21
+
22
+ if isinstance(value, Mapping):
23
+ return {str(key): to_jsonable(item) for key, item in value.items()}
24
+
25
+ if isinstance(value, list | tuple | set | frozenset):
26
+ return [to_jsonable(item) for item in value]
27
+
28
+ if hasattr(value, "model_dump"):
29
+ return to_jsonable(value.model_dump())
30
+
31
+ return str(value)
32
+
33
+
34
+ def json_dumps(value: Any, **kwargs: Any) -> str:
35
+ return json.dumps(to_jsonable(value), **kwargs)
36
+
37
+
38
+ def _decimal_to_jsonable(value: Decimal) -> int | float | str:
39
+ if not value.is_finite():
40
+ return str(value)
41
+
42
+ if value == value.to_integral_value():
43
+ return int(value)
44
+
45
+ as_float = float(value)
46
+ if math.isfinite(as_float):
47
+ return as_float
48
+
49
+ return str(value)
50
+
51
+
52
+ def _bytes_to_jsonable(value: bytes | bytearray | memoryview) -> str:
53
+ raw = bytes(value)
54
+
55
+ try:
56
+ return raw.decode("utf-8")
57
+ except UnicodeDecodeError:
58
+ return raw.hex()
@@ -0,0 +1,12 @@
1
+ import re
2
+
3
+
4
+ def remove_thinking_blocks(value: str) -> str:
5
+ cleaned = re.sub(
6
+ r"<think>.*?</think>",
7
+ "",
8
+ value,
9
+ flags=re.IGNORECASE | re.DOTALL,
10
+ )
11
+
12
+ return cleaned.strip()
File without changes
File without changes
@@ -0,0 +1,58 @@
1
+ from gaard_core.json_utils import json_dumps
2
+ from gaard_core.prompt_compiler.models import CompiledPrompt
3
+ from gaard_core.query_pipeline.models import QueryIntentDecision, QueryRequest
4
+
5
+
6
+ class IntentClassificationPromptCompiler:
7
+ def compile(self, request: QueryRequest) -> CompiledPrompt:
8
+ payload = {
9
+ "question": request.question,
10
+ "datasource_id": request.datasource_id,
11
+ "user_id": request.user_id,
12
+ }
13
+
14
+ return CompiledPrompt(
15
+ system_prompt=self._build_system_prompt(),
16
+ user_prompt=self._build_user_prompt(payload),
17
+ metadata={
18
+ "allowed_decisions": [item.value for item in QueryIntentDecision],
19
+ },
20
+ )
21
+
22
+ def _build_system_prompt(self) -> str:
23
+ return """You are GAARD Query Intent Classification.
24
+
25
+ Your task is to decide whether the user's request can be fulfilled only by a read-only SQL SELECT query.
26
+
27
+ Allowed decisions:
28
+ - read_only_data_question: the user asks a question about data that can be answered with a read-only SELECT or WITH query.
29
+ - write_or_mutation_request: the user asks to insert, update, delete, reset, clear, modify, create, alter, drop, or otherwise change data, schema, configuration, files, permissions, or system state.
30
+ - non_data_request: the request is not a question about database data.
31
+ - ambiguous: the intent is unclear or it is not safe to decide that it is read-only.
32
+
33
+ Decision rules:
34
+ 1. Allow only requests whose intent is to read, count, list, aggregate, compare, summarize, inspect, or analyze existing data.
35
+ 2. Reject requests that ask for a change, even if a SELECT query could be used to find the affected rows.
36
+ 3. Reject destructive, administrative, or state-changing requests.
37
+ 4. Choose ambiguous instead of guessing when the intent is unclear.
38
+
39
+ Output rules:
40
+ - Return only a JSON object.
41
+ - Do not include markdown.
42
+ - Do not include reasoning outside the JSON.
43
+ - Do not include <think> blocks.
44
+ - Use exactly this JSON shape:
45
+ {"decision":"read_only_data_question","confidence":0.0,"reason":"short reason"}
46
+ """
47
+
48
+ def _build_user_prompt(self, payload: dict[str, str]) -> str:
49
+ return f"""Classify this user request before SQL generation.
50
+
51
+ Input JSON:
52
+ {json_dumps(payload, ensure_ascii=False, indent=2)}
53
+
54
+ Return one JSON object with:
55
+ - decision: one of {", ".join(item.value for item in QueryIntentDecision)}
56
+ - confidence: number from 0 to 1
57
+ - reason: short explanation
58
+ """