PyPI - gaard-core - Versions diffs - 0.1.0__py3-none-any.whl - Mend

gaard-core 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (48) hide show

gaard_core/__init__.py +0 -0
gaard_core/audit/__init__.py +0 -0
gaard_core/errors.py +79 -0
gaard_core/evaluation/__init__.py +0 -0
gaard_core/execution/__init__.py +0 -0
gaard_core/execution/mock_executor.py +25 -0
gaard_core/investigation/__init__.py +25 -0
gaard_core/investigation/llm_readiness_agent.py +220 -0
gaard_core/investigation/loop.py +83 -0
gaard_core/investigation/mock_readiness_agent.py +20 -0
gaard_core/investigation/models.py +62 -0
gaard_core/json_utils.py +58 -0
gaard_core/llm_output.py +12 -0
gaard_core/policy_engine/__init__.py +0 -0
gaard_core/prompt_compiler/__init__.py +0 -0
gaard_core/prompt_compiler/intent_classification_prompt.py +58 -0
gaard_core/prompt_compiler/investigation_readiness_prompt.py +84 -0
gaard_core/prompt_compiler/models.py +19 -0
gaard_core/prompt_compiler/result_classification_prompt.py +62 -0
gaard_core/prompt_compiler/result_interpretation_prompt.py +73 -0
gaard_core/prompt_compiler/schema_formatter.py +43 -0
gaard_core/prompt_compiler/sql_generation_prompt.py +105 -0
gaard_core/query_intent/__init__.py +1 -0
gaard_core/query_intent/llm_classifier.py +112 -0
gaard_core/query_intent/mock_classifier.py +14 -0
gaard_core/query_pipeline/__init__.py +0 -0
gaard_core/query_pipeline/llm_sql_generator.py +85 -0
gaard_core/query_pipeline/mock_sql_generator.py +33 -0
gaard_core/query_pipeline/models.py +57 -0
gaard_core/query_pipeline/pipeline.py +124 -0
gaard_core/result_classifier/__init__.py +1 -0
gaard_core/result_classifier/llm_classifier.py +87 -0
gaard_core/result_classifier/mock_classifier.py +10 -0
gaard_core/result_interpreter/__init__.py +0 -0
gaard_core/result_interpreter/llm_interpreter.py +66 -0
gaard_core/result_interpreter/mock_interpreter.py +25 -0
gaard_core/schema/__init__.py +0 -0
gaard_core/schema/cache.py +59 -0
gaard_core/schema/context.py +40 -0
gaard_core/schema/models.py +27 -0
gaard_core/security/__init__.py +0 -0
gaard_core/semantic_layer/__init__.py +0 -0
gaard_core/sql_validator/__init__.py +0 -0
gaard_core/sql_validator/select_only.py +37 -0
gaard_core-0.1.0.dist-info/METADATA +23 -0
gaard_core-0.1.0.dist-info/RECORD +48 -0
gaard_core-0.1.0.dist-info/WHEEL +5 -0
gaard_core-0.1.0.dist-info/top_level.txt +1 -0

gaard_core/__init__.py ADDED Viewed

File without changes

gaard_core/audit/__init__.py ADDED Viewed

File without changes

gaard_core/errors.py ADDED Viewed

@@ -0,0 +1,79 @@
+from typing import Any
+class GaardError(Exception):
+    code = "GAARD_ERROR"
+    status_code = 500
+    def __init__(self, message: str | None = None) -> None:
+        self.message = message or "GAARD error."
+        super().__init__(self.message)
+class ConfigurationError(GaardError):
+    """GaardError subclass reporting code ``CONFIGURATION_ERROR`` with status code 500."""
+    code = "CONFIGURATION_ERROR"
+    status_code = 500
+class SqlGenerationError(GaardError):
+    """GaardError subclass reporting code ``SQL_GENERATION_ERROR`` with status code 502."""
+    code = "SQL_GENERATION_ERROR"
+    status_code = 502
+class SqlValidationError(GaardError):
+    code = "SQL_VALIDATION_ERROR"
+    status_code = 400
+    def __init__(
+        self,
+        message: str | None = None,
+        sql: str = "",
+        error_detail: str = "",
+        metadata: dict[str, Any] | None = None,
+    ) -> None:
+        self.sql = sql
+        self.error_detail = error_detail
+        self.metadata = metadata or {}
+        super().__init__(message)
+class QueryExecutionError(GaardError):
+    code = "QUERY_EXECUTION_ERROR"
+    status_code = 400
+    def __init__(
+        self,
+        message: str | None = None,
+        sql: str = "",
+        error_detail: str = "",
+        metadata: dict[str, Any] | None = None,
+    ) -> None:
+        self.sql = sql
+        self.error_detail = error_detail
+        self.metadata = metadata or {}
+        super().__init__(message)
+class QueryPipelineStepError(GaardError):
+    status_code = 502
+    def __init__(
+        self,
+        message: str | None = None,
+        phase: str = "",
+        sql: str = "",
+        error_code: str = "QUERY_PIPELINE_STEP_ERROR",
+        error_detail: str = "",
+        metadata: dict[str, Any] | None = None,
+    ) -> None:
+        self.code = error_code
+        self.phase = phase
+        self.sql = sql
+        self.error_detail = error_detail
+        self.metadata = metadata or {}
+        super().__init__(message)
+class LlmProviderError(GaardError):
+    """GaardError subclass reporting code ``LLM_PROVIDER_ERROR`` with status code 502."""
+    code = "LLM_PROVIDER_ERROR"
+    status_code = 502

gaard_core/evaluation/__init__.py ADDED Viewed

File without changes

gaard_core/execution/__init__.py ADDED Viewed

File without changes

gaard_core/execution/mock_executor.py ADDED Viewed

@@ -0,0 +1,25 @@
+from gaard_core.query_pipeline.models import QueryResult
+class MockQueryExecutor:
+    def execute(self, sql: str) -> QueryResult:
+        normalized = sql.lower()
+        if "patients" in normalized:
+            return QueryResult(
+                columns=["patients_count"],
+                rows=[
+                    {
+                        "patients_count": 124,
+                    }
+                ],
+            )
+        return QueryResult(
+            columns=["value"],
+            rows=[
+                {
+                    "value": 1,
+                }
+            ],
+        )

gaard_core/investigation/__init__.py ADDED Viewed

@@ -0,0 +1,25 @@
+from gaard_core.investigation.loop import InvestigationLoop
+from gaard_core.investigation.llm_readiness_agent import LlmInvestigationReadinessAgent
+from gaard_core.investigation.mock_readiness_agent import MockInvestigationReadinessAgent
+from gaard_core.investigation.models import (
+    InvestigationContext,
+    InvestigationIteration,
+    InvestigationLoopConfig,
+    InvestigationLoopResult,
+    InvestigationReadinessDecision,
+    InvestigationRoute,
+    RequiredAnalysisTask,
+)
+__all__ = [
+    "InvestigationContext",
+    "InvestigationIteration",
+    "InvestigationLoop",
+    "InvestigationLoopConfig",
+    "InvestigationLoopResult",
+    "InvestigationReadinessDecision",
+    "InvestigationRoute",
+    "LlmInvestigationReadinessAgent",
+    "MockInvestigationReadinessAgent",
+    "RequiredAnalysisTask",
+]

gaard_core/investigation/llm_readiness_agent.py ADDED Viewed

@@ -0,0 +1,220 @@
+import json
+from typing import Any, Protocol
+from gaard_core.investigation.models import (
+    InvestigationContext,
+    InvestigationReadinessDecision,
+    InvestigationRoute,
+    RequiredAnalysisTask,
+)
+from gaard_core.llm_output import remove_thinking_blocks
+from gaard_core.prompt_compiler.investigation_readiness_prompt import (
+    InvestigationReadinessPromptCompiler,
+)
+from gaard_core.prompt_compiler.models import CompiledPrompt
+from gaard_llm.openai_compatible.client import OpenAICompatibleClient
+from gaard_llm.providers.models import ChatCompletionRequest, ChatMessage
+class InvestigationReadinessPromptCompilerProtocol(Protocol):
+    """Structural type for objects that turn an InvestigationContext into a CompiledPrompt."""
+    def compile(self, context: InvestigationContext) -> CompiledPrompt:
+        """Compile the prompt for ``context`` (stub; implementations supply the body)."""
+        pass
+class LlmInvestigationReadinessAgent:
+    name = "llm_investigation_readiness"
+    def __init__(
+        self,
+        client: OpenAICompatibleClient,
+        model: str,
+        extra_body: dict[str, Any] | None = None,
+        prompt_compiler: InvestigationReadinessPromptCompilerProtocol | None = None,
+    ) -> None:
+        self.client = client
+        self.model = model
+        self.extra_body = extra_body or {}
+        self.prompt_compiler = prompt_compiler or InvestigationReadinessPromptCompiler()
+    def assess(self, context: InvestigationContext) -> InvestigationReadinessDecision:
+        compiled_prompt = self.prompt_compiler.compile(context=context)
+        response = self.client.create_chat_completion(
+            ChatCompletionRequest(
+                model=self.model,
+                temperature=0.0,
+                extra_body=self.extra_body,
+                messages=[
+                    ChatMessage(
+                        role="system",
+                        content=compiled_prompt.system_prompt,
+                    ),
+                    ChatMessage(
+                        role="user",
+                        content=compiled_prompt.user_prompt,
+                    ),
+                ],
+            )
+        )
+        return parse_investigation_readiness_decision(response.content)
+def parse_investigation_readiness_decision(value: str) -> InvestigationReadinessDecision:
+    cleaned = remove_thinking_blocks(value).strip()
+    try:
+        payload = json.loads(cleaned)
+    except json.JSONDecodeError:
+        return InvestigationReadinessDecision(
+            ready_for_sql=False,
+            route=InvestigationRoute.ANALYSIS,
+            confidence=0.0,
+            reason="Investigation readiness agent returned invalid JSON.",
+            missing_information=["valid readiness JSON"],
+            required_analysis=["Retry readiness assessment with a valid JSON response."],
+            model_response={"raw": cleaned},
+        )
+    if not isinstance(payload, dict):
+        return InvestigationReadinessDecision(
+            ready_for_sql=False,
+            route=InvestigationRoute.ANALYSIS,
+            confidence=0.0,
+            reason="Investigation readiness agent returned a non-object JSON value.",
+            missing_information=["valid readiness JSON object"],
+            required_analysis=["Retry readiness assessment with a JSON object response."],
+            model_response={"raw": payload},
+        )
+    ready_for_sql = parse_bool(payload.get("ready_for_sql"))
+    route = parse_route(payload.get("route"), ready_for_sql)
+    if route == InvestigationRoute.SQL and not ready_for_sql:
+        route = InvestigationRoute.ANALYSIS
+    if route == InvestigationRoute.ANALYSIS:
+        ready_for_sql = False
+    missing_information = parse_string_list(payload.get("missing_information"))
+    required_analysis = parse_string_list(payload.get("required_analysis"))
+    return InvestigationReadinessDecision(
+        ready_for_sql=ready_for_sql,
+        route=route,
+        confidence=parse_confidence(payload.get("confidence")),
+        reason=str(payload.get("reason") or ""),
+        missing_information=missing_information,
+        required_analysis=required_analysis,
+        required_analysis_tasks=parse_required_analysis_tasks(
+            payload.get("required_analysis_tasks"),
+            missing_information,
+            required_analysis,
+        ),
+        assumptions=parse_string_list(payload.get("assumptions")),
+        model_response=payload,
+    )
+def parse_route(value: object, ready_for_sql: bool) -> InvestigationRoute:
+    if isinstance(value, str):
+        normalized = value.strip().lower().replace("-", "_").replace(" ", "_")
+        if normalized in {"sql", "ready", "ready_for_sql"}:
+            return InvestigationRoute.SQL
+        if normalized in {"analysis", "analyze", "requires_analysis"}:
+            return InvestigationRoute.ANALYSIS
+    return InvestigationRoute.SQL if ready_for_sql else InvestigationRoute.ANALYSIS
+def parse_bool(value: object) -> bool:
+    if isinstance(value, bool):
+        return value
+    if isinstance(value, str):
+        return value.strip().lower() in {"true", "yes", "tak", "1"}
+    return False
+def parse_confidence(value: object) -> float:
+    try:
+        confidence = float(value)
+    except (TypeError, ValueError):
+        return 0.0
+    return max(0.0, min(1.0, confidence))
+def parse_string_list(value: object) -> list[str]:
+    if not isinstance(value, list):
+        return []
+    items: list[str] = []
+    for item in value:
+        if item is None:
+            continue
+        text = str(item).strip()
+        if text:
+            items.append(text)
+    return items
+def parse_required_analysis_tasks(
+    value: object,
+    missing_information: list[str],
+    required_analysis: list[str],
+) -> list[RequiredAnalysisTask]:
+    if isinstance(value, list):
+        tasks = [
+            parse_required_analysis_task(item)
+            for item in value
+            if isinstance(item, dict)
+        ]
+        tasks = [task for task in tasks if task.required_analysis]
+        if tasks:
+            return tasks
+    return required_analysis_tasks_from_lists(missing_information, required_analysis)
+def parse_required_analysis_task(value: dict[str, object]) -> RequiredAnalysisTask:
+    return RequiredAnalysisTask(
+        missing_information=str(value.get("missing_information") or "").strip(),
+        required_analysis=str(value.get("required_analysis") or "").strip(),
+        category=normalize_analysis_category(value.get("category")),
+        expected_output=str(value.get("expected_output") or "").strip(),
+    )
+def required_analysis_tasks_from_lists(
+    missing_information: list[str],
+    required_analysis: list[str],
+) -> list[RequiredAnalysisTask]:
+    tasks: list[RequiredAnalysisTask] = []
+    for index, analysis_question in enumerate(required_analysis):
+        tasks.append(
+            RequiredAnalysisTask(
+                missing_information=missing_information[index]
+                if index < len(missing_information)
+                else "",
+                required_analysis=analysis_question,
+            )
+        )
+    return tasks
+def normalize_analysis_category(value: object) -> str:
+    normalized = str(value or "unknown").strip().lower().replace("-", "_").replace(" ", "_")
+    allowed_categories = {
+        "dictionary_value",
+        "relationship_logic",
+        "filter_logic",
+        "aggregation_logic",
+        "entity_mapping",
+        "unknown",
+    }
+    return normalized if normalized in allowed_categories else "unknown"

gaard_core/investigation/loop.py ADDED Viewed

@@ -0,0 +1,83 @@
+from typing import Protocol
+from gaard_core.investigation.models import (
+    InvestigationContext,
+    InvestigationIteration,
+    InvestigationLoopConfig,
+    InvestigationLoopResult,
+    InvestigationReadinessDecision,
+    InvestigationRoute,
+)
+class InvestigationReadinessAgent(Protocol):
+    """Structural type for agents the loop can call: a ``name`` plus an ``assess`` method."""
+    name: str
+    def assess(self, context: InvestigationContext) -> InvestigationReadinessDecision:
+        """Assess ``context`` and return a readiness decision (stub; implementations supply the body)."""
+        pass
+class InvestigationLoop:
+    """Runs a readiness agent against a context and reports which route to take."""
+    def __init__(
+        self,
+        readiness_agent: InvestigationReadinessAgent,
+        config: InvestigationLoopConfig | None = None,
+    ) -> None:
+        """Store the agent and the config, defaulting to ``InvestigationLoopConfig()``."""
+        self.readiness_agent = readiness_agent
+        self.config = config or InvestigationLoopConfig()
+    def run(self, context: InvestigationContext) -> InvestigationLoopResult:
+        """Ask the agent for a decision and wrap it in an InvestigationLoopResult.
+        The decision is normalised against the confidence threshold before it is
+        recorded. The result's route is SQL when the normalised decision routes to
+        SQL and ANALYSIS otherwise.
+        """
+        iterations: list[InvestigationIteration] = []
+        for iteration_number in range(1, self.config.max_iterations + 1):
+            decision = self.readiness_agent.assess(context)
+            normalized_decision = self._normalize_decision(decision)
+            iterations.append(
+                InvestigationIteration(
+                    iteration=iteration_number,
+                    agent=self.readiness_agent.name,
+                    decision=normalized_decision,
+                )
+            )
+            if normalized_decision.route == InvestigationRoute.SQL:
+                return InvestigationLoopResult(
+                    route=InvestigationRoute.SQL,
+                    ready_for_sql=True,
+                    max_iterations=self.config.max_iterations,
+                    confidence_threshold=self.config.readiness_confidence_threshold,
+                    iterations=iterations,
+                )
+            # NOTE(review): this return is inside the loop body, so the loop always
+            # exits during its first pass and max_iterations > 1 never triggers a
+            # second assessment. Confirm whether that is intentional.
+            return InvestigationLoopResult(
+                route=InvestigationRoute.ANALYSIS,
+                ready_for_sql=False,
+                max_iterations=self.config.max_iterations,
+                confidence_threshold=self.config.readiness_confidence_threshold,
+                iterations=iterations,
+            )
+        # Reached only when the range above is empty (max_iterations < 1).
+        return InvestigationLoopResult(
+            route=InvestigationRoute.ANALYSIS,
+            ready_for_sql=False,
+            max_iterations=self.config.max_iterations,
+            confidence_threshold=self.config.readiness_confidence_threshold,
+            iterations=iterations,
+        )
+    def _normalize_decision(
+        self,
+        decision: InvestigationReadinessDecision,
+    ) -> InvestigationReadinessDecision:
+        """Return a copy of ``decision`` with ``ready_for_sql`` and ``route`` re-derived.
+        The copy is ready (route SQL) only if the agent said so and its confidence
+        meets ``readiness_confidence_threshold``; otherwise it routes to ANALYSIS.
+        """
+        ready = (
+            decision.ready_for_sql
+            and decision.confidence >= self.config.readiness_confidence_threshold
+        )
+        route = InvestigationRoute.SQL if ready else InvestigationRoute.ANALYSIS
+        return decision.model_copy(
+            update={
+                "ready_for_sql": ready,
+                "route": route,
+            }
+        )

gaard_core/investigation/mock_readiness_agent.py ADDED Viewed

@@ -0,0 +1,20 @@
+from gaard_core.investigation.models import (
+    InvestigationContext,
+    InvestigationReadinessDecision,
+    InvestigationRoute,
+)
+class MockInvestigationReadinessAgent:
+    name = "mock_investigation_readiness"
+    def __init__(self, decision: InvestigationReadinessDecision | None = None) -> None:
+        self.decision = decision or InvestigationReadinessDecision(
+            ready_for_sql=True,
+            route=InvestigationRoute.SQL,
+            confidence=1.0,
+            reason="Mock readiness agent allows the normal SQL pipeline.",
+        )
+    def assess(self, context: InvestigationContext) -> InvestigationReadinessDecision:
+        return self.decision

gaard_core/investigation/models.py ADDED Viewed

@@ -0,0 +1,62 @@
+from enum import StrEnum
+from typing import Any
+from pydantic import BaseModel, Field
+class InvestigationRoute(StrEnum):
+    # The two routes a readiness decision can select; members compare equal to their string values.
+    SQL = "sql"
+    ANALYSIS = "analysis"
+class InvestigationContext(BaseModel):
+    # Input handed to a readiness agent. Only ``question`` is required; the rest have defaults.
+    question: str = Field(min_length=1)  # must be a non-empty string
+    datasource_id: str = "default"
+    user_id: str = "local-admin"
+    formatted_schema: str = ""
+    business_logic: str = ""
+class RequiredAnalysisTask(BaseModel):
+    # One analysis task attached to a readiness decision; every field is an optional string.
+    missing_information: str = ""
+    required_analysis: str = ""
+    category: str = "unknown"
+    expected_output: str = ""
+class InvestigationReadinessDecision(BaseModel):
+    # A readiness agent's verdict. Defaults describe a not-ready, zero-confidence ANALYSIS decision.
+    ready_for_sql: bool = False
+    route: InvestigationRoute = InvestigationRoute.ANALYSIS
+    confidence: float = 0.0
+    reason: str = ""
+    # List fields use default_factory so each instance gets its own empty list.
+    missing_information: list[str] = Field(default_factory=list)
+    required_analysis: list[str] = Field(default_factory=list)
+    required_analysis_tasks: list[RequiredAnalysisTask] = Field(default_factory=list)
+    assumptions: list[str] = Field(default_factory=list)
+    model_response: dict[str, Any] = Field(default_factory=dict)
+class InvestigationIteration(BaseModel):
+    # Record of one loop pass: its number, the agent's name and the decision stored for it.
+    iteration: int
+    agent: str
+    decision: InvestigationReadinessDecision
+class InvestigationLoopConfig(BaseModel):
+    # Loop settings with validated ranges.
+    max_iterations: int = Field(default=1, ge=1)  # at least 1
+    readiness_confidence_threshold: float = Field(default=0.85, ge=0.0, le=1.0)  # within [0.0, 1.0]
+class InvestigationLoopResult(BaseModel):
+    route: InvestigationRoute
+    ready_for_sql: bool
+    max_iterations: int
+    confidence_threshold: float
+    iterations: list[InvestigationIteration] = Field(default_factory=list)
+    @property
+    def final_decision(self) -> InvestigationReadinessDecision | None:
+        if not self.iterations:
+            return None
+        return self.iterations[-1].decision

gaard_core/json_utils.py ADDED Viewed

@@ -0,0 +1,58 @@
+import json
+import math
+from collections.abc import Mapping
+from datetime import date, datetime, time
+from decimal import Decimal
+from typing import Any
+def to_jsonable(value: Any) -> Any:
+    if value is None or isinstance(value, str | int | float | bool):
+        return value
+    if isinstance(value, Decimal):
+        return _decimal_to_jsonable(value)
+    if isinstance(value, datetime | date | time):
+        return value.isoformat()
+    if isinstance(value, bytes | bytearray | memoryview):
+        return _bytes_to_jsonable(value)
+    if isinstance(value, Mapping):
+        return {str(key): to_jsonable(item) for key, item in value.items()}
+    if isinstance(value, list | tuple | set | frozenset):
+        return [to_jsonable(item) for item in value]
+    if hasattr(value, "model_dump"):
+        return to_jsonable(value.model_dump())
+    return str(value)
+def json_dumps(value: Any, **kwargs: Any) -> str:
+    return json.dumps(to_jsonable(value), **kwargs)
+def _decimal_to_jsonable(value: Decimal) -> int | float | str:
+    if not value.is_finite():
+        return str(value)
+    if value == value.to_integral_value():
+        return int(value)
+    as_float = float(value)
+    if math.isfinite(as_float):
+        return as_float
+    return str(value)
+def _bytes_to_jsonable(value: bytes | bytearray | memoryview) -> str:
+    raw = bytes(value)
+    try:
+        return raw.decode("utf-8")
+    except UnicodeDecodeError:
+        return raw.hex()

gaard_core/llm_output.py ADDED Viewed

@@ -0,0 +1,12 @@
+import re
+def remove_thinking_blocks(value: str) -> str:
+    cleaned = re.sub(
+        r"<think>.*?</think>",
+        "",
+        value,
+        flags=re.IGNORECASE | re.DOTALL,
+    )
+    return cleaned.strip()

gaard_core/policy_engine/__init__.py ADDED Viewed

File without changes

gaard_core/prompt_compiler/__init__.py ADDED Viewed

File without changes

gaard_core/prompt_compiler/intent_classification_prompt.py ADDED Viewed

@@ -0,0 +1,58 @@
+from gaard_core.json_utils import json_dumps
+from gaard_core.prompt_compiler.models import CompiledPrompt
+from gaard_core.query_pipeline.models import QueryIntentDecision, QueryRequest
+class IntentClassificationPromptCompiler:
+    """Builds the system and user prompts for the query-intent classification step."""
+    def compile(self, request: QueryRequest) -> CompiledPrompt:
+        """Return a CompiledPrompt for ``request``.
+        The user prompt embeds the question, datasource id and user id as JSON;
+        the metadata lists every ``QueryIntentDecision`` value under ``allowed_decisions``.
+        """
+        payload = {
+            "question": request.question,
+            "datasource_id": request.datasource_id,
+            "user_id": request.user_id,
+        }
+        return CompiledPrompt(
+            system_prompt=self._build_system_prompt(),
+            user_prompt=self._build_user_prompt(payload),
+            metadata={
+                "allowed_decisions": [item.value for item in QueryIntentDecision],
+            },
+        )
+    def _build_system_prompt(self) -> str:
+        # Static instruction text: allowed decisions, decision rules and the required JSON shape.
+        return """You are GAARD Query Intent Classification.
+Your task is to decide whether the user's request can be fulfilled only by a read-only SQL SELECT query.
+Allowed decisions:
+- read_only_data_question: the user asks a question about data that can be answered with a read-only SELECT or WITH query.
+- write_or_mutation_request: the user asks to insert, update, delete, reset, clear, modify, create, alter, drop, or otherwise change data, schema, configuration, files, permissions, or system state.
+- non_data_request: the request is not a question about database data.
+- ambiguous: the intent is unclear or it is not safe to decide that it is read-only.
+Decision rules:
+1. Allow only requests whose intent is to read, count, list, aggregate, compare, summarize, inspect, or analyze existing data.
+2. Reject requests that ask for a change, even if a SELECT query could be used to find the affected rows.
+3. Reject destructive, administrative, or state-changing requests.
+4. Choose ambiguous instead of guessing when the intent is unclear.
+Output rules:
+- Return only a JSON object.
+- Do not include markdown.
+- Do not include reasoning outside the JSON.
+- Do not include <think> blocks.
+- Use exactly this JSON shape:
+  {"decision":"read_only_data_question","confidence":0.0,"reason":"short reason"}
+"""
+    def _build_user_prompt(self, payload: dict[str, str]) -> str:
+        # Embeds the payload as indented JSON (non-ASCII kept as-is) and lists the
+        # decision values taken from QueryIntentDecision.
+        return f"""Classify this user request before SQL generation.
+Input JSON:
+{json_dumps(payload, ensure_ascii=False, indent=2)}
+Return one JSON object with:
+- decision: one of {", ".join(item.value for item in QueryIntentDecision)}
+- confidence: number from 0 to 1
+- reason: short explanation
+"""