PyPI - generflow-core - Versions diffs - 0.2.0__py3-none-any.whl - Mend

generflow-core 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30) hide show

generflow_core/__init__.py +3 -0
generflow_core/actions/__init__.py +22 -0
generflow_core/actions/dispatcher.py +223 -0
generflow_core/adapters/__init__.py +11 -0
generflow_core/adapters/llm.py +186 -0
generflow_core/api/__init__.py +5 -0
generflow_core/api/app.py +494 -0
generflow_core/api/prompt.py +64 -0
generflow_core/cli.py +241 -0
generflow_core/databind/__init__.py +30 -0
generflow_core/databind/config.py +183 -0
generflow_core/databind/resolver.py +306 -0
generflow_core/hitl/__init__.py +22 -0
generflow_core/hitl/gates.py +165 -0
generflow_core/interop/__init__.py +257 -0
generflow_core/observability/__init__.py +208 -0
generflow_core/py.typed +0 -0
generflow_core/registry/__init__.py +4 -0
generflow_core/registry/registry.py +194 -0
generflow_core/replay/__init__.py +189 -0
generflow_core/spec/__init__.py +21 -0
generflow_core/spec/ast.py +61 -0
generflow_core/spec/diff.py +177 -0
generflow_core/spec/parser.py +332 -0
generflow_core/spec/update.py +136 -0
generflow_core-0.2.0.dist-info/METADATA +161 -0
generflow_core-0.2.0.dist-info/RECORD +30 -0
generflow_core-0.2.0.dist-info/WHEEL +5 -0
generflow_core-0.2.0.dist-info/entry_points.txt +3 -0
generflow_core-0.2.0.dist-info/top_level.txt +1 -0

generflow_core/databind/resolver.py ADDED Viewed

@@ -0,0 +1,306 @@
+"""Data source resolvers: turn `src="name"` references into live data.
+Each source type (rest, sql, graphql, mcp, file) implements a Resolver.
+The registry maps source names → resolvers. The SSE pipeline emits
+`data.fill` events as each `src=` ref resolves.
+"""
+from __future__ import annotations
+import asyncio
+import hashlib
+import json
+import time
+from abc import ABC, abstractmethod
+from pathlib import Path
+from typing import Any
+from .config import AppConfig, DataSource
+class ResolverError(Exception):
+    pass
+class Resolver(ABC):
+    type: str = "base"
+    @abstractmethod
+    async def fetch(self, source: DataSource, params: dict[str, Any]) -> Any:
+        """Execute the source and return its data. `params` are runtime args."""
+# ── Simple in-memory cache (per-process, TTL-based) ────────────────────────
+_CACHE: dict[tuple[str, frozenset], tuple[float, Any]] = {}
+def _cache_key(name: str, params: dict) -> tuple[str, frozenset]:
+    # stable key from sorted params
+    return (name, frozenset(params.items()))
+def _cache_get(name: str, params: dict, ttl: int) -> Any | None:
+    if ttl <= 0:
+        return None
+    key = _cache_key(name, params)
+    hit = _CACHE.get(key)
+    if hit is None:
+        return None
+    expires_at, value = hit
+    if time.time() > expires_at:
+        _CACHE.pop(key, None)
+        return None
+    return value
+def _cache_put(name: str, params: dict, ttl: int, value: Any) -> None:
+    if ttl <= 0:
+        return
+    _CACHE[_cache_key(name, params)] = (time.time() + ttl, value)
+def cache_clear() -> None:
+    _CACHE.clear()
+# ── Resolver registry ──────────────────────────────────────────────────────
+_RESOLVERS: dict[str, type[Resolver]] = {}
+def register_resolver(t: str, cls: type[Resolver]) -> None:
+    _RESOLVERS[t] = cls
+def get_resolver(t: str) -> Resolver:
+    cls = _RESOLVERS.get(t)
+    if cls is None:
+        raise ResolverError(f"Unknown source type: {t!r}")
+    return cls()
+# ── REST resolver ─────────────────────────────────────────────────────────
+class RestResolver(Resolver):
+    type = "rest"
+    async def fetch(self, source: DataSource, params: dict) -> Any:
+        import httpx
+        method = source.config.get("method", "GET").upper()
+        url = source.config.get("url", "")
+        if not url:
+            raise ResolverError(f"Source {source.name!r}: missing url")
+        # interpolate params into URL path (/orders/$order_id) and query string
+        for k, v in params.items():
+            url = url.replace(f"${k}", str(v))
+        query = {k: v for k, v in source.config.get("params", {}).items()}
+        query.update({k: v for k, v in params.items() if k not in query})
+        body = source.config.get("body")
+        headers = dict(source.config.get("headers", {}))
+        auth = source.config.get("auth")
+        if isinstance(auth, str) and auth.startswith("bearer:"):
+            headers["Authorization"] = f"Bearer {auth[7:]}"
+        async with httpx.AsyncClient(timeout=10.0) as client:
+            r = await client.request(method, url, params=query, json=body, headers=headers)
+            r.raise_for_status()
+            return r.json()
+register_resolver("rest", RestResolver)
+# ── SQL resolver ──────────────────────────────────────────────────────────
+class SqlResolver(Resolver):
+    type = "sql"
+    async def fetch(self, source: DataSource, params: dict) -> Any:
+        # use sqlite3 (stdlib) for portability; production would use asyncpg
+        import sqlite3
+        dsn = source.config.get("connection") or source.config.get("dsn", ":memory:")
+        if isinstance(dsn, str) and dsn.startswith("sqlite:///"):
+            dsn = dsn[len("sqlite:///"):]
+        query = source.config.get("query", "")
+        if not query:
+            raise ResolverError(f"Source {source.name!r}: missing query")
+        # bind params from runtime args
+        positional = [params.get(b) for b in source.config.get("bind", [])]
+        conn = sqlite3.connect(dsn)
+        try:
+            cur = conn.execute(query, positional)
+            cols = [d[0] for d in cur.description] if cur.description else []
+            rows = [dict(zip(cols, row)) for row in cur.fetchall()]
+            return rows
+        finally:
+            conn.close()
+register_resolver("sql", SqlResolver)
+# ── GraphQL resolver ──────────────────────────────────────────────────────
+class GraphqlResolver(Resolver):
+    type = "graphql"
+    async def fetch(self, source: DataSource, params: dict) -> Any:
+        # Minimal GraphQL: POST {query, variables} to endpoint.
+        # We use raw urllib (stdlib) so we don't add a dependency.
+        import urllib.request
+        import urllib.error
+        endpoint = source.config.get("url", "")
+        if not endpoint:
+            raise ResolverError(f"Source {source.name!r}: missing url")
+        query = source.config.get("query", "")
+        variables = dict(source.config.get("variables", {}))
+        variables.update(params)
+        body = json.dumps({"query": query, "variables": variables}).encode("utf-8")
+        headers = {"Content-Type": "application/json"}
+        auth = source.config.get("auth")
+        if isinstance(auth, str) and auth.startswith("bearer:"):
+            headers["Authorization"] = f"Bearer {auth[7:]}"
+        req = urllib.request.Request(endpoint, data=body, headers=headers, method="POST")
+        loop = asyncio.get_event_loop()
+        resp = await loop.run_in_executor(None, lambda: urllib.request.urlopen(req, timeout=10))
+        payload = json.loads(resp.read().decode("utf-8"))
+        if "errors" in payload:
+            raise ResolverError(f"GraphQL errors: {payload['errors']}")
+        return payload.get("data", {})
+register_resolver("graphql", GraphqlResolver)
+# ── MCP resolver ──────────────────────────────────────────────────────────
+class McpResolver(Resolver):
+    """MCP (Model Context Protocol) tool caller.
+    Talks to an MCP server over stdio or HTTP. For v1, we ship a stdio
+    adapter (the MCP standard transport). HTTP MCP support is a v2 add.
+    MCP servers expose a list of tools via `tools/list`. Each tool call
+    is `tools/call` with name + arguments.
+    """
+    type = "mcp"
+    async def fetch(self, source: DataSource, params: dict) -> Any:
+        command = source.config.get("command", "")
+        tool = source.config.get("tool", "")
+        if not command or not tool:
+            raise ResolverError(f"Source {source.name!r}: missing command or tool")
+        args = dict(source.config.get("args", {}))
+        args.update(params)
+        # spawn the MCP server, send tools/call, collect result
+        return await self._call_stdio(command, tool, args, source.config.get("env", {}))
+    async def _call_stdio(self, command: str, tool: str, arguments: dict, env_overrides: dict) -> Any:
+        import os
+        import subprocess
+        env = {**os.environ, **env_overrides}
+        proc = await asyncio.create_subprocess_exec(
+            *command.split(),
+            stdin=subprocess.PIPE,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            env=env,
+        )
+        # Minimal MCP handshake: initialize + tools/call
+        init_msg = {
+            "jsonrpc": "2.0", "id": 1, "method": "initialize",
+            "params": {"protocolVersion": "2024-11-05", "capabilities": {}, "clientInfo": {"name": "generflow", "version": "0.1.0"}},
+        }
+        initialized = {"jsonrpc": "2.0", "method": "notifications/initialized"}
+        call_msg = {
+            "jsonrpc": "2.0", "id": 2, "method": "tools/call",
+            "params": {"name": tool, "arguments": arguments},
+        }
+        messages = json.dumps(init_msg) + "\n" + json.dumps(initialized) + "\n" + json.dumps(call_msg) + "\n"
+        try:
+            stdout, stderr = await asyncio.wait_for(proc.communicate(messages.encode()), timeout=15.0)
+        except asyncio.TimeoutError:
+            proc.kill()
+            raise ResolverError(f"MCP call to {command!r} timed out")
+        # parse responses
+        for line in stdout.decode("utf-8", errors="replace").splitlines():
+            line = line.strip()
+            if not line or not line.startswith("{"):
+                continue
+            try:
+                msg = json.loads(line)
+            except json.JSONDecodeError:
+                continue
+            if msg.get("id") == 2:
+                if "error" in msg:
+                    raise ResolverError(f"MCP error: {msg['error']}")
+                return msg.get("result", {})
+        raise ResolverError(f"MCP server {command!r} returned no response")
+register_resolver("mcp", McpResolver)
+# ── File resolver ─────────────────────────────────────────────────────────
+class FileResolver(Resolver):
+    """Read structured data from a local file.
+    Supports JSONL (one JSON object per line, default) and JSON arrays.
+    Config:
+      path:    absolute or {config_dir}-interpolated path
+      format:  "jsonl" (default) or "json"
+    """
+    type = "file"
+    async def fetch(self, source: DataSource, params: dict) -> Any:
+        path_str = source.config.get("path", "")
+        if not path_str:
+            raise ResolverError(f"Source {source.name!r}: missing 'path'")
+        path = Path(path_str)
+        if not path.exists():
+            raise ResolverError(f"Source {source.name!r}: file not found: {path}")
+        fmt = source.config.get("format", "jsonl")
+        text = path.read_text()
+        if fmt == "json":
+            return json.loads(text)
+        if fmt == "jsonl":
+            rows = []
+            for line in text.splitlines():
+                line = line.strip()
+                if not line:
+                    continue
+                rows.append(json.loads(line))
+            return rows
+        raise ResolverError(f"Source {source.name!r}: unknown format {fmt!r}")
+register_resolver("file", FileResolver)
+# ── Top-level resolver: name → data ────────────────────────────────────────
+async def resolve_source(
+    config: AppConfig, name: str, params: dict | None = None
+) -> tuple[Any, str]:
+    """Resolve a named source. Returns (data, fetched_at_iso).
+    Honors cache TTLs. Raises ResolverError on failure (caller decides
+    whether to fall back to a HITL gate).
+    """
+    src = config.source(name)
+    if src is None:
+        raise ResolverError(f"Unknown source: {name!r}")
+    params = params or {}
+    cached = _cache_get(name, params, src.cache_seconds)
+    if cached is not None:
+        return cached, _iso_now()
+    resolver = get_resolver(src.type)
+    data = await resolver.fetch(src, params)
+    _cache_put(name, params, src.cache_seconds, data)
+    return data, _iso_now()
+def _iso_now() -> str:
+    import datetime
+    return datetime.datetime.utcnow().isoformat() + "Z"

generflow_core/hitl/__init__.py ADDED Viewed

@@ -0,0 +1,22 @@
+"""HITL module: human-in-the-loop gates for confidence, PII, ambiguity, missing sources."""
+# Re-export the gate API from .gates so it is importable from this package directly.
+from .gates import (
+    Decision,
+    GateResult,
+    ambiguity_gate,
+    confidence_gate,
+    missing_source_gate,
+    pii_gate,
+    redact,
+    scan_pii,
+)
+# Names exported by a star-import of this package; mirrors the import list above.
+__all__ = [
+    "Decision",
+    "GateResult",
+    "ambiguity_gate",
+    "confidence_gate",
+    "missing_source_gate",
+    "pii_gate",
+    "redact",
+    "scan_pii",
+]

generflow_core/hitl/gates.py ADDED Viewed

@@ -0,0 +1,165 @@
+"""HITL gates: confidence, PII, ambiguity, missing-source.
+Each gate returns a decision:
+  - ALLOW:        proceed, no user interaction
+  - CONFIRM:      show the user a preview, wait for approval
+  - CLARIFY:      ask a question, get more info
+  - REJECT:       hard-fail, don't render / don't execute
+Gates run during SSE streaming:
+  - Confidence gate runs per node (after spec.line)
+  - PII gate runs on data.fill values (redact + confirm)
+  - Ambiguity gate runs when a component name isn't in the registry
+  - Missing-source gate runs when a `src=` ref can't be resolved
+"""
+from __future__ import annotations
+import re
+from dataclasses import dataclass
+from enum import Enum
+from typing import Any
+class Decision(str, Enum):
+    ALLOW = "allow"
+    CONFIRM = "confirm"
+    CLARIFY = "clarify"
+    REJECT = "reject"
+@dataclass
+class GateResult:
+    decision: Decision
+    reason: str = ""
+    redacted_value: Any = None  # for PII gate
+    question: str = ""  # for CLARIFY
+    options: list[str] | None = None  # for CLARIFY
+    confidence: float = 1.0
+    def to_dict(self) -> dict:
+        return {
+            "decision": self.decision.value,
+            "reason": self.reason,
+            "confidence": self.confidence,
+            "question": self.question,
+            "options": self.options,
+            "redacted_value": self.redacted_value,
+        }
+# ── PII detection ─────────────────────────────────────────────────────────
+# (kind, compiled regex) pairs; scan_pii() and redact() apply them in this order.
+_PII_PATTERNS: list[tuple[str, re.Pattern]] = [
+    # local@domain; the dotted suffix is optional, so "user@host" also matches
+    ("email", re.compile(r"\b[\w.+-]+@[\w-]+(?:\.[\w.-]+)?\b")),
+    # 3-3-4 digit groups, each optionally separated by "-", "." or whitespace
+    ("phone_us", re.compile(r"\b\d{3}[-.\s]?\d{3}[-.\s]?\d{4}\b")),
+    # 3-2-4 digit groups, hyphens required
+    ("ssn", re.compile(r"\b\d{3}-\d{2}-\d{4}\b")),
+    # 13 to 16 digits, each optionally followed by spaces or hyphens; no checksum validation
+    ("credit_card", re.compile(r"\b(?:\d[ -]*?){13,16}\b")),
+    # four dot-separated groups of 1-3 digits; octet range (0-255) is not checked
+    ("ipv4", re.compile(r"\b(?:\d{1,3}\.){3}\d{1,3}\b")),
+]
+def scan_pii(value: Any) -> list[str]:
+    """Return the list of PII kinds detected in `value`."""
+    if value is None:
+        return []
+    if isinstance(value, (dict, list)):
+        import json
+        s = json.dumps(value, default=str)
+    else:
+        s = str(value)
+    found = []
+    for name, pat in _PII_PATTERNS:
+        if pat.search(s):
+            found.append(name)
+    return found
+def redact(value: Any) -> Any:
+    """Replace PII patterns with placeholder text."""
+    if value is None:
+        return value
+    s = str(value)
+    for name, pat in _PII_PATTERNS:
+        s = pat.sub(f"[REDACTED:{name}]", s)
+    return s
+# ── Confidence gate ───────────────────────────────────────────────────────
+def confidence_gate(value: Any, threshold: float = 0.7) -> GateResult:
+    """Heuristic confidence score for a value.
+    Real implementation would use the LLM's logprobs or a self-reported
+    score. For v1, we use a proxy: short, complete strings score higher;
+    long/uncertain/complex values score lower.
+    """
+    if isinstance(value, str):
+        s = value.strip()
+        if not s:
+            return GateResult(Decision.CONFIRM, reason="empty", confidence=0.0)
+        # Heuristic: contain "?" or "maybe" or "..." → low confidence
+        if any(t in s.lower() for t in ("maybe", "perhaps", "might be", "...")):
+            return GateResult(Decision.CONFIRM, reason="uncertain phrasing", confidence=0.5)
+        # TBD-like markers
+        if any(t in s for t in ("TODO", "TBD", "???", "...")):
+            return GateResult(Decision.CONFIRM, reason="TBD marker", confidence=0.4)
+        # otherwise assume confident
+        return GateResult(Decision.ALLOW, confidence=0.9)
+    if isinstance(value, (int, float, bool)):
+        return GateResult(Decision.ALLOW, confidence=0.95)
+    if isinstance(value, (list, dict)):
+        return GateResult(Decision.ALLOW, confidence=0.85)
+    return GateResult(Decision.ALLOW, confidence=0.8)
+# ── PII gate ──────────────────────────────────────────────────────────────
+def pii_gate(value: Any) -> GateResult:
+    kinds = scan_pii(value)
+    if not kinds:
+        return GateResult(Decision.ALLOW, confidence=1.0)
+    return GateResult(
+        Decision.CONFIRM,
+        reason=f"PII detected: {', '.join(kinds)}",
+        confidence=0.3,
+        redacted_value=redact(value),
+    )
+# ── Ambiguity gate ───────────────────────────────────────────────────────
+def ambiguity_gate(
+    name: str,
+    candidates: list[str],
+    threshold: float = 0.8,
+) -> GateResult:
+    """If the LLM emitted a component name not in the registry, but
+    it looks like a close match to one that is, ask the user."""
+    if not candidates:
+        return GateResult(Decision.REJECT, reason=f"Unknown component: {name}", confidence=0.0)
+    if len(candidates) == 1:
+        return GateResult(
+            Decision.CONFIRM,
+            reason=f"Unknown component '{name}', did you mean '{candidates[0]}'?",
+            confidence=0.6,
+            options=[candidates[0], "skip"],
+        )
+    return GateResult(
+        Decision.CLARIFY,
+        reason=f"Unknown component '{name}'",
+        confidence=0.4,
+        question=f"Component '{name}' is not in the registry. Which did you mean?",
+        options=candidates[:5] + ["skip"],
+    )
+# ── Missing-source gate ──────────────────────────────────────────────────
+def missing_source_gate(ref_name: str) -> GateResult:
+    return GateResult(
+        Decision.CLARIFY,
+        reason=f"Data source '{ref_name}' is not bound",
+        confidence=0.0,
+        question=f"I referenced a data source '{ref_name}' but no binding exists. Skip or abort?",
+        options=["skip (render placeholder)", "abort"],
+    )