kgmodule-utils 0.4.1__tar.gz → 0.4.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27) hide show
  1. {kgmodule_utils-0.4.1 → kgmodule_utils-0.4.2}/PKG-INFO +1 -1
  2. {kgmodule_utils-0.4.1 → kgmodule_utils-0.4.2}/pyproject.toml +1 -1
  3. {kgmodule_utils-0.4.1 → kgmodule_utils-0.4.2}/src/kg_utils/__init__.py +3 -1
  4. kgmodule_utils-0.4.2/src/kg_utils/retrieval/__init__.py +5 -0
  5. kgmodule_utils-0.4.2/src/kg_utils/retrieval/hits.py +75 -0
  6. {kgmodule_utils-0.4.1 → kgmodule_utils-0.4.2}/src/kg_utils/synthesis/__init__.py +8 -0
  7. kgmodule_utils-0.4.2/src/kg_utils/synthesis/factory.py +97 -0
  8. kgmodule_utils-0.4.2/src/kg_utils/worker/__init__.py +17 -0
  9. kgmodule_utils-0.4.2/src/kg_utils/worker/client.py +196 -0
  10. kgmodule_utils-0.4.2/src/kg_utils/worker/ops.py +72 -0
  11. {kgmodule_utils-0.4.1 → kgmodule_utils-0.4.2}/LICENSE +0 -0
  12. {kgmodule_utils-0.4.1 → kgmodule_utils-0.4.2}/README.md +0 -0
  13. {kgmodule_utils-0.4.1 → kgmodule_utils-0.4.2}/src/kg_utils/embed.py +0 -0
  14. {kgmodule_utils-0.4.1 → kgmodule_utils-0.4.2}/src/kg_utils/embedder.py +0 -0
  15. {kgmodule_utils-0.4.1 → kgmodule_utils-0.4.2}/src/kg_utils/extractor.py +0 -0
  16. {kgmodule_utils-0.4.1 → kgmodule_utils-0.4.2}/src/kg_utils/module.py +0 -0
  17. {kgmodule_utils-0.4.1 → kgmodule_utils-0.4.2}/src/kg_utils/pipeline.py +0 -0
  18. {kgmodule_utils-0.4.1 → kgmodule_utils-0.4.2}/src/kg_utils/py.typed +0 -0
  19. {kgmodule_utils-0.4.1 → kgmodule_utils-0.4.2}/src/kg_utils/semantic.py +0 -0
  20. {kgmodule_utils-0.4.1 → kgmodule_utils-0.4.2}/src/kg_utils/snapshots/__init__.py +0 -0
  21. {kgmodule_utils-0.4.1 → kgmodule_utils-0.4.2}/src/kg_utils/snapshots/manager.py +0 -0
  22. {kgmodule_utils-0.4.1 → kgmodule_utils-0.4.2}/src/kg_utils/snapshots/models.py +0 -0
  23. {kgmodule_utils-0.4.1 → kgmodule_utils-0.4.2}/src/kg_utils/specs.py +0 -0
  24. {kgmodule_utils-0.4.1 → kgmodule_utils-0.4.2}/src/kg_utils/store.py +0 -0
  25. {kgmodule_utils-0.4.1 → kgmodule_utils-0.4.2}/src/kg_utils/synthesis/_config.py +0 -0
  26. {kgmodule_utils-0.4.1 → kgmodule_utils-0.4.2}/src/kg_utils/synthesis/_image.py +0 -0
  27. {kgmodule_utils-0.4.1 → kgmodule_utils-0.4.2}/src/kg_utils/synthesis/_text.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: kgmodule-utils
3
- Version: 0.4.1
3
+ Version: 0.4.2
4
4
  Summary: Shared types, graph store, semantic index, and pipeline base for the KGModule SDK
5
5
  License: Elastic-2.0
6
6
  License-File: LICENSE
@@ -10,7 +10,7 @@ build-backend = "poetry.core.masonry.api"
10
10
 
11
11
  [project]
12
12
  name = "kgmodule-utils"
13
- version = "0.4.1"
13
+ version = "0.4.2"
14
14
  description = "Shared types, graph store, semantic index, and pipeline base for the KGModule SDK"
15
15
  readme = "README.md"
16
16
  license = { text = "Elastic-2.0" }
@@ -14,6 +14,8 @@ Sub-packages / modules:
14
14
  kg_utils.synthesis — Unified text + image synthesis: TextSynthesizer, ImageSynthesizer.
15
15
  Backends: omlx | ollama | openai (text);
16
16
  mflux-local | mflux-serve | openai (image).
17
+ kg_utils.worker — RunPod worker protocol helpers and WorkerClient for /runsync calls.
18
+ kg_utils.retrieval — Shared retrieval helpers: hit_to_dict, attach_content_by_sqlite.
17
19
 
18
20
  Optional extras
19
21
  ---------------
@@ -22,4 +24,4 @@ Optional extras
22
24
  pip install 'kgmodule-utils[synthesis-mflux]' # + mflux (Apple Silicon local gen)
23
25
  """
24
26
 
25
- __version__ = "0.4.1"
27
+ __version__ = "0.4.2"
@@ -0,0 +1,5 @@
1
+ """Shared retrieval helpers for serializing and enriching KG hits."""
2
+
3
+ from kg_utils.retrieval.hits import attach_content_by_sqlite, hit_to_dict
4
+
5
+ __all__ = ["hit_to_dict", "attach_content_by_sqlite"]
@@ -0,0 +1,75 @@
1
+ # © 2026 Eric G. Suchanek, PhD — Flux-Frontiers · SPDX-License-Identifier: Elastic-2.0
2
+ """Hit serialization and content hydration helpers for KG retrieval responses."""
3
+
4
+ from __future__ import annotations
5
+
6
+ import sqlite3
7
+ from collections import defaultdict
8
+ from pathlib import Path
9
+ from typing import Any
10
+
11
+ __all__ = ["hit_to_dict", "attach_content_by_sqlite"]
12
+
13
+
14
def _is_diary_kind(kind_value: Any) -> bool:
    """Return True when ``kind_value`` stringifies to a diary KG kind.

    A value matches when its string form is exactly ``KGKind.DIARY`` or ends
    with ``diary`` once lower-cased.
    """
    rendered = str(kind_value)
    if rendered == "KGKind.DIARY":
        return True
    return rendered.lower().endswith("diary")
17
+
18
+
19
def hit_to_dict(hit: Any, include_diary_timestamp: bool = False) -> dict:
    """Convert a KGRAG hit object into a plain dictionary.

    :param hit: Object exposing ``kg_name``, ``kg_kind``, ``node_id``, ``name``,
        ``kind``, ``score``, ``summary`` and ``source_path`` attributes.
    :param include_diary_timestamp: When true, add a ``timestamp`` key holding
        the hit's ``name`` for diary hits and ``None`` for every other kind.
    :returns: Dictionary with ``kg_kind`` rendered as a string and ``score``
        rounded to four decimal places.
    """
    serialized = dict(
        kg_name=hit.kg_name,
        kg_kind=str(hit.kg_kind),
        node_id=hit.node_id,
        name=hit.name,
        kind=hit.kind,
        score=round(float(hit.score), 4),
        summary=hit.summary,
        source_path=hit.source_path,
    )
    if not include_diary_timestamp:
        return serialized

    # Diary hits carry their timestamp in the ``name`` attribute.
    serialized["timestamp"] = hit.name if _is_diary_kind(hit.kg_kind) else None
    return serialized
39
+
40
+
41
def attach_content_by_sqlite(
    hits: list[dict],
    kg_sqlite_map: dict[str, Path],
    *,
    batch_size: int = 500,
) -> None:
    """Attach full node text under ``content`` via batched SQLite lookups.

    Hits are grouped by ``kg_name``; each group's ``node_id`` values are looked
    up in the ``nodes`` table (``id``, ``text`` columns) of the mapped database.
    Missing or unreadable databases are ignored to preserve permissive
    behavior: hits of such a KG are left without a ``content`` key.

    :param hits: Mutable hit dictionaries. Each hit should include ``kg_name`` and ``node_id``.
    :param kg_sqlite_map: Mapping of KG name to sqlite database path.
    :param batch_size: Maximum number of ids bound to a single ``IN (...)``
        query. Values below 1 are treated as 1.
    """
    # SQLite caps the number of bound parameters per statement (999 on builds
    # older than 3.32), so large hit lists must be split across queries.
    step = max(1, batch_size)

    by_kg: dict[str, list[dict]] = defaultdict(list)
    for hit in hits:
        by_kg[hit.get("kg_name", "")].append(hit)

    for kg_name, kg_hits in by_kg.items():
        db_path = kg_sqlite_map.get(kg_name)
        if not db_path or not Path(db_path).exists():
            continue

        ids = [h.get("node_id") for h in kg_hits if h.get("node_id")]
        if not ids:
            continue

        text_by_id: dict[str, str] = {}
        try:
            con = sqlite3.connect(str(db_path))
            try:
                for start in range(0, len(ids), step):
                    batch = ids[start:start + step]
                    placeholders = ",".join("?" * len(batch))
                    query = f"SELECT id, text FROM nodes WHERE id IN ({placeholders})"
                    for node_id, text in con.execute(query, batch):
                        text_by_id[node_id] = text or ""
            finally:
                # ``with sqlite3.connect(...)`` only manages the transaction;
                # the handle has to be closed explicitly.
                con.close()
        except Exception:  # noqa: BLE001  # pylint: disable=broad-exception-caught
            # Deliberately best-effort: skip this KG and keep going.
            continue

        for hit in kg_hits:
            node_id = hit.get("node_id")
            if node_id:
                hit["content"] = text_by_id.get(node_id, "")
@@ -53,6 +53,11 @@ from kg_utils.synthesis._config import (
53
53
  )
54
54
  from kg_utils.synthesis._image import ImageSynthesizer
55
55
  from kg_utils.synthesis._text import TextSynthesizer
56
+ from kg_utils.synthesis.factory import (
57
+ image_synth_for_backend,
58
+ normalize_openai_base_url,
59
+ text_synth_for_backend,
60
+ )
56
61
 
57
62
 
58
63
  def text_synthesizer_from_env() -> TextSynthesizer:
@@ -76,4 +81,7 @@ __all__ = [
76
81
  "image_synthesizer_from_env",
77
82
  "text_config_from_env",
78
83
  "image_config_from_env",
84
+ "normalize_openai_base_url",
85
+ "text_synth_for_backend",
86
+ "image_synth_for_backend",
79
87
  ]
@@ -0,0 +1,97 @@
1
+ # © 2026 Eric G. Suchanek, PhD — Flux-Frontiers · SPDX-License-Identifier: Elastic-2.0
2
+ """Synthesis backend factory helpers for per-request backend overrides."""
3
+
4
+ from __future__ import annotations
5
+
6
+ import os
7
+
8
+ from kg_utils.synthesis._config import (
9
+ ImageBackend,
10
+ ImageConfig,
11
+ TextBackend,
12
+ TextConfig,
13
+ )
14
+ from kg_utils.synthesis._image import ImageSynthesizer
15
+ from kg_utils.synthesis._text import TextSynthesizer
16
+
17
+ __all__ = [
18
+ "normalize_openai_base_url",
19
+ "text_synth_for_backend",
20
+ "image_synth_for_backend",
21
+ ]
22
+
23
+
24
def normalize_openai_base_url(endpoint: str) -> str:
    """Return ``endpoint`` as an OpenAI-wire base URL that ends in ``/v1``.

    Surrounding whitespace and trailing slashes are removed first. An empty
    (or whitespace/slash-only) endpoint yields an empty string.
    """
    trimmed = (endpoint or "").strip().rstrip("/")
    if trimmed == "":
        return ""
    return trimmed if trimmed.endswith("/v1") else f"{trimmed}/v1"
35
+
36
+
37
def text_synth_for_backend(backend: str, fallback: TextSynthesizer) -> TextSynthesizer:
    """Build a TextSynthesizer for a per-request backend override.

    ``backend`` is trimmed and lower-cased before being matched against
    ``TextBackend``. Empty or unrecognized values return ``fallback`` unchanged.
    Settings for the new synthesizer are read from environment variables.
    """

    def env_first(*names: str) -> str:
        # First non-empty environment value among ``names``, else "".
        for name in names:
            value = os.environ.get(name)
            if value:
                return value
        return ""

    requested = (backend or "").strip().lower()
    if requested == "":
        return fallback

    try:
        selected = TextBackend(requested)
    except ValueError:
        return fallback

    if selected == TextBackend.OMLX:
        config = TextConfig(
            backend=selected,
            endpoint=normalize_openai_base_url(env_first("SYNTH_ENDPOINT", "VLLM_ENDPOINT_URL")),
            api_key=env_first("SYNTH_API_KEY", "VLLM_API_KEY"),
            model=env_first("SYNTH_MODEL", "VLLM_MODEL"),
        )
        return TextSynthesizer(config)

    if selected == TextBackend.OLLAMA:
        config = TextConfig(backend=selected, endpoint=env_first("OLLAMA_ENDPOINT"))
        return TextSynthesizer(config)

    if selected == TextBackend.OPENAI:
        config = TextConfig(
            backend=selected,
            api_key=env_first("OPENAI_API_KEY", "SYNTH_API_KEY"),
        )
        return TextSynthesizer(config)

    # Any other enum member has no override recipe here.
    return fallback
69
+
70
+
71
def image_synth_for_backend(backend: str, fallback: ImageSynthesizer) -> ImageSynthesizer:
    """Build an ImageSynthesizer for a per-request backend override.

    ``backend`` is trimmed and lower-cased before being matched against
    ``ImageBackend``. Empty or unrecognized values return ``fallback`` unchanged.
    The single backend-specific setting is read from environment variables.
    """
    requested = (backend or "").strip().lower()
    if not requested:
        return fallback

    try:
        selected = ImageBackend(requested)
    except ValueError:
        return fallback

    env = os.environ
    if selected == ImageBackend.OPENAI:
        overrides = {"api_key": env.get("OPENAI_API_KEY") or env.get("IMAGE_API_KEY") or ""}
    elif selected == ImageBackend.MFLUX_SERVE:
        overrides = {"server_url": env.get("IMAGE_ENDPOINT") or ""}
    elif selected == ImageBackend.MFLUX_LOCAL:
        overrides = {"model": env.get("IMAGE_MODEL") or env.get("GUTENKG_IMAGE_MODEL") or ""}
    else:
        # Any other enum member has no override recipe here.
        return fallback

    return ImageSynthesizer(ImageConfig(backend=selected, **overrides))
@@ -0,0 +1,17 @@
1
+ """Worker protocol helpers and client for RunPod ``/runsync`` endpoints."""
2
+
3
+ from kg_utils.worker.client import (
4
+ WorkerClient,
5
+ WorkerError,
6
+ decode_worker_response,
7
+ extract_worker_error,
8
+ )
9
+ from kg_utils.worker.ops import handle_aux_ops
10
+
11
+ __all__ = [
12
+ "WorkerClient",
13
+ "WorkerError",
14
+ "decode_worker_response",
15
+ "extract_worker_error",
16
+ "handle_aux_ops",
17
+ ]
@@ -0,0 +1,196 @@
1
+ # © 2026 Eric G. Suchanek, PhD — Flux-Frontiers · SPDX-License-Identifier: Elastic-2.0
2
+ """RunPod worker client utilities for chat and handler front-ends.
3
+
4
+ This module centralizes payload construction and response/error decoding for
5
+ ``/runsync`` worker calls used by Streamlit clients.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import json
11
+
12
+ import httpx
13
+
14
+
15
class WorkerError(Exception):
    """Application-level failure decoded from a worker response payload."""
17
+
18
+
19
def _format_error_data(error_data: object) -> str:
    """Render worker error data as a single readable string.

    A string is first tried as JSON; if it does not parse it is returned
    unchanged. A dict (given directly or decoded from JSON) is rendered as
    ``"<error_type>: <error_message>"``, defaulting to ``Unknown`` and the
    dict's own string form. Anything else is passed through ``str``.
    """
    payload = error_data
    if isinstance(payload, str):
        try:
            payload = json.loads(payload)
        except (ValueError, TypeError):
            return error_data

    if isinstance(payload, dict):
        kind = payload.get("error_type", "Unknown")
        message = payload.get("error_message", str(payload))
        return f"{kind}: {message}"

    return str(payload)
37
+
38
+
39
def extract_worker_error(data: object) -> str | None:
    """Return a readable error found in a raw worker response, or ``None``.

    Non-dict payloads are reported as their string form. A payload whose
    ``status`` is ``FAILED`` or that carries an ``error_type`` key is formatted
    from its ``error`` entry (or from the payload itself when absent).
    Otherwise a string ``output["error"]`` is returned as-is.
    """
    if not isinstance(data, dict):
        return str(data)

    failed = data.get("status") == "FAILED"
    if failed or "error_type" in data:
        return _format_error_data(data.get("error", data))

    output = data.get("output")
    if isinstance(output, dict):
        nested = output.get("error")
        if isinstance(nested, str):
            return nested

    return None
52
+
53
+
54
def decode_worker_response(data: object) -> dict:
    """Return the output dict of a worker response.

    Raises ``WorkerError`` when ``extract_worker_error`` reports a non-empty
    error, when ``data`` is not a dict, or when the output is not a dict. The
    output is ``data["output"]`` if present, otherwise ``data`` itself.
    """
    problem = extract_worker_error(data)
    if problem:
        raise WorkerError(problem)

    if isinstance(data, dict):
        output = data.get("output", data)
        if isinstance(output, dict):
            return output
        raise WorkerError(f"unexpected worker output type: {type(output).__name__}")

    raise WorkerError(f"unexpected worker response type: {type(data).__name__}")
67
+
68
+
69
class WorkerClient:
    """Small client for RunPod ``/runsync`` worker endpoints.

    Every call is one JSON ``POST`` to ``<base_url>/runsync`` with a body of
    the form ``{"input": {...}}``. When a secret was supplied it is sent as
    ``input["secret"]``.
    """

    def __init__(self, base_url: str, secret: str = "") -> None:
        """Store the endpoint base URL (trailing slashes removed) and the secret."""
        self._base_url = base_url.rstrip("/")
        self._secret = secret

    def _envelope(self, fields: dict) -> dict:
        """Wrap ``fields`` as a ``/runsync`` body, adding the secret when set."""
        if self._secret:
            fields["secret"] = self._secret
        return {"input": fields}

    def _post(self, payload: dict, timeout: httpx.Timeout) -> dict:
        """POST ``payload`` to ``/runsync`` and return the decoded JSON body.

        Raises whatever ``httpx`` raises for transport failures and for
        non-success status codes (``raise_for_status``).
        """
        resp = httpx.post(f"{self._base_url}/runsync", json=payload, timeout=timeout)
        resp.raise_for_status()
        return resp.json()

    def list_models(self, backend: str = "") -> tuple[list[str], str]:
        """Return ``(models, default_model)`` reported by the ``models`` op.

        Best-effort: any failure, or an output of unexpected shape, yields
        ``([], "")`` instead of raising.
        """
        fields: dict = {"op": "models"}
        if backend:
            fields["backend"] = backend
        payload = self._envelope(fields)

        try:
            data = self._post(
                payload,
                timeout=httpx.Timeout(connect=5.0, read=20.0, write=5.0, pool=5.0),
            )
            out = data.get("output", {}) if isinstance(data, dict) else {}
            if not isinstance(out, dict):
                return [], ""
            # ``or`` also covers keys that are present but null.
            return out.get("models") or [], out.get("default") or ""
        except Exception:  # noqa: BLE001
            return [], ""

    def rewrite(
        self,
        text: str,
        backend: str = "",
        model: str = "",
    ) -> tuple[str, str | None]:
        """Ask the worker to rewrite ``text``; return ``(prompt, error)``.

        Never raises: on any failure the original ``text`` is returned together
        with an error string. The original ``text`` is also returned when the
        worker output carries no string ``prompt``.
        """
        fields: dict = {"op": "rewrite", "text": text}
        if backend:
            fields["backend"] = backend
        if model:
            fields["model"] = model
        payload = self._envelope(fields)

        try:
            data = self._post(
                payload,
                timeout=httpx.Timeout(connect=5.0, read=60.0, write=10.0, pool=5.0),
            )
            err = extract_worker_error(data)
            if err:
                return text, err
            out = data.get("output", {}) if isinstance(data, dict) else {}
            if not isinstance(out, dict):
                return text, "unexpected worker output"
            prompt = out.get("prompt", text)
            if not isinstance(prompt, str):
                # A null/non-string prompt must not leak out as the rewritten text.
                prompt = text
            return prompt, out.get("error")
        except Exception as exc:  # noqa: BLE001
            return text, str(exc)

    def imagine(
        self,
        prompt: str,
        *,
        image_backend: str = "",
        aspect_ratio: str = "3:2",
        steps: int | None = None,
        seed: int | None = None,
    ) -> tuple[str | None, str | None, str | None, str | None]:
        """Request an image; return ``(image_b64, image_model, image_backend, error)``.

        ``steps`` and ``seed`` are only sent when they are not ``None``.
        Never raises: on failure the first three items are ``None`` and the
        last one holds the error text.
        """
        fields: dict = {"op": "imagine", "prompt": prompt, "aspect_ratio": aspect_ratio}
        if image_backend:
            fields["image_backend"] = image_backend
        if steps is not None:
            fields["steps"] = steps
        if seed is not None:
            fields["seed"] = seed
        payload = self._envelope(fields)

        try:
            data = self._post(
                payload,
                timeout=httpx.Timeout(connect=5.0, read=300.0, write=10.0, pool=5.0),
            )
            err = extract_worker_error(data)
            if err:
                return None, None, None, err

            out = data.get("output", {}) if isinstance(data, dict) else {}
            if not isinstance(out, dict):
                return None, None, None, "unexpected worker output"
            # A null ``error`` entry means "no error", not the text "None".
            if out.get("error") is not None:
                return None, None, None, str(out["error"])
            return out.get("image_b64"), out.get("image_model"), out.get("image_backend"), None
        except Exception as exc:  # noqa: BLE001
            return None, None, None, str(exc)

    def query(
        self,
        query: str,
        *,
        corpus: str = "all",
        k: int = 8,
        min_score: float = 0.0,
        semantic_floor: float = 0.0,
        synthesize: bool = False,
        model: str = "",
        backend: str = "",
    ) -> dict:
        """Run a retrieval query and return the decoded worker output.

        Unlike the other methods this one raises: transport/HTTP errors from
        ``httpx`` propagate, and ``decode_worker_response`` raises
        ``WorkerError`` for application-level failures.
        """
        fields: dict = {
            "query": query,
            "corpus": corpus,
            "k": k,
            "min_score": min_score,
            "semantic_floor": semantic_floor,
            "synthesize": synthesize,
        }
        if model:
            fields["model"] = model
        if backend:
            fields["backend"] = backend
        payload = self._envelope(fields)

        data = self._post(
            payload,
            timeout=httpx.Timeout(connect=5.0, read=600.0, write=30.0, pool=5.0),
        )
        return decode_worker_response(data)
@@ -0,0 +1,72 @@
1
+ # © 2026 Eric G. Suchanek, PhD — Flux-Frontiers · SPDX-License-Identifier: Elastic-2.0
2
+ """Shared handler operation dispatch for models, rewrite, and imagine."""
3
+
4
+ from __future__ import annotations
5
+
6
+ from collections.abc import Callable
7
+
8
+ from kg_utils.synthesis._image import ImageSynthesizer
9
+ from kg_utils.synthesis._text import TextSynthesizer
10
+
11
+ __all__ = ["handle_aux_ops"]
12
+
13
+
14
def handle_aux_ops(
    inp: dict,
    text_synth_factory: Callable[[str], TextSynthesizer],
    image_synth_factory: Callable[[str], ImageSynthesizer],
) -> dict | None:
    """Handle shared non-query worker operations.

    Dispatches on ``inp["op"]``:

    - ``models``: list the text synthesizer's models and its resolved default.
    - ``rewrite``: rewrite ``inp["text"]`` into an image prompt.
    - ``imagine``: generate a base64 image from ``inp["prompt"]``.

    Optional inputs that are missing *or* explicitly null fall back to their
    defaults (empty backend name, ``3:2`` aspect ratio).

    Returns:
    - operation payload dict when op is recognized (validation and image
      generation failures are reported as ``{"error": ...}``)
    - ``None`` when input has no recognized operation
    """
    op = inp.get("op")

    if op == "models":
        synth = text_synth_factory(inp.get("backend") or "")
        # Existing handlers expose the active model via synthesizer config internals.
        return {
            "models": synth.list_models(),
            "default": synth._cfg.resolved_model(),  # pylint: disable=protected-access
        }

    if op == "rewrite":
        text = (inp.get("text") or "").strip()
        if not text:
            return {"error": "rewrite requires a non-empty 'text'"}

        synth = text_synth_factory(inp.get("backend") or "")
        # A blank model string means "no override".
        model_override = (inp.get("model") or "").strip() or None
        prompt, error = synth.rewrite_for_image(text, model=model_override)
        return {"prompt": prompt, "error": error}

    if op == "imagine":
        prompt = (inp.get("prompt") or "").strip()
        if not prompt:
            return {"error": "imagine requires a non-empty 'prompt'"}

        # ``or`` (not a ``.get`` default) so that an explicit JSON null also
        # falls back to the default aspect ratio / backend.
        aspect = inp.get("aspect_ratio") or "3:2"
        seed = inp.get("seed")
        steps = inp.get("steps")
        img_synth = image_synth_factory(inp.get("image_backend") or "")

        try:
            # The int() conversions stay inside the try so that malformed
            # values are reported as an error payload instead of raising.
            b64 = img_synth.generate_b64(
                prompt,
                aspect_ratio=aspect,
                seed=int(seed) if seed is not None else None,
                steps=int(steps) if steps is not None else None,
            )
            return {
                "image_b64": b64,
                "prompt": prompt,
                "aspect_ratio": aspect,
                "image_model": img_synth._cfg.resolved_model(),  # pylint: disable=protected-access
                "image_backend": img_synth._cfg.backend.value,  # pylint: disable=protected-access
            }
        except Exception as exc:  # noqa: BLE001  # pylint: disable=broad-exception-caught
            return {"error": f"image generation failed: {exc}"}

    return None
File without changes
File without changes