krysta 1.0.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- krysta-1.0.5/PKG-INFO +118 -0
- krysta-1.0.5/README.md +100 -0
- krysta-1.0.5/krysta/__init__.py +111 -0
- krysta-1.0.5/krysta/noa.py +84 -0
- krysta-1.0.5/krysta/sandbox.py +84 -0
- krysta-1.0.5/krysta/trace.py +74 -0
- krysta-1.0.5/krysta.egg-info/PKG-INFO +118 -0
- krysta-1.0.5/krysta.egg-info/SOURCES.txt +15 -0
- krysta-1.0.5/krysta.egg-info/dependency_links.txt +1 -0
- krysta-1.0.5/krysta.egg-info/requires.txt +6 -0
- krysta-1.0.5/krysta.egg-info/top_level.txt +2 -0
- krysta-1.0.5/krysta_reporter/__init__.py +1 -0
- krysta-1.0.5/krysta_reporter/dispatch.py +0 -0
- krysta-1.0.5/krysta_reporter/engine.py +317 -0
- krysta-1.0.5/krysta_reporter/templates/base_report.md +29 -0
- krysta-1.0.5/pyproject.toml +35 -0
- krysta-1.0.5/setup.cfg +4 -0
krysta-1.0.5/PKG-INFO
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: krysta
|
|
3
|
+
Version: 1.0.5
|
|
4
|
+
Summary: A unified multimodal model evaluation tracking and engineering report engine.
|
|
5
|
+
Author: Anshu Aditya
|
|
6
|
+
Project-URL: Homepage, https://github.com/your-username/kwing_library
|
|
7
|
+
Classifier: Programming Language :: Python :: 3
|
|
8
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
9
|
+
Classifier: Operating System :: OS Independent
|
|
10
|
+
Requires-Python: >=3.8
|
|
11
|
+
Description-Content-Type: text/markdown
|
|
12
|
+
Requires-Dist: jinja2>=3.0.0
|
|
13
|
+
Requires-Dist: matplotlib>=3.5.0
|
|
14
|
+
Requires-Dist: numpy>=1.20.0
|
|
15
|
+
Requires-Dist: pyyaml>=6.0
|
|
16
|
+
Requires-Dist: httpx>=0.27.0
|
|
17
|
+
Requires-Dist: httpx-sse>=0.4.0
|
|
18
|
+
|
|
19
|
+
# NoA Python SDK API Reference Documentation
|
|
20
|
+
|
|
21
|
+
This document provides technical reference details for the classes, methods, parameters, and streaming response objects available within the `krysta` client library.
|
|
22
|
+
|
|
23
|
+
---
|
|
24
|
+
|
|
25
|
+
## Core Class Architecture
|
|
26
|
+
|
|
27
|
+
### `NoA` Class
|
|
28
|
+
The main client manager used to initialize connections and manage state lifecycle pipes with your remote execution infrastructure gateway.
|
|
29
|
+
|
|
30
|
+
```python
|
|
31
|
+
from krysta.noa import Noa
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
#### Class Constructor Matrix
|
|
35
|
+
```python
|
|
36
|
+
Noa(gateway_url: str)
|
|
37
|
+
```
|
|
38
|
+
* **Parameters:**
|
|
39
|
+
* `gateway_url` *(str, Required)*: The base HTTP/WS network address of your running NoA server cluster dashboard node (e.g., `"http://localhost:3000"`).
|
|
40
|
+
|
|
41
|
+
#### Context Manager Methods
|
|
42
|
+
The client class fully implements the standard asynchronous context manager layout (`__aenter__` / `__aexit__`) to automatically handle socket connections, channel allocations, and memory resource cleanups safely.
|
|
43
|
+
|
|
44
|
+
```python
|
|
45
|
+
async with Noa(gateway_url="...") as client:
|
|
46
|
+
# Execution resources are automatically allocated here
|
|
47
|
+
pass
|
|
48
|
+
# Connection pipes are safely destroyed here
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
---
|
|
52
|
+
|
|
53
|
+
## Method Reference Maps
|
|
54
|
+
|
|
55
|
+
### `execute()`
|
|
56
|
+
Initiates an asynchronous Server-Sent Events (SSE) background task worker thread to evaluate a raw source code string.
|
|
57
|
+
|
|
58
|
+
#### Method Definition Syntax
|
|
59
|
+
```python
|
|
60
|
+
def execute(
|
|
61
|
+
language: str,
|
|
62
|
+
code: str,
|
|
63
|
+
timeout_ms: int = 5000
|
|
64
|
+
) -> AsyncIterator[dict]:
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
#### Input Arguments Block
|
|
68
|
+
* **`language`** *(str, Required)*: The programming compilation runner target environment to spawn inside the isolated cluster node. Supported strings:
|
|
69
|
+
* `"python"`
|
|
70
|
+
* `"javascript"`
|
|
71
|
+
* **`code`** *(str, Required)*: The uncompiled text string payload containing the raw source code script to evaluate inside the container pool.
|
|
72
|
+
* **`timeout_ms`** *(int, Optional)*: The strict maximum allowed execution time window in milliseconds before the watchdog thread hard-kills (`SIGKILL`) the task runner. Default fallback threshold value: `5000`.
|
|
73
|
+
|
|
74
|
+
#### Return Type Value
|
|
75
|
+
* Returns an **`AsyncIterator[dict]`** stream generator object. You must consume this data payload frame-by-frame using an `async for` loop layout block.
|
|
76
|
+
|
|
77
|
+
---
|
|
78
|
+
|
|
79
|
+
## Stream Event Response Payload Schema
|
|
80
|
+
|
|
81
|
+
Every data unit emitted from the async iterator returns a structured Python `dict` map object containing the following token parameters:
|
|
82
|
+
|
|
83
|
+
```json
|
|
84
|
+
{
|
|
85
|
+
"type": "system" | "stdout" | "stderr" | "rules" | "done" | "error",
|
|
86
|
+
"text": string | null,
|
|
87
|
+
"timestamp": integer
|
|
88
|
+
}
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
### Event Parameter Types Definition Matrix
|
|
92
|
+
|
|
93
|
+
#### 1. `type: "system"`
|
|
94
|
+
Emitted immediately when the gateway server begins allocating memory allocations or scheduling task execution queues.
|
|
95
|
+
* **Text Contents:** Standard tracking message flags (e.g., `"EXECUTION_STARTED"`).
|
|
96
|
+
|
|
97
|
+
#### 2. `type: "stdout"`
|
|
98
|
+
Emitted instantly whenever the sandboxed script writes characters to the standard console system output.
|
|
99
|
+
* **Text Contents:** The raw printed string text data.
|
|
100
|
+
|
|
101
|
+
#### 3. `type: "stderr"`
|
|
102
|
+
Emitted instantly when unhandled runtime errors, system exceptions, or line code tracebacks occur inside the isolated runner.
|
|
103
|
+
* **Text Contents:** Standard multiline traceback exception information text.
|
|
104
|
+
|
|
105
|
+
#### 4. `type: "rules"`
|
|
106
|
+
Emitted near task termination. Contains a serialized JSON string listing static metric quality scores and evaluation check markers.
|
|
107
|
+
* **Text Contents Schema:**
|
|
108
|
+
```json
|
|
109
|
+
"[{\"rule\": \"ExitCodeZeroRule\", \"result\": \"PASS\" | \"FAIL\", \"reason\": \"...\"}]"
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
#### 5. `type: "done"`
|
|
113
|
+
Emitted when the execution lifecycle pipeline completes its loop naturally and closes down successfully.
|
|
114
|
+
* **Text Contents:** `null`
|
|
115
|
+
|
|
116
|
+
#### 6. `type: "error"`
|
|
117
|
+
Emitted if a network drop occurs, or if the server cannot complete an internal spawn routing operation.
|
|
118
|
+
* **Text Contents:** Detailed platform failure tracking notes (e.g., `"Job failed during execution"`).
|
krysta-1.0.5/README.md
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
# NoA Python SDK API Reference Documentation
|
|
2
|
+
|
|
3
|
+
This document provides technical reference details for the classes, methods, parameters, and streaming response objects available within the `krysta` client library.
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## Core Class Architecture
|
|
8
|
+
|
|
9
|
+
### `Noa` Class
|
|
10
|
+
The main client manager used to initialize connections and manage state lifecycle pipes with your remote execution infrastructure gateway.
|
|
11
|
+
|
|
12
|
+
```python
|
|
13
|
+
from krysta.noa import Noa
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
#### Class Constructor Matrix
|
|
17
|
+
```python
|
|
18
|
+
Noa(gateway_url: str)
|
|
19
|
+
```
|
|
20
|
+
* **Parameters:**
|
|
21
|
+
* `gateway_url` *(str, Required)*: The base HTTP/WS network address of your running NoA server cluster dashboard node (e.g., `"http://localhost:3000"`).
|
|
22
|
+
|
|
23
|
+
#### Context Manager Methods
|
|
24
|
+
The client class fully implements the standard asynchronous context manager layout (`__aenter__` / `__aexit__`) to automatically handle socket connections, channel allocations, and memory resource cleanups safely.
|
|
25
|
+
|
|
26
|
+
```python
|
|
27
|
+
async with Noa(gateway_url="...") as client:
|
|
28
|
+
# Execution resources are automatically allocated here
|
|
29
|
+
pass
|
|
30
|
+
# Connection pipes are safely destroyed here
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
---
|
|
34
|
+
|
|
35
|
+
## Method Reference Maps
|
|
36
|
+
|
|
37
|
+
### `execute()`
|
|
38
|
+
Submits a raw source code string to the gateway for evaluation, then streams the resulting runtime events back over a Server-Sent Events (SSE) connection.
|
|
39
|
+
|
|
40
|
+
#### Method Definition Syntax
|
|
41
|
+
```python
|
|
42
|
+
async def execute(
|
|
43
|
+
language: str,
|
|
44
|
+
code: str,
|
|
45
|
+
timeout_ms: int = 10000
|
|
46
|
+
) -> AsyncIterator[dict]:
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
#### Input Arguments Block
|
|
50
|
+
* **`language`** *(str, Required)*: The programming compilation runner target environment to spawn inside the isolated cluster node. Supported strings:
|
|
51
|
+
* `"python"`
|
|
52
|
+
* `"javascript"`
|
|
53
|
+
* **`code`** *(str, Required)*: The uncompiled text string payload containing the raw source code script to evaluate inside the container pool.
|
|
54
|
+
* **`timeout_ms`** *(int, Optional)*: The strict maximum allowed execution time window in milliseconds before the watchdog thread hard-kills (`SIGKILL`) the task runner. Default fallback threshold value: `10000`.
|
|
55
|
+
|
|
56
|
+
#### Return Type Value
|
|
57
|
+
* Returns an **`AsyncIterator[dict]`** stream generator object. You must consume this data payload frame-by-frame using an `async for` loop layout block.
|
|
58
|
+
|
|
59
|
+
---
|
|
60
|
+
|
|
61
|
+
## Stream Event Response Payload Schema
|
|
62
|
+
|
|
63
|
+
Every data unit emitted from the async iterator returns a structured Python `dict` map object containing the following token parameters:
|
|
64
|
+
|
|
65
|
+
```json
|
|
66
|
+
{
|
|
67
|
+
"type": "system" | "stdout" | "stderr" | "rules" | "done" | "error",
|
|
68
|
+
"text": string | null,
|
|
69
|
+
"timestamp": integer
|
|
70
|
+
}
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
### Event Parameter Types Definition Matrix
|
|
74
|
+
|
|
75
|
+
#### 1. `type: "system"`
|
|
76
|
+
Emitted immediately when the gateway server begins allocating resources or scheduling the task for execution.
|
|
77
|
+
* **Text Contents:** Standard tracking message flags (e.g., `"EXECUTION_STARTED"`).
|
|
78
|
+
|
|
79
|
+
#### 2. `type: "stdout"`
|
|
80
|
+
Emitted instantly whenever the sandboxed script writes characters to the standard console system output.
|
|
81
|
+
* **Text Contents:** The raw printed string text data.
|
|
82
|
+
|
|
83
|
+
#### 3. `type: "stderr"`
|
|
84
|
+
Emitted instantly when unhandled runtime errors, system exceptions, or line code tracebacks occur inside the isolated runner.
|
|
85
|
+
* **Text Contents:** Standard multiline traceback exception information text.
|
|
86
|
+
|
|
87
|
+
#### 4. `type: "rules"`
|
|
88
|
+
Emitted near task termination. Contains a serialized JSON string listing static metric quality scores and evaluation check markers.
|
|
89
|
+
* **Text Contents Schema:**
|
|
90
|
+
```json
|
|
91
|
+
"[{\"rule\": \"ExitCodeZeroRule\", \"result\": \"PASS\" | \"FAIL\", \"reason\": \"...\"}]"
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
#### 5. `type: "done"`
|
|
95
|
+
Emitted when the execution lifecycle pipeline completes its loop naturally and closes down successfully.
|
|
96
|
+
* **Text Contents:** `null`
|
|
97
|
+
|
|
98
|
+
#### 6. `type: "error"`
|
|
99
|
+
Emitted if a network drop occurs, or if the server cannot complete an internal spawn routing operation.
|
|
100
|
+
* **Text Contents:** Detailed platform failure tracking notes (e.g., `"Job failed during execution"`).
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import httpx
|
|
3
|
+
import asyncio
|
|
4
|
+
from typing import AsyncGenerator
|
|
5
|
+
from .trace import ExecutionTrace
|
|
6
|
+
from .sandbox import RuleEngine
|
|
7
|
+
|
|
8
|
+
class Noa:
    """The primary public entrypoint for the Krysta NoA SDK ecosystem.

    Groups four operations behind one object: ``spawn`` (create a streaming
    lifecycle for a job), ``trace`` (load a finished job's trace),
    ``validate`` (run a trace through the rule engine) and ``execute``
    (submit, drain the stream, and return the final trace in one call).
    """

    def __init__(self, gateway_url: str = "http://localhost:3000"):
        """Store the gateway base URL and create the rule engine.

        Args:
            gateway_url: Base address of the gateway. Trailing slashes are
                removed so that paths such as ``/api/submit`` can be
                appended without producing ``//``.
        """
        # The previous default was spelled `"http://localhost:3000" or "<url>"`.
        # `or` between two non-empty strings always yields the first one, so
        # the effective default has always been the localhost address.
        self.gateway_url = gateway_url.rstrip("/")
        self._engine = RuleEngine()

    def spawn(self, language: str, code: str):
        """Return a ``NoaExecutionLifecycle`` for one job (nothing is sent yet)."""
        return NoaExecutionLifecycle(self.gateway_url, language, code)

    def trace(self, job_id: str) -> "ExecutionTrace":
        """
        Fetches the final ExecutionTrace for a completed job from Redis.
        """
        return ExecutionTrace.from_redis(job_id)

    def validate(self, trace: "ExecutionTrace") -> dict:
        """
        Passes an ExecutionTrace through the sandbox RuleEngine validator.
        """
        return self._engine.validate(trace)

    async def execute(self, language: str, code: str) -> "ExecutionTrace":
        """
        High-level orchestration method that automatically posts a job, consumes
        and exhausts the live streaming frames, and resolves into a full ExecutionTrace.

        Raises:
            RuntimeError: if no job id was obtained from the gateway.
        """
        lifecycle = self.spawn(language, code)

        try:
            async with lifecycle as stream:
                async for _ in stream:
                    # Frames are discarded here; the lifecycle records the
                    # job id and the metrics duration as it streams.
                    pass
        except httpx.ReadError:
            # Best-effort: a dropped stream is reported, then the trace is
            # still fetched below if a job id was already assigned.
            print("\n[SDK WARNING] Telemetry pipe disrupted or closed early by server.")

        job_id = lifecycle.job_id
        if not job_id:
            raise RuntimeError("[SDK ERROR] Connection dropped before a Job ID could be securely assigned.")

        print(f"[SDK INFO] Fetching final validation metric traces for Job ID: {job_id}...")
        # NOTE(review): self.trace() is a synchronous call inside a coroutine;
        # it blocks the event loop while the trace is fetched.
        trace = self.trace(job_id)

        # Backfill the duration captured from the stream's metrics frame, if any.
        if lifecycle.duration_ms is not None:
            trace.duration_ms = lifecycle.duration_ms

        return trace
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class NoaExecutionLifecycle:
    """Handles the inner stateful scope of an active streaming execution task connection.

    Used as ``async with lifecycle as stream: async for frame in stream: ...``.
    Entering submits the job and returns an async generator of decoded frames;
    exiting closes that generator so its HTTP client and response are released
    even when the consumer stops iterating early.

    Attributes set while running:
        job_id: value of ``jobId`` in the submit response, else ``None``.
        duration_ms: ``duration_ms`` taken from a ``metrics`` frame, else ``None``.
    """

    def __init__(self, gateway_url: str, language: str, code: str):
        # Normalise so f"{gateway_url}/api/..." never contains a double slash.
        self.gateway_url = gateway_url.rstrip("/")
        self.language = language
        self.code = code
        self.job_id = None
        self.duration_ms = None  # filled from the metrics frame during streaming
        self._stream = None  # generator handed out by __aenter__, closed in __aexit__

    async def __aenter__(self):
        """POST the job to ``/api/submit`` and return the frame generator.

        Raises:
            httpx.HTTPStatusError: if the gateway answers with an error status.
        """
        async with httpx.AsyncClient() as client:
            response = await client.post(
                f"{self.gateway_url}/api/submit",
                json={"language": self.language, "code": self.code},
            )
            response.raise_for_status()
            payload = response.json()
            self.job_id = payload.get("jobId")

        self._stream = self._stream_generator()
        return self._stream

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Close the frame generator; never suppresses the active exception."""
        stream, self._stream = self._stream, None
        if stream is not None:
            # aclose() unwinds the generator's `async with` blocks, releasing
            # the streaming client and response. It is a no-op when the
            # generator has already finished.
            await stream.aclose()

    async def _stream_generator(self) -> AsyncGenerator[dict, None]:
        """Yield each JSON object received on ``/api/stream`` for this job.

        Only lines starting with ``data:`` are decoded; lines that are not
        valid JSON objects are reported and skipped. Iteration stops after a
        frame whose type is ``done``, ``timeout`` or ``error``.
        """
        async with httpx.AsyncClient(timeout=None) as client:
            # `params` lets httpx URL-encode the job id instead of splicing it
            # into the query string verbatim.
            async with client.stream(
                "GET",
                f"{self.gateway_url}/api/stream",
                params={"jobId": self.job_id},
            ) as response:
                print("[SDK DEBUG] SSE Stream link connected! Awaiting incoming frames from background worker...")
                async for line in response.aiter_lines():
                    if not line.startswith("data:"):
                        continue

                    # Keep the try body to the parse itself so that failures
                    # elsewhere are not misreported as parse errors.
                    try:
                        frame = json.loads(line[5:])
                    except json.JSONDecodeError as e:
                        print(f"[SDK DEBUG] Failed to parse frame line: {e}")
                        continue

                    if not isinstance(frame, dict):
                        print(
                            "[SDK DEBUG] Failed to parse frame line: "
                            f"expected a JSON object, got {type(frame).__name__}"
                        )
                        continue

                    frame_type = frame.get("type")
                    print(f"[SDK DEBUG] Received frame type: {frame_type}")

                    # Capture duration_ms from the metrics frame before yielding.
                    if frame_type == "metrics" and frame.get("text"):
                        try:
                            metrics = json.loads(frame["text"])
                            self.duration_ms = metrics.get("duration_ms", 0)
                        except Exception as e:
                            # Best-effort: a bad metrics payload must not end the stream.
                            print(f"[SDK WARNING] Failed to parse metrics frame: {e}")

                    yield frame

                    if frame_type in ("done", "timeout", "error"):
                        print("[SDK DEBUG] Terminal lifecycle frame detected. Closing stream context connection.")
                        break
|
|
110
|
+
|
|
111
|
+
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
import httpx
|
|
2
|
+
from httpx_sse import aconnect_sse
|
|
3
|
+
import asyncio
|
|
4
|
+
from typing import AsyncIterator, Optional
|
|
5
|
+
|
|
6
|
+
class Noa:
    """Async client that submits code to the gateway and streams its events.

    Must be used as ``async with Noa(...) as noa:``; the context manager owns
    the ``httpx.AsyncClient`` that ``execute`` sends requests through.
    """

    def __init__(self, gateway_url: str = "http://localhost:3000"):
        """
        Initializes the Krysta Sandbox execution engine SDK client interface.
        """
        # Trailing slashes are dropped so endpoint paths can be appended safely.
        self.gateway_url = gateway_url.rstrip("/")
        self.client: Optional[httpx.AsyncClient] = None

    async def __aenter__(self):
        # Open the shared asynchronous HTTP client for this session.
        self.client = httpx.AsyncClient()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        # Close the client and forget it, so a later execute() call hits the
        # "must be entered" guard instead of using a closed client.
        if self.client:
            await self.client.aclose()
            self.client = None

    async def execute(self, language: str, code: str, timeout_ms: int = 10000) -> AsyncIterator[dict]:
        """
        POSTs the code execution task to the API gateway, then establishes
        an SSE live event connection to stream runtime events chunk-by-chunk.

        Args:
            language: sent to the gateway as ``language``.
            code: sent to the gateway as ``code``.
            timeout_ms: sent to the gateway as ``timeout_ms``.

        Yields:
            The JSON-decoded payload of each server-sent event.

        Raises:
            RuntimeError: if the client context was not entered, the gateway
                does not answer 202, the answer is not JSON, or the event
                stream fails or times out.
            ValueError: if the gateway answer contains no ``jobId``.
        """
        if not self.client:
            raise RuntimeError("Noa context manager must be entered using 'async with Noa(...) as noa:'")

        submit_url = f"{self.gateway_url}/api/submit"
        stream_url = f"{self.gateway_url}/api/stream"

        payload = {
            "language": language,
            "code": code,
            "timeout_ms": timeout_ms,
        }

        # 1. Dispatch the code payload to the submit endpoint.
        response = await self.client.post(submit_url, json=payload)

        # Anything other than 202 is treated as a rejection; the raw body is
        # included to make blank or failed gateway responses diagnosable.
        if response.status_code != 202:
            raise RuntimeError(
                "Gateway Rejected Request.\n"
                f"Status Code: {response.status_code}\n"
                f"Raw Response Content: '{response.text}'"
            )

        try:
            receipt = response.json()
        except ValueError as json_err:
            raise RuntimeError(
                "Gateway returned non-JSON response.\n"
                f"Status Code: {response.status_code}\n"
                f"Raw Response Content: '{response.text}'"
            ) from json_err

        job_id = receipt.get("jobId")
        if not job_id:
            raise ValueError("Gateway failed to yield a valid infrastructure jobId transaction receipt token.")

        # 2. Attach to the live event stream for this job.
        params = {"jobId": job_id}

        print(f"[SDK DEBUG] Establishing SSE streaming pipe connection to {stream_url}...")

        try:
            # Explicit, longer timeouts than the httpx defaults so that the
            # stream is not dropped early.
            timeout_config = httpx.Timeout(60.0, connect=10.0)

            async with aconnect_sse(self.client, "GET", stream_url, params=params, timeout=timeout_config) as event_stream:
                print("[SDK DEBUG] SSE pipe connection link established. Awaiting daemon frames...")
                async for event in event_stream.aiter_sse():
                    # Hand each decoded event to the caller's `async for` loop.
                    yield event.json()

        except httpx.TimeoutException as timeout_err:
            raise RuntimeError(
                f"Streaming telemetry channel timed out waiting for broker response. Details: {timeout_err}"
            ) from timeout_err
        except Exception as stream_err:
            # Chained with `from` so the original traceback stays available.
            raise RuntimeError(
                f"SSE Telemetry transport layer connection failed. Details: ({type(stream_err).__name__}) {stream_err}"
            ) from stream_err
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from typing import Tuple, Dict
|
|
3
|
+
from .trace import ExecutionTrace
|
|
4
|
+
|
|
5
|
+
class ValidationRule:
    """Abstract base class for all sandbox evaluation rules."""

    def evaluate(self, trace: "ExecutionTrace") -> Tuple[bool, str]:
        """Return ``(passed, reason)`` for *trace*; subclasses must override."""
        raise NotImplementedError


class ExitCodeZeroRule(ValidationRule):
    """Verifies that the sandboxed process wrapped up operations with a clean exit status."""

    def evaluate(self, trace: "ExecutionTrace") -> Tuple[bool, str]:
        """Fail on timeout, missing exit code, or non-zero exit code, in that order."""
        if trace.timeout_hit:
            return False, "Sandbox execution exceeded the allocated runtime wall clock limit."
        if trace.exit_code is None:
            return False, "Process terminated abruptly without returning a valid exit status code."
        if trace.exit_code != 0:
            return False, f"Process terminated with unhandled non-zero exit status: {trace.exit_code}."
        return True, "Process completed runtime operations cleanly with exit code 0."


class ValidJsonRule(ValidationRule):
    """Ensures the console stdout stream can be compiled into structured JSON metadata."""

    def evaluate(self, trace: "ExecutionTrace") -> Tuple[bool, str]:
        """Pass when the concatenated ``stdout`` frame texts parse as JSON."""
        # Collect and concatenate all stdout text chunks. `or ""` also covers
        # frames whose "text" key is present but null, which would otherwise
        # make str.join raise TypeError.
        combined_output = "".join(
            frame.get("text") or ""
            for frame in trace.stdout_lines
            if frame.get("type") == "stdout"
        )

        # Strip out the lifecycle banner before parsing.
        clean_payload = combined_output.replace("SYSTEM // EXECUTION_STARTED", "").strip()

        if not clean_payload:
            return False, "Process stdout stream is completely empty. No data payload found to parse."

        try:
            json.loads(clean_payload)
        except json.JSONDecodeError as e:
            return False, f"Malformed output layout. Failed to parse stream as structural JSON. Details: {e}"
        return True, "Stdout stream successfully compiled into valid structural JSON."


class NoNetworkCallsRule(ValidationRule):
    """Inspects log traces to detect unauthorized outbound socket connection attempts."""

    # Substrings (matched case-insensitively) treated as evidence of network use.
    # NOTE(review): plain substring matching also flags harmless output that
    # merely mentions e.g. "http" — confirm this strictness is intended.
    _NETWORK_MARKERS = ("socket", "http", "connection refused", "urllib", "requests")

    def evaluate(self, trace: "ExecutionTrace") -> Tuple[bool, str]:
        """Fail on the first frame whose text contains a network marker."""
        for frame in trace.stdout_lines:
            # A frame may carry no text, or an explicit null; treat both as empty.
            text = (frame.get("text") or "").lower()
            if any(marker in text for marker in self._NETWORK_MARKERS):
                return False, f"Security sandbox violation: Rogue external network handshake detected: '{frame.get('text')}'"
        return True, "No outbound socket or data link requests detected during script lifecycle tracking."
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class RuleEngine:
    """Orchestrates sequential trace assessments against the configured ruleset profile."""

    def __init__(self):
        # Rules are evaluated in this order by validate().
        self.rules = [
            ExitCodeZeroRule(),
            ValidJsonRule(),
            NoNetworkCallsRule(),
        ]

    def validate(self, trace: "ExecutionTrace") -> Dict[str, object]:
        """Run every rule against *trace* and summarise the outcome.

        Returns:
            ``{"passed": bool, "results": {rule_class_name: {"status":
            "PASS" | "FAIL", "reason": str}}}``. ``passed`` is true only when
            every rule passed. Results are keyed by rule class name, so two
            rules of the same class would share one entry.
        """
        results = {}

        for rule in self.rules:
            passed, reason = rule.evaluate(trace)
            results[type(rule).__name__] = {
                "status": "PASS" if passed else "FAIL",
                "reason": reason,
            }

        return {
            "passed": all(entry["status"] == "PASS" for entry in results.values()),
            "results": results,
        }
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import os
|
|
3
|
+
from dotenv import load_dotenv
|
|
4
|
+
from upstash_redis import Redis
|
|
5
|
+
from typing import List, Dict, Optional
|
|
6
|
+
|
|
7
|
+
load_dotenv(dotenv_path=".env.local")
|
|
8
|
+
|
|
9
|
+
class ExecutionTrace:
    """Plain record of one job's outcome: output frames, exit code and timing."""

    # Exit code reported for each job status; any status not listed here
    # (including "timeout" and "unknown") maps to None.
    _EXIT_CODE_BY_STATUS = {"done": 0, "error": 1}

    def __init__(
        self,
        job_id: str,
        duration_ms: int = 0,
        stdout_lines: Optional[List[Dict]] = None,
        exit_code: Optional[int] = None,
        timeout_hit: bool = False,
    ):
        """Store the given fields; a missing or empty ``stdout_lines`` becomes a new list."""
        self.job_id = job_id
        self.duration_ms = duration_ms
        self.stdout_lines = stdout_lines or []
        self.exit_code = exit_code
        self.timeout_hit = timeout_hit

    @classmethod
    def from_redis(cls, job_id: str) -> "ExecutionTrace":
        """
        Connects to Upstash Redis via REST API, inspects the execution status,
        and maps the execution trace.

        Reads ``status:<job_id>`` and the list ``stdout:<job_id>``. The
        duration is not read from Redis here and is always reported as 0.

        Raises:
            ValueError: if ``UPSTASH_REDIS_REST_URL`` or
                ``UPSTASH_REDIS_REST_TOKEN`` is not set in the environment.
        """
        redis_url = os.getenv("UPSTASH_REDIS_REST_URL")
        redis_token = os.getenv("UPSTASH_REDIS_REST_TOKEN")

        if not redis_url:
            raise ValueError("[TRACE] Missing UPSTASH_REDIS_REST_URL environment variable.")
        if not redis_token:
            raise ValueError("[TRACE] Missing UPSTASH_REDIS_REST_TOKEN environment variable.")

        # Upstash REST client, so no direct TCP/TLS connection is needed.
        client = Redis(url=redis_url, token=redis_token)

        # A missing status key is treated as "unknown".
        status = client.get(f"status:{job_id}") or "unknown"

        raw_lines = client.lrange(f"stdout:{job_id}", 0, -1) or []
        stdout_lines = [{"type": "stdout", "text": line.strip()} for line in raw_lines]

        return cls(
            job_id=job_id,
            duration_ms=0,
            stdout_lines=stdout_lines,
            exit_code=cls._EXIT_CODE_BY_STATUS.get(status),
            timeout_hit=(status == "timeout"),
        )

    def to_dict(self) -> dict:
        """Return the trace as a dict (note the camelCase ``jobId`` key)."""
        return {
            "jobId": self.job_id,
            "duration_ms": self.duration_ms,
            "stdout_lines": self.stdout_lines,
            "exit_code": self.exit_code,
            "timeout_hit": self.timeout_hit,
        }
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: krysta
|
|
3
|
+
Version: 1.0.5
|
|
4
|
+
Summary: A unified multimodal model evaluation tracking and engineering report engine.
|
|
5
|
+
Author: Anshu Aditya
|
|
6
|
+
Project-URL: Homepage, https://github.com/your-username/kwing_library
|
|
7
|
+
Classifier: Programming Language :: Python :: 3
|
|
8
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
9
|
+
Classifier: Operating System :: OS Independent
|
|
10
|
+
Requires-Python: >=3.8
|
|
11
|
+
Description-Content-Type: text/markdown
|
|
12
|
+
Requires-Dist: jinja2>=3.0.0
|
|
13
|
+
Requires-Dist: matplotlib>=3.5.0
|
|
14
|
+
Requires-Dist: numpy>=1.20.0
|
|
15
|
+
Requires-Dist: pyyaml>=6.0
|
|
16
|
+
Requires-Dist: httpx>=0.27.0
|
|
17
|
+
Requires-Dist: httpx-sse>=0.4.0
|
|
18
|
+
|
|
19
|
+
# NoA Python SDK API Reference Documentation
|
|
20
|
+
|
|
21
|
+
This document provides technical reference details for the classes, methods, parameters, and streaming response objects available within the `krysta` client library.
|
|
22
|
+
|
|
23
|
+
---
|
|
24
|
+
|
|
25
|
+
## Core Class Architecture
|
|
26
|
+
|
|
27
|
+
### `NoA` Class
|
|
28
|
+
The main client manager used to initialize connections and manage state lifecycle pipes with your remote execution infrastructure gateway.
|
|
29
|
+
|
|
30
|
+
```python
|
|
31
|
+
from krysta.noa import Noa
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
#### Class Constructor Matrix
|
|
35
|
+
```python
|
|
36
|
+
Noa(gateway_url: str)
|
|
37
|
+
```
|
|
38
|
+
* **Parameters:**
|
|
39
|
+
* `gateway_url` *(str, Required)*: The base HTTP/WS network address of your running NoA server cluster dashboard node (e.g., `"http://localhost:3000"`).
|
|
40
|
+
|
|
41
|
+
#### Context Manager Methods
|
|
42
|
+
The client class fully implements the standard asynchronous context manager layout (`__aenter__` / `__aexit__`) to automatically handle socket connections, channel allocations, and memory resource cleanups safely.
|
|
43
|
+
|
|
44
|
+
```python
|
|
45
|
+
async with Noa(gateway_url="...") as client:
|
|
46
|
+
# Execution resources are automatically allocated here
|
|
47
|
+
pass
|
|
48
|
+
# Connection pipes are safely destroyed here
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
---
|
|
52
|
+
|
|
53
|
+
## Method Reference Maps
|
|
54
|
+
|
|
55
|
+
### `execute()`
|
|
56
|
+
Initiates an asynchronous Server-Sent Events (SSE) background task worker thread to evaluate a raw source code string.
|
|
57
|
+
|
|
58
|
+
#### Method Definition Syntax
|
|
59
|
+
```python
|
|
60
|
+
def execute(
|
|
61
|
+
language: str,
|
|
62
|
+
code: str,
|
|
63
|
+
timeout_ms: int = 5000
|
|
64
|
+
) -> AsyncIterator[dict]:
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
#### Input Arguments Block
|
|
68
|
+
* **`language`** *(str, Required)*: The programming compilation runner target environment to spawn inside the isolated cluster node. Supported strings:
|
|
69
|
+
* `"python"`
|
|
70
|
+
* `"javascript"`
|
|
71
|
+
* **`code`** *(str, Required)*: The uncompiled text string payload containing the raw source code script to evaluate inside the container pool.
|
|
72
|
+
* **`timeout_ms`** *(int, Optional)*: The strict maximum allowed execution time window in milliseconds before the watchdog thread hard-kills (`SIGKILL`) the task runner. Default fallback threshold value: `5000`.
|
|
73
|
+
|
|
74
|
+
#### Return Type Value
|
|
75
|
+
* Returns an **`AsyncIterator[dict]`** stream generator object. You must consume this data payload frame-by-frame using an `async for` loop layout block.
|
|
76
|
+
|
|
77
|
+
---
|
|
78
|
+
|
|
79
|
+
## Stream Event Response Payload Schema
|
|
80
|
+
|
|
81
|
+
Every data unit emitted from the async iterator returns a structured Python `dict` map object containing the following token parameters:
|
|
82
|
+
|
|
83
|
+
```json
|
|
84
|
+
{
|
|
85
|
+
"type": "system" | "stdout" | "stderr" | "rules" | "done" | "error",
|
|
86
|
+
"text": string | null,
|
|
87
|
+
"timestamp": integer
|
|
88
|
+
}
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
### Event Parameter Types Definition Matrix
|
|
92
|
+
|
|
93
|
+
#### 1. `type: "system"`
|
|
94
|
+
Emitted immediately when the gateway server begins allocating memory allocations or scheduling task execution queues.
|
|
95
|
+
* **Text Contents:** Standard tracking message flags (e.g., `"EXECUTION_STARTED"`).
|
|
96
|
+
|
|
97
|
+
#### 2. `type: "stdout"`
|
|
98
|
+
Emitted instantly whenever the sandboxed script writes characters to the standard console system output.
|
|
99
|
+
* **Text Contents:** The raw printed string text data.
|
|
100
|
+
|
|
101
|
+
#### 3. `type: "stderr"`
|
|
102
|
+
Emitted instantly when unhandled runtime errors, system exceptions, or line code tracebacks occur inside the isolated runner.
|
|
103
|
+
* **Text Contents:** Standard multiline traceback exception information text.
|
|
104
|
+
|
|
105
|
+
#### 4. `type: "rules"`
|
|
106
|
+
Emitted near task termination. Contains a serialized JSON string listing static metric quality scores and evaluation check markers.
|
|
107
|
+
* **Text Contents Schema:**
|
|
108
|
+
```json
|
|
109
|
+
"[{\"rule\": \"ExitCodeZeroRule\", \"result\": \"PASS\" | \"FAIL\", \"reason\": \"...\"}]"
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
#### 5. `type: "done"`
|
|
113
|
+
Emitted when the execution lifecycle pipeline completes its loop naturally and closes down successfully.
|
|
114
|
+
* **Text Contents:** `null`
|
|
115
|
+
|
|
116
|
+
#### 6. `type: "error"`
|
|
117
|
+
Emitted if a network drop occurs, or if the server cannot complete an internal spawn routing operation.
|
|
118
|
+
* **Text Contents:** Detailed platform failure tracking notes (e.g., `"Job failed during execution"`).
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
pyproject.toml
|
|
3
|
+
krysta/__init__.py
|
|
4
|
+
krysta/noa.py
|
|
5
|
+
krysta/sandbox.py
|
|
6
|
+
krysta/trace.py
|
|
7
|
+
krysta.egg-info/PKG-INFO
|
|
8
|
+
krysta.egg-info/SOURCES.txt
|
|
9
|
+
krysta.egg-info/dependency_links.txt
|
|
10
|
+
krysta.egg-info/requires.txt
|
|
11
|
+
krysta.egg-info/top_level.txt
|
|
12
|
+
krysta_reporter/__init__.py
|
|
13
|
+
krysta_reporter/dispatch.py
|
|
14
|
+
krysta_reporter/engine.py
|
|
15
|
+
krysta_reporter/templates/base_report.md
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .engine import ModelReport
|
|
File without changes
|
|
@@ -0,0 +1,317 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from datetime import datetime
|
|
3
|
+
from jinja2 import Template
|
|
4
|
+
import yaml
|
|
5
|
+
import time
|
|
6
|
+
|
|
7
|
+
class ModelReport:
    """Collect benchmark metrics and modality artifacts for one run and render a Markdown report.

    On construction the report loads ``kwing_config.yaml`` from the current
    working directory (falling back to built-in defaults) and creates
    ``<workspace_root>/2026/week-<week>_<model_name>/artifacts``.

    Attributes set by ``__init__``:
        week, model_name, modality: the constructor arguments, stored as given.
        artifact_counter: running number used to build unique artifact filenames.
        timestamp: local creation time formatted as ``YYYY-MM-DD HH:MM:SS``.
        metrics: dict filled by ``log_benchmarks`` (keys ``latency``, ``vram``, ``loss``).
        logged_artifacts: list of ``(title, relative_markdown_path)`` tuples.
        logged_text_blocks: list of pre-rendered Markdown snippets.
        config: the effective configuration mapping.
        output_dir / report_dir / artifacts_dir: output locations on disk.
    """

    # Artifact type strings accepted by log_custom_artifact.
    _SUPPORTED_ARTIFACT_TYPES = ("heatmap", "tokens", "audio")

    def __init__(self, week: int, model_name: str, modality: str):
        self.week = week
        self.model_name = model_name
        self.modality = modality
        self.artifact_counter = 0

        # Core data metrics trackers
        self.timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
        self.metrics = {}

        # Created up front so every log_* method can append without first
        # checking whether another method already created the list.
        self.logged_artifacts = []
        self.logged_text_blocks = []

        # 1. Load configuration profiles safely
        self.config = self._load_global_config()

        # 2. Extract workspace paths from config or default back to standard pathways
        base_workspace = self.config.get("workspace_root", "workspace_reports")
        # NOTE(review): the "2026" path segment is hard-coded; confirm whether it
        # should follow the current year before changing the on-disk layout.
        self.output_dir = os.path.join(base_workspace, "2026", f"week-{week}_{model_name}")
        self.report_dir = self.output_dir  # compile() writes report.md here
        self.artifacts_dir = os.path.join(self.output_dir, "artifacts")

        os.makedirs(self.artifacts_dir, exist_ok=True)

    def _load_global_config(self):
        """Return the effective configuration mapping.

        Looks for ``kwing_config.yaml`` in the current working directory. Values
        from the file are layered over the built-in defaults, so a partial file
        keeps the defaults for everything it omits. An unreadable, unparsable or
        non-mapping file yields the defaults and prints a warning.
        """
        config_filename = "kwing_config.yaml"
        default_config = {
            "workspace_root": "workspace_reports",
            "thresholds": {
                "token_confidence": 0.50,
                "vram_limit_mb": 4000.0,
                "latency_limit_ms": 100.0
            }
        }

        if not os.path.exists(config_filename):
            return default_config

        try:
            with open(config_filename, "r", encoding="utf-8") as f:
                user_config = yaml.safe_load(f)
        except (OSError, ValueError, yaml.YAMLError) as e:
            print(f"[WARN] krysta-wing :: configuration parsing failed ({e}). using defaults.")
            return default_config

        # An empty file parses to None; treat it like a missing file.
        if not user_config:
            return default_config

        # Callers use dict-style .get() on the result, so reject lists/scalars.
        if not isinstance(user_config, dict):
            print(
                "[WARN] krysta-wing :: configuration parsing failed "
                "(top-level YAML value is not a mapping). using defaults."
            )
            return default_config

        merged = {**default_config, **user_config}
        user_thresholds = user_config.get("thresholds")
        if isinstance(user_thresholds, dict):
            merged["thresholds"] = {**default_config["thresholds"], **user_thresholds}
        else:
            # A missing or malformed thresholds section falls back to the defaults.
            merged["thresholds"] = dict(default_config["thresholds"])

        # Clean, industrial CLI layout
        print("[INFO] krysta-wing :: loaded runtime profile from kwing_config.yaml")
        return merged

    def log_benchmarks(self, latency: float, vram: float, loss: float):
        """Record latency, VRAM and loss for this run; loss is rounded to 4 decimals."""
        self.metrics["latency"] = latency
        self.metrics["vram"] = vram
        self.metrics["loss"] = round(loss, 4)

    def log_vision_artifact(self, image_matrix, title: str = "Model Attention Heatmap"):
        """Save ``image_matrix`` as a uniquely named PNG plot and register it for the report."""
        # NOTE(review): krysta_reporter.modalities is not listed in the package's
        # SOURCES.txt / pyproject package list; confirm the subpackage is shipped.
        from .modalities.vision import save_analysis_plot

        self.artifact_counter += 1
        filename = f"artifact_{self.artifact_counter}.png"
        target_path = os.path.join(self.artifacts_dir, filename)

        save_analysis_plot(image_matrix, target_path, title)

        # Path is relative to report.md, which lives one level above artifacts/.
        relative_markdown_path = f"artifacts/{filename}"
        self.logged_artifacts.append((title, relative_markdown_path))

        print(f"[✓] Visual artifact saved to: {target_path}")

    def log_text_artifact(self, tokens: list, confidences: list, sample_phrase: str, threshold: float = 0.5):
        """Run the token-confidence analysis and register a Markdown summary block.

        ``threshold`` is passed to ``analyze_token_confidence`` and shown in the
        summary as a percentage.
        """
        from .modalities.text import analyze_token_confidence

        low_conf_words = analyze_token_confidence(tokens, confidences, threshold)

        text_summary = f"#### Evaluated Sequence Sample:\n`\"{sample_phrase}\"`\n\n"
        if low_conf_words:
            text_summary += f"⚠️ **Low Confidence Anomaly Tokens (Below {int(threshold*100)}%):**\n"
            text_summary += f"{', '.join(low_conf_words)}\n"
        else:
            text_summary += "✓ **All tokens parsed with high confidence spectral safety.**\n"

        self.logged_text_blocks.append(text_summary)

        print("[✓] Text analysis artifact logged for sequence.")

    def log_audio_artifact(self, raw_audio_array, title: str = "Audio Mel-Spectrogram"):
        """Save a spectrogram plot of ``raw_audio_array`` as a PNG and register it for the report."""
        from .modalities.audio import save_spectrogram_plot

        self.artifact_counter += 1
        filename = f"audio_artifact_{self.artifact_counter}.png"
        target_path = os.path.join(self.artifacts_dir, filename)

        save_spectrogram_plot(raw_audio_array, target_path, title)

        relative_markdown_path = f"artifacts/{filename}"
        self.logged_artifacts.append((title, relative_markdown_path))

        print(f"[✓] Audio spectrogram artifact saved to: {target_path}")

    def log_custom_artifact(self, data, artifact_type: str, title: str, **kwargs):
        """Route ``data`` to the modality processor selected by ``artifact_type``.

        Supported types: 'heatmap' (Vision Matrix), 'tokens' (NLP Sequences),
        'audio' (Waveform Signals).

        For 'tokens', optional keyword arguments are ``confidences``,
        ``sample_phrase`` and ``threshold``; ``threshold`` defaults to the
        configured ``thresholds.token_confidence`` value.

        Raises:
            ValueError: if ``artifact_type`` is not one of the supported types.
        """
        # Validate first so a rejected call does not consume an artifact number.
        if artifact_type not in self._SUPPORTED_ARTIFACT_TYPES:
            raise ValueError(f"Unsupported artifact type variant: '{artifact_type}'. Choose 'heatmap', 'tokens', or 'audio'.")

        self.artifact_counter += 1

        # Route 1: Vision / Spatial Heatmaps
        if artifact_type == "heatmap":
            from .modalities.vision import save_analysis_plot
            filename = f"custom_vision_{self.artifact_counter}.png"
            target_path = os.path.join(self.artifacts_dir, filename)
            save_analysis_plot(data, target_path, title)
            self.logged_artifacts.append((title, f"artifacts/{filename}"))
            print(f"[CORE] krysta-wing :: registered vision artifact ↳ {filename}")

        # Route 2: Text Token Streams
        elif artifact_type == "tokens":
            from .modalities.text import analyze_token_confidence
            confidences = kwargs.get("confidences", [])
            sample_phrase = kwargs.get("sample_phrase", "Raw Token String Segment Passed")

            config_threshold = self.config.get("thresholds", {}).get("token_confidence", 0.50)
            threshold = kwargs.get("threshold", config_threshold)

            low_conf_words = analyze_token_confidence(data, confidences, threshold)

            text_summary = f"#### Custom Token Stream Analysis: {title}\n"
            text_summary += f"`\"{sample_phrase}\"`\n\n"
            if low_conf_words:
                text_summary += f"**Low Confidence Anomaly Tokens (Below {int(threshold*100)}%):**\n"
                text_summary += f"{', '.join(low_conf_words)}\n\n"
            else:
                text_summary += "**All tokens parsed securely above target boundary criteria.**\n\n"

            self.logged_text_blocks.append(text_summary)
            print("[CORE] krysta-wing :: registered text token anomalies")

        # Route 3: Audio Waveforms
        else:
            from .modalities.audio import save_spectrogram_plot
            filename = f"custom_audio_{self.artifact_counter}.png"
            target_path = os.path.join(self.artifacts_dir, filename)
            save_spectrogram_plot(data, target_path, title)
            self.logged_artifacts.append((title, f"artifacts/{filename}"))
            print(f"[✓] Model-Agnostic Engine: Logged Audio Spectrogram -> {filename}")

    def compile(self):
        """Render ``templates/base_report.md`` with the logged data and write ``report.md``.

        Also runs the regression check, which appends this run to the local
        history file as a side effect.
        """
        current_dir = os.path.dirname(os.path.abspath(__file__))
        template_path = os.path.join(current_dir, "templates", "base_report.md")

        # Run statistical anomaly checks before rendering the template
        alerts = self._compute_regression_analysis()
        alert_markdown_block = ""
        if alerts:
            alert_markdown_block = (
                "### RUNTIME TELEMETRY ANOMALIES\n"
                "> **NOTICE:** The statistical tracking engine flagged performance metrics "
                "exceeding expected variance limits against historical baselines.\n>\n"
            )
            for alert in alerts:
                # The alert strings carry emoji prefixes; strip them for the report body.
                alert_markdown_block += f"> * {alert.replace('⚠️ ', '').replace('🚨 ', '')}\n"
            alert_markdown_block += "\n---\n"

        # Read as UTF-8 to match the encoding used when the report is written.
        with open(template_path, "r", encoding="utf-8") as f:
            raw_template = f.read()

        template = Template(raw_template)

        # Build dynamic markdown blocks tracking ALL custom-logged entries
        modality_content = "SYSTEM INTERPRETABILITY & MULTI-MODAL ARTIFACTS\n\n"
        has_content = False

        if self.logged_artifacts:
            has_content = True
            for title, path in self.logged_artifacts:
                # Link the saved plot; without the image reference the artifact
                # would appear in the report as a bare heading only.
                modality_content += f"#### {title}\n![{title}]({path})\n\n"

        if self.logged_text_blocks:
            has_content = True
            for block in self.logged_text_blocks:
                modality_content += block

        if not has_content:
            modality_content += "*No evaluation artifacts or modality streams logged during this operational execution pass.*"

        rendered_md = template.render(
            model_name=self.model_name,
            week=self.week,
            timestamp=self.timestamp,
            modality=self.modality,
            latency=self.metrics.get("latency", 0.0),
            vram=self.metrics.get("vram", 0.0),
            loss=self.metrics.get("loss", 0.0),
            modality_specific_content=modality_content,
            regression_alerts=alert_markdown_block
        )

        output_file_path = os.path.join(self.report_dir, "report.md")
        with open(output_file_path, "w", encoding="utf-8") as f:
            f.write(rendered_md)

        print(f"[SUCCESS] krysta-wing :: consolidated report exported to {output_file_path}")

    @staticmethod
    def _numeric_series(runs, key):
        """Return the int/float values stored under ``key`` across ``runs`` (bools excluded)."""
        series = []
        for run in runs:
            value = run.get(key)
            if isinstance(value, (int, float)) and not isinstance(value, bool):
                series.append(value)
        return series

    @staticmethod
    def _mean_and_std(values):
        """Return ``(mean, population standard deviation)`` of a non-empty sequence."""
        import math

        mean = sum(values) / len(values)
        variance = sum((x - mean) ** 2 for x in values) / len(values)
        return mean, math.sqrt(variance)

    def _compute_regression_analysis(self):
        """Compare this run against the local history and then append it to that history.

        History lives in ``.kwing_history.json`` in the current working
        directory. Checks run only when at least 3 prior runs exist. A metric is
        flagged when it exceeds ``mean + max(2 * std, floor)``, where the floor
        is 5.0 for latency and 256.0 for VRAM so a near-zero deviation does not
        trigger false positives.

        Returns:
            list[str]: one human-readable alert per flagged metric (possibly empty).
        """
        import json
        history_file = ".kwing_history.json"

        # Pull current performance parameters cleanly
        current_latency = self.metrics.get("latency", 0.0)
        current_vram = self.metrics.get("vram", 0.0)

        # Load historical benchmarks state; any unreadable or malformed file
        # is treated as an empty history.
        history_data = []
        if os.path.exists(history_file):
            try:
                with open(history_file, "r", encoding="utf-8") as f:
                    history_data = json.load(f)
            except (OSError, ValueError):
                history_data = []

        # Keep only well-formed run records so a hand-edited or foreign file
        # cannot crash the analysis or the append below.
        if not isinstance(history_data, list):
            history_data = []
        runs = [run for run in history_data if isinstance(run, dict)]

        regression_alerts = []

        # Run statistical checks only if we have a viable baseline (minimum 3 historical runs)
        if len(runs) >= 3:
            latencies = self._numeric_series(runs, "latency")
            vrams = self._numeric_series(runs, "vram")

            # Check 1: Latency Regression Spike Checks
            if latencies:
                mean_lat, std_lat = self._mean_and_std(latencies)
                threshold_lat = mean_lat + max(2 * std_lat, 5.0)
                if current_latency > threshold_lat:
                    regression_alerts.append(
                        f"⚠️ **PERFORMANCE REGRESSION:** Inference Latency spiked to **{current_latency:.1f}ms** "
                        f"(Historical Baseline: {mean_lat:.1f}ms ± {std_lat:.1f}ms). Exceeded 2σ limit threshold."
                    )

            # Check 2: Graphics Memory Allocation Checks
            if vrams:
                mean_vram, std_vram = self._mean_and_std(vrams)
                threshold_vram = mean_vram + max(2 * std_vram, 256.0)
                if current_vram > threshold_vram:
                    regression_alerts.append(
                        f"⚠️ **RESOURCE ANOMALY:** Peak VRAM consumption reached **{current_vram:.1f}MB** "
                        f"(Historical Baseline: {mean_vram:.1f}MB ± {std_vram:.1f}MB). Potential memory leak detected."
                    )

        # Append the current profile parameters to the history log
        runs.append({
            "timestamp": self.timestamp,
            "model_name": self.model_name,
            "week": self.week,
            "latency": current_latency,
            "vram": current_vram,
            "loss": self.metrics.get("loss", 0.0)
        })

        try:
            # Serialise before opening the file: opening with "w" truncates it,
            # so a serialisation error afterwards would wipe the existing history.
            payload = json.dumps(runs, indent=4)
            with open(history_file, "w", encoding="utf-8") as f:
                f.write(payload)
        except (OSError, TypeError, ValueError) as e:
            print(f"⚠️ Warning: Unable to save runtime profile data to history store ({e})")

        return regression_alerts
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# KRYSTA WING // MODEL BENCHMARK REPORT
|
|
2
|
+
|
|
3
|
+
## METADATA PROFILE
|
|
4
|
+
* **Identifier:** {{ model_name }}
|
|
5
|
+
* **Evaluation Window:** Week {{ week }}
|
|
6
|
+
* **Timestamp:** {{ timestamp }}
|
|
7
|
+
* **Active Modality:** {{ modality | upper }}
|
|
8
|
+
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
## 1.0 COMPUTE & PERFORMANCE METRICS
|
|
12
|
+
|
|
13
|
+
| Evaluation Parameter | Operational Value | Status / Threshold |
|
|
14
|
+
| :--- | :--- | :--- |
|
|
15
|
+
| **Inference Latency** | {{ latency }} ms / sample | {% if latency < 50 %}NOMINAL{% else %}ATTENTION REQUIRED{% endif %} |
|
|
16
|
+
| **Peak VRAM Allocation** | {{ vram }} MB | COMPLIANT |
|
|
17
|
+
| **Target Loss Metrics** | {{ loss }} | RECORDED |
|
|
18
|
+
|
|
19
|
+
{{ regression_alerts }}
|
|
20
|
+
|
|
21
|
+
---
|
|
22
|
+
|
|
23
|
+
## 2.0 MULTI-MODAL INTERPRETABILITY ARTIFACTS
|
|
24
|
+
{{ modality_specific_content }}
|
|
25
|
+
|
|
26
|
+
---
|
|
27
|
+
|
|
28
|
+
## 3.0 AUTOMATED EXECUTION SUMMARY
|
|
29
|
+
> **System Verdict:** Runtime evaluation pipeline executed completely. Performance parameters and logged interpretation states have been successfully archived in the target workspace directory.
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0.0", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "krysta"
|
|
7
|
+
version = "1.0.5"
|
|
8
|
+
authors = [
|
|
9
|
+
{ name = "Anshu Aditya" }
|
|
10
|
+
]
|
|
11
|
+
description = "A unified multimodal model evaluation tracking and engineering report engine."
|
|
12
|
+
readme = "README.md"
|
|
13
|
+
requires-python = ">=3.8"
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Programming Language :: Python :: 3",
|
|
16
|
+
"License :: OSI Approved :: MIT License",
|
|
17
|
+
"Operating System :: OS Independent",
|
|
18
|
+
]
|
|
19
|
+
dependencies = [
|
|
20
|
+
"jinja2>=3.0.0",
|
|
21
|
+
"matplotlib>=3.5.0",
|
|
22
|
+
"numpy>=1.20.0",
|
|
23
|
+
"pyyaml>=6.0",
|
|
24
|
+
"httpx>=0.27.0",
|
|
25
|
+
"httpx-sse>=0.4.0"
|
|
26
|
+
]
|
|
27
|
+
|
|
28
|
+
[project.urls]
|
|
29
|
+
"Homepage" = "https://github.com/your-username/kwing_library"
|
|
30
|
+
|
|
31
|
+
[tool.setuptools]
|
|
32
|
+
packages = ["krysta_reporter", "krysta"]
|
|
33
|
+
|
|
34
|
+
[tool.setuptools.package-data]
|
|
35
|
+
"krysta_reporter" = ["templates/*.md"]
|
krysta-1.0.5/setup.cfg
ADDED