pugmark 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pugmark/__init__.py +1676 -0
- pugmark/http.py +240 -0
- pugmark/protoipc.py +371 -0
- pugmark/py.typed +0 -0
- pugmark-0.1.0.dist-info/METADATA +155 -0
- pugmark-0.1.0.dist-info/RECORD +7 -0
- pugmark-0.1.0.dist-info/WHEEL +4 -0
pugmark/__init__.py
ADDED
|
@@ -0,0 +1,1676 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Pugmark Python SDK for subprocess communication.
|
|
3
|
+
|
|
4
|
+
This module provides inter-process communication functionality for the Pugmark session log
|
|
5
|
+
storage system.
|
|
6
|
+
It enables communication between the main Pugmark process and Python subprocesses,
|
|
7
|
+
allowing for data transformation and processing workflows.
|
|
8
|
+
|
|
9
|
+
The SDK implements a JSON-based protocol for passing object data between processes via stdin/stdout.
|
|
10
|
+
|
|
11
|
+
Architecture:
|
|
12
|
+
- The main process serves objects via HTTP and sends input records via stdin
|
|
13
|
+
- Python subprocesses receive input records, fetch object data via HTTP, process it,
|
|
14
|
+
and send output records via stdout
|
|
15
|
+
- The main process collects output records and converts them back to Pugmark objects
|
|
16
|
+
|
|
17
|
+
Example usage:
|
|
18
|
+
|
|
19
|
+
import pugmark
|
|
20
|
+
|
|
21
|
+
for obj in pugmark.read():
|
|
22
|
+
# Access data through the unified Object interface
|
|
23
|
+
data = obj.body()
|
|
24
|
+
if obj.content_type().startswith("text/"):
|
|
25
|
+
text_data = data.decode("utf-8")
|
|
26
|
+
processed = text_data.upper()
|
|
27
|
+
elif obj.content_type() == "application/json":
|
|
28
|
+
import json
|
|
29
|
+
json_data = json.loads(data.decode("utf-8"))
|
|
30
|
+
processed = {"processed": json_data}
|
|
31
|
+
else:
|
|
32
|
+
processed = {"binary_length": len(data)}
|
|
33
|
+
|
|
34
|
+
# Send output (automatically encodes based on type)
|
|
35
|
+
pugmark.write(processed)
|
|
36
|
+
|
|
37
|
+
# Optionally pause execution for interactive workflows
|
|
38
|
+
if needs_approval:
|
|
39
|
+
pugmark.pause("Processing complete, awaiting user approval")
|
|
40
|
+
|
|
41
|
+
Key Functions:
|
|
42
|
+
- pugmark.read(): Read input objects from the parent process (returns RemoteObject instances)
|
|
43
|
+
- pugmark.write(): Send output objects to the parent process (returns LocalObject instances)
|
|
44
|
+
- pugmark.pause(): Pause execution with an optional reason
|
|
45
|
+
- pugmark.load(): Load objects by name from session state
|
|
46
|
+
- pugmark.store(): Store objects by name in session state
|
|
47
|
+
- pugmark.open(): Context manager for reading/writing named objects
|
|
48
|
+
|
|
49
|
+
Object Types:
|
|
50
|
+
- Object: Protocol (structural interface) implemented by all objects
|
|
51
|
+
- RemoteObject: Objects backed by Input data (from HTTP URLs)
|
|
52
|
+
- LocalObject: Objects backed by Output data (local data)
|
|
53
|
+
|
|
54
|
+
The API now matches the Go ipc package structure with consistent Input/Output types.
|
|
55
|
+
"""
|
|
56
|
+
|
|
57
|
+
import base64
|
|
58
|
+
import builtins
|
|
59
|
+
import gzip
|
|
60
|
+
import json
|
|
61
|
+
import os
|
|
62
|
+
import sys
|
|
63
|
+
import tempfile
|
|
64
|
+
import time
|
|
65
|
+
import urllib.error
|
|
66
|
+
import urllib.parse
|
|
67
|
+
import urllib.request
|
|
68
|
+
from contextlib import contextmanager
|
|
69
|
+
from contextvars import Token
|
|
70
|
+
from dataclasses import dataclass, field
|
|
71
|
+
from typing import IO, Any, BinaryIO, Dict, Iterator, List, Optional, Protocol, TextIO, Union
|
|
72
|
+
|
|
73
|
+
import zstandard as zstd
|
|
74
|
+
from zstandard import ZstdDecompressionReader
|
|
75
|
+
|
|
76
|
+
__version__ = "0.1.0"
|
|
77
|
+
|
|
78
|
+
# Optional OpenTelemetry imports for trace context propagation
|
|
79
|
+
try:
|
|
80
|
+
from opentelemetry import context as otel_context
|
|
81
|
+
from opentelemetry import propagate as otel_propagate
|
|
82
|
+
from opentelemetry.instrumentation.urllib import URLLibInstrumentor
|
|
83
|
+
|
|
84
|
+
_HAS_OTEL = True
|
|
85
|
+
except ImportError:
|
|
86
|
+
_HAS_OTEL = False
|
|
87
|
+
|
|
88
|
+
# Optional OpenTelemetry trace imports for span creation
|
|
89
|
+
try:
|
|
90
|
+
from opentelemetry import trace as otel_trace
|
|
91
|
+
|
|
92
|
+
_HAS_OTEL_TRACE = True
|
|
93
|
+
except ImportError:
|
|
94
|
+
_HAS_OTEL_TRACE = False
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def _get_tracer() -> Optional["otel_trace.Tracer"]:
    """Return the OpenTelemetry tracer used by pugmark, if tracing is available.

    Returns:
        The tracer registered under ``github.com/firetiger-oss/pugmark``, or
        ``None`` when the optional ``opentelemetry.trace`` import failed at
        module load (``_HAS_OTEL_TRACE`` is false).
    """
    if _HAS_OTEL_TRACE:
        return otel_trace.get_tracer("github.com/firetiger-oss/pugmark")
    return None
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
# Context token for cleanup (used when trace context is attached)
|
|
105
|
+
_otel_context_token: Optional[Token[object]] = None
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def _init_trace_context_from_env() -> None:
    """Attach W3C trace context found in the environment to the OTel context.

    Reads ``TRACEPARENT`` and ``TRACESTATE`` from ``os.environ``. When
    ``TRACEPARENT`` is non-empty, both values are handed to the OpenTelemetry
    propagator and the extracted context is attached as the current one. The
    token returned by ``attach`` is stored in the module-level
    ``_otel_context_token``.

    Does nothing when OpenTelemetry could not be imported or when
    ``TRACEPARENT`` is unset or empty.
    """
    global _otel_context_token
    if not _HAS_OTEL:
        return

    environ = os.environ
    traceparent = environ.get("TRACEPARENT", "")
    tracestate = environ.get("TRACESTATE", "")

    if traceparent:
        # The carrier uses the lower-case key names.
        extracted = otel_propagate.extract(
            {"traceparent": traceparent, "tracestate": tracestate}
        )
        _otel_context_token = otel_context.attach(extracted)
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def _init_urllib_instrumentation() -> None:
    """Enable OpenTelemetry's urllib instrumentation on a best-effort basis.

    When the optional OpenTelemetry imports succeeded, ``URLLibInstrumentor``
    is instantiated and ``instrument()`` is called so that outgoing urllib
    requests made by this module can carry trace context. Any exception raised
    while instrumenting is ignored; without OpenTelemetry this is a no-op.
    """
    if _HAS_OTEL:
        try:
            URLLibInstrumentor().instrument()
        except Exception:
            # Deliberately swallowed: already instrumented or unavailable.
            pass
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
# Initialize trace context when module is imported (for subprocess mode)
|
|
149
|
+
_init_trace_context_from_env()
|
|
150
|
+
_init_urllib_instrumentation()
|
|
151
|
+
|
|
152
|
+
_print = builtins.print
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def print(*args: Any, **kwargs: Any) -> Any:
    """Forward to the original built-in ``print``, defaulting ``file`` to stderr.

    An explicit ``file=`` keyword argument is left untouched; only calls that
    do not name a destination are redirected to ``sys.stderr``.
    """
    if "file" not in kwargs:
        kwargs["file"] = sys.stderr
    return _print(*args, **kwargs)
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def _parse_media_type(content_type: str) -> str:
|
|
161
|
+
"""Parse media type from content type string, ignoring parameters."""
|
|
162
|
+
if not content_type:
|
|
163
|
+
return ""
|
|
164
|
+
return content_type.split(";")[0].strip()
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
# Override built-in print to redirect to stderr because stdout is used to
|
|
168
|
+
# output pugmark records, this helps avoid confusion when the program uses
|
|
169
|
+
# print statements for debugging.
|
|
170
|
+
builtins.print = print
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
@dataclass
class Input:
    """An input record as sent by the main Pugmark process.

    All fields are optional and default to empty values. ``metadata`` is
    normalised to an empty dict when it is omitted or passed as ``None``.
    """

    url: str = ""
    size: int = 0
    event: str = ""
    content_type: str = ""
    content_encoding: str = ""
    schema_url: str = ""
    name: str = ""
    metadata: Optional[Dict[str, str]] = None
    data: Optional[bytes] = None

    def __post_init__(self) -> None:
        # Swap the ``None`` default for a fresh dict owned by this instance.
        self.metadata = {} if self.metadata is None else self.metadata
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
@dataclass
class Output:
    """An output record destined for the main Pugmark process.

    A record carries its payload either inline (``data``) or by reference
    (``uri``); exactly one of the two is expected to be set, although this
    class does not enforce it. A ``uri`` points at an object that already
    lives at a remote location (e.g. ``s3://bucket/path/to/object``); pugmark
    then fetches it, computes its SHA-256, and stores it in the session, so
    the bytes never pass through this process.
    """

    data: bytes = b""
    uri: str = ""
    name: str = ""
    content_type: str = ""
    content_encoding: str = ""
    schema_url: str = ""
    metadata: Optional[Dict[str, str]] = field(default_factory=dict)
    # Marks this record as a fork message that creates a child session.
    fork: bool = False
    # Target session ID for cross-session writes.
    session: str = ""
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
class Object(Protocol):
    """
    Structural interface shared by every Pugmark object.

    An object is either local (backed by Output data) or remote (backed by
    Input data); both expose the accessors below. The interface mirrors the
    structure of the Go ipc package.
    """

    def body(self) -> bytes:
        """Return the object's payload as bytes."""
        ...

    def content_type(self) -> str:
        """Return the object's content type."""
        ...

    def schema_url(self) -> str:
        """Return the object's schema URL."""
        ...

    def name(self) -> str:
        """Return the object's name."""
        ...

    def event(self) -> str:
        """Return the event type associated with the object."""
        ...

    def metadata(self) -> Dict[str, str]:
        """Return the object's metadata dictionary."""
        ...

    def fork(self) -> bool:
        """Return True when the object is a fork message creating a child session."""
        ...

    def decode_data(self) -> bytes:
        """Return the payload as raw bytes (alias for body())."""
        ...

    def decode_text(self) -> str:
        """Return the payload decoded as UTF-8 text."""
        ...

    def decode_json(self) -> Any:
        """Return the payload decoded as JSON."""
        ...
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
# Event classes for the events API
|
|
263
|
+
class Event:
    """
    Common base for pugmark session events.

    Concrete events supply their textual form by overriding ``__str__``,
    mirroring the fmt.Stringer interface of the Go implementation. The base
    class itself has no string form.
    """

    def __str__(self) -> str:
        """Return the event's string form; subclasses must override this."""
        raise NotImplementedError
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
class StartEvent(Event):
    """
    Signals the creation of a new root session.

    Emitted when a session starts without a parent session, i.e. at the
    beginning of a new conversation or workflow.
    """

    def __str__(self) -> str:
        """Return ``"start"``, the string form of a StartEvent."""
        return "start"
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
class ForkEvent(Event):
    """
    Signals the creation of a child session from a parent session.

    Emitted when a session is created with a parent reference, which is what
    enables branching conversations and parallel execution paths.
    """

    def __str__(self) -> str:
        """Return ``"fork"``, the string form of a ForkEvent."""
        return "fork"
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
class WakeEvent(Event):
    """
    Signals that a session has been resumed or awakened.

    Typically emitted when a session becomes active again after having been
    paused or sleeping.
    """

    def __str__(self) -> str:
        """Return ``"wake"``, the string form of a WakeEvent."""
        return "wake"
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
class PushEvent(Event):
    """
    Signals that an object has been added to the session.

    The pushed Object is kept on the event as ``object`` so that its content,
    metadata, and other properties can be inspected.
    """

    def __init__(self, obj: Object):
        """
        Create a PushEvent wrapping the pushed Object.

        Args:
            obj: The Object that was pushed to the session
        """
        self.object = obj

    def __str__(self) -> str:
        """Return ``"push"``, the string form of a PushEvent."""
        return "push"
|
|
335
|
+
|
|
336
|
+
|
|
337
|
+
class _GzipResponseReader(gzip.GzipFile):
    """GzipFile over an open response that also closes that response.

    ``gzip.GzipFile.close()`` leaves a caller-supplied ``fileobj`` open, so a
    plain ``GzipFile`` would leave the underlying response open after the
    reader is closed. This subclass closes both.
    """

    def __init__(self, response: Any) -> None:
        # Stored first so close() can always reach it.
        self._response = response
        super().__init__(fileobj=response)

    def close(self) -> None:
        try:
            super().close()
        finally:
            self._response.close()


class RemoteObject:
    """
    Represents a remote object accessible via HTTP.

    RemoteObjects are created from Input records and fetch their data from the
    record's URL on first access; the fetched bytes are cached on the instance.
    This matches the concept of remote objects in the Go ipc package.
    """

    def __init__(self, input_record: "Input"):
        """
        Initialize a RemoteObject from an Input record.

        Args:
            input_record: Input record containing URL and metadata
        """
        self._input = input_record
        self._body: Optional[bytes] = None  # filled in lazily by body()

    def body(self) -> bytes:
        """
        Get the object's data as bytes, fetching it on first use.

        The result is cached, so the URL is requested at most once per
        instance. Records without a URL (events without data) yield ``b""``
        without any network access.

        Returns:
            Raw bytes of the object data, decompressed according to the
            record's content encoding

        Raises:
            urllib.error.URLError: If the URL cannot be opened
        """
        if self._body is not None:
            return self._body

        # Return empty bytes if URL is not provided (for events without data)
        if not self._input.url:
            self._body = b""
            return self._body

        with self._read() as response:
            self._body = response.read()
        return self._body

    def content_type(self) -> str:
        """Get the content type of the object."""
        return self._input.content_type

    def schema_url(self) -> str:
        """Get the schema URL of the object."""
        return self._input.schema_url

    def name(self) -> str:
        """Get the name of the object."""
        return self._input.name

    def event(self) -> str:
        """Get the event type associated with this object."""
        return self._input.event

    def metadata(self) -> Dict[str, str]:
        """Get the metadata dictionary of the object (empty if unset)."""
        return self._input.metadata or {}

    def fork(self) -> bool:
        """Return False - remote objects from input are never fork markers."""
        return False

    def decode_data(self) -> bytes:
        """Get the object's data as raw bytes (alias for body())."""
        return self.body()

    def decode_text(self) -> str:
        """Decode the object's data as UTF-8 text."""
        return self.body().decode("utf-8")

    def decode_json(self) -> Any:
        """Decode the object's data as JSON."""
        return json.loads(self.body())

    def __str__(self) -> str:
        """Return the object's name, else its URL, else a placeholder."""
        return self.name() or self._input.url or "<remote object>"

    def __repr__(self) -> str:
        """
        Return a debug representation without performing network I/O.

        The body is shown when it is already cached or when the record has no
        URL (in which case it is empty); otherwise a placeholder is shown
        instead of fetching the data just to print it.
        """
        if self._body is not None or not self._input.url:
            shown = repr(self.body())
        else:
            shown = "<not fetched>"
        return f"RemoteObject(body={shown}, content_type={self.content_type()})"

    def _read(self) -> Union[gzip.GzipFile, "ZstdDecompressionReader", BinaryIO]:
        """
        Return a file-like object for streaming the object's data.

        The returned reader handles content encoding decompression automatically based
        on the Input record's content_encoding field. Retries on ConnectionRefusedError
        to handle the race condition where the Go-side HTTP server is shutting down
        while Python still has pending reads.

        Returns:
            File-like object for reading object data; closing it also closes
            the underlying response for gzip-encoded data

        Raises:
            urllib.error.URLError: If the URL cannot be opened, including a
                refused connection on the final attempt
        """
        attempts = 3
        for attempt in range(attempts):
            try:
                response = urllib.request.urlopen(self._input.url)
                break
            except urllib.error.URLError as exc:
                refused = isinstance(exc.reason, ConnectionRefusedError)
                if not refused or attempt == attempts - 1:
                    raise
                # Exponential backoff: 0.1s, then 0.2s.
                time.sleep(0.1 * (2**attempt))

        content_encoding = self._input.content_encoding
        if content_encoding == "gzip":
            return _GzipResponseReader(response)
        if content_encoding == "zstd":
            decompressor = zstd.ZstdDecompressor()
            return decompressor.stream_reader(response)
        return response  # type: ignore[no-any-return]
|
|
449
|
+
|
|
450
|
+
|
|
451
|
+
class LocalObject:
    """
    An object whose data is held locally in an Output record.

    LocalObjects wrap an Output record together with an event type. This
    matches the concept of local objects in the Go ipc package.
    """

    def __init__(self, output_record: Output, event: str = "push"):
        """
        Wrap an Output record as a LocalObject.

        Args:
            output_record: Output record containing data and metadata
            event: Event type for this object
        """
        self._output = output_record
        self._event = event
        self._body: Optional[bytes] = None  # decoded payload, cached by body()

    def body(self) -> bytes:
        """Return the payload as bytes, decompressing gzip/zstd data once."""
        cached = self._body
        if cached is None:
            raw = self._output.data
            encoding = self._output.content_encoding
            if encoding == "gzip":
                cached = gzip.decompress(raw)
            elif encoding == "zstd":
                cached = zstd.ZstdDecompressor().decompress(raw)
            else:
                # Unknown or empty encoding: payload is used as-is.
                cached = raw
            self._body = cached
        return cached

    def content_type(self) -> str:
        """Return the record's content type."""
        return self._output.content_type

    def schema_url(self) -> str:
        """Return the record's schema URL."""
        return self._output.schema_url

    def name(self) -> str:
        """Return the record's name."""
        return self._output.name

    def event(self) -> str:
        """Return the event type given at construction."""
        return self._event

    def metadata(self) -> Dict[str, str]:
        """Return the record's metadata, or an empty dict when unset."""
        return self._output.metadata or {}

    def fork(self) -> bool:
        """Return True when the record is a fork message creating a child session."""
        return self._output.fork

    def decode_data(self) -> bytes:
        """Return the payload as raw bytes (alias for body())."""
        return self.body()

    def decode_text(self) -> str:
        """Return the payload decoded as UTF-8 text."""
        return self.body().decode("utf-8")

    def decode_json(self) -> Any:
        """Return the payload decoded as JSON."""
        return json.loads(self.body())

    def __str__(self) -> str:
        """Return the object's name, or a placeholder when unnamed."""
        return self.name() or "<local object>"
|
|
530
|
+
|
|
531
|
+
|
|
532
|
+
def _is_object(value: Any) -> bool:
|
|
533
|
+
"""Duck-typed check for whether a value already implements the Object protocol."""
|
|
534
|
+
return all(
|
|
535
|
+
callable(getattr(value, attr, None))
|
|
536
|
+
for attr in ("body", "name", "content_type", "metadata")
|
|
537
|
+
)
|
|
538
|
+
|
|
539
|
+
|
|
540
|
+
def _local_object(
    value: Any,
    *,
    name: str = "",
    content_type: Optional[str] = None,
    content_encoding: str = "",
    schema_url: str = "",
    metadata: Optional[Dict[str, str]] = None,
) -> "LocalObject":
    """
    Wrap a plain Python value in a LocalObject.

    The value is serialised to bytes and, unless *content_type* is given, a
    content type is chosen from the value's type:

    - str -> UTF-8 bytes, "text/plain"
    - bytes / bytearray -> copied as bytes, "application/octet-stream"
    - dict, list, int, float, bool, None -> JSON, "application/json"

    Raises TypeError for any other type.
    """
    if isinstance(value, str):
        payload, fallback_type = value.encode("utf-8"), "text/plain"
    elif isinstance(value, (bytes, bytearray)):
        payload, fallback_type = bytes(value), "application/octet-stream"
    elif value is None or isinstance(value, (dict, list, int, float, bool)):
        payload, fallback_type = json.dumps(value).encode("utf-8"), "application/json"
    else:
        raise TypeError(
            f"cannot write value of type {type(value).__name__}; "
            "expected str, bytes, dict, list, or pugmark.Object"
        )

    # An empty or missing content_type falls back to the detected one.
    record = Output(
        data=payload,
        name=name,
        content_type=content_type or fallback_type,
        content_encoding=content_encoding,
        schema_url=schema_url,
        metadata=metadata or {},
    )
    return LocalObject(record)
|
|
583
|
+
|
|
584
|
+
|
|
585
|
+
class Reader:
    """
    Provides an iterator interface for reading Input records from a stream
    and converting them to Object instances for processing.
    """

    def __init__(self, stream: TextIO):
        """
        Initialize a Reader instance.

        Args:
            stream: Source stream for Input records, one JSON document per line
        """
        self.stream = stream

    def read(self) -> Iterator["Object"]:
        """
        Return an iterator that yields Object instances from Input records.

        Each non-blank line of the stream is decoded as a JSON object and
        converted to an Input record, which is then wrapped in an Object.
        Iteration ends when the stream is exhausted and aborts on the first
        line that cannot be decoded.

        Yields:
            Object instances created from Input record data

        Raises:
            RuntimeError: If a line is not valid JSON (chained from the
                underlying json.JSONDecodeError) or is not a JSON object
        """
        for line in self.stream:
            line = line.strip()
            if not line:
                continue

            # Only the decode itself is guarded, so errors raised elsewhere
            # are never mislabelled as decode failures.
            try:
                data = json.loads(line)
            except json.JSONDecodeError as e:
                raise RuntimeError(f"Failed to decode input record: {e}") from e

            if not isinstance(data, dict):
                # Scalars and arrays are valid JSON but carry no record fields.
                raise RuntimeError(
                    "Failed to decode input record: expected a JSON object, "
                    f"got {type(data).__name__}"
                )

            record = Input(
                url=data.get("url", ""),
                size=data.get("size", 0),
                event=data.get("event", ""),
                content_type=data.get("content-type", ""),
                content_encoding=data.get("content-encoding", ""),
                schema_url=data.get("schema-url", ""),
                name=data.get("name", ""),
                metadata=data.get("metadata", {}),
            )

            # NOTE(review): ``record.data`` is never populated from the decoded
            # JSON above, so the LocalObject branch is currently unreachable.
            # Confirm whether inline data in input records should be decoded.
            if record.data is None:
                obj = RemoteObject(input_record=record)
            else:
                obj = LocalObject(
                    output_record=Output(
                        data=record.data,
                        name=record.name,
                        content_type=record.content_type,
                        content_encoding=record.content_encoding,
                        schema_url=record.schema_url,
                        metadata=record.metadata or {},
                    ),
                    event=record.event,
                )

            yield obj
|
|
649
|
+
|
|
650
|
+
|
|
651
|
+
class Writer:
    """
    Sends output records to a stream as JSON lines.

    Each record is written as one compact JSON document followed by a newline,
    and the stream is flushed after every record.
    """

    def __init__(self, stream: TextIO):
        """
        Create a Writer bound to *stream*.

        Args:
            stream: Destination stream for Output records
        """
        self.stream = stream
        self._initialized = False

    def _emit(self, record: Dict[str, Any]) -> None:
        """Write *record* as a single compact JSON line and flush the stream."""
        json.dump(record, self.stream, separators=(",", ":"))
        self.stream.write("\n")
        self.stream.flush()

    def write(self, obj: Object) -> None:
        """
        Write an Object to the stream with its data inline.

        The body is base64-encoded under ``"data"``. Name, content type,
        schema URL, metadata and the fork flag are included only when they are
        truthy. No content encoding is emitted.

        Args:
            obj: The Object to write
        """
        # Query the object up front, before anything is written.
        body = obj.body()
        name = obj.name()
        content_type = obj.content_type()
        schema_url = obj.schema_url()
        metadata = obj.metadata()
        forked = obj.fork()

        record: Dict[str, Any] = {
            "data": base64.b64encode(body).decode("ascii"),
        }
        optional_fields = (
            ("name", name),
            ("content-type", content_type),
            ("schema-url", schema_url),
            ("metadata", metadata),
        )
        for key, field_value in optional_fields:
            if field_value:
                record[key] = field_value
        if forked:
            record["fork"] = True

        self._emit(record)

    def write_remote(
        self,
        uri: str,
        *,
        name: str = "",
        content_type: str = "",
        content_encoding: str = "",
        schema_url: str = "",
        metadata: Optional[Dict[str, str]] = None,
        session: str = "",
    ) -> None:
        """
        Write a reference to a remote object to the stream.

        No object bytes are sent; the record only names ``uri`` (e.g.
        ``s3://bucket/path/to/object``). Pugmark fetches the object at that
        location, computes its SHA-256, and stores it in the session.

        Args:
            uri: Remote object URI (e.g. ``s3://bucket/path/to/object``)
            name: Optional name for the object
            content_type: Optional MIME type override (defaults to remote value)
            content_encoding: Optional encoding override (defaults to remote value)
            schema_url: Optional schema URL
            metadata: Optional metadata key-value pairs
            session: Optional target session ID for cross-session writes
        """
        record: Dict[str, Any] = {"uri": uri}
        optional_fields = (
            ("name", name),
            ("content-type", content_type),
            ("content-encoding", content_encoding),
            ("schema-url", schema_url),
            ("metadata", metadata),
            ("session", session),
        )
        # Falsy values are omitted from the record entirely.
        for key, field_value in optional_fields:
            if field_value:
                record[key] = field_value

        self._emit(record)
|
|
763
|
+
|
|
764
|
+
|
|
765
|
+
@dataclass
|
|
766
|
+
class State:
|
|
767
|
+
"""
|
|
768
|
+
Represents the current state of all objects read from stdin.
|
|
769
|
+
This class maintains a dictionary of named objects and a list of all objects
|
|
770
|
+
to allow easy access and iteration over the objects processed during the session.
|
|
771
|
+
"""
|
|
772
|
+
|
|
773
|
+
_dict: Dict[str, Object]
|
|
774
|
+
_list: List[Object]
|
|
775
|
+
_memory_namespaces: Dict[str, str] # namespace -> URL mapping
|
|
776
|
+
|
|
777
|
+
def __init__(self) -> None:
|
|
778
|
+
self._dict = {}
|
|
779
|
+
self._list = []
|
|
780
|
+
self._memory_namespaces = {}
|
|
781
|
+
|
|
782
|
+
def __iter__(self) -> Iterator[Object]:
|
|
783
|
+
"""
|
|
784
|
+
Iterate over all objects in the state.
|
|
785
|
+
|
|
786
|
+
Yields:
|
|
787
|
+
Object instances stored in the state
|
|
788
|
+
"""
|
|
789
|
+
return iter(self._list)
|
|
790
|
+
|
|
791
|
+
def __len__(self) -> int:
|
|
792
|
+
"""
|
|
793
|
+
Get the number of objects in the state.
|
|
794
|
+
|
|
795
|
+
Returns:
|
|
796
|
+
The count of Object instances stored in the state
|
|
797
|
+
"""
|
|
798
|
+
return len(self._dict)
|
|
799
|
+
|
|
800
|
+
def __getitem__(self, key: str) -> Object:
|
|
801
|
+
"""
|
|
802
|
+
Get an object by name from the state.
|
|
803
|
+
|
|
804
|
+
Args:
|
|
805
|
+
key: The name of the object to retrieve
|
|
806
|
+
|
|
807
|
+
Returns:
|
|
808
|
+
The Object instance associated with the given name
|
|
809
|
+
"""
|
|
810
|
+
return self._dict[key]
|
|
811
|
+
|
|
812
|
+
def __setitem__(self, key: str, value: Object) -> None:
|
|
813
|
+
"""
|
|
814
|
+
Set an object in the state by name.
|
|
815
|
+
|
|
816
|
+
Args:
|
|
817
|
+
key: The name of the object to set
|
|
818
|
+
value: The Object instance to associate with the name
|
|
819
|
+
"""
|
|
820
|
+
self._dict[key] = value
|
|
821
|
+
self._list.append(value)
|
|
822
|
+
|
|
823
|
+
def __contains__(self, key: str) -> bool:
|
|
824
|
+
"""
|
|
825
|
+
Check if an object with the given name exists in the state.
|
|
826
|
+
|
|
827
|
+
Args:
|
|
828
|
+
key: The name of the object to check
|
|
829
|
+
|
|
830
|
+
Returns:
|
|
831
|
+
True if the object exists, False otherwise
|
|
832
|
+
"""
|
|
833
|
+
return key in self._dict
|
|
834
|
+
|
|
835
|
+
def append(self, value: Object) -> None:
|
|
836
|
+
if value.name():
|
|
837
|
+
self._dict[value.name()] = value
|
|
838
|
+
self._list.append(value)
|
|
839
|
+
|
|
840
|
+
def add_memory_namespace(self, namespace: str, url: str) -> None:
    """
    Record the base URL that serves a memory namespace.

    A later call with the same ``namespace`` replaces the earlier URL.

    Args:
        namespace: The namespace prefix to register
        url: The base URL associated with that prefix
    """
    self._memory_namespaces.update({namespace: url})
|
|
849
|
+
|
|
850
|
+
def get_memory_url(self, name: str) -> Optional[str]:
    """
    Resolve an object name to a URL inside a registered memory namespace.

    Args:
        name: The object name to resolve (e.g., "agents/gpt4/config.txt")

    Returns:
        The namespace's base URL followed by the part of ``name`` after the
        namespace prefix, or None when ``name`` has no "/" or starts with no
        registered namespace
    """
    if "/" not in name:
        return None

    candidates = [prefix for prefix in self._memory_namespaces if name.startswith(prefix)]
    if not candidates:
        return None

    # Every candidate is a prefix of the same string, so the longest one is
    # also the one a reverse-sorted scan would reach first.
    best = max(candidates, key=len)
    remainder = name[len(best) :]
    return self._memory_namespaces[best] + remainder
|
|
873
|
+
|
|
874
|
+
|
|
875
|
+
class Session:
    """
    Encapsulates the pugmark communication session with configurable I/O streams.

    This allows for dependency injection of reader/writer streams for testing
    while maintaining the same API as the global functions.
    """

    def __init__(
        self,
        reader_stream: Optional[TextIO] = None,
        writer_stream: Optional[TextIO] = None,
        agent: Optional[str] = None,
        session_id: Optional[str] = None,
        snapshot: Optional[int] = None,
        metadata: Optional[Dict[str, str]] = None,
    ) -> None:
        """
        Initialize a Session with optional custom streams.

        Args:
            reader_stream: Input stream for reading records (defaults to sys.stdin)
            writer_stream: Output stream for writing records (defaults to sys.stdout)
            agent: Agent identifier (defaults to PUGMARK_AGENT_ID env var)
            session_id: Session identifier (defaults to PUGMARK_SESSION_ID env var)
            snapshot: Snapshot version number
            metadata: Session metadata (defaults to the JSON object found in the
                PUGMARK_SESSION_METADATA env var, or an empty dict)
        """
        self.reader = Reader(reader_stream or sys.stdin)
        self.writer = Writer(writer_stream or sys.stdout)
        self._state = State()
        # Explicit arguments win; the environment is only a fallback, and an
        # unset variable yields "" (reported as None by the accessors).
        self._session_id: str = (
            session_id if session_id is not None else os.environ.get("PUGMARK_SESSION_ID", "")
        )
        self._agent: str = agent if agent is not None else os.environ.get("PUGMARK_AGENT_ID", "")
        self._snapshot: Optional[int] = snapshot
        self._metadata: Dict[str, str] = (
            metadata if metadata is not None else self._metadata_from_env()
        )

    @staticmethod
    def _metadata_from_env() -> Dict[str, str]:
        """
        Parse PUGMARK_SESSION_METADATA into a dict.

        Returns:
            The decoded JSON object, or an empty dict when the variable is
            unset, empty, not valid JSON, or valid JSON that is not an object
        """
        raw = os.environ.get("PUGMARK_SESSION_METADATA", "")
        if not raw:
            return {}
        try:
            parsed = json.loads(raw)
        except json.JSONDecodeError:
            return {}
        # Valid JSON can still be a list/str/number; metadata() promises a dict.
        return parsed if isinstance(parsed, dict) else {}

    def events(self) -> Iterator[Event]:
        """
        Return an iterator over Event instances parsed from the session's input stream.

        Each object produced by read() is mapped by its event() value:
        - "start" -> StartEvent
        - "fork"  -> ForkEvent
        - "wake"  -> WakeEvent
        - "push"  -> PushEvent wrapping the object
        - any other value is silently skipped

        Example usage:
            for event in session.events():
                match event:
                    case StartEvent():
                        print("Session started")
                    case PushEvent() as push:
                        print(f"Object pushed: {push.object.name()}")

        Returns:
            Iterator yielding Event instances in input order
        """
        for obj in self.read():
            event_type = obj.event()
            if event_type == "start":
                yield StartEvent()
            elif event_type == "fork":
                yield ForkEvent()
            elif event_type == "wake":
                yield WakeEvent()
            elif event_type == "push":
                yield PushEvent(obj)

    def read(self) -> Iterator[Object]:
        """
        Read objects from the input stream.

        Objects whose event() is "memory" are consumed internally: when their
        URL ends with "/", the last path segment is registered as a memory
        namespace. They are never yielded. Every other object is appended to
        the session state and then yielded.

        Returns:
            Iterator yielding Object instances from the input stream
        """
        for obj in self.reader.read():
            if obj.event() == "memory":
                # NOTE(review): reaches into the private _input of the object;
                # presumably reader objects always carry it -- confirm.
                url = obj._input.url
                if url.endswith("/"):
                    # "http://127.0.0.1:port/namespace/" -> "namespace/"
                    namespace = url.split("/")[-2] + "/"
                    self._state.add_memory_namespace(namespace, url)
                # Memory events are bookkeeping only; hide them from the caller.
                continue

            self._state.append(obj)
            yield obj

    def write(
        self,
        value: Any,
        *,
        name: str = "",
        content_type: Optional[str] = None,
        content_encoding: str = "",
        schema_url: str = "",
        metadata: Optional[Dict[str, str]] = None,
    ) -> None:
        """
        Write a value to the output stream and record it in the session state.

        ``value`` may be a :class:`pugmark.Object` (written as-is, in which
        case the keyword arguments are not used) or a raw Python value, which
        is wrapped via ``_local_object`` together with the keyword arguments.

        Args:
            value: Object or raw value to write
            name: Optional name for the wrapped object
            content_type: Optional content type for the wrapped object
            content_encoding: Optional content encoding for the wrapped object
            schema_url: Optional schema URL for the wrapped object
            metadata: Optional metadata for the wrapped object
        """
        if _is_object(value):
            obj = value
        else:
            obj = _local_object(
                value,
                name=name,
                content_type=content_type,
                content_encoding=content_encoding,
                schema_url=schema_url,
                metadata=metadata,
            )
        self.writer.write(obj)
        self._state.append(obj)

    def write_text(
        self,
        text: str,
        *,
        name: str = "",
        content_type: str = "text/plain",
        metadata: Optional[Dict[str, str]] = None,
    ) -> None:
        """Write a string as a text/plain object (or other text/* content type)."""
        self.write(text, name=name, content_type=content_type, metadata=metadata)

    def write_json(
        self,
        data: Any,
        *,
        name: str = "",
        content_type: str = "application/json",
        metadata: Optional[Dict[str, str]] = None,
    ) -> None:
        """Write a JSON-serializable value as an application/json object."""
        self.write(data, name=name, content_type=content_type, metadata=metadata)

    def write_bytes(
        self,
        data: Union[bytes, bytearray],
        *,
        name: str = "",
        content_type: str = "application/octet-stream",
        metadata: Optional[Dict[str, str]] = None,
    ) -> None:
        """Write raw bytes with a configurable content type."""
        # bytes() normalizes a bytearray to an immutable copy before writing.
        self.write(bytes(data), name=name, content_type=content_type, metadata=metadata)

    def write_remote(
        self,
        uri: str,
        *,
        name: str = "",
        content_type: str = "",
        content_encoding: str = "",
        schema_url: str = "",
        metadata: Optional[Dict[str, str]] = None,
        session: str = "",
    ) -> None:
        """
        Write a remote object reference to the output stream.

        The call is forwarded unchanged to the writer's ``write_remote``;
        unlike write(), nothing is added to the session state.

        Args:
            uri: Remote object URI (e.g. ``s3://bucket/path/to/object``)
            name: Optional name for the object
            content_type: Optional MIME type override
            content_encoding: Optional encoding override
            schema_url: Optional schema URL
            metadata: Optional metadata key-value pairs
            session: Optional target session ID for cross-session writes
        """
        self.writer.write_remote(
            uri,
            name=name,
            content_type=content_type,
            content_encoding=content_encoding,
            schema_url=schema_url,
            metadata=metadata,
            session=session,
        )

    def state(self) -> "State":
        """
        Get the session state after consuming the rest of the input stream.

        This drains read() to exhaustion first, so any objects not yet
        consumed by the caller are absorbed into the state.

        Returns:
            The State instance containing all objects seen so far
        """
        for _ in self.read():
            pass
        return self._state

    def load(self, name: str) -> Optional[Object]:
        """
        Load an object by name from the state.

        Memory namespaces take precedence: when ``name`` falls under a
        registered namespace, a RemoteObject pointing at the memory URL is
        returned without consulting the named objects.

        Args:
            name: The name of the object to retrieve

        Returns:
            The Object instance, or None if not found
        """
        objects = self.state()

        memory_url = objects.get_memory_url(name)
        if memory_url:
            memory_input = Input(url=memory_url, name=name)
            return RemoteObject(memory_input)

        if name in objects:
            return objects[name]

        return None

    def load_text(self, name: str) -> Optional[str]:
        """Load a named object via ``decode_text()``. Returns None if absent."""
        obj = self.load(name)
        return obj.decode_text() if obj is not None else None

    def load_json(self, name: str) -> Any:
        """Load a named object via ``decode_json()``. Returns None if absent."""
        obj = self.load(name)
        return obj.decode_json() if obj is not None else None

    def load_bytes(self, name: str) -> Optional[bytes]:
        """Load a named object via ``decode_data()``. Returns None if absent."""
        obj = self.load(name)
        return obj.decode_data() if obj is not None else None

    def store(self, obj: Object) -> None:
        """
        Store an object in the state.

        This is an alias for write(obj): the object is written to the output
        stream and appended to the state.

        Args:
            obj: The Object instance to store
        """
        self.write(obj)

    def _write_pause(self, reason: str, metadata: Dict[str, str]) -> None:
        """Write a pause marker object carrying ``reason`` and ``metadata``."""
        output = Output(
            data=reason.encode("utf-8"),
            name="",
            content_type="application/vnd.pugmark.pause+text",
            content_encoding="",
            schema_url="",
            metadata=metadata,
        )
        self.write(LocalObject(output))

    def pause(self, reason: str = "") -> None:
        """
        Request a pause by writing a pause marker object.

        Args:
            reason: An optional string explaining why execution is being paused
        """
        self._write_pause(reason, {})

    def sleep(self, seconds: float, reason: str = "") -> None:
        """
        Request a timed pause by writing a pause marker with a duration.

        This method does not block: it only writes a pause object whose
        metadata holds ``"duration"`` formatted as ``f"{seconds:.2f}s"``.

        Args:
            seconds: Number of seconds to sleep
            reason: Optional reason for the pause
        """
        self._write_pause(reason, {"duration": f"{seconds:.2f}s"})

    def session_id(self) -> Optional[str]:
        """
        Get the pugmark session ID.

        Returns:
            The session ID given to the constructor or read from
            PUGMARK_SESSION_ID, or None when it is empty
        """
        return self._session_id or None

    def agent(self) -> Optional[str]:
        """
        Get the agent identity.

        Returns:
            The agent ID given to the constructor or read from
            PUGMARK_AGENT_ID, or None when it is empty
        """
        return self._agent or None

    def snapshot(self) -> Optional[int]:
        """
        Get the snapshot version number.

        Returns:
            The snapshot value given to the constructor, None if not provided
        """
        return self._snapshot

    def metadata(self) -> Dict[str, str]:
        """
        Get the session metadata.

        Returns:
            The metadata dict given to the constructor, otherwise the JSON
            object from PUGMARK_SESSION_METADATA, otherwise an empty dict.
            The internal dict itself is returned, not a copy.
        """
        return self._metadata

    @contextmanager
    def traced_execution(self) -> Iterator[None]:
        """
        Context manager that creates a tracing span for handler execution.

        When ``_get_tracer()`` returns a tracer, a "pugmark.handler" span of
        kind CONSUMER is opened around the body with these attributes:
        - pugmark.session: the session ID (if non-empty)
        - pugmark.agent: the agent ID (if non-empty)
        - pugmark.snapshot: the snapshot version (if not None)

        When no tracer is available the body runs with no span.

        Yields:
            None
        """
        tracer = _get_tracer()
        if tracer is None:
            yield
            return

        attributes: Dict[str, Any] = {}
        if self._session_id:
            attributes["pugmark.session"] = self._session_id
        if self._agent:
            attributes["pugmark.agent"] = self._agent
        if self._snapshot is not None:
            attributes["pugmark.snapshot"] = self._snapshot

        with tracer.start_as_current_span(
            "pugmark.handler",
            kind=otel_trace.SpanKind.CONSUMER,
            attributes=attributes,
        ):
            yield

    @contextmanager
    def open(
        self,
        name: str,
        content_type: Optional[str] = None,
        content_encoding: Optional[str] = None,
        schema_url: Optional[str] = None,
        metadata: Optional[Dict[str, str]] = None,
    ) -> Iterator[IO[bytes]]:
        """
        Context manager for editing an object through a temporary file.

        The temp file is pre-filled with the object's body (empty when the
        object is not found). If the with-body exits normally and the file
        content differs from the initial body, a new LocalObject built from
        the arguments is stored under ``name``. If the with-body raises,
        nothing is stored.

        Args:
            name: The name of the object to open
            content_type: Optional content type for the stored object
            content_encoding: Optional content encoding for the stored object
            schema_url: URL of the schema associated with the stored object
            metadata: Optional metadata dictionary for the stored object

        Yields:
            A binary temporary file positioned at offset 0
        """
        # load() also checks memory namespaces.
        existing = self.load(name)
        body = existing.body() if existing else b""

        with tempfile.NamedTemporaryFile(delete=True, mode="w+b") as temp:
            temp.write(body)
            temp.seek(0)
            yield temp
            # Re-read from the start to capture whatever the caller left behind.
            temp.flush()
            temp.seek(0)
            data = temp.read()

        if data != body:
            output = Output(
                data=data,
                name=name or "",
                content_type=content_type or "",
                content_encoding=content_encoding or "",
                schema_url=schema_url or "",
                metadata=metadata or {},
            )
            self.store(LocalObject(output))
|
|
1332
|
+
|
|
1333
|
+
|
|
1334
|
+
# Module-wide Session shared by the convenience functions defined below.
# It is built with no arguments, so it relies on Session's own defaults.
_default_session = Session()

# Module-level aliases for the default session's reader and writer.
stdin, stdout = _default_session.reader, _default_session.writer
|
|
1341
|
+
|
|
1342
|
+
|
|
1343
|
+
def default_session() -> Session:
    """
    Expose the module-level Session that backs the global functions.

    Returns:
        The module's default Session instance
    """
    shared = _default_session
    return shared
|
|
1355
|
+
|
|
1356
|
+
|
|
1357
|
+
def events() -> Iterator[Event]:
    """
    Iterate over the events of the default session.

    Delegates to the default session's ``events()`` so callers need not
    manage a Session instance themselves.

    Example usage:
        import pugmark

        for event in pugmark.events():
            match event:
                case pugmark.StartEvent():
                    print("Session started")
                case pugmark.PushEvent() as push:
                    print(f"Object pushed: {push.object.name()}")

    Returns:
        Iterator yielding Event instances from the default session
    """
    shared = _default_session
    return shared.events()
|
|
1378
|
+
|
|
1379
|
+
|
|
1380
|
+
def read() -> Iterator[Object]:
    """
    Iterate over the objects read by the default session.

    Delegates to the default session's ``read()``.

    Returns:
        Iterator yielding Object instances from the default session's input
    """
    shared = _default_session
    return shared.read()
|
|
1390
|
+
|
|
1391
|
+
|
|
1392
|
+
def write(
    value: Any,
    *,
    name: str = "",
    content_type: Optional[str] = None,
    content_encoding: str = "",
    schema_url: str = "",
    metadata: Optional[Dict[str, str]] = None,
) -> None:
    """
    Write a value through the default session.

    All arguments are forwarded unchanged to the default session's
    ``write()``.

    Examples::

        pugmark.write("hello")
        pugmark.write({"answer": 42}, name="reply")
        pugmark.write(b"\\x00\\x01", name="bin")

    Args:
        value: A :class:`pugmark.Object` or a raw Python value
        name: Optional name for the object
        content_type: Optional content type
        content_encoding: Optional content encoding
        schema_url: Optional schema URL
        metadata: Optional metadata key-value pairs
    """
    options = dict(
        name=name,
        content_type=content_type,
        content_encoding=content_encoding,
        schema_url=schema_url,
        metadata=metadata,
    )
    _default_session.write(value, **options)
|
|
1422
|
+
|
|
1423
|
+
|
|
1424
|
+
def write_text(
    text: str,
    *,
    name: str = "",
    content_type: str = "text/plain",
    metadata: Optional[Dict[str, str]] = None,
) -> None:
    """Forward a string to the default session's ``write_text()``."""
    options = dict(name=name, content_type=content_type, metadata=metadata)
    _default_session.write_text(text, **options)
|
|
1433
|
+
|
|
1434
|
+
|
|
1435
|
+
def write_json(
    data: Any,
    *,
    name: str = "",
    content_type: str = "application/json",
    metadata: Optional[Dict[str, str]] = None,
) -> None:
    """Forward a JSON-serializable value to the default session's ``write_json()``."""
    options = dict(name=name, content_type=content_type, metadata=metadata)
    _default_session.write_json(data, **options)
|
|
1444
|
+
|
|
1445
|
+
|
|
1446
|
+
def write_bytes(
    data: Union[bytes, bytearray],
    *,
    name: str = "",
    content_type: str = "application/octet-stream",
    metadata: Optional[Dict[str, str]] = None,
) -> None:
    """Forward raw bytes to the default session's ``write_bytes()``."""
    options = dict(name=name, content_type=content_type, metadata=metadata)
    _default_session.write_bytes(data, **options)
|
|
1455
|
+
|
|
1456
|
+
|
|
1457
|
+
def write_remote(
    uri: str,
    *,
    name: str = "",
    content_type: str = "",
    content_encoding: str = "",
    schema_url: str = "",
    metadata: Optional[Dict[str, str]] = None,
    session: str = "",
) -> None:
    """
    Write a remote object reference through the default session.

    All arguments are forwarded unchanged to the default session's
    ``write_remote()``.

    Args:
        uri: Remote object URI (e.g. ``s3://bucket/path/to/object``)
        name: Optional name for the object
        content_type: Optional MIME type override
        content_encoding: Optional encoding override
        schema_url: Optional schema URL
        metadata: Optional metadata key-value pairs
        session: Optional target session ID for cross-session writes
    """
    options = dict(
        name=name,
        content_type=content_type,
        content_encoding=content_encoding,
        schema_url=schema_url,
        metadata=metadata,
        session=session,
    )
    _default_session.write_remote(uri, **options)
|
|
1492
|
+
|
|
1493
|
+
|
|
1494
|
+
def state() -> State:
    """
    Fetch the State of the default session.

    Delegates to the default session's ``state()``.

    Returns:
        The State instance held by the default session
    """
    shared = _default_session
    return shared.state()
|
|
1502
|
+
|
|
1503
|
+
|
|
1504
|
+
def load(name: str) -> Optional[Object]:
    """
    Look up an object by name through the default session.

    Args:
        name: The name of the object to retrieve

    Returns:
        Whatever the default session's ``load()`` returns for ``name``:
        the Object instance, or None if not found
    """
    shared = _default_session
    return shared.load(name)
|
|
1515
|
+
|
|
1516
|
+
|
|
1517
|
+
def load_text(name: str) -> Optional[str]:
    """Return the default session's ``load_text()`` result for ``name``."""
    shared = _default_session
    return shared.load_text(name)
|
|
1520
|
+
|
|
1521
|
+
|
|
1522
|
+
def load_json(name: str) -> Any:
    """Return the default session's ``load_json()`` result for ``name``."""
    shared = _default_session
    return shared.load_json(name)
|
|
1525
|
+
|
|
1526
|
+
|
|
1527
|
+
def load_bytes(name: str) -> Optional[bytes]:
    """Return the default session's ``load_bytes()`` result for ``name``."""
    shared = _default_session
    return shared.load_bytes(name)
|
|
1530
|
+
|
|
1531
|
+
|
|
1532
|
+
def store(object: Object) -> None:
    """
    Hand an object to the default session's ``store()``.

    Args:
        object: The Object instance to store (the parameter name shadows the
            builtin; it is kept for keyword-argument compatibility)
    """
    shared = _default_session
    shared.store(object)
|
|
1540
|
+
|
|
1541
|
+
|
|
1542
|
+
@contextmanager
def open(
    name: str,
    content_type: Optional[str] = None,
    content_encoding: Optional[str] = None,
    schema_url: Optional[str] = None,
    metadata: Optional[Dict[str, str]] = None,
) -> Iterator[IO[bytes]]:
    """
    Open an object by name through the default session.

    Wraps the default session's ``open()`` context manager and yields the
    same file object it yields.

    Args:
        name: The name of the object to open
        content_type: Optional content type for the object
        content_encoding: Optional content encoding for the object
        schema_url: URL of the schema associated with this object
        metadata: Optional metadata dictionary for the object

    Yields:
        The file-like object produced by the default session's ``open()``
    """
    opener = _default_session.open(
        name,
        content_type=content_type,
        content_encoding=content_encoding,
        schema_url=schema_url,
        metadata=metadata,
    )
    with opener as handle:
        yield handle
|
|
1565
|
+
|
|
1566
|
+
|
|
1567
|
+
def pause(reason: str = "") -> None:
    """
    Request a pause through the default session.

    Args:
        reason: An optional string explaining why execution is being paused
    """
    shared = _default_session
    shared.pause(reason)
|
|
1575
|
+
|
|
1576
|
+
|
|
1577
|
+
def sleep(seconds: float, reason: str = "") -> None:
    """
    Request a timed pause through the default session.

    Delegates to the default session's ``sleep()``.

    Args:
        seconds: Number of seconds to sleep
        reason: Optional reason for the pause
    """
    shared = _default_session
    shared.sleep(seconds, reason)
|
|
1586
|
+
|
|
1587
|
+
|
|
1588
|
+
def session_id() -> Optional[str]:
    """
    Report the session ID of the default session.

    The default session is created without arguments, so its ID comes from
    the PUGMARK_SESSION_ID environment variable read at creation time.

    Returns:
        The session ID string if available, None otherwise

    Example:
        import pugmark

        sid = pugmark.session_id()
        if sid:
            print(f"Running in session: {sid}")
    """
    shared = _default_session
    return shared.session_id()
|
|
1607
|
+
|
|
1608
|
+
|
|
1609
|
+
def agent() -> Optional[str]:
    """
    Report the agent identity of the default session.

    The default session is created without arguments, so its agent ID comes
    from the PUGMARK_AGENT_ID environment variable read at creation time.

    Returns:
        The agent identity string if available, None otherwise

    Example:
        import pugmark

        for event in pugmark.events():
            agent_id = pugmark.agent()
            if agent_id == "claude":
                handle_claude(event)
            elif agent_id == "gpt4":
                handle_gpt4(event)
    """
    shared = _default_session
    return shared.agent()
|
|
1632
|
+
|
|
1633
|
+
|
|
1634
|
+
def metadata() -> Dict[str, str]:
    """
    Report the metadata of the default session.

    The default session is created without arguments, so its metadata comes
    from the JSON-encoded PUGMARK_SESSION_METADATA environment variable read
    at creation time.

    Returns:
        Dictionary of metadata key-value pairs, empty dict if none set

    Example:
        import pugmark

        meta = pugmark.metadata()
        user_id = meta.get("user-id")
        tenant = meta.get("tenant")
    """
    shared = _default_session
    return shared.metadata()
|
|
1654
|
+
|
|
1655
|
+
|
|
1656
|
+
@contextmanager
def traced_execution() -> Iterator[None]:
    """
    Run the with-body inside the default session's tracing context.

    Wraps the default session's ``traced_execution()`` context manager.

    Example:
        import pugmark

        with pugmark.traced_execution():
            for event in pugmark.events():
                process(event)

    Yields:
        None
    """
    span_scope = _default_session.traced_execution()
    with span_scope:
        yield
|