pugmark 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pugmark/__init__.py ADDED
@@ -0,0 +1,1676 @@
1
+ """
2
+ Pugmark Python SDK for subprocess communication.
3
+
4
+ This module provides inter-process communication functionality for the Pugmark session log
5
+ storage system.
6
+ It enables communication between the main Pugmark process and Python subprocesses,
7
+ allowing for data transformation and processing workflows.
8
+
9
+ The SDK implements a JSON-based protocol for passing object data between processes via stdin/stdout.
10
+
11
+ Architecture:
12
+ - The main process serves objects via HTTP and sends input records via stdin
13
+ - Python subprocesses receive input records, fetch object data via HTTP, process it,
14
+ and send output records via stdout
15
+ - The main process collects output records and converts them back to Pugmark objects
16
+
17
+ Example usage:
18
+
19
+ import pugmark
20
+
21
+ for obj in pugmark.read():
22
+ # Access data through the unified Object interface
23
+ data = obj.body()
24
+ if obj.content_type().startswith("text/"):
25
+ text_data = data.decode("utf-8")
26
+ processed = text_data.upper()
27
+ elif obj.content_type() == "application/json":
28
+ import json
29
+ json_data = json.loads(data.decode("utf-8"))
30
+ processed = {"processed": json_data}
31
+ else:
32
+ processed = {"binary_length": len(data)}
33
+
34
+ # Send output (automatically encodes based on type)
35
+ pugmark.write(processed)
36
+
37
+ # Optionally pause execution for interactive workflows
38
+ if needs_approval:
39
+ pugmark.pause("Processing complete, awaiting user approval")
40
+
41
+ Key Functions:
42
+ - pugmark.read(): Read input objects from the parent process (returns RemoteObject instances)
43
+ - pugmark.write(): Send output objects to the parent process (returns LocalObject instances)
44
+ - pugmark.pause(): Pause execution with an optional reason
45
+ - pugmark.load(): Load objects by name from session state
46
+ - pugmark.store(): Store objects by name in session state
47
+ - pugmark.open(): Context manager for reading/writing named objects
48
+
49
+ Object Types:
50
+ - Object: Abstract base class for all objects
51
+ - RemoteObject: Objects backed by Input data (from HTTP URLs)
52
+ - LocalObject: Objects backed by Output data (local data)
53
+
54
+ The API now matches the Go ipc package structure with consistent Input/Output types.
55
+ """
56
+
57
+ import base64
58
+ import builtins
59
+ import gzip
60
+ import json
61
+ import os
62
+ import sys
63
+ import tempfile
64
+ import time
65
+ import urllib.error
66
+ import urllib.parse
67
+ import urllib.request
68
+ from contextlib import contextmanager
69
+ from contextvars import Token
70
+ from dataclasses import dataclass, field
71
+ from typing import IO, Any, BinaryIO, Dict, Iterator, List, Optional, Protocol, TextIO, Union
72
+
73
+ import zstandard as zstd
74
+ from zstandard import ZstdDecompressionReader
75
+
76
+ __version__ = "0.1.0"
77
+
78
+ # Optional OpenTelemetry imports for trace context propagation
79
+ try:
80
+ from opentelemetry import context as otel_context
81
+ from opentelemetry import propagate as otel_propagate
82
+ from opentelemetry.instrumentation.urllib import URLLibInstrumentor
83
+
84
+ _HAS_OTEL = True
85
+ except ImportError:
86
+ _HAS_OTEL = False
87
+
88
+ # Optional OpenTelemetry trace imports for span creation
89
+ try:
90
+ from opentelemetry import trace as otel_trace
91
+
92
+ _HAS_OTEL_TRACE = True
93
+ except ImportError:
94
+ _HAS_OTEL_TRACE = False
95
+
96
+
97
def _get_tracer() -> Optional["otel_trace.Tracer"]:
    """Return the OpenTelemetry tracer used by pugmark, if tracing is available.

    Returns:
        The tracer obtained from ``otel_trace.get_tracer`` when the optional
        ``opentelemetry.trace`` import succeeded, otherwise ``None``.
    """
    if _HAS_OTEL_TRACE:
        return otel_trace.get_tracer("github.com/firetiger-oss/pugmark")
    return None
102
+
103
+
104
+ # Context token for cleanup (used when trace context is attached)
105
+ _otel_context_token: Optional[Token[object]] = None
106
+
107
+
108
def _init_trace_context_from_env() -> None:
    """Attach the W3C trace context found in the environment, if any.

    Reads ``TRACEPARENT`` and ``TRACESTATE`` from the environment, extracts an
    OpenTelemetry context from them and attaches it to the current context.
    The token returned by the attach call is kept in the module-level
    ``_otel_context_token``.

    Does nothing when the optional OpenTelemetry imports are unavailable or
    when ``TRACEPARENT`` is unset or empty.
    """
    global _otel_context_token
    if not _HAS_OTEL:
        return

    traceparent = os.environ.get("TRACEPARENT", "")
    if not traceparent:
        # Without a parent there is no context to continue.
        return

    carrier = {
        "traceparent": traceparent,
        "tracestate": os.environ.get("TRACESTATE", ""),
    }
    _otel_context_token = otel_context.attach(otel_propagate.extract(carrier))
131
+
132
+
133
def _init_urllib_instrumentation() -> None:
    """Instrument urllib so outgoing requests carry trace context headers.

    This is a no-op when the optional OpenTelemetry imports are unavailable.
    Instrumentation is best-effort: any exception raised while instrumenting
    is ignored.
    """
    if _HAS_OTEL:
        try:
            URLLibInstrumentor().instrument()
        except Exception:
            # Already instrumented or unavailable; tracing is optional, so a
            # failure here must not break module import.
            pass
146
+
147
+
148
+ # Initialize trace context when module is imported (for subprocess mode)
149
+ _init_trace_context_from_env()
150
+ _init_urllib_instrumentation()
151
+
152
# Keep a handle on the real built-in before it is replaced.
_print = builtins.print


def print(*args: Any, **kwargs: Any) -> Any:
    """Forward to the built-in ``print``, writing to stderr by default.

    An explicit ``file=`` keyword argument is honoured; only calls that do
    not pass ``file`` are redirected to ``sys.stderr``.
    """
    if "file" not in kwargs:
        # Looked up at call time so a replaced sys.stderr is respected.
        kwargs["file"] = sys.stderr
    return _print(*args, **kwargs)
158
+
159
+
160
def _parse_media_type(content_type: str) -> str:
    """Return the media type portion of a content type string.

    Everything from the first ``;`` onwards (the parameters) is dropped and
    surrounding whitespace is stripped. An empty input yields ``""``.
    """
    if content_type:
        media_type, _sep, _params = content_type.partition(";")
        return media_type.strip()
    return ""
165
+
166
+
167
# Override the built-in print so that it writes to stderr by default. stdout
# is used to output pugmark records, so this keeps print statements used for
# debugging from being mixed into that stream.
170
+ builtins.print = print
171
+
172
+
173
@dataclass
class Input:
    """An input record sent from the main Pugmark process.

    Every field has a default, so a record can be built from a partial
    mapping. ``metadata`` is never left as ``None`` after construction.
    """

    url: str = ""
    size: int = 0
    event: str = ""
    content_type: str = ""
    content_encoding: str = ""
    schema_url: str = ""
    name: str = ""
    metadata: Optional[Dict[str, str]] = None
    data: Optional[bytes] = None

    def __post_init__(self) -> None:
        # Give each record its own dict instead of sharing a mutable default.
        self.metadata = {} if self.metadata is None else self.metadata
190
+
191
+
192
@dataclass
class Output:
    """An output record sent to the main Pugmark process.

    Exactly one of ``data`` and ``uri`` is expected to be set. ``uri``
    references an object that already lives at a remote location (e.g.
    ``s3://bucket/path/to/object``); pugmark will fetch it, compute its
    SHA-256, and store it in the session without the data travelling through
    this process.
    """

    data: bytes = b""
    uri: str = ""
    name: str = ""
    content_type: str = ""
    content_encoding: str = ""
    schema_url: str = ""
    metadata: Optional[Dict[str, str]] = field(default_factory=dict)
    # Marks this as a fork message that creates a child session.
    fork: bool = False
    # Target session ID for cross-session writes.
    session: str = ""
211
+
212
+
213
class Object(Protocol):
    """
    Structural interface shared by all Pugmark objects.

    An object may be local (backed by Output data) or remote (backed by Input
    data); both expose the accessors below. This unified interface matches
    the Go ipc package structure.
    """

    def body(self) -> bytes:
        """Return the object's data as bytes."""
        ...

    def content_type(self) -> str:
        """Return the object's content type."""
        ...

    def schema_url(self) -> str:
        """Return the object's schema URL."""
        ...

    def name(self) -> str:
        """Return the object's name."""
        ...

    def event(self) -> str:
        """Return the event type associated with this object."""
        ...

    def metadata(self) -> Dict[str, str]:
        """Return the object's metadata dictionary."""
        ...

    def fork(self) -> bool:
        """Return True if this object is a fork message that creates a child session."""
        ...

    def decode_data(self) -> bytes:
        """Return the object's data as raw bytes (alias for body())."""
        ...

    def decode_text(self) -> str:
        """Return the object's data decoded as UTF-8 text."""
        ...

    def decode_json(self) -> Any:
        """Return the object's data decoded as JSON."""
        ...
260
+
261
+
262
+ # Event classes for the events API
263
class Event:
    """
    Common base for pugmark session events.

    Subclasses supply their textual form by overriding ``__str__``, mirroring
    the fmt.Stringer interface from the Go implementation. The base class
    itself has no string form.
    """

    def __str__(self) -> str:
        """Return the event's string form; subclasses must override this."""
        raise NotImplementedError
274
+
275
+
276
class StartEvent(Event):
    """
    Signals that a new root session has been created.

    Emitted when a session starts without a parent session, i.e. at the
    beginning of a new conversation or workflow.
    """

    def __str__(self) -> str:
        """Return ``"start"``."""
        return "start"
287
+
288
+
289
class ForkEvent(Event):
    """
    Signals that a child session has been created from a parent session.

    Emitted when a session is created with a parent reference, which allows
    branching conversations and parallel execution paths.
    """

    def __str__(self) -> str:
        """Return ``"fork"``."""
        return "fork"
300
+
301
+
302
class WakeEvent(Event):
    """
    Signals that a session has been resumed or awakened.

    Typically emitted when a session becomes active again after being paused
    or sleeping.
    """

    def __str__(self) -> str:
        """Return ``"wake"``."""
        return "wake"
313
+
314
+
315
class PushEvent(Event):
    """
    Signals that an object has been added to the session.

    The pushed Object is available as the ``object`` attribute, giving access
    to its content, metadata, and other properties.
    """

    def __init__(self, obj: Object):
        """
        Create a PushEvent wrapping the pushed Object.

        Args:
            obj: The Object that was pushed to the session
        """
        self.object = obj

    def __str__(self) -> str:
        """Return ``"push"``."""
        return "push"
335
+
336
+
337
class RemoteObject:
    """
    Represents a remote object accessible via HTTP.

    RemoteObjects are created from Input records and fetch their data lazily
    from the record's URL the first time ``body()`` is called; the result is
    cached on the instance. This matches the concept of remote objects in the
    Go ipc package.
    """

    def __init__(self, input_record: "Input"):
        """
        Initialize a RemoteObject from an Input record.

        Args:
            input_record: Input record containing URL and metadata
        """
        self._input = input_record
        # Cache for the fetched (and decompressed) payload; None until fetched.
        self._body: Optional[bytes] = None

    def body(self) -> bytes:
        """
        Get the object's data as bytes.

        The data is fetched on first use and cached. Records without a URL
        (events that carry no data) yield empty bytes without any request.

        Returns:
            Raw bytes of the object data

        Raises:
            urllib.error.URLError: If the object cannot be fetched
        """
        if self._body is not None:
            return self._body

        # Return empty bytes if URL is not provided (for events without data)
        if not self._input.url:
            self._body = b""
            return self._body

        with self._read() as response:
            self._body = response.read()
        return self._body

    def content_type(self) -> str:
        """Get the content type of the object."""
        return self._input.content_type

    def schema_url(self) -> str:
        """Get the schema URL of the object."""
        return self._input.schema_url

    def name(self) -> str:
        """Get the name of the object."""
        return self._input.name

    def event(self) -> str:
        """Get the event type associated with this object."""
        return self._input.event

    def metadata(self) -> Dict[str, str]:
        """Get the metadata dictionary of the object."""
        return self._input.metadata or {}

    def fork(self) -> bool:
        """Return False - remote objects from input are never fork markers."""
        return False

    def decode_data(self) -> bytes:
        """Get the object's data as raw bytes (alias for body())."""
        return self.body()

    def decode_text(self) -> str:
        """Decode the object's data as UTF-8 text."""
        return self.body().decode("utf-8")

    def decode_json(self) -> Any:
        """Decode the object's data as JSON."""
        return json.loads(self.body())

    def __str__(self) -> str:
        """Return a string representation of the object."""
        return self.name() or self._input.url or "<remote object>"

    def __repr__(self) -> str:
        """
        Return a debugging representation of the object.

        The representation is built from the Input record only. It never
        calls ``body()``, so inspecting an object in a debugger, log line or
        traceback cannot trigger an HTTP request, raise a network error, or
        dump the whole payload.
        """
        return (
            f"RemoteObject(name={self.name()!r}, url={self._input.url!r}, "
            f"content_type={self.content_type()!r}, "
            f"fetched={self._body is not None})"
        )

    def _read(self) -> "Union[gzip.GzipFile, ZstdDecompressionReader, BinaryIO]":
        """
        Return a file-like object for streaming the object's data.

        The returned reader handles content encoding decompression automatically based
        on the Input record's content_encoding field. Retries on ConnectionRefusedError
        to handle the race condition where the Go-side HTTP server is shutting down
        while Python still has pending reads.

        Returns:
            File-like object for reading object data

        Raises:
            urllib.error.URLError: If the request fails for a reason other
                than a refused connection, or is still refused on the last
                attempt
        """
        max_attempts = 3
        for attempt in range(max_attempts):
            try:
                response = urllib.request.urlopen(self._input.url)
                break
            except urllib.error.URLError as exc:
                refused = isinstance(exc.reason, ConnectionRefusedError)
                if not refused or attempt == max_attempts - 1:
                    raise
                # Exponential backoff: 0.1s, then 0.2s.
                time.sleep(0.1 * (2**attempt))

        content_encoding = self._input.content_encoding
        if content_encoding == "gzip":
            return gzip.GzipFile(fileobj=response)
        if content_encoding == "zstd":
            decompressor = zstd.ZstdDecompressor()
            return decompressor.stream_reader(response)
        return response  # type: ignore[no-any-return]
449
+
450
+
451
class LocalObject:
    """
    An object whose data is held locally in an Output record.

    LocalObjects wrap Output records, so no fetch is needed to read them.
    This matches the concept of local objects in the Go ipc package.
    """

    def __init__(self, output_record: "Output", event: str = "push"):
        """
        Wrap an Output record.

        Args:
            output_record: Output record containing data and metadata
            event: Event type reported by ``event()``
        """
        self._output = output_record
        self._event = event
        # Decoded payload, filled in by the first successful body() call.
        self._body: Optional[bytes] = None

    def body(self) -> bytes:
        """Return the object's data, decompressed per the record's content encoding."""
        if self._body is None:
            payload = self._output.data
            encoding = self._output.content_encoding
            if encoding == "gzip":
                payload = gzip.decompress(payload)
            elif encoding == "zstd":
                payload = zstd.ZstdDecompressor().decompress(payload)
            # Any other encoding value leaves the data untouched.
            self._body = payload
        return self._body

    def content_type(self) -> str:
        """Return the record's content type."""
        return self._output.content_type

    def schema_url(self) -> str:
        """Return the record's schema URL."""
        return self._output.schema_url

    def name(self) -> str:
        """Return the record's name."""
        return self._output.name

    def event(self) -> str:
        """Return the event type given at construction."""
        return self._event

    def metadata(self) -> Dict[str, str]:
        """Return the record's metadata, or an empty dict when it has none."""
        return self._output.metadata or {}

    def fork(self) -> bool:
        """Return True if this object is a fork message that creates a child session."""
        return self._output.fork

    def decode_data(self) -> bytes:
        """Return the object's data as raw bytes (alias for body())."""
        return self.body()

    def decode_text(self) -> str:
        """Return the object's data decoded as UTF-8 text."""
        return self.body().decode("utf-8")

    def decode_json(self) -> Any:
        """Return the object's data decoded as JSON."""
        return json.loads(self.body())

    def __str__(self) -> str:
        """Return the object's name, or a placeholder when it is unnamed."""
        return self.name() or "<local object>"
530
+
531
+
532
def _is_object(value: Any) -> bool:
    """Report whether ``value`` looks like an Object, using duck typing.

    A value qualifies when ``body``, ``name``, ``content_type`` and
    ``metadata`` are all present and callable; the remaining Object methods
    are not inspected.
    """
    for attr in ("body", "name", "content_type", "metadata"):
        if not callable(getattr(value, attr, None)):
            return False
    return True
538
+
539
+
540
def _local_object(
    value: Any,
    *,
    name: str = "",
    content_type: Optional[str] = None,
    content_encoding: str = "",
    schema_url: str = "",
    metadata: Optional[Dict[str, str]] = None,
) -> "LocalObject":
    """
    Wrap a raw Python value in a LocalObject.

    The value is serialized to bytes and, when ``content_type`` is not given
    (or is empty), a default is chosen from the value's type:

    - str → UTF-8 encoded, "text/plain"
    - bytes / bytearray → copied as bytes, "application/octet-stream"
    - dict, list, int, float, bool, None → JSON-encoded, "application/json"

    Raises TypeError for unsupported types.
    """
    if isinstance(value, str):
        payload, default_type = value.encode("utf-8"), "text/plain"
    elif isinstance(value, (bytes, bytearray)):
        payload, default_type = bytes(value), "application/octet-stream"
    elif value is None or isinstance(value, (dict, list, int, float, bool)):
        payload, default_type = json.dumps(value).encode("utf-8"), "application/json"
    else:
        raise TypeError(
            f"cannot write value of type {type(value).__name__}; "
            "expected str, bytes, dict, list, or pugmark.Object"
        )

    record = Output(
        data=payload,
        name=name,
        content_type=content_type or default_type,
        content_encoding=content_encoding,
        schema_url=schema_url,
        metadata=metadata or {},
    )
    return LocalObject(record)
583
+
584
+
585
class Reader:
    """
    Provides an iterator interface for reading Input records from a stream
    and converting them to Object instances for processing.
    """

    def __init__(self, stream: TextIO):
        """
        Initialize a Reader instance.

        Args:
            stream: Source stream for Input records
        """
        self.stream = stream

    def read(self) -> Iterator["Object"]:
        """
        Return an iterator that yields Object instances from Input records.

        Each non-blank line of the stream is decoded as a JSON object and
        converted to an Input record, which is then wrapped in an Object.
        Blank lines are skipped. Iteration ends when the stream is exhausted
        and aborts on the first record that cannot be decoded.

        Yields:
            Object instances created from Input record data

        Raises:
            RuntimeError: If a line is not valid JSON (chained from the
                underlying json.JSONDecodeError) or is not a JSON object
        """
        for raw_line in self.stream:
            line = raw_line.strip()
            if not line:
                continue

            # Only the parse itself is guarded, so errors raised while
            # building or consuming the object are never mislabelled as
            # decoding failures.
            try:
                data = json.loads(line)
            except json.JSONDecodeError as e:
                raise RuntimeError(f"Failed to decode input record: {e}") from e

            if not isinstance(data, dict):
                # The field lookups below need a mapping; fail with the same
                # error type as malformed JSON instead of an AttributeError.
                raise RuntimeError(
                    "Failed to decode input record: expected a JSON object, "
                    f"got {type(data).__name__}"
                )

            record = Input(
                url=data.get("url", ""),
                size=data.get("size", 0),
                event=data.get("event", ""),
                content_type=data.get("content-type", ""),
                content_encoding=data.get("content-encoding", ""),
                schema_url=data.get("schema-url", ""),
                name=data.get("name", ""),
                metadata=data.get("metadata", {}),
            )

            obj: "Object"
            if record.data is None:
                obj = RemoteObject(input_record=record)
            else:
                # NOTE(review): `data` is never populated from the JSON record
                # above, so this branch is currently unreachable. Confirm
                # whether inline data is part of the input protocol.
                obj = LocalObject(
                    output_record=Output(
                        data=record.data,
                        name=record.name,
                        content_type=record.content_type,
                        content_encoding=record.content_encoding,
                        schema_url=record.schema_url,
                        metadata=record.metadata or {},
                    ),
                    event=record.event,
                )

            yield obj
649
+
650
+
651
class Writer:
    """
    Sends Output records to a stream as newline-delimited JSON.

    Each record is written as one compact JSON line and the stream is
    flushed after every record.
    """

    def __init__(self, stream: TextIO):
        """
        Create a Writer.

        Args:
            stream: Destination stream for Output records
        """
        self.stream = stream
        self._initialized = False

    def _emit(self, record: Dict[str, Any]) -> None:
        """Write one record as a compact JSON line and flush the stream."""
        json.dump(record, self.stream, separators=(",", ":"))
        self.stream.write("\n")
        self.stream.flush()

    def write(self, obj: "Object") -> None:
        """
        Write an Object to the stream.

        The object's body is base64-encoded into the ``data`` field. The
        record's content encoding is always left empty, so no
        ``content-encoding`` key is emitted.

        Args:
            obj: The Object to write
        """
        output = Output(
            data=obj.body(),
            name=obj.name(),
            content_type=obj.content_type(),
            content_encoding="",  # Content encoding handled internally by Object
            schema_url=obj.schema_url(),
            metadata=obj.metadata(),
            fork=obj.fork(),
        )

        # Binary data is base64-encoded so it can travel inside JSON.
        record: Dict[str, Any] = {
            "data": base64.b64encode(output.data).decode("ascii"),
        }

        # Optional fields are only emitted when they carry a value.
        optional_fields = (
            ("name", output.name),
            ("content-type", output.content_type),
            ("content-encoding", output.content_encoding),
            ("schema-url", output.schema_url),
            ("metadata", output.metadata),
        )
        for key, field_value in optional_fields:
            if field_value:
                record[key] = field_value
        if output.fork:
            record["fork"] = True
        if output.session:
            record["session"] = output.session

        self._emit(record)

    def write_remote(
        self,
        uri: str,
        *,
        name: str = "",
        content_type: str = "",
        content_encoding: str = "",
        schema_url: str = "",
        metadata: Optional[Dict[str, str]] = None,
        session: str = "",
    ) -> None:
        """
        Write a remote object reference to the stream.

        Instead of sending the object's bytes inline, pugmark will fetch the
        object at ``uri`` (e.g. ``s3://bucket/path/to/object``), compute its
        SHA-256, and store it in the session.

        Args:
            uri: Remote object URI (e.g. ``s3://bucket/path/to/object``)
            name: Optional name for the object
            content_type: Optional MIME type override (defaults to remote value)
            content_encoding: Optional encoding override (defaults to remote value)
            schema_url: Optional schema URL
            metadata: Optional metadata key-value pairs
            session: Optional target session ID for cross-session writes
        """
        record: Dict[str, Any] = {"uri": uri}
        optional_fields = (
            ("name", name),
            ("content-type", content_type),
            ("content-encoding", content_encoding),
            ("schema-url", schema_url),
            ("metadata", metadata),
            ("session", session),
        )
        for key, field_value in optional_fields:
            if field_value:
                record[key] = field_value

        self._emit(record)
763
+
764
+
765
@dataclass
class State:
    """
    Represents the current state of all objects seen during the session.

    The state keeps two views of the same objects: ``_list`` holds every
    object in insertion order, and ``_dict`` indexes the named ones by name
    (a later object replaces an earlier one of the same name in the index,
    while both remain in the list). Iteration and ``len()`` describe the
    list; lookup by key and ``in`` use the name index.
    """

    _dict: Dict[str, "Object"]
    _list: List["Object"]
    _memory_namespaces: Dict[str, str]  # namespace -> URL mapping

    def __init__(self) -> None:
        self._dict = {}
        self._list = []
        self._memory_namespaces = {}

    def __iter__(self) -> Iterator["Object"]:
        """
        Iterate over all objects in the state, in insertion order.

        Yields:
            Object instances stored in the state
        """
        return iter(self._list)

    def __len__(self) -> int:
        """
        Get the number of objects in the state.

        The count matches what iteration yields, so unnamed objects are
        included and a state holding only unnamed objects is truthy.

        Returns:
            The count of Object instances stored in the state
        """
        return len(self._list)

    def __getitem__(self, key: str) -> "Object":
        """
        Get an object by name from the state.

        Args:
            key: The name of the object to retrieve

        Returns:
            The Object instance associated with the given name

        Raises:
            KeyError: If no object is stored under ``key``
        """
        return self._dict[key]

    def __setitem__(self, key: str, value: "Object") -> None:
        """
        Set an object in the state by name.

        Args:
            key: The name of the object to set
            value: The Object instance to associate with the name
        """
        self._dict[key] = value
        self._list.append(value)

    def __contains__(self, key: str) -> bool:
        """
        Check if an object with the given name exists in the state.

        Args:
            key: The name of the object to check

        Returns:
            True if the object exists, False otherwise
        """
        return key in self._dict

    def append(self, value: "Object") -> None:
        """
        Add an object to the state.

        The object is always appended to the list; it is also indexed by
        name when its name is non-empty.

        Args:
            value: The Object instance to add
        """
        object_name = value.name()
        if object_name:
            self._dict[object_name] = value
        self._list.append(value)

    def add_memory_namespace(self, namespace: str, url: str) -> None:
        """
        Add a memory namespace URL mapping.

        Args:
            namespace: The namespace name
            url: The base URL for this namespace
        """
        self._memory_namespaces[namespace] = url

    def get_memory_url(self, name: str) -> Optional[str]:
        """
        Get the memory URL for an object name if it's in a memory namespace.

        Args:
            name: The object name to check (e.g., "agents/gpt4/config.txt")

        Returns:
            The full URL if the name matches a memory namespace, None otherwise
        """
        if "/" not in name:
            return None

        # All namespaces that prefix `name` are prefixes of one another, and a
        # longer prefix sorts after a shorter one, so walking the keys in
        # reverse sorted order reaches the longest matching namespace first.
        for namespace in sorted(self._memory_namespaces, reverse=True):
            if name.startswith(namespace):
                return self._memory_namespaces[namespace] + name[len(namespace):]

        return None
873
+
874
+
875
+ class Session:
876
+ """
877
+ Encapsulates the pugmark communication session with configurable I/O streams.
878
+
879
+ This allows for dependency injection of reader/writer streams for testing
880
+ while maintaining the same API as the global functions.
881
+ """
882
+
883
def __init__(
    self,
    reader_stream: Optional[TextIO] = None,
    writer_stream: Optional[TextIO] = None,
    agent: Optional[str] = None,
    session_id: Optional[str] = None,
    snapshot: Optional[int] = None,
    metadata: Optional[Dict[str, str]] = None,
):
    """
    Set up a Session, optionally with custom streams.

    Args:
        reader_stream: Input stream for reading records (defaults to sys.stdin)
        writer_stream: Output stream for writing records (defaults to sys.stdout)
        agent: Agent identifier (defaults to PUGMARK_AGENT_ID env var)
        session_id: Session identifier (defaults to PUGMARK_SESSION_ID env var)
        snapshot: Snapshot version number
        metadata: Session metadata from manifest (defaults to PUGMARK_SESSION_METADATA env var)
    """
    self.reader = Reader(reader_stream or sys.stdin)
    self.writer = Writer(writer_stream or sys.stdout)
    self._state = State()

    # Explicit arguments win; the environment is only consulted for values
    # that were not passed in.
    self._session_id: str = (
        os.environ.get("PUGMARK_SESSION_ID", "") if session_id is None else session_id
    )
    self._agent: str = os.environ.get("PUGMARK_AGENT_ID", "") if agent is None else agent
    self._snapshot: Optional[int] = snapshot

    if metadata is not None:
        self._metadata: Dict[str, str] = metadata
        return

    # The environment variable carries the metadata JSON-encoded; a missing,
    # empty or malformed value falls back to an empty mapping.
    encoded = os.environ.get("PUGMARK_SESSION_METADATA", "")
    try:
        self._metadata = json.loads(encoded) if encoded else {}
    except json.JSONDecodeError:
        self._metadata = {}
924
+
925
def events(self) -> Iterator[Event]:
    """
    Iterate over the session's input as typed Event instances.

    This is a higher-level view of read(): each Object is translated into
    an event according to its event() value, in the order the objects are
    received:
    - "start" → StartEvent: Session creation without a parent
    - "fork" → ForkEvent: Session creation with a parent
    - "wake" → WakeEvent: Session resumption or awakening
    - "push" → PushEvent: Object addition (contains the actual Object)
    - Other event types are silently ignored

    Example usage:
        for event in session.events():
            match event:
                case StartEvent():
                    print("Session started")
                case ForkEvent():
                    print("Session forked")
                case WakeEvent():
                    print("Session awakened")
                case PushEvent() as push:
                    print(f"Object pushed: {push.object.name()}")

    Returns:
        Iterator yielding Event instances from the input stream
    """
    # Events that carry no payload, matched by equality in this order.
    payload_free = (
        ("start", StartEvent),
        ("fork", ForkEvent),
        ("wake", WakeEvent),
    )
    for obj in self.read():
        kind = obj.event()
        for label, event_class in payload_free:
            if kind == label:
                yield event_class()
                break
        else:
            if kind == "push":
                yield PushEvent(obj)
965
+
966
def read(self) -> Iterator[Object]:
    """
    Read objects from the input stream.

    Objects whose event is "memory" are consumed internally: when their URL
    ends with "/", the last path segment (with its trailing slash) is
    registered as a memory namespace mapped to that URL. All other objects
    are recorded in the session state and yielded.

    Returns:
        Iterator yielding Object instances from the input stream
    """
    for obj in self.reader.read():
        if obj.event() != "memory":
            self._state.append(obj)
            yield obj
            continue

        # Memory events are never yielded to the user.
        url = obj._input.url  # Access the URL directly
        if url.endswith("/"):
            # For "http://127.0.0.1:port/namespace/" this is "namespace/".
            namespace = url.split("/")[-2] + "/"
            self._state.add_memory_namespace(namespace, url)
987
+
988
def write(
    self,
    value: Any,
    *,
    name: str = "",
    content_type: Optional[str] = None,
    content_encoding: str = "",
    schema_url: str = "",
    metadata: Optional[Dict[str, str]] = None,
) -> None:
    """
    Write a value to the output stream and record it in the session state.

    ``value`` may be a :class:`pugmark.Object` (written as-is; the keyword
    arguments are not applied to it) or a raw Python value (str, bytes,
    dict, list, or any JSON-serializable scalar), in which case it is
    wrapped in a :class:`LocalObject`. Content type is auto-detected from
    the value's type unless explicitly provided.
    """
    obj = (
        value
        if _is_object(value)
        else _local_object(
            value,
            name=name,
            content_type=content_type,
            content_encoding=content_encoding,
            schema_url=schema_url,
            metadata=metadata,
        )
    )
    self.writer.write(obj)
    self._state.append(obj)
1019
+
1020
def write_text(
    self,
    text: str,
    *,
    name: str = "",
    content_type: str = "text/plain",
    content_encoding: str = "",
    schema_url: str = "",
    metadata: Optional[Dict[str, str]] = None,
) -> None:
    """
    Write a string as a text/plain object (or other text/* content type).

    Args:
        text: The string to write
        name: Optional name for the object
        content_type: MIME type recorded for the object
        content_encoding: Optional content encoding, forwarded to write()
        schema_url: Optional schema URL, forwarded to write()
        metadata: Optional metadata key-value pairs
    """
    # content_encoding/schema_url default to the same values write() uses,
    # so callers that omit them get the previous behavior.
    self.write(
        text,
        name=name,
        content_type=content_type,
        content_encoding=content_encoding,
        schema_url=schema_url,
        metadata=metadata,
    )
1030
+
1031
def write_json(
    self,
    data: Any,
    *,
    name: str = "",
    content_type: str = "application/json",
    content_encoding: str = "",
    schema_url: str = "",
    metadata: Optional[Dict[str, str]] = None,
) -> None:
    """
    Write a JSON-serializable value as an application/json object.

    Args:
        data: The value to write
        name: Optional name for the object
        content_type: MIME type recorded for the object
        content_encoding: Optional content encoding, forwarded to write()
        schema_url: Optional schema URL, forwarded to write()
        metadata: Optional metadata key-value pairs
    """
    # content_encoding/schema_url default to the same values write() uses,
    # so callers that omit them get the previous behavior.
    self.write(
        data,
        name=name,
        content_type=content_type,
        content_encoding=content_encoding,
        schema_url=schema_url,
        metadata=metadata,
    )
1041
+
1042
def write_bytes(
    self,
    data: Union[bytes, bytearray],
    *,
    name: str = "",
    content_type: str = "application/octet-stream",
    content_encoding: str = "",
    schema_url: str = "",
    metadata: Optional[Dict[str, str]] = None,
) -> None:
    """
    Write raw bytes with a configurable content type.

    Args:
        data: The payload; it is copied into an immutable ``bytes`` value
        name: Optional name for the object
        content_type: MIME type recorded for the object
        content_encoding: Optional content encoding, forwarded to write()
        schema_url: Optional schema URL, forwarded to write()
        metadata: Optional metadata key-value pairs

    Raises:
        TypeError: If ``data`` is an integer
    """
    # bytes(5) does not fail: it builds five zero bytes. Reject integers so a
    # mistaken argument cannot turn into a zero-filled payload.
    if isinstance(data, int):
        raise TypeError(
            f"write_bytes() requires a bytes-like object, not {type(data).__name__}"
        )
    self.write(
        bytes(data),
        name=name,
        content_type=content_type,
        content_encoding=content_encoding,
        schema_url=schema_url,
        metadata=metadata,
    )
1052
+
1053
def write_remote(
    self,
    uri: str,
    *,
    name: str = "",
    content_type: str = "",
    content_encoding: str = "",
    schema_url: str = "",
    metadata: Optional[Dict[str, str]] = None,
    session: str = "",
) -> None:
    """
    Emit a reference to a remote object instead of inline bytes.

    The call is handed to the session's writer unchanged. Pugmark is then
    expected to fetch the object at ``uri`` (e.g.
    ``s3://bucket/path/to/object``), compute its SHA-256, and store it in
    the session. Nothing is appended to the local state by this method.

    Args:
        uri: Remote object URI (e.g. ``s3://bucket/path/to/object``)
        name: Optional name for the object
        content_type: Optional MIME type override (defaults to remote value)
        content_encoding: Optional encoding override (defaults to remote value)
        schema_url: Optional schema URL
        metadata: Optional metadata key-value pairs
        session: Optional target session ID for cross-session writes
    """
    options = {
        "name": name,
        "content_type": content_type,
        "content_encoding": content_encoding,
        "schema_url": schema_url,
        "metadata": metadata,
        "session": session,
    }
    self.writer.write_remote(uri, **options)
1089
+
1090
def state(self) -> "State":
    """
    Drain the input stream and return the accumulated state.

    Every object still pending on the input is consumed through read()
    (which records it in the state) before the state is handed back, so
    this call does not return until the reader is exhausted.

    Returns:
        The State instance containing all objects seen so far
    """
    pending = self.read()
    for _ in pending:
        continue
    return self._state
1100
+
1101
def load(self, name: str) -> Optional[Object]:
    """
    Look up an object by name.

    The state is fetched through state(), which consumes any remaining
    input first. A name that maps to a memory namespace URL takes
    precedence and is returned as a RemoteObject pointing at that URL;
    otherwise the name is looked up among the stored objects.

    Args:
        name: The name of the object to retrieve

    Returns:
        The Object instance, or None if not found
    """
    known = self.state()

    memory_url = known.get_memory_url(name)
    if not memory_url:
        # Not a memory namespace entry: fall back to the named objects.
        return known[name] if name in known else None

    return RemoteObject(Input(url=memory_url, name=name))
1124
+
1125
def load_text(self, name: str) -> Optional[str]:
    """Return the named object's decode_text() result, or None when load() finds nothing."""
    found = self.load(name)
    if found is None:
        return None
    return found.decode_text()
1129
+
1130
def load_json(self, name: str) -> Any:
    """Return the named object's decode_json() result, or None when load() finds nothing."""
    found = self.load(name)
    if found is None:
        return None
    return found.decode_json()
1134
+
1135
def load_bytes(self, name: str) -> Optional[bytes]:
    """Return the named object's decode_data() result, or None when load() finds nothing."""
    found = self.load(name)
    if found is None:
        return None
    return found.decode_data()
1139
+
1140
def store(self, obj: Object) -> None:
    """
    Persist an object by writing it through this session.

    This is a thin alias for write(): the object is sent to the output
    stream and recorded in the state.

    Args:
        obj: The Object instance to store
    """
    emit = self.write
    emit(obj)
1148
+
1149
def pause(self, reason: str = "") -> None:
    """
    Ask for execution of the current Pugmark subprocess to be paused.

    The request is expressed as an unnamed object with content type
    "application/vnd.pugmark.pause+text" whose body is the UTF-8 encoded
    reason; it is written through write() like any other object.

    Args:
        reason: An optional string explaining why execution is being paused
    """
    payload = reason.encode("utf-8")
    marker = LocalObject(
        Output(
            data=payload,
            name="",
            content_type="application/vnd.pugmark.pause+text",
            content_encoding="",
            schema_url="",
            metadata={},
        )
    )
    self.write(marker)
1166
+
1167
def sleep(self, seconds: float, reason: str = "") -> None:
    """
    Ask for execution to be paused for a given number of seconds.

    This method does not block by itself. It writes the same kind of pause
    object as pause(), with an added "duration" metadata entry holding the
    value formatted to two decimals plus an "s" suffix (e.g. "1.50s").
    NOTE(review): the consumer of the output stream presumably honors the
    duration; confirm against the runner.

    Args:
        seconds: Number of seconds to sleep
        reason: Optional reason for the pause
    """
    duration = f"{seconds:.2f}s"
    marker = LocalObject(
        Output(
            data=reason.encode("utf-8"),
            name="",
            content_type="application/vnd.pugmark.pause+text",
            content_encoding="",
            schema_url="",
            metadata={"duration": duration},
        )
    )
    self.write(marker)
1187
+
1188
def session_id(self) -> Optional[str]:
    """
    Return the pugmark session ID held by this session.

    Per the SDK's documentation, the ID originates from the
    PUGMARK_SESSION_ID environment variable that the pugmark runner sets
    when spawning subprocesses. An empty or missing value is reported as
    None.

    Returns:
        The session ID string if available, None otherwise
    """
    return self._session_id or None
1199
+
1200
def agent(self) -> Optional[str]:
    """
    Return the agent identity held by this session.

    Per the SDK's documentation, the value originates from the
    PUGMARK_AGENT_ID environment variable that the pugmark runner sets when
    spawning subprocesses; handlers can use it to route requests to
    different implementations. An empty or missing value is reported as
    None.

    Returns:
        The agent identity string if available, None otherwise
    """
    return self._agent or None
1213
+
1214
def snapshot(self) -> Optional[int]:
    """
    Return the snapshot version number held by this session.

    The snapshot is the version of the session at the time of invocation;
    per the SDK's documentation it arrives via query parameters in RPC mode.
    The stored value is returned as-is.

    Returns:
        The snapshot version number if available, None otherwise
    """
    version = self._snapshot
    return version
1225
+
1226
def metadata(self) -> Dict[str, str]:
    """
    Return the session metadata from the manifest.

    Per the SDK's documentation, the runner type determines how metadata
    reaches the session:
    - Command runner: PUGMARK_SESSION_METADATA environment variable (JSON-encoded)
    - HTTP runner: X-Pugmark-Session-Metadata header (JSON-encoded)
    - Function runner: Passed directly to the Session constructor

    The session's own dictionary is returned, not a copy, so changes made
    by the caller are visible to later calls.

    Returns:
        Dictionary of metadata key-value pairs, empty dict if none set
    """
    session_metadata = self._metadata
    return session_metadata
1239
+
1240
@contextmanager
def traced_execution(self) -> Iterator[None]:
    """
    Wrap handler execution in a tracing span when a tracer is available.

    With a tracer present, the managed block runs inside a
    "pugmark.handler" span of kind CONSUMER. Span attributes:
    - pugmark.session: The session ID (only when non-empty)
    - pugmark.agent: The agent ID (only when non-empty)
    - pugmark.snapshot: The snapshot version (only when not None)

    Without a tracer the managed block simply runs with no span.

    Yields:
        None
    """
    tracer = _get_tracer()
    if tracer is None:
        yield
        return

    # Identity attributes are attached only when they carry a value.
    attributes: Dict[str, Any] = {
        key: value
        for key, value in (
            ("pugmark.session", self._session_id),
            ("pugmark.agent", self._agent),
        )
        if value
    }
    # A snapshot of 0 is a valid version, so it is tested against None.
    if self._snapshot is not None:
        attributes["pugmark.snapshot"] = self._snapshot

    span = tracer.start_as_current_span(
        "pugmark.handler",
        kind=otel_trace.SpanKind.CONSUMER,
        attributes=attributes,
    )
    with span:
        yield
1278
+
1279
@contextmanager
def open(
    self,
    name: str,
    content_type: Optional[str] = None,
    content_encoding: Optional[str] = None,
    schema_url: Optional[str] = None,
    metadata: Optional[Dict[str, str]] = None,
) -> Iterator[IO[bytes]]:
    """
    Edit a named object through a temporary binary file.

    The object is looked up with load() (memory namespaces included) and its
    body is copied into a temporary file positioned at offset 0; a missing
    object yields an empty file. When the managed block finishes without an
    exception, the file is read back and, if its content differs from what
    was loaded, stored as a new object under ``name``. If the block raises,
    nothing is stored.

    Args:
        name: The name of the object to open
        content_type: Optional content type for the stored object
        content_encoding: Optional content encoding for the stored object
        schema_url: URL of the schema associated with the stored object
        metadata: Optional metadata dictionary for the stored object

    Yields:
        A temporary file-like object containing the object's data
    """
    existing = self.load(name)
    original = existing.body() if existing else b""

    with tempfile.NamedTemporaryFile(delete=True, mode="w+b") as scratch:
        scratch.write(original)
        scratch.seek(0)
        yield scratch
        # Re-read whatever the caller left in the file.
        scratch.flush()
        scratch.seek(0)
        edited = scratch.read()

    if edited == original:
        return

    # Unspecified attributes are written as empty values.
    replacement = LocalObject(
        Output(
            data=edited,
            name=name or "",
            content_type=content_type or "",
            content_encoding=content_encoding or "",
            schema_url=schema_url or "",
            metadata=metadata or {},
        )
    )
    self.store(replacement)
1332
+
1333
+
1334
# Default session instance for backward compatibility.
# It is created when the module is imported; the module-level convenience
# functions below all delegate to it.
_default_session = Session()


# Legacy module-level aliases for the default session's reader and writer
# (these are the reader/writer objects themselves, not functions).
stdin = _default_session.reader
stdout = _default_session.writer
1341
+
1342
+
1343
def default_session() -> Session:
    """
    Return the Session instance behind the module-level functions.

    Useful for advanced cases such as inspecting the session's state
    directly or working with the default session in tests.

    Returns:
        The default Session instance
    """
    session = _default_session
    return session
1355
+
1356
+
1357
def events() -> Iterator[Event]:
    """
    Iterate over the default session's events.

    A shortcut for ``default_session().events()`` so handlers do not need to
    manage a Session instance themselves.

    Example usage:
        import pugmark

        for event in pugmark.events():
            match event:
                case pugmark.StartEvent():
                    print("Session started")
                case pugmark.PushEvent() as push:
                    print(f"Object pushed: {push.object.name()}")

    Returns:
        Iterator yielding Event instances from the default session
    """
    session = _default_session
    return session.events()
1378
+
1379
+
1380
def read() -> Iterator[Object]:
    """
    Iterate over input objects using the default session.

    This is the most common way to read Input records in subprocess
    implementations.

    Returns:
        Iterator yielding Object instances from stdin
    """
    session = _default_session
    return session.read()
1390
+
1391
+
1392
def write(
    value: Any,
    *,
    name: str = "",
    content_type: Optional[str] = None,
    content_encoding: str = "",
    schema_url: str = "",
    metadata: Optional[Dict[str, str]] = None,
) -> None:
    """
    Write a value through the default session.

    ``value`` may be a :class:`pugmark.Object` (written as-is) or a raw Python
    value (str, bytes, dict, list, or any JSON-serializable scalar). Content
    type is auto-detected from the value's type unless explicitly provided.

    Examples::

        pugmark.write("hello")                        # text/plain
        pugmark.write({"answer": 42}, name="reply")   # application/json
        pugmark.write(bytes([0, 1]), name="bin")      # application/octet-stream
    """
    options = {
        "name": name,
        "content_type": content_type,
        "content_encoding": content_encoding,
        "schema_url": schema_url,
        "metadata": metadata,
    }
    _default_session.write(value, **options)
1422
+
1423
+
1424
def write_text(
    text: str,
    *,
    name: str = "",
    content_type: str = "text/plain",
    metadata: Optional[Dict[str, str]] = None,
) -> None:
    """Write a string through the default session (text/plain unless overridden)."""
    options = {"name": name, "content_type": content_type, "metadata": metadata}
    _default_session.write_text(text, **options)
1433
+
1434
+
1435
def write_json(
    data: Any,
    *,
    name: str = "",
    content_type: str = "application/json",
    metadata: Optional[Dict[str, str]] = None,
) -> None:
    """Write a JSON-serializable value through the default session."""
    options = {"name": name, "content_type": content_type, "metadata": metadata}
    _default_session.write_json(data, **options)
1444
+
1445
+
1446
def write_bytes(
    data: Union[bytes, bytearray],
    *,
    name: str = "",
    content_type: str = "application/octet-stream",
    metadata: Optional[Dict[str, str]] = None,
) -> None:
    """Write raw bytes through the default session (octet-stream unless overridden)."""
    options = {"name": name, "content_type": content_type, "metadata": metadata}
    _default_session.write_bytes(data, **options)
1455
+
1456
+
1457
def write_remote(
    uri: str,
    *,
    name: str = "",
    content_type: str = "",
    content_encoding: str = "",
    schema_url: str = "",
    metadata: Optional[Dict[str, str]] = None,
    session: str = "",
) -> None:
    """
    Emit a remote object reference through the default session.

    Rather than sending bytes inline, pugmark fetches the object at ``uri``
    (e.g. ``s3://bucket/path/to/object``), computes its SHA-256, and stores
    it in the session.

    Args:
        uri: Remote object URI (e.g. ``s3://bucket/path/to/object``)
        name: Optional name for the object
        content_type: Optional MIME type override (defaults to remote value)
        content_encoding: Optional encoding override (defaults to remote value)
        schema_url: Optional schema URL
        metadata: Optional metadata key-value pairs
        session: Optional target session ID for cross-session writes
    """
    options = {
        "name": name,
        "content_type": content_type,
        "content_encoding": content_encoding,
        "schema_url": schema_url,
        "metadata": metadata,
        "session": session,
    }
    _default_session.write_remote(uri, **options)
1492
+
1493
+
1494
def state() -> State:
    """
    Return the default session's state.

    Delegates to the default session's state() method.

    Returns:
        The State instance containing all objects seen so far
    """
    session = _default_session
    return session.state()
1502
+
1503
+
1504
def load(name: str) -> Optional[Object]:
    """
    Look up an object by name in the default session.

    Args:
        name: The name of the object to retrieve

    Returns:
        The Object instance, or None if not found
    """
    session = _default_session
    return session.load(name)
1515
+
1516
+
1517
def load_text(name: str) -> Optional[str]:
    """Return the named object as UTF-8 text via the default session, or None if absent."""
    session = _default_session
    return session.load_text(name)
1520
+
1521
+
1522
def load_json(name: str) -> Any:
    """Return the named object decoded as JSON via the default session, or None if absent."""
    session = _default_session
    return session.load_json(name)
1525
+
1526
+
1527
def load_bytes(name: str) -> Optional[bytes]:
    """Return the named object's raw bytes via the default session, or None if absent."""
    session = _default_session
    return session.load_bytes(name)
1530
+
1531
+
1532
def store(object: Object) -> None:
    """
    Persist an object through the default session.

    Args:
        object: The Object instance to store
    """
    # The parameter name shadows the builtin but is kept for keyword callers.
    session = _default_session
    session.store(object)
1540
+
1541
+
1542
@contextmanager
def open(
    name: str,
    content_type: Optional[str] = None,
    content_encoding: Optional[str] = None,
    schema_url: Optional[str] = None,
    metadata: Optional[Dict[str, str]] = None,
) -> Iterator[IO[bytes]]:
    """
    Open a named object as a temporary file via the default session.

    All arguments are forwarded to the default session's open().

    Args:
        name: The name of the object to open
        content_type: Optional content type for the object
        content_encoding: Optional content encoding for the object
        schema_url: URL of the schema associated with this object
        metadata: Optional metadata dictionary for the object

    Yields:
        A temporary file-like object containing the object's data
    """
    session_file = _default_session.open(
        name, content_type, content_encoding, schema_url, metadata
    )
    with session_file as handle:
        yield handle
1565
+
1566
+
1567
def pause(reason: str = "") -> None:
    """
    Request a pause of the current Pugmark subprocess via the default session.

    Args:
        reason: An optional string explaining why execution is being paused
    """
    session = _default_session
    session.pause(reason)
1575
+
1576
+
1577
def sleep(seconds: float, reason: str = "") -> None:
    """
    Request a timed pause via the default session.

    Delegates to the default session's sleep().

    Args:
        seconds: Number of seconds to sleep
        reason: Optional reason for the pause
    """
    session = _default_session
    session.sleep(seconds, reason)
1586
+
1587
+
1588
def session_id() -> Optional[str]:
    """
    Return the pugmark session ID of the default session.

    Per the SDK's documentation, the ID comes from the PUGMARK_SESSION_ID
    environment variable, which is set automatically inside a pugmark
    subprocess (via `pugmark run` or `pugmark exec`). None is returned when
    not running under pugmark.

    Returns:
        The session ID string if available, None otherwise

    Example:
        import pugmark

        sid = pugmark.session_id()
        if sid:
            print(f"Running in session: {sid}")
    """
    session = _default_session
    return session.session_id()
1607
+
1608
+
1609
def agent() -> Optional[str]:
    """
    Return the agent identity of the default session.

    Per the SDK's documentation, the value comes from the PUGMARK_AGENT_ID
    environment variable, which is set automatically inside a pugmark
    subprocess. Handlers can use it to route requests to different
    implementations.

    Returns:
        The agent identity string if available, None otherwise

    Example:
        import pugmark

        for event in pugmark.events():
            agent_id = pugmark.agent()
            if agent_id == "claude":
                handle_claude(event)
            elif agent_id == "gpt4":
                handle_gpt4(event)
    """
    session = _default_session
    return session.agent()
1632
+
1633
+
1634
def metadata() -> Dict[str, str]:
    """
    Return the default session's metadata from the manifest.

    Per the SDK's documentation, the runner type determines how metadata is
    delivered:
    - Command runner: PUGMARK_SESSION_METADATA environment variable (JSON-encoded)
    - HTTP runner: X-Pugmark-Session-Metadata header (JSON-encoded)

    Returns:
        Dictionary of metadata key-value pairs, empty dict if none set

    Example:
        import pugmark

        meta = pugmark.metadata()
        user_id = meta.get("user-id")
        tenant = meta.get("tenant")
    """
    session = _default_session
    return session.metadata()
1654
+
1655
+
1656
@contextmanager
def traced_execution() -> Iterator[None]:
    """
    Run the managed block inside the default session's tracing span.

    Wraps the default session's traced_execution(), which creates a
    "pugmark.handler" span with session context attributes when
    OpenTelemetry is available.

    Example:
        import pugmark

        with pugmark.traced_execution():
            for event in pugmark.events():
                process(event)

    Yields:
        None
    """
    span_scope = _default_session.traced_execution()
    with span_scope:
        yield