PyPI - langgraph-runtime-inmem - Versions diffs - 0.9.0__py3-none-any.whl → 0.18.1__py3-none-any.whl - Mend

langgraph-runtime-inmem 0.9.0-py3-none-any.whl → 0.18.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11)

langgraph_runtime_inmem/__init__.py +1 -1
langgraph_runtime_inmem/database.py +3 -1
langgraph_runtime_inmem/inmem_stream.py +24 -2
langgraph_runtime_inmem/lifespan.py +41 -2
langgraph_runtime_inmem/metrics.py +1 -1
langgraph_runtime_inmem/ops.py +322 -229
langgraph_runtime_inmem/queue.py +7 -16
{langgraph_runtime_inmem-0.9.0.dist-info → langgraph_runtime_inmem-0.18.1.dist-info}/METADATA +3 -3
langgraph_runtime_inmem-0.18.1.dist-info/RECORD +13 -0
langgraph_runtime_inmem-0.9.0.dist-info/RECORD +0 -13
{langgraph_runtime_inmem-0.9.0.dist-info → langgraph_runtime_inmem-0.18.1.dist-info}/WHEEL +0 -0

langgraph_runtime_inmem/__init__.py CHANGED Viewed

@@ -9,7 +9,7 @@ from langgraph_runtime_inmem import (
     store,
 )
-__version__ = "0.9.0"
+__version__ = "0.18.1"
 __all__ = [
     "ops",
     "database",

langgraph_runtime_inmem/database.py CHANGED Viewed

@@ -142,7 +142,9 @@ class InMemConnectionProto:
 @asynccontextmanager
-async def connect(*, __test__: bool = False) -> AsyncIterator["AsyncConnectionProto"]:
+async def connect(
+    *, supports_core_api: bool = False, __test__: bool = False
+) -> AsyncIterator["AsyncConnectionProto"]:
     yield InMemConnectionProto()

langgraph_runtime_inmem/inmem_stream.py CHANGED Viewed

@@ -58,6 +58,7 @@ class StreamManager:
         )  # Dict[str, List[asyncio.Queue]]
         self.control_keys = defaultdict(lambda: defaultdict())
         self.control_queues = defaultdict(lambda: defaultdict(list))
+        self.thread_streams = defaultdict(list)
         self.message_stores = defaultdict(
             lambda: defaultdict(list[Message])
@@ -95,7 +96,7 @@ class StreamManager:
     async def put(
         self,
-        run_id: UUID | str,
+        run_id: UUID | str | None,
         thread_id: UUID | str | None,
         message: Message,
         resumable: bool = False,
@@ -107,9 +108,10 @@ class StreamManager:
             thread_id = _ensure_uuid(thread_id)
         message.id = _generate_ms_seq_id().encode()
+        # For resumable run streams, embed the generated message ID into the frame
+        topic = message.topic.decode()
         if resumable:
             self.message_stores[thread_id][run_id].append(message)
-        topic = message.topic.decode()
         if "control" in topic:
             self.control_keys[thread_id][run_id] = message
             queues = self.control_queues[thread_id][run_id]
@@ -121,6 +123,20 @@ class StreamManager:
             if isinstance(result, Exception):
                 logger.exception(f"Failed to put message in queue: {result}")
+    async def put_thread(
+        self,
+        thread_id: UUID | str,
+        message: Message,
+    ) -> None:
+        thread_id = _ensure_uuid(thread_id)
+        message.id = _generate_ms_seq_id().encode()
+        queues = self.thread_streams[thread_id]
+        coros = [queue.put(message) for queue in queues]
+        results = await asyncio.gather(*coros, return_exceptions=True)
+        for result in results:
+            if isinstance(result, Exception):
+                logger.exception(f"Failed to put message in queue: {result}")
     async def add_queue(
         self, run_id: UUID | str, thread_id: UUID | str | None
     ) -> asyncio.Queue:
@@ -145,6 +161,12 @@ class StreamManager:
         self.control_queues[thread_id][run_id].append(queue)
         return queue
+    async def add_thread_stream(self, thread_id: UUID | str) -> asyncio.Queue:
+        thread_id = _ensure_uuid(thread_id)
+        queue = ContextQueue()
+        self.thread_streams[thread_id].append(queue)
+        return queue
     async def remove_queue(
         self, run_id: UUID | str, thread_id: UUID | str | None, queue: asyncio.Queue
     ):

langgraph_runtime_inmem/lifespan.py CHANGED Viewed

@@ -14,9 +14,17 @@ from langgraph_runtime_inmem.database import start_pool, stop_pool
 logger = structlog.stdlib.get_logger(__name__)
+_LAST_LIFESPAN_ERROR: BaseException | None = None
+def get_last_error() -> BaseException | None:
+    return _LAST_LIFESPAN_ERROR
 @asynccontextmanager
 async def lifespan(
     app: Starlette | None = None,
+    cancel_event: asyncio.Event | None = None,
     taskset: set[asyncio.Task] | None = None,
     **kwargs: Any,
 ):
@@ -41,13 +49,31 @@ async def lifespan(
     except RuntimeError:
         await logger.aerror("Failed to set loop")
+    global _LAST_LIFESPAN_ERROR
+    _LAST_LIFESPAN_ERROR = None
     await start_http_client()
     await start_pool()
     await start_ui_bundler()
+    async def _log_graph_load_failure(err: graph.GraphLoadError) -> None:
+        cause = err.__cause__ or err.cause
+        log_fields = err.log_fields()
+        log_fields["action"] = "fix_user_graph"
+        await logger.aerror(
+            f"Graph '{err.spec.id}' failed to load: {err.cause_message}",
+            **log_fields,
+        )
+        await logger.adebug(
+            "Full graph load failure traceback (internal)",
+            **{k: v for k, v in log_fields.items() if k != "user_traceback"},
+            exc_info=cause,
+        )
     try:
         async with SimpleTaskGroup(
             cancel=True,
-            taskset=taskset,
+            cancel_event=cancel_event,
             taskgroup_name="Lifespan",
         ) as tg:
             tg.create_task(metadata_loop())
@@ -76,11 +102,21 @@ async def lifespan(
             var_child_runnable_config.set(langgraph_config)
             # Keep after the setter above so users can access the store from within the factory function
-            await graph.collect_graphs_from_env(True)
+            try:
+                await graph.collect_graphs_from_env(True)
+            except graph.GraphLoadError as exc:
+                _LAST_LIFESPAN_ERROR = exc
+                await _log_graph_load_failure(exc)
+                raise
             if config.N_JOBS_PER_WORKER > 0:
                 tg.create_task(queue_with_signal())
             yield
+    except graph.GraphLoadError as exc:
+        _LAST_LIFESPAN_ERROR = exc
+        raise
+    except asyncio.CancelledError:
+        pass
     finally:
         await api_store.exit_store()
         await stop_ui_bundler()
@@ -97,3 +133,6 @@ async def queue_with_signal():
     except Exception as exc:
         logger.exception("Queue failed. Signaling shutdown", exc_info=exc)
         signal.raise_signal(signal.SIGINT)
+lifespan.get_last_error = get_last_error  # type: ignore[attr-defined]

langgraph_runtime_inmem/metrics.py CHANGED Viewed

@@ -1,7 +1,7 @@
 from langgraph_runtime_inmem.queue import get_num_workers
-def get_metrics() -> dict[str, int]:
+def get_metrics() -> dict[str, dict[str, int]]:
     from langgraph_api import config
     workers_max = config.N_JOBS_PER_WORKER

langgraph_runtime_inmem/ops.py CHANGED Viewed

@@ -11,7 +11,6 @@ import uuid
 from collections import defaultdict
 from collections.abc import AsyncIterator, Sequence
 from contextlib import asynccontextmanager
-from copy import deepcopy
 from datetime import UTC, datetime, timedelta
 from typing import Any, Literal, cast
 from uuid import UUID, uuid4
@@ -29,6 +28,7 @@ from langgraph_runtime_inmem.checkpoint import Checkpointer
 from langgraph_runtime_inmem.database import InMemConnectionProto, connect
 from langgraph_runtime_inmem.inmem_stream import (
     THREADLESS_KEY,
+    ContextQueue,
     Message,
     get_stream_manager,
 )
@@ -58,12 +58,13 @@ if typing.TYPE_CHECKING:
         Thread,
         ThreadSelectField,
         ThreadStatus,
+        ThreadStreamMode,
         ThreadUpdateResponse,
     )
     from langgraph_api.schema import Interrupt as InterruptSchema
-    from langgraph_api.serde import Fragment
     from langgraph_api.utils import AsyncConnectionProto
+StreamHandler = ContextQueue
 logger = structlog.stdlib.get_logger(__name__)
@@ -228,7 +229,7 @@ class Assistants(Authenticated):
                 if assistant["assistant_id"] == assistant_id and (
                     not filters or _check_filter_match(assistant["metadata"], filters)
                 ):
-                    yield assistant
+                    yield copy.deepcopy(assistant)
         return _yield_result()
@@ -247,6 +248,8 @@ class Assistants(Authenticated):
         description: str | None = None,
     ) -> AsyncIterator[Assistant]:
         """Insert an assistant."""
+        from langgraph_api.graph import GRAPHS
         assistant_id = _ensure_uuid(assistant_id)
         metadata = metadata if metadata is not None else {}
         filters = await Assistants.handle_event(
@@ -268,6 +271,9 @@ class Assistants(Authenticated):
                 detail="Cannot specify both configurable and context. Prefer setting context alone. Context was introduced in LangGraph 0.6.0 and is the long term planned replacement for configurable.",
             )
+        if graph_id not in GRAPHS:
+            raise HTTPException(status_code=404, detail=f"Graph {graph_id} not found")
         # Keep config and context up to date with one another
         if config.get("configurable"):
             context = config["configurable"]
@@ -555,6 +561,8 @@ class Assistants(Authenticated):
                 "metadata": version_data["metadata"],
                 "version": version_data["version"],
                 "updated_at": datetime.now(UTC),
+                "name": version_data["name"],
+                "description": version_data["description"],
             }
         )
@@ -738,6 +746,7 @@ class Threads(Authenticated):
     async def search(
         conn: InMemConnectionProto,
         *,
+        ids: list[str] | list[UUID] | None = None,
         metadata: MetadataInput,
         values: MetadataInput,
         status: ThreadStatus | None,
@@ -765,7 +774,19 @@ class Threads(Authenticated):
         )
         # Apply filters
+        id_set: set[UUID] | None = None
+        if ids:
+            id_set = set()
+            for i in ids:
+                try:
+                    id_set.add(_ensure_uuid(i))
+                except Exception:
+                    raise HTTPException(
+                        status_code=400, detail="Invalid thread ID " + str(i)
+                    ) from None
         for thread in threads:
+            if id_set is not None and thread.get("thread_id") not in id_set:
+                continue
             if filters and not _check_filter_match(thread["metadata"], filters):
                 continue
@@ -943,6 +964,7 @@ class Threads(Authenticated):
         thread_id: UUID,
         *,
         metadata: MetadataValue,
+        ttl: ThreadTTLConfig | None = None,
         ctx: Auth.types.BaseAuthContext | None = None,
     ) -> AsyncIterator[Thread]:
         """Update a thread."""
@@ -1215,13 +1237,23 @@ class Threads(Authenticated):
         """Create a copy of an existing thread."""
         thread_id = _ensure_uuid(thread_id)
         new_thread_id = uuid4()
-        filters = await Threads.handle_event(
+        read_filters = await Threads.handle_event(
             ctx,
             "read",
             Auth.types.ThreadsRead(
+                thread_id=thread_id,
+            ),
+        )
+        # Assert that the user has permissions to create a new thread.
+        # (We don't actually need the filters.)
+        await Threads.handle_event(
+            ctx,
+            "create",
+            Auth.types.ThreadsCreate(
                 thread_id=new_thread_id,
             ),
         )
         async with conn.pipeline():
             # Find the original thread in our store
             original_thread = next(
@@ -1230,8 +1262,8 @@ class Threads(Authenticated):
             if not original_thread:
                 return _empty_generator()
-            if filters and not _check_filter_match(
-                original_thread["metadata"], filters
+            if read_filters and not _check_filter_match(
+                original_thread["metadata"], read_filters
             ):
                 return _empty_generator()
@@ -1240,7 +1272,7 @@ class Threads(Authenticated):
                 "thread_id": new_thread_id,
                 "created_at": datetime.now(tz=UTC),
                 "updated_at": datetime.now(tz=UTC),
-                "metadata": deepcopy(original_thread["metadata"]),
+                "metadata": copy.deepcopy(original_thread["metadata"]),
                 "status": "idle",
                 "config": {},
             }
@@ -1327,7 +1359,14 @@ class Threads(Authenticated):
                 )
             metadata = thread.get("metadata", {})
-            thread_config = thread.get("config", {})
+            thread_config = cast(dict[str, Any], thread.get("config", {}))
+            thread_config = {
+                **thread_config,
+                "configurable": {
+                    **thread_config.get("configurable", {}),
+                    **config.get("configurable", {}),
+                },
+            }
             # Fallback to graph_id from run if not in thread metadata
             graph_id = metadata.get("graph_id")
@@ -1377,6 +1416,7 @@ class Threads(Authenticated):
             """Add state to a thread."""
             from langgraph_api.graph import get_graph
             from langgraph_api.schema import ThreadUpdateResponse
+            from langgraph_api.state import state_snapshot_to_thread_state
             from langgraph_api.store import get_store
             from langgraph_api.utils import fetchone
@@ -1414,6 +1454,13 @@ class Threads(Authenticated):
                     status_code=409,
                     detail=f"Thread {thread_id} has in-flight runs: {pending_runs}",
                 )
+            thread_config = {
+                **thread_config,
+                "configurable": {
+                    **thread_config.get("configurable", {}),
+                    **config.get("configurable", {}),
+                },
+            }
             # Fallback to graph_id from run if not in thread metadata
             graph_id = metadata.get("graph_id")
@@ -1454,6 +1501,19 @@ class Threads(Authenticated):
                             thread["values"] = state.values
                             break
+                    # Publish state update event
+                    from langgraph_api.serde import json_dumpb
+                    event_data = {
+                        "state": state_snapshot_to_thread_state(state),
+                        "thread_id": str(thread_id),
+                    }
+                    await Threads.Stream.publish(
+                        thread_id,
+                        "state_update",
+                        json_dumpb(event_data),
+                    )
                     return ThreadUpdateResponse(
                         checkpoint=next_config["configurable"],
                         # Including deprecated fields
@@ -1496,7 +1556,14 @@ class Threads(Authenticated):
                 thread_iter, not_found_detail=f"Thread {thread_id} not found."
             )
-            thread_config = thread["config"]
+            thread_config = cast(dict[str, Any], thread["config"])
+            thread_config = {
+                **thread_config,
+                "configurable": {
+                    **thread_config.get("configurable", {}),
+                    **config.get("configurable", {}),
+                },
+            }
             metadata = thread["metadata"]
             if not thread:
@@ -1543,6 +1610,19 @@ class Threads(Authenticated):
                             thread["values"] = state.values
                             break
+                    # Publish state update event
+                    from langgraph_api.serde import json_dumpb
+                    event_data = {
+                        "state": state,
+                        "thread_id": str(thread_id),
+                    }
+                    await Threads.Stream.publish(
+                        thread_id,
+                        "state_update",
+                        json_dumpb(event_data),
+                    )
                     return ThreadUpdateResponse(
                         checkpoint=next_config["configurable"],
                     )
@@ -1584,7 +1664,14 @@ class Threads(Authenticated):
             if not _check_filter_match(thread_metadata, filters):
                 return []
-            thread_config = thread["config"]
+            thread_config = cast(dict[str, Any], thread["config"])
+            thread_config = {
+                **thread_config,
+                "configurable": {
+                    **thread_config.get("configurable", {}),
+                    **config.get("configurable", {}),
+                },
+            }
             # If graph_id exists, get state history
             if graph_id := thread_metadata.get("graph_id"):
                 async with get_graph(
@@ -1613,7 +1700,9 @@ class Threads(Authenticated):
             return []
-    class Stream:
+    class Stream(Authenticated):
+        resource = "threads"
         @staticmethod
         async def subscribe(
             conn: InMemConnectionProto | AsyncConnectionProto,
@@ -1626,6 +1715,13 @@ class Threads(Authenticated):
             # Create new queues only for runs not yet seen
             thread_id = _ensure_uuid(thread_id)
+            # Add thread stream queue
+            if thread_id not in seen_runs:
+                queue = await stream_manager.add_thread_stream(thread_id)
+                queues.append((thread_id, queue))
+                seen_runs.add(thread_id)
             for run in conn.store["runs"]:
                 if run["thread_id"] == thread_id:
                     run_id = run["run_id"]
@@ -1641,9 +1737,32 @@ class Threads(Authenticated):
             thread_id: UUID,
             *,
             last_event_id: str | None = None,
+            stream_modes: list[ThreadStreamMode],
+            ctx: Auth.types.BaseAuthContext | None = None,
         ) -> AsyncIterator[tuple[bytes, bytes, bytes | None]]:
             """Stream the thread output."""
-            from langgraph_api.serde import json_loads
+            await Threads.Stream.check_thread_stream_auth(thread_id, ctx)
+            from langgraph_api.utils.stream_codec import (
+                decode_stream_message,
+            )
+            def should_filter_event(event_name: str, message_bytes: bytes) -> bool:
+                """Check if an event should be filtered out based on stream_modes."""
+                if "run_modes" in stream_modes and event_name != "state_update":
+                    return False
+                if "state_update" in stream_modes and event_name == "state_update":
+                    return False
+                if "lifecycle" in stream_modes and event_name == "metadata":
+                    try:
+                        message_data = orjson.loads(message_bytes)
+                        if message_data.get("status") == "run_done":
+                            return False
+                        if "attempt" in message_data and "run_id" in message_data:
+                            return False
+                    except (orjson.JSONDecodeError, TypeError):
+                        pass
+                return True
             stream_manager = get_stream_manager()
             seen_runs: set[UUID] = set()
@@ -1673,23 +1792,24 @@ class Threads(Authenticated):
                         # Yield sorted events
                         for message, run_id in all_events:
-                            data = json_loads(message.data)
-                            event_name = data["event"]
-                            message_content = data["message"]
-                            if event_name == "control":
-                                if message_content == b"done":
-                                    yield (
-                                        b"metadata",
-                                        orjson.dumps(
-                                            {"status": "run_done", "run_id": run_id}
-                                        ),
-                                        message.id,
+                            decoded = decode_stream_message(
+                                message.data, channel=message.topic
+                            )
+                            event_bytes = decoded.event_bytes
+                            message_bytes = decoded.message_bytes
+                            if event_bytes == b"control":
+                                if message_bytes == b"done":
+                                    event_bytes = b"metadata"
+                                    message_bytes = orjson.dumps(
+                                        {"status": "run_done", "run_id": run_id}
                                     )
-                            else:
+                            if not should_filter_event(
+                                event_bytes.decode("utf-8"), message_bytes
+                            ):
                                 yield (
-                                    event_name.encode(),
-                                    base64.b64decode(message_content),
+                                    event_bytes,
+                                    message_bytes,
                                     message.id,
                                 )
@@ -1708,28 +1828,27 @@ class Threads(Authenticated):
                                 message = await asyncio.wait_for(
                                     queue.get(), timeout=0.2
                                 )
-                                data = json_loads(message.data)
-                                event_name = data["event"]
-                                message_content = data["message"]
-                                if event_name == "control":
-                                    if message_content == b"done":
-                                        # Extract run_id from topic
-                                        topic = message.topic.decode()
-                                        run_id = topic.split("run:")[1].split(":")[0]
-                                        yield (
-                                            b"metadata",
-                                            orjson.dumps(
-                                                {"status": "run_done", "run_id": run_id}
-                                            ),
-                                            message.id,
-                                        )
-                                else:
-                                    yield (
-                                        event_name.encode(),
-                                        base64.b64decode(message_content),
-                                        message.id,
+                                decoded = decode_stream_message(
+                                    message.data, channel=message.topic
+                                )
+                                event = decoded.event_bytes
+                                event_name = event.decode("utf-8")
+                                payload = decoded.message_bytes
+                                if event == b"control" and payload == b"done":
+                                    topic = message.topic.decode()
+                                    run_id = topic.split("run:")[1].split(":")[0]
+                                    meta_event = b"metadata"
+                                    meta_payload = orjson.dumps(
+                                        {"status": "run_done", "run_id": run_id}
                                     )
+                                    if not should_filter_event(
+                                        "metadata", meta_payload
+                                    ):
+                                        yield (meta_event, meta_payload, message.id)
+                                else:
+                                    if not should_filter_event(event_name, payload):
+                                        yield (event, payload, message.id)
                             except TimeoutError:
                                 continue
@@ -1758,6 +1877,41 @@ class Threads(Authenticated):
                         # Ignore cleanup errors
                         pass
+        @staticmethod
+        async def publish(
+            thread_id: UUID | str,
+            event: str,
+            message: bytes,
+        ) -> None:
+            """Publish a thread-level event to the thread stream."""
+            from langgraph_api.utils.stream_codec import STREAM_CODEC
+            topic = f"thread:{thread_id}:stream".encode()
+            stream_manager = get_stream_manager()
+            payload = STREAM_CODEC.encode(event, message)
+            await stream_manager.put_thread(
+                str(thread_id), Message(topic=topic, data=payload)
+            )
+        @staticmethod
+        async def check_thread_stream_auth(
+            thread_id: UUID,
+            ctx: Auth.types.BaseAuthContext | None = None,
+        ) -> None:
+            async with connect() as conn:
+                filters = await Threads.Stream.handle_event(
+                    ctx,
+                    "read",
+                    Auth.types.ThreadsRead(thread_id=thread_id),
+                )
+                if filters:
+                    thread = await Threads._get_with_filters(
+                        cast(InMemConnectionProto, conn), thread_id, filters
+                    )
+                    if not thread:
+                        raise HTTPException(status_code=404, detail="Thread not found")
     @staticmethod
     async def count(
         conn: InMemConnectionProto,
@@ -1821,38 +1975,37 @@ class Runs(Authenticated):
         if not pending_runs and not running_runs:
             return {
                 "n_pending": 0,
-                "max_age_secs": None,
-                "med_age_secs": None,
+                "pending_runs_wait_time_max_secs": None,
+                "pending_runs_wait_time_med_secs": None,
                 "n_running": 0,
             }
-        # Get all creation timestamps
-        created_times = [run.get("created_at") for run in (pending_runs + running_runs)]
-        created_times = [
-            t for t in created_times if t is not None
-        ]  # Filter out None values
-        if not created_times:
-            return {
-                "n_pending": len(pending_runs),
-                "n_running": len(running_runs),
-                "max_age_secs": None,
-                "med_age_secs": None,
-            }
-        # Find oldest (max age)
-        oldest_time = min(created_times)  # Earliest timestamp = oldest run
-        # Find median age
-        sorted_times = sorted(created_times)
-        median_idx = len(sorted_times) // 2
-        median_time = sorted_times[median_idx]
+        now = datetime.now(UTC)
+        pending_waits: list[float] = []
+        for run in pending_runs:
+            created_at = run.get("created_at")
+            if not isinstance(created_at, datetime):
+                continue
+            if created_at.tzinfo is None:
+                created_at = created_at.replace(tzinfo=UTC)
+            pending_waits.append((now - created_at).total_seconds())
+        max_pending_wait = max(pending_waits) if pending_waits else None
+        if pending_waits:
+            sorted_waits = sorted(pending_waits)
+            half = len(sorted_waits) // 2
+            if len(sorted_waits) % 2 == 1:
+                med_pending_wait = sorted_waits[half]
+            else:
+                med_pending_wait = (sorted_waits[half - 1] + sorted_waits[half]) / 2
+        else:
+            med_pending_wait = None
         return {
             "n_pending": len(pending_runs),
             "n_running": len(running_runs),
-            "max_age_secs": oldest_time,
-            "med_age_secs": median_time,
+            "pending_runs_wait_time_max_secs": max_pending_wait,
+            "pending_runs_wait_time_med_secs": med_pending_wait,
         }
     @staticmethod
@@ -1916,12 +2069,16 @@ class Runs(Authenticated):
     @asynccontextmanager
     @staticmethod
     async def enter(
-        run_id: UUID, thread_id: UUID | None, loop: asyncio.AbstractEventLoop
+        run_id: UUID,
+        thread_id: UUID | None,
+        loop: asyncio.AbstractEventLoop,
+        resumable: bool,
     ) -> AsyncIterator[ValueEvent]:
         """Enter a run, listen for cancellation while running, signal when done."
         This method should be called as a context manager by a worker executing a run.
         """
         from langgraph_api.asyncio import SimpleTaskGroup, ValueEvent
+        from langgraph_api.utils.stream_codec import STREAM_CODEC
         stream_manager = get_stream_manager()
         # Get control queue for this run (normal queue is created during run creation)
@@ -1941,12 +2098,14 @@ class Runs(Authenticated):
             )
             await stream_manager.put(run_id, thread_id, control_message)
-            # Signal done to all subscribers
+            # Signal done to all subscribers using stream codec
             stream_message = Message(
                 topic=f"run:{run_id}:stream".encode(),
-                data={"event": "control", "message": b"done"},
+                data=STREAM_CODEC.encode("control", b"done"),
+            )
+            await stream_manager.put(
+                run_id, thread_id, stream_message, resumable=resumable
             )
-            await stream_manager.put(run_id, thread_id, stream_message)
             # Remove the control_queue (normal queue is cleaned up during run deletion)
             await stream_manager.remove_control_queue(run_id, thread_id, control_queue)
@@ -1988,7 +2147,6 @@ class Runs(Authenticated):
         ctx: Auth.types.BaseAuthContext | None = None,
     ) -> AsyncIterator[Run]:
         """Create a run."""
-        from langgraph_api.config import FF_RICH_THREADS
         from langgraph_api.schema import Run, Thread
         assistant_id = _ensure_uuid(assistant_id)
@@ -2004,6 +2162,7 @@ class Runs(Authenticated):
         run_id = _ensure_uuid(run_id) if run_id else None
         metadata = metadata if metadata is not None else {}
         config = kwargs.get("config", {})
+        temporary = kwargs.get("temporary", False)
         # Handle thread creation/update
         existing_thread = next(
@@ -2013,7 +2172,7 @@ class Runs(Authenticated):
             ctx,
             "create_run",
             Auth.types.RunsCreate(
-                thread_id=thread_id,
+                thread_id=None if temporary else thread_id,
                 assistant_id=assistant_id,
                 run_id=run_id,
                 status=status,
@@ -2034,49 +2193,35 @@ class Runs(Authenticated):
             # Create new thread
             if thread_id is None:
                 thread_id = uuid4()
-            if FF_RICH_THREADS:
-                thread = Thread(
-                    thread_id=thread_id,
-                    status="busy",
-                    metadata={
-                        "graph_id": assistant["graph_id"],
-                        "assistant_id": str(assistant_id),
-                        **(config.get("metadata") or {}),
-                        **metadata,
-                    },
-                    config=Runs._merge_jsonb(
-                        assistant["config"],
-                        config,
-                        {
-                            "configurable": Runs._merge_jsonb(
-                                Runs._get_configurable(assistant["config"]),
-                            )
-                        },
-                    ),
-                    created_at=datetime.now(UTC),
-                    updated_at=datetime.now(UTC),
-                    values=b"",
-                )
-            else:
-                thread = Thread(
-                    thread_id=thread_id,
-                    status="idle",
-                    metadata={
-                        "graph_id": assistant["graph_id"],
-                        "assistant_id": str(assistant_id),
-                        **(config.get("metadata") or {}),
-                        **metadata,
+            thread = Thread(
+                thread_id=thread_id,
+                status="busy",
+                metadata={
+                    "graph_id": assistant["graph_id"],
+                    "assistant_id": str(assistant_id),
+                    **(config.get("metadata") or {}),
+                    **metadata,
+                },
+                config=Runs._merge_jsonb(
+                    assistant["config"],
+                    config,
+                    {
+                        "configurable": Runs._merge_jsonb(
+                            Runs._get_configurable(assistant["config"]),
+                        )
                     },
-                    config={},
-                    created_at=datetime.now(UTC),
-                    updated_at=datetime.now(UTC),
-                    values=b"",
-                )
+                ),
+                created_at=datetime.now(UTC),
+                updated_at=datetime.now(UTC),
+                values=b"",
+            )
             await logger.ainfo("Creating thread", thread_id=thread_id)
             conn.store["threads"].append(thread)
         elif existing_thread:
             # Update existing thread
-            if FF_RICH_THREADS and existing_thread["status"] != "busy":
+            if existing_thread["status"] != "busy":
                 existing_thread["status"] = "busy"
                 existing_thread["metadata"] = Runs._merge_jsonb(
                     existing_thread["metadata"],
@@ -2253,66 +2398,6 @@ class Runs(Authenticated):
         return _yield_deleted()
-    @staticmethod
-    async def join(
-        run_id: UUID,
-        *,
-        thread_id: UUID,
-        ctx: Auth.types.BaseAuthContext | None = None,
-    ) -> Fragment:
-        """Wait for a run to complete. If already done, return immediately.
-        Returns:
-            the final state of the run.
-        """
-        from langgraph_api.serde import Fragment
-        from langgraph_api.utils import fetchone
-        async with connect() as conn:
-            # Validate ownership
-            thread_iter = await Threads.get(conn, thread_id, ctx=ctx)
-            await fetchone(thread_iter)
-        last_chunk: bytes | None = None
-        # wait for the run to complete
-        # Rely on this join's auth
-        async for mode, chunk, _ in Runs.Stream.join(
-            run_id,
-            thread_id=thread_id,
-            ctx=ctx,
-            ignore_404=True,
-            stream_mode=["values", "updates", "error"],
-        ):
-            if mode == b"values":
-                last_chunk = chunk
-            elif mode == b"updates" and b"__interrupt__" in chunk:
-                last_chunk = chunk
-            elif mode == b"error":
-                last_chunk = orjson.dumps({"__error__": orjson.Fragment(chunk)})
-        # if we received a final chunk, return it
-        if last_chunk is not None:
-            # ie. if the run completed while we were waiting for it
-            return Fragment(last_chunk)
-        else:
-            # otherwise, the run had already finished, so fetch the state from thread
-            async with connect() as conn:
-                thread_iter = await Threads.get(conn, thread_id, ctx=ctx)
-                thread = await fetchone(thread_iter)
-                if thread["status"] == "error":
-                    return Fragment(
-                        orjson.dumps({"__error__": orjson.Fragment(thread["error"])})
-                    )
-                if thread["status"] == "interrupted":
-                    # Get an interrupt for the thread. There is the case where there are multiple interrupts for the same run and we may not show the same
-                    # interrupt, but we'll always show one. Long term we should show all of them.
-                    try:
-                        interrupt_map = thread["interrupts"]
-                        interrupt = [next(iter(interrupt_map.values()))[0]]
-                        return Fragment(orjson.dumps({"__interrupt__": interrupt}))
-                    except Exception:
-                        # No interrupt, but status is interrupted from a before/after block. Default back to values.
-                        pass
-                return thread["values"]
     @staticmethod
     async def cancel(
         conn: InMemConnectionProto | AsyncConnectionProto,
@@ -2538,7 +2623,7 @@ class Runs(Authenticated):
         async def subscribe(
             run_id: UUID,
             thread_id: UUID | None = None,
-        ) -> asyncio.Queue:
+        ) -> ContextQueue:
             """Subscribe to the run stream, returning a queue."""
             stream_manager = get_stream_manager()
             queue = await stream_manager.add_queue(_ensure_uuid(run_id), thread_id)
@@ -2562,54 +2647,38 @@ class Runs(Authenticated):
         async def join(
             run_id: UUID,
             *,
+            stream_channel: asyncio.Queue,
             thread_id: UUID,
             ignore_404: bool = False,
             cancel_on_disconnect: bool = False,
-            stream_channel: asyncio.Queue | None = None,
             stream_mode: list[StreamMode] | StreamMode | None = None,
             last_event_id: str | None = None,
             ctx: Auth.types.BaseAuthContext | None = None,
         ) -> AsyncIterator[tuple[bytes, bytes, bytes | None]]:
             """Stream the run output."""
             from langgraph_api.asyncio import create_task
-            from langgraph_api.serde import json_loads
-            queue = (
-                stream_channel
-                if stream_channel
-                else await Runs.Stream.subscribe(run_id, thread_id)
-            )
+            from langgraph_api.serde import json_dumpb
+            from langgraph_api.utils.stream_codec import decode_stream_message
+            queue = stream_channel
             try:
                 async with connect() as conn:
-                    filters = await Runs.handle_event(
-                        ctx,
-                        "read",
-                        Auth.types.ThreadsRead(thread_id=thread_id),
-                    )
-                    if filters:
-                        thread = await Threads._get_with_filters(
-                            cast(InMemConnectionProto, conn), thread_id, filters
-                        )
-                        if not thread:
-                            raise WrappedHTTPException(
-                                HTTPException(
-                                    status_code=404, detail="Thread not found"
-                                )
-                            )
+                    try:
+                        await Runs.Stream.check_run_stream_auth(run_id, thread_id, ctx)
+                    except HTTPException as e:
+                        raise WrappedHTTPException(e) from None
                     run = await Runs.get(conn, run_id, thread_id=thread_id, ctx=ctx)
                     for message in get_stream_manager().restore_messages(
                         run_id, thread_id, last_event_id
                     ):
                         data, id = message.data, message.id
-                        data = json_loads(data)
-                        mode = data["event"]
-                        message = data["message"]
+                        decoded = decode_stream_message(data, channel=message.topic)
+                        mode = decoded.event_bytes.decode("utf-8")
+                        payload = decoded.message_bytes
                         if mode == "control":
-                            if message == b"done":
+                            if payload == b"done":
                                 return
                         elif (
                             not stream_mode
@@ -2622,7 +2691,7 @@ class Runs(Authenticated):
                                 and mode.startswith("messages")
                             )
                         ):
-                            yield mode.encode(), base64.b64decode(message), id
+                            yield mode.encode(), payload, id
                             logger.debug(
                                 "Replayed run event",
                                 run_id=str(run_id),
@@ -2636,13 +2705,12 @@ class Runs(Authenticated):
                             # Wait for messages with a timeout
                             message = await asyncio.wait_for(queue.get(), timeout=0.5)
                             data, id = message.data, message.id
-                            data = json_loads(data)
-                            mode = data["event"]
-                            message = data["message"]
+                            decoded = decode_stream_message(data, channel=message.topic)
+                            mode = decoded.event_bytes.decode("utf-8")
+                            payload = decoded.message_bytes
                             if mode == "control":
-                                if message == b"done":
+                                if payload == b"done":
                                     break
                             elif (
                                 not stream_mode
@@ -2655,13 +2723,13 @@ class Runs(Authenticated):
                                     and mode.startswith("messages")
                                 )
                             ):
-                                yield mode.encode(), base64.b64decode(message), id
+                                yield mode.encode(), payload, id
                                 logger.debug(
                                     "Streamed run event",
                                     run_id=str(run_id),
                                     stream_mode=mode,
                                     message_id=id,
-                                    data=message,
+                                    data=payload,
                                 )
                         except TimeoutError:
                             # Check if the run is still pending
@@ -2675,8 +2743,10 @@ class Runs(Authenticated):
                             elif run is None:
                                 yield (
                                     b"error",
-                                    HTTPException(
-                                        status_code=404, detail="Run not found"
+                                    json_dumpb(
+                                        HTTPException(
+                                            status_code=404, detail="Run not found"
+                                        )
                                     ),
                                     None,
                                 )
@@ -2693,6 +2763,25 @@ class Runs(Authenticated):
                 stream_manager = get_stream_manager()
                 await stream_manager.remove_queue(run_id, thread_id, queue)
+        @staticmethod
+        async def check_run_stream_auth(
+            run_id: UUID,
+            thread_id: UUID,
+            ctx: Auth.types.BaseAuthContext | None = None,
+        ) -> None:
+            async with connect() as conn:
+                filters = await Runs.handle_event(
+                    ctx,
+                    "read",
+                    Auth.types.ThreadsRead(thread_id=thread_id),
+                )
+                if filters:
+                    thread = await Threads._get_with_filters(
+                        cast(InMemConnectionProto, conn), thread_id, filters
+                    )
+                    if not thread:
+                        raise HTTPException(status_code=404, detail="Thread not found")
         @staticmethod
         async def publish(
             run_id: UUID | str,
@@ -2703,18 +2792,13 @@ class Runs(Authenticated):
             resumable: bool = False,
         ) -> None:
             """Publish a message to all subscribers of the run stream."""
-            from langgraph_api.serde import json_dumpb
+            from langgraph_api.utils.stream_codec import STREAM_CODEC
             topic = f"run:{run_id}:stream".encode()
             stream_manager = get_stream_manager()
-            # Send to all queues subscribed to this run_id
-            payload = json_dumpb(
-                {
-                    "event": event,
-                    "message": message,
-                }
-            )
+            # Send to all queues subscribed to this run_id using protocol frame
+            payload = STREAM_CODEC.encode(event, message)
             await stream_manager.put(
                 run_id, thread_id, Message(topic=topic, data=payload), resumable
             )
@@ -2761,6 +2845,7 @@ class Crons:
         schedule: str,
         cron_id: UUID | None = None,
         thread_id: UUID | None = None,
+        on_run_completed: Literal["delete", "keep"] | None = None,
         end_time: datetime | None = None,
         ctx: Auth.types.BaseAuthContext | None = None,
     ) -> AsyncIterator[Cron]:
@@ -2874,11 +2959,18 @@ def _check_filter_match(metadata: dict, filters: Auth.types.FilterType | None) -
                 if key not in metadata or metadata[key] != filter_value:
                     return False
             elif op == "$contains":
-                if (
-                    key not in metadata
-                    or not isinstance(metadata[key], list)
-                    or filter_value not in metadata[key]
-                ):
+                if key not in metadata or not isinstance(metadata[key], list):
+                    return False
+                if isinstance(filter_value, list):
+                    # Mimick Postgres containment operator behavior.
+                    # It would be more efficient to use set operations here,
+                    # but we can't assume that elements are hashable.
+                    # The Postgres algorithm is also O(n^2).
+                    for filter_element in filter_value:
+                        if filter_element not in metadata[key]:
+                            return False
+                elif filter_value not in metadata[key]:
                     return False
         else:
             # Direct equality
@@ -2894,6 +2986,7 @@ async def _empty_generator():
 __all__ = [
+    "StreamHandler",
     "Assistants",
     "Crons",
     "Runs",

langgraph_runtime_inmem/queue.py CHANGED Viewed

@@ -154,17 +154,6 @@ def _enable_blockbuster():
     ls_env.get_runtime_environment()  # this gets cached
     bb = BlockBuster(excluded_modules=[])
-    for module, func in (
-        # Note, we've cached this call in langsmith==0.3.21 so it shouldn't raise anyway
-        # but we don't want to raise teh minbound just for that.
-        ("langsmith/client.py", "_default_retry_config"),
-        # Only triggers in python 3.11 for getting subgraphs
-        # Will be unnecessary once we cache the assistant schemas
-        ("langgraph/pregel/utils.py", "get_function_nonlocals"),
-        ("importlib/metadata/__init__.py", "metadata"),
-        ("importlib/metadata/__init__.py", "read_text"),
-    ):
-        bb.functions["io.TextIOWrapper.read"].can_block_in(module, func)
     bb.functions["os.path.abspath"].can_block_in("inspect.py", "getmodule")
@@ -172,12 +161,8 @@ def _enable_blockbuster():
         ("memory/__init__.py", "sync"),
         ("memory/__init__.py", "load"),
         ("memory/__init__.py", "dump"),
+        ("pydantic/main.py", "__init__"),
     ):
-        bb.functions["io.TextIOWrapper.read"].can_block_in(module, func)
-        bb.functions["io.TextIOWrapper.write"].can_block_in(module, func)
-        bb.functions["io.BufferedWriter.write"].can_block_in(module, func)
-        bb.functions["io.BufferedReader.read"].can_block_in(module, func)
         bb.functions["os.remove"].can_block_in(module, func)
         bb.functions["os.rename"].can_block_in(module, func)
@@ -199,6 +184,12 @@ def _enable_blockbuster():
         # as well as importlib.metadata.
         "os.listdir",
         "os.remove",
+        # We used to block the IO things but people use them so often that
+        # we've decided to just let people make bad decisions for themselves.
+        "io.BufferedReader.read",
+        "io.BufferedWriter.write",
+        "io.TextIOWrapper.read",
+        "io.TextIOWrapper.write",
         # If people are using threadpoolexecutor, etc. they'd be using this.
         "threading.Lock.acquire",
     ]

{langgraph_runtime_inmem-0.9.0.dist-info → langgraph_runtime_inmem-0.18.1.dist-info}/METADATA RENAMED Viewed

@@ -1,13 +1,13 @@
 Metadata-Version: 2.4
 Name: langgraph-runtime-inmem
-Version: 0.9.0
+Version: 0.18.1
 Summary: Inmem implementation for the LangGraph API server.
 Author-email: Will Fu-Hinthorn <will@langchain.dev>
 License: Elastic-2.0
 Requires-Python: >=3.11.0
 Requires-Dist: blockbuster<2.0.0,>=1.5.24
-Requires-Dist: langgraph-checkpoint>=2.0.25
-Requires-Dist: langgraph>=0.2
+Requires-Dist: langgraph-checkpoint<4,>=3
+Requires-Dist: langgraph<2,>=0.4.10
 Requires-Dist: sse-starlette>=2
 Requires-Dist: starlette>=0.37
 Requires-Dist: structlog>23

langgraph_runtime_inmem-0.18.1.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,13 @@
+langgraph_runtime_inmem/__init__.py,sha256=8LwgexYJfUTj5uFimXddpKAdiLBMFWKrf71glUqQkTc,311
+langgraph_runtime_inmem/checkpoint.py,sha256=nc1G8DqVdIu-ibjKTqXfbPfMbAsKjPObKqegrSzo6Po,4432
+langgraph_runtime_inmem/database.py,sha256=g2XYa5KN-T8MbDeFH9sfUApDG62Wp4BACumVnDtxYhI,6403
+langgraph_runtime_inmem/inmem_stream.py,sha256=PFLWbsxU8RqbT5mYJgNk6v5q6TWJRIY1hkZWhJF8nkI,9094
+langgraph_runtime_inmem/lifespan.py,sha256=fCoYcN_h0cxmj6-muC-f0csPdSpyepZuGRD1yBrq4XM,4755
+langgraph_runtime_inmem/metrics.py,sha256=_YiSkLnhQvHpMktk38SZo0abyL-5GihfVAtBo0-lFIc,403
+langgraph_runtime_inmem/ops.py,sha256=s_3MN5f4uecR7FaSo4WTjeeUqD0fNgB0QhokiV6y8Hg,109178
+langgraph_runtime_inmem/queue.py,sha256=17HBZrYaxJg_k4NoabToYD_J6cqVzyHpWIz3VzGg_14,9363
+langgraph_runtime_inmem/retry.py,sha256=XmldOP4e_H5s264CagJRVnQMDFcEJR_dldVR1Hm5XvM,763
+langgraph_runtime_inmem/store.py,sha256=rTfL1JJvd-j4xjTrL8qDcynaWF6gUJ9-GDVwH0NBD_I,3506
+langgraph_runtime_inmem-0.18.1.dist-info/METADATA,sha256=JJWTv1Yhr5Fx83aOApdJOXkKMSJ3fomwb00xqfK_cnA,570
+langgraph_runtime_inmem-0.18.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+langgraph_runtime_inmem-0.18.1.dist-info/RECORD,,

langgraph_runtime_inmem-0.9.0.dist-info/RECORD DELETED Viewed

@@ -1,13 +0,0 @@
-langgraph_runtime_inmem/__init__.py,sha256=f-VPPHH1-hKFwEreffg7dNATe9IdcYwQedcSx2MiZog,310
-langgraph_runtime_inmem/checkpoint.py,sha256=nc1G8DqVdIu-ibjKTqXfbPfMbAsKjPObKqegrSzo6Po,4432
-langgraph_runtime_inmem/database.py,sha256=QgaA_WQo1IY6QioYd8r-e6-0B0rnC5anS0muIEJWby0,6364
-langgraph_runtime_inmem/inmem_stream.py,sha256=pUEiHW-1uXQrVTcwEYPwO8YXaYm5qZbpRWawt67y6Lw,8187
-langgraph_runtime_inmem/lifespan.py,sha256=t0w2MX2dGxe8yNtSX97Z-d2pFpllSLS4s1rh2GJDw5M,3557
-langgraph_runtime_inmem/metrics.py,sha256=HhO0RC2bMDTDyGBNvnd2ooLebLA8P1u5oq978Kp_nAA,392
-langgraph_runtime_inmem/ops.py,sha256=0Jx65S3PCvvHlIpA0XYpl-UnDEo_AiGWXRE2QiFSocY,105165
-langgraph_runtime_inmem/queue.py,sha256=33qfFKPhQicZ1qiibllYb-bTFzUNSN2c4bffPACP5es,9952
-langgraph_runtime_inmem/retry.py,sha256=XmldOP4e_H5s264CagJRVnQMDFcEJR_dldVR1Hm5XvM,763
-langgraph_runtime_inmem/store.py,sha256=rTfL1JJvd-j4xjTrL8qDcynaWF6gUJ9-GDVwH0NBD_I,3506
-langgraph_runtime_inmem-0.9.0.dist-info/METADATA,sha256=ptwW1Ei-Xln53P81eJK1aPcFozU8D192OCZBuC_y5EQ,565
-langgraph_runtime_inmem-0.9.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-langgraph_runtime_inmem-0.9.0.dist-info/RECORD,,

{langgraph_runtime_inmem-0.9.0.dist-info → langgraph_runtime_inmem-0.18.1.dist-info}/WHEEL RENAMED Viewed

File without changes

langgraph-runtime-inmem 0.9.0__py3-none-any.whl → 0.18.1__py3-none-any.whl

langgraph-runtime-inmem 0.9.0__py3-none-any.whl → 0.18.1__py3-none-any.whl