langgraph-api 0.4.9__tar.gz → 0.4.14__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of langgraph-api might be problematic. Click here for more details.

Files changed (122):
  1. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/PKG-INFO +2 -2
  2. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/benchmark/ramp.js +18 -23
  3. langgraph_api-0.4.14/langgraph_api/__init__.py +1 -0
  4. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/api/a2a.py +1 -1
  5. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/api/meta.py +35 -23
  6. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/api/threads.py +6 -1
  7. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/asyncio.py +4 -0
  8. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/auth/langsmith/backend.py +1 -1
  9. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/command.py +4 -2
  10. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/graph.py +3 -3
  11. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/js/remote.py +16 -12
  12. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/queue_entrypoint.py +13 -3
  13. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/serde.py +0 -19
  14. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/state.py +1 -1
  15. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/thread_ttl.py +4 -1
  16. langgraph_api-0.4.14/langgraph_api/utils/cache.py +95 -0
  17. langgraph_api-0.4.14/langgraph_api/utils/retriable_client.py +74 -0
  18. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/worker.py +1 -1
  19. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/openapi.json +20 -1
  20. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/pyproject.toml +1 -1
  21. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/uv.lock +37 -37
  22. langgraph_api-0.4.9/langgraph_api/__init__.py +0 -1
  23. langgraph_api-0.4.9/langgraph_api/utils/cache.py +0 -58
  24. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/.gitignore +0 -0
  25. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/LICENSE +0 -0
  26. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/Makefile +0 -0
  27. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/README.md +0 -0
  28. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/benchmark/.gitignore +0 -0
  29. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/benchmark/Makefile +0 -0
  30. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/benchmark/README.md +0 -0
  31. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/benchmark/burst.js +0 -0
  32. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/benchmark/clean.js +0 -0
  33. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/benchmark/graphs.js +0 -0
  34. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/benchmark/package.json +0 -0
  35. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/benchmark/update-revision.js +0 -0
  36. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/benchmark/weather.js +0 -0
  37. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/constraints.txt +0 -0
  38. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/forbidden.txt +0 -0
  39. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/healthcheck.py +0 -0
  40. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/api/__init__.py +0 -0
  41. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/api/assistants.py +0 -0
  42. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/api/mcp.py +0 -0
  43. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/api/openapi.py +0 -0
  44. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/api/runs.py +0 -0
  45. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/api/store.py +0 -0
  46. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/api/ui.py +0 -0
  47. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/asgi_transport.py +0 -0
  48. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/auth/__init__.py +0 -0
  49. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/auth/custom.py +0 -0
  50. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/auth/langsmith/__init__.py +0 -0
  51. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/auth/langsmith/client.py +0 -0
  52. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/auth/middleware.py +0 -0
  53. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/auth/noop.py +0 -0
  54. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/auth/studio_user.py +0 -0
  55. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/cli.py +0 -0
  56. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/config.py +0 -0
  57. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/cron_scheduler.py +0 -0
  58. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/errors.py +0 -0
  59. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/executor_entrypoint.py +0 -0
  60. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/feature_flags.py +0 -0
  61. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/http.py +0 -0
  62. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/http_metrics.py +0 -0
  63. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/js/.gitignore +0 -0
  64. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/js/.prettierrc +0 -0
  65. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/js/__init__.py +0 -0
  66. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/js/base.py +0 -0
  67. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/js/build.mts +0 -0
  68. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/js/client.http.mts +0 -0
  69. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/js/client.mts +0 -0
  70. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/js/errors.py +0 -0
  71. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/js/global.d.ts +0 -0
  72. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/js/package.json +0 -0
  73. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/js/schema.py +0 -0
  74. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/js/src/graph.mts +0 -0
  75. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/js/src/load.hooks.mjs +0 -0
  76. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/js/src/preload.mjs +0 -0
  77. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/js/src/utils/files.mts +0 -0
  78. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/js/src/utils/importMap.mts +0 -0
  79. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/js/src/utils/pythonSchemas.mts +0 -0
  80. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/js/src/utils/serde.mts +0 -0
  81. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/js/sse.py +0 -0
  82. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/js/traceblock.mts +0 -0
  83. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/js/tsconfig.json +0 -0
  84. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/js/ui.py +0 -0
  85. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/js/yarn.lock +0 -0
  86. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/logging.py +0 -0
  87. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/metadata.py +0 -0
  88. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/middleware/__init__.py +0 -0
  89. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/middleware/http_logger.py +0 -0
  90. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/middleware/private_network.py +0 -0
  91. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/middleware/request_id.py +0 -0
  92. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/models/__init__.py +0 -0
  93. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/models/run.py +0 -0
  94. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/patch.py +0 -0
  95. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/route.py +0 -0
  96. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/schema.py +0 -0
  97. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/server.py +0 -0
  98. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/sse.py +0 -0
  99. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/store.py +0 -0
  100. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/stream.py +0 -0
  101. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/traceblock.py +0 -0
  102. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/tunneling/cloudflare.py +0 -0
  103. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/utils/__init__.py +0 -0
  104. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/utils/config.py +0 -0
  105. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/utils/future.py +0 -0
  106. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/utils/headers.py +0 -0
  107. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/utils/uuids.py +0 -0
  108. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/validation.py +0 -0
  109. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_api/webhook.py +0 -0
  110. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_license/__init__.py +0 -0
  111. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_license/validation.py +0 -0
  112. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_runtime/__init__.py +0 -0
  113. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_runtime/checkpoint.py +0 -0
  114. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_runtime/database.py +0 -0
  115. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_runtime/lifespan.py +0 -0
  116. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_runtime/metrics.py +0 -0
  117. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_runtime/ops.py +0 -0
  118. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_runtime/queue.py +0 -0
  119. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_runtime/retry.py +0 -0
  120. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/langgraph_runtime/store.py +0 -0
  121. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/logging.json +0 -0
  122. {langgraph_api-0.4.9 → langgraph_api-0.4.14}/scripts/create_license.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: langgraph-api
3
- Version: 0.4.9
3
+ Version: 0.4.14
4
4
  Author-email: Nuno Campos <nuno@langchain.dev>, Will Fu-Hinthorn <will@langchain.dev>
5
5
  License: Elastic-2.0
6
6
  License-File: LICENSE
@@ -11,7 +11,7 @@ Requires-Dist: httpx>=0.25.0
11
11
  Requires-Dist: jsonschema-rs<0.30,>=0.20.0
12
12
  Requires-Dist: langchain-core>=0.3.64
13
13
  Requires-Dist: langgraph-checkpoint>=2.0.23
14
- Requires-Dist: langgraph-runtime-inmem<0.11.0,>=0.10.0
14
+ Requires-Dist: langgraph-runtime-inmem<0.13.0,>=0.12.0
15
15
  Requires-Dist: langgraph-sdk>=0.2.0
16
16
  Requires-Dist: langgraph>=0.4.0
17
17
  Requires-Dist: langsmith>=0.3.45
@@ -56,7 +56,7 @@ export let options = {
56
56
  },
57
57
  thresholds: {
58
58
  'run_duration': [`p(95)<${p95_run_duration[MODE]}`],
59
- 'successful_runs': [`count>${(PLATEAU_DURATION / (p95_run_duration[MODE] / 1000)) * LOAD_SIZE * LEVELS * 2}`], // Number of expected successful runs per user worst caseduring plateau * max number of users * 2 cause that feels about right
59
+ 'successful_runs': [`count>${(PLATEAU_DURATION / (p95_run_duration[MODE] / 1000)) * LOAD_SIZE * LEVELS * 2}`], // Number of expected successful runs per user worst case during plateau * max number of users * 2 cause that feels about right
60
60
  'http_req_failed': ['rate<0.01'], // Error rate should be less than 1%
61
61
  },
62
62
  };
@@ -109,10 +109,16 @@ export default function() {
109
109
 
110
110
  // Check the response
111
111
  const expected_length = MODE === 'single' ? 1 : EXPAND + 1;
112
- const success = check(response, {
113
- 'Run completed successfully': (r) => r.status === 200,
114
- 'Response contains expected number of messages': (r) => JSON.parse(r.body)?.messages?.length === expected_length,
115
- });
112
+ let success = false;
113
+ try {
114
+ success = check(response, {
115
+ 'Run completed successfully': (r) => r.status === 200,
116
+ 'Response contains expected number of messages': (r) => JSON.parse(r.body)?.messages?.length === expected_length,
117
+ });
118
+ } catch (error) {
119
+ console.log(`Error checking response: ${error}`);
120
+ }
121
+
116
122
 
117
123
  if (success) {
118
124
  // Record success metrics
@@ -126,34 +132,23 @@ export default function() {
126
132
  if (response.status >= 500) {
127
133
  serverErrors.add(1);
128
134
  console.log(`Server error: ${response.status}`);
129
- } else if (response.status === 408 || response.error === 'timeout') {
135
+ } else if (response.status === 408 || response.error?.includes('timeout')) {
130
136
  timeoutErrors.add(1);
131
137
  console.log(`Timeout error: ${response.error}`);
132
- } else if (response.status === 200 && response?.body?.messages?.length !== expected_length) {
138
+ } else if (response.status === 200 && response.body?.messages?.length !== expected_length) {
133
139
  missingMessageErrors.add(1);
134
- console.log(response);
135
- console.log(`Missing message error: Status ${response.status}, ${JSON.stringify(response.body)}`);
140
+ console.log(`Missing message error: Status ${response.status}, ${JSON.stringify(response.body)}, ${response.headers?.['Content-Location']}`);
136
141
  } else {
137
142
  otherErrors.add(1);
138
143
  console.log(`Other error: Status ${response.status}, ${JSON.stringify(response.body)}`);
139
144
  }
140
145
  }
141
146
  } catch (error) {
142
- // Handle exceptions (network errors, etc.)
147
+ // Handle truly unexpected errors
143
148
  failedRuns.add(1);
144
-
145
- if (error.message.includes('timeout')) {
146
- timeoutErrors.add(1);
147
- console.log(`Timeout error: ${error.message}`);
148
- } else if (error.message.includes('connection') || error.message.includes('network')) {
149
- connectionErrors.add(1);
150
- console.log(`Connection error: ${error.message}`);
151
- } else {
152
- otherErrors.add(1);
153
- // Usually we end up with HTML error pages here
154
- console.log(response);
155
- console.log(`Unexpected error: ${error.message}, Response Body: ${response?.body}`);
156
- }
149
+ otherErrors.add(1);
150
+ console.log(response);
151
+ console.log(`Unexpected error: ${error.message}, Response Body: ${response?.body}`);
157
152
  }
158
153
 
159
154
  // Add a small random sleep between iterations to prevent thundering herd
@@ -0,0 +1 @@
1
+ __version__ = "0.4.14"
@@ -171,7 +171,7 @@ async def _validate_supports_messages(
171
171
  """
172
172
  assistant_id = assistant["assistant_id"]
173
173
 
174
- cached_schemas = _assistant_schemas_cache.get(assistant_id)
174
+ cached_schemas = await _assistant_schemas_cache.get(assistant_id)
175
175
  if cached_schemas is not None:
176
176
  schemas = cached_schemas
177
177
  else:
@@ -1,6 +1,7 @@
1
1
  from typing import cast
2
2
 
3
3
  import langgraph.version
4
+ import structlog
4
5
  from starlette.responses import JSONResponse, PlainTextResponse
5
6
 
6
7
  from langgraph_api import __version__, config, metadata
@@ -13,6 +14,8 @@ from langgraph_runtime.ops import Runs
13
14
 
14
15
  METRICS_FORMATS = {"prometheus", "json"}
15
16
 
17
+ logger = structlog.stdlib.get_logger(__name__)
18
+
16
19
 
17
20
  async def meta_info(request: ApiRequest):
18
21
  plus = plus_features_enabled()
@@ -71,35 +74,44 @@ async def meta_metrics(request: ApiRequest):
71
74
  resp["workers"] = worker_metrics
72
75
  return JSONResponse(resp)
73
76
  elif metrics_format == "prometheus":
74
- async with connect() as conn:
75
- queue_stats = await Runs.stats(conn)
76
-
77
- metrics = [
78
- "# HELP lg_api_num_pending_runs The number of runs currently pending.",
79
- "# TYPE lg_api_num_pending_runs gauge",
80
- f'lg_api_num_pending_runs{{project_id="{metadata.PROJECT_ID}", revision_id="{metadata.HOST_REVISION_ID}"}} {queue_stats["n_pending"]}',
81
- "# HELP lg_api_num_running_runs The number of runs currently running.",
82
- "# TYPE lg_api_num_running_runs gauge",
83
- f'lg_api_num_running_runs{{project_id="{metadata.PROJECT_ID}", revision_id="{metadata.HOST_REVISION_ID}"}} {queue_stats["n_running"]}',
84
- ]
77
+ metrics = []
78
+ try:
79
+ async with connect() as conn:
80
+ queue_stats = await Runs.stats(conn)
85
81
 
86
- if config.N_JOBS_PER_WORKER > 0:
87
82
  metrics.extend(
88
83
  [
89
- "# HELP lg_api_workers_max The maximum number of workers available.",
90
- "# TYPE lg_api_workers_max gauge",
91
- f'lg_api_workers_max{{project_id="{metadata.PROJECT_ID}", revision_id="{metadata.HOST_REVISION_ID}"}} {workers_max}',
92
- "# HELP lg_api_workers_active The number of currently active workers.",
93
- "# TYPE lg_api_workers_active gauge",
94
- f'lg_api_workers_active{{project_id="{metadata.PROJECT_ID}", revision_id="{metadata.HOST_REVISION_ID}"}} {workers_active}',
95
- "# HELP lg_api_workers_available The number of available (idle) workers.",
96
- "# TYPE lg_api_workers_available gauge",
97
- f'lg_api_workers_available{{project_id="{metadata.PROJECT_ID}", revision_id="{metadata.HOST_REVISION_ID}"}} {workers_available}',
84
+ "# HELP lg_api_num_pending_runs The number of runs currently pending.",
85
+ "# TYPE lg_api_num_pending_runs gauge",
86
+ f'lg_api_num_pending_runs{{project_id="{metadata.PROJECT_ID}", revision_id="{metadata.HOST_REVISION_ID}"}} {queue_stats["n_pending"]}',
87
+ "# HELP lg_api_num_running_runs The number of runs currently running.",
88
+ "# TYPE lg_api_num_running_runs gauge",
89
+ f'lg_api_num_running_runs{{project_id="{metadata.PROJECT_ID}", revision_id="{metadata.HOST_REVISION_ID}"}} {queue_stats["n_running"]}',
98
90
  ]
99
91
  )
92
+ except Exception as e:
93
+ # if we get a db connection error/timeout, just skip queue stats
94
+ await logger.awarning(
95
+ "Ignoring error while getting run stats for /metrics", exc_info=e
96
+ )
97
+
98
+ if config.N_JOBS_PER_WORKER > 0:
99
+ metrics.extend(
100
+ [
101
+ "# HELP lg_api_workers_max The maximum number of workers available.",
102
+ "# TYPE lg_api_workers_max gauge",
103
+ f'lg_api_workers_max{{project_id="{metadata.PROJECT_ID}", revision_id="{metadata.HOST_REVISION_ID}"}} {workers_max}',
104
+ "# HELP lg_api_workers_active The number of currently active workers.",
105
+ "# TYPE lg_api_workers_active gauge",
106
+ f'lg_api_workers_active{{project_id="{metadata.PROJECT_ID}", revision_id="{metadata.HOST_REVISION_ID}"}} {workers_active}',
107
+ "# HELP lg_api_workers_available The number of available (idle) workers.",
108
+ "# TYPE lg_api_workers_available gauge",
109
+ f'lg_api_workers_available{{project_id="{metadata.PROJECT_ID}", revision_id="{metadata.HOST_REVISION_ID}"}} {workers_available}',
110
+ ]
111
+ )
100
112
 
101
- metrics.extend(http_metrics)
102
- metrics.extend(pg_redis_stats)
113
+ metrics.extend(http_metrics)
114
+ metrics.extend(pg_redis_stats)
103
115
 
104
116
  metrics_response = "\n".join(metrics)
105
117
  return PlainTextResponse(metrics_response)
@@ -290,7 +290,12 @@ async def patch_thread(
290
290
  validate_uuid(thread_id, "Invalid thread ID: must be a UUID")
291
291
  payload = await request.json(ThreadPatch)
292
292
  async with connect() as conn:
293
- thread = await Threads.patch(conn, thread_id, metadata=payload["metadata"])
293
+ thread = await Threads.patch(
294
+ conn,
295
+ thread_id,
296
+ metadata=payload.get("metadata", {}),
297
+ ttl=payload.get("ttl"),
298
+ )
294
299
  return ApiResponse(await fetchone(thread))
295
300
 
296
301
 
@@ -158,6 +158,7 @@ class SimpleTaskGroup(AbstractAsyncContextManager["SimpleTaskGroup"]):
158
158
  self,
159
159
  *coros: Coroutine[Any, Any, T],
160
160
  cancel: bool = False,
161
+ cancel_event: asyncio.Event | None = None,
161
162
  wait: bool = True,
162
163
  taskset: set[asyncio.Task] | None = None,
163
164
  taskgroup_name: str | None = None,
@@ -165,6 +166,7 @@ class SimpleTaskGroup(AbstractAsyncContextManager["SimpleTaskGroup"]):
165
166
  # Copy the taskset to avoid modifying the original set unintentionally (like in lifespan)
166
167
  self.tasks = taskset.copy() if taskset is not None else set()
167
168
  self.cancel = cancel
169
+ self.cancel_event = cancel_event
168
170
  self.wait = wait
169
171
  if taskset:
170
172
  for task in tuple(taskset):
@@ -181,6 +183,8 @@ class SimpleTaskGroup(AbstractAsyncContextManager["SimpleTaskGroup"]):
181
183
  try:
182
184
  if (exc := task.exception()) and not isinstance(exc, ignore_exceptions):
183
185
  logger.exception("asyncio.task failed in task group", exc_info=exc)
186
+ if self.cancel_event:
187
+ self.cancel_event.set()
184
188
  except asyncio.CancelledError:
185
189
  pass
186
190
 
@@ -58,7 +58,7 @@ class LangsmithAuthBackend(AuthenticationBackend):
58
58
 
59
59
  # Check cache first
60
60
  cache_key = self._get_cache_key(headers)
61
- if cached_entry := self._cache.get(cache_key):
61
+ if cached_entry := await self._cache.get(cache_key):
62
62
  return cached_entry["credentials"], cached_entry["user"]
63
63
 
64
64
  async with auth_client() as auth:
@@ -1,3 +1,5 @@
1
+ from typing import cast
2
+
1
3
  from langgraph.types import Command, Send
2
4
 
3
5
  from langgraph_api.schema import RunCommand
@@ -11,9 +13,9 @@ def map_cmd(cmd: RunCommand) -> Command:
11
13
  update = cmd.get("update")
12
14
  if isinstance(update, tuple | list) and all(
13
15
  isinstance(t, tuple | list) and len(t) == 2 and isinstance(t[0], str)
14
- for t in update
16
+ for t in cast(list, update)
15
17
  ):
16
- update = [tuple(t) for t in update]
18
+ update = [tuple(t) for t in cast(list, update)]
17
19
 
18
20
  return Command(
19
21
  update=update,
@@ -392,14 +392,14 @@ async def collect_graphs_from_env(register: bool = False) -> None:
392
392
 
393
393
  if (
394
394
  config.HTTP_CONFIG
395
- and config.HTTP_CONFIG.get("app")
396
- and is_js_path(config.HTTP_CONFIG.get("app").split(":")[0])
395
+ and (js_app := config.HTTP_CONFIG.get("app"))
396
+ and is_js_path(js_app.split(":")[0])
397
397
  ):
398
398
  js_bg_tasks.add(
399
399
  asyncio.create_task(
400
400
  run_js_http_process(
401
401
  paths_str,
402
- config.HTTP_CONFIG.get("app"),
402
+ config.HTTP_CONFIG or {},
403
403
  watch="--reload" in sys.argv[1:],
404
404
  ),
405
405
  )
@@ -153,9 +153,9 @@ class RemotePregel(BaseRemotePregel):
153
153
 
154
154
  async for event in _client_stream("streamEvents", data):
155
155
  if event["event"] == "on_custom_event":
156
- yield CustomStreamEvent(**event)
156
+ yield CustomStreamEvent(**event) # type: ignore[missing-typed-dict-key]
157
157
  else:
158
- yield StandardStreamEvent(**event)
158
+ yield StandardStreamEvent(**event) # type: ignore[missing-typed-dict-key]
159
159
 
160
160
  async def fetch_state_schema(self):
161
161
  return await _client_invoke("getSchema", {"graph_id": self.graph_id})
@@ -187,15 +187,17 @@ class RemotePregel(BaseRemotePregel):
187
187
  )
188
188
  for data in nodes
189
189
  },
190
- {
191
- Edge(
192
- data["source"],
193
- data["target"],
194
- data.get("data"),
195
- data.get("conditional", False),
196
- )
197
- for data in edges
198
- },
190
+ list(
191
+ {
192
+ Edge(
193
+ data["source"],
194
+ data["target"],
195
+ data.get("data"),
196
+ data.get("conditional", False),
197
+ )
198
+ for data in edges
199
+ }
200
+ ),
199
201
  )
200
202
 
201
203
  async def fetch_subgraphs(
@@ -861,6 +863,8 @@ class CustomJsAuthBackend(AuthenticationBackend):
861
863
  self.ls_auth = LangsmithAuthBackend()
862
864
  self.ttl_cache: LRUCache | None = None
863
865
  self.cache_keys: list[str] | None = None
866
+ if LANGGRAPH_AUTH is None:
867
+ raise ValueError("LANGGRAPH_AUTH is not set")
864
868
  if cache := LANGGRAPH_AUTH.get("cache"):
865
869
  keys = cache.get("cache_keys", [])
866
870
  if not isinstance(keys, list):
@@ -891,7 +895,7 @@ class CustomJsAuthBackend(AuthenticationBackend):
891
895
  if self.cache_keys:
892
896
  cache_key = tuple((k, headers[k]) for k in self.cache_keys if k in headers)
893
897
  if cache_key and self.ttl_cache is not None:
894
- cached = self.ttl_cache.get(cache_key)
898
+ cached = await self.ttl_cache.get(cache_key)
895
899
  if cached:
896
900
  return cached
897
901
 
@@ -86,6 +86,7 @@ async def health_and_metrics_server():
86
86
  log_level="error",
87
87
  access_log=False,
88
88
  )
89
+ # Server will run indefinitely until the process is terminated
89
90
  server = uvicorn.Server(config)
90
91
 
91
92
  logger.info(f"Health and metrics server started at http://0.0.0.0:{port}")
@@ -93,14 +94,15 @@ async def health_and_metrics_server():
93
94
 
94
95
 
95
96
  async def entrypoint(
96
- grpc_port: int | None = None, entrypoint_name: str = "python-queue"
97
+ grpc_port: int | None = None,
98
+ entrypoint_name: str = "python-queue",
99
+ cancel_event: asyncio.Event | None = None,
97
100
  ):
98
101
  from langgraph_api import logging as lg_logging
99
102
  from langgraph_api.api import user_router
100
103
 
101
104
  lg_logging.set_logging_context({"entrypoint": entrypoint_name})
102
105
  tasks: set[asyncio.Task] = set()
103
- tasks.add(asyncio.create_task(health_and_metrics_server()))
104
106
 
105
107
  original_lifespan = user_router.router.lifespan_context if user_router else None
106
108
 
@@ -113,6 +115,7 @@ async def entrypoint(
113
115
  with_cron_scheduler=with_cron_scheduler,
114
116
  grpc_port=grpc_port,
115
117
  taskset=taskset,
118
+ cancel_event=cancel_event,
116
119
  ):
117
120
  if original_lifespan:
118
121
  async with original_lifespan(app):
@@ -123,6 +126,7 @@ async def entrypoint(
123
126
  async with combined_lifespan(
124
127
  None, with_cron_scheduler=False, grpc_port=grpc_port, taskset=tasks
125
128
  ):
129
+ tasks.add(asyncio.create_task(health_and_metrics_server()))
126
130
  await asyncio.gather(*tasks)
127
131
 
128
132
 
@@ -141,8 +145,14 @@ async def main(grpc_port: int | None = None, entrypoint_name: str = "python-queu
141
145
  signal.signal(signal.SIGTERM, lambda *_: _handle_signal())
142
146
 
143
147
  entry_task = asyncio.create_task(
144
- entrypoint(grpc_port=grpc_port, entrypoint_name=entrypoint_name)
148
+ entrypoint(
149
+ grpc_port=grpc_port,
150
+ entrypoint_name=entrypoint_name,
151
+ cancel_event=stop_event,
152
+ )
145
153
  )
154
+ # Handle the case where the entrypoint errors out
155
+ entry_task.add_done_callback(lambda _: stop_event.set())
146
156
  await stop_event.wait()
147
157
 
148
158
  logger.warning("Cancelling queue entrypoint task")
@@ -1,5 +1,4 @@
1
1
  import asyncio
2
- import base64
3
2
  import re
4
3
  import uuid
5
4
  from base64 import b64encode
@@ -178,21 +177,3 @@ class Serializer(JsonPlusSerializer):
178
177
 
179
178
  mpack_keys = {"method", "value"}
180
179
  SERIALIZER = Serializer()
181
-
182
-
183
- # TODO: Make more performant (by removing)
184
- async def reserialize_message(message: bytes) -> bytes:
185
- # Stream messages from golang runtime are a byte dict of StreamChunks.
186
- loaded = await ajson_loads(message)
187
- converted = {}
188
- for k, v in loaded.items():
189
- if isinstance(v, dict) and v.keys() == mpack_keys:
190
- if v["method"] == "missing":
191
- converted[k] = v["value"] # oops
192
- else:
193
- converted[k] = SERIALIZER.loads_typed(
194
- (v["method"], base64.b64decode(v["value"]))
195
- )
196
- else:
197
- converted[k] = v
198
- return json_dumpb(converted)
@@ -27,7 +27,7 @@ def runnable_config_to_checkpoint(
27
27
  return None
28
28
 
29
29
  configurable = config["configurable"]
30
- checkpoint: Checkpoint = {
30
+ checkpoint: Checkpoint = { # type: ignore[typed-dict-item]
31
31
  "checkpoint_id": configurable["checkpoint_id"],
32
32
  "thread_id": configurable["thread_id"],
33
33
  }
@@ -1,6 +1,7 @@
1
1
  """Sweeping logic for cleaning up expired threads and checkpoints."""
2
2
 
3
3
  import asyncio
4
+ from typing import cast
4
5
 
5
6
  import structlog
6
7
 
@@ -23,7 +24,9 @@ async def thread_ttl_sweep_loop():
23
24
  raise NotImplementedError(
24
25
  f"Unrecognized thread deletion strategy: {strategy}. Expected 'delete'."
25
26
  )
26
- sweep_interval_minutes = thread_ttl_config.get("sweep_interval_minutes", 5)
27
+ sweep_interval_minutes = cast(
28
+ int, thread_ttl_config.get("sweep_interval_minutes", 5)
29
+ )
27
30
  await logger.ainfo(
28
31
  f"Starting thread TTL sweeper with interval {sweep_interval_minutes} minutes",
29
32
  strategy=strategy,
@@ -0,0 +1,95 @@
1
+ import asyncio
2
+ import time
3
+ from collections import OrderedDict
4
+ from collections.abc import Awaitable, Callable
5
+ from typing import Generic, TypeVar
6
+
7
+ T = TypeVar("T")
8
+
9
+
10
+ class LRUCache(Generic[T]):
11
+ """LRU cache with TTL and proactive refresh support."""
12
+
13
+ def __init__(
14
+ self,
15
+ max_size: int = 1000,
16
+ ttl: float = 60,
17
+ refresh_window: float = 30,
18
+ refresh_callback: Callable[[str], Awaitable[T | None]] | None = None,
19
+ ):
20
+ self._cache: OrderedDict[str, tuple[T, float, bool]] = OrderedDict()
21
+ self._max_size = max_size if max_size > 0 else 1000
22
+ self._ttl = ttl
23
+ self._refresh_window = refresh_window if refresh_window > 0 else 30
24
+ self._refresh_callback = refresh_callback
25
+
26
+ def _get_time(self) -> float:
27
+ """Get current time, using loop.time() if available for better performance."""
28
+ try:
29
+ return asyncio.get_event_loop().time()
30
+ except RuntimeError:
31
+ return time.monotonic()
32
+
33
+ async def get(self, key: str) -> T | None:
34
+ """Get item from cache, attempting refresh if within refresh window."""
35
+ if key not in self._cache:
36
+ return None
37
+
38
+ value, timestamp, is_refreshing = self._cache[key]
39
+ current_time = self._get_time()
40
+ time_until_expiry = self._ttl - (current_time - timestamp)
41
+
42
+ # Check if expired
43
+ if time_until_expiry <= 0:
44
+ del self._cache[key]
45
+ return None
46
+
47
+ # Check if we should attempt refresh (within refresh window and not already refreshing)
48
+ if (
49
+ time_until_expiry <= self._refresh_window
50
+ and not is_refreshing
51
+ and self._refresh_callback
52
+ ):
53
+ # Mark as refreshing to prevent multiple simultaneous refresh attempts
54
+ self._cache[key] = (value, timestamp, True)
55
+
56
+ try:
57
+ # Attempt refresh
58
+ refreshed_value = await self._refresh_callback(key)
59
+ if refreshed_value is not None:
60
+ # Refresh successful, update cache with new value
61
+ self._cache[key] = (refreshed_value, current_time, False)
62
+ # Move to end (most recently used)
63
+ self._cache.move_to_end(key)
64
+ return refreshed_value
65
+ else:
66
+ # Refresh failed, fallback to cached value
67
+ self._cache[key] = (value, timestamp, False)
68
+ except Exception:
69
+ # Refresh failed with exception, fallback to cached value
70
+ self._cache[key] = (value, timestamp, False)
71
+
72
+ # Move to end (most recently used)
73
+ self._cache.move_to_end(key)
74
+ return value
75
+
76
+ def set(self, key: str, value: T) -> None:
77
+ """Set item in cache, evicting old entries if needed."""
78
+ # Remove if already exists (to update timestamp)
79
+ if key in self._cache:
80
+ del self._cache[key]
81
+
82
+ # Evict oldest entries if needed
83
+ while len(self._cache) >= self._max_size:
84
+ self._cache.popitem(last=False) # Remove oldest (FIFO)
85
+
86
+ # Add new entry (not refreshing initially)
87
+ self._cache[key] = (value, self._get_time(), False)
88
+
89
+ def size(self) -> int:
90
+ """Return current cache size."""
91
+ return len(self._cache)
92
+
93
+ def clear(self) -> None:
94
+ """Clear all entries from cache."""
95
+ self._cache.clear()
@@ -0,0 +1,74 @@
1
+ import asyncio
2
+
3
+ import httpx
4
+ import structlog
5
+
6
+ logger = structlog.stdlib.get_logger(__name__)
7
+
8
+
9
+ async def _make_http_request_with_retries(
10
+ url: str,
11
+ headers: dict,
12
+ method: str = "GET",
13
+ json_data: dict | None = None,
14
+ max_retries: int = 3,
15
+ base_delay: float = 1.0,
16
+ ) -> httpx.Response | None:
17
+ """
18
+ Make an HTTP request with exponential backoff retries.
19
+
20
+ Args:
21
+ url: The URL to request
22
+ headers: Headers to include in the request
23
+ method: HTTP method ("GET" or "POST")
24
+ json_data: JSON data for POST requests
25
+ max_retries: Maximum number of retry attempts
26
+ base_delay: Base delay in seconds for exponential backoff
27
+
28
+ Returns:
29
+ httpx.Response: The successful response
30
+
31
+ Raises:
32
+ httpx.HTTPStatusError: If the request fails after all retries
33
+ httpx.RequestError: If the request fails after all retries
34
+ """
35
+ for attempt in range(max_retries + 1):
36
+ try:
37
+ async with httpx.AsyncClient(timeout=10.0) as client:
38
+ response = await client.request(
39
+ method, url, headers=headers, json=json_data
40
+ )
41
+ response.raise_for_status()
42
+ return response
43
+
44
+ except (
45
+ httpx.TimeoutException,
46
+ httpx.NetworkError,
47
+ httpx.RequestError,
48
+ httpx.HTTPStatusError,
49
+ ) as e:
50
+ if isinstance(e, httpx.HTTPStatusError) and e.response.status_code < 500:
51
+ # Don't retry on 4xx errors, but do on 5xxs
52
+ raise e
53
+
54
+ # Back off and retry if we haven't reached the max retries
55
+ if attempt < max_retries:
56
+ delay = base_delay * (2**attempt) # Exponential backoff
57
+ logger.warning(
58
+ "HTTP %s request attempt %d to %s failed: %s. Retrying in %.1f seconds...",
59
+ method,
60
+ attempt + 1,
61
+ url,
62
+ e,
63
+ delay,
64
+ )
65
+ await asyncio.sleep(delay)
66
+ else:
67
+ logger.exception(
68
+ "HTTP %s request to %s failed after %d attempts. Last error: %s",
69
+ method,
70
+ url,
71
+ max_retries + 1,
72
+ e,
73
+ )
74
+ raise e
@@ -153,7 +153,7 @@ async def worker(
153
153
  raise UserTimeout(e) from e
154
154
  raise
155
155
 
156
- async with Runs.enter(run_id, run["thread_id"], main_loop) as done:
156
+ async with Runs.enter(run_id, run["thread_id"], main_loop, resumable) as done:
157
157
  # attempt the run
158
158
  try:
159
159
  if attempt > BG_JOB_MAX_RETRIES:
@@ -5261,11 +5261,30 @@
5261
5261
  "type": "object",
5262
5262
  "title": "Metadata",
5263
5263
  "description": "Metadata to merge with existing thread metadata."
5264
+ },
5265
+ "ttl": {
5266
+ "type": "object",
5267
+ "title": "TTL",
5268
+ "description": "The time-to-live for the thread.",
5269
+ "properties": {
5270
+ "strategy": {
5271
+ "type": "string",
5272
+ "enum": [
5273
+ "delete"
5274
+ ],
5275
+ "description": "The TTL strategy. 'delete' removes the entire thread.",
5276
+ "default": "delete"
5277
+ },
5278
+ "ttl": {
5279
+ "type": "number",
5280
+ "description": "The time-to-live in minutes from now until thread should be swept."
5281
+ }
5282
+ }
5264
5283
  }
5265
5284
  },
5266
5285
  "type": "object",
5267
5286
  "title": "ThreadPatch",
5268
- "description": "Payload for creating a thread."
5287
+ "description": "Payload for updating a thread."
5269
5288
  },
5270
5289
  "ThreadStateCheckpointRequest": {
5271
5290
  "properties": {