langchain 1.0.4__py3-none-any.whl → 1.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langchain/__init__.py +1 -1
- langchain/agents/__init__.py +1 -7
- langchain/agents/factory.py +100 -41
- langchain/agents/middleware/__init__.py +5 -7
- langchain/agents/middleware/_execution.py +21 -20
- langchain/agents/middleware/_redaction.py +27 -12
- langchain/agents/middleware/_retry.py +123 -0
- langchain/agents/middleware/context_editing.py +26 -22
- langchain/agents/middleware/file_search.py +18 -13
- langchain/agents/middleware/human_in_the_loop.py +60 -54
- langchain/agents/middleware/model_call_limit.py +63 -17
- langchain/agents/middleware/model_fallback.py +7 -9
- langchain/agents/middleware/model_retry.py +300 -0
- langchain/agents/middleware/pii.py +80 -27
- langchain/agents/middleware/shell_tool.py +230 -103
- langchain/agents/middleware/summarization.py +439 -90
- langchain/agents/middleware/todo.py +111 -27
- langchain/agents/middleware/tool_call_limit.py +105 -71
- langchain/agents/middleware/tool_emulator.py +42 -33
- langchain/agents/middleware/tool_retry.py +171 -159
- langchain/agents/middleware/tool_selection.py +37 -27
- langchain/agents/middleware/types.py +754 -392
- langchain/agents/structured_output.py +22 -12
- langchain/chat_models/__init__.py +1 -7
- langchain/chat_models/base.py +234 -185
- langchain/embeddings/__init__.py +0 -5
- langchain/embeddings/base.py +80 -66
- langchain/messages/__init__.py +0 -5
- langchain/tools/__init__.py +1 -7
- {langchain-1.0.4.dist-info → langchain-1.2.3.dist-info}/METADATA +3 -5
- langchain-1.2.3.dist-info/RECORD +36 -0
- {langchain-1.0.4.dist-info → langchain-1.2.3.dist-info}/WHEEL +1 -1
- langchain-1.0.4.dist-info/RECORD +0 -34
- {langchain-1.0.4.dist-info → langchain-1.2.3.dist-info}/licenses/LICENSE +0 -0
langchain/__init__.py
CHANGED
langchain/agents/__init__.py
CHANGED
|
@@ -1,10 +1,4 @@
|
|
|
1
|
-
"""Entrypoint to building [Agents](https://docs.langchain.com/oss/python/langchain/agents) with LangChain.
|
|
2
|
-
|
|
3
|
-
!!! warning "Reference docs"
|
|
4
|
-
This page contains **reference documentation** for Agents. See
|
|
5
|
-
[the docs](https://docs.langchain.com/oss/python/langchain/agents) for conceptual
|
|
6
|
-
guides, tutorials, and examples on using Agents.
|
|
7
|
-
""" # noqa: E501
|
|
1
|
+
"""Entrypoint to building [Agents](https://docs.langchain.com/oss/python/langchain/agents) with LangChain.""" # noqa: E501
|
|
8
2
|
|
|
9
3
|
from langchain.agents.factory import create_agent
|
|
10
4
|
from langchain.agents.middleware.types import AgentState
|
langchain/agents/factory.py
CHANGED
|
@@ -20,9 +20,7 @@ from langgraph._internal._runnable import RunnableCallable
|
|
|
20
20
|
from langgraph.constants import END, START
|
|
21
21
|
from langgraph.graph.state import StateGraph
|
|
22
22
|
from langgraph.prebuilt.tool_node import ToolCallWithContext, ToolNode
|
|
23
|
-
from langgraph.runtime import Runtime # noqa: TC002
|
|
24
23
|
from langgraph.types import Command, Send
|
|
25
|
-
from langgraph.typing import ContextT # noqa: TC002
|
|
26
24
|
from typing_extensions import NotRequired, Required, TypedDict
|
|
27
25
|
|
|
28
26
|
from langchain.agents.middleware.types import (
|
|
@@ -56,13 +54,27 @@ if TYPE_CHECKING:
|
|
|
56
54
|
from langchain_core.runnables import Runnable
|
|
57
55
|
from langgraph.cache.base import BaseCache
|
|
58
56
|
from langgraph.graph.state import CompiledStateGraph
|
|
57
|
+
from langgraph.runtime import Runtime
|
|
59
58
|
from langgraph.store.base import BaseStore
|
|
60
59
|
from langgraph.types import Checkpointer
|
|
60
|
+
from langgraph.typing import ContextT
|
|
61
61
|
|
|
62
62
|
from langchain.agents.middleware.types import ToolCallRequest, ToolCallWrapper
|
|
63
63
|
|
|
64
64
|
STRUCTURED_OUTPUT_ERROR_TEMPLATE = "Error: {error}\n Please fix your mistakes."
|
|
65
65
|
|
|
66
|
+
FALLBACK_MODELS_WITH_STRUCTURED_OUTPUT = [
|
|
67
|
+
# if model profile data are not available, these models are assumed to support
|
|
68
|
+
# structured output
|
|
69
|
+
"grok",
|
|
70
|
+
"gpt-5",
|
|
71
|
+
"gpt-4.1",
|
|
72
|
+
"gpt-4o",
|
|
73
|
+
"gpt-oss",
|
|
74
|
+
"o3-pro",
|
|
75
|
+
"o3-mini",
|
|
76
|
+
]
|
|
77
|
+
|
|
66
78
|
|
|
67
79
|
def _normalize_to_model_response(result: ModelResponse | AIMessage) -> ModelResponse:
|
|
68
80
|
"""Normalize middleware return value to ModelResponse."""
|
|
@@ -302,7 +314,7 @@ def _resolve_schema(schemas: set[type], schema_name: str, omit_flag: str | None
|
|
|
302
314
|
def _extract_metadata(type_: type) -> list:
|
|
303
315
|
"""Extract metadata from a field type, handling Required/NotRequired and Annotated wrappers."""
|
|
304
316
|
# Handle Required[Annotated[...]] or NotRequired[Annotated[...]]
|
|
305
|
-
if get_origin(type_) in
|
|
317
|
+
if get_origin(type_) in {Required, NotRequired}:
|
|
306
318
|
inner_type = get_args(type_)[0]
|
|
307
319
|
if get_origin(inner_type) is Annotated:
|
|
308
320
|
return list(get_args(inner_type)[1:])
|
|
@@ -349,11 +361,13 @@ def _get_can_jump_to(middleware: AgentMiddleware[Any, Any], hook_name: str) -> l
|
|
|
349
361
|
return []
|
|
350
362
|
|
|
351
363
|
|
|
352
|
-
def _supports_provider_strategy(model: str | BaseChatModel) -> bool:
|
|
364
|
+
def _supports_provider_strategy(model: str | BaseChatModel, tools: list | None = None) -> bool:
|
|
353
365
|
"""Check if a model supports provider-specific structured output.
|
|
354
366
|
|
|
355
367
|
Args:
|
|
356
368
|
model: Model name string or `BaseChatModel` instance.
|
|
369
|
+
tools: Optional list of tools provided to the agent. Needed because some models
|
|
370
|
+
don't support structured output together with tool calling.
|
|
357
371
|
|
|
358
372
|
Returns:
|
|
359
373
|
`True` if the model supports provider-specific structured output, `False` otherwise.
|
|
@@ -362,11 +376,23 @@ def _supports_provider_strategy(model: str | BaseChatModel) -> bool:
|
|
|
362
376
|
if isinstance(model, str):
|
|
363
377
|
model_name = model
|
|
364
378
|
elif isinstance(model, BaseChatModel):
|
|
365
|
-
model_name =
|
|
379
|
+
model_name = (
|
|
380
|
+
getattr(model, "model_name", None)
|
|
381
|
+
or getattr(model, "model", None)
|
|
382
|
+
or getattr(model, "model_id", "")
|
|
383
|
+
)
|
|
384
|
+
model_profile = model.profile
|
|
385
|
+
if (
|
|
386
|
+
model_profile is not None
|
|
387
|
+
and model_profile.get("structured_output")
|
|
388
|
+
# We make an exception for Gemini models, which currently do not support
|
|
389
|
+
# simultaneous tool use with structured output
|
|
390
|
+
and not (tools and isinstance(model_name, str) and "gemini" in model_name.lower())
|
|
391
|
+
):
|
|
392
|
+
return True
|
|
366
393
|
|
|
367
394
|
return (
|
|
368
|
-
|
|
369
|
-
or any(part in model_name for part in ["gpt-5", "gpt-4.1", "gpt-oss", "o3-pro", "o3-mini"])
|
|
395
|
+
any(part in model_name.lower() for part in FALLBACK_MODELS_WITH_STRUCTURED_OUTPUT)
|
|
370
396
|
if model_name
|
|
371
397
|
else False
|
|
372
398
|
)
|
|
@@ -512,11 +538,11 @@ def _chain_async_tool_call_wrappers(
|
|
|
512
538
|
return result
|
|
513
539
|
|
|
514
540
|
|
|
515
|
-
def create_agent(
|
|
541
|
+
def create_agent(
|
|
516
542
|
model: str | BaseChatModel,
|
|
517
543
|
tools: Sequence[BaseTool | Callable | dict[str, Any]] | None = None,
|
|
518
544
|
*,
|
|
519
|
-
system_prompt: str | None = None,
|
|
545
|
+
system_prompt: str | SystemMessage | None = None,
|
|
520
546
|
middleware: Sequence[AgentMiddleware[StateT_co, ContextT]] = (),
|
|
521
547
|
response_format: ResponseFormat[ResponseT] | type[ResponseT] | None = None,
|
|
522
548
|
state_schema: type[AgentState[ResponseT]] | None = None,
|
|
@@ -537,42 +563,64 @@ def create_agent( # noqa: PLR0915
|
|
|
537
563
|
visit the [Agents](https://docs.langchain.com/oss/python/langchain/agents) docs.
|
|
538
564
|
|
|
539
565
|
Args:
|
|
540
|
-
model: The language model for the agent.
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
566
|
+
model: The language model for the agent.
|
|
567
|
+
|
|
568
|
+
Can be a string identifier (e.g., `"openai:gpt-4"`) or a direct chat model
|
|
569
|
+
instance (e.g., [`ChatOpenAI`][langchain_openai.ChatOpenAI] or other another
|
|
570
|
+
[LangChain chat model](https://docs.langchain.com/oss/python/integrations/chat)).
|
|
544
571
|
|
|
545
572
|
For a full list of supported model strings, see
|
|
546
573
|
[`init_chat_model`][langchain.chat_models.init_chat_model(model_provider)].
|
|
547
|
-
|
|
574
|
+
|
|
575
|
+
!!! tip ""
|
|
576
|
+
|
|
577
|
+
See the [Models](https://docs.langchain.com/oss/python/langchain/models)
|
|
578
|
+
docs for more information.
|
|
579
|
+
tools: A list of tools, `dict`, or `Callable`.
|
|
548
580
|
|
|
549
581
|
If `None` or an empty list, the agent will consist of a model node without a
|
|
550
582
|
tool calling loop.
|
|
583
|
+
|
|
584
|
+
|
|
585
|
+
!!! tip ""
|
|
586
|
+
|
|
587
|
+
See the [Tools](https://docs.langchain.com/oss/python/langchain/tools)
|
|
588
|
+
docs for more information.
|
|
551
589
|
system_prompt: An optional system prompt for the LLM.
|
|
552
590
|
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
beginning of the message list.
|
|
591
|
+
Can be a `str` (which will be converted to a `SystemMessage`) or a
|
|
592
|
+
`SystemMessage` instance directly. The system message is added to the
|
|
593
|
+
beginning of the message list when calling the model.
|
|
556
594
|
middleware: A sequence of middleware instances to apply to the agent.
|
|
557
595
|
|
|
558
|
-
Middleware can intercept and modify agent behavior at various stages.
|
|
559
|
-
|
|
596
|
+
Middleware can intercept and modify agent behavior at various stages.
|
|
597
|
+
|
|
598
|
+
!!! tip ""
|
|
599
|
+
|
|
600
|
+
See the [Middleware](https://docs.langchain.com/oss/python/langchain/middleware)
|
|
601
|
+
docs for more information.
|
|
560
602
|
response_format: An optional configuration for structured responses.
|
|
561
603
|
|
|
562
604
|
Can be a `ToolStrategy`, `ProviderStrategy`, or a Pydantic model class.
|
|
563
605
|
|
|
564
606
|
If provided, the agent will handle structured output during the
|
|
565
|
-
conversation flow.
|
|
566
|
-
|
|
607
|
+
conversation flow.
|
|
608
|
+
|
|
609
|
+
Raw schemas will be wrapped in an appropriate strategy based on model
|
|
610
|
+
capabilities.
|
|
611
|
+
|
|
612
|
+
!!! tip ""
|
|
613
|
+
|
|
614
|
+
See the [Structured output](https://docs.langchain.com/oss/python/langchain/structured-output)
|
|
615
|
+
docs for more information.
|
|
567
616
|
state_schema: An optional `TypedDict` schema that extends `AgentState`.
|
|
568
617
|
|
|
569
618
|
When provided, this schema is used instead of `AgentState` as the base
|
|
570
619
|
schema for merging with middleware state schemas. This allows users to
|
|
571
620
|
add custom state fields without needing to create custom middleware.
|
|
621
|
+
|
|
572
622
|
Generally, it's recommended to use `state_schema` extensions via middleware
|
|
573
623
|
to keep relevant extensions scoped to corresponding hooks / tools.
|
|
574
|
-
|
|
575
|
-
The schema must be a subclass of `AgentState[ResponseT]`.
|
|
576
624
|
context_schema: An optional schema for runtime context.
|
|
577
625
|
checkpointer: An optional checkpoint saver object.
|
|
578
626
|
|
|
@@ -637,6 +685,14 @@ def create_agent( # noqa: PLR0915
|
|
|
637
685
|
if isinstance(model, str):
|
|
638
686
|
model = init_chat_model(model)
|
|
639
687
|
|
|
688
|
+
# Convert system_prompt to SystemMessage if needed
|
|
689
|
+
system_message: SystemMessage | None = None
|
|
690
|
+
if system_prompt is not None:
|
|
691
|
+
if isinstance(system_prompt, SystemMessage):
|
|
692
|
+
system_message = system_prompt
|
|
693
|
+
else:
|
|
694
|
+
system_message = SystemMessage(content=system_prompt)
|
|
695
|
+
|
|
640
696
|
# Handle tools being None or empty
|
|
641
697
|
if tools is None:
|
|
642
698
|
tools = []
|
|
@@ -735,9 +791,9 @@ def create_agent( # noqa: PLR0915
|
|
|
735
791
|
default_tools = list(built_in_tools)
|
|
736
792
|
|
|
737
793
|
# validate middleware
|
|
738
|
-
|
|
739
|
-
"Please remove duplicate middleware instances."
|
|
740
|
-
|
|
794
|
+
if len({m.name for m in middleware}) != len(middleware):
|
|
795
|
+
msg = "Please remove duplicate middleware instances."
|
|
796
|
+
raise AssertionError(msg)
|
|
741
797
|
middleware_w_before_agent = [
|
|
742
798
|
m
|
|
743
799
|
for m in middleware
|
|
@@ -830,12 +886,12 @@ def create_agent( # noqa: PLR0915
|
|
|
830
886
|
)
|
|
831
887
|
try:
|
|
832
888
|
structured_response = provider_strategy_binding.parse(output)
|
|
833
|
-
except Exception as exc:
|
|
889
|
+
except Exception as exc:
|
|
834
890
|
schema_name = getattr(
|
|
835
891
|
effective_response_format.schema_spec.schema, "__name__", "response_format"
|
|
836
892
|
)
|
|
837
893
|
validation_error = StructuredOutputValidationError(schema_name, exc, output)
|
|
838
|
-
raise validation_error
|
|
894
|
+
raise validation_error from exc
|
|
839
895
|
else:
|
|
840
896
|
return {"messages": [output], "structured_response": structured_response}
|
|
841
897
|
return {"messages": [output]}
|
|
@@ -881,8 +937,7 @@ def create_agent( # noqa: PLR0915
|
|
|
881
937
|
|
|
882
938
|
tool_message_content = (
|
|
883
939
|
effective_response_format.tool_message_content
|
|
884
|
-
|
|
885
|
-
else f"Returning structured response: {structured_response}"
|
|
940
|
+
or f"Returning structured response: {structured_response}"
|
|
886
941
|
)
|
|
887
942
|
|
|
888
943
|
return {
|
|
@@ -896,13 +951,13 @@ def create_agent( # noqa: PLR0915
|
|
|
896
951
|
],
|
|
897
952
|
"structured_response": structured_response,
|
|
898
953
|
}
|
|
899
|
-
except Exception as exc:
|
|
954
|
+
except Exception as exc:
|
|
900
955
|
exception = StructuredOutputValidationError(tool_call["name"], exc, output)
|
|
901
956
|
should_retry, error_message = _handle_structured_output_error(
|
|
902
957
|
exception, effective_response_format
|
|
903
958
|
)
|
|
904
959
|
if not should_retry:
|
|
905
|
-
raise exception
|
|
960
|
+
raise exception from exc
|
|
906
961
|
|
|
907
962
|
return {
|
|
908
963
|
"messages": [
|
|
@@ -966,7 +1021,7 @@ def create_agent( # noqa: PLR0915
|
|
|
966
1021
|
effective_response_format: ResponseFormat | None
|
|
967
1022
|
if isinstance(request.response_format, AutoStrategy):
|
|
968
1023
|
# User provided raw schema via AutoStrategy - auto-detect best strategy based on model
|
|
969
|
-
if _supports_provider_strategy(request.model):
|
|
1024
|
+
if _supports_provider_strategy(request.model, tools=request.tools):
|
|
970
1025
|
# Model supports provider strategy - use it
|
|
971
1026
|
effective_response_format = ProviderStrategy(schema=request.response_format.schema)
|
|
972
1027
|
else:
|
|
@@ -987,7 +1042,7 @@ def create_agent( # noqa: PLR0915
|
|
|
987
1042
|
|
|
988
1043
|
# Bind model based on effective response format
|
|
989
1044
|
if isinstance(effective_response_format, ProviderStrategy):
|
|
990
|
-
# Use
|
|
1045
|
+
# (Backward compatibility) Use OpenAI format structured output
|
|
991
1046
|
kwargs = effective_response_format.to_model_kwargs()
|
|
992
1047
|
return (
|
|
993
1048
|
request.model.bind_tools(
|
|
@@ -1040,10 +1095,12 @@ def create_agent( # noqa: PLR0915
|
|
|
1040
1095
|
# Get the bound model (with auto-detection if needed)
|
|
1041
1096
|
model_, effective_response_format = _get_bound_model(request)
|
|
1042
1097
|
messages = request.messages
|
|
1043
|
-
if request.
|
|
1044
|
-
messages = [
|
|
1098
|
+
if request.system_message:
|
|
1099
|
+
messages = [request.system_message, *messages]
|
|
1045
1100
|
|
|
1046
1101
|
output = model_.invoke(messages)
|
|
1102
|
+
if name:
|
|
1103
|
+
output.name = name
|
|
1047
1104
|
|
|
1048
1105
|
# Handle model output to get messages and structured_response
|
|
1049
1106
|
handled_output = _handle_model_output(output, effective_response_format)
|
|
@@ -1060,7 +1117,7 @@ def create_agent( # noqa: PLR0915
|
|
|
1060
1117
|
request = ModelRequest(
|
|
1061
1118
|
model=model,
|
|
1062
1119
|
tools=default_tools,
|
|
1063
|
-
|
|
1120
|
+
system_message=system_message,
|
|
1064
1121
|
response_format=initial_response_format,
|
|
1065
1122
|
messages=state["messages"],
|
|
1066
1123
|
tool_choice=None,
|
|
@@ -1093,10 +1150,12 @@ def create_agent( # noqa: PLR0915
|
|
|
1093
1150
|
# Get the bound model (with auto-detection if needed)
|
|
1094
1151
|
model_, effective_response_format = _get_bound_model(request)
|
|
1095
1152
|
messages = request.messages
|
|
1096
|
-
if request.
|
|
1097
|
-
messages = [
|
|
1153
|
+
if request.system_message:
|
|
1154
|
+
messages = [request.system_message, *messages]
|
|
1098
1155
|
|
|
1099
1156
|
output = await model_.ainvoke(messages)
|
|
1157
|
+
if name:
|
|
1158
|
+
output.name = name
|
|
1100
1159
|
|
|
1101
1160
|
# Handle model output to get messages and structured_response
|
|
1102
1161
|
handled_output = _handle_model_output(output, effective_response_format)
|
|
@@ -1113,7 +1172,7 @@ def create_agent( # noqa: PLR0915
|
|
|
1113
1172
|
request = ModelRequest(
|
|
1114
1173
|
model=model,
|
|
1115
1174
|
tools=default_tools,
|
|
1116
|
-
|
|
1175
|
+
system_message=system_message,
|
|
1117
1176
|
response_format=initial_response_format,
|
|
1118
1177
|
messages=state["messages"],
|
|
1119
1178
|
tool_choice=None,
|
|
@@ -1420,7 +1479,7 @@ def create_agent( # noqa: PLR0915
|
|
|
1420
1479
|
debug=debug,
|
|
1421
1480
|
name=name,
|
|
1422
1481
|
cache=cache,
|
|
1423
|
-
)
|
|
1482
|
+
).with_config({"recursion_limit": 10_000})
|
|
1424
1483
|
|
|
1425
1484
|
|
|
1426
1485
|
def _resolve_jump(
|
|
@@ -1,21 +1,17 @@
|
|
|
1
|
-
"""Entrypoint to using [
|
|
2
|
-
|
|
3
|
-
!!! warning "Reference docs"
|
|
4
|
-
This page contains **reference documentation** for Middleware. See
|
|
5
|
-
[the docs](https://docs.langchain.com/oss/python/langchain/middleware) for conceptual
|
|
6
|
-
guides, tutorials, and examples on using Middleware.
|
|
7
|
-
""" # noqa: E501
|
|
1
|
+
"""Entrypoint to using [middleware](https://docs.langchain.com/oss/python/langchain/middleware) plugins with [Agents](https://docs.langchain.com/oss/python/langchain/agents).""" # noqa: E501
|
|
8
2
|
|
|
9
3
|
from .context_editing import (
|
|
10
4
|
ClearToolUsesEdit,
|
|
11
5
|
ContextEditingMiddleware,
|
|
12
6
|
)
|
|
7
|
+
from .file_search import FilesystemFileSearchMiddleware
|
|
13
8
|
from .human_in_the_loop import (
|
|
14
9
|
HumanInTheLoopMiddleware,
|
|
15
10
|
InterruptOnConfig,
|
|
16
11
|
)
|
|
17
12
|
from .model_call_limit import ModelCallLimitMiddleware
|
|
18
13
|
from .model_fallback import ModelFallbackMiddleware
|
|
14
|
+
from .model_retry import ModelRetryMiddleware
|
|
19
15
|
from .pii import PIIDetectionError, PIIMiddleware
|
|
20
16
|
from .shell_tool import (
|
|
21
17
|
CodexSandboxExecutionPolicy,
|
|
@@ -52,6 +48,7 @@ __all__ = [
|
|
|
52
48
|
"CodexSandboxExecutionPolicy",
|
|
53
49
|
"ContextEditingMiddleware",
|
|
54
50
|
"DockerExecutionPolicy",
|
|
51
|
+
"FilesystemFileSearchMiddleware",
|
|
55
52
|
"HostExecutionPolicy",
|
|
56
53
|
"HumanInTheLoopMiddleware",
|
|
57
54
|
"InterruptOnConfig",
|
|
@@ -61,6 +58,7 @@ __all__ = [
|
|
|
61
58
|
"ModelFallbackMiddleware",
|
|
62
59
|
"ModelRequest",
|
|
63
60
|
"ModelResponse",
|
|
61
|
+
"ModelRetryMiddleware",
|
|
64
62
|
"PIIDetectionError",
|
|
65
63
|
"PIIMiddleware",
|
|
66
64
|
"RedactionRule",
|
|
@@ -56,11 +56,12 @@ class BaseExecutionPolicy(abc.ABC):
|
|
|
56
56
|
"""Configuration contract for persistent shell sessions.
|
|
57
57
|
|
|
58
58
|
Concrete subclasses encapsulate how a shell process is launched and constrained.
|
|
59
|
+
|
|
59
60
|
Each policy documents its security guarantees and the operating environments in
|
|
60
|
-
which it is appropriate. Use
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
61
|
+
which it is appropriate. Use `HostExecutionPolicy` for trusted, same-host execution;
|
|
62
|
+
`CodexSandboxExecutionPolicy` when the Codex CLI sandbox is available and you want
|
|
63
|
+
additional syscall restrictions; and `DockerExecutionPolicy` for container-level
|
|
64
|
+
isolation using Docker.
|
|
64
65
|
"""
|
|
65
66
|
|
|
66
67
|
command_timeout: float = 30.0
|
|
@@ -91,13 +92,13 @@ class HostExecutionPolicy(BaseExecutionPolicy):
|
|
|
91
92
|
|
|
92
93
|
This policy is best suited for trusted or single-tenant environments (CI jobs,
|
|
93
94
|
developer workstations, pre-sandboxed containers) where the agent must access the
|
|
94
|
-
host filesystem and tooling without additional isolation.
|
|
95
|
-
|
|
95
|
+
host filesystem and tooling without additional isolation. Enforces optional CPU and
|
|
96
|
+
memory limits to prevent runaway commands but offers **no** filesystem or network
|
|
96
97
|
sandboxing; commands can modify anything the process user can reach.
|
|
97
98
|
|
|
98
|
-
On Linux platforms resource limits are applied with
|
|
99
|
-
shell starts. On macOS, where
|
|
100
|
-
|
|
99
|
+
On Linux platforms resource limits are applied with `resource.prlimit` after the
|
|
100
|
+
shell starts. On macOS, where `prlimit` is unavailable, limits are set in a
|
|
101
|
+
`preexec_fn` before `exec`. In both cases the shell runs in its own process group
|
|
101
102
|
so timeouts can terminate the full subtree.
|
|
102
103
|
"""
|
|
103
104
|
|
|
@@ -199,9 +200,9 @@ class CodexSandboxExecutionPolicy(BaseExecutionPolicy):
|
|
|
199
200
|
(Linux) profiles. Commands still run on the host, but within the sandbox requested by
|
|
200
201
|
the CLI. If the Codex binary is unavailable or the runtime lacks the required
|
|
201
202
|
kernel features (e.g., Landlock inside some containers), process startup fails with a
|
|
202
|
-
|
|
203
|
+
`RuntimeError`.
|
|
203
204
|
|
|
204
|
-
Configure sandbox
|
|
205
|
+
Configure sandbox behavior via `config_overrides` to align with your Codex CLI
|
|
205
206
|
profile. This policy does not add its own resource limits; combine it with
|
|
206
207
|
host-level guards (cgroups, container resource limits) as needed.
|
|
207
208
|
"""
|
|
@@ -271,17 +272,17 @@ class DockerExecutionPolicy(BaseExecutionPolicy):
|
|
|
271
272
|
"""Run the shell inside a dedicated Docker container.
|
|
272
273
|
|
|
273
274
|
Choose this policy when commands originate from untrusted users or you require
|
|
274
|
-
strong isolation between sessions. By default the workspace is bind-mounted only
|
|
275
|
-
it refers to an existing non-temporary directory; ephemeral sessions run
|
|
276
|
-
mount to minimise host exposure. The container's network namespace is
|
|
277
|
-
default (
|
|
278
|
-
|
|
275
|
+
strong isolation between sessions. By default the workspace is bind-mounted only
|
|
276
|
+
when it refers to an existing non-temporary directory; ephemeral sessions run
|
|
277
|
+
without a mount to minimise host exposure. The container's network namespace is
|
|
278
|
+
disabled by default (`--network none`) and you can enable further hardening via
|
|
279
|
+
`read_only_rootfs` and `user`.
|
|
279
280
|
|
|
280
281
|
The security guarantees depend on your Docker daemon configuration. Run the agent on
|
|
281
|
-
a host where Docker is locked down (rootless mode, AppArmor/SELinux, etc.) and
|
|
282
|
-
any additional volumes or capabilities passed through ``extra_run_args``. The
|
|
283
|
-
image is
|
|
284
|
-
tooling.
|
|
282
|
+
a host where Docker is locked down (rootless mode, AppArmor/SELinux, etc.) and
|
|
283
|
+
review any additional volumes or capabilities passed through ``extra_run_args``. The
|
|
284
|
+
default image is `python:3.12-alpine3.19`; supply a custom image if you need
|
|
285
|
+
preinstalled tooling.
|
|
285
286
|
"""
|
|
286
287
|
|
|
287
288
|
binary: str = "docker"
|
|
@@ -4,6 +4,7 @@ from __future__ import annotations
|
|
|
4
4
|
|
|
5
5
|
import hashlib
|
|
6
6
|
import ipaddress
|
|
7
|
+
import operator
|
|
7
8
|
import re
|
|
8
9
|
from collections.abc import Callable, Sequence
|
|
9
10
|
from dataclasses import dataclass
|
|
@@ -127,7 +128,7 @@ def detect_url(content: str) -> list[PIIMatch]:
|
|
|
127
128
|
for match in re.finditer(scheme_pattern, content):
|
|
128
129
|
url = match.group()
|
|
129
130
|
result = urlparse(url)
|
|
130
|
-
if result.scheme in
|
|
131
|
+
if result.scheme in {"http", "https"} and result.netloc:
|
|
131
132
|
matches.append(
|
|
132
133
|
PIIMatch(
|
|
133
134
|
type="url",
|
|
@@ -179,11 +180,14 @@ BUILTIN_DETECTORS: dict[str, Detector] = {
|
|
|
179
180
|
}
|
|
180
181
|
"""Registry of built-in detectors keyed by type name."""
|
|
181
182
|
|
|
183
|
+
_CARD_NUMBER_MIN_DIGITS = 13
|
|
184
|
+
_CARD_NUMBER_MAX_DIGITS = 19
|
|
185
|
+
|
|
182
186
|
|
|
183
187
|
def _passes_luhn(card_number: str) -> bool:
|
|
184
188
|
"""Validate credit card number using the Luhn checksum."""
|
|
185
189
|
digits = [int(d) for d in card_number if d.isdigit()]
|
|
186
|
-
if not
|
|
190
|
+
if not _CARD_NUMBER_MIN_DIGITS <= len(digits) <= _CARD_NUMBER_MAX_DIGITS:
|
|
187
191
|
return False
|
|
188
192
|
|
|
189
193
|
checksum = 0
|
|
@@ -191,7 +195,7 @@ def _passes_luhn(card_number: str) -> bool:
|
|
|
191
195
|
value = digit
|
|
192
196
|
if index % 2 == 1:
|
|
193
197
|
value *= 2
|
|
194
|
-
if value > 9:
|
|
198
|
+
if value > 9: # noqa: PLR2004
|
|
195
199
|
value -= 9
|
|
196
200
|
checksum += value
|
|
197
201
|
return checksum % 10 == 0
|
|
@@ -199,24 +203,28 @@ def _passes_luhn(card_number: str) -> bool:
|
|
|
199
203
|
|
|
200
204
|
def _apply_redact_strategy(content: str, matches: list[PIIMatch]) -> str:
|
|
201
205
|
result = content
|
|
202
|
-
for match in sorted(matches, key=
|
|
206
|
+
for match in sorted(matches, key=operator.itemgetter("start"), reverse=True):
|
|
203
207
|
replacement = f"[REDACTED_{match['type'].upper()}]"
|
|
204
208
|
result = result[: match["start"]] + replacement + result[match["end"] :]
|
|
205
209
|
return result
|
|
206
210
|
|
|
207
211
|
|
|
212
|
+
_UNMASKED_CHAR_NUMBER = 4
|
|
213
|
+
_IPV4_PARTS_NUMBER = 4
|
|
214
|
+
|
|
215
|
+
|
|
208
216
|
def _apply_mask_strategy(content: str, matches: list[PIIMatch]) -> str:
|
|
209
217
|
result = content
|
|
210
|
-
for match in sorted(matches, key=
|
|
218
|
+
for match in sorted(matches, key=operator.itemgetter("start"), reverse=True):
|
|
211
219
|
value = match["value"]
|
|
212
220
|
pii_type = match["type"]
|
|
213
221
|
if pii_type == "email":
|
|
214
222
|
parts = value.split("@")
|
|
215
|
-
if len(parts) == 2:
|
|
223
|
+
if len(parts) == 2: # noqa: PLR2004
|
|
216
224
|
domain_parts = parts[1].split(".")
|
|
217
225
|
masked = (
|
|
218
226
|
f"{parts[0]}@****.{domain_parts[-1]}"
|
|
219
|
-
if len(domain_parts)
|
|
227
|
+
if len(domain_parts) > 1
|
|
220
228
|
else f"{parts[0]}@****"
|
|
221
229
|
)
|
|
222
230
|
else:
|
|
@@ -225,12 +233,15 @@ def _apply_mask_strategy(content: str, matches: list[PIIMatch]) -> str:
|
|
|
225
233
|
digits_only = "".join(c for c in value if c.isdigit())
|
|
226
234
|
separator = "-" if "-" in value else " " if " " in value else ""
|
|
227
235
|
if separator:
|
|
228
|
-
masked =
|
|
236
|
+
masked = (
|
|
237
|
+
f"****{separator}****{separator}****{separator}"
|
|
238
|
+
f"{digits_only[-_UNMASKED_CHAR_NUMBER:]}"
|
|
239
|
+
)
|
|
229
240
|
else:
|
|
230
|
-
masked = f"************{digits_only[-
|
|
241
|
+
masked = f"************{digits_only[-_UNMASKED_CHAR_NUMBER:]}"
|
|
231
242
|
elif pii_type == "ip":
|
|
232
243
|
octets = value.split(".")
|
|
233
|
-
masked = f"*.*.*.{octets[-1]}" if len(octets) ==
|
|
244
|
+
masked = f"*.*.*.{octets[-1]}" if len(octets) == _IPV4_PARTS_NUMBER else "****"
|
|
234
245
|
elif pii_type == "mac_address":
|
|
235
246
|
separator = ":" if ":" in value else "-"
|
|
236
247
|
masked = (
|
|
@@ -239,14 +250,18 @@ def _apply_mask_strategy(content: str, matches: list[PIIMatch]) -> str:
|
|
|
239
250
|
elif pii_type == "url":
|
|
240
251
|
masked = "[MASKED_URL]"
|
|
241
252
|
else:
|
|
242
|
-
masked =
|
|
253
|
+
masked = (
|
|
254
|
+
f"****{value[-_UNMASKED_CHAR_NUMBER:]}"
|
|
255
|
+
if len(value) > _UNMASKED_CHAR_NUMBER
|
|
256
|
+
else "****"
|
|
257
|
+
)
|
|
243
258
|
result = result[: match["start"]] + masked + result[match["end"] :]
|
|
244
259
|
return result
|
|
245
260
|
|
|
246
261
|
|
|
247
262
|
def _apply_hash_strategy(content: str, matches: list[PIIMatch]) -> str:
|
|
248
263
|
result = content
|
|
249
|
-
for match in sorted(matches, key=
|
|
264
|
+
for match in sorted(matches, key=operator.itemgetter("start"), reverse=True):
|
|
250
265
|
digest = hashlib.sha256(match["value"].encode()).hexdigest()[:8]
|
|
251
266
|
replacement = f"<{match['type']}_hash:{digest}>"
|
|
252
267
|
result = result[: match["start"]] + replacement + result[match["end"] :]
|