langchain 1.0.5__py3-none-any.whl → 1.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. langchain/__init__.py +1 -1
  2. langchain/agents/__init__.py +1 -7
  3. langchain/agents/factory.py +99 -40
  4. langchain/agents/middleware/__init__.py +5 -7
  5. langchain/agents/middleware/_execution.py +21 -20
  6. langchain/agents/middleware/_redaction.py +27 -12
  7. langchain/agents/middleware/_retry.py +123 -0
  8. langchain/agents/middleware/context_editing.py +26 -22
  9. langchain/agents/middleware/file_search.py +18 -13
  10. langchain/agents/middleware/human_in_the_loop.py +60 -54
  11. langchain/agents/middleware/model_call_limit.py +63 -17
  12. langchain/agents/middleware/model_fallback.py +7 -9
  13. langchain/agents/middleware/model_retry.py +300 -0
  14. langchain/agents/middleware/pii.py +80 -27
  15. langchain/agents/middleware/shell_tool.py +230 -103
  16. langchain/agents/middleware/summarization.py +439 -90
  17. langchain/agents/middleware/todo.py +111 -27
  18. langchain/agents/middleware/tool_call_limit.py +105 -71
  19. langchain/agents/middleware/tool_emulator.py +42 -33
  20. langchain/agents/middleware/tool_retry.py +171 -159
  21. langchain/agents/middleware/tool_selection.py +37 -27
  22. langchain/agents/middleware/types.py +754 -392
  23. langchain/agents/structured_output.py +22 -12
  24. langchain/chat_models/__init__.py +1 -7
  25. langchain/chat_models/base.py +233 -184
  26. langchain/embeddings/__init__.py +0 -5
  27. langchain/embeddings/base.py +79 -65
  28. langchain/messages/__init__.py +0 -5
  29. langchain/tools/__init__.py +1 -7
  30. {langchain-1.0.5.dist-info → langchain-1.2.3.dist-info}/METADATA +3 -5
  31. langchain-1.2.3.dist-info/RECORD +36 -0
  32. {langchain-1.0.5.dist-info → langchain-1.2.3.dist-info}/WHEEL +1 -1
  33. langchain-1.0.5.dist-info/RECORD +0 -34
  34. {langchain-1.0.5.dist-info → langchain-1.2.3.dist-info}/licenses/LICENSE +0 -0
langchain/__init__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  """Main entrypoint into LangChain."""
2
2
 
3
- __version__ = "1.0.5"
3
+ __version__ = "1.2.3"
langchain/agents/__init__.py CHANGED
@@ -1,10 +1,4 @@
1
- """Entrypoint to building [Agents](https://docs.langchain.com/oss/python/langchain/agents) with LangChain.
2
-
3
- !!! warning "Reference docs"
4
- This page contains **reference documentation** for Agents. See
5
- [the docs](https://docs.langchain.com/oss/python/langchain/agents) for conceptual
6
- guides, tutorials, and examples on using Agents.
7
- """ # noqa: E501
1
+ """Entrypoint to building [Agents](https://docs.langchain.com/oss/python/langchain/agents) with LangChain.""" # noqa: E501
8
2
 
9
3
  from langchain.agents.factory import create_agent
10
4
  from langchain.agents.middleware.types import AgentState
langchain/agents/factory.py CHANGED
@@ -20,9 +20,7 @@ from langgraph._internal._runnable import RunnableCallable
20
20
  from langgraph.constants import END, START
21
21
  from langgraph.graph.state import StateGraph
22
22
  from langgraph.prebuilt.tool_node import ToolCallWithContext, ToolNode
23
- from langgraph.runtime import Runtime # noqa: TC002
24
23
  from langgraph.types import Command, Send
25
- from langgraph.typing import ContextT # noqa: TC002
26
24
  from typing_extensions import NotRequired, Required, TypedDict
27
25
 
28
26
  from langchain.agents.middleware.types import (
@@ -56,13 +54,27 @@ if TYPE_CHECKING:
56
54
  from langchain_core.runnables import Runnable
57
55
  from langgraph.cache.base import BaseCache
58
56
  from langgraph.graph.state import CompiledStateGraph
57
+ from langgraph.runtime import Runtime
59
58
  from langgraph.store.base import BaseStore
60
59
  from langgraph.types import Checkpointer
60
+ from langgraph.typing import ContextT
61
61
 
62
62
  from langchain.agents.middleware.types import ToolCallRequest, ToolCallWrapper
63
63
 
64
64
  STRUCTURED_OUTPUT_ERROR_TEMPLATE = "Error: {error}\n Please fix your mistakes."
65
65
 
66
+ FALLBACK_MODELS_WITH_STRUCTURED_OUTPUT = [
67
+ # if model profile data are not available, these models are assumed to support
68
+ # structured output
69
+ "grok",
70
+ "gpt-5",
71
+ "gpt-4.1",
72
+ "gpt-4o",
73
+ "gpt-oss",
74
+ "o3-pro",
75
+ "o3-mini",
76
+ ]
77
+
66
78
 
67
79
  def _normalize_to_model_response(result: ModelResponse | AIMessage) -> ModelResponse:
68
80
  """Normalize middleware return value to ModelResponse."""
@@ -302,7 +314,7 @@ def _resolve_schema(schemas: set[type], schema_name: str, omit_flag: str | None
302
314
  def _extract_metadata(type_: type) -> list:
303
315
  """Extract metadata from a field type, handling Required/NotRequired and Annotated wrappers."""
304
316
  # Handle Required[Annotated[...]] or NotRequired[Annotated[...]]
305
- if get_origin(type_) in (Required, NotRequired):
317
+ if get_origin(type_) in {Required, NotRequired}:
306
318
  inner_type = get_args(type_)[0]
307
319
  if get_origin(inner_type) is Annotated:
308
320
  return list(get_args(inner_type)[1:])
@@ -349,11 +361,13 @@ def _get_can_jump_to(middleware: AgentMiddleware[Any, Any], hook_name: str) -> l
349
361
  return []
350
362
 
351
363
 
352
- def _supports_provider_strategy(model: str | BaseChatModel) -> bool:
364
+ def _supports_provider_strategy(model: str | BaseChatModel, tools: list | None = None) -> bool:
353
365
  """Check if a model supports provider-specific structured output.
354
366
 
355
367
  Args:
356
368
  model: Model name string or `BaseChatModel` instance.
369
+ tools: Optional list of tools provided to the agent. Needed because some models
370
+ don't support structured output together with tool calling.
357
371
 
358
372
  Returns:
359
373
  `True` if the model supports provider-specific structured output, `False` otherwise.
@@ -362,11 +376,23 @@ def _supports_provider_strategy(model: str | BaseChatModel) -> bool:
362
376
  if isinstance(model, str):
363
377
  model_name = model
364
378
  elif isinstance(model, BaseChatModel):
365
- model_name = getattr(model, "model_name", None)
379
+ model_name = (
380
+ getattr(model, "model_name", None)
381
+ or getattr(model, "model", None)
382
+ or getattr(model, "model_id", "")
383
+ )
384
+ model_profile = model.profile
385
+ if (
386
+ model_profile is not None
387
+ and model_profile.get("structured_output")
388
+ # We make an exception for Gemini models, which currently do not support
389
+ # simultaneous tool use with structured output
390
+ and not (tools and isinstance(model_name, str) and "gemini" in model_name.lower())
391
+ ):
392
+ return True
366
393
 
367
394
  return (
368
- "grok" in model_name.lower()
369
- or any(part in model_name for part in ["gpt-5", "gpt-4.1", "gpt-oss", "o3-pro", "o3-mini"])
395
+ any(part in model_name.lower() for part in FALLBACK_MODELS_WITH_STRUCTURED_OUTPUT)
370
396
  if model_name
371
397
  else False
372
398
  )
@@ -512,11 +538,11 @@ def _chain_async_tool_call_wrappers(
512
538
  return result
513
539
 
514
540
 
515
- def create_agent( # noqa: PLR0915
541
+ def create_agent(
516
542
  model: str | BaseChatModel,
517
543
  tools: Sequence[BaseTool | Callable | dict[str, Any]] | None = None,
518
544
  *,
519
- system_prompt: str | None = None,
545
+ system_prompt: str | SystemMessage | None = None,
520
546
  middleware: Sequence[AgentMiddleware[StateT_co, ContextT]] = (),
521
547
  response_format: ResponseFormat[ResponseT] | type[ResponseT] | None = None,
522
548
  state_schema: type[AgentState[ResponseT]] | None = None,
@@ -537,42 +563,64 @@ def create_agent( # noqa: PLR0915
537
563
  visit the [Agents](https://docs.langchain.com/oss/python/langchain/agents) docs.
538
564
 
539
565
  Args:
540
- model: The language model for the agent. Can be a string identifier
541
- (e.g., `"openai:gpt-4"`) or a direct chat model instance (e.g.,
542
- [`ChatOpenAI`][langchain_openai.ChatOpenAI] or other another
543
- [chat model](https://docs.langchain.com/oss/python/integrations/chat)).
566
+ model: The language model for the agent.
567
+
568
+ Can be a string identifier (e.g., `"openai:gpt-4"`) or a direct chat model
569
+ instance (e.g., [`ChatOpenAI`][langchain_openai.ChatOpenAI] or other another
570
+ [LangChain chat model](https://docs.langchain.com/oss/python/integrations/chat)).
544
571
 
545
572
  For a full list of supported model strings, see
546
573
  [`init_chat_model`][langchain.chat_models.init_chat_model(model_provider)].
547
- tools: A list of tools, `dicts`, or `Callable`.
574
+
575
+ !!! tip ""
576
+
577
+ See the [Models](https://docs.langchain.com/oss/python/langchain/models)
578
+ docs for more information.
579
+ tools: A list of tools, `dict`, or `Callable`.
548
580
 
549
581
  If `None` or an empty list, the agent will consist of a model node without a
550
582
  tool calling loop.
583
+
584
+
585
+ !!! tip ""
586
+
587
+ See the [Tools](https://docs.langchain.com/oss/python/langchain/tools)
588
+ docs for more information.
551
589
  system_prompt: An optional system prompt for the LLM.
552
590
 
553
- Prompts are converted to a
554
- [`SystemMessage`][langchain.messages.SystemMessage] and added to the
555
- beginning of the message list.
591
+ Can be a `str` (which will be converted to a `SystemMessage`) or a
592
+ `SystemMessage` instance directly. The system message is added to the
593
+ beginning of the message list when calling the model.
556
594
  middleware: A sequence of middleware instances to apply to the agent.
557
595
 
558
- Middleware can intercept and modify agent behavior at various stages. See
559
- the [full guide](https://docs.langchain.com/oss/python/langchain/middleware).
596
+ Middleware can intercept and modify agent behavior at various stages.
597
+
598
+ !!! tip ""
599
+
600
+ See the [Middleware](https://docs.langchain.com/oss/python/langchain/middleware)
601
+ docs for more information.
560
602
  response_format: An optional configuration for structured responses.
561
603
 
562
604
  Can be a `ToolStrategy`, `ProviderStrategy`, or a Pydantic model class.
563
605
 
564
606
  If provided, the agent will handle structured output during the
565
- conversation flow. Raw schemas will be wrapped in an appropriate strategy
566
- based on model capabilities.
607
+ conversation flow.
608
+
609
+ Raw schemas will be wrapped in an appropriate strategy based on model
610
+ capabilities.
611
+
612
+ !!! tip ""
613
+
614
+ See the [Structured output](https://docs.langchain.com/oss/python/langchain/structured-output)
615
+ docs for more information.
567
616
  state_schema: An optional `TypedDict` schema that extends `AgentState`.
568
617
 
569
618
  When provided, this schema is used instead of `AgentState` as the base
570
619
  schema for merging with middleware state schemas. This allows users to
571
620
  add custom state fields without needing to create custom middleware.
621
+
572
622
  Generally, it's recommended to use `state_schema` extensions via middleware
573
623
  to keep relevant extensions scoped to corresponding hooks / tools.
574
-
575
- The schema must be a subclass of `AgentState[ResponseT]`.
576
624
  context_schema: An optional schema for runtime context.
577
625
  checkpointer: An optional checkpoint saver object.
578
626
 
@@ -637,6 +685,14 @@ def create_agent( # noqa: PLR0915
637
685
  if isinstance(model, str):
638
686
  model = init_chat_model(model)
639
687
 
688
+ # Convert system_prompt to SystemMessage if needed
689
+ system_message: SystemMessage | None = None
690
+ if system_prompt is not None:
691
+ if isinstance(system_prompt, SystemMessage):
692
+ system_message = system_prompt
693
+ else:
694
+ system_message = SystemMessage(content=system_prompt)
695
+
640
696
  # Handle tools being None or empty
641
697
  if tools is None:
642
698
  tools = []
@@ -735,9 +791,9 @@ def create_agent( # noqa: PLR0915
735
791
  default_tools = list(built_in_tools)
736
792
 
737
793
  # validate middleware
738
- assert len({m.name for m in middleware}) == len(middleware), ( # noqa: S101
739
- "Please remove duplicate middleware instances."
740
- )
794
+ if len({m.name for m in middleware}) != len(middleware):
795
+ msg = "Please remove duplicate middleware instances."
796
+ raise AssertionError(msg)
741
797
  middleware_w_before_agent = [
742
798
  m
743
799
  for m in middleware
@@ -830,12 +886,12 @@ def create_agent( # noqa: PLR0915
830
886
  )
831
887
  try:
832
888
  structured_response = provider_strategy_binding.parse(output)
833
- except Exception as exc: # noqa: BLE001
889
+ except Exception as exc:
834
890
  schema_name = getattr(
835
891
  effective_response_format.schema_spec.schema, "__name__", "response_format"
836
892
  )
837
893
  validation_error = StructuredOutputValidationError(schema_name, exc, output)
838
- raise validation_error
894
+ raise validation_error from exc
839
895
  else:
840
896
  return {"messages": [output], "structured_response": structured_response}
841
897
  return {"messages": [output]}
@@ -881,8 +937,7 @@ def create_agent( # noqa: PLR0915
881
937
 
882
938
  tool_message_content = (
883
939
  effective_response_format.tool_message_content
884
- if effective_response_format.tool_message_content
885
- else f"Returning structured response: {structured_response}"
940
+ or f"Returning structured response: {structured_response}"
886
941
  )
887
942
 
888
943
  return {
@@ -896,13 +951,13 @@ def create_agent( # noqa: PLR0915
896
951
  ],
897
952
  "structured_response": structured_response,
898
953
  }
899
- except Exception as exc: # noqa: BLE001
954
+ except Exception as exc:
900
955
  exception = StructuredOutputValidationError(tool_call["name"], exc, output)
901
956
  should_retry, error_message = _handle_structured_output_error(
902
957
  exception, effective_response_format
903
958
  )
904
959
  if not should_retry:
905
- raise exception
960
+ raise exception from exc
906
961
 
907
962
  return {
908
963
  "messages": [
@@ -966,7 +1021,7 @@ def create_agent( # noqa: PLR0915
966
1021
  effective_response_format: ResponseFormat | None
967
1022
  if isinstance(request.response_format, AutoStrategy):
968
1023
  # User provided raw schema via AutoStrategy - auto-detect best strategy based on model
969
- if _supports_provider_strategy(request.model):
1024
+ if _supports_provider_strategy(request.model, tools=request.tools):
970
1025
  # Model supports provider strategy - use it
971
1026
  effective_response_format = ProviderStrategy(schema=request.response_format.schema)
972
1027
  else:
@@ -987,7 +1042,7 @@ def create_agent( # noqa: PLR0915
987
1042
 
988
1043
  # Bind model based on effective response format
989
1044
  if isinstance(effective_response_format, ProviderStrategy):
990
- # Use provider-specific structured output
1045
+ # (Backward compatibility) Use OpenAI format structured output
991
1046
  kwargs = effective_response_format.to_model_kwargs()
992
1047
  return (
993
1048
  request.model.bind_tools(
@@ -1040,10 +1095,12 @@ def create_agent( # noqa: PLR0915
1040
1095
  # Get the bound model (with auto-detection if needed)
1041
1096
  model_, effective_response_format = _get_bound_model(request)
1042
1097
  messages = request.messages
1043
- if request.system_prompt:
1044
- messages = [SystemMessage(request.system_prompt), *messages]
1098
+ if request.system_message:
1099
+ messages = [request.system_message, *messages]
1045
1100
 
1046
1101
  output = model_.invoke(messages)
1102
+ if name:
1103
+ output.name = name
1047
1104
 
1048
1105
  # Handle model output to get messages and structured_response
1049
1106
  handled_output = _handle_model_output(output, effective_response_format)
@@ -1060,7 +1117,7 @@ def create_agent( # noqa: PLR0915
1060
1117
  request = ModelRequest(
1061
1118
  model=model,
1062
1119
  tools=default_tools,
1063
- system_prompt=system_prompt,
1120
+ system_message=system_message,
1064
1121
  response_format=initial_response_format,
1065
1122
  messages=state["messages"],
1066
1123
  tool_choice=None,
@@ -1093,10 +1150,12 @@ def create_agent( # noqa: PLR0915
1093
1150
  # Get the bound model (with auto-detection if needed)
1094
1151
  model_, effective_response_format = _get_bound_model(request)
1095
1152
  messages = request.messages
1096
- if request.system_prompt:
1097
- messages = [SystemMessage(request.system_prompt), *messages]
1153
+ if request.system_message:
1154
+ messages = [request.system_message, *messages]
1098
1155
 
1099
1156
  output = await model_.ainvoke(messages)
1157
+ if name:
1158
+ output.name = name
1100
1159
 
1101
1160
  # Handle model output to get messages and structured_response
1102
1161
  handled_output = _handle_model_output(output, effective_response_format)
@@ -1113,7 +1172,7 @@ def create_agent( # noqa: PLR0915
1113
1172
  request = ModelRequest(
1114
1173
  model=model,
1115
1174
  tools=default_tools,
1116
- system_prompt=system_prompt,
1175
+ system_message=system_message,
1117
1176
  response_format=initial_response_format,
1118
1177
  messages=state["messages"],
1119
1178
  tool_choice=None,
langchain/agents/middleware/__init__.py CHANGED
@@ -1,21 +1,17 @@
1
- """Entrypoint to using [Middleware](https://docs.langchain.com/oss/python/langchain/middleware) plugins with [Agents](https://docs.langchain.com/oss/python/langchain/agents).
2
-
3
- !!! warning "Reference docs"
4
- This page contains **reference documentation** for Middleware. See
5
- [the docs](https://docs.langchain.com/oss/python/langchain/middleware) for conceptual
6
- guides, tutorials, and examples on using Middleware.
7
- """ # noqa: E501
1
+ """Entrypoint to using [middleware](https://docs.langchain.com/oss/python/langchain/middleware) plugins with [Agents](https://docs.langchain.com/oss/python/langchain/agents).""" # noqa: E501
8
2
 
9
3
  from .context_editing import (
10
4
  ClearToolUsesEdit,
11
5
  ContextEditingMiddleware,
12
6
  )
7
+ from .file_search import FilesystemFileSearchMiddleware
13
8
  from .human_in_the_loop import (
14
9
  HumanInTheLoopMiddleware,
15
10
  InterruptOnConfig,
16
11
  )
17
12
  from .model_call_limit import ModelCallLimitMiddleware
18
13
  from .model_fallback import ModelFallbackMiddleware
14
+ from .model_retry import ModelRetryMiddleware
19
15
  from .pii import PIIDetectionError, PIIMiddleware
20
16
  from .shell_tool import (
21
17
  CodexSandboxExecutionPolicy,
@@ -52,6 +48,7 @@ __all__ = [
52
48
  "CodexSandboxExecutionPolicy",
53
49
  "ContextEditingMiddleware",
54
50
  "DockerExecutionPolicy",
51
+ "FilesystemFileSearchMiddleware",
55
52
  "HostExecutionPolicy",
56
53
  "HumanInTheLoopMiddleware",
57
54
  "InterruptOnConfig",
@@ -61,6 +58,7 @@ __all__ = [
61
58
  "ModelFallbackMiddleware",
62
59
  "ModelRequest",
63
60
  "ModelResponse",
61
+ "ModelRetryMiddleware",
64
62
  "PIIDetectionError",
65
63
  "PIIMiddleware",
66
64
  "RedactionRule",
langchain/agents/middleware/_execution.py CHANGED
@@ -56,11 +56,12 @@ class BaseExecutionPolicy(abc.ABC):
56
56
  """Configuration contract for persistent shell sessions.
57
57
 
58
58
  Concrete subclasses encapsulate how a shell process is launched and constrained.
59
+
59
60
  Each policy documents its security guarantees and the operating environments in
60
- which it is appropriate. Use :class:`HostExecutionPolicy` for trusted, same-host
61
- execution; :class:`CodexSandboxExecutionPolicy` when the Codex CLI sandbox is
62
- available and you want additional syscall restrictions; and
63
- :class:`DockerExecutionPolicy` for container-level isolation using Docker.
61
+ which it is appropriate. Use `HostExecutionPolicy` for trusted, same-host execution;
62
+ `CodexSandboxExecutionPolicy` when the Codex CLI sandbox is available and you want
63
+ additional syscall restrictions; and `DockerExecutionPolicy` for container-level
64
+ isolation using Docker.
64
65
  """
65
66
 
66
67
  command_timeout: float = 30.0
@@ -91,13 +92,13 @@ class HostExecutionPolicy(BaseExecutionPolicy):
91
92
 
92
93
  This policy is best suited for trusted or single-tenant environments (CI jobs,
93
94
  developer workstations, pre-sandboxed containers) where the agent must access the
94
- host filesystem and tooling without additional isolation. It enforces optional CPU
95
- and memory limits to prevent runaway commands but offers **no** filesystem or network
95
+ host filesystem and tooling without additional isolation. Enforces optional CPU and
96
+ memory limits to prevent runaway commands but offers **no** filesystem or network
96
97
  sandboxing; commands can modify anything the process user can reach.
97
98
 
98
- On Linux platforms resource limits are applied with ``resource.prlimit`` after the
99
- shell starts. On macOS, where ``prlimit`` is unavailable, limits are set in a
100
- ``preexec_fn`` before ``exec``. In both cases the shell runs in its own process group
99
+ On Linux platforms resource limits are applied with `resource.prlimit` after the
100
+ shell starts. On macOS, where `prlimit` is unavailable, limits are set in a
101
+ `preexec_fn` before `exec`. In both cases the shell runs in its own process group
101
102
  so timeouts can terminate the full subtree.
102
103
  """
103
104
 
@@ -199,9 +200,9 @@ class CodexSandboxExecutionPolicy(BaseExecutionPolicy):
199
200
  (Linux) profiles. Commands still run on the host, but within the sandbox requested by
200
201
  the CLI. If the Codex binary is unavailable or the runtime lacks the required
201
202
  kernel features (e.g., Landlock inside some containers), process startup fails with a
202
- :class:`RuntimeError`.
203
+ `RuntimeError`.
203
204
 
204
- Configure sandbox behaviour via ``config_overrides`` to align with your Codex CLI
205
+ Configure sandbox behavior via `config_overrides` to align with your Codex CLI
205
206
  profile. This policy does not add its own resource limits; combine it with
206
207
  host-level guards (cgroups, container resource limits) as needed.
207
208
  """
@@ -271,17 +272,17 @@ class DockerExecutionPolicy(BaseExecutionPolicy):
271
272
  """Run the shell inside a dedicated Docker container.
272
273
 
273
274
  Choose this policy when commands originate from untrusted users or you require
274
- strong isolation between sessions. By default the workspace is bind-mounted only when
275
- it refers to an existing non-temporary directory; ephemeral sessions run without a
276
- mount to minimise host exposure. The container's network namespace is disabled by
277
- default (``--network none``) and you can enable further hardening via
278
- ``read_only_rootfs`` and ``user``.
275
+ strong isolation between sessions. By default the workspace is bind-mounted only
276
+ when it refers to an existing non-temporary directory; ephemeral sessions run
277
+ without a mount to minimise host exposure. The container's network namespace is
278
+ disabled by default (`--network none`) and you can enable further hardening via
279
+ `read_only_rootfs` and `user`.
279
280
 
280
281
  The security guarantees depend on your Docker daemon configuration. Run the agent on
281
- a host where Docker is locked down (rootless mode, AppArmor/SELinux, etc.) and review
282
- any additional volumes or capabilities passed through ``extra_run_args``. The default
283
- image is ``python:3.12-alpine3.19``; supply a custom image if you need preinstalled
284
- tooling.
282
+ a host where Docker is locked down (rootless mode, AppArmor/SELinux, etc.) and
283
+ review any additional volumes or capabilities passed through ``extra_run_args``. The
284
+ default image is `python:3.12-alpine3.19`; supply a custom image if you need
285
+ preinstalled tooling.
285
286
  """
286
287
 
287
288
  binary: str = "docker"
langchain/agents/middleware/_redaction.py CHANGED
@@ -4,6 +4,7 @@ from __future__ import annotations
4
4
 
5
5
  import hashlib
6
6
  import ipaddress
7
+ import operator
7
8
  import re
8
9
  from collections.abc import Callable, Sequence
9
10
  from dataclasses import dataclass
@@ -127,7 +128,7 @@ def detect_url(content: str) -> list[PIIMatch]:
127
128
  for match in re.finditer(scheme_pattern, content):
128
129
  url = match.group()
129
130
  result = urlparse(url)
130
- if result.scheme in ("http", "https") and result.netloc:
131
+ if result.scheme in {"http", "https"} and result.netloc:
131
132
  matches.append(
132
133
  PIIMatch(
133
134
  type="url",
@@ -179,11 +180,14 @@ BUILTIN_DETECTORS: dict[str, Detector] = {
179
180
  }
180
181
  """Registry of built-in detectors keyed by type name."""
181
182
 
183
+ _CARD_NUMBER_MIN_DIGITS = 13
184
+ _CARD_NUMBER_MAX_DIGITS = 19
185
+
182
186
 
183
187
  def _passes_luhn(card_number: str) -> bool:
184
188
  """Validate credit card number using the Luhn checksum."""
185
189
  digits = [int(d) for d in card_number if d.isdigit()]
186
- if not 13 <= len(digits) <= 19:
190
+ if not _CARD_NUMBER_MIN_DIGITS <= len(digits) <= _CARD_NUMBER_MAX_DIGITS:
187
191
  return False
188
192
 
189
193
  checksum = 0
@@ -191,7 +195,7 @@ def _passes_luhn(card_number: str) -> bool:
191
195
  value = digit
192
196
  if index % 2 == 1:
193
197
  value *= 2
194
- if value > 9:
198
+ if value > 9: # noqa: PLR2004
195
199
  value -= 9
196
200
  checksum += value
197
201
  return checksum % 10 == 0
@@ -199,24 +203,28 @@ def _passes_luhn(card_number: str) -> bool:
199
203
 
200
204
  def _apply_redact_strategy(content: str, matches: list[PIIMatch]) -> str:
201
205
  result = content
202
- for match in sorted(matches, key=lambda item: item["start"], reverse=True):
206
+ for match in sorted(matches, key=operator.itemgetter("start"), reverse=True):
203
207
  replacement = f"[REDACTED_{match['type'].upper()}]"
204
208
  result = result[: match["start"]] + replacement + result[match["end"] :]
205
209
  return result
206
210
 
207
211
 
212
+ _UNMASKED_CHAR_NUMBER = 4
213
+ _IPV4_PARTS_NUMBER = 4
214
+
215
+
208
216
  def _apply_mask_strategy(content: str, matches: list[PIIMatch]) -> str:
209
217
  result = content
210
- for match in sorted(matches, key=lambda item: item["start"], reverse=True):
218
+ for match in sorted(matches, key=operator.itemgetter("start"), reverse=True):
211
219
  value = match["value"]
212
220
  pii_type = match["type"]
213
221
  if pii_type == "email":
214
222
  parts = value.split("@")
215
- if len(parts) == 2:
223
+ if len(parts) == 2: # noqa: PLR2004
216
224
  domain_parts = parts[1].split(".")
217
225
  masked = (
218
226
  f"{parts[0]}@****.{domain_parts[-1]}"
219
- if len(domain_parts) >= 2
227
+ if len(domain_parts) > 1
220
228
  else f"{parts[0]}@****"
221
229
  )
222
230
  else:
@@ -225,12 +233,15 @@ def _apply_mask_strategy(content: str, matches: list[PIIMatch]) -> str:
225
233
  digits_only = "".join(c for c in value if c.isdigit())
226
234
  separator = "-" if "-" in value else " " if " " in value else ""
227
235
  if separator:
228
- masked = f"****{separator}****{separator}****{separator}{digits_only[-4:]}"
236
+ masked = (
237
+ f"****{separator}****{separator}****{separator}"
238
+ f"{digits_only[-_UNMASKED_CHAR_NUMBER:]}"
239
+ )
229
240
  else:
230
- masked = f"************{digits_only[-4:]}"
241
+ masked = f"************{digits_only[-_UNMASKED_CHAR_NUMBER:]}"
231
242
  elif pii_type == "ip":
232
243
  octets = value.split(".")
233
- masked = f"*.*.*.{octets[-1]}" if len(octets) == 4 else "****"
244
+ masked = f"*.*.*.{octets[-1]}" if len(octets) == _IPV4_PARTS_NUMBER else "****"
234
245
  elif pii_type == "mac_address":
235
246
  separator = ":" if ":" in value else "-"
236
247
  masked = (
@@ -239,14 +250,18 @@ def _apply_mask_strategy(content: str, matches: list[PIIMatch]) -> str:
239
250
  elif pii_type == "url":
240
251
  masked = "[MASKED_URL]"
241
252
  else:
242
- masked = f"****{value[-4:]}" if len(value) > 4 else "****"
253
+ masked = (
254
+ f"****{value[-_UNMASKED_CHAR_NUMBER:]}"
255
+ if len(value) > _UNMASKED_CHAR_NUMBER
256
+ else "****"
257
+ )
243
258
  result = result[: match["start"]] + masked + result[match["end"] :]
244
259
  return result
245
260
 
246
261
 
247
262
  def _apply_hash_strategy(content: str, matches: list[PIIMatch]) -> str:
248
263
  result = content
249
- for match in sorted(matches, key=lambda item: item["start"], reverse=True):
264
+ for match in sorted(matches, key=operator.itemgetter("start"), reverse=True):
250
265
  digest = hashlib.sha256(match["value"].encode()).hexdigest()[:8]
251
266
  replacement = f"<{match['type']}_hash:{digest}>"
252
267
  result = result[: match["start"]] + replacement + result[match["end"] :]