langchain 1.0.0rc2__py3-none-any.whl → 1.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of langchain might be problematic. Click here for more details.
- langchain/__init__.py +1 -1
- langchain/agents/factory.py +26 -20
- langchain/agents/middleware/__init__.py +12 -0
- langchain/agents/middleware/_execution.py +388 -0
- langchain/agents/middleware/_redaction.py +350 -0
- langchain/agents/middleware/file_search.py +382 -0
- langchain/agents/middleware/pii.py +43 -477
- langchain/agents/middleware/shell_tool.py +718 -0
- langchain/agents/middleware/types.py +7 -5
- langchain/chat_models/base.py +7 -17
- langchain/embeddings/__init__.py +6 -0
- langchain/embeddings/base.py +21 -7
- langchain/tools/tool_node.py +147 -61
- {langchain-1.0.0rc2.dist-info → langchain-1.0.2.dist-info}/METADATA +12 -9
- {langchain-1.0.0rc2.dist-info → langchain-1.0.2.dist-info}/RECORD +17 -13
- {langchain-1.0.0rc2.dist-info → langchain-1.0.2.dist-info}/WHEEL +0 -0
- {langchain-1.0.0rc2.dist-info → langchain-1.0.2.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,382 @@
|
|
|
1
|
+
"""File search middleware for Anthropic text editor and memory tools.
|
|
2
|
+
|
|
3
|
+
This module provides Glob and Grep search tools that operate on files stored
|
|
4
|
+
in state or filesystem.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import fnmatch
|
|
10
|
+
import json
|
|
11
|
+
import re
|
|
12
|
+
import subprocess
|
|
13
|
+
from contextlib import suppress
|
|
14
|
+
from datetime import datetime, timezone
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
from typing import Literal
|
|
17
|
+
|
|
18
|
+
from langchain_core.tools import tool
|
|
19
|
+
|
|
20
|
+
from langchain.agents.middleware.types import AgentMiddleware
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _expand_include_patterns(pattern: str) -> list[str] | None:
|
|
24
|
+
"""Expand brace patterns like ``*.{py,pyi}`` into a list of globs."""
|
|
25
|
+
if "}" in pattern and "{" not in pattern:
|
|
26
|
+
return None
|
|
27
|
+
|
|
28
|
+
expanded: list[str] = []
|
|
29
|
+
|
|
30
|
+
def _expand(current: str) -> None:
|
|
31
|
+
start = current.find("{")
|
|
32
|
+
if start == -1:
|
|
33
|
+
expanded.append(current)
|
|
34
|
+
return
|
|
35
|
+
|
|
36
|
+
end = current.find("}", start)
|
|
37
|
+
if end == -1:
|
|
38
|
+
raise ValueError
|
|
39
|
+
|
|
40
|
+
prefix = current[:start]
|
|
41
|
+
suffix = current[end + 1 :]
|
|
42
|
+
inner = current[start + 1 : end]
|
|
43
|
+
if not inner:
|
|
44
|
+
raise ValueError
|
|
45
|
+
|
|
46
|
+
for option in inner.split(","):
|
|
47
|
+
_expand(prefix + option + suffix)
|
|
48
|
+
|
|
49
|
+
try:
|
|
50
|
+
_expand(pattern)
|
|
51
|
+
except ValueError:
|
|
52
|
+
return None
|
|
53
|
+
|
|
54
|
+
return expanded
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _is_valid_include_pattern(pattern: str) -> bool:
    """Check whether ``pattern`` is usable as an include filter.

    A pattern is rejected when it is empty, contains a NUL byte or a line
    break, cannot be brace-expanded, or when any expanded glob does not
    translate into a compilable regular expression.

    Args:
        pattern: Glob pattern supplied as the include filter.

    Returns:
        ``True`` if the pattern is acceptable, ``False`` otherwise.
    """
    forbidden_chars = ("\x00", "\n", "\r")
    if not pattern or any(char in pattern for char in forbidden_chars):
        return False

    globs = _expand_include_patterns(pattern)
    if globs is None:
        return False

    for glob in globs:
        try:
            re.compile(fnmatch.translate(glob))
        except re.error:
            return False

    return True
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def _match_include_pattern(basename: str, pattern: str) -> bool:
    """Tell whether a file name is selected by an include pattern.

    Args:
        basename: File name (no directory part) to test.
        pattern: Include glob, optionally using ``{a,b}`` alternations.

    Returns:
        ``True`` when at least one brace-expanded glob matches ``basename``;
        ``False`` when none match or the pattern cannot be expanded.
    """
    globs = _expand_include_patterns(pattern)
    if globs:
        for glob in globs:
            if fnmatch.fnmatch(basename, glob):
                return True
    return False
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
class FilesystemFileSearchMiddleware(AgentMiddleware):
    """Provides Glob and Grep search over filesystem files.

    This middleware adds two tools that search through local filesystem:
    - Glob: Fast file pattern matching by file path
    - Grep: Fast content search using ripgrep or Python fallback

    Tool inputs and outputs use *virtual* paths: ``/`` maps to ``root_path``
    and every path handed back to the model is expressed relative to it.

    Example:
        ```python
        from langchain.agents import create_agent
        from langchain.agents.middleware import (
            FilesystemFileSearchMiddleware,
        )

        agent = create_agent(
            model=model,
            tools=[],
            middleware=[
                FilesystemFileSearchMiddleware(root_path="/workspace"),
            ],
        )
        ```
    """

    def __init__(
        self,
        *,
        root_path: str,
        use_ripgrep: bool = True,
        max_file_size_mb: int = 10,
    ) -> None:
        """Initialize the search middleware.

        Args:
            root_path: Root directory to search.
            use_ripgrep: Whether to use ripgrep for search (default: True).
                Falls back to Python if ripgrep unavailable.
            max_file_size_mb: Maximum file size to search in MB (default: 10).
        """
        self.root_path = Path(root_path).resolve()
        self.use_ripgrep = use_ripgrep
        self.max_file_size_bytes = max_file_size_mb * 1024 * 1024

        # Create tool instances as closures that capture self
        @tool
        def glob_search(pattern: str, path: str = "/") -> str:
            """Fast file pattern matching tool that works with any codebase size.

            Supports glob patterns like **/*.js or src/**/*.ts.
            Returns matching file paths sorted by modification time.
            Use this tool when you need to find files by name patterns.

            Args:
                pattern: The glob pattern to match files against.
                path: The directory to search in. If not specified, searches from root.

            Returns:
                Newline-separated list of matching file paths, sorted by modification
                time (most recently modified first). Returns "No files found" if no
                matches.
            """
            try:
                base_full = self._validate_and_resolve_path(path)
            except ValueError:
                return "No files found"

            if not base_full.is_dir():
                return "No files found"

            # A ".." component would let the pattern climb out of the root even
            # though the base directory itself was validated; treat it like any
            # other traversal attempt.
            if ".." in Path(pattern).parts:
                return "No files found"

            # pathlib rejects empty and absolute patterns; depending on the
            # Python version the error surfaces on the call or on first
            # iteration, so materialize the matches inside the try.
            try:
                candidates = list(base_full.glob(pattern))
            except (ValueError, NotImplementedError) as e:
                return f"Invalid glob pattern: {e}"

            matching: list[tuple[str, datetime]] = []
            for match in candidates:
                try:
                    if not match.is_file():
                        continue
                    # Convert to virtual path
                    relative = match.relative_to(self.root_path)
                    modified_at = datetime.fromtimestamp(match.stat().st_mtime, tz=timezone.utc)
                except (OSError, OverflowError, ValueError):
                    # File vanished, is unreadable, lies outside the root or has
                    # an unrepresentable timestamp: skip it instead of failing.
                    continue
                matching.append(("/" + str(relative), modified_at))

            if not matching:
                return "No files found"

            # Most recently modified first, as the tool description promises.
            # Sorting by path first makes ties deterministic (both sorts are stable).
            matching.sort(key=lambda entry: entry[0])
            matching.sort(key=lambda entry: entry[1], reverse=True)
            return "\n".join(file_path for file_path, _ in matching)

        @tool
        def grep_search(
            pattern: str,
            path: str = "/",
            include: str | None = None,
            output_mode: Literal["files_with_matches", "content", "count"] = "files_with_matches",
        ) -> str:
            """Fast content search tool that works with any codebase size.

            Searches file contents using regular expressions. Supports full regex
            syntax and filters files by pattern with the include parameter.

            Args:
                pattern: The regular expression pattern to search for in file contents.
                path: The directory to search in. If not specified, searches from root.
                include: File pattern to filter (e.g., "*.js", "*.{ts,tsx}").
                output_mode: Output format:
                    - "files_with_matches": Only file paths containing matches (default)
                    - "content": Matching lines with file:line:content format
                    - "count": Count of matches per file

            Returns:
                Search results formatted according to output_mode. Returns "No matches
                found" if no results.
            """
            # Compile regex pattern (for validation)
            try:
                re.compile(pattern)
            except re.error as e:
                return f"Invalid regex pattern: {e}"

            if include and not _is_valid_include_pattern(include):
                return "Invalid include pattern"

            # Try ripgrep first if enabled
            results = None
            if self.use_ripgrep:
                with suppress(
                    FileNotFoundError,
                    subprocess.CalledProcessError,
                    subprocess.TimeoutExpired,
                ):
                    results = self._ripgrep_search(pattern, path, include)

            # Python fallback if ripgrep failed or is disabled
            if results is None:
                results = self._python_search(pattern, path, include)

            if not results:
                return "No matches found"

            # Format output based on mode
            return self._format_grep_results(results, output_mode)

        self.glob_search = glob_search
        self.grep_search = grep_search
        self.tools = [glob_search, grep_search]

    def _validate_and_resolve_path(self, path: str) -> Path:
        """Validate and resolve a virtual path to filesystem path.

        Args:
            path: Virtual path; a leading ``/`` is added when missing.

        Returns:
            The resolved filesystem path located under ``self.root_path``.

        Raises:
            ValueError: If the path contains ``..`` or ``~``, or resolves to a
                location outside ``self.root_path``.
        """
        # Normalize path
        if not path.startswith("/"):
            path = "/" + path

        # Check for path traversal
        if ".." in path or "~" in path:
            msg = "Path traversal not allowed"
            raise ValueError(msg)

        # Convert virtual path to filesystem path
        relative = path.lstrip("/")
        full_path = (self.root_path / relative).resolve()

        # Ensure path is within root (resolve() may have followed a symlink out of it)
        try:
            full_path.relative_to(self.root_path)
        except ValueError:
            msg = f"Path outside root directory: {path}"
            raise ValueError(msg) from None

        return full_path

    def _ripgrep_search(
        self, pattern: str, base_path: str, include: str | None
    ) -> dict[str, list[tuple[int, str]]]:
        """Search using ripgrep subprocess.

        Args:
            pattern: Regular expression to search for.
            base_path: Virtual path of the directory (or file) to search.
            include: Optional glob restricting which files are searched.

        Returns:
            Mapping of virtual file path to ``(line_number, line_text)`` matches.
            Empty when the base path is invalid or missing. Falls back to the
            Python search when ripgrep is unavailable, times out, or exits with
            an error without producing any match.
        """
        try:
            base_full = self._validate_and_resolve_path(base_path)
        except ValueError:
            return {}

        if not base_full.exists():
            return {}

        # Build ripgrep command; honour the same size limit as the Python fallback
        # (a bare number is interpreted by ripgrep as bytes).
        cmd = ["rg", "--json", "--max-filesize", str(int(self.max_file_size_bytes))]

        if include:
            # Convert glob pattern to ripgrep glob
            cmd.extend(["--glob", include])

        # "--" keeps a pattern starting with "-" from being parsed as an option.
        cmd.extend(["--", pattern, str(base_full)])

        try:
            result = subprocess.run(  # noqa: S603
                cmd,
                capture_output=True,
                text=True,
                encoding="utf-8",  # ripgrep's JSON output is always UTF-8
                timeout=30,
                check=False,
            )
        except (subprocess.TimeoutExpired, FileNotFoundError):
            # Fallback to Python search if ripgrep unavailable or times out
            return self._python_search(pattern, base_path, include)

        # Parse ripgrep JSON output (one JSON object per line)
        results: dict[str, list[tuple[int, str]]] = {}
        for line in result.stdout.splitlines():
            try:
                data = json.loads(line)
                if data["type"] != "match":
                    continue
                payload = data["data"]
                # Convert to virtual path
                virtual_path = "/" + str(
                    Path(payload["path"]["text"]).relative_to(self.root_path)
                )
                line_num = payload["line_number"]
                line_text = payload["lines"]["text"].rstrip("\n")
            except (json.JSONDecodeError, KeyError, TypeError, ValueError):
                # Malformed record, non-text path/line, or a path outside the root.
                continue
            results.setdefault(virtual_path, []).append((line_num, line_text))

        # ripgrep exits 0 on match and 1 on no match; anything else is an error
        # (for example a regex Python accepts but ripgrep does not). Reporting
        # "no matches" would be misleading, so let the Python engine try.
        if not results and result.returncode not in (0, 1):
            return self._python_search(pattern, base_path, include)

        return results

    def _python_search(
        self, pattern: str, base_path: str, include: str | None
    ) -> dict[str, list[tuple[int, str]]]:
        """Search using Python regex (fallback).

        Args:
            pattern: Regular expression to search for; must compile with ``re``.
            base_path: Virtual path of the directory (or single file) to search.
            include: Optional glob matched against each file's base name.

        Returns:
            Mapping of virtual file path to ``(line_number, line_text)`` matches.
            Empty when the base path is invalid or missing. Files larger than
            ``self.max_file_size_bytes`` and files that cannot be read or
            decoded are skipped.
        """
        try:
            base_full = self._validate_and_resolve_path(base_path)
        except ValueError:
            return {}

        if not base_full.exists():
            return {}

        regex = re.compile(pattern)
        results: dict[str, list[tuple[int, str]]] = {}

        # rglob() yields nothing for a plain file, so search it directly to
        # match what ripgrep does when given a file path.
        candidates = [base_full] if base_full.is_file() else base_full.rglob("*")

        # Walk directory tree
        for file_path in candidates:
            if not file_path.is_file():
                continue

            # Check include filter
            if include and not _match_include_pattern(file_path.name, include):
                continue

            try:
                # Skip files that are too large
                if file_path.stat().st_size > self.max_file_size_bytes:
                    continue
                content = file_path.read_text()
            except (UnicodeDecodeError, OSError):
                # Binary/undecodable, unreadable, or removed while walking.
                continue

            # Search content
            hits = [
                (line_num, line)
                for line_num, line in enumerate(content.splitlines(), 1)
                if regex.search(line)
            ]
            if hits:
                virtual_path = "/" + str(file_path.relative_to(self.root_path))
                results.setdefault(virtual_path, []).extend(hits)

        return results

    def _format_grep_results(
        self,
        results: dict[str, list[tuple[int, str]]],
        output_mode: str,
    ) -> str:
        """Format grep results based on output mode.

        Args:
            results: Mapping of virtual file path to ``(line_number, line_text)``.
            output_mode: ``"files_with_matches"``, ``"content"`` or ``"count"``;
                any other value is treated as ``"files_with_matches"``.

        Returns:
            Newline-separated text with files in sorted path order.
        """
        ordered_paths = sorted(results)

        if output_mode == "content":
            # Return file:line:content format
            return "\n".join(
                f"{file_path}:{line_num}:{line}"
                for file_path in ordered_paths
                for line_num, line in results[file_path]
            )

        if output_mode == "count":
            # Return file:count format
            return "\n".join(
                f"{file_path}:{len(results[file_path])}" for file_path in ordered_paths
            )

        # "files_with_matches" and the default for unknown modes: just the paths
        return "\n".join(ordered_paths)
|
|
378
|
+
|
|
379
|
+
|
|
380
|
+
# Public API of this module.
__all__ = ["FilesystemFileSearchMiddleware"]
|