langchain 1.0.0rc2__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of langchain might be problematic. Click here for more details.

@@ -0,0 +1,382 @@
1
+ """File search middleware for Anthropic text editor and memory tools.
2
+
3
+ This module provides Glob and Grep search tools that operate on files stored
4
+ in state or filesystem.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import fnmatch
10
+ import json
11
+ import re
12
+ import subprocess
13
+ from contextlib import suppress
14
+ from datetime import datetime, timezone
15
+ from pathlib import Path
16
+ from typing import Literal
17
+
18
+ from langchain_core.tools import tool
19
+
20
+ from langchain.agents.middleware.types import AgentMiddleware
21
+
22
+
23
+ def _expand_include_patterns(pattern: str) -> list[str] | None:
24
+ """Expand brace patterns like ``*.{py,pyi}`` into a list of globs."""
25
+ if "}" in pattern and "{" not in pattern:
26
+ return None
27
+
28
+ expanded: list[str] = []
29
+
30
+ def _expand(current: str) -> None:
31
+ start = current.find("{")
32
+ if start == -1:
33
+ expanded.append(current)
34
+ return
35
+
36
+ end = current.find("}", start)
37
+ if end == -1:
38
+ raise ValueError
39
+
40
+ prefix = current[:start]
41
+ suffix = current[end + 1 :]
42
+ inner = current[start + 1 : end]
43
+ if not inner:
44
+ raise ValueError
45
+
46
+ for option in inner.split(","):
47
+ _expand(prefix + option + suffix)
48
+
49
+ try:
50
+ _expand(pattern)
51
+ except ValueError:
52
+ return None
53
+
54
+ return expanded
55
+
56
+
57
def _is_valid_include_pattern(pattern: str) -> bool:
    """Check whether an include filter is a usable glob pattern.

    A pattern is rejected when it is empty, contains a NUL, newline or
    carriage return, has malformed braces, or expands to a glob whose
    ``fnmatch`` translation does not compile as a regular expression.
    """
    if not pattern or any(forbidden in pattern for forbidden in ("\x00", "\n", "\r")):
        return False

    candidates = _expand_include_patterns(pattern)
    if candidates is None:
        return False

    for glob_text in candidates:
        try:
            re.compile(fnmatch.translate(glob_text))
        except re.error:
            return False
    return True
76
+
77
+
78
def _match_include_pattern(basename: str, pattern: str) -> bool:
    """Tell whether ``basename`` matches any brace-expansion of ``pattern``.

    Returns ``False`` when the pattern cannot be expanded or expands to
    nothing.
    """
    candidates = _expand_include_patterns(pattern)
    if not candidates:
        return False

    for glob_text in candidates:
        if fnmatch.fnmatch(basename, glob_text):
            return True
    return False
85
+
86
+
87
class FilesystemFileSearchMiddleware(AgentMiddleware):
    """Provides Glob and Grep search over filesystem files.

    This middleware adds two tools that search through local filesystem:
    - Glob: Fast file pattern matching by file path
    - Grep: Fast content search using ripgrep or Python fallback

    Paths accepted from and returned to the tools are "virtual": they start
    with ``/`` and are interpreted relative to ``root_path`` on disk.

    Example:
        ```python
        from langchain.agents import create_agent
        from langchain.agents.middleware import (
            FilesystemFileSearchMiddleware,
        )

        agent = create_agent(
            model=model,
            tools=[],
            middleware=[
                FilesystemFileSearchMiddleware(root_path="/workspace"),
            ],
        )
        ```
    """

    def __init__(
        self,
        *,
        root_path: str,
        use_ripgrep: bool = True,
        max_file_size_mb: int = 10,
    ) -> None:
        """Initialize the search middleware.

        Args:
            root_path: Root directory to search.
            use_ripgrep: Whether to use ripgrep for search (default: True).
                Falls back to Python if ripgrep unavailable.
            max_file_size_mb: Maximum file size to search in MB (default: 10).
        """
        self.root_path = Path(root_path).resolve()
        self.use_ripgrep = use_ripgrep
        self.max_file_size_bytes = max_file_size_mb * 1024 * 1024

        # Create tool instances as closures that capture self
        @tool
        def glob_search(pattern: str, path: str = "/") -> str:
            """Fast file pattern matching tool that works with any codebase size.

            Supports glob patterns like **/*.js or src/**/*.ts.
            Returns matching file paths sorted by modification time.
            Use this tool when you need to find files by name patterns.

            Args:
                pattern: The glob pattern to match files against.
                path: The directory to search in. If not specified, searches from root.

            Returns:
                Newline-separated list of matching file paths, sorted by modification
                time (most recently modified first). Returns "No files found" if no
                matches.
            """
            try:
                base_full = self._validate_and_resolve_path(path)
            except ValueError:
                return "No files found"

            if not base_full.exists() or not base_full.is_dir():
                return "No files found"

            # A ".." component would let the glob walk above the search root,
            # mirroring the traversal check applied to ``path``.
            if ".." in pattern.replace("\\", "/").split("/"):
                return "No files found"

            try:
                candidates = list(base_full.glob(pattern))
            except (ValueError, NotImplementedError):
                # pathlib raises ValueError for an empty pattern and
                # NotImplementedError for an absolute one.
                return "No files found"

            matching: list[tuple[str, datetime]] = []
            for match in candidates:
                if not match.is_file():
                    continue
                try:
                    relative = match.relative_to(self.root_path)
                    mtime = match.stat().st_mtime
                except (ValueError, OSError):
                    # Not expressible under the root, or the file vanished /
                    # became unreadable between listing and stat.
                    continue
                modified_at = datetime.fromtimestamp(mtime, tz=timezone.utc)
                # Convert to virtual path
                matching.append(("/" + str(relative), modified_at))

            if not matching:
                return "No files found"

            # Most recently modified first, as documented. Sorting by path
            # first makes ties deterministic because list.sort is stable,
            # including with reverse=True.
            matching.sort(key=lambda item: item[0])
            matching.sort(key=lambda item: item[1], reverse=True)

            return "\n".join(virtual_path for virtual_path, _ in matching)

        @tool
        def grep_search(
            pattern: str,
            path: str = "/",
            include: str | None = None,
            output_mode: Literal["files_with_matches", "content", "count"] = "files_with_matches",
        ) -> str:
            """Fast content search tool that works with any codebase size.

            Searches file contents using regular expressions. Supports full regex
            syntax and filters files by pattern with the include parameter.

            Args:
                pattern: The regular expression pattern to search for in file contents.
                path: The directory to search in. If not specified, searches from root.
                include: File pattern to filter (e.g., "*.js", "*.{ts,tsx}").
                output_mode: Output format:
                    - "files_with_matches": Only file paths containing matches (default)
                    - "content": Matching lines with file:line:content format
                    - "count": Count of matches per file

            Returns:
                Search results formatted according to output_mode. Returns "No matches
                found" if no results.
            """
            # Compile regex pattern (for validation)
            try:
                re.compile(pattern)
            except re.error as e:
                return f"Invalid regex pattern: {e}"

            if include and not _is_valid_include_pattern(include):
                return "Invalid include pattern"

            # Try ripgrep first if enabled
            results = None
            if self.use_ripgrep:
                with suppress(
                    FileNotFoundError,
                    subprocess.CalledProcessError,
                    subprocess.TimeoutExpired,
                ):
                    results = self._ripgrep_search(pattern, path, include)

            # Python fallback if ripgrep failed or is disabled
            if results is None:
                results = self._python_search(pattern, path, include)

            if not results:
                return "No matches found"

            # Format output based on mode
            return self._format_grep_results(results, output_mode)

        self.glob_search = glob_search
        self.grep_search = grep_search
        self.tools = [glob_search, grep_search]

    def _validate_and_resolve_path(self, path: str) -> Path:
        """Validate and resolve a virtual path to filesystem path.

        Args:
            path: Virtual path; a leading ``/`` is added when missing.

        Returns:
            The resolved filesystem path located under ``self.root_path``.

        Raises:
            ValueError: If the path contains ``..`` or ``~``, or if it
                resolves to a location outside ``self.root_path``.
        """
        # Normalize path
        if not path.startswith("/"):
            path = "/" + path

        # Check for path traversal
        if ".." in path or "~" in path:
            msg = "Path traversal not allowed"
            raise ValueError(msg)

        # Convert virtual path to filesystem path
        relative = path.lstrip("/")
        full_path = (self.root_path / relative).resolve()

        # Ensure path is within root (resolve() above follows symlinks, so a
        # link pointing outside the root is rejected here)
        try:
            full_path.relative_to(self.root_path)
        except ValueError:
            msg = f"Path outside root directory: {path}"
            raise ValueError(msg) from None

        return full_path

    def _ripgrep_search(
        self, pattern: str, base_path: str, include: str | None
    ) -> dict[str, list[tuple[int, str]]]:
        """Search using ripgrep subprocess.

        Args:
            pattern: Regular expression to search for.
            base_path: Virtual path to search under.
            include: Optional glob passed to ripgrep via ``--glob``.

        Returns:
            Mapping of virtual file path to ``(line_number, line_text)``
            tuples. Empty when ``base_path`` is invalid or does not exist.
            Falls back to ``_python_search`` when ripgrep is missing, times
            out, or reports an error.
        """
        try:
            base_full = self._validate_and_resolve_path(base_path)
        except ValueError:
            return {}

        if not base_full.exists():
            return {}

        # Build ripgrep command
        cmd = ["rg", "--json"]

        if include:
            # Convert glob pattern to ripgrep glob
            cmd.extend(["--glob", include])

        # "--" stops option parsing so a pattern starting with "-" is safe
        cmd.extend(["--", pattern, str(base_full)])

        try:
            result = subprocess.run(  # noqa: S603
                cmd,
                capture_output=True,
                text=True,
                timeout=30,
                check=False,
            )
        except (subprocess.TimeoutExpired, FileNotFoundError):
            # Fallback to Python search if ripgrep unavailable or times out
            return self._python_search(pattern, base_path, include)

        # ripgrep exits 0 on match and 1 on no match; anything else is an
        # error (e.g. regex syntax Python accepts but ripgrep does not), and
        # must not be reported as "no matches".
        if result.returncode not in (0, 1):
            return self._python_search(pattern, base_path, include)

        # Parse ripgrep JSON output
        results: dict[str, list[tuple[int, str]]] = {}
        for line in result.stdout.splitlines():
            try:
                data = json.loads(line)
                if data["type"] != "match":
                    continue
                match_path = data["data"]["path"]["text"]
                # Convert to virtual path
                virtual_path = "/" + str(Path(match_path).relative_to(self.root_path))
                line_num = data["data"]["line_number"]
                line_text = data["data"]["lines"]["text"].rstrip("\n")
            except (KeyError, ValueError):
                # ValueError covers json.JSONDecodeError and a path that is
                # not under the root; KeyError covers unexpected records.
                continue

            results.setdefault(virtual_path, []).append((line_num, line_text))

        return results

    def _python_search(
        self, pattern: str, base_path: str, include: str | None
    ) -> dict[str, list[tuple[int, str]]]:
        """Search using Python regex (fallback).

        Args:
            pattern: Regular expression to search for; compiled with ``re``.
            base_path: Virtual path to search under (directory or file).
            include: Optional include pattern matched against file basenames.

        Returns:
            Mapping of virtual file path to ``(line_number, line_text)``
            tuples. Empty when ``base_path`` is invalid or does not exist.
        """
        try:
            base_full = self._validate_and_resolve_path(base_path)
        except ValueError:
            return {}

        if not base_full.exists():
            return {}

        regex = re.compile(pattern)
        results: dict[str, list[tuple[int, str]]] = {}

        # Walk directory tree; a single file is searched directly, matching
        # what ripgrep does when handed a file path.
        candidates = [base_full] if base_full.is_file() else base_full.rglob("*")
        for file_path in candidates:
            if not file_path.is_file():
                continue

            # Check include filter
            if include and not _match_include_pattern(file_path.name, include):
                continue

            try:
                # Skip files that are too large
                if file_path.stat().st_size > self.max_file_size_bytes:
                    continue
                content = file_path.read_text()
            except (UnicodeDecodeError, OSError):
                # Binary, unreadable or vanished files are skipped rather
                # than aborting the whole search.
                continue

            # Search content
            hits = [
                (line_num, line)
                for line_num, line in enumerate(content.splitlines(), 1)
                if regex.search(line)
            ]
            if hits:
                virtual_path = "/" + str(file_path.relative_to(self.root_path))
                results[virtual_path] = hits

        return results

    def _format_grep_results(
        self,
        results: dict[str, list[tuple[int, str]]],
        output_mode: str,
    ) -> str:
        """Format grep results based on output mode.

        Args:
            results: Mapping of virtual file path to ``(line_number, line)``.
            output_mode: ``"files_with_matches"``, ``"content"`` or
                ``"count"``; any other value is treated as
                ``"files_with_matches"``.

        Returns:
            Newline-joined output with files in sorted path order.
        """
        if output_mode == "content":
            # Return file:line:content format
            return "\n".join(
                f"{file_path}:{line_num}:{line}"
                for file_path in sorted(results)
                for line_num, line in results[file_path]
            )

        if output_mode == "count":
            # Return file:count format
            return "\n".join(
                f"{file_path}:{len(results[file_path])}" for file_path in sorted(results)
            )

        # "files_with_matches" and any unrecognised mode: just the file paths
        return "\n".join(sorted(results))
378
+
379
+
380
# Public API of this module.
__all__ = ["FilesystemFileSearchMiddleware"]