everything-mcp 1.0.1__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- everything_mcp/__init__.py +14 -14
- everything_mcp/backend.py +455 -455
- everything_mcp/config.py +253 -253
- everything_mcp/server.py +745 -745
- {everything_mcp-1.0.1.dist-info → everything_mcp-1.0.3.dist-info}/METADATA +53 -11
- everything_mcp-1.0.3.dist-info/RECORD +11 -0
- {everything_mcp-1.0.1.dist-info → everything_mcp-1.0.3.dist-info}/licenses/LICENSE +1 -1
- everything_mcp-1.0.1.dist-info/RECORD +0 -11
- {everything_mcp-1.0.1.dist-info → everything_mcp-1.0.3.dist-info}/WHEEL +0 -0
- {everything_mcp-1.0.1.dist-info → everything_mcp-1.0.3.dist-info}/entry_points.txt +0 -0
everything_mcp/server.py
CHANGED
|
@@ -1,745 +1,745 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Everything MCP Server
|
|
3
|
-
|
|
4
|
-
Provides 5 tools for AI agents to search and analyse files at lightning speed
|
|
5
|
-
using voidtools Everything's real-time NTFS index.
|
|
6
|
-
|
|
7
|
-
Compatible with: Claude Code, Codex, Gemini, Kimi, Qwen, Cursor, Windsurf,
|
|
8
|
-
and any MCP-compatible client using stdio transport.
|
|
9
|
-
"""
|
|
10
|
-
|
|
11
|
-
from __future__ import annotations
|
|
12
|
-
|
|
13
|
-
import asyncio
|
|
14
|
-
import json
|
|
15
|
-
import logging
|
|
16
|
-
import os
|
|
17
|
-
import sys
|
|
18
|
-
from contextlib import asynccontextmanager
|
|
19
|
-
from datetime import datetime
|
|
20
|
-
from pathlib import Path
|
|
21
|
-
|
|
22
|
-
from mcp.server.fastmcp import FastMCP
|
|
23
|
-
from pydantic import BaseModel, ConfigDict, Field, field_validator
|
|
24
|
-
|
|
25
|
-
from everything_mcp.backend import (
|
|
26
|
-
FILE_TYPES,
|
|
27
|
-
SORT_MAP,
|
|
28
|
-
TIME_PERIODS,
|
|
29
|
-
EverythingBackend,
|
|
30
|
-
build_recent_query,
|
|
31
|
-
build_type_query,
|
|
32
|
-
human_size,
|
|
33
|
-
)
|
|
34
|
-
from everything_mcp.config import EverythingConfig
|
|
35
|
-
|
|
36
|
-
# ── Logging (stderr
|
|
37
|
-
|
|
38
|
-
logging.basicConfig(
|
|
39
|
-
level=logging.INFO,
|
|
40
|
-
format="%(asctime)s [%(name)s] %(levelname)s: %(message)s",
|
|
41
|
-
stream=sys.stderr,
|
|
42
|
-
)
|
|
43
|
-
logger = logging.getLogger("everything_mcp")
|
|
44
|
-
|
|
45
|
-
# ── Globals (initialised during lifespan) ─────────────────────────────────
|
|
46
|
-
|
|
47
|
-
_backend: EverythingBackend | None = None
|
|
48
|
-
_config: EverythingConfig | None = None
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
@asynccontextmanager
|
|
52
|
-
async def lifespan(server):
|
|
53
|
-
"""Initialise Everything backend on startup, cleanup on shutdown."""
|
|
54
|
-
global _backend, _config
|
|
55
|
-
|
|
56
|
-
logger.info("Everything MCP starting
|
|
57
|
-
_config = EverythingConfig.auto_detect()
|
|
58
|
-
|
|
59
|
-
if _config.is_valid:
|
|
60
|
-
logger.info("Connected: %s (es: %s)", _config.version_info, _config.es_path)
|
|
61
|
-
else:
|
|
62
|
-
for err in _config.errors:
|
|
63
|
-
logger.error(" %s", err)
|
|
64
|
-
for warn in _config.warnings:
|
|
65
|
-
logger.warning(" %s", warn)
|
|
66
|
-
|
|
67
|
-
_backend = EverythingBackend(_config)
|
|
68
|
-
try:
|
|
69
|
-
yield
|
|
70
|
-
finally:
|
|
71
|
-
logger.info("Everything MCP shutting down.")
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
# ── Server instance ───────────────────────────────────────────────────────
|
|
75
|
-
|
|
76
|
-
mcp = FastMCP("everything_mcp", lifespan=lifespan)
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
def _get_backend() -> EverythingBackend:
|
|
80
|
-
"""Return the backend or raise with a clear message."""
|
|
81
|
-
if _backend is None:
|
|
82
|
-
raise RuntimeError("Everything MCP not initialised")
|
|
83
|
-
if not _config or not _config.is_valid:
|
|
84
|
-
errors = _config.errors if _config else ["Not initialised"]
|
|
85
|
-
raise RuntimeError("Everything is not available. " + " ".join(errors))
|
|
86
|
-
return _backend
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
# ═══════════════════════════════════════════════════════════════════════════
|
|
90
|
-
# Tool 1: everything_search
|
|
91
|
-
# ═══════════════════════════════════════════════════════════════════════════
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
class SearchInput(BaseModel):
|
|
95
|
-
"""Input schema for ``everything_search``."""
|
|
96
|
-
|
|
97
|
-
model_config = ConfigDict(str_strip_whitespace=True, extra="forbid")
|
|
98
|
-
|
|
99
|
-
query: str = Field(
|
|
100
|
-
...,
|
|
101
|
-
description=(
|
|
102
|
-
"Search query using Everything syntax. Examples: "
|
|
103
|
-
"'*.py' (all Python files), "
|
|
104
|
-
"'ext:py;js path:C:\\Projects' (Python/JS in Projects), "
|
|
105
|
-
"'size:>10mb ext:log' (large logs), "
|
|
106
|
-
"'dm:today ext:py' (Python files modified today), "
|
|
107
|
-
"'content:TODO ext:py' (files containing TODO
|
|
108
|
-
"'\"exact phrase\"' (exact filename match), "
|
|
109
|
-
"'regex:test_\\d+\\.py$' (regex). "
|
|
110
|
-
"Combine with space (AND) or | (OR). Prefix ! to exclude."
|
|
111
|
-
),
|
|
112
|
-
min_length=1,
|
|
113
|
-
max_length=2000,
|
|
114
|
-
)
|
|
115
|
-
max_results: int = Field(
|
|
116
|
-
default=50,
|
|
117
|
-
description="Maximum results to return (1-500)",
|
|
118
|
-
ge=1, le=500,
|
|
119
|
-
)
|
|
120
|
-
sort: str = Field(
|
|
121
|
-
default="date-modified-desc",
|
|
122
|
-
description=(
|
|
123
|
-
"Sort order. Options: "
|
|
124
|
-
+ ", ".join(sorted(SORT_MAP.keys()))
|
|
125
|
-
),
|
|
126
|
-
)
|
|
127
|
-
|
|
128
|
-
@field_validator("sort")
|
|
129
|
-
@classmethod
|
|
130
|
-
def validate_sort(cls, v: str) -> str:
|
|
131
|
-
if v not in SORT_MAP:
|
|
132
|
-
raise ValueError(f"Invalid sort option '{v}'. Valid: {', '.join(sorted(SORT_MAP.keys()))}")
|
|
133
|
-
return v
|
|
134
|
-
|
|
135
|
-
match_case: bool = Field(default=False, description="Case-sensitive search")
|
|
136
|
-
match_whole_word: bool = Field(default=False, description="Match whole words only")
|
|
137
|
-
match_regex: bool = Field(default=False, description="Treat query as regex")
|
|
138
|
-
match_path: bool = Field(default=False, description="Match against full path, not just filename")
|
|
139
|
-
offset: int = Field(default=0, description="Skip N results (pagination)", ge=0)
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
@mcp.tool(
|
|
143
|
-
name="everything_search",
|
|
144
|
-
annotations={
|
|
145
|
-
"title": "Search Files & Folders",
|
|
146
|
-
"readOnlyHint": True,
|
|
147
|
-
"destructiveHint": False,
|
|
148
|
-
"idempotentHint": True,
|
|
149
|
-
"openWorldHint": False,
|
|
150
|
-
},
|
|
151
|
-
)
|
|
152
|
-
async def everything_search(params: SearchInput) -> str:
|
|
153
|
-
"""Search for files and folders instantly using voidtools Everything.
|
|
154
|
-
|
|
155
|
-
Leverages Everything's real-time NTFS index for sub-millisecond search
|
|
156
|
-
across all local and mapped drives. Supports wildcards, regex, size/date
|
|
157
|
-
filters, extension filters, path restrictions, and content search.
|
|
158
|
-
"""
|
|
159
|
-
try:
|
|
160
|
-
backend = _get_backend()
|
|
161
|
-
results = await backend.search(
|
|
162
|
-
query=params.query,
|
|
163
|
-
max_results=params.max_results,
|
|
164
|
-
sort=params.sort,
|
|
165
|
-
match_case=params.match_case,
|
|
166
|
-
match_whole_word=params.match_whole_word,
|
|
167
|
-
match_regex=params.match_regex,
|
|
168
|
-
match_path=params.match_path,
|
|
169
|
-
offset=params.offset,
|
|
170
|
-
)
|
|
171
|
-
return _format_search_results(results, params.query, params.max_results, params.offset)
|
|
172
|
-
except Exception as exc:
|
|
173
|
-
return f"Error: {exc}"
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
# ═══════════════════════════════════════════════════════════════════════════
|
|
177
|
-
# Tool 2: everything_search_by_type
|
|
178
|
-
# ═══════════════════════════════════════════════════════════════════════════
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
class SearchByTypeInput(BaseModel):
|
|
182
|
-
"""Input schema for ``everything_search_by_type``."""
|
|
183
|
-
|
|
184
|
-
model_config = ConfigDict(str_strip_whitespace=True, extra="forbid")
|
|
185
|
-
|
|
186
|
-
file_type: str = Field(
|
|
187
|
-
...,
|
|
188
|
-
description="File type category: " + ", ".join(sorted(FILE_TYPES.keys())),
|
|
189
|
-
)
|
|
190
|
-
query: str = Field(
|
|
191
|
-
default="",
|
|
192
|
-
description="Additional search filter (e.g. 'config' to narrow results)",
|
|
193
|
-
)
|
|
194
|
-
path: str = Field(
|
|
195
|
-
default="",
|
|
196
|
-
description="Restrict search to this directory (e.g. 'C:\\Projects')",
|
|
197
|
-
)
|
|
198
|
-
max_results: int = Field(default=50, ge=1, le=500)
|
|
199
|
-
sort: str = Field(default="date-modified-desc")
|
|
200
|
-
|
|
201
|
-
@field_validator("sort")
|
|
202
|
-
@classmethod
|
|
203
|
-
def validate_sort(cls, v: str) -> str:
|
|
204
|
-
if v not in SORT_MAP:
|
|
205
|
-
raise ValueError(f"Invalid sort option '{v}'. Valid: {', '.join(sorted(SORT_MAP.keys()))}")
|
|
206
|
-
return v
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
@mcp.tool(
|
|
210
|
-
name="everything_search_by_type",
|
|
211
|
-
annotations={
|
|
212
|
-
"title": "Search by File Type Category",
|
|
213
|
-
"readOnlyHint": True,
|
|
214
|
-
"destructiveHint": False,
|
|
215
|
-
"idempotentHint": True,
|
|
216
|
-
"openWorldHint": False,
|
|
217
|
-
},
|
|
218
|
-
)
|
|
219
|
-
async def everything_search_by_type(params: SearchByTypeInput) -> str:
|
|
220
|
-
"""Search for files by type category.
|
|
221
|
-
|
|
222
|
-
Categories: audio, video, image, document, code, archive, executable,
|
|
223
|
-
font, 3d, data. Each maps to a curated list of file extensions.
|
|
224
|
-
"""
|
|
225
|
-
try:
|
|
226
|
-
backend = _get_backend()
|
|
227
|
-
query = build_type_query(params.file_type, params.query, params.path)
|
|
228
|
-
results = await backend.search(
|
|
229
|
-
query=query,
|
|
230
|
-
max_results=params.max_results,
|
|
231
|
-
sort=params.sort,
|
|
232
|
-
)
|
|
233
|
-
label = f"type:{params.file_type}" + (f" {params.query}" if params.query else "")
|
|
234
|
-
return _format_search_results(results, label, params.max_results)
|
|
235
|
-
except Exception as exc:
|
|
236
|
-
return f"Error: {exc}"
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
# ═══════════════════════════════════════════════════════════════════════════
|
|
240
|
-
# Tool 3: everything_find_recent
|
|
241
|
-
# ═══════════════════════════════════════════════════════════════════════════
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
class FindRecentInput(BaseModel):
|
|
245
|
-
"""Input schema for ``everything_find_recent``."""
|
|
246
|
-
|
|
247
|
-
model_config = ConfigDict(str_strip_whitespace=True, extra="forbid")
|
|
248
|
-
|
|
249
|
-
period: str = Field(
|
|
250
|
-
default="1hour",
|
|
251
|
-
description=(
|
|
252
|
-
"How recent. Options: "
|
|
253
|
-
+ ", ".join(sorted(TIME_PERIODS.keys(), key=lambda k: list(TIME_PERIODS.keys()).index(k)))
|
|
254
|
-
+ ". Or raw Everything syntax like 'last2hours'."
|
|
255
|
-
),
|
|
256
|
-
)
|
|
257
|
-
path: str = Field(default="", description="Restrict to this directory path")
|
|
258
|
-
extensions: str = Field(
|
|
259
|
-
default="",
|
|
260
|
-
description="Filter by extensions, e.g. 'py,js,ts' or 'py;js;ts'",
|
|
261
|
-
)
|
|
262
|
-
query: str = Field(default="", description="Additional search filter")
|
|
263
|
-
max_results: int = Field(default=50, ge=1, le=500)
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
@mcp.tool(
|
|
267
|
-
name="everything_find_recent",
|
|
268
|
-
annotations={
|
|
269
|
-
"title": "Find Recently Modified Files",
|
|
270
|
-
"readOnlyHint": True,
|
|
271
|
-
"destructiveHint": False,
|
|
272
|
-
"idempotentHint": True,
|
|
273
|
-
"openWorldHint": False,
|
|
274
|
-
},
|
|
275
|
-
)
|
|
276
|
-
async def everything_find_recent(params: FindRecentInput) -> str:
|
|
277
|
-
"""Find files modified within a recent time period.
|
|
278
|
-
|
|
279
|
-
Ideal for discovering what changed in a project, tracking recent
|
|
280
|
-
downloads, finding today's log files, etc. Sorted newest-first.
|
|
281
|
-
"""
|
|
282
|
-
try:
|
|
283
|
-
backend = _get_backend()
|
|
284
|
-
|
|
285
|
-
query = build_recent_query(params.period, params.path, params.extensions)
|
|
286
|
-
if params.query:
|
|
287
|
-
query = f"{query} {params.query}"
|
|
288
|
-
|
|
289
|
-
results = await backend.search(
|
|
290
|
-
query=query,
|
|
291
|
-
max_results=params.max_results,
|
|
292
|
-
sort="date-modified-desc",
|
|
293
|
-
)
|
|
294
|
-
return _format_search_results(results, f"recent ({params.period})", params.max_results)
|
|
295
|
-
except Exception as exc:
|
|
296
|
-
return f"Error: {exc}"
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
# ═══════════════════════════════════════════════════════════════════════════
|
|
300
|
-
# Tool 4: everything_file_details
|
|
301
|
-
# ═══════════════════════════════════════════════════════════════════════════
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
class FileDetailsInput(BaseModel):
|
|
305
|
-
"""Input schema for ``everything_file_details``."""
|
|
306
|
-
|
|
307
|
-
model_config = ConfigDict(str_strip_whitespace=True, extra="forbid")
|
|
308
|
-
|
|
309
|
-
paths: list[str] = Field(
|
|
310
|
-
...,
|
|
311
|
-
description="File/folder paths to inspect (1-20)",
|
|
312
|
-
min_length=1,
|
|
313
|
-
max_length=20,
|
|
314
|
-
)
|
|
315
|
-
preview_lines: int = Field(
|
|
316
|
-
default=0,
|
|
317
|
-
description="Lines of text content to preview (0 = none, max 200)",
|
|
318
|
-
ge=0, le=200,
|
|
319
|
-
)
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
@mcp.tool(
|
|
323
|
-
name="everything_file_details",
|
|
324
|
-
annotations={
|
|
325
|
-
"title": "Get File Details & Content Preview",
|
|
326
|
-
"readOnlyHint": True,
|
|
327
|
-
"destructiveHint": False,
|
|
328
|
-
"idempotentHint": True,
|
|
329
|
-
"openWorldHint": False,
|
|
330
|
-
},
|
|
331
|
-
)
|
|
332
|
-
async def everything_file_details(params: FileDetailsInput) -> str:
|
|
333
|
-
"""Get detailed metadata and optional content preview for specific files.
|
|
334
|
-
|
|
335
|
-
Returns: full path, size, dates, type, permissions, hidden status.
|
|
336
|
-
For directories: item count, subdirectories, file listing.
|
|
337
|
-
For text files with preview_lines > 0: first N lines of content.
|
|
338
|
-
"""
|
|
339
|
-
# Run blocking file I/O in thread pool to not block the event loop
|
|
340
|
-
return await asyncio.to_thread(
|
|
341
|
-
_get_file_details_sync,
|
|
342
|
-
params.paths,
|
|
343
|
-
params.preview_lines,
|
|
344
|
-
)
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
def _get_file_details_sync(paths: list[str], preview_lines: int) -> str:
|
|
348
|
-
"""Synchronous implementation of file details gathering."""
|
|
349
|
-
output_parts: list[str] = []
|
|
350
|
-
|
|
351
|
-
for filepath in paths:
|
|
352
|
-
p = Path(filepath)
|
|
353
|
-
info: dict = {"path": str(p)}
|
|
354
|
-
|
|
355
|
-
if not p.exists():
|
|
356
|
-
info["error"] = "File not found"
|
|
357
|
-
output_parts.append(json.dumps(info, indent=2, ensure_ascii=False))
|
|
358
|
-
continue
|
|
359
|
-
|
|
360
|
-
try:
|
|
361
|
-
stat = p.stat()
|
|
362
|
-
info["name"] = p.name
|
|
363
|
-
info["type"] = "folder" if p.is_dir() else "file"
|
|
364
|
-
|
|
365
|
-
if not p.is_dir():
|
|
366
|
-
info["size"] = stat.st_size
|
|
367
|
-
info["size_human"] = human_size(stat.st_size)
|
|
368
|
-
info["extension"] = p.suffix.lstrip(".").lower()
|
|
369
|
-
|
|
370
|
-
info["date_modified"] = datetime.fromtimestamp(stat.st_mtime).strftime("%Y-%m-%d %H:%M:%S")
|
|
371
|
-
info["date_created"] = datetime.fromtimestamp(stat.st_ctime).strftime("%Y-%m-%d %H:%M:%S")
|
|
372
|
-
info["date_accessed"] = datetime.fromtimestamp(stat.st_atime).strftime("%Y-%m-%d %H:%M:%S")
|
|
373
|
-
info["read_only"] = not os.access(filepath, os.W_OK)
|
|
374
|
-
|
|
375
|
-
# Windows hidden attribute or Unix dotfile
|
|
376
|
-
file_attrs = getattr(stat, "st_file_attributes", 0)
|
|
377
|
-
info["hidden"] = bool(file_attrs & 0x2) if file_attrs else p.name.startswith(".")
|
|
378
|
-
|
|
379
|
-
# Directory listing
|
|
380
|
-
if p.is_dir():
|
|
381
|
-
try:
|
|
382
|
-
info.update(_summarize_directory(p))
|
|
383
|
-
except PermissionError:
|
|
384
|
-
info["listing_error"] = "Permission denied"
|
|
385
|
-
except OSError as exc:
|
|
386
|
-
info["listing_error"] = str(exc)
|
|
387
|
-
|
|
388
|
-
# Content preview for text files
|
|
389
|
-
elif preview_lines > 0:
|
|
390
|
-
preview = _read_preview(p, preview_lines)
|
|
391
|
-
if preview is not None:
|
|
392
|
-
info["preview"] = preview
|
|
393
|
-
|
|
394
|
-
except PermissionError:
|
|
395
|
-
info["error"] = "Permission denied"
|
|
396
|
-
except OSError as exc:
|
|
397
|
-
info["error"] = str(exc)
|
|
398
|
-
|
|
399
|
-
output_parts.append(json.dumps(info, indent=2, ensure_ascii=False))
|
|
400
|
-
|
|
401
|
-
return "\n---\n".join(output_parts)
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
# ═══════════════════════════════════════════════════════════════════════════
|
|
405
|
-
# Tool 5: everything_count_stats
|
|
406
|
-
# ═══════════════════════════════════════════════════════════════════════════
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
class CountStatsInput(BaseModel):
|
|
410
|
-
"""Input schema for ``everything_count_stats``."""
|
|
411
|
-
|
|
412
|
-
model_config = ConfigDict(str_strip_whitespace=True, extra="forbid")
|
|
413
|
-
|
|
414
|
-
query: str = Field(
|
|
415
|
-
...,
|
|
416
|
-
description=(
|
|
417
|
-
"Search query to count/measure. Same syntax as everything_search. "
|
|
418
|
-
"Examples: 'ext:py path:C:\\Projects', 'ext:log size:>1mb', '*.tmp'"
|
|
419
|
-
),
|
|
420
|
-
min_length=1,
|
|
421
|
-
max_length=2000,
|
|
422
|
-
)
|
|
423
|
-
include_size: bool = Field(
|
|
424
|
-
default=True,
|
|
425
|
-
description="Also calculate total size of all matching files",
|
|
426
|
-
)
|
|
427
|
-
breakdown_by_extension: bool = Field(
|
|
428
|
-
default=False,
|
|
429
|
-
description="Break down count and size by file extension (samples top 200 results)",
|
|
430
|
-
)
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
@mcp.tool(
|
|
434
|
-
name="everything_count_stats",
|
|
435
|
-
annotations={
|
|
436
|
-
"title": "Count & Size Statistics",
|
|
437
|
-
"readOnlyHint": True,
|
|
438
|
-
"destructiveHint": False,
|
|
439
|
-
"idempotentHint": True,
|
|
440
|
-
"openWorldHint": False,
|
|
441
|
-
},
|
|
442
|
-
)
|
|
443
|
-
async def everything_count_stats(params: CountStatsInput) -> str:
|
|
444
|
-
"""Get count and size statistics for files matching a query.
|
|
445
|
-
|
|
446
|
-
Fast way to understand the scope of a query without listing every file.
|
|
447
|
-
Optionally breaks down by extension for a high-level overview.
|
|
448
|
-
"""
|
|
449
|
-
try:
|
|
450
|
-
backend = _get_backend()
|
|
451
|
-
output: dict = {"query": params.query}
|
|
452
|
-
|
|
453
|
-
# Count
|
|
454
|
-
try:
|
|
455
|
-
output["total_count"] = await backend.count(params.query)
|
|
456
|
-
except Exception:
|
|
457
|
-
output["count_note"] = "Count not available (es.exe may not support -get-result-count)"
|
|
458
|
-
|
|
459
|
-
# Total size
|
|
460
|
-
if params.include_size:
|
|
461
|
-
try:
|
|
462
|
-
total_size = await backend.get_total_size(params.query)
|
|
463
|
-
if total_size >= 0:
|
|
464
|
-
output["total_size"] = total_size
|
|
465
|
-
output["total_size_human"] = human_size(total_size)
|
|
466
|
-
except Exception:
|
|
467
|
-
output["size_note"] = "Total size not available"
|
|
468
|
-
|
|
469
|
-
# Extension breakdown
|
|
470
|
-
if params.breakdown_by_extension:
|
|
471
|
-
try:
|
|
472
|
-
sample_limit = 500
|
|
473
|
-
results = await backend.search(
|
|
474
|
-
params.query,
|
|
475
|
-
max_results=sample_limit,
|
|
476
|
-
sort="name",
|
|
477
|
-
)
|
|
478
|
-
ext_stats: dict[str, dict] = {}
|
|
479
|
-
sampled_files = 0
|
|
480
|
-
for r in results:
|
|
481
|
-
if r.is_dir:
|
|
482
|
-
continue
|
|
483
|
-
sampled_files += 1
|
|
484
|
-
ext = r.extension or "(no extension)"
|
|
485
|
-
entry = ext_stats.setdefault(ext, {"count": 0, "total_size": 0})
|
|
486
|
-
entry["count"] += 1
|
|
487
|
-
if r.size >= 0:
|
|
488
|
-
entry["total_size"] += r.size
|
|
489
|
-
|
|
490
|
-
sorted_exts = sorted(ext_stats.items(), key=lambda x: x[1]["count"], reverse=True)
|
|
491
|
-
breakdown = {}
|
|
492
|
-
for ext, stats in sorted_exts[:30]:
|
|
493
|
-
breakdown[ext] = {
|
|
494
|
-
"count": stats["count"],
|
|
495
|
-
"total_size": stats["total_size"],
|
|
496
|
-
"total_size_human": human_size(stats["total_size"]),
|
|
497
|
-
}
|
|
498
|
-
output["extension_breakdown"] = breakdown
|
|
499
|
-
output["breakdown_note"] = (
|
|
500
|
-
f"Based on {sampled_files} sampled files from first {len(results)} "
|
|
501
|
-
f"results (max sample {sample_limit}); directories excluded."
|
|
502
|
-
)
|
|
503
|
-
except Exception as exc:
|
|
504
|
-
output["breakdown_error"] = str(exc)
|
|
505
|
-
|
|
506
|
-
return json.dumps(output, indent=2, ensure_ascii=False)
|
|
507
|
-
except Exception as exc:
|
|
508
|
-
return f"Error: {exc}"
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
# ═══════════════════════════════════════════════════════════════════════════
|
|
512
|
-
# Resource: Health Check
|
|
513
|
-
# ═══════════════════════════════════════════════════════════════════════════
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
@mcp.resource("everything://status")
|
|
517
|
-
async def get_status() -> str:
|
|
518
|
-
"""Get the current status of the Everything connection."""
|
|
519
|
-
if _backend:
|
|
520
|
-
status = await _backend.health_check()
|
|
521
|
-
else:
|
|
522
|
-
status = {"status": "not initialised"}
|
|
523
|
-
return json.dumps(status, indent=2)
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
# ═══════════════════════════════════════════════════════════════════════════
|
|
527
|
-
# Helpers
|
|
528
|
-
# ═══════════════════════════════════════════════════════════════════════════
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
def _format_search_results(
|
|
532
|
-
results: list,
|
|
533
|
-
query_label: str,
|
|
534
|
-
max_results: int,
|
|
535
|
-
offset: int = 0,
|
|
536
|
-
) -> str:
|
|
537
|
-
"""Format search results into a clean, readable string for LLM consumption."""
|
|
538
|
-
if not results:
|
|
539
|
-
return f"No results found for: {query_label}"
|
|
540
|
-
|
|
541
|
-
header = f"Found {len(results)} results for: {query_label}"
|
|
542
|
-
if offset > 0:
|
|
543
|
-
header += f" (offset: {offset})"
|
|
544
|
-
lines = [header, ""]
|
|
545
|
-
|
|
546
|
-
for r in results:
|
|
547
|
-
d = r.to_dict() if hasattr(r, "to_dict") else r
|
|
548
|
-
path = d.get("path", "?")
|
|
549
|
-
ftype = d.get("type", "file")
|
|
550
|
-
size_h = d.get("size_human", "")
|
|
551
|
-
dm = d.get("date_modified", "")
|
|
552
|
-
|
|
553
|
-
prefix = "[DIR]" if ftype == "folder" else "[FILE]"
|
|
554
|
-
meta_parts: list[str] = []
|
|
555
|
-
if size_h:
|
|
556
|
-
meta_parts.append(size_h)
|
|
557
|
-
if dm:
|
|
558
|
-
meta_parts.append(dm)
|
|
559
|
-
|
|
560
|
-
meta = f" ({', '.join(meta_parts)})" if meta_parts else ""
|
|
561
|
-
lines.append(f" {prefix} {path}{meta}")
|
|
562
|
-
|
|
563
|
-
if len(results) >= max_results:
|
|
564
|
-
lines.append("")
|
|
565
|
-
lines.append(
|
|
566
|
-
f"Showing first {max_results} results. "
|
|
567
|
-
"Use 'offset' to paginate or refine the query."
|
|
568
|
-
)
|
|
569
|
-
|
|
570
|
-
return "\n".join(lines)
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
# ── Text file preview ─────────────────────────────────────────────────────
|
|
574
|
-
|
|
575
|
-
# Extensions we can safely read as text
|
|
576
|
-
_TEXT_EXTENSIONS: frozenset[str] = frozenset({
|
|
577
|
-
# Text & docs
|
|
578
|
-
"txt", "md", "mdx", "rst", "adoc", "org",
|
|
579
|
-
# Python
|
|
580
|
-
"py", "pyi", "pyw", "pyx", "pxd",
|
|
581
|
-
# JavaScript/TypeScript
|
|
582
|
-
"js", "mjs", "cjs", "ts", "mts", "cts", "jsx", "tsx",
|
|
583
|
-
# Web frameworks
|
|
584
|
-
"vue", "svelte", "astro", "marko",
|
|
585
|
-
# C family
|
|
586
|
-
"c", "cpp", "cc", "cxx", "h", "hpp", "hxx", "cs", "java", "m", "mm",
|
|
587
|
-
# Systems languages
|
|
588
|
-
"go", "rs", "rb", "php", "swift", "kt", "kts", "scala", "r", "lua",
|
|
589
|
-
# Shell
|
|
590
|
-
"sh", "bash", "zsh", "fish", "ps1", "psm1", "psd1", "bat", "cmd",
|
|
591
|
-
# Database & query
|
|
592
|
-
"sql", "prisma", "graphql", "gql",
|
|
593
|
-
# Web
|
|
594
|
-
"html", "htm", "css", "scss", "sass", "less", "styl", "pcss",
|
|
595
|
-
# Data formats
|
|
596
|
-
"json", "jsonc", "json5", "jsonl", "ndjson",
|
|
597
|
-
"xml", "xsl", "xslt", "xsd", "svg", "rss", "atom",
|
|
598
|
-
"yaml", "yml", "toml", "ini", "cfg", "conf", "env", "properties",
|
|
599
|
-
"csv", "tsv", "log",
|
|
600
|
-
# Config files (with extensions)
|
|
601
|
-
"gitignore", "gitattributes", "gitmodules", "npmrc", "nvmrc", "yarnrc",
|
|
602
|
-
"dockerignore", "editorconfig", "eslintrc", "prettierrc", "babelrc",
|
|
603
|
-
"stylelintrc", "browserslistrc",
|
|
604
|
-
# Build tools
|
|
605
|
-
"makefile", "dockerfile", "cmake", "gradle", "sbt", "cabal", "bazel",
|
|
606
|
-
# Academic
|
|
607
|
-
"tex", "bib", "cls", "sty",
|
|
608
|
-
# Hardware
|
|
609
|
-
"asm", "s", "v", "sv", "vhd", "vhdl",
|
|
610
|
-
# Modern languages
|
|
611
|
-
"dart", "zig", "nim", "hx", "odin", "jai", "vlang",
|
|
612
|
-
# Functional
|
|
613
|
-
"ex", "exs", "erl", "hrl", "hs", "lhs", "ml", "mli", "fs", "fsi", "fsx",
|
|
614
|
-
"clj", "cljs", "cljc", "edn", "lisp", "el", "rkt", "scm", "fnl",
|
|
615
|
-
# Other
|
|
616
|
-
"pro", "pri", "qml", "proto", "thrift", "capnp",
|
|
617
|
-
"tf", "hcl", "nix", "dhall", "jsonnet", "cue",
|
|
618
|
-
"http", "rest", "lock",
|
|
619
|
-
})
|
|
620
|
-
|
|
621
|
-
# Filenames (no extension) that are always text
|
|
622
|
-
_TEXT_FILENAMES: frozenset[str] = frozenset({
|
|
623
|
-
"makefile", "dockerfile", "cmakelists.txt", "rakefile", "gemfile",
|
|
624
|
-
"procfile", "vagrantfile", "brewfile", "justfile", "taskfile",
|
|
625
|
-
"license", "licence", "readme", "authors", "contributors",
|
|
626
|
-
"changelog", "changes", "history", "news", "todo",
|
|
627
|
-
})
|
|
628
|
-
|
|
629
|
-
_MAX_DIR_SCAN_ITEMS = 10_000
|
|
630
|
-
_MAX_SUBDIRECTORY_SAMPLE = 20
|
|
631
|
-
_MAX_FILE_SAMPLE = 30
|
|
632
|
-
_MAX_PREVIEW_FILE_SIZE = 10 * 1024 * 1024 # 10 MB
|
|
633
|
-
_MAX_PREVIEW_CHARS = 50_000
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
def _summarize_directory(path: Path) -> dict[str, object]:
|
|
637
|
-
"""Return bounded directory metadata without loading all entries in memory."""
|
|
638
|
-
dirs: list[str] = []
|
|
639
|
-
files: list[str] = []
|
|
640
|
-
scanned = 0
|
|
641
|
-
truncated = False
|
|
642
|
-
|
|
643
|
-
with os.scandir(path) as entries:
|
|
644
|
-
for entry in entries:
|
|
645
|
-
if scanned >= _MAX_DIR_SCAN_ITEMS:
|
|
646
|
-
truncated = True
|
|
647
|
-
break
|
|
648
|
-
scanned += 1
|
|
649
|
-
try:
|
|
650
|
-
if entry.is_dir(follow_symlinks=False):
|
|
651
|
-
if len(dirs) < _MAX_SUBDIRECTORY_SAMPLE:
|
|
652
|
-
dirs.append(entry.name)
|
|
653
|
-
elif entry.is_file(follow_symlinks=False) and len(files) < _MAX_FILE_SAMPLE:
|
|
654
|
-
files.append(entry.name)
|
|
655
|
-
except OSError:
|
|
656
|
-
continue
|
|
657
|
-
|
|
658
|
-
summary: dict[str, object] = {
|
|
659
|
-
"item_count": scanned,
|
|
660
|
-
"subdirectories": sorted(dirs),
|
|
661
|
-
"files_sample": sorted(files),
|
|
662
|
-
}
|
|
663
|
-
if truncated:
|
|
664
|
-
summary["note"] = (
|
|
665
|
-
f"Directory scan capped at {_MAX_DIR_SCAN_ITEMS} entries; samples may be incomplete"
|
|
666
|
-
)
|
|
667
|
-
elif scanned > (_MAX_SUBDIRECTORY_SAMPLE + _MAX_FILE_SAMPLE):
|
|
668
|
-
summary["note"] = f"Showing first items of {scanned} total"
|
|
669
|
-
return summary
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
def _read_preview(path: Path, max_lines: int) -> str | None:
|
|
673
|
-
"""Read the first *max_lines* lines of a text file.
|
|
674
|
-
|
|
675
|
-
Returns ``None`` for binary files or files that can't be read.
|
|
676
|
-
"""
|
|
677
|
-
try:
|
|
678
|
-
if path.stat().st_size > _MAX_PREVIEW_FILE_SIZE:
|
|
679
|
-
return "(file too large for preview)"
|
|
680
|
-
except OSError:
|
|
681
|
-
return None
|
|
682
|
-
|
|
683
|
-
ext = path.suffix.lstrip(".").lower()
|
|
684
|
-
name_lower = path.name.lower()
|
|
685
|
-
stem_lower = path.stem.lower()
|
|
686
|
-
|
|
687
|
-
is_text = (
|
|
688
|
-
ext in _TEXT_EXTENSIONS
|
|
689
|
-
or name_lower in _TEXT_FILENAMES
|
|
690
|
-
or stem_lower in _TEXT_FILENAMES
|
|
691
|
-
or name_lower.startswith(".") # dotfiles are usually text
|
|
692
|
-
)
|
|
693
|
-
|
|
694
|
-
if not is_text:
|
|
695
|
-
# Sniff for binary content
|
|
696
|
-
try:
|
|
697
|
-
with open(path, "rb") as f:
|
|
698
|
-
chunk = f.read(512)
|
|
699
|
-
if b"\x00" in chunk:
|
|
700
|
-
return None # binary
|
|
701
|
-
is_text = True
|
|
702
|
-
except (OSError, PermissionError):
|
|
703
|
-
return None
|
|
704
|
-
|
|
705
|
-
if not is_text:
|
|
706
|
-
return None
|
|
707
|
-
|
|
708
|
-
# Read lines with encoding fallback
|
|
709
|
-
for encoding in ("utf-8", "utf-8-sig", "latin-1"):
|
|
710
|
-
try:
|
|
711
|
-
with open(path, encoding=encoding) as f:
|
|
712
|
-
lines: list[str] = []
|
|
713
|
-
total_chars = 0
|
|
714
|
-
truncated = False
|
|
715
|
-
for _ in range(max_lines):
|
|
716
|
-
remaining = _MAX_PREVIEW_CHARS - total_chars
|
|
717
|
-
if remaining <= 0:
|
|
718
|
-
truncated = True
|
|
719
|
-
break
|
|
720
|
-
|
|
721
|
-
# Bound each read to avoid huge single-line payloads.
|
|
722
|
-
line = f.readline(remaining + 1)
|
|
723
|
-
if not line:
|
|
724
|
-
break
|
|
725
|
-
|
|
726
|
-
if len(line) > remaining:
|
|
727
|
-
line = line[:remaining]
|
|
728
|
-
truncated = True
|
|
729
|
-
|
|
730
|
-
total_chars += len(line)
|
|
731
|
-
lines.append(line.rstrip("\n\r"))
|
|
732
|
-
|
|
733
|
-
if total_chars >= _MAX_PREVIEW_CHARS:
|
|
734
|
-
truncated = True
|
|
735
|
-
break
|
|
736
|
-
|
|
737
|
-
if truncated:
|
|
738
|
-
lines.append("... [preview truncated]")
|
|
739
|
-
return "\n".join(lines)
|
|
740
|
-
except UnicodeDecodeError:
|
|
741
|
-
continue
|
|
742
|
-
except (OSError, PermissionError):
|
|
743
|
-
return None
|
|
744
|
-
|
|
745
|
-
return "(unable to decode file content)"
|
|
1
|
+
"""
|
|
2
|
+
Everything MCP Server - The definitive MCP server for voidtools Everything.
|
|
3
|
+
|
|
4
|
+
Provides 5 tools for AI agents to search and analyse files at lightning speed
|
|
5
|
+
using voidtools Everything's real-time NTFS index.
|
|
6
|
+
|
|
7
|
+
Compatible with: Claude Code, Codex, Gemini, Kimi, Qwen, Cursor, Windsurf,
|
|
8
|
+
and any MCP-compatible client using stdio transport.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import asyncio
|
|
14
|
+
import json
|
|
15
|
+
import logging
|
|
16
|
+
import os
|
|
17
|
+
import sys
|
|
18
|
+
from contextlib import asynccontextmanager
|
|
19
|
+
from datetime import datetime
|
|
20
|
+
from pathlib import Path
|
|
21
|
+
|
|
22
|
+
from mcp.server.fastmcp import FastMCP
|
|
23
|
+
from pydantic import BaseModel, ConfigDict, Field, field_validator
|
|
24
|
+
|
|
25
|
+
from everything_mcp.backend import (
|
|
26
|
+
FILE_TYPES,
|
|
27
|
+
SORT_MAP,
|
|
28
|
+
TIME_PERIODS,
|
|
29
|
+
EverythingBackend,
|
|
30
|
+
build_recent_query,
|
|
31
|
+
build_type_query,
|
|
32
|
+
human_size,
|
|
33
|
+
)
|
|
34
|
+
from everything_mcp.config import EverythingConfig
|
|
35
|
+
|
|
36
|
+
# ── Logging (stderr - required for stdio MCP transport) ──────────────────
|
|
37
|
+
|
|
38
|
+
logging.basicConfig(
|
|
39
|
+
level=logging.INFO,
|
|
40
|
+
format="%(asctime)s [%(name)s] %(levelname)s: %(message)s",
|
|
41
|
+
stream=sys.stderr,
|
|
42
|
+
)
|
|
43
|
+
logger = logging.getLogger("everything_mcp")
|
|
44
|
+
|
|
45
|
+
# ── Globals (initialised during lifespan) ─────────────────────────────────
|
|
46
|
+
|
|
47
|
+
_backend: EverythingBackend | None = None
|
|
48
|
+
_config: EverythingConfig | None = None
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
@asynccontextmanager
async def lifespan(server):
    """Initialise the Everything backend at startup and log shutdown.

    Auto-detects the local Everything installation, reports the outcome on
    stderr, and publishes the backend/config through the module globals that
    ``_get_backend`` reads.
    """
    global _backend, _config

    logger.info("Everything MCP starting - auto-detecting Everything installation…")
    _config = EverythingConfig.auto_detect()

    if not _config.is_valid:
        # Surface every detection problem so the operator can fix the setup.
        for err in _config.errors:
            logger.error(" %s", err)
        for warn in _config.warnings:
            logger.warning(" %s", warn)
    else:
        logger.info("Connected: %s (es: %s)", _config.version_info, _config.es_path)

    _backend = EverythingBackend(_config)
    try:
        yield
    finally:
        logger.info("Everything MCP shutting down.")
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
# ── Server instance ───────────────────────────────────────────────────────

# FastMCP drives the stdio transport; ``lifespan`` wires up the backend
# before any tool call is served.
mcp = FastMCP("everything_mcp", lifespan=lifespan)
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def _get_backend() -> EverythingBackend:
    """Return the shared backend, raising ``RuntimeError`` when unusable.

    Raises:
        RuntimeError: if the lifespan hook has not run yet, or if the
            auto-detected configuration reported Everything as unavailable.
    """
    backend = _backend
    if backend is None:
        raise RuntimeError("Everything MCP not initialised")
    # De Morgan of the original check: config must exist AND be valid.
    if not (_config and _config.is_valid):
        detail = _config.errors if _config else ["Not initialised"]
        raise RuntimeError("Everything is not available. " + " ".join(detail))
    return backend
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
# ═══════════════════════════════════════════════════════════════════════════
|
|
90
|
+
# Tool 1: everything_search - The Workhorse
|
|
91
|
+
# ═══════════════════════════════════════════════════════════════════════════
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
class SearchInput(BaseModel):
    """Input schema for ``everything_search``."""

    # Trim surrounding whitespace on string fields and reject unknown keys so
    # mistyped tool-call arguments fail validation instead of being ignored.
    model_config = ConfigDict(str_strip_whitespace=True, extra="forbid")

    # Raw Everything query string; the syntax examples live in the field
    # description so MCP clients surface them to the model.
    query: str = Field(
        ...,
        description=(
            "Search query using Everything syntax. Examples: "
            "'*.py' (all Python files), "
            "'ext:py;js path:C:\\Projects' (Python/JS in Projects), "
            "'size:>10mb ext:log' (large logs), "
            "'dm:today ext:py' (Python files modified today), "
            "'content:TODO ext:py' (files containing TODO - requires content indexing), "
            "'\"exact phrase\"' (exact filename match), "
            "'regex:test_\\d+\\.py$' (regex). "
            "Combine with space (AND) or | (OR). Prefix ! to exclude."
        ),
        min_length=1,
        max_length=2000,
    )
    max_results: int = Field(
        default=50,
        description="Maximum results to return (1-500)",
        ge=1, le=500,
    )
    # Sort key; must be one of SORT_MAP's keys (enforced by the validator).
    sort: str = Field(
        default="date-modified-desc",
        description=(
            "Sort order. Options: "
            + ", ".join(sorted(SORT_MAP.keys()))
        ),
    )

    @field_validator("sort")
    @classmethod
    def validate_sort(cls, v: str) -> str:
        # Fail early with the full list of valid options in the message.
        if v not in SORT_MAP:
            raise ValueError(f"Invalid sort option '{v}'. Valid: {', '.join(sorted(SORT_MAP.keys()))}")
        return v

    # Matching-behaviour toggles passed straight through to backend.search().
    match_case: bool = Field(default=False, description="Case-sensitive search")
    match_whole_word: bool = Field(default=False, description="Match whole words only")
    match_regex: bool = Field(default=False, description="Treat query as regex")
    match_path: bool = Field(default=False, description="Match against full path, not just filename")
    offset: int = Field(default=0, description="Skip N results (pagination)", ge=0)
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
@mcp.tool(
    name="everything_search",
    annotations={
        "title": "Search Files & Folders",
        "readOnlyHint": True,
        "destructiveHint": False,
        "idempotentHint": True,
        "openWorldHint": False,
    },
)
async def everything_search(params: SearchInput) -> str:
    """Search for files and folders instantly using voidtools Everything.

    Leverages Everything's real-time NTFS index for sub-millisecond search
    across all local and mapped drives. Supports wildcards, regex, size/date
    filters, extension filters, path restrictions, and content search.
    """
    try:
        backend = _get_backend()
        # Collect the pass-through options once, then splat into the call.
        search_kwargs = {
            "query": params.query,
            "max_results": params.max_results,
            "sort": params.sort,
            "match_case": params.match_case,
            "match_whole_word": params.match_whole_word,
            "match_regex": params.match_regex,
            "match_path": params.match_path,
            "offset": params.offset,
        }
        hits = await backend.search(**search_kwargs)
        return _format_search_results(hits, params.query, params.max_results, params.offset)
    except Exception as exc:
        # Surface any failure as a readable tool result rather than a crash.
        return f"Error: {exc}"
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
# ═══════════════════════════════════════════════════════════════════════════
|
|
177
|
+
# Tool 2: everything_search_by_type - Category Search
|
|
178
|
+
# ═══════════════════════════════════════════════════════════════════════════
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
class SearchByTypeInput(BaseModel):
    """Input schema for ``everything_search_by_type``."""

    # Strip stray whitespace from strings; reject unknown keys loudly.
    model_config = ConfigDict(str_strip_whitespace=True, extra="forbid")

    # Category key from FILE_TYPES; combined into a query by build_type_query.
    file_type: str = Field(
        ...,
        description="File type category: " + ", ".join(sorted(FILE_TYPES.keys())),
    )
    query: str = Field(
        default="",
        description="Additional search filter (e.g. 'config' to narrow results)",
    )
    path: str = Field(
        default="",
        description="Restrict search to this directory (e.g. 'C:\\Projects')",
    )
    max_results: int = Field(default=50, ge=1, le=500)
    # Sort key; must be one of SORT_MAP's keys (enforced by the validator).
    sort: str = Field(default="date-modified-desc")

    @field_validator("sort")
    @classmethod
    def validate_sort(cls, v: str) -> str:
        # Fail early with the full list of valid options in the message.
        if v not in SORT_MAP:
            raise ValueError(f"Invalid sort option '{v}'. Valid: {', '.join(sorted(SORT_MAP.keys()))}")
        return v
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
@mcp.tool(
    name="everything_search_by_type",
    annotations={
        "title": "Search by File Type Category",
        "readOnlyHint": True,
        "destructiveHint": False,
        "idempotentHint": True,
        "openWorldHint": False,
    },
)
async def everything_search_by_type(params: SearchByTypeInput) -> str:
    """Search for files by type category.

    Categories: audio, video, image, document, code, archive, executable,
    font, 3d, data. Each maps to a curated list of file extensions.
    """
    try:
        backend = _get_backend()
        # Expand the category into a concrete Everything query.
        full_query = build_type_query(params.file_type, params.query, params.path)
        results = await backend.search(
            query=full_query,
            max_results=params.max_results,
            sort=params.sort,
        )
        # Human-readable label for the results header.
        label = f"type:{params.file_type}"
        if params.query:
            label += f" {params.query}"
        return _format_search_results(results, label, params.max_results)
    except Exception as exc:
        return f"Error: {exc}"
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
# ═══════════════════════════════════════════════════════════════════════════
|
|
240
|
+
# Tool 3: everything_find_recent - What Changed?
|
|
241
|
+
# ═══════════════════════════════════════════════════════════════════════════
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
class FindRecentInput(BaseModel):
    """Input schema for ``everything_find_recent``."""

    # Strip stray whitespace from strings; reject unknown keys loudly.
    model_config = ConfigDict(str_strip_whitespace=True, extra="forbid")

    period: str = Field(
        default="1hour",
        description=(
            "How recent. Options: "
            # TIME_PERIODS is a dict, so iterating its keys already yields
            # declaration order; the previous sorted(..., key=index) call was
            # an O(n²) identity permutation producing the same string.
            + ", ".join(TIME_PERIODS.keys())
            + ". Or raw Everything syntax like 'last2hours'."
        ),
    )
    path: str = Field(default="", description="Restrict to this directory path")
    extensions: str = Field(
        default="",
        description="Filter by extensions, e.g. 'py,js,ts' or 'py;js;ts'",
    )
    query: str = Field(default="", description="Additional search filter")
    max_results: int = Field(default=50, ge=1, le=500)
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
@mcp.tool(
    name="everything_find_recent",
    annotations={
        "title": "Find Recently Modified Files",
        "readOnlyHint": True,
        "destructiveHint": False,
        "idempotentHint": True,
        "openWorldHint": False,
    },
)
async def everything_find_recent(params: FindRecentInput) -> str:
    """Find files modified within a recent time period.

    Ideal for discovering what changed in a project, tracking recent
    downloads, finding today's log files, etc. Sorted newest-first.
    """
    try:
        backend = _get_backend()

        # Base query from period/path/extensions, plus any free-form filter.
        query_parts = [build_recent_query(params.period, params.path, params.extensions)]
        if params.query:
            query_parts.append(params.query)

        results = await backend.search(
            query=" ".join(query_parts),
            max_results=params.max_results,
            sort="date-modified-desc",
        )
        return _format_search_results(results, f"recent ({params.period})", params.max_results)
    except Exception as exc:
        return f"Error: {exc}"
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
# ═══════════════════════════════════════════════════════════════════════════
|
|
300
|
+
# Tool 4: everything_file_details - Deep Inspection
|
|
301
|
+
# ═══════════════════════════════════════════════════════════════════════════
|
|
302
|
+
|
|
303
|
+
|
|
304
|
+
class FileDetailsInput(BaseModel):
    """Input schema for ``everything_file_details``."""

    # Strip stray whitespace from strings; reject unknown keys loudly.
    model_config = ConfigDict(str_strip_whitespace=True, extra="forbid")

    # Paths to stat; bounded to keep a single tool response small.
    paths: list[str] = Field(
        ...,
        description="File/folder paths to inspect (1-20)",
        min_length=1,
        max_length=20,
    )
    # 0 disables content previews; capped at 200 lines per file.
    preview_lines: int = Field(
        default=0,
        description="Lines of text content to preview (0 = none, max 200)",
        ge=0, le=200,
    )
|
|
320
|
+
|
|
321
|
+
|
|
322
|
+
@mcp.tool(
    name="everything_file_details",
    annotations={
        "title": "Get File Details & Content Preview",
        "readOnlyHint": True,
        "destructiveHint": False,
        "idempotentHint": True,
        "openWorldHint": False,
    },
)
async def everything_file_details(params: FileDetailsInput) -> str:
    """Get detailed metadata and optional content preview for specific files.

    Returns: full path, size, dates, type, permissions, hidden status.
    For directories: item count, subdirectories, file listing.
    For text files with preview_lines > 0: first N lines of content.
    """
    # stat()/read() calls block; off-load to a worker thread so the asyncio
    # event loop stays responsive while disks are touched.
    return await asyncio.to_thread(
        _get_file_details_sync, params.paths, params.preview_lines
    )
|
|
345
|
+
|
|
346
|
+
|
|
347
|
+
def _get_file_details_sync(paths: list[str], preview_lines: int) -> str:
    """Synchronous implementation of file details gathering.

    Builds one JSON object per input path (metadata, optional directory
    summary or text preview) and joins them with ``\\n---\\n`` separators.
    Per-path errors are reported inside that path's object instead of
    aborting the whole batch.
    """
    rendered: list[str] = []

    for raw_path in paths:
        target = Path(raw_path)
        record: dict = {"path": str(target)}

        if not target.exists():
            record["error"] = "File not found"
            rendered.append(json.dumps(record, indent=2, ensure_ascii=False))
            continue

        try:
            st = target.stat()
            record["name"] = target.name
            record["type"] = "folder" if target.is_dir() else "file"

            # Size/extension only make sense for plain files.
            if not target.is_dir():
                record["size"] = st.st_size
                record["size_human"] = human_size(st.st_size)
                record["extension"] = target.suffix.lstrip(".").lower()

            ts_format = "%Y-%m-%d %H:%M:%S"
            record["date_modified"] = datetime.fromtimestamp(st.st_mtime).strftime(ts_format)
            record["date_created"] = datetime.fromtimestamp(st.st_ctime).strftime(ts_format)
            record["date_accessed"] = datetime.fromtimestamp(st.st_atime).strftime(ts_format)
            record["read_only"] = not os.access(raw_path, os.W_OK)

            # Windows hidden attribute or Unix dotfile
            attrs = getattr(st, "st_file_attributes", 0)
            record["hidden"] = bool(attrs & 0x2) if attrs else target.name.startswith(".")

            if target.is_dir():
                # Directory listing (bounded by _summarize_directory's caps).
                try:
                    record.update(_summarize_directory(target))
                except PermissionError:
                    record["listing_error"] = "Permission denied"
                except OSError as exc:
                    record["listing_error"] = str(exc)
            elif preview_lines > 0:
                # Content preview for text files; None means binary/unreadable.
                preview = _read_preview(target, preview_lines)
                if preview is not None:
                    record["preview"] = preview

        except PermissionError:
            record["error"] = "Permission denied"
        except OSError as exc:
            record["error"] = str(exc)

        rendered.append(json.dumps(record, indent=2, ensure_ascii=False))

    return "\n---\n".join(rendered)
|
|
402
|
+
|
|
403
|
+
|
|
404
|
+
# ═══════════════════════════════════════════════════════════════════════════
|
|
405
|
+
# Tool 5: everything_count_stats - Quick Analytics
|
|
406
|
+
# ═══════════════════════════════════════════════════════════════════════════
|
|
407
|
+
|
|
408
|
+
|
|
409
|
+
class CountStatsInput(BaseModel):
    """Input schema for ``everything_count_stats``."""

    # Strip stray whitespace from strings; reject unknown keys loudly.
    model_config = ConfigDict(str_strip_whitespace=True, extra="forbid")

    query: str = Field(
        ...,
        description=(
            "Search query to count/measure. Same syntax as everything_search. "
            "Examples: 'ext:py path:C:\\Projects', 'ext:log size:>1mb', '*.tmp'"
        ),
        min_length=1,
        max_length=2000,
    )
    include_size: bool = Field(
        default=True,
        description="Also calculate total size of all matching files",
    )
    breakdown_by_extension: bool = Field(
        default=False,
        # The handler's sample_limit is 500 (see everything_count_stats);
        # the previous text said 200, which no longer matched the code.
        description="Break down count and size by file extension (samples up to 500 results)",
    )
|
|
431
|
+
|
|
432
|
+
|
|
433
|
+
@mcp.tool(
    name="everything_count_stats",
    annotations={
        "title": "Count & Size Statistics",
        "readOnlyHint": True,
        "destructiveHint": False,
        "idempotentHint": True,
        "openWorldHint": False,
    },
)
async def everything_count_stats(params: CountStatsInput) -> str:
    """Get count and size statistics for files matching a query.

    Fast way to understand the scope of a query without listing every file.
    Optionally breaks down by extension for a high-level overview.
    """
    try:
        backend = _get_backend()
        output: dict = {"query": params.query}

        # Count
        # Best-effort: a failure becomes an explanatory note in the output
        # rather than failing the whole tool call.
        try:
            output["total_count"] = await backend.count(params.query)
        except Exception:
            output["count_note"] = "Count not available (es.exe may not support -get-result-count)"

        # Total size
        # Also best-effort; a negative return is treated as "unknown" and
        # the size keys are simply omitted.
        if params.include_size:
            try:
                total_size = await backend.get_total_size(params.query)
                if total_size >= 0:
                    output["total_size"] = total_size
                    output["total_size_human"] = human_size(total_size)
            except Exception:
                output["size_note"] = "Total size not available"

        # Extension breakdown
        # Samples the first `sample_limit` results (name-sorted so the sample
        # is deterministic) and aggregates per-extension count and size;
        # directories are skipped.
        if params.breakdown_by_extension:
            try:
                sample_limit = 500
                results = await backend.search(
                    params.query,
                    max_results=sample_limit,
                    sort="name",
                )
                ext_stats: dict[str, dict] = {}
                sampled_files = 0
                for r in results:
                    if r.is_dir:
                        continue
                    sampled_files += 1
                    ext = r.extension or "(no extension)"
                    entry = ext_stats.setdefault(ext, {"count": 0, "total_size": 0})
                    entry["count"] += 1
                    if r.size >= 0:
                        entry["total_size"] += r.size

                # Report only the 30 most numerous extensions.
                sorted_exts = sorted(ext_stats.items(), key=lambda x: x[1]["count"], reverse=True)
                breakdown = {}
                for ext, stats in sorted_exts[:30]:
                    breakdown[ext] = {
                        "count": stats["count"],
                        "total_size": stats["total_size"],
                        "total_size_human": human_size(stats["total_size"]),
                    }
                output["extension_breakdown"] = breakdown
                output["breakdown_note"] = (
                    f"Based on {sampled_files} sampled files from first {len(results)} "
                    f"results (max sample {sample_limit}); directories excluded."
                )
            except Exception as exc:
                output["breakdown_error"] = str(exc)

        return json.dumps(output, indent=2, ensure_ascii=False)
    except Exception as exc:
        return f"Error: {exc}"
|
|
509
|
+
|
|
510
|
+
|
|
511
|
+
# ═══════════════════════════════════════════════════════════════════════════
|
|
512
|
+
# Resource: Health Check
|
|
513
|
+
# ═══════════════════════════════════════════════════════════════════════════
|
|
514
|
+
|
|
515
|
+
|
|
516
|
+
@mcp.resource("everything://status")
async def get_status() -> str:
    """Get the current status of the Everything connection."""
    # Before the lifespan hook runs there is no backend to query.
    if not _backend:
        payload = {"status": "not initialised"}
    else:
        payload = await _backend.health_check()
    return json.dumps(payload, indent=2)
|
|
524
|
+
|
|
525
|
+
|
|
526
|
+
# ═══════════════════════════════════════════════════════════════════════════
|
|
527
|
+
# Helpers
|
|
528
|
+
# ═══════════════════════════════════════════════════════════════════════════
|
|
529
|
+
|
|
530
|
+
|
|
531
|
+
def _format_search_results(
|
|
532
|
+
results: list,
|
|
533
|
+
query_label: str,
|
|
534
|
+
max_results: int,
|
|
535
|
+
offset: int = 0,
|
|
536
|
+
) -> str:
|
|
537
|
+
"""Format search results into a clean, readable string for LLM consumption."""
|
|
538
|
+
if not results:
|
|
539
|
+
return f"No results found for: {query_label}"
|
|
540
|
+
|
|
541
|
+
header = f"Found {len(results)} results for: {query_label}"
|
|
542
|
+
if offset > 0:
|
|
543
|
+
header += f" (offset: {offset})"
|
|
544
|
+
lines = [header, ""]
|
|
545
|
+
|
|
546
|
+
for r in results:
|
|
547
|
+
d = r.to_dict() if hasattr(r, "to_dict") else r
|
|
548
|
+
path = d.get("path", "?")
|
|
549
|
+
ftype = d.get("type", "file")
|
|
550
|
+
size_h = d.get("size_human", "")
|
|
551
|
+
dm = d.get("date_modified", "")
|
|
552
|
+
|
|
553
|
+
prefix = "[DIR]" if ftype == "folder" else "[FILE]"
|
|
554
|
+
meta_parts: list[str] = []
|
|
555
|
+
if size_h:
|
|
556
|
+
meta_parts.append(size_h)
|
|
557
|
+
if dm:
|
|
558
|
+
meta_parts.append(dm)
|
|
559
|
+
|
|
560
|
+
meta = f" ({', '.join(meta_parts)})" if meta_parts else ""
|
|
561
|
+
lines.append(f" {prefix} {path}{meta}")
|
|
562
|
+
|
|
563
|
+
if len(results) >= max_results:
|
|
564
|
+
lines.append("")
|
|
565
|
+
lines.append(
|
|
566
|
+
f"Showing first {max_results} results. "
|
|
567
|
+
"Use 'offset' to paginate or refine the query."
|
|
568
|
+
)
|
|
569
|
+
|
|
570
|
+
return "\n".join(lines)
|
|
571
|
+
|
|
572
|
+
|
|
573
|
+
# ── Text file preview ─────────────────────────────────────────────────────
|
|
574
|
+
|
|
575
|
+
# Extensions we can safely read as text
|
|
576
|
+
_TEXT_EXTENSIONS: frozenset[str] = frozenset({
    # Text & docs
    "txt", "md", "mdx", "rst", "adoc", "org",
    # Python
    "py", "pyi", "pyw", "pyx", "pxd",
    # JavaScript/TypeScript
    "js", "mjs", "cjs", "ts", "mts", "cts", "jsx", "tsx",
    # Web frameworks
    "vue", "svelte", "astro", "marko",
    # C family
    "c", "cpp", "cc", "cxx", "h", "hpp", "hxx", "cs", "java", "m", "mm",
    # Systems languages
    "go", "rs", "rb", "php", "swift", "kt", "kts", "scala", "r", "lua",
    # Shell
    "sh", "bash", "zsh", "fish", "ps1", "psm1", "psd1", "bat", "cmd",
    # Database & query
    "sql", "prisma", "graphql", "gql",
    # Web
    "html", "htm", "css", "scss", "sass", "less", "styl", "pcss",
    # Data formats
    "json", "jsonc", "json5", "jsonl", "ndjson",
    "xml", "xsl", "xslt", "xsd", "svg", "rss", "atom",
    "yaml", "yml", "toml", "ini", "cfg", "conf", "env", "properties",
    "csv", "tsv", "log",
    # Config files (with extensions)
    "gitignore", "gitattributes", "gitmodules", "npmrc", "nvmrc", "yarnrc",
    "dockerignore", "editorconfig", "eslintrc", "prettierrc", "babelrc",
    "stylelintrc", "browserslistrc",
    # Build tools
    "makefile", "dockerfile", "cmake", "gradle", "sbt", "cabal", "bazel",
    # Academic
    "tex", "bib", "cls", "sty",
    # Hardware
    "asm", "s", "v", "sv", "vhd", "vhdl",
    # Modern languages
    "dart", "zig", "nim", "hx", "odin", "jai", "vlang",
    # Functional
    "ex", "exs", "erl", "hrl", "hs", "lhs", "ml", "mli", "fs", "fsi", "fsx",
    "clj", "cljs", "cljc", "edn", "lisp", "el", "rkt", "scm", "fnl",
    # Other
    "pro", "pri", "qml", "proto", "thrift", "capnp",
    "tf", "hcl", "nix", "dhall", "jsonnet", "cue",
    "http", "rest", "lock",
})

# Filenames (no extension) that are always text
_TEXT_FILENAMES: frozenset[str] = frozenset({
    "makefile", "dockerfile", "cmakelists.txt", "rakefile", "gemfile",
    "procfile", "vagrantfile", "brewfile", "justfile", "taskfile",
    "license", "licence", "readme", "authors", "contributors",
    "changelog", "changes", "history", "news", "todo",
})

# Hard caps keeping directory summaries and file previews bounded.
_MAX_DIR_SCAN_ITEMS = 10_000  # entries examined per directory scan
_MAX_SUBDIRECTORY_SAMPLE = 20  # subdirectory names reported per directory
_MAX_FILE_SAMPLE = 30  # file names reported per directory
_MAX_PREVIEW_FILE_SIZE = 10 * 1024 * 1024  # 10 MB
_MAX_PREVIEW_CHARS = 50_000  # total characters in a single preview
|
|
634
|
+
|
|
635
|
+
|
|
636
|
+
def _summarize_directory(path: Path) -> dict[str, object]:
    """Return bounded directory metadata without loading all entries in memory.

    Scans at most ``_MAX_DIR_SCAN_ITEMS`` entries, collecting up to
    ``_MAX_SUBDIRECTORY_SAMPLE`` subdirectory names and ``_MAX_FILE_SAMPLE``
    file names; a note is added when the listing is capped or sampled.
    """
    sub_dirs: list[str] = []
    sample_files: list[str] = []
    seen = 0
    capped = False

    with os.scandir(path) as it:
        for entry in it:
            if seen >= _MAX_DIR_SCAN_ITEMS:
                capped = True
                break
            seen += 1
            try:
                if entry.is_dir(follow_symlinks=False):
                    if len(sub_dirs) < _MAX_SUBDIRECTORY_SAMPLE:
                        sub_dirs.append(entry.name)
                elif entry.is_file(follow_symlinks=False) and len(sample_files) < _MAX_FILE_SAMPLE:
                    sample_files.append(entry.name)
            except OSError:
                # Entry vanished or became inaccessible mid-scan; skip it.
                continue

    summary: dict[str, object] = {
        "item_count": seen,
        "subdirectories": sorted(sub_dirs),
        "files_sample": sorted(sample_files),
    }
    if capped:
        summary["note"] = (
            f"Directory scan capped at {_MAX_DIR_SCAN_ITEMS} entries; samples may be incomplete"
        )
    elif seen > (_MAX_SUBDIRECTORY_SAMPLE + _MAX_FILE_SAMPLE):
        summary["note"] = f"Showing first items of {seen} total"
    return summary
|
|
670
|
+
|
|
671
|
+
|
|
672
|
+
def _read_preview(path: Path, max_lines: int) -> str | None:
    """Read the first *max_lines* lines of a text file.

    Returns ``None`` for binary files or files that can't be read.
    """
    # Cheap size gate before any content reads; oversized files get a
    # placeholder string instead of a preview.
    try:
        if path.stat().st_size > _MAX_PREVIEW_FILE_SIZE:
            return "(file too large for preview)"
    except OSError:
        return None

    ext = path.suffix.lstrip(".").lower()
    name_lower = path.name.lower()
    stem_lower = path.stem.lower()

    # Known-text heuristics: extension allow-list, well-known filenames
    # (matched with or without an extension), and dotfiles.
    is_text = (
        ext in _TEXT_EXTENSIONS
        or name_lower in _TEXT_FILENAMES
        or stem_lower in _TEXT_FILENAMES
        or name_lower.startswith(".")  # dotfiles are usually text
    )

    if not is_text:
        # Sniff for binary content
        # A NUL byte in the first 512 bytes is treated as binary.
        try:
            with open(path, "rb") as f:
                chunk = f.read(512)
                if b"\x00" in chunk:
                    return None  # binary
                is_text = True
        except (OSError, PermissionError):
            return None

    if not is_text:
        return None

    # Read lines with encoding fallback
    # latin-1 last: it decodes any byte sequence, so it acts as a catch-all.
    for encoding in ("utf-8", "utf-8-sig", "latin-1"):
        try:
            with open(path, encoding=encoding) as f:
                lines: list[str] = []
                total_chars = 0
                truncated = False
                for _ in range(max_lines):
                    # Character budget left before hitting _MAX_PREVIEW_CHARS.
                    remaining = _MAX_PREVIEW_CHARS - total_chars
                    if remaining <= 0:
                        truncated = True
                        break

                    # Bound each read to avoid huge single-line payloads.
                    line = f.readline(remaining + 1)
                    if not line:
                        break

                    if len(line) > remaining:
                        line = line[:remaining]
                        truncated = True

                    total_chars += len(line)
                    lines.append(line.rstrip("\n\r"))

                    if total_chars >= _MAX_PREVIEW_CHARS:
                        truncated = True
                        break

                if truncated:
                    lines.append("... [preview truncated]")
                return "\n".join(lines)
        except UnicodeDecodeError:
            # Try the next candidate encoding.
            continue
        except (OSError, PermissionError):
            return None

    # Effectively unreachable (latin-1 accepts any bytes) but kept as a
    # defensive fallback.
    return "(unable to decode file content)"
|