mcp-vector-search 0.15.7 (mcp_vector_search-0.15.7-py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mcp-vector-search might be problematic.
- mcp_vector_search/__init__.py +10 -0
- mcp_vector_search/cli/__init__.py +1 -0
- mcp_vector_search/cli/commands/__init__.py +1 -0
- mcp_vector_search/cli/commands/auto_index.py +397 -0
- mcp_vector_search/cli/commands/chat.py +534 -0
- mcp_vector_search/cli/commands/config.py +393 -0
- mcp_vector_search/cli/commands/demo.py +358 -0
- mcp_vector_search/cli/commands/index.py +762 -0
- mcp_vector_search/cli/commands/init.py +658 -0
- mcp_vector_search/cli/commands/install.py +869 -0
- mcp_vector_search/cli/commands/install_old.py +700 -0
- mcp_vector_search/cli/commands/mcp.py +1254 -0
- mcp_vector_search/cli/commands/reset.py +393 -0
- mcp_vector_search/cli/commands/search.py +796 -0
- mcp_vector_search/cli/commands/setup.py +1133 -0
- mcp_vector_search/cli/commands/status.py +584 -0
- mcp_vector_search/cli/commands/uninstall.py +404 -0
- mcp_vector_search/cli/commands/visualize/__init__.py +39 -0
- mcp_vector_search/cli/commands/visualize/cli.py +265 -0
- mcp_vector_search/cli/commands/visualize/exporters/__init__.py +12 -0
- mcp_vector_search/cli/commands/visualize/exporters/html_exporter.py +33 -0
- mcp_vector_search/cli/commands/visualize/exporters/json_exporter.py +29 -0
- mcp_vector_search/cli/commands/visualize/graph_builder.py +709 -0
- mcp_vector_search/cli/commands/visualize/layout_engine.py +469 -0
- mcp_vector_search/cli/commands/visualize/server.py +201 -0
- mcp_vector_search/cli/commands/visualize/state_manager.py +428 -0
- mcp_vector_search/cli/commands/visualize/templates/__init__.py +16 -0
- mcp_vector_search/cli/commands/visualize/templates/base.py +218 -0
- mcp_vector_search/cli/commands/visualize/templates/scripts.py +3670 -0
- mcp_vector_search/cli/commands/visualize/templates/styles.py +779 -0
- mcp_vector_search/cli/commands/visualize.py.original +2536 -0
- mcp_vector_search/cli/commands/watch.py +287 -0
- mcp_vector_search/cli/didyoumean.py +520 -0
- mcp_vector_search/cli/export.py +320 -0
- mcp_vector_search/cli/history.py +295 -0
- mcp_vector_search/cli/interactive.py +342 -0
- mcp_vector_search/cli/main.py +484 -0
- mcp_vector_search/cli/output.py +414 -0
- mcp_vector_search/cli/suggestions.py +375 -0
- mcp_vector_search/config/__init__.py +1 -0
- mcp_vector_search/config/constants.py +24 -0
- mcp_vector_search/config/defaults.py +200 -0
- mcp_vector_search/config/settings.py +146 -0
- mcp_vector_search/core/__init__.py +1 -0
- mcp_vector_search/core/auto_indexer.py +298 -0
- mcp_vector_search/core/config_utils.py +394 -0
- mcp_vector_search/core/connection_pool.py +360 -0
- mcp_vector_search/core/database.py +1237 -0
- mcp_vector_search/core/directory_index.py +318 -0
- mcp_vector_search/core/embeddings.py +294 -0
- mcp_vector_search/core/exceptions.py +89 -0
- mcp_vector_search/core/factory.py +318 -0
- mcp_vector_search/core/git_hooks.py +345 -0
- mcp_vector_search/core/indexer.py +1002 -0
- mcp_vector_search/core/llm_client.py +453 -0
- mcp_vector_search/core/models.py +294 -0
- mcp_vector_search/core/project.py +350 -0
- mcp_vector_search/core/scheduler.py +330 -0
- mcp_vector_search/core/search.py +952 -0
- mcp_vector_search/core/watcher.py +322 -0
- mcp_vector_search/mcp/__init__.py +5 -0
- mcp_vector_search/mcp/__main__.py +25 -0
- mcp_vector_search/mcp/server.py +752 -0
- mcp_vector_search/parsers/__init__.py +8 -0
- mcp_vector_search/parsers/base.py +296 -0
- mcp_vector_search/parsers/dart.py +605 -0
- mcp_vector_search/parsers/html.py +413 -0
- mcp_vector_search/parsers/javascript.py +643 -0
- mcp_vector_search/parsers/php.py +694 -0
- mcp_vector_search/parsers/python.py +502 -0
- mcp_vector_search/parsers/registry.py +223 -0
- mcp_vector_search/parsers/ruby.py +678 -0
- mcp_vector_search/parsers/text.py +186 -0
- mcp_vector_search/parsers/utils.py +265 -0
- mcp_vector_search/py.typed +1 -0
- mcp_vector_search/utils/__init__.py +42 -0
- mcp_vector_search/utils/gitignore.py +250 -0
- mcp_vector_search/utils/gitignore_updater.py +212 -0
- mcp_vector_search/utils/monorepo.py +339 -0
- mcp_vector_search/utils/timing.py +338 -0
- mcp_vector_search/utils/version.py +47 -0
- mcp_vector_search-0.15.7.dist-info/METADATA +884 -0
- mcp_vector_search-0.15.7.dist-info/RECORD +86 -0
- mcp_vector_search-0.15.7.dist-info/WHEEL +4 -0
- mcp_vector_search-0.15.7.dist-info/entry_points.txt +3 -0
- mcp_vector_search-0.15.7.dist-info/licenses/LICENSE +21 -0
mcp_vector_search/utils/monorepo.py
@@ -0,0 +1,339 @@
"""Monorepo detection and subproject identification."""

import json
from pathlib import Path
from typing import NamedTuple

from loguru import logger

# Directories to exclude from subproject detection
# These are typically test/example/docs directories, not actual subprojects
EXCLUDED_SUBPROJECT_DIRS = {
    "tests",
    "test",
    "examples",
    "example",
    "docs",
    "doc",
    "scripts",
    "tools",
    "benchmarks",
    "benchmark",
    "node_modules",
    ".git",
    ".github",
    ".gitlab",
    "build",
    "dist",
    "__pycache__",
    ".pytest_cache",
    ".mypy_cache",
    ".ruff_cache",
    "coverage",
    ".coverage",
    "htmlcov",
}


class Subproject(NamedTuple):
    """Represents a subproject in a monorepo."""

    name: str  # "ewtn-plus-foundation"
    path: Path  # Absolute path to subproject
    relative_path: str  # Relative to monorepo root


class MonorepoDetector:
    """Detects monorepo structure and identifies subprojects."""

    def __init__(self, project_root: Path):
        """Initialize monorepo detector.

        Args:
            project_root: Root directory of the project
        """
        self.project_root = project_root
        self._subprojects: list[Subproject] | None = None

    def _is_excluded_path(self, path: Path) -> bool:
        """Check if a path should be excluded from subproject detection.

        Args:
            path: Path to check (relative to project root)

        Returns:
            True if path should be excluded from subproject detection
        """
        try:
            relative_path = path.relative_to(self.project_root)
            # Check if any part of the path is in the excluded set
            return any(part in EXCLUDED_SUBPROJECT_DIRS for part in relative_path.parts)
        except ValueError:
            # Path is not relative to project root
            return True

    def is_monorepo(self) -> bool:
        """Check if project is a monorepo.

        Returns:
            True if monorepo structure detected
        """
        return bool(self.detect_subprojects())

    def detect_subprojects(self) -> list[Subproject]:
        """Detect all subprojects in the monorepo.

        Returns:
            List of detected subprojects
        """
        if self._subprojects is not None:
            return self._subprojects

        subprojects = []

        # Try package.json workspaces (npm/yarn/pnpm)
        subprojects.extend(self._detect_npm_workspaces())

        # Try lerna.json
        if not subprojects:
            subprojects.extend(self._detect_lerna_packages())

        # Try pnpm-workspace.yaml
        if not subprojects:
            subprojects.extend(self._detect_pnpm_workspaces())

        # Try nx workspace
        if not subprojects:
            subprojects.extend(self._detect_nx_workspace())

        # Fallback: Look for multiple package.json files
        if not subprojects:
            subprojects.extend(self._detect_by_package_json())

        self._subprojects = subprojects
        logger.debug(f"Detected {len(subprojects)} subprojects in {self.project_root}")

        return subprojects

    def _detect_npm_workspaces(self) -> list[Subproject]:
        """Detect npm/yarn/pnpm workspaces from package.json.

        Returns:
            List of subprojects from workspaces
        """
        package_json = self.project_root / "package.json"
        if not package_json.exists():
            return []

        try:
            with open(package_json) as f:
                data = json.load(f)

            workspaces = data.get("workspaces", [])

            # Handle both array and object format
            if isinstance(workspaces, dict):
                workspaces = workspaces.get("packages", [])

            return self._expand_workspace_patterns(workspaces)

        except Exception as e:
            logger.debug(f"Failed to parse package.json workspaces: {e}")
            return []

    def _detect_lerna_packages(self) -> list[Subproject]:
        """Detect lerna packages from lerna.json.

        Returns:
            List of subprojects from lerna
        """
        lerna_json = self.project_root / "lerna.json"
        if not lerna_json.exists():
            return []

        try:
            with open(lerna_json) as f:
                data = json.load(f)

            packages = data.get("packages", ["packages/*"])
            return self._expand_workspace_patterns(packages)

        except Exception as e:
            logger.debug(f"Failed to parse lerna.json: {e}")
            return []

    def _detect_pnpm_workspaces(self) -> list[Subproject]:
        """Detect pnpm workspaces from pnpm-workspace.yaml.

        Returns:
            List of subprojects from pnpm
        """
        pnpm_workspace = self.project_root / "pnpm-workspace.yaml"
        if not pnpm_workspace.exists():
            return []

        try:
            import yaml

            with open(pnpm_workspace) as f:
                data = yaml.safe_load(f)

            packages = data.get("packages", [])
            return self._expand_workspace_patterns(packages)

        except ImportError:
            logger.debug("pyyaml not installed, skipping pnpm-workspace.yaml detection")
            return []
        except Exception as e:
            logger.debug(f"Failed to parse pnpm-workspace.yaml: {e}")
            return []

    def _detect_nx_workspace(self) -> list[Subproject]:
        """Detect nx workspace projects.

        Returns:
            List of subprojects from nx workspace
        """
        nx_json = self.project_root / "nx.json"
        workspace_json = self.project_root / "workspace.json"

        if not (nx_json.exists() or workspace_json.exists()):
            return []

        # Nx projects are typically in apps/ and libs/
        subprojects = []
        for base_dir in ["apps", "libs", "packages"]:
            base_path = self.project_root / base_dir
            if base_path.exists():
                for subdir in base_path.iterdir():
                    if subdir.is_dir() and not subdir.name.startswith("."):
                        # Skip excluded directories
                        if self._is_excluded_path(subdir):
                            logger.debug(
                                f"Skipping excluded nx workspace path: {subdir.relative_to(self.project_root)}"
                            )
                            continue

                        package_json = subdir / "package.json"
                        name = self._get_package_name(package_json) or subdir.name
                        relative = str(subdir.relative_to(self.project_root))
                        subprojects.append(Subproject(name, subdir, relative))

        return subprojects

    def _detect_by_package_json(self) -> list[Subproject]:
        """Fallback: Find all directories with package.json.

        Returns:
            List of subprojects by package.json presence
        """
        subprojects = []

        # Only search up to 3 levels deep
        for package_json in self.project_root.rglob("package.json"):
            # Skip root package.json
            if package_json.parent == self.project_root:
                continue

            # Skip excluded directories (tests, examples, docs, etc.)
            if self._is_excluded_path(package_json.parent):
                logger.debug(
                    f"Skipping excluded path: {package_json.relative_to(self.project_root)}"
                )
                continue

            # Check depth
            relative_parts = package_json.relative_to(self.project_root).parts
            if len(relative_parts) > 4:  # Too deep
                continue

            subdir = package_json.parent
            name = self._get_package_name(package_json) or subdir.name
            relative = str(subdir.relative_to(self.project_root))
            subprojects.append(Subproject(name, subdir, relative))

        return subprojects

    def _expand_workspace_patterns(self, patterns: list[str]) -> list[Subproject]:
        """Expand workspace glob patterns to actual directories.

        Args:
            patterns: List of glob patterns (e.g., ["packages/*", "apps/*"])

        Returns:
            List of subprojects matching patterns
        """
        subprojects = []

        for pattern in patterns:
            # Remove negation patterns (e.g., "!packages/excluded")
            if pattern.startswith("!"):
                continue

            # Expand glob pattern
            for path in self.project_root.glob(pattern):
                if not path.is_dir():
                    continue

                if path.name.startswith("."):
                    continue

                # Skip excluded directories (tests, examples, docs, etc.)
                if self._is_excluded_path(path):
                    logger.debug(
                        f"Skipping excluded workspace path: {path.relative_to(self.project_root)}"
                    )
                    continue

                # Try to get name from package.json
                package_json = path / "package.json"
                name = self._get_package_name(package_json) or path.name
                relative = str(path.relative_to(self.project_root))

                subprojects.append(Subproject(name, path, relative))

        return subprojects

    def _get_package_name(self, package_json: Path) -> str | None:
        """Get package name from package.json.

        Args:
            package_json: Path to package.json file

        Returns:
            Package name or None
        """
        if not package_json.exists():
            return None

        try:
            with open(package_json) as f:
                data = json.load(f)
            return data.get("name")
        except Exception:
            return None

    def get_subproject_for_file(self, file_path: Path) -> Subproject | None:
        """Determine which subproject a file belongs to.

        Args:
            file_path: Path to file

        Returns:
            Subproject containing the file, or None
        """
        subprojects = self.detect_subprojects()

        if not subprojects:
            return None

        # Find the most specific (deepest) subproject containing this file
        matching_subprojects = [
            sp for sp in subprojects if file_path.is_relative_to(sp.path)
        ]

        if not matching_subprojects:
            return None

        # Return the deepest match (longest path)
        return max(matching_subprojects, key=lambda sp: len(sp.path.parts))
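For orientation, a minimal usage sketch of the monorepo module above (not part of the package diff): the MonorepoDetector API comes from the file itself, while the repository and file paths are hypothetical.

# Hedged usage sketch; the paths below are illustrative, not from the package.
from pathlib import Path

from mcp_vector_search.utils.monorepo import MonorepoDetector

detector = MonorepoDetector(Path("/path/to/repo"))
if detector.is_monorepo():
    # Subprojects are resolved from npm/yarn/pnpm workspaces, lerna, nx,
    # or a package.json fallback scan, in that order.
    for sp in detector.detect_subprojects():
        print(f"{sp.name} -> {sp.relative_path}")

    # Map a file back to its owning subproject (deepest match wins).
    owner = detector.get_subproject_for_file(
        Path("/path/to/repo/packages/app/src/index.ts")
    )
    print(owner.name if owner else "no subproject")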
mcp_vector_search/utils/timing.py
@@ -0,0 +1,338 @@
"""Timing utilities for performance measurement and optimization."""

import asyncio
import json
import statistics
import time
from collections.abc import Callable
from contextlib import asynccontextmanager, contextmanager
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any

from loguru import logger


@dataclass
class TimingResult:
    """Result of a timing measurement."""

    operation: str
    duration: float  # in seconds
    timestamp: float
    metadata: dict[str, Any] = field(default_factory=dict)

    @property
    def duration_ms(self) -> float:
        """Duration in milliseconds."""
        return self.duration * 1000

    @property
    def duration_us(self) -> float:
        """Duration in microseconds."""
        return self.duration * 1_000_000


class PerformanceProfiler:
    """Performance profiler for measuring and analyzing operation timings."""

    def __init__(self, name: str = "default"):
        self.name = name
        self.results: list[TimingResult] = []
        self._active_timers: dict[str, float] = {}
        self._nested_level = 0

    def start_timer(self, operation: str) -> None:
        """Start timing an operation."""
        if operation in self._active_timers:
            logger.warning(f"Timer '{operation}' already active, overwriting")
        self._active_timers[operation] = time.perf_counter()

    def stop_timer(
        self, operation: str, metadata: dict[str, Any] | None = None
    ) -> TimingResult:
        """Stop timing an operation and record the result."""
        if operation not in self._active_timers:
            raise ValueError(f"Timer '{operation}' not found or not started")

        start_time = self._active_timers.pop(operation)
        duration = time.perf_counter() - start_time

        result = TimingResult(
            operation=operation,
            duration=duration,
            timestamp=time.time(),
            metadata=metadata or {},
        )

        self.results.append(result)
        return result

    @contextmanager
    def time_operation(self, operation: str, metadata: dict[str, Any] | None = None):
        """Context manager for timing an operation."""
        indent = " " * self._nested_level
        logger.debug(f"{indent}⏱️ Starting: {operation}")

        self._nested_level += 1
        start_time = time.perf_counter()

        try:
            yield
        finally:
            duration = time.perf_counter() - start_time
            self._nested_level -= 1

            result = TimingResult(
                operation=operation,
                duration=duration,
                timestamp=time.time(),
                metadata=metadata or {},
            )

            self.results.append(result)

            indent = " " * self._nested_level
            logger.debug(f"{indent}✅ Completed: {operation} ({duration * 1000:.2f}ms)")

    @asynccontextmanager
    async def time_async_operation(
        self, operation: str, metadata: dict[str, Any] | None = None
    ):
        """Async context manager for timing an operation."""
        indent = " " * self._nested_level
        logger.debug(f"{indent}⏱️ Starting: {operation}")

        self._nested_level += 1
        start_time = time.perf_counter()

        try:
            yield
        finally:
            duration = time.perf_counter() - start_time
            self._nested_level -= 1

            result = TimingResult(
                operation=operation,
                duration=duration,
                timestamp=time.time(),
                metadata=metadata or {},
            )

            self.results.append(result)

            indent = " " * self._nested_level
            logger.debug(f"{indent}✅ Completed: {operation} ({duration * 1000:.2f}ms)")

    def get_stats(self, operation: str | None = None) -> dict[str, Any]:
        """Get timing statistics for operations."""
        if operation:
            durations = [r.duration for r in self.results if r.operation == operation]
        else:
            durations = [r.duration for r in self.results]

        if not durations:
            return {}

        return {
            "count": len(durations),
            "total": sum(durations),
            "mean": statistics.mean(durations),
            "median": statistics.median(durations),
            "min": min(durations),
            "max": max(durations),
            "std_dev": statistics.stdev(durations) if len(durations) > 1 else 0.0,
            "p95": (
                statistics.quantiles(durations, n=20)[18]
                if len(durations) >= 20
                else max(durations)
            ),
            "p99": (
                statistics.quantiles(durations, n=100)[98]
                if len(durations) >= 100
                else max(durations)
            ),
        }

    def get_operation_breakdown(self) -> dict[str, dict[str, Any]]:
        """Get breakdown of all operations."""
        operations = {r.operation for r in self.results}
        return {op: self.get_stats(op) for op in operations}

    def print_report(self, show_individual: bool = False, min_duration_ms: float = 0.0):
        """Print a detailed performance report."""
        if not self.results:
            print("No timing results recorded.")
            return

        print(f"\n{'=' * 60}")
        print(f"PERFORMANCE REPORT: {self.name}")
        print(f"{'=' * 60}")

        # Overall stats
        overall_stats = self.get_stats()
        print("\nOVERALL STATISTICS:")
        print(f" Total operations: {overall_stats['count']}")
        print(f" Total time: {overall_stats['total'] * 1000:.2f}ms")
        print(f" Average: {overall_stats['mean'] * 1000:.2f}ms")
        print(f" Median: {overall_stats['median'] * 1000:.2f}ms")
        print(f" Min: {overall_stats['min'] * 1000:.2f}ms")
        print(f" Max: {overall_stats['max'] * 1000:.2f}ms")

        # Per-operation breakdown
        breakdown = self.get_operation_breakdown()
        print("\nPER-OPERATION BREAKDOWN:")

        for operation, stats in sorted(
            breakdown.items(), key=lambda x: x[1]["total"], reverse=True
        ):
            print(f"\n {operation}:")
            print(f" Count: {stats['count']}")
            print(
                f" Total: {stats['total'] * 1000:.2f}ms ({stats['total'] / overall_stats['total'] * 100:.1f}%)"
            )
            print(f" Average: {stats['mean'] * 1000:.2f}ms")
            print(
                f" Min/Max: {stats['min'] * 1000:.2f}ms / {stats['max'] * 1000:.2f}ms"
            )
            if stats["count"] > 1:
                print(f" StdDev: {stats['std_dev'] * 1000:.2f}ms")

        # Individual results if requested
        if show_individual:
            print("\nINDIVIDUAL RESULTS:")
            for result in self.results:
                if result.duration_ms >= min_duration_ms:
                    print(f" {result.operation}: {result.duration_ms:.2f}ms")
                    if result.metadata:
                        print(f" Metadata: {result.metadata}")

    def save_results(self, file_path: Path):
        """Save timing results to a JSON file."""
        data = {
            "profiler_name": self.name,
            "timestamp": time.time(),
            "results": [
                {
                    "operation": r.operation,
                    "duration": r.duration,
                    "timestamp": r.timestamp,
                    "metadata": r.metadata,
                }
                for r in self.results
            ],
            "stats": self.get_operation_breakdown(),
        }

        with open(file_path, "w") as f:
            json.dump(data, f, indent=2)

    def clear(self):
        """Clear all timing results."""
        self.results.clear()
        self._active_timers.clear()
        self._nested_level = 0


# Global profiler instance
_global_profiler = PerformanceProfiler("global")


def time_function(
    operation_name: str | None = None, metadata: dict[str, Any] | None = None
):
    """Decorator for timing function execution."""

    def decorator(func: Callable) -> Callable:
        name = operation_name or f"{func.__module__}.{func.__name__}"

        if asyncio.iscoroutinefunction(func):

            async def async_wrapper(*args, **kwargs):
                async with _global_profiler.time_async_operation(name, metadata):
                    return await func(*args, **kwargs)

            return async_wrapper
        else:

            def sync_wrapper(*args, **kwargs):
                with _global_profiler.time_operation(name, metadata):
                    return func(*args, **kwargs)

            return sync_wrapper

    return decorator


@contextmanager
def time_block(operation: str, metadata: dict[str, Any] | None = None):
    """Context manager for timing a block of code using the global profiler."""
    with _global_profiler.time_operation(operation, metadata):
        yield


@asynccontextmanager
async def time_async_block(operation: str, metadata: dict[str, Any] | None = None):
    """Async context manager for timing a block of code using the global profiler."""
    async with _global_profiler.time_async_operation(operation, metadata):
        yield


def get_global_profiler() -> PerformanceProfiler:
    """Get the global profiler instance."""
    return _global_profiler


def print_global_report(**kwargs):
    """Print report from the global profiler."""
    _global_profiler.print_report(**kwargs)


def clear_global_profiler():
    """Clear the global profiler."""
    _global_profiler.clear()


class SearchProfiler(PerformanceProfiler):
    """Specialized profiler for search operations."""

    def __init__(self):
        super().__init__("search_profiler")

    async def profile_search(
        self, search_func: Callable, query: str, **search_kwargs
    ) -> tuple[Any, dict[str, float]]:
        """Profile a complete search operation with detailed breakdown."""

        async with self.time_async_operation(
            "total_search", {"query": query, "kwargs": search_kwargs}
        ):
            # Time the actual search
            async with self.time_async_operation("search_execution", {"query": query}):
                result = await search_func(query, **search_kwargs)

            # Time result processing if we can measure it
            async with self.time_async_operation(
                "result_processing",
                {"result_count": len(result) if hasattr(result, "__len__") else 0},
            ):
                # Simulate any post-processing that might happen
                await asyncio.sleep(0)  # Placeholder for actual processing

        # Return results and timing breakdown
        timing_breakdown = {
            op: self.get_stats(op)["mean"] * 1000  # Convert to ms
            for op in ["total_search", "search_execution", "result_processing"]
            if self.get_stats(op)
        }

        return result, timing_breakdown


# Convenience function for quick search profiling
async def profile_search_operation(
    search_func: Callable, query: str, **kwargs
) -> tuple[Any, dict[str, float]]:
    """Quick function to profile a search operation."""
    profiler = SearchProfiler()
    return await profiler.profile_search(search_func, query, **kwargs)
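Likewise, a minimal usage sketch of the timing utilities above (not part of the package diff): the decorator, context manager, and report helpers come from the file itself; the timed operations are hypothetical.

# Hedged usage sketch; the timed work below is illustrative only.
import asyncio
import time

from mcp_vector_search.utils.timing import (
    print_global_report,
    time_block,
    time_function,
)


@time_function("demo.async_work")
async def async_work() -> None:
    # Timed through the global profiler's async context manager.
    await asyncio.sleep(0.01)


@time_function()  # Operation name defaults to "<module>.<function>"
def sync_work() -> None:
    time.sleep(0.01)


with time_block("demo.block", {"note": "example"}):
    sync_work()
    asyncio.run(async_work())

print_global_report(show_individual=True)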