contextual-engine 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- contextual/__init__.py +18 -0
- contextual/__main__.py +11 -0
- contextual/cli.py +339 -0
- contextual/cli_docs.py +685 -0
- contextual/config.py +7 -0
- contextual/core/__init__.py +11 -0
- contextual/core/errors.py +470 -0
- contextual/core/models.py +590 -0
- contextual/docs/__init__.py +66 -0
- contextual/docs/chunker.py +550 -0
- contextual/docs/pipeline.py +513 -0
- contextual/docs/retrieval.py +654 -0
- contextual/docs/watcher.py +265 -0
- contextual/embedding/__init__.py +87 -0
- contextual/embedding/cache.py +455 -0
- contextual/embedding/embedder.py +414 -0
- contextual/embedding/helpers.py +252 -0
- contextual/git/__init__.py +22 -0
- contextual/git/blame.py +334 -0
- contextual/indexing/__init__.py +20 -0
- contextual/indexing/bug_sweep.py +119 -0
- contextual/indexing/chunker.py +691 -0
- contextual/indexing/embedder.py +271 -0
- contextual/indexing/file_watcher.py +154 -0
- contextual/indexing/incremental.py +260 -0
- contextual/indexing/index_writer.py +442 -0
- contextual/indexing/pipeline.py +438 -0
- contextual/indexing/processor.py +436 -0
- contextual/indexing/queries/readme.md +22 -0
- contextual/indexing/symbol_extractor.py +426 -0
- contextual/indexing/tokenizer.py +203 -0
- contextual/integrations/__init__.py +10 -0
- contextual/mcp/__init__.py +15 -0
- contextual/mcp/__main__.py +24 -0
- contextual/mcp/docs_tools.py +286 -0
- contextual/mcp/server.py +118 -0
- contextual/mcp/tools.py +443 -0
- contextual/observability/__init__.py +21 -0
- contextual/observability/logging.py +115 -0
- contextual/py.typed +0 -0
- contextual/retrieval/__init__.py +24 -0
- contextual/retrieval/context_assembler.py +372 -0
- contextual/retrieval/ranker.py +193 -0
- contextual/retrieval/search.py +548 -0
- contextual/security/__init__.py +52 -0
- contextual/security/paths.py +347 -0
- contextual/security/sanitize.py +349 -0
- contextual/security/workspace.py +348 -0
- contextual/storage/__init__.py +36 -0
- contextual/storage/fts_manager.py +273 -0
- contextual/storage/migration_v2.py +289 -0
- contextual/storage/migrations.py +316 -0
- contextual/storage/schema.py +210 -0
- contextual/storage/sqlite_pool.py +468 -0
- contextual/storage/vec0_manager.py +421 -0
- contextual_engine-0.1.0.dist-info/METADATA +297 -0
- contextual_engine-0.1.0.dist-info/RECORD +60 -0
- contextual_engine-0.1.0.dist-info/WHEEL +4 -0
- contextual_engine-0.1.0.dist-info/entry_points.txt +2 -0
- contextual_engine-0.1.0.dist-info/licenses/LICENSE +111 -0
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
"""Core data models, errors, and orchestration engine.
|
|
2
|
+
|
|
3
|
+
This module contains the foundational contracts that all other modules depend on.
|
|
4
|
+
All data shapes (Pydantic models), error types, and the main indexing/retrieval
|
|
5
|
+
orchestrator live here.
|
|
6
|
+
"""
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
# The public export list is intentionally empty for now, so a star-import of
# this package binds no names.
# Exports will be added as models.py, errors.py, and engine.py are built
__all__: list[str] = []
|
|
@@ -0,0 +1,470 @@
|
|
|
1
|
+
"""Error types and exception hierarchy for Contextual.
|
|
2
|
+
|
|
3
|
+
Provides typed error codes and structured exceptions for consistent error handling
|
|
4
|
+
across all modules. Every error carries a code, message, and optional context.
|
|
5
|
+
|
|
6
|
+
Error handling philosophy:
|
|
7
|
+
- Fail fast with clear error messages
|
|
8
|
+
- Never swallow exceptions silently
|
|
9
|
+
- Provide actionable error context
|
|
10
|
+
- Distinguish retriable vs non-retriable errors
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
from enum import StrEnum
|
|
16
|
+
from typing import Any
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
# ============================================================================
|
|
20
|
+
# ERROR CODES - Typed categorical error classification
|
|
21
|
+
# ============================================================================
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class ErrorCode(StrEnum):
    """Categorized error codes for all subsystems.

    Format: SUBSYSTEM_SPECIFIC_CONDITION

    Every member is a string whose value is the lower-cased member name, so a
    code can be logged or serialized as plain text. The "Nxx range" labels on
    the section banners below are only a conceptual grouping; no numeric
    values are attached to the members.

    These codes enable:
    - Structured error logging
    - Client-side error handling (MCP clients)
    - Metrics/alerting aggregation
    - Automatic retry logic

    Members marked "Retriable" are the ones listed in the module-level
    RETRIABLE_ERRORS set.
    """

    # ========================================================================
    # STORAGE ERRORS (1xx range conceptually)
    # ========================================================================

    # SQLite errors
    STORAGE_SQLITE_CONNECTION_FAILED = "storage_sqlite_connection_failed"
    STORAGE_SQLITE_POOL_EXHAUSTED = "storage_sqlite_pool_exhausted"  # Retriable
    STORAGE_SQLITE_QUERY_FAILED = "storage_sqlite_query_failed"
    STORAGE_SQLITE_TRANSACTION_FAILED = "storage_sqlite_transaction_failed"
    STORAGE_SQLITE_SCHEMA_INVALID = "storage_sqlite_schema_invalid"
    STORAGE_SQLITE_WAL_CHECKPOINT_FAILED = "storage_sqlite_wal_checkpoint_failed"
    STORAGE_SQLITE_BUSY = "storage_sqlite_busy"  # Retriable

    # LanceDB errors
    STORAGE_LANCE_CONNECTION_FAILED = "storage_lance_connection_failed"
    STORAGE_LANCE_TABLE_NOT_FOUND = "storage_lance_table_not_found"
    STORAGE_LANCE_INSERT_FAILED = "storage_lance_insert_failed"
    STORAGE_LANCE_SEARCH_FAILED = "storage_lance_search_failed"
    STORAGE_LANCE_INDEX_FAILED = "storage_lance_index_failed"
    STORAGE_LANCE_OPTIMIZE_FAILED = "storage_lance_optimize_failed"

    # Vec0 errors
    STORAGE_VEC_FAILED = "storage_vec_failed"
    STORAGE_VEC_DIMENSION_MISMATCH = "storage_vec_dimension_mismatch"
    STORAGE_VEC_INVALID_PARAMETER = "storage_vec_invalid_parameter"

    # Tantivy errors
    STORAGE_TANTIVY_INDEX_OPEN_FAILED = "storage_tantivy_index_open_failed"
    STORAGE_TANTIVY_INDEX_WRITE_FAILED = "storage_tantivy_index_write_failed"
    STORAGE_TANTIVY_SEARCH_FAILED = "storage_tantivy_search_failed"

    # Migration errors
    STORAGE_MIGRATION_FAILED = "storage_migration_failed"
    STORAGE_MIGRATION_ROLLBACK_FAILED = "storage_migration_rollback_failed"
    STORAGE_MIGRATION_VERSION_CONFLICT = "storage_migration_version_conflict"

    # Temporal query errors
    STORAGE_TEMPORAL_QUERY_INVALID = "storage_temporal_query_invalid"
    STORAGE_TEMPORAL_CONTRADICTION_DETECTED = "storage_temporal_contradiction_detected"

    # ========================================================================
    # INDEXING ERRORS (2xx range conceptually)
    # ========================================================================

    # Parser errors
    INDEXING_PARSER_INIT_FAILED = "indexing_parser_init_failed"
    INDEXING_PARSER_PARSE_FAILED = "indexing_parser_parse_failed"
    INDEXING_PARSER_UNSUPPORTED_LANGUAGE = "indexing_parser_unsupported_language"
    INDEXING_PARSER_POOL_EXHAUSTED = "indexing_parser_pool_exhausted"  # Retriable

    # Chunking errors
    INDEXING_CHUNKER_FILE_TOO_LARGE = "indexing_chunker_file_too_large"
    INDEXING_CHUNKER_BINARY_FILE = "indexing_chunker_binary_file"
    INDEXING_CHUNKER_ENCODING_ERROR = "indexing_chunker_encoding_error"
    INDEXING_CHUNKER_SPLIT_FAILED = "indexing_chunker_split_failed"

    # File processing errors
    INDEXING_FILE_NOT_FOUND = "indexing_file_not_found"
    INDEXING_FILE_PERMISSION_DENIED = "indexing_file_permission_denied"
    INDEXING_FILE_READ_FAILED = "indexing_file_read_failed"
    INDEXING_FILE_IGNORED = "indexing_file_ignored"  # Not an error, just logged

    # Embedding errors
    INDEXING_EMBEDDING_MODEL_LOAD_FAILED = "indexing_embedding_model_load_failed"
    INDEXING_EMBEDDING_INFERENCE_FAILED = "indexing_embedding_inference_failed"
    INDEXING_EMBEDDING_BATCH_TOO_LARGE = "indexing_embedding_batch_too_large"
    INDEXING_EMBEDDING_OOM = "indexing_embedding_oom"  # Out of memory; retriable

    # Content hash errors
    INDEXING_HASH_COLLISION = "indexing_hash_collision"  # Extremely unlikely SHA-256 collision

    # ========================================================================
    # GIT ERRORS (3xx range conceptually)
    # ========================================================================

    GIT_REPO_NOT_FOUND = "git_repo_not_found"
    GIT_REPO_INVALID = "git_repo_invalid"
    GIT_COMMIT_WALK_FAILED = "git_commit_walk_failed"
    GIT_BLAME_FAILED = "git_blame_failed"
    GIT_DIFF_FAILED = "git_diff_failed"
    GIT_SHALLOW_CLONE = "git_shallow_clone"  # Warning, not error
    GIT_DETACHED_HEAD = "git_detached_head"  # Warning
    GIT_REBASE_DETECTED = "git_rebase_detected"  # Requires special handling
    GIT_FORCE_PUSH_DETECTED = "git_force_push_detected"  # Requires re-indexing

    # ========================================================================
    # RETRIEVAL ERRORS (4xx range conceptually)
    # ========================================================================

    # Search errors
    RETRIEVAL_QUERY_EMPTY = "retrieval_query_empty"
    RETRIEVAL_QUERY_TOO_LONG = "retrieval_query_too_long"
    RETRIEVAL_BM25_FAILED = "retrieval_bm25_failed"
    RETRIEVAL_DENSE_SEARCH_FAILED = "retrieval_dense_search_failed"
    RETRIEVAL_FUSION_FAILED = "retrieval_fusion_failed"
    RETRIEVAL_RERANK_FAILED = "retrieval_rerank_failed"
    RETRIEVAL_HYDRATION_FAILED = "retrieval_hydration_failed"
    RETRIEVAL_NO_RESULTS = "retrieval_no_results"  # Not an error, informational

    # Context assembly errors
    RETRIEVAL_CONTEXT_BUDGET_EXCEEDED = "retrieval_context_budget_exceeded"
    RETRIEVAL_CONTEXT_ASSEMBLY_FAILED = "retrieval_context_assembly_failed"

    # ========================================================================
    # SECURITY ERRORS (5xx range conceptually)
    # ========================================================================

    # Path safety errors
    SECURITY_PATH_TRAVERSAL = "security_path_traversal"
    SECURITY_PATH_OUTSIDE_ROOT = "security_path_outside_root"
    SECURITY_PATH_SYMLINK_LOOP = "security_path_symlink_loop"
    SECURITY_PATH_INVALID = "security_path_invalid"
    SECURITY_PATH_RESERVED_NAME = "security_path_reserved_name"  # Windows: CON, PRN, etc.

    # Sanitization errors
    SECURITY_SQL_INJECTION_ATTEMPT = "security_sql_injection_attempt"
    SECURITY_FTS5_INJECTION_ATTEMPT = "security_fts5_injection_attempt"
    SECURITY_UNICODE_ATTACK = "security_unicode_attack"

    # Workspace errors
    SECURITY_WORKSPACE_PERMISSION_DENIED = "security_workspace_permission_denied"
    SECURITY_WORKSPACE_INIT_FAILED = "security_workspace_init_failed"
    SECURITY_WORKSPACE_ISOLATION_VIOLATION = "security_workspace_isolation_violation"

    # Prompt injection
    SECURITY_PROMPT_INJECTION_DETECTED = "security_prompt_injection_detected"

    # ========================================================================
    # CONFIGURATION ERRORS (6xx range conceptually)
    # ========================================================================

    CONFIG_FILE_NOT_FOUND = "config_file_not_found"
    CONFIG_FILE_INVALID = "config_file_invalid"
    CONFIG_PARSE_FAILED = "config_parse_failed"
    CONFIG_VALIDATION_FAILED = "config_validation_failed"
    CONFIG_MISSING_REQUIRED = "config_missing_required"
    CONFIG_VALUE_OUT_OF_RANGE = "config_value_out_of_range"

    # ========================================================================
    # MCP SERVER ERRORS (7xx range conceptually)
    # ========================================================================

    MCP_TOOL_CALL_FAILED = "mcp_tool_call_failed"
    MCP_INVALID_PARAMETERS = "mcp_invalid_parameters"
    MCP_TRANSPORT_ERROR = "mcp_transport_error"
    MCP_TIMEOUT = "mcp_timeout"  # Retriable
    MCP_CLIENT_DISCONNECTED = "mcp_client_disconnected"  # Retriable

    # ========================================================================
    # SYSTEM ERRORS (8xx range conceptually)
    # ========================================================================

    SYSTEM_DISK_FULL = "system_disk_full"
    SYSTEM_OOM = "system_oom"  # Out of memory
    SYSTEM_PERMISSION_DENIED = "system_permission_denied"
    SYSTEM_RESOURCE_EXHAUSTED = "system_resource_exhausted"  # Retriable
    SYSTEM_SHUTDOWN = "system_shutdown"  # Graceful shutdown in progress

    # ========================================================================
    # UNKNOWN/INTERNAL ERRORS (9xx range conceptually)
    # ========================================================================

    INTERNAL_ASSERTION_FAILED = "internal_assertion_failed"
    INTERNAL_INVARIANT_VIOLATED = "internal_invariant_violated"
    INTERNAL_UNEXPECTED_STATE = "internal_unexpected_state"
    UNKNOWN = "unknown"
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
# ============================================================================
|
|
205
|
+
# RETRY CLASSIFICATION
|
|
206
|
+
# ============================================================================
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
# Immutable set of the codes classified as retriable; is_retriable() simply
# tests membership in it.
RETRIABLE_ERRORS = frozenset(
    (
        ErrorCode.STORAGE_SQLITE_BUSY,
        ErrorCode.STORAGE_SQLITE_POOL_EXHAUSTED,
        ErrorCode.INDEXING_PARSER_POOL_EXHAUSTED,
        ErrorCode.INDEXING_EMBEDDING_OOM,
        ErrorCode.MCP_TIMEOUT,
        ErrorCode.MCP_CLIENT_DISCONNECTED,
        ErrorCode.SYSTEM_RESOURCE_EXHAUSTED,
    )
)
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
def is_retriable(code: ErrorCode) -> bool:
    """Report whether an error code is classified as retriable.

    Args:
        code: The error code to classify.

    Returns:
        True when ``code`` is a member of ``RETRIABLE_ERRORS``, False for
        every other code.
    """
    retriable = code in RETRIABLE_ERRORS
    return retriable
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
# ============================================================================
|
|
233
|
+
# EXCEPTION HIERARCHY
|
|
234
|
+
# ============================================================================
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
class ContextualExceptionError(Exception):
    """Base exception for all Contextual errors.

    All custom exceptions inherit from this to enable broad catch blocks
    while maintaining error code granularity.

    Instances survive ``pickle`` and ``copy`` round-trips: ``__reduce__``
    rebuilds them from ``(code, message, context)`` rather than from the
    single formatted string stored in ``args``.

    Attributes:
        code: Typed error code for categorization.
        message: Human-readable error message.
        context: Additional error context (file paths, query text, etc.).
            Always a dict owned by the exception; empty when none was given.
    """

    def __init__(
        self,
        code: ErrorCode,
        message: str,
        context: dict[str, Any] | None = None,
    ) -> None:
        """Initialize a Contextual exception.

        Args:
            code: Error code categorizing this error.
            message: Human-readable error description.
            context: Optional contextual information. It is shallow-copied so
                that later mutation of the caller's dict cannot change this
                error after its message has been formatted.
        """
        self.code = code
        self.message = message
        self.context = dict(context) if context else {}
        # ``args`` deliberately holds only the formatted text so that
        # ``str(exc)`` and ``exc.args[0]`` give the full message.
        super().__init__(self._format_message())

    def __reduce__(self) -> tuple[Any, ...]:
        """Describe how to rebuild this exception for ``pickle`` and ``copy``.

        The default ``Exception`` reduction would call ``cls(*self.args)``,
        i.e. with the formatted string only, which fails because ``message``
        is a required argument. Rebuilding from the original constructor
        arguments avoids that; the instance ``__dict__`` is passed as state so
        any extra attributes set on the instance are carried over too.

        Returns:
            A ``(callable, args, state)`` reduction tuple.
        """
        return (
            type(self),
            (self.code, self.message, self.context),
            self.__dict__,
        )

    def _format_message(self) -> str:
        """Format the complete error message.

        Returns:
            ``"[<code value>] <message>"``, followed by
            ``" (k1=v1, k2=v2)"`` when the context is non-empty.
        """
        parts = [f"[{self.code.value}] {self.message}"]
        if self.context:
            context_str = ", ".join(f"{k}={v}" for k, v in self.context.items())
            parts.append(f"({context_str})")
        return " ".join(parts)

    def is_retriable(self) -> bool:
        """Check if this error represents a retriable condition.

        Returns:
            True if the error should be retried.
        """
        # Resolves to the module-level is_retriable() function, not this method.
        return is_retriable(self.code)
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
# ============================================================================
|
|
289
|
+
# SUBSYSTEM-SPECIFIC EXCEPTIONS
|
|
290
|
+
# ============================================================================
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
class StorageError(ContextualExceptionError):
    """Errors from the storage layer (SQLite, LanceDB, tantivy).

    Marker subclass: it defines no attributes or methods of its own, so
    construction and message formatting come from ContextualExceptionError.
    """
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
class IndexingError(ContextualExceptionError):
    """Errors from the indexing pipeline (parsing, chunking, embedding).

    Marker subclass: it defines no attributes or methods of its own, so
    construction and message formatting come from ContextualExceptionError.
    """
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
class GitError(ContextualExceptionError):
    """Errors from git operations (commit walking, blame, diff).

    Marker subclass: it defines no attributes or methods of its own, so
    construction and message formatting come from ContextualExceptionError.
    """
|
|
303
|
+
|
|
304
|
+
|
|
305
|
+
class RetrievalError(ContextualExceptionError):
    """Errors from the retrieval pipeline (search, fusion, reranking).

    Marker subclass: it defines no attributes or methods of its own, so
    construction and message formatting come from ContextualExceptionError.
    """
|
|
307
|
+
|
|
308
|
+
|
|
309
|
+
class SecurityError(ContextualExceptionError):
    """Errors from security checks (path safety, sanitization, isolation).

    Marker subclass: it defines no attributes or methods of its own, so
    construction and message formatting come from ContextualExceptionError.
    """
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
class ConfigError(ContextualExceptionError):
    """Errors from configuration loading and validation.

    Marker subclass: it defines no attributes or methods of its own, so
    construction and message formatting come from ContextualExceptionError.
    """
|
|
315
|
+
|
|
316
|
+
|
|
317
|
+
class MCPError(ContextualExceptionError):
    """Errors from MCP server operations.

    Marker subclass: it defines no attributes or methods of its own, so
    construction and message formatting come from ContextualExceptionError.
    """
|
|
319
|
+
|
|
320
|
+
|
|
321
|
+
class ContextualSystemError(ContextualExceptionError):
    """System-level errors (disk, memory, permissions).

    Marker subclass: it defines no attributes or methods of its own, so
    construction and message formatting come from ContextualExceptionError.
    """
|
|
323
|
+
|
|
324
|
+
|
|
325
|
+
class InternalError(ContextualExceptionError):
    """Internal errors indicating bugs (assertion failures, invariant violations).

    Marker subclass: it defines no attributes or methods of its own, so
    construction and message formatting come from ContextualExceptionError.
    """
|
|
327
|
+
|
|
328
|
+
|
|
329
|
+
# ============================================================================
|
|
330
|
+
# ERROR CONTEXT HELPERS
|
|
331
|
+
# ============================================================================
|
|
332
|
+
|
|
333
|
+
|
|
334
|
+
def storage_context(
    db_path: str | None = None,
    table: str | None = None,
    query: str | None = None,
    *,
    max_query_length: int = 200,
) -> dict[str, Any]:
    """Build context dict for storage errors.

    Only arguments with a truthy value produce an entry, so ``None`` and
    empty strings are omitted. Keys appear in the order ``db_path``,
    ``table``, ``query``.

    Args:
        db_path: Database file path.
        table: Table name.
        query: SQL query. When longer than ``max_query_length`` characters it
            is cut to that length and ``"..."`` is appended.
        max_query_length: Maximum number of query characters kept before the
            ellipsis. Negative values are treated as 0. Defaults to 200.

    Returns:
        Context dictionary.
    """
    context: dict[str, Any] = {}
    if db_path:
        context["db_path"] = db_path
    if table:
        context["table"] = table
    if query:
        # Clamp so a negative limit cannot turn the slice into "drop the tail".
        limit = max(max_query_length, 0)
        context["query"] = f"{query[:limit]}..." if len(query) > limit else query
    return context
|
|
364
|
+
|
|
365
|
+
|
|
366
|
+
def indexing_context(
    file_path: str | None = None,
    language: str | None = None,
    line: int | None = None,
) -> dict[str, Any]:
    """Assemble the context mapping attached to indexing errors.

    Args:
        file_path: File being processed; omitted when falsy.
        language: Programming language; omitted when falsy.
        line: Line number where the error occurred; omitted only when it is
            ``None``, so 0 is kept.

    Returns:
        Dictionary holding the supplied values, in the order ``file_path``,
        ``language``, ``line``.
    """
    text_fields = (("file_path", file_path), ("language", language))
    details: dict[str, Any] = {name: value for name, value in text_fields if value}
    if line is None:
        return details
    details["line"] = line
    return details
|
|
389
|
+
|
|
390
|
+
|
|
391
|
+
def git_context(
    repo_path: str | None = None,
    commit_sha: str | None = None,
    branch: str | None = None,
) -> dict[str, Any]:
    """Assemble the context mapping attached to git errors.

    Args:
        repo_path: Repository path; omitted when falsy.
        commit_sha: Commit SHA; only its first 8 characters are stored, and
            it is omitted when falsy.
        branch: Branch name; omitted when falsy.

    Returns:
        Dictionary holding the supplied values, in the order ``repo_path``,
        ``commit_sha``, ``branch``.
    """
    short_sha = commit_sha[:8] if commit_sha else None  # Short SHA
    candidates = (
        ("repo_path", repo_path),
        ("commit_sha", short_sha),
        ("branch", branch),
    )
    return {name: value for name, value in candidates if value}
|
|
414
|
+
|
|
415
|
+
|
|
416
|
+
def retrieval_context(
    query: str | None = None,
    top_k: int | None = None,
    phase: str | None = None,
    *,
    max_query_length: int = 100,
) -> dict[str, Any]:
    """Build context dict for retrieval errors.

    Keys appear in the order ``query``, ``top_k``, ``phase``.

    Args:
        query: Search query; omitted when falsy. When longer than
            ``max_query_length`` characters it is cut to that length and
            ``"..."`` is appended.
        top_k: Number of results requested; omitted only when it is ``None``,
            so 0 is kept.
        phase: Pipeline phase (bm25, dense, fusion, rerank); omitted when
            falsy.
        max_query_length: Maximum number of query characters kept before the
            ellipsis. Negative values are treated as 0. Defaults to 100.

    Returns:
        Context dictionary.
    """
    context: dict[str, Any] = {}
    if query:
        # Clamp so a negative limit cannot turn the slice into "drop the tail".
        limit = max(max_query_length, 0)
        context["query"] = f"{query[:limit]}..." if len(query) > limit else query
    if top_k is not None:
        context["top_k"] = top_k
    if phase:
        context["phase"] = phase
    return context
|
|
446
|
+
|
|
447
|
+
|
|
448
|
+
def security_context(
    path: str | None = None,
    root: str | None = None,
    attack_type: str | None = None,
) -> dict[str, Any]:
    """Assemble the context mapping attached to security errors.

    Args:
        path: Path that violated the security check; omitted when falsy.
        root: Expected root path; omitted when falsy.
        attack_type: Type of attack detected; omitted when falsy.

    Returns:
        Dictionary holding the supplied values, in the order ``path``,
        ``root``, ``attack_type``.
    """
    supplied = {"path": path, "root": root, "attack_type": attack_type}
    return {name: value for name, value in supplied.items() if value}
|