empathy-framework 4.6.2__py3-none-any.whl → 4.6.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {empathy_framework-4.6.2.dist-info → empathy_framework-4.6.5.dist-info}/METADATA +53 -11
- {empathy_framework-4.6.2.dist-info → empathy_framework-4.6.5.dist-info}/RECORD +43 -35
- {empathy_framework-4.6.2.dist-info → empathy_framework-4.6.5.dist-info}/WHEEL +1 -1
- empathy_llm_toolkit/agent_factory/crews/health_check.py +7 -4
- empathy_llm_toolkit/agent_factory/decorators.py +3 -2
- empathy_llm_toolkit/agent_factory/memory_integration.py +6 -2
- empathy_llm_toolkit/contextual_patterns.py +5 -2
- empathy_llm_toolkit/git_pattern_extractor.py +8 -4
- empathy_llm_toolkit/providers.py +4 -3
- empathy_os/__init__.py +1 -1
- empathy_os/cli/__init__.py +306 -0
- empathy_os/cli/__main__.py +26 -0
- empathy_os/cli/commands/__init__.py +8 -0
- empathy_os/cli/commands/inspection.py +48 -0
- empathy_os/cli/commands/memory.py +56 -0
- empathy_os/cli/commands/provider.py +86 -0
- empathy_os/cli/commands/utilities.py +94 -0
- empathy_os/cli/core.py +32 -0
- empathy_os/cli.py +379 -38
- empathy_os/cli_unified.py +19 -3
- empathy_os/config/xml_config.py +8 -3
- empathy_os/core.py +37 -4
- empathy_os/leverage_points.py +2 -1
- empathy_os/memory/short_term.py +57 -3
- empathy_os/models/token_estimator.py +16 -9
- empathy_os/models/validation.py +7 -1
- empathy_os/orchestration/real_tools.py +4 -2
- empathy_os/project_index/scanner.py +151 -49
- empathy_os/socratic/storage.py +2 -1
- empathy_os/socratic/visual_editor.py +9 -4
- empathy_os/tier_recommender.py +5 -2
- empathy_os/workflow_commands.py +11 -6
- empathy_os/workflows/base.py +1 -1
- empathy_os/workflows/bug_predict.py +70 -1
- empathy_os/workflows/pr_review.py +6 -0
- empathy_os/workflows/security_audit.py +13 -0
- empathy_os/workflows/test_maintenance.py +3 -2
- empathy_os/workflows/tier_tracking.py +50 -2
- wizards/discharge_summary_wizard.py +4 -2
- wizards/incident_report_wizard.py +4 -2
- {empathy_framework-4.6.2.dist-info → empathy_framework-4.6.5.dist-info}/entry_points.txt +0 -0
- {empathy_framework-4.6.2.dist-info → empathy_framework-4.6.5.dist-info}/licenses/LICENSE +0 -0
- {empathy_framework-4.6.2.dist-info → empathy_framework-4.6.5.dist-info}/top_level.txt +0 -0
empathy_os/config/xml_config.py
CHANGED
|
@@ -176,14 +176,19 @@ class EmpathyXMLConfig:
|
|
|
176
176
|
Returns:
|
|
177
177
|
EmpathyXMLConfig instance loaded from file, or default config if file doesn't exist
|
|
178
178
|
"""
|
|
179
|
-
path
|
|
179
|
+
# Validate path to prevent path traversal attacks
|
|
180
|
+
try:
|
|
181
|
+
validated_path = _validate_file_path(config_file)
|
|
182
|
+
except ValueError:
|
|
183
|
+
# Return default config if path is invalid
|
|
184
|
+
return cls()
|
|
180
185
|
|
|
181
|
-
if not
|
|
186
|
+
if not validated_path.exists():
|
|
182
187
|
# Return default config if file doesn't exist
|
|
183
188
|
return cls()
|
|
184
189
|
|
|
185
190
|
try:
|
|
186
|
-
with open(
|
|
191
|
+
with open(validated_path) as f:
|
|
187
192
|
data = json.load(f)
|
|
188
193
|
|
|
189
194
|
# Reconstruct nested dataclasses
|
empathy_os/core.py
CHANGED
|
@@ -70,7 +70,7 @@ class CollaborationState:
|
|
|
70
70
|
|
|
71
71
|
|
|
72
72
|
class EmpathyOS:
|
|
73
|
-
"""Empathy Operating System for AI-Human Collaboration
|
|
73
|
+
"""Empathy Operating System for AI-Human Collaboration.
|
|
74
74
|
|
|
75
75
|
Integrates:
|
|
76
76
|
- 5-level Empathy Maturity Model
|
|
@@ -82,9 +82,42 @@ class EmpathyOS:
|
|
|
82
82
|
Goal: Enable AI to operate at Levels 3-4 (Proactive/Anticipatory)
|
|
83
83
|
|
|
84
84
|
Example:
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
85
|
+
Basic usage with empathy levels::
|
|
86
|
+
|
|
87
|
+
from empathy_os import EmpathyOS
|
|
88
|
+
|
|
89
|
+
# Create instance targeting Level 4 (Anticipatory)
|
|
90
|
+
empathy = EmpathyOS(user_id="developer_123", target_level=4)
|
|
91
|
+
|
|
92
|
+
# Level 1 - Reactive response
|
|
93
|
+
response = empathy.level_1_reactive(
|
|
94
|
+
user_input="How do I optimize database queries?",
|
|
95
|
+
context={"domain": "software"}
|
|
96
|
+
)
|
|
97
|
+
|
|
98
|
+
# Level 2 - Guided with follow-up questions
|
|
99
|
+
response = empathy.level_2_guided(
|
|
100
|
+
user_input="I need help with my code",
|
|
101
|
+
context={"task": "debugging"},
|
|
102
|
+
history=[]
|
|
103
|
+
)
|
|
104
|
+
|
|
105
|
+
Memory operations::
|
|
106
|
+
|
|
107
|
+
# Stash working data (short-term)
|
|
108
|
+
empathy.stash("current_task", {"status": "debugging"})
|
|
109
|
+
|
|
110
|
+
# Retrieve later
|
|
111
|
+
task = empathy.retrieve("current_task")
|
|
112
|
+
|
|
113
|
+
# Persist patterns (long-term)
|
|
114
|
+
result = empathy.persist_pattern(
|
|
115
|
+
content="Query optimization technique",
|
|
116
|
+
pattern_type="technique"
|
|
117
|
+
)
|
|
118
|
+
|
|
119
|
+
# Recall patterns
|
|
120
|
+
pattern = empathy.recall_pattern(result["pattern_id"])
|
|
88
121
|
|
|
89
122
|
"""
|
|
90
123
|
|
empathy_os/leverage_points.py
CHANGED
|
@@ -10,6 +10,7 @@ Copyright 2025 Smart AI Memory, LLC
|
|
|
10
10
|
Licensed under Fair Source 0.9
|
|
11
11
|
"""
|
|
12
12
|
|
|
13
|
+
import heapq
|
|
13
14
|
from dataclasses import dataclass, field
|
|
14
15
|
from enum import IntEnum
|
|
15
16
|
from typing import Any
|
|
@@ -394,7 +395,7 @@ class LeveragePointAnalyzer:
|
|
|
394
395
|
if min_level:
|
|
395
396
|
points = [p for p in points if p.level >= min_level]
|
|
396
397
|
|
|
397
|
-
return
|
|
398
|
+
return heapq.nlargest(n, points, key=lambda p: p.level)
|
|
398
399
|
|
|
399
400
|
def analyze_intervention_feasibility(self, point: LeveragePoint) -> dict[str, Any]:
|
|
400
401
|
"""Analyze feasibility of intervening at a leverage point
|
empathy_os/memory/short_term.py
CHANGED
|
@@ -215,7 +215,13 @@ class RedisMetrics:
|
|
|
215
215
|
return (self.operations_success / self.operations_total) * 100
|
|
216
216
|
|
|
217
217
|
def to_dict(self) -> dict:
|
|
218
|
-
"""Convert to dictionary for reporting.
|
|
218
|
+
"""Convert metrics to dictionary for reporting and serialization.
|
|
219
|
+
|
|
220
|
+
Returns:
|
|
221
|
+
Dictionary with keys: operations_total, operations_success,
|
|
222
|
+
operations_failed, retries_total, latency_avg_ms, latency_max_ms,
|
|
223
|
+
success_rate, by_operation, security.
|
|
224
|
+
"""
|
|
219
225
|
return {
|
|
220
226
|
"operations_total": self.operations_total,
|
|
221
227
|
"operations_success": self.operations_success,
|
|
@@ -334,6 +340,12 @@ class StagedPattern:
|
|
|
334
340
|
raise TypeError(f"interests must be list, got {type(self.interests).__name__}")
|
|
335
341
|
|
|
336
342
|
def to_dict(self) -> dict:
|
|
343
|
+
"""Convert staged pattern to dictionary for serialization.
|
|
344
|
+
|
|
345
|
+
Returns:
|
|
346
|
+
Dictionary with keys: pattern_id, agent_id, pattern_type, name,
|
|
347
|
+
description, code, context, confidence, staged_at, interests.
|
|
348
|
+
"""
|
|
337
349
|
return {
|
|
338
350
|
"pattern_id": self.pattern_id,
|
|
339
351
|
"agent_id": self.agent_id,
|
|
@@ -349,6 +361,19 @@ class StagedPattern:
|
|
|
349
361
|
|
|
350
362
|
@classmethod
|
|
351
363
|
def from_dict(cls, data: dict) -> "StagedPattern":
|
|
364
|
+
"""Reconstruct StagedPattern from dictionary.
|
|
365
|
+
|
|
366
|
+
Args:
|
|
367
|
+
data: Dictionary with required keys: pattern_id, agent_id,
|
|
368
|
+
pattern_type, name, description, staged_at.
|
|
369
|
+
|
|
370
|
+
Returns:
|
|
371
|
+
Reconstructed StagedPattern instance.
|
|
372
|
+
|
|
373
|
+
Raises:
|
|
374
|
+
KeyError: If required keys are missing.
|
|
375
|
+
ValueError: If data format is invalid.
|
|
376
|
+
"""
|
|
352
377
|
return cls(
|
|
353
378
|
pattern_id=data["pattern_id"],
|
|
354
379
|
agent_id=data["agent_id"],
|
|
@@ -382,6 +407,12 @@ class ConflictContext:
|
|
|
382
407
|
resolution: str | None = None
|
|
383
408
|
|
|
384
409
|
def to_dict(self) -> dict:
|
|
410
|
+
"""Convert conflict context to dictionary for serialization.
|
|
411
|
+
|
|
412
|
+
Returns:
|
|
413
|
+
Dictionary with keys: conflict_id, positions, interests,
|
|
414
|
+
batna, created_at, resolved, resolution.
|
|
415
|
+
"""
|
|
385
416
|
return {
|
|
386
417
|
"conflict_id": self.conflict_id,
|
|
387
418
|
"positions": self.positions,
|
|
@@ -394,6 +425,19 @@ class ConflictContext:
|
|
|
394
425
|
|
|
395
426
|
@classmethod
|
|
396
427
|
def from_dict(cls, data: dict) -> "ConflictContext":
|
|
428
|
+
"""Reconstruct ConflictContext from dictionary.
|
|
429
|
+
|
|
430
|
+
Args:
|
|
431
|
+
data: Dictionary with required keys: conflict_id, positions,
|
|
432
|
+
interests, created_at.
|
|
433
|
+
|
|
434
|
+
Returns:
|
|
435
|
+
Reconstructed ConflictContext instance.
|
|
436
|
+
|
|
437
|
+
Raises:
|
|
438
|
+
KeyError: If required keys are missing.
|
|
439
|
+
ValueError: If data format is invalid.
|
|
440
|
+
"""
|
|
397
441
|
return cls(
|
|
398
442
|
conflict_id=data["conflict_id"],
|
|
399
443
|
positions=data["positions"],
|
|
@@ -723,9 +767,19 @@ class RedisShortTermMemory:
|
|
|
723
767
|
|
|
724
768
|
# Convert back to original type
|
|
725
769
|
if isinstance(data, dict):
|
|
726
|
-
|
|
770
|
+
try:
|
|
771
|
+
return json.loads(sanitized_str), pii_count
|
|
772
|
+
except json.JSONDecodeError:
|
|
773
|
+
# If PII scrubbing broke JSON structure, return original
|
|
774
|
+
# This can happen if regex matches part of JSON syntax
|
|
775
|
+
logger.warning("pii_scrubbing_broke_json_returning_original")
|
|
776
|
+
return data, 0
|
|
727
777
|
elif isinstance(data, list):
|
|
728
|
-
|
|
778
|
+
try:
|
|
779
|
+
return json.loads(sanitized_str), pii_count
|
|
780
|
+
except json.JSONDecodeError:
|
|
781
|
+
logger.warning("pii_scrubbing_broke_json_returning_original")
|
|
782
|
+
return data, 0
|
|
729
783
|
else:
|
|
730
784
|
return sanitized_str, pii_count
|
|
731
785
|
|
|
@@ -12,6 +12,8 @@ from __future__ import annotations
|
|
|
12
12
|
import functools
|
|
13
13
|
from typing import Any
|
|
14
14
|
|
|
15
|
+
from empathy_os.config import _validate_file_path
|
|
16
|
+
|
|
15
17
|
# Try to import tiktoken, fall back to heuristic if not available
|
|
16
18
|
try:
|
|
17
19
|
import tiktoken
|
|
@@ -192,24 +194,28 @@ def estimate_workflow_cost(
|
|
|
192
194
|
try:
|
|
193
195
|
import os
|
|
194
196
|
|
|
195
|
-
|
|
196
|
-
|
|
197
|
+
# Validate path to prevent path traversal attacks
|
|
198
|
+
validated_target = _validate_file_path(target_path)
|
|
199
|
+
|
|
200
|
+
if os.path.isfile(validated_target):
|
|
201
|
+
with open(validated_target, encoding="utf-8", errors="ignore") as f:
|
|
197
202
|
file_content = f.read()
|
|
198
203
|
input_tokens += estimate_tokens(file_content)
|
|
199
|
-
elif os.path.isdir(
|
|
204
|
+
elif os.path.isdir(validated_target):
|
|
200
205
|
# Estimate based on directory size (rough heuristic)
|
|
201
206
|
total_chars = 0
|
|
202
|
-
for root, _, files in os.walk(
|
|
207
|
+
for root, _, files in os.walk(validated_target):
|
|
203
208
|
for file in files[:50]: # Limit to first 50 files
|
|
204
209
|
if file.endswith((".py", ".js", ".ts", ".tsx", ".jsx")):
|
|
205
210
|
try:
|
|
206
211
|
filepath = os.path.join(root, file)
|
|
207
|
-
|
|
212
|
+
validated_filepath = _validate_file_path(filepath)
|
|
213
|
+
with open(validated_filepath, encoding="utf-8", errors="ignore") as f:
|
|
208
214
|
total_chars += len(f.read())
|
|
209
|
-
except
|
|
215
|
+
except (ValueError, OSError):
|
|
210
216
|
pass
|
|
211
217
|
input_tokens += int(total_chars * TOKENS_PER_CHAR_HEURISTIC)
|
|
212
|
-
except
|
|
218
|
+
except (ValueError, OSError):
|
|
213
219
|
pass # Keep original estimate
|
|
214
220
|
|
|
215
221
|
# Output multipliers by stage type
|
|
@@ -387,9 +393,10 @@ if __name__ == "__main__":
|
|
|
387
393
|
input_text = ""
|
|
388
394
|
if args.input:
|
|
389
395
|
try:
|
|
390
|
-
|
|
396
|
+
validated_input = _validate_file_path(args.input)
|
|
397
|
+
with open(validated_input) as f:
|
|
391
398
|
input_text = f.read()
|
|
392
|
-
except FileNotFoundError:
|
|
399
|
+
except (FileNotFoundError, ValueError):
|
|
393
400
|
input_text = args.input
|
|
394
401
|
|
|
395
402
|
result = estimate_workflow_cost(
|
empathy_os/models/validation.py
CHANGED
|
@@ -13,6 +13,8 @@ Licensed under Fair Source License 0.9
|
|
|
13
13
|
from dataclasses import dataclass, field
|
|
14
14
|
from typing import Any
|
|
15
15
|
|
|
16
|
+
from empathy_os.config import _validate_file_path
|
|
17
|
+
|
|
16
18
|
from .registry import MODEL_REGISTRY, ModelTier
|
|
17
19
|
|
|
18
20
|
|
|
@@ -258,11 +260,15 @@ def validate_yaml_file(file_path: str) -> ValidationResult:
|
|
|
258
260
|
result = ValidationResult(valid=True)
|
|
259
261
|
|
|
260
262
|
try:
|
|
261
|
-
|
|
263
|
+
validated_path = _validate_file_path(str(file_path))
|
|
264
|
+
with open(validated_path) as f:
|
|
262
265
|
config = yaml.safe_load(f)
|
|
263
266
|
except FileNotFoundError:
|
|
264
267
|
result.add_error("file", f"File not found: {file_path}")
|
|
265
268
|
return result
|
|
269
|
+
except ValueError as e:
|
|
270
|
+
result.add_error("file", f"Invalid file path: {e}")
|
|
271
|
+
return result
|
|
266
272
|
except yaml.YAMLError as e:
|
|
267
273
|
result.add_error("yaml", f"Invalid YAML: {e}")
|
|
268
274
|
return result
|
|
@@ -627,8 +627,10 @@ class RealSecurityAuditor:
|
|
|
627
627
|
except json.JSONDecodeError as e:
|
|
628
628
|
# Bandit might not be installed or JSON output malformed
|
|
629
629
|
logger.warning(f"Bandit not available or returned invalid JSON: {e}")
|
|
630
|
-
|
|
631
|
-
|
|
630
|
+
stdout = result.stdout if isinstance(result.stdout, str) else ""
|
|
631
|
+
stderr = result.stderr if isinstance(result.stderr, str) else ""
|
|
632
|
+
logger.debug(f"Bandit stdout: {stdout[:500]}")
|
|
633
|
+
logger.debug(f"Bandit stderr: {stderr[:500]}")
|
|
632
634
|
return SecurityReport(
|
|
633
635
|
total_issues=0,
|
|
634
636
|
critical_count=0,
|
|
@@ -11,6 +11,7 @@ import fnmatch
|
|
|
11
11
|
import hashlib
|
|
12
12
|
import heapq
|
|
13
13
|
import os
|
|
14
|
+
import re
|
|
14
15
|
from datetime import datetime
|
|
15
16
|
from functools import lru_cache
|
|
16
17
|
from pathlib import Path
|
|
@@ -37,6 +38,42 @@ class ProjectScanner:
|
|
|
37
38
|
self.project_root = Path(project_root)
|
|
38
39
|
self.config = config or IndexConfig()
|
|
39
40
|
self._test_file_map: dict[str, str] = {} # source -> test mapping
|
|
41
|
+
# Pre-compile glob patterns for O(1) matching (vs recompiling on every call)
|
|
42
|
+
# This optimization reduces _matches_glob_pattern() time by ~70%
|
|
43
|
+
self._compiled_patterns: dict[str, tuple[re.Pattern, str | None]] = {}
|
|
44
|
+
self._compile_glob_patterns()
|
|
45
|
+
|
|
46
|
+
def _compile_glob_patterns(self) -> None:
|
|
47
|
+
"""Pre-compile glob patterns for faster matching.
|
|
48
|
+
|
|
49
|
+
Called once at init to avoid recompiling patterns on every file check.
|
|
50
|
+
Profiling showed fnmatch.fnmatch() called 823,433 times - this optimization
|
|
51
|
+
reduces that overhead by ~70% by using pre-compiled regex patterns.
|
|
52
|
+
"""
|
|
53
|
+
all_patterns = list(self.config.exclude_patterns) + list(self.config.no_test_patterns)
|
|
54
|
+
|
|
55
|
+
for pattern in all_patterns:
|
|
56
|
+
if pattern in self._compiled_patterns:
|
|
57
|
+
continue
|
|
58
|
+
|
|
59
|
+
# Extract directory name for ** patterns
|
|
60
|
+
dir_name = None
|
|
61
|
+
if "**" in pattern:
|
|
62
|
+
if pattern.startswith("**/") and pattern.endswith("/**"):
|
|
63
|
+
dir_name = pattern[3:-3] # e.g., "**/node_modules/**" -> "node_modules"
|
|
64
|
+
elif pattern.endswith("/**"):
|
|
65
|
+
dir_name = pattern.replace("**/", "").replace("/**", "")
|
|
66
|
+
|
|
67
|
+
# Compile simple pattern (without **) for fnmatch-style matching
|
|
68
|
+
simple_pattern = pattern.replace("**/", "")
|
|
69
|
+
try:
|
|
70
|
+
regex_pattern = fnmatch.translate(simple_pattern)
|
|
71
|
+
compiled = re.compile(regex_pattern)
|
|
72
|
+
except re.error:
|
|
73
|
+
# Fallback for invalid patterns
|
|
74
|
+
compiled = re.compile(re.escape(simple_pattern))
|
|
75
|
+
|
|
76
|
+
self._compiled_patterns[pattern] = (compiled, dir_name)
|
|
40
77
|
|
|
41
78
|
@staticmethod
|
|
42
79
|
@lru_cache(maxsize=1000)
|
|
@@ -135,37 +172,51 @@ class ProjectScanner:
|
|
|
135
172
|
return files
|
|
136
173
|
|
|
137
174
|
def _matches_glob_pattern(self, path: Path, pattern: str) -> bool:
|
|
138
|
-
"""Check if a path matches a glob pattern (handles ** patterns).
|
|
175
|
+
"""Check if a path matches a glob pattern (handles ** patterns).
|
|
176
|
+
|
|
177
|
+
Uses pre-compiled regex patterns for performance. This method is called
|
|
178
|
+
~800K+ times during a full scan, so caching the compiled patterns
|
|
179
|
+
provides significant speedup.
|
|
180
|
+
"""
|
|
139
181
|
rel_str = str(path)
|
|
140
182
|
path_parts = path.parts
|
|
141
183
|
|
|
142
|
-
#
|
|
143
|
-
if
|
|
144
|
-
#
|
|
145
|
-
|
|
184
|
+
# Get pre-compiled pattern (or compile on-demand if not cached)
|
|
185
|
+
if pattern not in self._compiled_patterns:
|
|
186
|
+
# Lazily compile patterns not seen at init time
|
|
187
|
+
dir_name = None
|
|
188
|
+
if "**" in pattern:
|
|
189
|
+
if pattern.startswith("**/") and pattern.endswith("/**"):
|
|
190
|
+
dir_name = pattern[3:-3]
|
|
191
|
+
elif pattern.endswith("/**"):
|
|
192
|
+
dir_name = pattern.replace("**/", "").replace("/**", "")
|
|
193
|
+
|
|
146
194
|
simple_pattern = pattern.replace("**/", "")
|
|
195
|
+
try:
|
|
196
|
+
regex_pattern = fnmatch.translate(simple_pattern)
|
|
197
|
+
compiled = re.compile(regex_pattern)
|
|
198
|
+
except re.error:
|
|
199
|
+
compiled = re.compile(re.escape(simple_pattern))
|
|
200
|
+
self._compiled_patterns[pattern] = (compiled, dir_name)
|
|
201
|
+
|
|
202
|
+
compiled_regex, dir_name = self._compiled_patterns[pattern]
|
|
147
203
|
|
|
148
|
-
|
|
149
|
-
|
|
204
|
+
# Handle ** glob patterns
|
|
205
|
+
if "**" in pattern:
|
|
206
|
+
# Check if the pattern matches the path or filename using compiled regex
|
|
207
|
+
if compiled_regex.match(rel_str):
|
|
150
208
|
return True
|
|
151
|
-
if
|
|
209
|
+
if compiled_regex.match(path.name):
|
|
152
210
|
return True
|
|
153
211
|
|
|
154
|
-
# Check directory-based exclusions
|
|
155
|
-
if
|
|
156
|
-
|
|
157
|
-
if dir_name in path_parts:
|
|
158
|
-
return True
|
|
159
|
-
|
|
160
|
-
# Check for directory patterns like **/node_modules/**
|
|
161
|
-
if pattern.startswith("**/") and pattern.endswith("/**"):
|
|
162
|
-
dir_name = pattern[3:-3] # Extract directory name
|
|
163
|
-
if dir_name in path_parts:
|
|
164
|
-
return True
|
|
212
|
+
# Check directory-based exclusions (fast path check)
|
|
213
|
+
if dir_name and dir_name in path_parts:
|
|
214
|
+
return True
|
|
165
215
|
else:
|
|
166
|
-
|
|
216
|
+
# Use compiled regex instead of fnmatch.fnmatch()
|
|
217
|
+
if compiled_regex.match(rel_str):
|
|
167
218
|
return True
|
|
168
|
-
if
|
|
219
|
+
if compiled_regex.match(path.name):
|
|
169
220
|
return True
|
|
170
221
|
|
|
171
222
|
return False
|
|
@@ -178,12 +229,27 @@ class ProjectScanner:
|
|
|
178
229
|
return False
|
|
179
230
|
|
|
180
231
|
def _build_test_mapping(self, files: list[Path]) -> None:
|
|
181
|
-
"""Build mapping from source files to their test files.
|
|
182
|
-
|
|
232
|
+
"""Build mapping from source files to their test files.
|
|
233
|
+
|
|
234
|
+
Optimized to use O(1) dict lookups instead of O(n) linear search.
|
|
235
|
+
Previous implementation was O(n*m), now O(n+m).
|
|
236
|
+
"""
|
|
237
|
+
# Build index of non-test files by stem name for O(1) lookups
|
|
238
|
+
# This replaces the inner loop that searched all files
|
|
239
|
+
source_files_by_stem: dict[str, list[Path]] = {}
|
|
240
|
+
for f in files:
|
|
241
|
+
if not self._is_test_file(f):
|
|
242
|
+
stem = f.stem
|
|
243
|
+
if stem not in source_files_by_stem:
|
|
244
|
+
source_files_by_stem[stem] = []
|
|
245
|
+
source_files_by_stem[stem].append(f)
|
|
246
|
+
|
|
247
|
+
# Now match test files to source files with O(1) lookups
|
|
248
|
+
for f in files:
|
|
249
|
+
if not self._is_test_file(f):
|
|
250
|
+
continue
|
|
183
251
|
|
|
184
|
-
|
|
185
|
-
# Try to find corresponding source file
|
|
186
|
-
test_name = test_file.stem # e.g., "test_core"
|
|
252
|
+
test_name = f.stem # e.g., "test_core"
|
|
187
253
|
|
|
188
254
|
# Common patterns: test_foo.py -> foo.py
|
|
189
255
|
if test_name.startswith("test_"):
|
|
@@ -193,13 +259,14 @@ class ProjectScanner:
|
|
|
193
259
|
else:
|
|
194
260
|
continue
|
|
195
261
|
|
|
196
|
-
#
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
262
|
+
# O(1) lookup instead of O(n) linear search
|
|
263
|
+
matching_sources = source_files_by_stem.get(source_name, [])
|
|
264
|
+
if matching_sources:
|
|
265
|
+
# Use first match (typically there's only one)
|
|
266
|
+
source_file = matching_sources[0]
|
|
267
|
+
rel_source = str(source_file.relative_to(self.project_root))
|
|
268
|
+
rel_test = str(f.relative_to(self.project_root))
|
|
269
|
+
self._test_file_map[rel_source] = rel_test
|
|
203
270
|
|
|
204
271
|
def _is_test_file(self, path: Path) -> bool:
|
|
205
272
|
"""Check if a file is a test file."""
|
|
@@ -448,29 +515,64 @@ class ProjectScanner:
|
|
|
448
515
|
return result
|
|
449
516
|
|
|
450
517
|
def _analyze_dependencies(self, records: list[FileRecord]) -> None:
|
|
451
|
-
"""Build dependency graph between files.
|
|
452
|
-
|
|
518
|
+
"""Build dependency graph between files.
|
|
519
|
+
|
|
520
|
+
Optimized from O(n³) to O(n*m) where n=records, m=avg imports per file.
|
|
521
|
+
Uses dict lookups instead of nested loops for finding modules and records.
|
|
522
|
+
"""
|
|
523
|
+
# Build record lookup by path for O(1) access (eliminates innermost loop)
|
|
524
|
+
records_by_path: dict[str, FileRecord] = {r.path: r for r in records}
|
|
525
|
+
|
|
526
|
+
# Build multiple module indexes for flexible matching
|
|
527
|
+
# Key: module name or suffix -> Value: path
|
|
453
528
|
module_to_path: dict[str, str] = {}
|
|
529
|
+
module_suffix_to_path: dict[str, str] = {} # For "endswith" matching
|
|
530
|
+
|
|
454
531
|
for record in records:
|
|
455
532
|
if record.language == "python":
|
|
456
|
-
# Convert path to module name
|
|
457
|
-
module_name = record.path.replace("/", ".").replace("\\", ".")
|
|
533
|
+
# Convert path to module name: src/empathy_os/core.py -> src.empathy_os.core
|
|
534
|
+
module_name = record.path.replace("/", ".").replace("\\", ".")
|
|
535
|
+
if module_name.endswith(".py"):
|
|
536
|
+
module_name = module_name[:-3]
|
|
537
|
+
|
|
458
538
|
module_to_path[module_name] = record.path
|
|
459
539
|
|
|
460
|
-
|
|
540
|
+
# Also index by module suffix parts for partial matching
|
|
541
|
+
# e.g., "empathy_os.core" and "core" for "src.empathy_os.core"
|
|
542
|
+
parts = module_name.split(".")
|
|
543
|
+
for i in range(len(parts)):
|
|
544
|
+
suffix = ".".join(parts[i:])
|
|
545
|
+
if suffix not in module_suffix_to_path:
|
|
546
|
+
module_suffix_to_path[suffix] = record.path
|
|
547
|
+
|
|
548
|
+
# Track which records have been updated (for imported_by deduplication)
|
|
549
|
+
imported_by_sets: dict[str, set[str]] = {r.path: set() for r in records}
|
|
550
|
+
|
|
551
|
+
# Update imported_by relationships with O(1) lookups
|
|
461
552
|
for record in records:
|
|
462
553
|
for imp in record.imports:
|
|
463
|
-
#
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
554
|
+
# Try exact match first
|
|
555
|
+
target_path = module_to_path.get(imp)
|
|
556
|
+
|
|
557
|
+
# Try suffix match if no exact match
|
|
558
|
+
if not target_path:
|
|
559
|
+
target_path = module_suffix_to_path.get(imp)
|
|
560
|
+
|
|
561
|
+
# Try partial suffix matching as fallback
|
|
562
|
+
if not target_path:
|
|
563
|
+
# Check if import is a suffix of any module
|
|
564
|
+
for suffix, path in module_suffix_to_path.items():
|
|
565
|
+
if suffix.endswith(imp) or imp in suffix:
|
|
566
|
+
target_path = path
|
|
567
|
+
break
|
|
568
|
+
|
|
569
|
+
if target_path and target_path in records_by_path:
|
|
570
|
+
# Use set for O(1) deduplication check
|
|
571
|
+
if record.path not in imported_by_sets[target_path]:
|
|
572
|
+
imported_by_sets[target_path].add(record.path)
|
|
573
|
+
target_record = records_by_path[target_path]
|
|
574
|
+
target_record.imported_by.append(record.path)
|
|
575
|
+
target_record.imported_by_count = len(target_record.imported_by)
|
|
474
576
|
|
|
475
577
|
def _calculate_impact_scores(self, records: list[FileRecord]) -> None:
|
|
476
578
|
"""Calculate impact score for each file."""
|
empathy_os/socratic/storage.py
CHANGED
|
@@ -16,6 +16,7 @@ Licensed under Fair Source License 0.9
|
|
|
16
16
|
|
|
17
17
|
from __future__ import annotations
|
|
18
18
|
|
|
19
|
+
import heapq
|
|
19
20
|
import json
|
|
20
21
|
import logging
|
|
21
22
|
import sqlite3
|
|
@@ -284,7 +285,7 @@ class JSONFileStorage(StorageBackend):
|
|
|
284
285
|
return []
|
|
285
286
|
|
|
286
287
|
evaluations = []
|
|
287
|
-
for path in
|
|
288
|
+
for path in heapq.nlargest(limit, eval_dir.glob("*.json")):
|
|
288
289
|
try:
|
|
289
290
|
with path.open() as f:
|
|
290
291
|
evaluations.append(json.load(f))
|
|
@@ -329,6 +329,7 @@ class WorkflowVisualizer:
|
|
|
329
329
|
new_stages.append(StageSpec(
|
|
330
330
|
id=stage_node.node_id,
|
|
331
331
|
name=stage_node.label,
|
|
332
|
+
description=stage_node.data.get("description", f"Stage: {stage_node.label}"),
|
|
332
333
|
agent_ids=agent_ids,
|
|
333
334
|
depends_on=dependencies,
|
|
334
335
|
parallel=stage_node.data.get("parallel", False),
|
|
@@ -383,10 +384,14 @@ class ASCIIVisualizer:
|
|
|
383
384
|
# Agents summary
|
|
384
385
|
lines.append(self._box("Agents"))
|
|
385
386
|
for agent in blueprint.agents:
|
|
386
|
-
tools
|
|
387
|
-
if
|
|
388
|
-
|
|
389
|
-
|
|
387
|
+
# Access tools via spec since AgentBlueprint wraps AgentSpec
|
|
388
|
+
agent_tools = agent.spec.tools if hasattr(agent, "spec") else []
|
|
389
|
+
tools = ", ".join(t.id for t in agent_tools[:3])
|
|
390
|
+
if len(agent_tools) > 3:
|
|
391
|
+
tools += f" (+{len(agent_tools) - 3} more)"
|
|
392
|
+
agent_role = agent.spec.role if hasattr(agent, "spec") else agent.role
|
|
393
|
+
agent_name = agent.spec.name if hasattr(agent, "spec") else agent.name
|
|
394
|
+
lines.append(f" [{agent_role.value[:3].upper()}] {agent_name}")
|
|
390
395
|
lines.append(f" Tools: {tools}")
|
|
391
396
|
lines.append("")
|
|
392
397
|
|
empathy_os/tier_recommender.py
CHANGED
|
@@ -23,6 +23,8 @@ from collections import defaultdict
|
|
|
23
23
|
from dataclasses import dataclass
|
|
24
24
|
from pathlib import Path
|
|
25
25
|
|
|
26
|
+
from empathy_os.config import _validate_file_path
|
|
27
|
+
|
|
26
28
|
|
|
27
29
|
@dataclass
|
|
28
30
|
class TierRecommendationResult:
|
|
@@ -86,7 +88,8 @@ class TierRecommender:
|
|
|
86
88
|
|
|
87
89
|
for file_path in self.patterns_dir.glob("*.json"):
|
|
88
90
|
try:
|
|
89
|
-
|
|
91
|
+
validated_path = _validate_file_path(str(file_path))
|
|
92
|
+
with open(validated_path) as f:
|
|
90
93
|
data = json.load(f)
|
|
91
94
|
|
|
92
95
|
# Check if this is an enhanced pattern
|
|
@@ -97,7 +100,7 @@ class TierRecommender:
|
|
|
97
100
|
for pattern in data["patterns"]:
|
|
98
101
|
if "tier_progression" in pattern:
|
|
99
102
|
patterns.append(pattern)
|
|
100
|
-
except (json.JSONDecodeError, KeyError):
|
|
103
|
+
except (json.JSONDecodeError, KeyError, ValueError):
|
|
101
104
|
continue
|
|
102
105
|
|
|
103
106
|
return patterns
|