htmlgraph 0.20.1__py3-none-any.whl → 0.27.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- htmlgraph/.htmlgraph/.session-warning-state.json +6 -0
- htmlgraph/.htmlgraph/agents.json +72 -0
- htmlgraph/.htmlgraph/htmlgraph.db +0 -0
- htmlgraph/__init__.py +51 -1
- htmlgraph/__init__.pyi +123 -0
- htmlgraph/agent_detection.py +26 -10
- htmlgraph/agent_registry.py +2 -1
- htmlgraph/analytics/__init__.py +8 -1
- htmlgraph/analytics/cli.py +86 -20
- htmlgraph/analytics/cost_analyzer.py +391 -0
- htmlgraph/analytics/cost_monitor.py +664 -0
- htmlgraph/analytics/cost_reporter.py +675 -0
- htmlgraph/analytics/cross_session.py +617 -0
- htmlgraph/analytics/dependency.py +10 -6
- htmlgraph/analytics/pattern_learning.py +771 -0
- htmlgraph/analytics/session_graph.py +707 -0
- htmlgraph/analytics/strategic/__init__.py +80 -0
- htmlgraph/analytics/strategic/cost_optimizer.py +611 -0
- htmlgraph/analytics/strategic/pattern_detector.py +876 -0
- htmlgraph/analytics/strategic/preference_manager.py +709 -0
- htmlgraph/analytics/strategic/suggestion_engine.py +747 -0
- htmlgraph/analytics/work_type.py +67 -27
- htmlgraph/analytics_index.py +53 -20
- htmlgraph/api/__init__.py +3 -0
- htmlgraph/api/cost_alerts_websocket.py +416 -0
- htmlgraph/api/main.py +2498 -0
- htmlgraph/api/static/htmx.min.js +1 -0
- htmlgraph/api/static/style-redesign.css +1344 -0
- htmlgraph/api/static/style.css +1079 -0
- htmlgraph/api/templates/dashboard-redesign.html +1366 -0
- htmlgraph/api/templates/dashboard.html +794 -0
- htmlgraph/api/templates/partials/activity-feed-hierarchical.html +326 -0
- htmlgraph/api/templates/partials/activity-feed.html +1100 -0
- htmlgraph/api/templates/partials/agents-redesign.html +317 -0
- htmlgraph/api/templates/partials/agents.html +317 -0
- htmlgraph/api/templates/partials/event-traces.html +373 -0
- htmlgraph/api/templates/partials/features-kanban-redesign.html +509 -0
- htmlgraph/api/templates/partials/features.html +578 -0
- htmlgraph/api/templates/partials/metrics-redesign.html +346 -0
- htmlgraph/api/templates/partials/metrics.html +346 -0
- htmlgraph/api/templates/partials/orchestration-redesign.html +443 -0
- htmlgraph/api/templates/partials/orchestration.html +198 -0
- htmlgraph/api/templates/partials/spawners.html +375 -0
- htmlgraph/api/templates/partials/work-items.html +613 -0
- htmlgraph/api/websocket.py +538 -0
- htmlgraph/archive/__init__.py +24 -0
- htmlgraph/archive/bloom.py +234 -0
- htmlgraph/archive/fts.py +297 -0
- htmlgraph/archive/manager.py +583 -0
- htmlgraph/archive/search.py +244 -0
- htmlgraph/atomic_ops.py +560 -0
- htmlgraph/attribute_index.py +2 -1
- htmlgraph/bounded_paths.py +539 -0
- htmlgraph/builders/base.py +57 -2
- htmlgraph/builders/bug.py +19 -3
- htmlgraph/builders/chore.py +19 -3
- htmlgraph/builders/epic.py +19 -3
- htmlgraph/builders/feature.py +27 -3
- htmlgraph/builders/insight.py +2 -1
- htmlgraph/builders/metric.py +2 -1
- htmlgraph/builders/pattern.py +2 -1
- htmlgraph/builders/phase.py +19 -3
- htmlgraph/builders/spike.py +29 -3
- htmlgraph/builders/track.py +42 -1
- htmlgraph/cigs/__init__.py +81 -0
- htmlgraph/cigs/autonomy.py +385 -0
- htmlgraph/cigs/cost.py +475 -0
- htmlgraph/cigs/messages_basic.py +472 -0
- htmlgraph/cigs/messaging.py +365 -0
- htmlgraph/cigs/models.py +771 -0
- htmlgraph/cigs/pattern_storage.py +427 -0
- htmlgraph/cigs/patterns.py +503 -0
- htmlgraph/cigs/posttool_analyzer.py +234 -0
- htmlgraph/cigs/reporter.py +818 -0
- htmlgraph/cigs/tracker.py +317 -0
- htmlgraph/cli/.htmlgraph/.session-warning-state.json +6 -0
- htmlgraph/cli/.htmlgraph/agents.json +72 -0
- htmlgraph/cli/.htmlgraph/htmlgraph.db +0 -0
- htmlgraph/cli/__init__.py +42 -0
- htmlgraph/cli/__main__.py +6 -0
- htmlgraph/cli/analytics.py +1424 -0
- htmlgraph/cli/base.py +685 -0
- htmlgraph/cli/constants.py +206 -0
- htmlgraph/cli/core.py +954 -0
- htmlgraph/cli/main.py +147 -0
- htmlgraph/cli/models.py +475 -0
- htmlgraph/cli/templates/__init__.py +1 -0
- htmlgraph/cli/templates/cost_dashboard.py +399 -0
- htmlgraph/cli/work/__init__.py +239 -0
- htmlgraph/cli/work/browse.py +115 -0
- htmlgraph/cli/work/features.py +568 -0
- htmlgraph/cli/work/orchestration.py +676 -0
- htmlgraph/cli/work/report.py +728 -0
- htmlgraph/cli/work/sessions.py +466 -0
- htmlgraph/cli/work/snapshot.py +559 -0
- htmlgraph/cli/work/tracks.py +486 -0
- htmlgraph/cli_commands/__init__.py +1 -0
- htmlgraph/cli_commands/feature.py +195 -0
- htmlgraph/cli_framework.py +115 -0
- htmlgraph/collections/__init__.py +2 -0
- htmlgraph/collections/base.py +197 -14
- htmlgraph/collections/bug.py +2 -1
- htmlgraph/collections/chore.py +2 -1
- htmlgraph/collections/epic.py +2 -1
- htmlgraph/collections/feature.py +2 -1
- htmlgraph/collections/insight.py +2 -1
- htmlgraph/collections/metric.py +2 -1
- htmlgraph/collections/pattern.py +2 -1
- htmlgraph/collections/phase.py +2 -1
- htmlgraph/collections/session.py +194 -0
- htmlgraph/collections/spike.py +13 -2
- htmlgraph/collections/task_delegation.py +241 -0
- htmlgraph/collections/todo.py +14 -1
- htmlgraph/collections/traces.py +487 -0
- htmlgraph/config/cost_models.json +56 -0
- htmlgraph/config.py +190 -0
- htmlgraph/context_analytics.py +2 -1
- htmlgraph/converter.py +116 -7
- htmlgraph/cost_analysis/__init__.py +5 -0
- htmlgraph/cost_analysis/analyzer.py +438 -0
- htmlgraph/dashboard.html +2246 -248
- htmlgraph/dashboard.html.backup +6592 -0
- htmlgraph/dashboard.html.bak +7181 -0
- htmlgraph/dashboard.html.bak2 +7231 -0
- htmlgraph/dashboard.html.bak3 +7232 -0
- htmlgraph/db/__init__.py +38 -0
- htmlgraph/db/queries.py +790 -0
- htmlgraph/db/schema.py +1788 -0
- htmlgraph/decorators.py +317 -0
- htmlgraph/dependency_models.py +2 -1
- htmlgraph/deploy.py +26 -27
- htmlgraph/docs/API_REFERENCE.md +841 -0
- htmlgraph/docs/HTTP_API.md +750 -0
- htmlgraph/docs/INTEGRATION_GUIDE.md +752 -0
- htmlgraph/docs/ORCHESTRATION_PATTERNS.md +717 -0
- htmlgraph/docs/README.md +532 -0
- htmlgraph/docs/__init__.py +77 -0
- htmlgraph/docs/docs_version.py +55 -0
- htmlgraph/docs/metadata.py +93 -0
- htmlgraph/docs/migrations.py +232 -0
- htmlgraph/docs/template_engine.py +143 -0
- htmlgraph/docs/templates/_sections/cli_reference.md.j2 +52 -0
- htmlgraph/docs/templates/_sections/core_concepts.md.j2 +29 -0
- htmlgraph/docs/templates/_sections/sdk_basics.md.j2 +69 -0
- htmlgraph/docs/templates/base_agents.md.j2 +78 -0
- htmlgraph/docs/templates/example_user_override.md.j2 +47 -0
- htmlgraph/docs/version_check.py +163 -0
- htmlgraph/edge_index.py +2 -1
- htmlgraph/error_handler.py +544 -0
- htmlgraph/event_log.py +86 -37
- htmlgraph/event_migration.py +2 -1
- htmlgraph/file_watcher.py +12 -8
- htmlgraph/find_api.py +2 -1
- htmlgraph/git_events.py +67 -9
- htmlgraph/hooks/.htmlgraph/.session-warning-state.json +6 -0
- htmlgraph/hooks/.htmlgraph/agents.json +72 -0
- htmlgraph/hooks/.htmlgraph/index.sqlite +0 -0
- htmlgraph/hooks/__init__.py +8 -0
- htmlgraph/hooks/bootstrap.py +169 -0
- htmlgraph/hooks/cigs_pretool_enforcer.py +354 -0
- htmlgraph/hooks/concurrent_sessions.py +208 -0
- htmlgraph/hooks/context.py +350 -0
- htmlgraph/hooks/drift_handler.py +525 -0
- htmlgraph/hooks/event_tracker.py +790 -99
- htmlgraph/hooks/git_commands.py +175 -0
- htmlgraph/hooks/installer.py +5 -1
- htmlgraph/hooks/orchestrator.py +327 -76
- htmlgraph/hooks/orchestrator_reflector.py +31 -4
- htmlgraph/hooks/post_tool_use_failure.py +32 -7
- htmlgraph/hooks/post_tool_use_handler.py +257 -0
- htmlgraph/hooks/posttooluse.py +92 -19
- htmlgraph/hooks/pretooluse.py +527 -7
- htmlgraph/hooks/prompt_analyzer.py +637 -0
- htmlgraph/hooks/session_handler.py +668 -0
- htmlgraph/hooks/session_summary.py +395 -0
- htmlgraph/hooks/state_manager.py +504 -0
- htmlgraph/hooks/subagent_detection.py +202 -0
- htmlgraph/hooks/subagent_stop.py +369 -0
- htmlgraph/hooks/task_enforcer.py +99 -4
- htmlgraph/hooks/validator.py +212 -91
- htmlgraph/ids.py +2 -1
- htmlgraph/learning.py +125 -100
- htmlgraph/mcp_server.py +2 -1
- htmlgraph/models.py +217 -18
- htmlgraph/operations/README.md +62 -0
- htmlgraph/operations/__init__.py +79 -0
- htmlgraph/operations/analytics.py +339 -0
- htmlgraph/operations/bootstrap.py +289 -0
- htmlgraph/operations/events.py +244 -0
- htmlgraph/operations/fastapi_server.py +231 -0
- htmlgraph/operations/hooks.py +350 -0
- htmlgraph/operations/initialization.py +597 -0
- htmlgraph/operations/initialization.py.backup +228 -0
- htmlgraph/operations/server.py +303 -0
- htmlgraph/orchestration/__init__.py +58 -0
- htmlgraph/orchestration/claude_launcher.py +179 -0
- htmlgraph/orchestration/command_builder.py +72 -0
- htmlgraph/orchestration/headless_spawner.py +281 -0
- htmlgraph/orchestration/live_events.py +377 -0
- htmlgraph/orchestration/model_selection.py +327 -0
- htmlgraph/orchestration/plugin_manager.py +140 -0
- htmlgraph/orchestration/prompts.py +137 -0
- htmlgraph/orchestration/spawner_event_tracker.py +383 -0
- htmlgraph/orchestration/spawners/__init__.py +16 -0
- htmlgraph/orchestration/spawners/base.py +194 -0
- htmlgraph/orchestration/spawners/claude.py +173 -0
- htmlgraph/orchestration/spawners/codex.py +435 -0
- htmlgraph/orchestration/spawners/copilot.py +294 -0
- htmlgraph/orchestration/spawners/gemini.py +471 -0
- htmlgraph/orchestration/subprocess_runner.py +36 -0
- htmlgraph/{orchestration.py → orchestration/task_coordination.py} +16 -8
- htmlgraph/orchestration.md +563 -0
- htmlgraph/orchestrator-system-prompt-optimized.txt +863 -0
- htmlgraph/orchestrator.py +2 -1
- htmlgraph/orchestrator_config.py +357 -0
- htmlgraph/orchestrator_mode.py +115 -4
- htmlgraph/parallel.py +2 -1
- htmlgraph/parser.py +86 -6
- htmlgraph/path_query.py +608 -0
- htmlgraph/pattern_matcher.py +636 -0
- htmlgraph/pydantic_models.py +476 -0
- htmlgraph/quality_gates.py +350 -0
- htmlgraph/query_builder.py +2 -1
- htmlgraph/query_composer.py +509 -0
- htmlgraph/reflection.py +443 -0
- htmlgraph/refs.py +344 -0
- htmlgraph/repo_hash.py +512 -0
- htmlgraph/repositories/__init__.py +292 -0
- htmlgraph/repositories/analytics_repository.py +455 -0
- htmlgraph/repositories/analytics_repository_standard.py +628 -0
- htmlgraph/repositories/feature_repository.py +581 -0
- htmlgraph/repositories/feature_repository_htmlfile.py +668 -0
- htmlgraph/repositories/feature_repository_memory.py +607 -0
- htmlgraph/repositories/feature_repository_sqlite.py +858 -0
- htmlgraph/repositories/filter_service.py +620 -0
- htmlgraph/repositories/filter_service_standard.py +445 -0
- htmlgraph/repositories/shared_cache.py +621 -0
- htmlgraph/repositories/shared_cache_memory.py +395 -0
- htmlgraph/repositories/track_repository.py +552 -0
- htmlgraph/repositories/track_repository_htmlfile.py +619 -0
- htmlgraph/repositories/track_repository_memory.py +508 -0
- htmlgraph/repositories/track_repository_sqlite.py +711 -0
- htmlgraph/sdk/__init__.py +398 -0
- htmlgraph/sdk/__init__.pyi +14 -0
- htmlgraph/sdk/analytics/__init__.py +19 -0
- htmlgraph/sdk/analytics/engine.py +155 -0
- htmlgraph/sdk/analytics/helpers.py +178 -0
- htmlgraph/sdk/analytics/registry.py +109 -0
- htmlgraph/sdk/base.py +484 -0
- htmlgraph/sdk/constants.py +216 -0
- htmlgraph/sdk/core.pyi +308 -0
- htmlgraph/sdk/discovery.py +120 -0
- htmlgraph/sdk/help/__init__.py +12 -0
- htmlgraph/sdk/help/mixin.py +699 -0
- htmlgraph/sdk/mixins/__init__.py +15 -0
- htmlgraph/sdk/mixins/attribution.py +113 -0
- htmlgraph/sdk/mixins/mixin.py +410 -0
- htmlgraph/sdk/operations/__init__.py +12 -0
- htmlgraph/sdk/operations/mixin.py +427 -0
- htmlgraph/sdk/orchestration/__init__.py +17 -0
- htmlgraph/sdk/orchestration/coordinator.py +203 -0
- htmlgraph/sdk/orchestration/spawner.py +204 -0
- htmlgraph/sdk/planning/__init__.py +19 -0
- htmlgraph/sdk/planning/bottlenecks.py +93 -0
- htmlgraph/sdk/planning/mixin.py +211 -0
- htmlgraph/sdk/planning/parallel.py +186 -0
- htmlgraph/sdk/planning/queue.py +210 -0
- htmlgraph/sdk/planning/recommendations.py +87 -0
- htmlgraph/sdk/planning/smart_planning.py +319 -0
- htmlgraph/sdk/session/__init__.py +19 -0
- htmlgraph/sdk/session/continuity.py +57 -0
- htmlgraph/sdk/session/handoff.py +110 -0
- htmlgraph/sdk/session/info.py +309 -0
- htmlgraph/sdk/session/manager.py +103 -0
- htmlgraph/sdk/strategic/__init__.py +26 -0
- htmlgraph/sdk/strategic/mixin.py +563 -0
- htmlgraph/server.py +295 -107
- htmlgraph/session_hooks.py +300 -0
- htmlgraph/session_manager.py +285 -3
- htmlgraph/session_registry.py +587 -0
- htmlgraph/session_state.py +436 -0
- htmlgraph/session_warning.py +2 -1
- htmlgraph/sessions/__init__.py +23 -0
- htmlgraph/sessions/handoff.py +756 -0
- htmlgraph/system_prompts.py +450 -0
- htmlgraph/templates/orchestration-view.html +350 -0
- htmlgraph/track_builder.py +33 -1
- htmlgraph/track_manager.py +38 -0
- htmlgraph/transcript.py +18 -5
- htmlgraph/validation.py +115 -0
- htmlgraph/watch.py +2 -1
- htmlgraph/work_type_utils.py +2 -1
- {htmlgraph-0.20.1.data → htmlgraph-0.27.5.data}/data/htmlgraph/dashboard.html +2246 -248
- {htmlgraph-0.20.1.dist-info → htmlgraph-0.27.5.dist-info}/METADATA +95 -64
- htmlgraph-0.27.5.dist-info/RECORD +337 -0
- {htmlgraph-0.20.1.dist-info → htmlgraph-0.27.5.dist-info}/entry_points.txt +1 -1
- htmlgraph/cli.py +0 -4839
- htmlgraph/sdk.py +0 -2359
- htmlgraph-0.20.1.dist-info/RECORD +0 -118
- {htmlgraph-0.20.1.data → htmlgraph-0.27.5.data}/data/htmlgraph/styles.css +0 -0
- {htmlgraph-0.20.1.data → htmlgraph-0.27.5.data}/data/htmlgraph/templates/AGENTS.md.template +0 -0
- {htmlgraph-0.20.1.data → htmlgraph-0.27.5.data}/data/htmlgraph/templates/CLAUDE.md.template +0 -0
- {htmlgraph-0.20.1.data → htmlgraph-0.27.5.data}/data/htmlgraph/templates/GEMINI.md.template +0 -0
- {htmlgraph-0.20.1.dist-info → htmlgraph-0.27.5.dist-info}/WHEEL +0 -0
htmlgraph/orchestrator.py
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
"""
|
|
2
4
|
SubagentOrchestrator for context-preserving delegation.
|
|
3
5
|
|
|
@@ -83,7 +85,6 @@ Key Patterns
|
|
|
83
85
|
4. Parallel execution: Multiple subagents can work simultaneously
|
|
84
86
|
"""
|
|
85
87
|
|
|
86
|
-
from __future__ import annotations
|
|
87
88
|
|
|
88
89
|
from dataclasses import dataclass, field
|
|
89
90
|
from datetime import datetime
|
|
@@ -0,0 +1,357 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Orchestrator Configuration Management
|
|
3
|
+
|
|
4
|
+
Provides configurable thresholds for delegation enforcement instead of hardcoded values.
|
|
5
|
+
Supports:
|
|
6
|
+
- Threshold configuration (exploration, circuit breaker)
|
|
7
|
+
- Time-based violation decay
|
|
8
|
+
- Rapid sequence collapsing
|
|
9
|
+
- CLI commands to view/edit config
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from datetime import datetime, timedelta, timezone
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import Any
|
|
15
|
+
|
|
16
|
+
import yaml # type: ignore[import-untyped]
|
|
17
|
+
from pydantic import BaseModel
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class ThresholdsConfig(BaseModel):
|
|
21
|
+
"""Threshold configuration for orchestrator enforcement."""
|
|
22
|
+
|
|
23
|
+
exploration_calls: int = 5
|
|
24
|
+
"""How many consecutive Grep/Read/Glob calls before warning."""
|
|
25
|
+
|
|
26
|
+
circuit_breaker_violations: int = 3
|
|
27
|
+
"""How many violations before blocking all operations."""
|
|
28
|
+
|
|
29
|
+
violation_decay_seconds: int = 120
|
|
30
|
+
"""How old violations can be before they don't count (seconds)."""
|
|
31
|
+
|
|
32
|
+
rapid_sequence_window: int = 0
|
|
33
|
+
"""Time window for collapsing rapid violations (seconds). 0 = disabled."""
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class AntiPatternsConfig(BaseModel):
|
|
37
|
+
"""Anti-pattern detection thresholds."""
|
|
38
|
+
|
|
39
|
+
consecutive_bash: int = 5
|
|
40
|
+
consecutive_edit: int = 4
|
|
41
|
+
consecutive_grep: int = 4
|
|
42
|
+
consecutive_read: int = 5
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class ModeConfig(BaseModel):
|
|
46
|
+
"""Configuration for an enforcement mode."""
|
|
47
|
+
|
|
48
|
+
block_after_violations: bool = True
|
|
49
|
+
require_work_items: bool = True
|
|
50
|
+
warn_on_patterns: bool = True
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class ModesConfig(BaseModel):
|
|
54
|
+
"""All enforcement mode configurations."""
|
|
55
|
+
|
|
56
|
+
strict: ModeConfig = ModeConfig(
|
|
57
|
+
block_after_violations=True,
|
|
58
|
+
require_work_items=True,
|
|
59
|
+
warn_on_patterns=True,
|
|
60
|
+
)
|
|
61
|
+
moderate: ModeConfig = ModeConfig(
|
|
62
|
+
block_after_violations=False,
|
|
63
|
+
require_work_items=False,
|
|
64
|
+
warn_on_patterns=True,
|
|
65
|
+
)
|
|
66
|
+
guidance: ModeConfig = ModeConfig(
|
|
67
|
+
block_after_violations=False,
|
|
68
|
+
require_work_items=False,
|
|
69
|
+
warn_on_patterns=False,
|
|
70
|
+
)
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
class OrchestratorConfig(BaseModel):
|
|
74
|
+
"""Complete orchestrator configuration."""
|
|
75
|
+
|
|
76
|
+
thresholds: ThresholdsConfig = ThresholdsConfig()
|
|
77
|
+
anti_patterns: AntiPatternsConfig = AntiPatternsConfig()
|
|
78
|
+
modes: ModesConfig = ModesConfig()
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def get_config_paths() -> list[Path]:
|
|
82
|
+
"""
|
|
83
|
+
Get list of config file paths to check (in priority order).
|
|
84
|
+
|
|
85
|
+
Returns:
|
|
86
|
+
List of paths to check for config file
|
|
87
|
+
"""
|
|
88
|
+
return [
|
|
89
|
+
Path.cwd() / ".htmlgraph" / "orchestrator-config.yaml",
|
|
90
|
+
Path.home() / ".config" / "htmlgraph" / "orchestrator-config.yaml",
|
|
91
|
+
]
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def load_orchestrator_config() -> OrchestratorConfig:
|
|
95
|
+
"""
|
|
96
|
+
Load orchestrator configuration from file or use defaults.
|
|
97
|
+
|
|
98
|
+
Checks multiple locations:
|
|
99
|
+
1. .htmlgraph/orchestrator-config.yaml (project-specific)
|
|
100
|
+
2. ~/.config/htmlgraph/orchestrator-config.yaml (user defaults)
|
|
101
|
+
|
|
102
|
+
Returns:
|
|
103
|
+
OrchestratorConfig with loaded or default values
|
|
104
|
+
"""
|
|
105
|
+
for config_path in get_config_paths():
|
|
106
|
+
if config_path.exists():
|
|
107
|
+
try:
|
|
108
|
+
with open(config_path) as f:
|
|
109
|
+
data = yaml.safe_load(f)
|
|
110
|
+
if data:
|
|
111
|
+
return OrchestratorConfig(**data)
|
|
112
|
+
except Exception:
|
|
113
|
+
# If file is corrupted, continue to next location
|
|
114
|
+
pass
|
|
115
|
+
|
|
116
|
+
# No valid config found, return defaults
|
|
117
|
+
return OrchestratorConfig()
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def save_orchestrator_config(
|
|
121
|
+
config: OrchestratorConfig, path: Path | None = None
|
|
122
|
+
) -> None:
|
|
123
|
+
"""
|
|
124
|
+
Save orchestrator configuration to file.
|
|
125
|
+
|
|
126
|
+
Args:
|
|
127
|
+
config: Configuration to save
|
|
128
|
+
path: Optional path to save to. If None, uses first config path.
|
|
129
|
+
"""
|
|
130
|
+
if path is None:
|
|
131
|
+
path = get_config_paths()[0]
|
|
132
|
+
|
|
133
|
+
# Ensure directory exists
|
|
134
|
+
path.parent.mkdir(parents=True, exist_ok=True)
|
|
135
|
+
|
|
136
|
+
# Convert to dict for YAML serialization
|
|
137
|
+
data = config.model_dump()
|
|
138
|
+
|
|
139
|
+
# Write YAML with comments
|
|
140
|
+
with open(path, "w") as f:
|
|
141
|
+
f.write("# HtmlGraph Orchestrator Configuration\n")
|
|
142
|
+
f.write("# Controls delegation enforcement behavior\n\n")
|
|
143
|
+
yaml.dump(data, f, default_flow_style=False, sort_keys=False)
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def filter_recent_violations(
|
|
147
|
+
violations: list[dict[str, Any]], decay_seconds: int
|
|
148
|
+
) -> list[dict[str, Any]]:
|
|
149
|
+
"""
|
|
150
|
+
Filter violations to only include recent ones within decay window.
|
|
151
|
+
|
|
152
|
+
Args:
|
|
153
|
+
violations: List of violation dicts with 'timestamp' field
|
|
154
|
+
decay_seconds: How old violations can be (in seconds)
|
|
155
|
+
|
|
156
|
+
Returns:
|
|
157
|
+
Filtered list of recent violations only
|
|
158
|
+
"""
|
|
159
|
+
cutoff = datetime.now(timezone.utc) - timedelta(seconds=decay_seconds)
|
|
160
|
+
|
|
161
|
+
recent = []
|
|
162
|
+
for v in violations:
|
|
163
|
+
try:
|
|
164
|
+
# Parse timestamp (handle both ISO format and timestamp float)
|
|
165
|
+
ts = v.get("timestamp")
|
|
166
|
+
if isinstance(ts, str):
|
|
167
|
+
violation_time = datetime.fromisoformat(ts.replace("Z", "+00:00"))
|
|
168
|
+
elif isinstance(ts, (int, float)):
|
|
169
|
+
violation_time = datetime.fromtimestamp(ts, tz=timezone.utc)
|
|
170
|
+
else:
|
|
171
|
+
continue
|
|
172
|
+
|
|
173
|
+
if violation_time > cutoff:
|
|
174
|
+
recent.append(v)
|
|
175
|
+
except Exception:
|
|
176
|
+
# Skip violations with invalid timestamps
|
|
177
|
+
continue
|
|
178
|
+
|
|
179
|
+
return recent
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def collapse_rapid_sequences(
|
|
183
|
+
violations: list[dict[str, Any]], window_seconds: int
|
|
184
|
+
) -> list[dict[str, Any]]:
|
|
185
|
+
"""
|
|
186
|
+
Collapse violations within rapid sequence window to one.
|
|
187
|
+
|
|
188
|
+
This prevents "violation spam" when user makes multiple rapid mistakes.
|
|
189
|
+
|
|
190
|
+
Args:
|
|
191
|
+
violations: List of violation dicts with 'timestamp' field
|
|
192
|
+
window_seconds: Time window for collapsing (seconds)
|
|
193
|
+
|
|
194
|
+
Returns:
|
|
195
|
+
Collapsed list where rapid sequences count as one
|
|
196
|
+
"""
|
|
197
|
+
if not violations:
|
|
198
|
+
return []
|
|
199
|
+
|
|
200
|
+
collapsed = [violations[0]]
|
|
201
|
+
|
|
202
|
+
for v in violations[1:]:
|
|
203
|
+
try:
|
|
204
|
+
# Get timestamps
|
|
205
|
+
last_ts = collapsed[-1].get("timestamp")
|
|
206
|
+
curr_ts = v.get("timestamp")
|
|
207
|
+
|
|
208
|
+
# Parse timestamps
|
|
209
|
+
if isinstance(last_ts, str):
|
|
210
|
+
last_time = datetime.fromisoformat(last_ts.replace("Z", "+00:00"))
|
|
211
|
+
elif isinstance(last_ts, (int, float)):
|
|
212
|
+
last_time = datetime.fromtimestamp(last_ts, tz=timezone.utc)
|
|
213
|
+
else:
|
|
214
|
+
collapsed.append(v)
|
|
215
|
+
continue
|
|
216
|
+
|
|
217
|
+
if isinstance(curr_ts, str):
|
|
218
|
+
curr_time = datetime.fromisoformat(curr_ts.replace("Z", "+00:00"))
|
|
219
|
+
elif isinstance(curr_ts, (int, float)):
|
|
220
|
+
curr_time = datetime.fromtimestamp(curr_ts, tz=timezone.utc)
|
|
221
|
+
else:
|
|
222
|
+
collapsed.append(v)
|
|
223
|
+
continue
|
|
224
|
+
|
|
225
|
+
# Only add if outside rapid sequence window
|
|
226
|
+
if (curr_time - last_time).total_seconds() > window_seconds:
|
|
227
|
+
collapsed.append(v)
|
|
228
|
+
except Exception:
|
|
229
|
+
# On error, include the violation
|
|
230
|
+
collapsed.append(v)
|
|
231
|
+
|
|
232
|
+
return collapsed
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def get_effective_violation_count(
|
|
236
|
+
violations: list[dict[str, Any]], config: OrchestratorConfig
|
|
237
|
+
) -> int:
|
|
238
|
+
"""
|
|
239
|
+
Get effective violation count after applying decay and collapsing.
|
|
240
|
+
|
|
241
|
+
Args:
|
|
242
|
+
violations: Raw list of all violations
|
|
243
|
+
config: Configuration with thresholds
|
|
244
|
+
|
|
245
|
+
Returns:
|
|
246
|
+
Effective violation count (after decay and collapsing)
|
|
247
|
+
"""
|
|
248
|
+
# Apply time-based decay
|
|
249
|
+
recent = filter_recent_violations(
|
|
250
|
+
violations, config.thresholds.violation_decay_seconds
|
|
251
|
+
)
|
|
252
|
+
|
|
253
|
+
# Collapse rapid sequences
|
|
254
|
+
collapsed = collapse_rapid_sequences(
|
|
255
|
+
recent, config.thresholds.rapid_sequence_window
|
|
256
|
+
)
|
|
257
|
+
|
|
258
|
+
return len(collapsed)
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
def get_config_value(config: OrchestratorConfig, key_path: str) -> Any:
|
|
262
|
+
"""
|
|
263
|
+
Get a config value by dot-separated path.
|
|
264
|
+
|
|
265
|
+
Args:
|
|
266
|
+
config: Configuration object
|
|
267
|
+
key_path: Dot-separated path (e.g., "thresholds.exploration_calls")
|
|
268
|
+
|
|
269
|
+
Returns:
|
|
270
|
+
Value at that path
|
|
271
|
+
|
|
272
|
+
Raises:
|
|
273
|
+
KeyError: If path doesn't exist
|
|
274
|
+
"""
|
|
275
|
+
parts = key_path.split(".")
|
|
276
|
+
value: Any = config
|
|
277
|
+
|
|
278
|
+
for part in parts:
|
|
279
|
+
if hasattr(value, part):
|
|
280
|
+
value = getattr(value, part)
|
|
281
|
+
else:
|
|
282
|
+
raise KeyError(f"Config path not found: {key_path}")
|
|
283
|
+
|
|
284
|
+
return value
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
def set_config_value(config: OrchestratorConfig, key_path: str, value: Any) -> None:
|
|
288
|
+
"""
|
|
289
|
+
Set a config value by dot-separated path.
|
|
290
|
+
|
|
291
|
+
Args:
|
|
292
|
+
config: Configuration object to modify
|
|
293
|
+
key_path: Dot-separated path (e.g., "thresholds.exploration_calls")
|
|
294
|
+
value: Value to set
|
|
295
|
+
|
|
296
|
+
Raises:
|
|
297
|
+
KeyError: If path doesn't exist
|
|
298
|
+
"""
|
|
299
|
+
parts = key_path.split(".")
|
|
300
|
+
obj: Any = config
|
|
301
|
+
|
|
302
|
+
# Navigate to parent object
|
|
303
|
+
for part in parts[:-1]:
|
|
304
|
+
if hasattr(obj, part):
|
|
305
|
+
obj = getattr(obj, part)
|
|
306
|
+
else:
|
|
307
|
+
raise KeyError(f"Config path not found: {key_path}")
|
|
308
|
+
|
|
309
|
+
# Set the final attribute
|
|
310
|
+
final_key = parts[-1]
|
|
311
|
+
if hasattr(obj, final_key):
|
|
312
|
+
setattr(obj, final_key, value)
|
|
313
|
+
else:
|
|
314
|
+
raise KeyError(f"Config path not found: {key_path}")
|
|
315
|
+
|
|
316
|
+
|
|
317
|
+
def format_config_display(config: OrchestratorConfig) -> str:
|
|
318
|
+
"""
|
|
319
|
+
Format configuration for human-readable display.
|
|
320
|
+
|
|
321
|
+
Args:
|
|
322
|
+
config: Configuration to format
|
|
323
|
+
|
|
324
|
+
Returns:
|
|
325
|
+
Formatted string representation
|
|
326
|
+
"""
|
|
327
|
+
lines = [
|
|
328
|
+
"HtmlGraph Orchestrator Configuration",
|
|
329
|
+
"=" * 50,
|
|
330
|
+
"",
|
|
331
|
+
"Thresholds:",
|
|
332
|
+
f" exploration_calls: {config.thresholds.exploration_calls}",
|
|
333
|
+
f" circuit_breaker_violations: {config.thresholds.circuit_breaker_violations}",
|
|
334
|
+
f" violation_decay_seconds: {config.thresholds.violation_decay_seconds}",
|
|
335
|
+
f" rapid_sequence_window: {config.thresholds.rapid_sequence_window}",
|
|
336
|
+
"",
|
|
337
|
+
"Anti-patterns:",
|
|
338
|
+
f" consecutive_bash: {config.anti_patterns.consecutive_bash}",
|
|
339
|
+
f" consecutive_edit: {config.anti_patterns.consecutive_edit}",
|
|
340
|
+
f" consecutive_grep: {config.anti_patterns.consecutive_grep}",
|
|
341
|
+
f" consecutive_read: {config.anti_patterns.consecutive_read}",
|
|
342
|
+
"",
|
|
343
|
+
"Modes:",
|
|
344
|
+
" strict:",
|
|
345
|
+
f" block_after_violations: {config.modes.strict.block_after_violations}",
|
|
346
|
+
f" require_work_items: {config.modes.strict.require_work_items}",
|
|
347
|
+
f" warn_on_patterns: {config.modes.strict.warn_on_patterns}",
|
|
348
|
+
" moderate:",
|
|
349
|
+
f" block_after_violations: {config.modes.moderate.block_after_violations}",
|
|
350
|
+
f" require_work_items: {config.modes.moderate.require_work_items}",
|
|
351
|
+
f" warn_on_patterns: {config.modes.moderate.warn_on_patterns}",
|
|
352
|
+
" guidance:",
|
|
353
|
+
f" block_after_violations: {config.modes.guidance.block_after_violations}",
|
|
354
|
+
f" require_work_items: {config.modes.guidance.require_work_items}",
|
|
355
|
+
f" warn_on_patterns: {config.modes.guidance.warn_on_patterns}",
|
|
356
|
+
]
|
|
357
|
+
return "\n".join(lines)
|
htmlgraph/orchestrator_mode.py
CHANGED
|
@@ -8,10 +8,15 @@ State is persisted in .htmlgraph/orchestrator-mode.json
|
|
|
8
8
|
import json
|
|
9
9
|
from datetime import datetime, timezone
|
|
10
10
|
from pathlib import Path
|
|
11
|
-
from typing import Literal
|
|
11
|
+
from typing import Any, Literal
|
|
12
12
|
|
|
13
13
|
from pydantic import BaseModel
|
|
14
14
|
|
|
15
|
+
from htmlgraph.orchestrator_config import (
|
|
16
|
+
get_effective_violation_count,
|
|
17
|
+
load_orchestrator_config,
|
|
18
|
+
)
|
|
19
|
+
|
|
15
20
|
|
|
16
21
|
class OrchestratorMode(BaseModel):
|
|
17
22
|
"""Orchestrator mode state."""
|
|
@@ -34,7 +39,19 @@ class OrchestratorMode(BaseModel):
|
|
|
34
39
|
disabled_by_user: bool = False
|
|
35
40
|
"""Whether user explicitly disabled mode (prevents auto-reactivation)."""
|
|
36
41
|
|
|
37
|
-
|
|
42
|
+
violations: int = 0
|
|
43
|
+
"""Count of delegation violations in current session."""
|
|
44
|
+
|
|
45
|
+
last_violation_at: datetime | None = None
|
|
46
|
+
"""Timestamp of most recent violation."""
|
|
47
|
+
|
|
48
|
+
circuit_breaker_triggered: bool = False
|
|
49
|
+
"""Whether circuit breaker has been triggered (N+ violations, configurable)."""
|
|
50
|
+
|
|
51
|
+
violation_history: list[dict[str, Any]] = []
|
|
52
|
+
"""Full history of violations with timestamps for time-based decay."""
|
|
53
|
+
|
|
54
|
+
def to_dict(self) -> dict[str, Any]:
|
|
38
55
|
"""Convert to dict for JSON serialization."""
|
|
39
56
|
return {
|
|
40
57
|
"enabled": self.enabled,
|
|
@@ -45,10 +62,16 @@ class OrchestratorMode(BaseModel):
|
|
|
45
62
|
"enforcement_level": self.enforcement_level,
|
|
46
63
|
"auto_activated": self.auto_activated,
|
|
47
64
|
"disabled_by_user": self.disabled_by_user,
|
|
65
|
+
"violations": self.violations,
|
|
66
|
+
"last_violation_at": (
|
|
67
|
+
self.last_violation_at.isoformat() if self.last_violation_at else None
|
|
68
|
+
),
|
|
69
|
+
"circuit_breaker_triggered": self.circuit_breaker_triggered,
|
|
70
|
+
"violation_history": self.violation_history,
|
|
48
71
|
}
|
|
49
72
|
|
|
50
73
|
@classmethod
|
|
51
|
-
def from_dict(cls, data: dict) -> "OrchestratorMode":
|
|
74
|
+
def from_dict(cls, data: dict[str, Any]) -> "OrchestratorMode":
|
|
52
75
|
"""Create from dict loaded from JSON."""
|
|
53
76
|
activated_at = data.get("activated_at")
|
|
54
77
|
if activated_at:
|
|
@@ -57,6 +80,13 @@ class OrchestratorMode(BaseModel):
|
|
|
57
80
|
activated_at = activated_at[:-1] + "+00:00"
|
|
58
81
|
activated_at = datetime.fromisoformat(activated_at)
|
|
59
82
|
|
|
83
|
+
last_violation_at = data.get("last_violation_at")
|
|
84
|
+
if last_violation_at:
|
|
85
|
+
# Handle both 'Z' suffix and '+00:00' timezone format
|
|
86
|
+
if last_violation_at.endswith("Z"):
|
|
87
|
+
last_violation_at = last_violation_at[:-1] + "+00:00"
|
|
88
|
+
last_violation_at = datetime.fromisoformat(last_violation_at)
|
|
89
|
+
|
|
60
90
|
return cls(
|
|
61
91
|
enabled=data.get("enabled", False),
|
|
62
92
|
activated_at=activated_at,
|
|
@@ -64,6 +94,10 @@ class OrchestratorMode(BaseModel):
|
|
|
64
94
|
enforcement_level=data.get("enforcement_level", "strict"),
|
|
65
95
|
auto_activated=data.get("auto_activated", False),
|
|
66
96
|
disabled_by_user=data.get("disabled_by_user", False),
|
|
97
|
+
violations=data.get("violations", 0),
|
|
98
|
+
last_violation_at=last_violation_at,
|
|
99
|
+
circuit_breaker_triggered=data.get("circuit_breaker_triggered", False),
|
|
100
|
+
violation_history=data.get("violation_history", []),
|
|
67
101
|
)
|
|
68
102
|
|
|
69
103
|
|
|
@@ -196,7 +230,7 @@ class OrchestratorModeManager:
|
|
|
196
230
|
mode = self.load()
|
|
197
231
|
return not mode.disabled_by_user
|
|
198
232
|
|
|
199
|
-
def status(self) -> dict:
|
|
233
|
+
def status(self) -> dict[str, Any]:
|
|
200
234
|
"""
|
|
201
235
|
Get human-readable status.
|
|
202
236
|
|
|
@@ -214,4 +248,81 @@ class OrchestratorModeManager:
|
|
|
214
248
|
),
|
|
215
249
|
"auto_activated": mode.auto_activated,
|
|
216
250
|
"disabled_by_user": mode.disabled_by_user,
|
|
251
|
+
"violations": mode.violations,
|
|
252
|
+
"circuit_breaker_triggered": mode.circuit_breaker_triggered,
|
|
217
253
|
}
|
|
254
|
+
|
|
255
|
+
def increment_violation(self, tool: str | None = None) -> OrchestratorMode:
|
|
256
|
+
"""
|
|
257
|
+
Increment violation counter and update timestamp.
|
|
258
|
+
|
|
259
|
+
Uses configurable thresholds and time-based decay.
|
|
260
|
+
|
|
261
|
+
Args:
|
|
262
|
+
tool: Optional tool name that caused violation
|
|
263
|
+
|
|
264
|
+
Returns:
|
|
265
|
+
Updated OrchestratorMode with incremented violations
|
|
266
|
+
"""
|
|
267
|
+
mode = self.load()
|
|
268
|
+
config = load_orchestrator_config()
|
|
269
|
+
|
|
270
|
+
# Add to violation history with timestamp
|
|
271
|
+
violation = {
|
|
272
|
+
"timestamp": datetime.now(timezone.utc).isoformat(),
|
|
273
|
+
"tool": tool,
|
|
274
|
+
}
|
|
275
|
+
mode.violation_history.append(violation)
|
|
276
|
+
|
|
277
|
+
# Calculate effective violation count with decay and collapsing
|
|
278
|
+
effective_count = get_effective_violation_count(mode.violation_history, config)
|
|
279
|
+
|
|
280
|
+
# Update counters
|
|
281
|
+
mode.violations = effective_count
|
|
282
|
+
mode.last_violation_at = datetime.now(timezone.utc)
|
|
283
|
+
|
|
284
|
+
# Trigger circuit breaker if threshold reached (configurable)
|
|
285
|
+
threshold = config.thresholds.circuit_breaker_violations
|
|
286
|
+
if effective_count >= threshold:
|
|
287
|
+
mode.circuit_breaker_triggered = True
|
|
288
|
+
|
|
289
|
+
self.save(mode)
|
|
290
|
+
return mode
|
|
291
|
+
|
|
292
|
+
def reset_violations(self) -> OrchestratorMode:
|
|
293
|
+
"""
|
|
294
|
+
Reset violation counter and circuit breaker.
|
|
295
|
+
|
|
296
|
+
Returns:
|
|
297
|
+
Updated OrchestratorMode with reset violations
|
|
298
|
+
"""
|
|
299
|
+
mode = self.load()
|
|
300
|
+
mode.violations = 0
|
|
301
|
+
mode.last_violation_at = None
|
|
302
|
+
mode.circuit_breaker_triggered = False
|
|
303
|
+
mode.violation_history = []
|
|
304
|
+
self.save(mode)
|
|
305
|
+
return mode
|
|
306
|
+
|
|
307
|
+
def is_circuit_breaker_triggered(self) -> bool:
|
|
308
|
+
"""
|
|
309
|
+
Check if circuit breaker is currently triggered.
|
|
310
|
+
|
|
311
|
+
Returns:
|
|
312
|
+
True if circuit breaker is active
|
|
313
|
+
"""
|
|
314
|
+
mode = self.load()
|
|
315
|
+
return mode.circuit_breaker_triggered
|
|
316
|
+
|
|
317
|
+
def get_violation_count(self) -> int:
|
|
318
|
+
"""
|
|
319
|
+
Get current violation count (with time-based decay applied).
|
|
320
|
+
|
|
321
|
+
Returns:
|
|
322
|
+
Effective number of violations in current session
|
|
323
|
+
"""
|
|
324
|
+
mode = self.load()
|
|
325
|
+
config = load_orchestrator_config()
|
|
326
|
+
|
|
327
|
+
# Return effective count with decay and collapsing
|
|
328
|
+
return get_effective_violation_count(mode.violation_history, config)
|
htmlgraph/parallel.py
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
"""
|
|
2
4
|
Parallel workflow execution coordinator for multi-agent task processing.
|
|
3
5
|
|
|
@@ -76,7 +78,6 @@ Best Practices:
|
|
|
76
78
|
- Limit to 3-5 parallel agents for optimal results
|
|
77
79
|
"""
|
|
78
80
|
|
|
79
|
-
from __future__ import annotations
|
|
80
81
|
|
|
81
82
|
from dataclasses import dataclass, field
|
|
82
83
|
from datetime import datetime
|
htmlgraph/parser.py
CHANGED
|
@@ -80,7 +80,8 @@ class HtmlParser:
|
|
|
80
80
|
|
|
81
81
|
def get_article(self) -> Any | None:
|
|
82
82
|
"""Get the main article element (graph node root)."""
|
|
83
|
-
|
|
83
|
+
results = self.query("article[id]")
|
|
84
|
+
return results[0] if results else None
|
|
84
85
|
|
|
85
86
|
def get_node_id(self) -> str | None:
|
|
86
87
|
"""Extract node ID from article element."""
|
|
@@ -152,6 +153,17 @@ class HtmlParser:
|
|
|
152
153
|
if auto_generated:
|
|
153
154
|
metadata["auto_generated"] = auto_generated.lower() == "true"
|
|
154
155
|
|
|
156
|
+
# Pattern sequence (for pattern nodes)
|
|
157
|
+
sequence_attr = self.get_data_attribute(article, "sequence")
|
|
158
|
+
if sequence_attr:
|
|
159
|
+
try:
|
|
160
|
+
import json
|
|
161
|
+
|
|
162
|
+
metadata["sequence"] = json.loads(sequence_attr)
|
|
163
|
+
except (json.JSONDecodeError, ValueError):
|
|
164
|
+
# Invalid JSON, skip
|
|
165
|
+
pass
|
|
166
|
+
|
|
155
167
|
# Timestamps (with fallbacks for session-specific attributes)
|
|
156
168
|
claimed_at = self.get_data_attribute(article, "claimed-at")
|
|
157
169
|
if claimed_at:
|
|
@@ -189,13 +201,15 @@ class HtmlParser:
|
|
|
189
201
|
def get_title(self) -> str | None:
|
|
190
202
|
"""Get node title from h1 or title element."""
|
|
191
203
|
# Try h1 in header first
|
|
192
|
-
|
|
204
|
+
h1_results = self.query("article header h1")
|
|
205
|
+
h1 = h1_results[0] if h1_results else None
|
|
193
206
|
if h1:
|
|
194
207
|
text: str = h1.to_text().strip()
|
|
195
208
|
return text
|
|
196
209
|
|
|
197
210
|
# Fall back to title element
|
|
198
|
-
|
|
211
|
+
title_results = self.query("title")
|
|
212
|
+
title = title_results[0] if title_results else None
|
|
199
213
|
if title:
|
|
200
214
|
text2: str = title.to_text().strip()
|
|
201
215
|
return text2
|
|
@@ -214,7 +228,8 @@ class HtmlParser:
|
|
|
214
228
|
"""
|
|
215
229
|
edges: dict[str, list[dict[str, Any]]] = {}
|
|
216
230
|
|
|
217
|
-
|
|
231
|
+
edge_nav_results = self.query("nav[data-graph-edges]")
|
|
232
|
+
edge_nav = edge_nav_results[0] if edge_nav_results else None
|
|
218
233
|
if not edge_nav:
|
|
219
234
|
return edges
|
|
220
235
|
|
|
@@ -352,7 +367,10 @@ class HtmlParser:
|
|
|
352
367
|
|
|
353
368
|
def get_content(self) -> str:
|
|
354
369
|
"""Extract main content from section[data-content]."""
|
|
355
|
-
|
|
370
|
+
content_section_results = self.query("section[data-content]")
|
|
371
|
+
content_section = (
|
|
372
|
+
content_section_results[0] if content_section_results else None
|
|
373
|
+
)
|
|
356
374
|
if not content_section:
|
|
357
375
|
return ""
|
|
358
376
|
|
|
@@ -368,6 +386,57 @@ class HtmlParser:
|
|
|
368
386
|
|
|
369
387
|
return "\n".join(text_parts)
|
|
370
388
|
|
|
389
|
+
def get_findings(self) -> str | None:
|
|
390
|
+
"""Extract findings from section[data-findings] (Spike-specific)."""
|
|
391
|
+
findings_section_results = self.query("section[data-findings]")
|
|
392
|
+
findings_section = (
|
|
393
|
+
findings_section_results[0] if findings_section_results else None
|
|
394
|
+
)
|
|
395
|
+
if not findings_section:
|
|
396
|
+
return None
|
|
397
|
+
|
|
398
|
+
# Look for findings-content div using full selector
|
|
399
|
+
content_div_results = self.query("section[data-findings] div.findings-content")
|
|
400
|
+
content_div = content_div_results[0] if content_div_results else None
|
|
401
|
+
if content_div:
|
|
402
|
+
text = content_div.to_text().strip()
|
|
403
|
+
return text if text else None
|
|
404
|
+
|
|
405
|
+
# Fallback: get all text excluding h3 header
|
|
406
|
+
text_parts = []
|
|
407
|
+
for child in findings_section.children:
|
|
408
|
+
if hasattr(child, "name") and child.name == "h3":
|
|
409
|
+
continue
|
|
410
|
+
if hasattr(child, "to_text"):
|
|
411
|
+
text = child.to_text().strip()
|
|
412
|
+
if text:
|
|
413
|
+
text_parts.append(text)
|
|
414
|
+
|
|
415
|
+
result = "\n".join(text_parts)
|
|
416
|
+
return result if result else None
|
|
417
|
+
|
|
418
|
+
def get_decision(self) -> str | None:
|
|
419
|
+
"""Extract decision from section[data-decision] (Spike-specific)."""
|
|
420
|
+
decision_section_results = self.query("section[data-decision]")
|
|
421
|
+
decision_section = (
|
|
422
|
+
decision_section_results[0] if decision_section_results else None
|
|
423
|
+
)
|
|
424
|
+
if not decision_section:
|
|
425
|
+
return None
|
|
426
|
+
|
|
427
|
+
# Get text content excluding the h3 header
|
|
428
|
+
text_parts = []
|
|
429
|
+
for child in decision_section.children:
|
|
430
|
+
if hasattr(child, "name") and child.name == "h3":
|
|
431
|
+
continue
|
|
432
|
+
if hasattr(child, "to_text"):
|
|
433
|
+
text = child.to_text().strip()
|
|
434
|
+
if text:
|
|
435
|
+
text_parts.append(text)
|
|
436
|
+
|
|
437
|
+
result = "\n".join(text_parts)
|
|
438
|
+
return result if result else None
|
|
439
|
+
|
|
371
440
|
def parse_full_node(self) -> dict[str, Any]:
|
|
372
441
|
"""
|
|
373
442
|
Parse complete node data from HTML.
|
|
@@ -377,7 +446,7 @@ class HtmlParser:
|
|
|
377
446
|
metadata = self.get_node_metadata()
|
|
378
447
|
title = self.get_title()
|
|
379
448
|
|
|
380
|
-
|
|
449
|
+
result = {
|
|
381
450
|
**metadata,
|
|
382
451
|
"title": title or metadata.get("id", "Untitled"),
|
|
383
452
|
"edges": self.get_edges(),
|
|
@@ -386,6 +455,17 @@ class HtmlParser:
|
|
|
386
455
|
"content": self.get_content(),
|
|
387
456
|
}
|
|
388
457
|
|
|
458
|
+
# Add Spike-specific fields if present
|
|
459
|
+
findings = self.get_findings()
|
|
460
|
+
if findings is not None:
|
|
461
|
+
result["findings"] = findings
|
|
462
|
+
|
|
463
|
+
decision = self.get_decision()
|
|
464
|
+
if decision is not None:
|
|
465
|
+
result["decision"] = decision
|
|
466
|
+
|
|
467
|
+
return result
|
|
468
|
+
|
|
389
469
|
|
|
390
470
|
def parse_html_file(filepath: Path | str) -> dict[str, Any]:
|
|
391
471
|
"""
|