htmlgraph 0.9.3__py3-none-any.whl → 0.27.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- htmlgraph/.htmlgraph/.session-warning-state.json +6 -0
- htmlgraph/.htmlgraph/agents.json +72 -0
- htmlgraph/.htmlgraph/htmlgraph.db +0 -0
- htmlgraph/__init__.py +173 -17
- htmlgraph/__init__.pyi +123 -0
- htmlgraph/agent_detection.py +127 -0
- htmlgraph/agent_registry.py +45 -30
- htmlgraph/agents.py +160 -107
- htmlgraph/analytics/__init__.py +9 -2
- htmlgraph/analytics/cli.py +190 -51
- htmlgraph/analytics/cost_analyzer.py +391 -0
- htmlgraph/analytics/cost_monitor.py +664 -0
- htmlgraph/analytics/cost_reporter.py +675 -0
- htmlgraph/analytics/cross_session.py +617 -0
- htmlgraph/analytics/dependency.py +192 -100
- htmlgraph/analytics/pattern_learning.py +771 -0
- htmlgraph/analytics/session_graph.py +707 -0
- htmlgraph/analytics/strategic/__init__.py +80 -0
- htmlgraph/analytics/strategic/cost_optimizer.py +611 -0
- htmlgraph/analytics/strategic/pattern_detector.py +876 -0
- htmlgraph/analytics/strategic/preference_manager.py +709 -0
- htmlgraph/analytics/strategic/suggestion_engine.py +747 -0
- htmlgraph/analytics/work_type.py +190 -14
- htmlgraph/analytics_index.py +135 -51
- htmlgraph/api/__init__.py +3 -0
- htmlgraph/api/cost_alerts_websocket.py +416 -0
- htmlgraph/api/main.py +2498 -0
- htmlgraph/api/static/htmx.min.js +1 -0
- htmlgraph/api/static/style-redesign.css +1344 -0
- htmlgraph/api/static/style.css +1079 -0
- htmlgraph/api/templates/dashboard-redesign.html +1366 -0
- htmlgraph/api/templates/dashboard.html +794 -0
- htmlgraph/api/templates/partials/activity-feed-hierarchical.html +326 -0
- htmlgraph/api/templates/partials/activity-feed.html +1100 -0
- htmlgraph/api/templates/partials/agents-redesign.html +317 -0
- htmlgraph/api/templates/partials/agents.html +317 -0
- htmlgraph/api/templates/partials/event-traces.html +373 -0
- htmlgraph/api/templates/partials/features-kanban-redesign.html +509 -0
- htmlgraph/api/templates/partials/features.html +578 -0
- htmlgraph/api/templates/partials/metrics-redesign.html +346 -0
- htmlgraph/api/templates/partials/metrics.html +346 -0
- htmlgraph/api/templates/partials/orchestration-redesign.html +443 -0
- htmlgraph/api/templates/partials/orchestration.html +198 -0
- htmlgraph/api/templates/partials/spawners.html +375 -0
- htmlgraph/api/templates/partials/work-items.html +613 -0
- htmlgraph/api/websocket.py +538 -0
- htmlgraph/archive/__init__.py +24 -0
- htmlgraph/archive/bloom.py +234 -0
- htmlgraph/archive/fts.py +297 -0
- htmlgraph/archive/manager.py +583 -0
- htmlgraph/archive/search.py +244 -0
- htmlgraph/atomic_ops.py +560 -0
- htmlgraph/attribute_index.py +208 -0
- htmlgraph/bounded_paths.py +539 -0
- htmlgraph/builders/__init__.py +14 -0
- htmlgraph/builders/base.py +118 -29
- htmlgraph/builders/bug.py +150 -0
- htmlgraph/builders/chore.py +119 -0
- htmlgraph/builders/epic.py +150 -0
- htmlgraph/builders/feature.py +31 -6
- htmlgraph/builders/insight.py +195 -0
- htmlgraph/builders/metric.py +217 -0
- htmlgraph/builders/pattern.py +202 -0
- htmlgraph/builders/phase.py +162 -0
- htmlgraph/builders/spike.py +52 -19
- htmlgraph/builders/track.py +148 -72
- htmlgraph/cigs/__init__.py +81 -0
- htmlgraph/cigs/autonomy.py +385 -0
- htmlgraph/cigs/cost.py +475 -0
- htmlgraph/cigs/messages_basic.py +472 -0
- htmlgraph/cigs/messaging.py +365 -0
- htmlgraph/cigs/models.py +771 -0
- htmlgraph/cigs/pattern_storage.py +427 -0
- htmlgraph/cigs/patterns.py +503 -0
- htmlgraph/cigs/posttool_analyzer.py +234 -0
- htmlgraph/cigs/reporter.py +818 -0
- htmlgraph/cigs/tracker.py +317 -0
- htmlgraph/cli/.htmlgraph/.session-warning-state.json +6 -0
- htmlgraph/cli/.htmlgraph/agents.json +72 -0
- htmlgraph/cli/.htmlgraph/htmlgraph.db +0 -0
- htmlgraph/cli/__init__.py +42 -0
- htmlgraph/cli/__main__.py +6 -0
- htmlgraph/cli/analytics.py +1424 -0
- htmlgraph/cli/base.py +685 -0
- htmlgraph/cli/constants.py +206 -0
- htmlgraph/cli/core.py +954 -0
- htmlgraph/cli/main.py +147 -0
- htmlgraph/cli/models.py +475 -0
- htmlgraph/cli/templates/__init__.py +1 -0
- htmlgraph/cli/templates/cost_dashboard.py +399 -0
- htmlgraph/cli/work/__init__.py +239 -0
- htmlgraph/cli/work/browse.py +115 -0
- htmlgraph/cli/work/features.py +568 -0
- htmlgraph/cli/work/orchestration.py +676 -0
- htmlgraph/cli/work/report.py +728 -0
- htmlgraph/cli/work/sessions.py +466 -0
- htmlgraph/cli/work/snapshot.py +559 -0
- htmlgraph/cli/work/tracks.py +486 -0
- htmlgraph/cli_commands/__init__.py +1 -0
- htmlgraph/cli_commands/feature.py +195 -0
- htmlgraph/cli_framework.py +115 -0
- htmlgraph/collections/__init__.py +18 -0
- htmlgraph/collections/base.py +415 -98
- htmlgraph/collections/bug.py +53 -0
- htmlgraph/collections/chore.py +53 -0
- htmlgraph/collections/epic.py +53 -0
- htmlgraph/collections/feature.py +12 -26
- htmlgraph/collections/insight.py +100 -0
- htmlgraph/collections/metric.py +92 -0
- htmlgraph/collections/pattern.py +97 -0
- htmlgraph/collections/phase.py +53 -0
- htmlgraph/collections/session.py +194 -0
- htmlgraph/collections/spike.py +56 -16
- htmlgraph/collections/task_delegation.py +241 -0
- htmlgraph/collections/todo.py +511 -0
- htmlgraph/collections/traces.py +487 -0
- htmlgraph/config/cost_models.json +56 -0
- htmlgraph/config.py +190 -0
- htmlgraph/context_analytics.py +344 -0
- htmlgraph/converter.py +216 -28
- htmlgraph/cost_analysis/__init__.py +5 -0
- htmlgraph/cost_analysis/analyzer.py +438 -0
- htmlgraph/dashboard.html +2406 -307
- htmlgraph/dashboard.html.backup +6592 -0
- htmlgraph/dashboard.html.bak +7181 -0
- htmlgraph/dashboard.html.bak2 +7231 -0
- htmlgraph/dashboard.html.bak3 +7232 -0
- htmlgraph/db/__init__.py +38 -0
- htmlgraph/db/queries.py +790 -0
- htmlgraph/db/schema.py +1788 -0
- htmlgraph/decorators.py +317 -0
- htmlgraph/dependency_models.py +19 -2
- htmlgraph/deploy.py +142 -125
- htmlgraph/deployment_models.py +474 -0
- htmlgraph/docs/API_REFERENCE.md +841 -0
- htmlgraph/docs/HTTP_API.md +750 -0
- htmlgraph/docs/INTEGRATION_GUIDE.md +752 -0
- htmlgraph/docs/ORCHESTRATION_PATTERNS.md +717 -0
- htmlgraph/docs/README.md +532 -0
- htmlgraph/docs/__init__.py +77 -0
- htmlgraph/docs/docs_version.py +55 -0
- htmlgraph/docs/metadata.py +93 -0
- htmlgraph/docs/migrations.py +232 -0
- htmlgraph/docs/template_engine.py +143 -0
- htmlgraph/docs/templates/_sections/cli_reference.md.j2 +52 -0
- htmlgraph/docs/templates/_sections/core_concepts.md.j2 +29 -0
- htmlgraph/docs/templates/_sections/sdk_basics.md.j2 +69 -0
- htmlgraph/docs/templates/base_agents.md.j2 +78 -0
- htmlgraph/docs/templates/example_user_override.md.j2 +47 -0
- htmlgraph/docs/version_check.py +163 -0
- htmlgraph/edge_index.py +182 -27
- htmlgraph/error_handler.py +544 -0
- htmlgraph/event_log.py +100 -52
- htmlgraph/event_migration.py +13 -4
- htmlgraph/exceptions.py +49 -0
- htmlgraph/file_watcher.py +101 -28
- htmlgraph/find_api.py +75 -63
- htmlgraph/git_events.py +145 -63
- htmlgraph/graph.py +1122 -106
- htmlgraph/hooks/.htmlgraph/.session-warning-state.json +6 -0
- htmlgraph/hooks/.htmlgraph/agents.json +72 -0
- htmlgraph/hooks/.htmlgraph/index.sqlite +0 -0
- htmlgraph/hooks/__init__.py +45 -0
- htmlgraph/hooks/bootstrap.py +169 -0
- htmlgraph/hooks/cigs_pretool_enforcer.py +354 -0
- htmlgraph/hooks/concurrent_sessions.py +208 -0
- htmlgraph/hooks/context.py +350 -0
- htmlgraph/hooks/drift_handler.py +525 -0
- htmlgraph/hooks/event_tracker.py +1314 -0
- htmlgraph/hooks/git_commands.py +175 -0
- htmlgraph/hooks/hooks-config.example.json +12 -0
- htmlgraph/hooks/installer.py +343 -0
- htmlgraph/hooks/orchestrator.py +674 -0
- htmlgraph/hooks/orchestrator_reflector.py +223 -0
- htmlgraph/hooks/post-checkout.sh +28 -0
- htmlgraph/hooks/post-commit.sh +24 -0
- htmlgraph/hooks/post-merge.sh +26 -0
- htmlgraph/hooks/post_tool_use_failure.py +273 -0
- htmlgraph/hooks/post_tool_use_handler.py +257 -0
- htmlgraph/hooks/posttooluse.py +408 -0
- htmlgraph/hooks/pre-commit.sh +94 -0
- htmlgraph/hooks/pre-push.sh +28 -0
- htmlgraph/hooks/pretooluse.py +819 -0
- htmlgraph/hooks/prompt_analyzer.py +637 -0
- htmlgraph/hooks/session_handler.py +668 -0
- htmlgraph/hooks/session_summary.py +395 -0
- htmlgraph/hooks/state_manager.py +504 -0
- htmlgraph/hooks/subagent_detection.py +202 -0
- htmlgraph/hooks/subagent_stop.py +369 -0
- htmlgraph/hooks/task_enforcer.py +255 -0
- htmlgraph/hooks/task_validator.py +177 -0
- htmlgraph/hooks/validator.py +628 -0
- htmlgraph/ids.py +41 -27
- htmlgraph/index.d.ts +286 -0
- htmlgraph/learning.py +767 -0
- htmlgraph/mcp_server.py +69 -23
- htmlgraph/models.py +1586 -87
- htmlgraph/operations/README.md +62 -0
- htmlgraph/operations/__init__.py +79 -0
- htmlgraph/operations/analytics.py +339 -0
- htmlgraph/operations/bootstrap.py +289 -0
- htmlgraph/operations/events.py +244 -0
- htmlgraph/operations/fastapi_server.py +231 -0
- htmlgraph/operations/hooks.py +350 -0
- htmlgraph/operations/initialization.py +597 -0
- htmlgraph/operations/initialization.py.backup +228 -0
- htmlgraph/operations/server.py +303 -0
- htmlgraph/orchestration/__init__.py +58 -0
- htmlgraph/orchestration/claude_launcher.py +179 -0
- htmlgraph/orchestration/command_builder.py +72 -0
- htmlgraph/orchestration/headless_spawner.py +281 -0
- htmlgraph/orchestration/live_events.py +377 -0
- htmlgraph/orchestration/model_selection.py +327 -0
- htmlgraph/orchestration/plugin_manager.py +140 -0
- htmlgraph/orchestration/prompts.py +137 -0
- htmlgraph/orchestration/spawner_event_tracker.py +383 -0
- htmlgraph/orchestration/spawners/__init__.py +16 -0
- htmlgraph/orchestration/spawners/base.py +194 -0
- htmlgraph/orchestration/spawners/claude.py +173 -0
- htmlgraph/orchestration/spawners/codex.py +435 -0
- htmlgraph/orchestration/spawners/copilot.py +294 -0
- htmlgraph/orchestration/spawners/gemini.py +471 -0
- htmlgraph/orchestration/subprocess_runner.py +36 -0
- htmlgraph/orchestration/task_coordination.py +343 -0
- htmlgraph/orchestration.md +563 -0
- htmlgraph/orchestrator-system-prompt-optimized.txt +863 -0
- htmlgraph/orchestrator.py +669 -0
- htmlgraph/orchestrator_config.py +357 -0
- htmlgraph/orchestrator_mode.py +328 -0
- htmlgraph/orchestrator_validator.py +133 -0
- htmlgraph/parallel.py +646 -0
- htmlgraph/parser.py +160 -35
- htmlgraph/path_query.py +608 -0
- htmlgraph/pattern_matcher.py +636 -0
- htmlgraph/planning.py +147 -52
- htmlgraph/pydantic_models.py +476 -0
- htmlgraph/quality_gates.py +350 -0
- htmlgraph/query_builder.py +109 -72
- htmlgraph/query_composer.py +509 -0
- htmlgraph/reflection.py +443 -0
- htmlgraph/refs.py +344 -0
- htmlgraph/repo_hash.py +512 -0
- htmlgraph/repositories/__init__.py +292 -0
- htmlgraph/repositories/analytics_repository.py +455 -0
- htmlgraph/repositories/analytics_repository_standard.py +628 -0
- htmlgraph/repositories/feature_repository.py +581 -0
- htmlgraph/repositories/feature_repository_htmlfile.py +668 -0
- htmlgraph/repositories/feature_repository_memory.py +607 -0
- htmlgraph/repositories/feature_repository_sqlite.py +858 -0
- htmlgraph/repositories/filter_service.py +620 -0
- htmlgraph/repositories/filter_service_standard.py +445 -0
- htmlgraph/repositories/shared_cache.py +621 -0
- htmlgraph/repositories/shared_cache_memory.py +395 -0
- htmlgraph/repositories/track_repository.py +552 -0
- htmlgraph/repositories/track_repository_htmlfile.py +619 -0
- htmlgraph/repositories/track_repository_memory.py +508 -0
- htmlgraph/repositories/track_repository_sqlite.py +711 -0
- htmlgraph/routing.py +8 -19
- htmlgraph/scripts/deploy.py +1 -2
- htmlgraph/sdk/__init__.py +398 -0
- htmlgraph/sdk/__init__.pyi +14 -0
- htmlgraph/sdk/analytics/__init__.py +19 -0
- htmlgraph/sdk/analytics/engine.py +155 -0
- htmlgraph/sdk/analytics/helpers.py +178 -0
- htmlgraph/sdk/analytics/registry.py +109 -0
- htmlgraph/sdk/base.py +484 -0
- htmlgraph/sdk/constants.py +216 -0
- htmlgraph/sdk/core.pyi +308 -0
- htmlgraph/sdk/discovery.py +120 -0
- htmlgraph/sdk/help/__init__.py +12 -0
- htmlgraph/sdk/help/mixin.py +699 -0
- htmlgraph/sdk/mixins/__init__.py +15 -0
- htmlgraph/sdk/mixins/attribution.py +113 -0
- htmlgraph/sdk/mixins/mixin.py +410 -0
- htmlgraph/sdk/operations/__init__.py +12 -0
- htmlgraph/sdk/operations/mixin.py +427 -0
- htmlgraph/sdk/orchestration/__init__.py +17 -0
- htmlgraph/sdk/orchestration/coordinator.py +203 -0
- htmlgraph/sdk/orchestration/spawner.py +204 -0
- htmlgraph/sdk/planning/__init__.py +19 -0
- htmlgraph/sdk/planning/bottlenecks.py +93 -0
- htmlgraph/sdk/planning/mixin.py +211 -0
- htmlgraph/sdk/planning/parallel.py +186 -0
- htmlgraph/sdk/planning/queue.py +210 -0
- htmlgraph/sdk/planning/recommendations.py +87 -0
- htmlgraph/sdk/planning/smart_planning.py +319 -0
- htmlgraph/sdk/session/__init__.py +19 -0
- htmlgraph/sdk/session/continuity.py +57 -0
- htmlgraph/sdk/session/handoff.py +110 -0
- htmlgraph/sdk/session/info.py +309 -0
- htmlgraph/sdk/session/manager.py +103 -0
- htmlgraph/sdk/strategic/__init__.py +26 -0
- htmlgraph/sdk/strategic/mixin.py +563 -0
- htmlgraph/server.py +685 -180
- htmlgraph/services/__init__.py +10 -0
- htmlgraph/services/claiming.py +199 -0
- htmlgraph/session_hooks.py +300 -0
- htmlgraph/session_manager.py +1392 -175
- htmlgraph/session_registry.py +587 -0
- htmlgraph/session_state.py +436 -0
- htmlgraph/session_warning.py +201 -0
- htmlgraph/sessions/__init__.py +23 -0
- htmlgraph/sessions/handoff.py +756 -0
- htmlgraph/setup.py +34 -17
- htmlgraph/spike_index.py +143 -0
- htmlgraph/sync_docs.py +12 -15
- htmlgraph/system_prompts.py +450 -0
- htmlgraph/templates/AGENTS.md.template +366 -0
- htmlgraph/templates/CLAUDE.md.template +97 -0
- htmlgraph/templates/GEMINI.md.template +87 -0
- htmlgraph/templates/orchestration-view.html +350 -0
- htmlgraph/track_builder.py +146 -15
- htmlgraph/track_manager.py +69 -21
- htmlgraph/transcript.py +890 -0
- htmlgraph/transcript_analytics.py +699 -0
- htmlgraph/types.py +323 -0
- htmlgraph/validation.py +115 -0
- htmlgraph/watch.py +8 -5
- htmlgraph/work_type_utils.py +3 -2
- {htmlgraph-0.9.3.data → htmlgraph-0.27.5.data}/data/htmlgraph/dashboard.html +2406 -307
- htmlgraph-0.27.5.data/data/htmlgraph/templates/AGENTS.md.template +366 -0
- htmlgraph-0.27.5.data/data/htmlgraph/templates/CLAUDE.md.template +97 -0
- htmlgraph-0.27.5.data/data/htmlgraph/templates/GEMINI.md.template +87 -0
- {htmlgraph-0.9.3.dist-info → htmlgraph-0.27.5.dist-info}/METADATA +97 -64
- htmlgraph-0.27.5.dist-info/RECORD +337 -0
- {htmlgraph-0.9.3.dist-info → htmlgraph-0.27.5.dist-info}/entry_points.txt +1 -1
- htmlgraph/cli.py +0 -2688
- htmlgraph/sdk.py +0 -709
- htmlgraph-0.9.3.dist-info/RECORD +0 -61
- {htmlgraph-0.9.3.data → htmlgraph-0.27.5.data}/data/htmlgraph/styles.css +0 -0
- {htmlgraph-0.9.3.dist-info → htmlgraph-0.27.5.dist-info}/WHEEL +0 -0
htmlgraph/parser.py
CHANGED
|
@@ -4,10 +4,10 @@ HTML parser wrapper using justhtml.
|
|
|
4
4
|
Provides CSS selector-based querying and data extraction from HTML files.
|
|
5
5
|
"""
|
|
6
6
|
|
|
7
|
+
import re
|
|
8
|
+
from datetime import datetime
|
|
7
9
|
from pathlib import Path
|
|
8
10
|
from typing import Any
|
|
9
|
-
from datetime import datetime
|
|
10
|
-
import re
|
|
11
11
|
|
|
12
12
|
from justhtml import JustHTML
|
|
13
13
|
|
|
@@ -22,7 +22,9 @@ class HtmlParser:
|
|
|
22
22
|
- Graph structure parsing (nodes, edges)
|
|
23
23
|
"""
|
|
24
24
|
|
|
25
|
-
def __init__(
|
|
25
|
+
def __init__(
|
|
26
|
+
self, html_content: str | None = None, filepath: Path | str | None = None
|
|
27
|
+
):
|
|
26
28
|
"""
|
|
27
29
|
Initialize parser with HTML content or file.
|
|
28
30
|
|
|
@@ -60,7 +62,8 @@ class HtmlParser:
|
|
|
60
62
|
Returns:
|
|
61
63
|
List of matching elements
|
|
62
64
|
"""
|
|
63
|
-
|
|
65
|
+
result: list[Any] = self.html.query(selector)
|
|
66
|
+
return result
|
|
64
67
|
|
|
65
68
|
def query_one(self, selector: str) -> Any | None:
|
|
66
69
|
"""
|
|
@@ -77,20 +80,23 @@ class HtmlParser:
|
|
|
77
80
|
|
|
78
81
|
def get_article(self) -> Any | None:
|
|
79
82
|
"""Get the main article element (graph node root)."""
|
|
80
|
-
|
|
83
|
+
results = self.query("article[id]")
|
|
84
|
+
return results[0] if results else None
|
|
81
85
|
|
|
82
86
|
def get_node_id(self) -> str | None:
|
|
83
87
|
"""Extract node ID from article element."""
|
|
84
88
|
article = self.get_article()
|
|
85
89
|
if article:
|
|
86
|
-
|
|
90
|
+
result: str | None = article.attrs.get("id")
|
|
91
|
+
return result
|
|
87
92
|
return None
|
|
88
93
|
|
|
89
94
|
def get_data_attribute(self, element: Any, attr: str) -> str | None:
|
|
90
95
|
"""Get a data-* attribute value from an element."""
|
|
91
96
|
if element is None:
|
|
92
97
|
return None
|
|
93
|
-
|
|
98
|
+
result: str | None = element.attrs.get(f"data-{attr}")
|
|
99
|
+
return result
|
|
94
100
|
|
|
95
101
|
def get_all_data_attributes(self, element: Any) -> dict[str, str]:
|
|
96
102
|
"""Get all data-* attributes from an element."""
|
|
@@ -123,31 +129,70 @@ class HtmlParser:
|
|
|
123
129
|
}
|
|
124
130
|
|
|
125
131
|
# Standard attributes
|
|
126
|
-
for attr in [
|
|
132
|
+
for attr in [
|
|
133
|
+
"type",
|
|
134
|
+
"status",
|
|
135
|
+
"priority",
|
|
136
|
+
"agent-assigned",
|
|
137
|
+
"track-id",
|
|
138
|
+
"plan-task-id",
|
|
139
|
+
"claimed-by-session",
|
|
140
|
+
"spike-subtype",
|
|
141
|
+
"session-id",
|
|
142
|
+
"from-feature-id",
|
|
143
|
+
"to-feature-id",
|
|
144
|
+
"model-name",
|
|
145
|
+
]:
|
|
127
146
|
value = self.get_data_attribute(article, attr)
|
|
128
147
|
if value:
|
|
129
148
|
key = attr.replace("-", "_")
|
|
130
149
|
metadata[key] = value
|
|
131
150
|
|
|
151
|
+
# Boolean attributes
|
|
152
|
+
auto_generated = self.get_data_attribute(article, "auto-generated")
|
|
153
|
+
if auto_generated:
|
|
154
|
+
metadata["auto_generated"] = auto_generated.lower() == "true"
|
|
155
|
+
|
|
156
|
+
# Pattern sequence (for pattern nodes)
|
|
157
|
+
sequence_attr = self.get_data_attribute(article, "sequence")
|
|
158
|
+
if sequence_attr:
|
|
159
|
+
try:
|
|
160
|
+
import json
|
|
161
|
+
|
|
162
|
+
metadata["sequence"] = json.loads(sequence_attr)
|
|
163
|
+
except (json.JSONDecodeError, ValueError):
|
|
164
|
+
# Invalid JSON, skip
|
|
165
|
+
pass
|
|
166
|
+
|
|
132
167
|
# Timestamps (with fallbacks for session-specific attributes)
|
|
133
168
|
claimed_at = self.get_data_attribute(article, "claimed-at")
|
|
134
169
|
if claimed_at:
|
|
135
170
|
try:
|
|
136
|
-
metadata["claimed_at"] = datetime.fromisoformat(
|
|
171
|
+
metadata["claimed_at"] = datetime.fromisoformat(
|
|
172
|
+
claimed_at.replace("Z", "+00:00")
|
|
173
|
+
)
|
|
137
174
|
except ValueError:
|
|
138
175
|
metadata["claimed_at"] = claimed_at
|
|
139
176
|
|
|
140
|
-
created_value = self.get_data_attribute(
|
|
177
|
+
created_value = self.get_data_attribute(
|
|
178
|
+
article, "created"
|
|
179
|
+
) or self.get_data_attribute(article, "started-at")
|
|
141
180
|
if created_value:
|
|
142
181
|
try:
|
|
143
|
-
metadata["created"] = datetime.fromisoformat(
|
|
182
|
+
metadata["created"] = datetime.fromisoformat(
|
|
183
|
+
created_value.replace("Z", "+00:00")
|
|
184
|
+
)
|
|
144
185
|
except ValueError:
|
|
145
186
|
metadata["created"] = created_value
|
|
146
187
|
|
|
147
|
-
updated_value = self.get_data_attribute(
|
|
188
|
+
updated_value = self.get_data_attribute(
|
|
189
|
+
article, "updated"
|
|
190
|
+
) or self.get_data_attribute(article, "last-activity")
|
|
148
191
|
if updated_value:
|
|
149
192
|
try:
|
|
150
|
-
metadata["updated"] = datetime.fromisoformat(
|
|
193
|
+
metadata["updated"] = datetime.fromisoformat(
|
|
194
|
+
updated_value.replace("Z", "+00:00")
|
|
195
|
+
)
|
|
151
196
|
except ValueError:
|
|
152
197
|
metadata["updated"] = updated_value
|
|
153
198
|
|
|
@@ -156,14 +201,18 @@ class HtmlParser:
|
|
|
156
201
|
def get_title(self) -> str | None:
|
|
157
202
|
"""Get node title from h1 or title element."""
|
|
158
203
|
# Try h1 in header first
|
|
159
|
-
|
|
204
|
+
h1_results = self.query("article header h1")
|
|
205
|
+
h1 = h1_results[0] if h1_results else None
|
|
160
206
|
if h1:
|
|
161
|
-
|
|
207
|
+
text: str = h1.to_text().strip()
|
|
208
|
+
return text
|
|
162
209
|
|
|
163
210
|
# Fall back to title element
|
|
164
|
-
|
|
211
|
+
title_results = self.query("title")
|
|
212
|
+
title = title_results[0] if title_results else None
|
|
165
213
|
if title:
|
|
166
|
-
|
|
214
|
+
text2: str = title.to_text().strip()
|
|
215
|
+
return text2
|
|
167
216
|
|
|
168
217
|
return None
|
|
169
218
|
|
|
@@ -179,7 +228,8 @@ class HtmlParser:
|
|
|
179
228
|
"""
|
|
180
229
|
edges: dict[str, list[dict[str, Any]]] = {}
|
|
181
230
|
|
|
182
|
-
|
|
231
|
+
edge_nav_results = self.query("nav[data-graph-edges]")
|
|
232
|
+
edge_nav = edge_nav_results[0] if edge_nav_results else None
|
|
183
233
|
if not edge_nav:
|
|
184
234
|
return edges
|
|
185
235
|
|
|
@@ -211,13 +261,18 @@ class HtmlParser:
|
|
|
211
261
|
since = link.attrs.get("data-since")
|
|
212
262
|
if since:
|
|
213
263
|
try:
|
|
214
|
-
edge_data["since"] = datetime.fromisoformat(
|
|
264
|
+
edge_data["since"] = datetime.fromisoformat(
|
|
265
|
+
since.replace("Z", "+00:00")
|
|
266
|
+
)
|
|
215
267
|
except ValueError:
|
|
216
268
|
edge_data["since"] = since
|
|
217
269
|
|
|
218
270
|
# Any other data attributes as properties
|
|
219
271
|
for key, value in link.attrs.items():
|
|
220
|
-
if key.startswith("data-") and key not in [
|
|
272
|
+
if key.startswith("data-") and key not in [
|
|
273
|
+
"data-relationship",
|
|
274
|
+
"data-since",
|
|
275
|
+
]:
|
|
221
276
|
if "properties" not in edge_data:
|
|
222
277
|
edge_data["properties"] = {}
|
|
223
278
|
edge_data["properties"][key[5:]] = value
|
|
@@ -245,11 +300,13 @@ class HtmlParser:
|
|
|
245
300
|
# Remove common status emojis
|
|
246
301
|
text = re.sub(r"^[✅⏳❌🔄]\s*", "", text)
|
|
247
302
|
|
|
248
|
-
steps.append(
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
303
|
+
steps.append(
|
|
304
|
+
{
|
|
305
|
+
"description": text,
|
|
306
|
+
"completed": completed,
|
|
307
|
+
"agent": agent,
|
|
308
|
+
}
|
|
309
|
+
)
|
|
253
310
|
|
|
254
311
|
return steps
|
|
255
312
|
|
|
@@ -289,10 +346,10 @@ class HtmlParser:
|
|
|
289
346
|
article = self.get_article()
|
|
290
347
|
if article and self.get_data_attribute(article, "type") == "session":
|
|
291
348
|
# Add event_count if present
|
|
292
|
-
|
|
293
|
-
if
|
|
349
|
+
event_count_str: str | None = article.attrs.get("data-event-count")
|
|
350
|
+
if event_count_str:
|
|
294
351
|
try:
|
|
295
|
-
properties["event_count"] = int(
|
|
352
|
+
properties["event_count"] = int(event_count_str) # type: ignore[assignment]
|
|
296
353
|
except (ValueError, TypeError):
|
|
297
354
|
pass
|
|
298
355
|
|
|
@@ -301,26 +358,85 @@ class HtmlParser:
|
|
|
301
358
|
if agent:
|
|
302
359
|
properties["agent"] = agent
|
|
303
360
|
|
|
361
|
+
# Add transcript_id if present (for Claude Code transcript integration)
|
|
362
|
+
transcript_id = article.attrs.get("data-transcript-id")
|
|
363
|
+
if transcript_id:
|
|
364
|
+
properties["transcript_id"] = transcript_id
|
|
365
|
+
|
|
304
366
|
return properties
|
|
305
367
|
|
|
306
368
|
def get_content(self) -> str:
|
|
307
369
|
"""Extract main content from section[data-content]."""
|
|
308
|
-
|
|
370
|
+
content_section_results = self.query("section[data-content]")
|
|
371
|
+
content_section = (
|
|
372
|
+
content_section_results[0] if content_section_results else None
|
|
373
|
+
)
|
|
309
374
|
if not content_section:
|
|
310
375
|
return ""
|
|
311
376
|
|
|
312
377
|
# Get text content excluding the h3 header
|
|
313
378
|
text_parts = []
|
|
314
379
|
for child in content_section.children:
|
|
315
|
-
if hasattr(child,
|
|
380
|
+
if hasattr(child, "name") and child.name == "h3":
|
|
316
381
|
continue
|
|
317
|
-
if hasattr(child,
|
|
382
|
+
if hasattr(child, "to_text"):
|
|
318
383
|
text = child.to_text().strip()
|
|
319
384
|
if text:
|
|
320
385
|
text_parts.append(text)
|
|
321
386
|
|
|
322
387
|
return "\n".join(text_parts)
|
|
323
388
|
|
|
389
|
+
def get_findings(self) -> str | None:
|
|
390
|
+
"""Extract findings from section[data-findings] (Spike-specific)."""
|
|
391
|
+
findings_section_results = self.query("section[data-findings]")
|
|
392
|
+
findings_section = (
|
|
393
|
+
findings_section_results[0] if findings_section_results else None
|
|
394
|
+
)
|
|
395
|
+
if not findings_section:
|
|
396
|
+
return None
|
|
397
|
+
|
|
398
|
+
# Look for findings-content div using full selector
|
|
399
|
+
content_div_results = self.query("section[data-findings] div.findings-content")
|
|
400
|
+
content_div = content_div_results[0] if content_div_results else None
|
|
401
|
+
if content_div:
|
|
402
|
+
text = content_div.to_text().strip()
|
|
403
|
+
return text if text else None
|
|
404
|
+
|
|
405
|
+
# Fallback: get all text excluding h3 header
|
|
406
|
+
text_parts = []
|
|
407
|
+
for child in findings_section.children:
|
|
408
|
+
if hasattr(child, "name") and child.name == "h3":
|
|
409
|
+
continue
|
|
410
|
+
if hasattr(child, "to_text"):
|
|
411
|
+
text = child.to_text().strip()
|
|
412
|
+
if text:
|
|
413
|
+
text_parts.append(text)
|
|
414
|
+
|
|
415
|
+
result = "\n".join(text_parts)
|
|
416
|
+
return result if result else None
|
|
417
|
+
|
|
418
|
+
def get_decision(self) -> str | None:
|
|
419
|
+
"""Extract decision from section[data-decision] (Spike-specific)."""
|
|
420
|
+
decision_section_results = self.query("section[data-decision]")
|
|
421
|
+
decision_section = (
|
|
422
|
+
decision_section_results[0] if decision_section_results else None
|
|
423
|
+
)
|
|
424
|
+
if not decision_section:
|
|
425
|
+
return None
|
|
426
|
+
|
|
427
|
+
# Get text content excluding the h3 header
|
|
428
|
+
text_parts = []
|
|
429
|
+
for child in decision_section.children:
|
|
430
|
+
if hasattr(child, "name") and child.name == "h3":
|
|
431
|
+
continue
|
|
432
|
+
if hasattr(child, "to_text"):
|
|
433
|
+
text = child.to_text().strip()
|
|
434
|
+
if text:
|
|
435
|
+
text_parts.append(text)
|
|
436
|
+
|
|
437
|
+
result = "\n".join(text_parts)
|
|
438
|
+
return result if result else None
|
|
439
|
+
|
|
324
440
|
def parse_full_node(self) -> dict[str, Any]:
|
|
325
441
|
"""
|
|
326
442
|
Parse complete node data from HTML.
|
|
@@ -330,7 +446,7 @@ class HtmlParser:
|
|
|
330
446
|
metadata = self.get_node_metadata()
|
|
331
447
|
title = self.get_title()
|
|
332
448
|
|
|
333
|
-
|
|
449
|
+
result = {
|
|
334
450
|
**metadata,
|
|
335
451
|
"title": title or metadata.get("id", "Untitled"),
|
|
336
452
|
"edges": self.get_edges(),
|
|
@@ -339,6 +455,17 @@ class HtmlParser:
|
|
|
339
455
|
"content": self.get_content(),
|
|
340
456
|
}
|
|
341
457
|
|
|
458
|
+
# Add Spike-specific fields if present
|
|
459
|
+
findings = self.get_findings()
|
|
460
|
+
if findings is not None:
|
|
461
|
+
result["findings"] = findings
|
|
462
|
+
|
|
463
|
+
decision = self.get_decision()
|
|
464
|
+
if decision is not None:
|
|
465
|
+
result["decision"] = decision
|
|
466
|
+
|
|
467
|
+
return result
|
|
468
|
+
|
|
342
469
|
|
|
343
470
|
def parse_html_file(filepath: Path | str) -> dict[str, Any]:
|
|
344
471
|
"""
|
|
@@ -355,9 +482,7 @@ def parse_html_file(filepath: Path | str) -> dict[str, Any]:
|
|
|
355
482
|
|
|
356
483
|
|
|
357
484
|
def query_html_files(
|
|
358
|
-
directory: Path | str,
|
|
359
|
-
selector: str,
|
|
360
|
-
pattern: str = "*.html"
|
|
485
|
+
directory: Path | str, selector: str, pattern: str = "*.html"
|
|
361
486
|
) -> list[tuple[Path, list[Any]]]:
|
|
362
487
|
"""
|
|
363
488
|
Query multiple HTML files with CSS selector.
|