htmlgraph 0.9.3__py3-none-any.whl → 0.27.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (331)
  1. htmlgraph/.htmlgraph/.session-warning-state.json +6 -0
  2. htmlgraph/.htmlgraph/agents.json +72 -0
  3. htmlgraph/.htmlgraph/htmlgraph.db +0 -0
  4. htmlgraph/__init__.py +173 -17
  5. htmlgraph/__init__.pyi +123 -0
  6. htmlgraph/agent_detection.py +127 -0
  7. htmlgraph/agent_registry.py +45 -30
  8. htmlgraph/agents.py +160 -107
  9. htmlgraph/analytics/__init__.py +9 -2
  10. htmlgraph/analytics/cli.py +190 -51
  11. htmlgraph/analytics/cost_analyzer.py +391 -0
  12. htmlgraph/analytics/cost_monitor.py +664 -0
  13. htmlgraph/analytics/cost_reporter.py +675 -0
  14. htmlgraph/analytics/cross_session.py +617 -0
  15. htmlgraph/analytics/dependency.py +192 -100
  16. htmlgraph/analytics/pattern_learning.py +771 -0
  17. htmlgraph/analytics/session_graph.py +707 -0
  18. htmlgraph/analytics/strategic/__init__.py +80 -0
  19. htmlgraph/analytics/strategic/cost_optimizer.py +611 -0
  20. htmlgraph/analytics/strategic/pattern_detector.py +876 -0
  21. htmlgraph/analytics/strategic/preference_manager.py +709 -0
  22. htmlgraph/analytics/strategic/suggestion_engine.py +747 -0
  23. htmlgraph/analytics/work_type.py +190 -14
  24. htmlgraph/analytics_index.py +135 -51
  25. htmlgraph/api/__init__.py +3 -0
  26. htmlgraph/api/cost_alerts_websocket.py +416 -0
  27. htmlgraph/api/main.py +2498 -0
  28. htmlgraph/api/static/htmx.min.js +1 -0
  29. htmlgraph/api/static/style-redesign.css +1344 -0
  30. htmlgraph/api/static/style.css +1079 -0
  31. htmlgraph/api/templates/dashboard-redesign.html +1366 -0
  32. htmlgraph/api/templates/dashboard.html +794 -0
  33. htmlgraph/api/templates/partials/activity-feed-hierarchical.html +326 -0
  34. htmlgraph/api/templates/partials/activity-feed.html +1100 -0
  35. htmlgraph/api/templates/partials/agents-redesign.html +317 -0
  36. htmlgraph/api/templates/partials/agents.html +317 -0
  37. htmlgraph/api/templates/partials/event-traces.html +373 -0
  38. htmlgraph/api/templates/partials/features-kanban-redesign.html +509 -0
  39. htmlgraph/api/templates/partials/features.html +578 -0
  40. htmlgraph/api/templates/partials/metrics-redesign.html +346 -0
  41. htmlgraph/api/templates/partials/metrics.html +346 -0
  42. htmlgraph/api/templates/partials/orchestration-redesign.html +443 -0
  43. htmlgraph/api/templates/partials/orchestration.html +198 -0
  44. htmlgraph/api/templates/partials/spawners.html +375 -0
  45. htmlgraph/api/templates/partials/work-items.html +613 -0
  46. htmlgraph/api/websocket.py +538 -0
  47. htmlgraph/archive/__init__.py +24 -0
  48. htmlgraph/archive/bloom.py +234 -0
  49. htmlgraph/archive/fts.py +297 -0
  50. htmlgraph/archive/manager.py +583 -0
  51. htmlgraph/archive/search.py +244 -0
  52. htmlgraph/atomic_ops.py +560 -0
  53. htmlgraph/attribute_index.py +208 -0
  54. htmlgraph/bounded_paths.py +539 -0
  55. htmlgraph/builders/__init__.py +14 -0
  56. htmlgraph/builders/base.py +118 -29
  57. htmlgraph/builders/bug.py +150 -0
  58. htmlgraph/builders/chore.py +119 -0
  59. htmlgraph/builders/epic.py +150 -0
  60. htmlgraph/builders/feature.py +31 -6
  61. htmlgraph/builders/insight.py +195 -0
  62. htmlgraph/builders/metric.py +217 -0
  63. htmlgraph/builders/pattern.py +202 -0
  64. htmlgraph/builders/phase.py +162 -0
  65. htmlgraph/builders/spike.py +52 -19
  66. htmlgraph/builders/track.py +148 -72
  67. htmlgraph/cigs/__init__.py +81 -0
  68. htmlgraph/cigs/autonomy.py +385 -0
  69. htmlgraph/cigs/cost.py +475 -0
  70. htmlgraph/cigs/messages_basic.py +472 -0
  71. htmlgraph/cigs/messaging.py +365 -0
  72. htmlgraph/cigs/models.py +771 -0
  73. htmlgraph/cigs/pattern_storage.py +427 -0
  74. htmlgraph/cigs/patterns.py +503 -0
  75. htmlgraph/cigs/posttool_analyzer.py +234 -0
  76. htmlgraph/cigs/reporter.py +818 -0
  77. htmlgraph/cigs/tracker.py +317 -0
  78. htmlgraph/cli/.htmlgraph/.session-warning-state.json +6 -0
  79. htmlgraph/cli/.htmlgraph/agents.json +72 -0
  80. htmlgraph/cli/.htmlgraph/htmlgraph.db +0 -0
  81. htmlgraph/cli/__init__.py +42 -0
  82. htmlgraph/cli/__main__.py +6 -0
  83. htmlgraph/cli/analytics.py +1424 -0
  84. htmlgraph/cli/base.py +685 -0
  85. htmlgraph/cli/constants.py +206 -0
  86. htmlgraph/cli/core.py +954 -0
  87. htmlgraph/cli/main.py +147 -0
  88. htmlgraph/cli/models.py +475 -0
  89. htmlgraph/cli/templates/__init__.py +1 -0
  90. htmlgraph/cli/templates/cost_dashboard.py +399 -0
  91. htmlgraph/cli/work/__init__.py +239 -0
  92. htmlgraph/cli/work/browse.py +115 -0
  93. htmlgraph/cli/work/features.py +568 -0
  94. htmlgraph/cli/work/orchestration.py +676 -0
  95. htmlgraph/cli/work/report.py +728 -0
  96. htmlgraph/cli/work/sessions.py +466 -0
  97. htmlgraph/cli/work/snapshot.py +559 -0
  98. htmlgraph/cli/work/tracks.py +486 -0
  99. htmlgraph/cli_commands/__init__.py +1 -0
  100. htmlgraph/cli_commands/feature.py +195 -0
  101. htmlgraph/cli_framework.py +115 -0
  102. htmlgraph/collections/__init__.py +18 -0
  103. htmlgraph/collections/base.py +415 -98
  104. htmlgraph/collections/bug.py +53 -0
  105. htmlgraph/collections/chore.py +53 -0
  106. htmlgraph/collections/epic.py +53 -0
  107. htmlgraph/collections/feature.py +12 -26
  108. htmlgraph/collections/insight.py +100 -0
  109. htmlgraph/collections/metric.py +92 -0
  110. htmlgraph/collections/pattern.py +97 -0
  111. htmlgraph/collections/phase.py +53 -0
  112. htmlgraph/collections/session.py +194 -0
  113. htmlgraph/collections/spike.py +56 -16
  114. htmlgraph/collections/task_delegation.py +241 -0
  115. htmlgraph/collections/todo.py +511 -0
  116. htmlgraph/collections/traces.py +487 -0
  117. htmlgraph/config/cost_models.json +56 -0
  118. htmlgraph/config.py +190 -0
  119. htmlgraph/context_analytics.py +344 -0
  120. htmlgraph/converter.py +216 -28
  121. htmlgraph/cost_analysis/__init__.py +5 -0
  122. htmlgraph/cost_analysis/analyzer.py +438 -0
  123. htmlgraph/dashboard.html +2406 -307
  124. htmlgraph/dashboard.html.backup +6592 -0
  125. htmlgraph/dashboard.html.bak +7181 -0
  126. htmlgraph/dashboard.html.bak2 +7231 -0
  127. htmlgraph/dashboard.html.bak3 +7232 -0
  128. htmlgraph/db/__init__.py +38 -0
  129. htmlgraph/db/queries.py +790 -0
  130. htmlgraph/db/schema.py +1788 -0
  131. htmlgraph/decorators.py +317 -0
  132. htmlgraph/dependency_models.py +19 -2
  133. htmlgraph/deploy.py +142 -125
  134. htmlgraph/deployment_models.py +474 -0
  135. htmlgraph/docs/API_REFERENCE.md +841 -0
  136. htmlgraph/docs/HTTP_API.md +750 -0
  137. htmlgraph/docs/INTEGRATION_GUIDE.md +752 -0
  138. htmlgraph/docs/ORCHESTRATION_PATTERNS.md +717 -0
  139. htmlgraph/docs/README.md +532 -0
  140. htmlgraph/docs/__init__.py +77 -0
  141. htmlgraph/docs/docs_version.py +55 -0
  142. htmlgraph/docs/metadata.py +93 -0
  143. htmlgraph/docs/migrations.py +232 -0
  144. htmlgraph/docs/template_engine.py +143 -0
  145. htmlgraph/docs/templates/_sections/cli_reference.md.j2 +52 -0
  146. htmlgraph/docs/templates/_sections/core_concepts.md.j2 +29 -0
  147. htmlgraph/docs/templates/_sections/sdk_basics.md.j2 +69 -0
  148. htmlgraph/docs/templates/base_agents.md.j2 +78 -0
  149. htmlgraph/docs/templates/example_user_override.md.j2 +47 -0
  150. htmlgraph/docs/version_check.py +163 -0
  151. htmlgraph/edge_index.py +182 -27
  152. htmlgraph/error_handler.py +544 -0
  153. htmlgraph/event_log.py +100 -52
  154. htmlgraph/event_migration.py +13 -4
  155. htmlgraph/exceptions.py +49 -0
  156. htmlgraph/file_watcher.py +101 -28
  157. htmlgraph/find_api.py +75 -63
  158. htmlgraph/git_events.py +145 -63
  159. htmlgraph/graph.py +1122 -106
  160. htmlgraph/hooks/.htmlgraph/.session-warning-state.json +6 -0
  161. htmlgraph/hooks/.htmlgraph/agents.json +72 -0
  162. htmlgraph/hooks/.htmlgraph/index.sqlite +0 -0
  163. htmlgraph/hooks/__init__.py +45 -0
  164. htmlgraph/hooks/bootstrap.py +169 -0
  165. htmlgraph/hooks/cigs_pretool_enforcer.py +354 -0
  166. htmlgraph/hooks/concurrent_sessions.py +208 -0
  167. htmlgraph/hooks/context.py +350 -0
  168. htmlgraph/hooks/drift_handler.py +525 -0
  169. htmlgraph/hooks/event_tracker.py +1314 -0
  170. htmlgraph/hooks/git_commands.py +175 -0
  171. htmlgraph/hooks/hooks-config.example.json +12 -0
  172. htmlgraph/hooks/installer.py +343 -0
  173. htmlgraph/hooks/orchestrator.py +674 -0
  174. htmlgraph/hooks/orchestrator_reflector.py +223 -0
  175. htmlgraph/hooks/post-checkout.sh +28 -0
  176. htmlgraph/hooks/post-commit.sh +24 -0
  177. htmlgraph/hooks/post-merge.sh +26 -0
  178. htmlgraph/hooks/post_tool_use_failure.py +273 -0
  179. htmlgraph/hooks/post_tool_use_handler.py +257 -0
  180. htmlgraph/hooks/posttooluse.py +408 -0
  181. htmlgraph/hooks/pre-commit.sh +94 -0
  182. htmlgraph/hooks/pre-push.sh +28 -0
  183. htmlgraph/hooks/pretooluse.py +819 -0
  184. htmlgraph/hooks/prompt_analyzer.py +637 -0
  185. htmlgraph/hooks/session_handler.py +668 -0
  186. htmlgraph/hooks/session_summary.py +395 -0
  187. htmlgraph/hooks/state_manager.py +504 -0
  188. htmlgraph/hooks/subagent_detection.py +202 -0
  189. htmlgraph/hooks/subagent_stop.py +369 -0
  190. htmlgraph/hooks/task_enforcer.py +255 -0
  191. htmlgraph/hooks/task_validator.py +177 -0
  192. htmlgraph/hooks/validator.py +628 -0
  193. htmlgraph/ids.py +41 -27
  194. htmlgraph/index.d.ts +286 -0
  195. htmlgraph/learning.py +767 -0
  196. htmlgraph/mcp_server.py +69 -23
  197. htmlgraph/models.py +1586 -87
  198. htmlgraph/operations/README.md +62 -0
  199. htmlgraph/operations/__init__.py +79 -0
  200. htmlgraph/operations/analytics.py +339 -0
  201. htmlgraph/operations/bootstrap.py +289 -0
  202. htmlgraph/operations/events.py +244 -0
  203. htmlgraph/operations/fastapi_server.py +231 -0
  204. htmlgraph/operations/hooks.py +350 -0
  205. htmlgraph/operations/initialization.py +597 -0
  206. htmlgraph/operations/initialization.py.backup +228 -0
  207. htmlgraph/operations/server.py +303 -0
  208. htmlgraph/orchestration/__init__.py +58 -0
  209. htmlgraph/orchestration/claude_launcher.py +179 -0
  210. htmlgraph/orchestration/command_builder.py +72 -0
  211. htmlgraph/orchestration/headless_spawner.py +281 -0
  212. htmlgraph/orchestration/live_events.py +377 -0
  213. htmlgraph/orchestration/model_selection.py +327 -0
  214. htmlgraph/orchestration/plugin_manager.py +140 -0
  215. htmlgraph/orchestration/prompts.py +137 -0
  216. htmlgraph/orchestration/spawner_event_tracker.py +383 -0
  217. htmlgraph/orchestration/spawners/__init__.py +16 -0
  218. htmlgraph/orchestration/spawners/base.py +194 -0
  219. htmlgraph/orchestration/spawners/claude.py +173 -0
  220. htmlgraph/orchestration/spawners/codex.py +435 -0
  221. htmlgraph/orchestration/spawners/copilot.py +294 -0
  222. htmlgraph/orchestration/spawners/gemini.py +471 -0
  223. htmlgraph/orchestration/subprocess_runner.py +36 -0
  224. htmlgraph/orchestration/task_coordination.py +343 -0
  225. htmlgraph/orchestration.md +563 -0
  226. htmlgraph/orchestrator-system-prompt-optimized.txt +863 -0
  227. htmlgraph/orchestrator.py +669 -0
  228. htmlgraph/orchestrator_config.py +357 -0
  229. htmlgraph/orchestrator_mode.py +328 -0
  230. htmlgraph/orchestrator_validator.py +133 -0
  231. htmlgraph/parallel.py +646 -0
  232. htmlgraph/parser.py +160 -35
  233. htmlgraph/path_query.py +608 -0
  234. htmlgraph/pattern_matcher.py +636 -0
  235. htmlgraph/planning.py +147 -52
  236. htmlgraph/pydantic_models.py +476 -0
  237. htmlgraph/quality_gates.py +350 -0
  238. htmlgraph/query_builder.py +109 -72
  239. htmlgraph/query_composer.py +509 -0
  240. htmlgraph/reflection.py +443 -0
  241. htmlgraph/refs.py +344 -0
  242. htmlgraph/repo_hash.py +512 -0
  243. htmlgraph/repositories/__init__.py +292 -0
  244. htmlgraph/repositories/analytics_repository.py +455 -0
  245. htmlgraph/repositories/analytics_repository_standard.py +628 -0
  246. htmlgraph/repositories/feature_repository.py +581 -0
  247. htmlgraph/repositories/feature_repository_htmlfile.py +668 -0
  248. htmlgraph/repositories/feature_repository_memory.py +607 -0
  249. htmlgraph/repositories/feature_repository_sqlite.py +858 -0
  250. htmlgraph/repositories/filter_service.py +620 -0
  251. htmlgraph/repositories/filter_service_standard.py +445 -0
  252. htmlgraph/repositories/shared_cache.py +621 -0
  253. htmlgraph/repositories/shared_cache_memory.py +395 -0
  254. htmlgraph/repositories/track_repository.py +552 -0
  255. htmlgraph/repositories/track_repository_htmlfile.py +619 -0
  256. htmlgraph/repositories/track_repository_memory.py +508 -0
  257. htmlgraph/repositories/track_repository_sqlite.py +711 -0
  258. htmlgraph/routing.py +8 -19
  259. htmlgraph/scripts/deploy.py +1 -2
  260. htmlgraph/sdk/__init__.py +398 -0
  261. htmlgraph/sdk/__init__.pyi +14 -0
  262. htmlgraph/sdk/analytics/__init__.py +19 -0
  263. htmlgraph/sdk/analytics/engine.py +155 -0
  264. htmlgraph/sdk/analytics/helpers.py +178 -0
  265. htmlgraph/sdk/analytics/registry.py +109 -0
  266. htmlgraph/sdk/base.py +484 -0
  267. htmlgraph/sdk/constants.py +216 -0
  268. htmlgraph/sdk/core.pyi +308 -0
  269. htmlgraph/sdk/discovery.py +120 -0
  270. htmlgraph/sdk/help/__init__.py +12 -0
  271. htmlgraph/sdk/help/mixin.py +699 -0
  272. htmlgraph/sdk/mixins/__init__.py +15 -0
  273. htmlgraph/sdk/mixins/attribution.py +113 -0
  274. htmlgraph/sdk/mixins/mixin.py +410 -0
  275. htmlgraph/sdk/operations/__init__.py +12 -0
  276. htmlgraph/sdk/operations/mixin.py +427 -0
  277. htmlgraph/sdk/orchestration/__init__.py +17 -0
  278. htmlgraph/sdk/orchestration/coordinator.py +203 -0
  279. htmlgraph/sdk/orchestration/spawner.py +204 -0
  280. htmlgraph/sdk/planning/__init__.py +19 -0
  281. htmlgraph/sdk/planning/bottlenecks.py +93 -0
  282. htmlgraph/sdk/planning/mixin.py +211 -0
  283. htmlgraph/sdk/planning/parallel.py +186 -0
  284. htmlgraph/sdk/planning/queue.py +210 -0
  285. htmlgraph/sdk/planning/recommendations.py +87 -0
  286. htmlgraph/sdk/planning/smart_planning.py +319 -0
  287. htmlgraph/sdk/session/__init__.py +19 -0
  288. htmlgraph/sdk/session/continuity.py +57 -0
  289. htmlgraph/sdk/session/handoff.py +110 -0
  290. htmlgraph/sdk/session/info.py +309 -0
  291. htmlgraph/sdk/session/manager.py +103 -0
  292. htmlgraph/sdk/strategic/__init__.py +26 -0
  293. htmlgraph/sdk/strategic/mixin.py +563 -0
  294. htmlgraph/server.py +685 -180
  295. htmlgraph/services/__init__.py +10 -0
  296. htmlgraph/services/claiming.py +199 -0
  297. htmlgraph/session_hooks.py +300 -0
  298. htmlgraph/session_manager.py +1392 -175
  299. htmlgraph/session_registry.py +587 -0
  300. htmlgraph/session_state.py +436 -0
  301. htmlgraph/session_warning.py +201 -0
  302. htmlgraph/sessions/__init__.py +23 -0
  303. htmlgraph/sessions/handoff.py +756 -0
  304. htmlgraph/setup.py +34 -17
  305. htmlgraph/spike_index.py +143 -0
  306. htmlgraph/sync_docs.py +12 -15
  307. htmlgraph/system_prompts.py +450 -0
  308. htmlgraph/templates/AGENTS.md.template +366 -0
  309. htmlgraph/templates/CLAUDE.md.template +97 -0
  310. htmlgraph/templates/GEMINI.md.template +87 -0
  311. htmlgraph/templates/orchestration-view.html +350 -0
  312. htmlgraph/track_builder.py +146 -15
  313. htmlgraph/track_manager.py +69 -21
  314. htmlgraph/transcript.py +890 -0
  315. htmlgraph/transcript_analytics.py +699 -0
  316. htmlgraph/types.py +323 -0
  317. htmlgraph/validation.py +115 -0
  318. htmlgraph/watch.py +8 -5
  319. htmlgraph/work_type_utils.py +3 -2
  320. {htmlgraph-0.9.3.data → htmlgraph-0.27.5.data}/data/htmlgraph/dashboard.html +2406 -307
  321. htmlgraph-0.27.5.data/data/htmlgraph/templates/AGENTS.md.template +366 -0
  322. htmlgraph-0.27.5.data/data/htmlgraph/templates/CLAUDE.md.template +97 -0
  323. htmlgraph-0.27.5.data/data/htmlgraph/templates/GEMINI.md.template +87 -0
  324. {htmlgraph-0.9.3.dist-info → htmlgraph-0.27.5.dist-info}/METADATA +97 -64
  325. htmlgraph-0.27.5.dist-info/RECORD +337 -0
  326. {htmlgraph-0.9.3.dist-info → htmlgraph-0.27.5.dist-info}/entry_points.txt +1 -1
  327. htmlgraph/cli.py +0 -2688
  328. htmlgraph/sdk.py +0 -709
  329. htmlgraph-0.9.3.dist-info/RECORD +0 -61
  330. {htmlgraph-0.9.3.data → htmlgraph-0.27.5.data}/data/htmlgraph/styles.css +0 -0
  331. {htmlgraph-0.9.3.dist-info → htmlgraph-0.27.5.dist-info}/WHEEL +0 -0
htmlgraph/graph.py CHANGED
@@ -6,18 +6,193 @@ Provides:
  - CSS selector queries
  - Graph algorithms (BFS, shortest path, dependency analysis)
  - Bottleneck detection
+ - Transaction/snapshot support for concurrency
  """

+ import hashlib
+ import os
+ import time
  from collections import defaultdict, deque
+ from collections.abc import Callable, Iterator
+ from contextlib import contextmanager
+ from dataclasses import dataclass, field
+ from datetime import datetime
  from pathlib import Path
- from typing import Any, Callable, Iterator
+ from typing import Any, cast

- from htmlgraph.models import Node, Edge
- from htmlgraph.converter import html_to_node, node_to_html, NodeConverter
- from htmlgraph.parser import HtmlParser
+ from htmlgraph.attribute_index import AttributeIndex
+ from htmlgraph.converter import NodeConverter
  from htmlgraph.edge_index import EdgeIndex, EdgeRef
- from htmlgraph.query_builder import QueryBuilder
+ from htmlgraph.exceptions import NodeNotFoundError
  from htmlgraph.find_api import FindAPI
+ from htmlgraph.models import Node
+ from htmlgraph.parser import HtmlParser
+ from htmlgraph.query_builder import QueryBuilder
+
+
+ @dataclass
+ class CompiledQuery:
+ """
+ Pre-compiled CSS selector query for efficient reuse.
+
+ While justhtml doesn't support native selector pre-compilation,
+ this class provides:
+ - Cached selector string to avoid string manipulation overhead
+ - Reusable query execution with metrics tracking
+ - Integration with query cache for performance
+
+ Example:
+ >>> graph = HtmlGraph("features/")
+ >>> compiled = graph.compile_query("[data-status='blocked']")
+ >>> results = graph.query_compiled(compiled) # Fast on reuse
+ >>> results2 = graph.query_compiled(compiled) # Uses cache
+ """
+
+ selector: str
+ _compiled_at: datetime = field(default_factory=datetime.now)
+ _use_count: int = field(default=0, init=False)
+
+ def matches(self, node: Node) -> bool:
+ """
+ Check if a node matches this compiled query.
+
+ Args:
+ node: Node to check
+
+ Returns:
+ True if node matches selector
+ """
+ try:
+ # Convert node to HTML in-memory
+ html_content = node.to_html()
+
+ # Parse the HTML string
+ parser = HtmlParser.from_string(html_content)
+
+ # Check if selector matches
+ return bool(parser.query(f"article{self.selector}"))
+ except Exception:
+ return False
+
+ def execute(self, nodes: dict[str, Node]) -> list[Node]:
+ """
+ Execute this compiled query on a set of nodes.
+
+ Args:
+ nodes: Dict of nodes to query
+
+ Returns:
+ List of matching nodes
+ """
+ self._use_count += 1
+ return [node for node in nodes.values() if self.matches(node)]
+
+
+ class GraphSnapshot:
+ """
+ Immutable snapshot of graph state at a point in time.
+
+ Provides read-only access to graph data without affecting the original graph.
+ Safe to use across multiple agents or threads.
+
+ Example:
+ snapshot = graph.snapshot()
+ node = snapshot.get("feature-001") # Read-only access
+ results = snapshot.query("[data-status='blocked']")
+ """
+
+ def __init__(self, nodes: dict[str, Node], directory: Path):
+ """
+ Create a snapshot of graph nodes.
+
+ Args:
+ nodes: Dictionary of nodes to snapshot
+ directory: Graph directory (for context)
+ """
+ # Deep copy to prevent external mutations
+ self._nodes = {
+ node_id: node.model_copy(deep=True) for node_id, node in nodes.items()
+ }
+ self._directory = directory
+
+ def get(self, node_id: str) -> Node | None:
+ """
+ Get a node by ID from the snapshot.
+
+ Args:
+ node_id: Node identifier
+
+ Returns:
+ Node instance or None if not found
+ """
+ node = self._nodes.get(node_id)
+ # Return a copy to prevent mutation of snapshot
+ return node.model_copy(deep=True) if node else None
+
+ def query(self, selector: str) -> list[Node]:
+ """
+ Query nodes using CSS selector.
+
+ Args:
+ selector: CSS selector string
+
+ Returns:
+ List of matching nodes (copies)
+ """
+ matching = []
+
+ for node in self._nodes.values():
+ try:
+ # Convert node to HTML in-memory
+ html_content = node.to_html()
+
+ # Parse the HTML string
+ parser = HtmlParser.from_string(html_content)
+
+ # Check if selector matches
+ if parser.query(f"article{selector}"):
+ # Return copy to prevent mutation
+ matching.append(node.model_copy(deep=True))
+ except Exception:
+ # Skip nodes that fail to parse
+ continue
+
+ return matching
+
+ def filter(self, predicate: Callable[[Node], bool]) -> list[Node]:
+ """
+ Filter nodes using a predicate function.
+
+ Args:
+ predicate: Function that takes Node and returns bool
+
+ Returns:
+ List of matching nodes (copies)
+ """
+ return [
+ node.model_copy(deep=True)
+ for node in self._nodes.values()
+ if predicate(node)
+ ]
+
+ def __len__(self) -> int:
+ """Get number of nodes in snapshot."""
+ return len(self._nodes)
+
+ def __contains__(self, node_id: str) -> bool:
+ """Check if node exists in snapshot."""
+ return node_id in self._nodes
+
+ def __iter__(self) -> Iterator[Node]:
+ """Iterate over nodes in snapshot (returns copies)."""
+ return iter(node.model_copy(deep=True) for node in self._nodes.values())
+
+ @property
+ def nodes(self) -> dict[str, Node]:
+ """Get all nodes as a dict (returns copies)."""
+ return {
+ node_id: node.model_copy(deep=True) for node_id, node in self._nodes.items()
+ }


  class HtmlGraph:
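A note on the two classes added above: they are exercised through HtmlGraph methods that appear later in this diff (compile_query, query_compiled, snapshot). The following is an illustrative sketch only, not part of the package diff; it assumes HtmlGraph is imported from htmlgraph.graph as defined in this file, and the selector and node ID are placeholder values.

    from htmlgraph.graph import HtmlGraph

    graph = HtmlGraph("features/")

    # Compile a selector once and reuse it; repeated executions also hit the query cache.
    blocked_q = graph.compile_query("[data-status='blocked']")
    blocked = graph.query_compiled(blocked_q)

    # Take an immutable, deep-copied snapshot; later writes to the graph do not affect it.
    snap = graph.snapshot()
    if "feature-001" in snap:
        frozen = snap.get("feature-001")  # returns a copy, safe to read concurrently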
@@ -38,8 +213,8 @@ class HtmlGraph:
  self,
  directory: Path | str,
  stylesheet_path: str = "../styles.css",
- auto_load: bool = True,
- pattern: str | list[str] = "*.html"
+ auto_load: bool = False,
+ pattern: str | list[str] = "*.html",
  ):
  """
  Initialize graph from a directory.
@@ -47,7 +222,7 @@ class HtmlGraph:
  Args:
  directory: Directory containing HTML node files
  stylesheet_path: Default stylesheet path for new files
- auto_load: Whether to load all nodes on init
+ auto_load: Whether to load all nodes on init (default: False for lazy loading)
  pattern: Glob pattern(s) for node files. Can be a single pattern or list.
  Examples: "*.html", ["*.html", "*/index.html"]
  """
@@ -59,10 +234,84 @@ class HtmlGraph:
  self._nodes: dict[str, Node] = {}
  self._converter = NodeConverter(directory, stylesheet_path)
  self._edge_index = EdgeIndex()
+ self._attr_index = AttributeIndex()
+ self._query_cache: dict[str, list[Node]] = {}
+ self._adjacency_cache: dict[str, dict[str, list[str]]] | None = None
+ self._cache_enabled: bool = True
+ self._explicitly_loaded: bool = False
+ self._file_hashes: dict[str, str] = {} # Track file content hashes
+
+ # Query compilation cache (LRU cache with max 100 compiled queries)
+ self._compiled_queries: dict[str, CompiledQuery] = {}
+ self._compiled_query_max_size: int = 100
+
+ # Performance metrics
+ self._metrics = {
+ "query_count": 0,
+ "cache_hits": 0,
+ "cache_misses": 0,
+ "reload_count": 0,
+ "single_reload_count": 0,
+ "total_query_time_ms": 0.0,
+ "slowest_query_ms": 0.0,
+ "slowest_query_selector": "",
+ "last_reload_time_ms": 0.0,
+ "compiled_queries": 0,
+ "compiled_query_hits": 0,
+ "auto_compiled_count": 0,
+ }
+
+ # Check for env override (backwards compatibility)
+ if os.environ.get("HTMLGRAPH_EAGER_LOAD") == "1":
+ auto_load = True

  if auto_load:
  self.reload()

+ def _invalidate_cache(self) -> None:
+ """Clear query, adjacency, attribute, and compiled query caches. Called when graph is modified."""
+ self._query_cache.clear()
+ self._compiled_queries.clear()
+ self._adjacency_cache = None
+ self._attr_index.clear()
+
+ def _compute_file_hash(self, filepath: Path) -> str:
+ """
+ Compute MD5 hash of file content.
+
+ Args:
+ filepath: Path to file to hash
+
+ Returns:
+ MD5 hash as hex string
+ """
+ try:
+ content = filepath.read_bytes()
+ return hashlib.md5(content).hexdigest()
+ except Exception:
+ return ""
+
+ def has_file_changed(self, filepath: Path | str) -> bool:
+ """
+ Check if a file has changed since it was last loaded.
+
+ Args:
+ filepath: Path to file to check
+
+ Returns:
+ True if file changed or not yet loaded, False if unchanged
+ """
+ filepath = Path(filepath)
+ if not filepath.exists():
+ return True
+
+ filepath_str = str(filepath)
+ current_hash = self._compute_file_hash(filepath)
+ stored_hash = self._file_hashes.get(filepath_str)
+
+ # If no stored hash or hash changed, file has changed
+ return stored_hash is None or current_hash != stored_hash
+
  def reload(self) -> int:
  """
  Reload all nodes from disk.
@@ -70,14 +319,70 @@ class HtmlGraph:
  Returns:
  Number of nodes loaded
  """
- self._nodes.clear()
- for node in self._converter.load_all(self.pattern):
- self._nodes[node.id] = node
+ start = time.perf_counter()
+ self._cache_enabled = False # Disable during reload
+ try:
+ self._nodes.clear()
+ self._file_hashes.clear()
+
+ # Load all nodes and compute file hashes
+ for node in self._converter.load_all(self.pattern):
+ self._nodes[node.id] = node
+
+ # Find and hash the node file
+ filepath = self._find_node_file(node.id)
+ if filepath:
+ file_hash = self._compute_file_hash(filepath)
+ self._file_hashes[str(filepath)] = file_hash
+
+ # Rebuild edge index for O(1) reverse lookups
+ # Rebuild attribute index for O(1) attribute lookups
+ self._attr_index.rebuild(self._nodes)
+ self._edge_index.rebuild(self._nodes)
+
+ self._explicitly_loaded = True
+
+ # Track metrics
+ elapsed_ms = (time.perf_counter() - start) * 1000
+ reload_count: int = int(self._metrics.get("reload_count", 0)) # type: ignore[call-overload]
+ self._metrics["reload_count"] = reload_count + 1
+ self._metrics["last_reload_time_ms"] = elapsed_ms
+
+ return len(self._nodes)
+ finally:
+ self._cache_enabled = True
+ self._invalidate_cache()
+
+ def _ensure_loaded(self) -> None:
+ """Ensure nodes are loaded. Called lazily on first access."""
+ if not self._explicitly_loaded and not self._nodes:
+ self.reload()

- # Rebuild edge index for O(1) reverse lookups
- self._edge_index.rebuild(self._nodes)
+ def _get_node_files(self) -> list[Path]:
+ """
+ Get all node files matching the configured pattern(s).

- return len(self._nodes)
+ Returns:
+ List of Path objects for node files
+ """
+ files: list[Path] = []
+ patterns = [self.pattern] if isinstance(self.pattern, str) else self.pattern
+ for pattern in patterns:
+ files.extend(self.directory.glob(pattern))
+ return files
+
+ def _filepath_to_node_id(self, filepath: Path) -> str:
+ """
+ Extract node ID from a filepath.
+
+ Handles:
+ - Flat files: features/node-id.html -> "node-id"
+ - Directory-based: features/node-id/index.html -> "node-id"
+ """
+ if filepath.name == "index.html":
+ return filepath.parent.name
+ else:
+ return filepath.stem

  @property
  def nodes(self) -> dict[str, Node]:
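With auto_load now defaulting to False, nodes are parsed only on first access; node_count (added further below) globs files without parsing, and the MD5 file hashes recorded above let callers skip redundant reloads. An illustrative sketch only, based on helpers shown elsewhere in this diff; the environment variable comes from the hunk above, while the directory and file name are placeholders.

    import os

    os.environ.pop("HTMLGRAPH_EAGER_LOAD", None)  # unset -> lazy loading (the new default)
    graph = HtmlGraph("features/")                # nothing is parsed yet
    total = graph.node_count                      # counts node files via glob only
    node = graph.get("feature-001")               # first access triggers a lazy reload()

    # Hash-based change detection: reload only when the file content actually changed.
    if graph.has_file_changed(graph.directory / "feature-001.html"):
        graph.reload()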
@@ -85,18 +390,272 @@ class HtmlGraph:
  return self._nodes.copy()

  def __len__(self) -> int:
- """Number of nodes in graph."""
+ """
+ Get the number of nodes in the graph.
+
+ Enables using len() on graph instances.
+
+ Returns:
+ int: Total number of nodes
+
+ Example:
+ >>> graph = HtmlGraph("features/")
+ >>> print(f"Graph has {len(graph)} nodes")
+ Graph has 42 nodes
+ """
  return len(self._nodes)

  def __contains__(self, node_id: str) -> bool:
- """Check if node exists."""
+ """
+ Check if a node exists in the graph.
+
+ Enables using 'in' operator on graph instances.
+
+ Args:
+ node_id: Node identifier to check
+
+ Returns:
+ bool: True if node exists, False otherwise
+
+ Example:
+ >>> graph = HtmlGraph("features/")
+ >>> if "feature-001" in graph:
+ ... print("Feature exists!")
+ Feature exists!
+ >>> if "nonexistent" not in graph:
+ ... print("Not found")
+ Not found
+ """
  return node_id in self._nodes

  def __iter__(self) -> Iterator[Node]:
- """Iterate over all nodes."""
+ """
+ Iterate over all nodes in the graph.
+
+ Enables using graphs in for loops and other iteration contexts.
+
+ Yields:
+ Node: Each node in the graph (in arbitrary order)
+
+ Example:
+ >>> graph = HtmlGraph("features/")
+ >>> for node in graph:
+ ... print(f"{node.id}: {node.title} [{node.status}]")
+ feature-001: User Auth [in-progress]
+ feature-002: Database [done]
+
+ >>> # Works with list comprehensions
+ >>> todo_titles = [n.title for n in graph if n.status == "todo"]
+ >>>
+ >>> # Works with any iterable operation
+ >>> high_priority = list(filter(lambda n: n.priority == "high", graph))
+ """
+ self._ensure_loaded()
  return iter(self._nodes.values())

  # =========================================================================
+ # Memory-Efficient Loading (for large graphs 10K+ nodes)
+ # =========================================================================
+
+ def load_chunked(self, chunk_size: int = 100) -> Iterator[list[Node]]:
+ """
+ Yield nodes in chunks for memory-efficient processing.
+
+ Loads nodes in batches without loading the entire graph into memory.
+ Useful for large graphs (10K+ nodes).
+
+ Args:
+ chunk_size: Number of nodes per chunk (default: 100)
+
+ Yields:
+ List of nodes (up to chunk_size per batch)
+
+ Example:
+ >>> graph = HtmlGraph("features/")
+ >>> for chunk in graph.load_chunked(chunk_size=50):
+ ... # Process 50 nodes at a time
+ ... for node in chunk:
+ ... print(node.title)
+ """
+ files = self._get_node_files()
+
+ # Yield nodes in chunks
+ for i in range(0, len(files), chunk_size):
+ chunk = []
+ for filepath in files[i : i + chunk_size]:
+ try:
+ node_id = self._filepath_to_node_id(filepath)
+ node = self._converter.load(node_id)
+ if node:
+ chunk.append(node)
+ except Exception:
+ # Skip files that fail to parse
+ continue
+ if chunk:
+ yield chunk
+
+ def iter_nodes(self) -> Iterator[Node]:
+ """
+ Iterate over all nodes without loading all into memory.
+
+ Memory-efficient iteration for large graphs. Loads nodes one at a time
+ instead of loading the entire graph.
+
+ Yields:
+ Node: Individual nodes from the graph
+
+ Example:
+ >>> graph = HtmlGraph("features/")
+ >>> for node in graph.iter_nodes():
+ ... if node.status == "blocked":
+ ... print(f"Blocked: {node.title}")
+ """
+ for filepath in self._get_node_files():
+ try:
+ node_id = self._filepath_to_node_id(filepath)
+ node = self._converter.load(node_id)
+ if node:
+ yield node
+ except Exception:
+ # Skip files that fail to parse
+ continue
+
+ @property
+ def node_count(self) -> int:
+ """
+ Count nodes without loading them.
+
+ Efficient count by globbing files without parsing HTML.
+
+ Returns:
+ Number of nodes in the graph
+
+ Example:
+ >>> graph = HtmlGraph("features/")
+ >>> print(f"Graph has {graph.node_count} nodes")
+ Graph has 42 nodes
+ """
+ return len(self._get_node_files())
+
+ # =========================================================================
+
+ # =========================================================================
+ # Transaction & Snapshot Support
+ # =========================================================================
+
+ def snapshot(self) -> GraphSnapshot:
+ """
+ Create an immutable snapshot of the current graph state.
+
+ The snapshot is a frozen copy that won't be affected by subsequent
+ changes to the graph. Useful for:
+ - Concurrent read operations
+ - Comparing graph state before/after changes
+ - Safe multi-agent scenarios
+
+ Returns:
+ GraphSnapshot: Immutable view of current graph state
+
+ Example:
+ # Agent 1 takes snapshot
+ snapshot = graph.snapshot()
+
+ # Agent 2 modifies graph
+ graph.update(node)
+
+ # Agent 1's snapshot is unchanged
+ old_node = snapshot.get("feature-001")
+ """
+ self._ensure_loaded()
+ return GraphSnapshot(self._nodes, self.directory)
+
+ @contextmanager
+ def transaction(self) -> Iterator[Any]:
+ """
+ Context manager for atomic multi-operation transactions.
+
+ Operations performed within the transaction are batched and applied
+ atomically. If any exception occurs, no changes are persisted.
+
+ Yields:
+ TransactionContext: Context for collecting operations
+
+ Raises:
+ Exception: Any exception from operations causes rollback
+
+ Example:
+ # All-or-nothing batch update
+ with graph.transaction() as tx:
+ tx.add(node1)
+ tx.update(node2)
+ tx.delete("feature-003")
+ # All changes persisted atomically
+
+ # Failed transaction (rollback)
+ try:
+ with graph.transaction() as tx:
+ tx.add(node1)
+ tx.update(invalid_node) # Raises error
+ except Exception:
+ pass # No changes persisted
+ """
+ # Create snapshot before transaction
+ snapshot_nodes = {
+ node_id: node.model_copy(deep=True) for node_id, node in self._nodes.items()
+ }
+ snapshot_file_hashes = self._file_hashes.copy()
+
+ # Transaction context for collecting operations
+ class TransactionContext:
+ def __init__(self, graph: "HtmlGraph"):
+ self._graph = graph
+ self._operations: list[Callable[[], Any]] = []
+
+ def add(self, node: Node, overwrite: bool = False) -> "TransactionContext":
+ """Queue an add operation."""
+ self._operations.append(
+ lambda: self._graph.add(node, overwrite=overwrite)
+ )
+ return self
+
+ def update(self, node: Node) -> "TransactionContext":
+ """Queue an update operation."""
+ self._operations.append(lambda: self._graph.update(node))
+ return self
+
+ def delete(self, node_id: str) -> "TransactionContext":
+ """Queue a delete operation."""
+ self._operations.append(lambda: self._graph.delete(node_id))
+ return self
+
+ def remove(self, node_id: str) -> "TransactionContext":
+ """Queue a remove operation (alias for delete)."""
+ return self.delete(node_id)
+
+ def _commit(self) -> None:
+ """Execute all queued operations."""
+ for operation in self._operations:
+ operation()
+
+ tx = TransactionContext(self)
+
+ try:
+ yield tx
+ # Commit all operations if no exceptions
+ tx._commit()
+ except Exception:
+ # Rollback: restore snapshot state
+ self._nodes = snapshot_nodes
+ self._file_hashes = snapshot_file_hashes
+ self._invalidate_cache()
+
+ # Rebuild indexes from restored state
+ self._edge_index.rebuild(self._nodes)
+ self._attr_index.rebuild(self._nodes)
+
+ # Re-raise exception
+ raise
+
  # CRUD Operations
  # =========================================================================

@@ -117,18 +676,28 @@ class HtmlGraph:
  if node.id in self._nodes and not overwrite:
  raise ValueError(f"Node already exists: {node.id}")

- # If overwriting, remove old edges from index first
+ # If overwriting, remove old node from indexes first
  if overwrite and node.id in self._nodes:
+ old_node = self._nodes[node.id]
  self._edge_index.remove_node(node.id)
+ self._attr_index.remove_node(node.id, old_node)

  filepath = self._converter.save(node)
  self._nodes[node.id] = node

+ # Update file hash
+ file_hash = self._compute_file_hash(filepath)
+ self._file_hashes[str(filepath)] = file_hash
+
  # Add new edges to index
  for relationship, edges in node.edges.items():
  for edge in edges:
  self._edge_index.add(node.id, edge.target_id, edge.relationship)

+ # Add node to attribute index
+ self._attr_index.add_node(node.id, node)
+
+ self._invalidate_cache()
  return filepath

  def update(self, node: Node) -> Path:
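The transaction() context manager added a little earlier in this diff queues add/update/delete calls and applies them only when the with-block exits without an exception; on failure the in-memory nodes, file hashes, and both indexes are restored from the pre-transaction snapshot. A sketch of the intended call pattern, illustrative only; node_a and node_b are placeholder Node instances.

    try:
        with graph.transaction() as tx:
            tx.add(node_a).update(node_b)  # operations are queued, not applied yet
            tx.delete("feature-003")
        # leaving the block cleanly commits the queued operations in order
    except Exception:
        # any error rolls the in-memory graph state back; nothing is committed
        pass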
@@ -142,10 +711,10 @@ class HtmlGraph:
  Path to updated HTML file

  Raises:
- KeyError: If node doesn't exist
+ NodeNotFoundError: If node doesn't exist
  """
  if node.id not in self._nodes:
- raise KeyError(f"Node not found: {node.id}")
+ raise NodeNotFoundError(node.type, node.id)

  # Get current outgoing edges from the edge index (source of truth)
  # This handles the case where node and self._nodes[node.id] are the same object
@@ -154,15 +723,27 @@ class HtmlGraph:
  # Remove all old OUTGOING edges (where this node is source)
  # DO NOT use remove_node() as it removes incoming edges too!
  for edge_ref in old_outgoing:
- self._edge_index.remove(edge_ref.source_id, edge_ref.target_id, edge_ref.relationship)
+ self._edge_index.remove(
+ edge_ref.source_id, edge_ref.target_id, edge_ref.relationship
+ )

  # Add new OUTGOING edges (where this node is source)
  for relationship, edges in node.edges.items():
  for edge in edges:
  self._edge_index.add(node.id, edge.target_id, edge.relationship)

+ # Update attribute index
+ old_node = self._nodes[node.id]
+ self._attr_index.update_node(node.id, old_node, node)
+
  filepath = self._converter.save(node)
  self._nodes[node.id] = node
+
+ # Update file hash
+ file_hash = self._compute_file_hash(filepath)
+ self._file_hashes[str(filepath)] = file_hash
+
+ self._invalidate_cache()
  return filepath

  def get(self, node_id: str) -> Node | None:
@@ -175,6 +756,7 @@ class HtmlGraph:
  Returns:
  Node instance or None if not found
  """
+ self._ensure_loaded()
  return self._nodes.get(node_id)

  def get_or_load(self, node_id: str) -> Node | None:
@@ -189,8 +771,104 @@ class HtmlGraph:
189
771
  node = self._converter.load(node_id)
190
772
  if node:
191
773
  self._nodes[node_id] = node
774
+ reload_count: int = int(self._metrics.get("single_reload_count", 0)) # type: ignore[call-overload]
775
+ self._metrics["single_reload_count"] = reload_count + 1
192
776
  return node
193
777
 
778
+ def reload_node(self, node_id: str) -> Node | None:
779
+ """
780
+ Reload a single node from disk without full graph reload.
781
+
782
+ Much faster than full reload() when only one node changed.
783
+ Updates the node in cache and refreshes its edges in the index.
784
+ Uses file hash to skip reload if content hasn't changed.
785
+
786
+ Args:
787
+ node_id: ID of the node to reload
788
+
789
+ Returns:
790
+ Updated node if found and loaded, None if not found
791
+
792
+ Example:
793
+ >>> graph.reload_node("feat-001") # Reload just this node
794
+ """
795
+ # Verify the node file exists
796
+ filepath = self._find_node_file(node_id)
797
+ if not filepath:
798
+ return None
799
+
800
+ # Check if file has actually changed
801
+ if not self.has_file_changed(filepath):
802
+ # File unchanged, return cached node if available
803
+ return self._nodes.get(node_id)
804
+
805
+ try:
806
+ # Remove old node's edges from index if exists
807
+ if node_id in self._nodes:
808
+ old_node = self._nodes[node_id]
809
+ self._edge_index.remove_node_edges(node_id, old_node)
810
+
811
+ # Load updated node from disk (converter.load expects node_id)
812
+ updated_node = self._converter.load(node_id)
813
+ if not updated_node:
814
+ return None
815
+
816
+ # Update cache
817
+ self._nodes[node_id] = updated_node
818
+
819
+ # Update file hash
820
+ file_hash = self._compute_file_hash(filepath)
821
+ self._file_hashes[str(filepath)] = file_hash
822
+
823
+ # Add new edges to index
824
+ self._edge_index.add_node_edges(node_id, updated_node)
825
+
826
+ # Invalidate query cache
827
+ self._invalidate_cache()
828
+
829
+ # Track metric
830
+ reload_count: int = int(self._metrics.get("single_reload_count", 0)) # type: ignore[call-overload]
831
+ self._metrics["single_reload_count"] = reload_count + 1
832
+
833
+ return updated_node
834
+ except Exception:
835
+ return None
836
+
837
+ def _find_node_file(self, node_id: str) -> Path | None:
838
+ """
839
+ Find the file path for a node by ID.
840
+
841
+ Checks common naming patterns for node files.
842
+
843
+ Args:
844
+ node_id: Node ID to find
845
+
846
+ Returns:
847
+ Path to node file, or None if not found
848
+ """
849
+ # Try direct match patterns
850
+ patterns = [
851
+ f"{node_id}.html",
852
+ f"{node_id}/index.html",
853
+ ]
854
+
855
+ for pattern in patterns:
856
+ filepath = self.directory / pattern
857
+ if filepath.exists():
858
+ return filepath
859
+
860
+ # Fall back to scanning (slower but thorough)
861
+ for filepath in self.directory.glob("*.html"):
862
+ try:
863
+ # Quick check of file content for ID
864
+ content = filepath.read_text()
865
+ if f'id="{node_id}"' in content or f"id='{node_id}'" in content:
866
+ return filepath
867
+ except Exception:
868
+ continue
869
+
870
+ return None
871
+
194
872
  def remove(self, node_id: str) -> bool:
195
873
  """
196
874
  Remove a node from the graph.
@@ -202,10 +880,19 @@ class HtmlGraph:
  True if node was removed
  """
  if node_id in self._nodes:
- # Remove all edges involving this node from index
+ # Find and remove file hash
+ filepath = self._find_node_file(node_id)
+ if filepath:
+ self._file_hashes.pop(str(filepath), None)
+
+ # Remove node from indexes
+ old_node = self._nodes[node_id]
  self._edge_index.remove_node(node_id)
+ self._attr_index.remove_node(node_id, old_node)
  del self._nodes[node_id]
- return self._converter.delete(node_id)
+ result = self._converter.delete(node_id)
+ self._invalidate_cache()
+ return result
  return False

  def delete(self, node_id: str) -> bool:
@@ -248,9 +935,10 @@ class HtmlGraph:

  def query(self, selector: str) -> list[Node]:
  """
- Query nodes using CSS selector.
+ Query nodes using CSS selector with caching and metrics.

  Selector is applied to article element of each node.
+ Uses cached nodes instead of re-parsing from disk for better performance.

  Args:
  selector: CSS selector string
@@ -262,21 +950,53 @@ class HtmlGraph:
262
950
  graph.query("[data-status='blocked']")
263
951
  graph.query("[data-priority='high'][data-type='feature']")
264
952
  """
953
+ self._ensure_loaded()
954
+ query_count: int = int(self._metrics.get("query_count", 0)) # type: ignore[call-overload]
955
+ self._metrics["query_count"] = query_count + 1
956
+
957
+ # Check cache first
958
+ if self._cache_enabled and selector in self._query_cache:
959
+ cache_hits: int = int(self._metrics.get("cache_hits", 0)) # type: ignore[call-overload]
960
+ self._metrics["cache_hits"] = cache_hits + 1
961
+ return self._query_cache[selector].copy() # Return copy to prevent mutation
962
+
963
+ cache_misses: int = int(self._metrics.get("cache_misses", 0)) # type: ignore[call-overload]
964
+ self._metrics["cache_misses"] = cache_misses + 1
965
+
966
+ # Time the query
967
+ start = time.perf_counter()
968
+
969
+ # Perform query using cached nodes instead of disk I/O
265
970
  matching = []
266
971
 
267
- patterns = [self.pattern] if isinstance(self.pattern, str) else self.pattern
268
- for pat in patterns:
269
- for filepath in self.directory.glob(pat):
270
- if filepath.is_file():
271
- try:
272
- parser = HtmlParser.from_file(filepath)
273
- # Query for article matching selector
274
- if parser.query(f"article{selector}"):
275
- node_id = parser.get_node_id()
276
- if node_id and node_id in self._nodes:
277
- matching.append(self._nodes[node_id])
278
- except Exception:
279
- continue
972
+ for node in self._nodes.values():
973
+ try:
974
+ # Convert node to HTML in-memory
975
+ html_content = node.to_html()
976
+
977
+ # Parse the HTML string
978
+ parser = HtmlParser.from_string(html_content)
979
+
980
+ # Check if selector matches
981
+ if parser.query(f"article{selector}"):
982
+ matching.append(node)
983
+ except Exception:
984
+ # Skip nodes that fail to parse
985
+ continue
986
+
987
+ # Track timing
988
+ elapsed_ms = (time.perf_counter() - start) * 1000
989
+ total_time: float = cast(float, self._metrics.get("total_query_time_ms", 0.0))
990
+ self._metrics["total_query_time_ms"] = total_time + elapsed_ms
991
+
992
+ slowest: float = cast(float, self._metrics.get("slowest_query_ms", 0.0))
993
+ if elapsed_ms > slowest:
994
+ self._metrics["slowest_query_ms"] = elapsed_ms
995
+ self._metrics["slowest_query_selector"] = selector
996
+
997
+ # Cache result
998
+ if self._cache_enabled:
999
+ self._query_cache[selector] = matching.copy()
280
1000
 
281
1001
  return matching
282
1002
 
@@ -285,6 +1005,99 @@ class HtmlGraph:
285
1005
  results = self.query(selector)
286
1006
  return results[0] if results else None
287
1007
 
1008
+ def compile_query(self, selector: str) -> CompiledQuery:
1009
+ """
1010
+ Pre-compile a CSS selector for reuse.
1011
+
1012
+ Creates a CompiledQuery object that can be reused multiple times
1013
+ with query_compiled() for better performance when the same selector
1014
+ is used frequently.
1015
+
1016
+ Args:
1017
+ selector: CSS selector string to compile
1018
+
1019
+ Returns:
1020
+ CompiledQuery object that can be reused
1021
+
1022
+ Example:
1023
+ >>> graph = HtmlGraph("features/")
1024
+ >>> compiled = graph.compile_query("[data-status='blocked']")
1025
+ >>> results1 = graph.query_compiled(compiled)
1026
+ >>> results2 = graph.query_compiled(compiled) # Reuses compilation
1027
+ """
1028
+ # Check if already compiled
1029
+ if selector in self._compiled_queries:
1030
+ hits: int = int(self._metrics.get("compiled_query_hits", 0)) # type: ignore[call-overload]
1031
+ self._metrics["compiled_query_hits"] = hits + 1
1032
+ return self._compiled_queries[selector]
1033
+
1034
+ # Create new compiled query
1035
+ compiled = CompiledQuery(selector=selector)
1036
+ compiled_count: int = int(self._metrics.get("compiled_queries", 0)) # type: ignore[call-overload]
1037
+ self._metrics["compiled_queries"] = compiled_count + 1
1038
+
1039
+ # Add to cache (with LRU eviction if needed)
1040
+ if len(self._compiled_queries) >= self._compiled_query_max_size:
1041
+ # Evict least recently used (first item in dict)
1042
+ first_key = next(iter(self._compiled_queries))
1043
+ del self._compiled_queries[first_key]
1044
+
1045
+ self._compiled_queries[selector] = compiled
1046
+ return compiled
1047
+
1048
+ def query_compiled(self, compiled: CompiledQuery) -> list[Node]:
1049
+ """
1050
+ Execute a pre-compiled query.
1051
+
1052
+ Uses the regular query cache if available, otherwise executes
1053
+ the compiled query and caches the result.
1054
+
1055
+ Args:
1056
+ compiled: CompiledQuery object from compile_query()
1057
+
1058
+ Returns:
1059
+ List of matching nodes
1060
+
1061
+ Example:
1062
+ >>> compiled = graph.compile_query("[data-priority='high']")
1063
+ >>> high_priority = graph.query_compiled(compiled)
1064
+ """
1065
+ self._ensure_loaded()
1066
+ selector = compiled.selector
1067
+ query_count: int = int(self._metrics.get("query_count", 0)) # type: ignore[call-overload]
1068
+ self._metrics["query_count"] = query_count + 1
1069
+
1070
+ # Check cache first (same cache as regular query())
1071
+ if self._cache_enabled and selector in self._query_cache:
1072
+ cache_hits: int = int(self._metrics.get("cache_hits", 0)) # type: ignore[call-overload]
1073
+ self._metrics["cache_hits"] = cache_hits + 1
1074
+ return self._query_cache[selector].copy()
1075
+
1076
+ cache_misses: int = int(self._metrics.get("cache_misses", 0)) # type: ignore[call-overload]
1077
+ self._metrics["cache_misses"] = cache_misses + 1
1078
+
1079
+ # Time the query
1080
+ start = time.perf_counter()
1081
+
1082
+ # Execute compiled query
1083
+ matching = compiled.execute(self._nodes)
1084
+
1085
+ # Track timing
1086
+ elapsed_ms = (time.perf_counter() - start) * 1000
1087
+ total_time: float = cast(float, self._metrics.get("total_query_time_ms", 0.0))
1088
+ self._metrics["total_query_time_ms"] = total_time + elapsed_ms
1089
+
1090
+ slowest: float = cast(float, self._metrics.get("slowest_query_ms", 0.0))
1091
+ if elapsed_ms > slowest:
1092
+ self._metrics["slowest_query_ms"] = elapsed_ms
1093
+ self._metrics["slowest_query_selector"] = selector
1094
+
1095
+ # Cache result
1096
+ if self._cache_enabled:
1097
+ self._query_cache[selector] = matching.copy()
1098
+
1099
+ return matching
1100
+
288
1101
  def filter(self, predicate: Callable[[Node], bool]) -> list[Node]:
289
1102
  """
290
1103
  Filter nodes using a Python predicate function.
@@ -298,19 +1111,104 @@ class HtmlGraph:
298
1111
  Example:
299
1112
  graph.filter(lambda n: n.status == "todo" and n.priority == "high")
300
1113
  """
1114
+ self._ensure_loaded()
301
1115
  return [node for node in self._nodes.values() if predicate(node)]
302
1116
 
303
1117
  def by_status(self, status: str) -> list[Node]:
304
- """Get all nodes with given status."""
305
- return self.filter(lambda n: n.status == status)
1118
+ """
1119
+ Get all nodes with given status (O(1) lookup via attribute index).
1120
+
1121
+ Uses the attribute index for efficient lookups instead of
1122
+ filtering all nodes.
1123
+
1124
+ Args:
1125
+ status: Status value to filter by
1126
+
1127
+ Returns:
1128
+ List of nodes with the given status
1129
+ """
1130
+ self._ensure_loaded()
1131
+ self._attr_index.ensure_built(self._nodes)
1132
+ node_ids = self._attr_index.get_by_status(status)
1133
+ return [self._nodes[node_id] for node_id in node_ids if node_id in self._nodes]
306
1134
 
307
1135
  def by_type(self, node_type: str) -> list[Node]:
308
- """Get all nodes with given type."""
309
- return self.filter(lambda n: n.type == node_type)
1136
+ """
1137
+ Get all nodes with given type (O(1) lookup via attribute index).
1138
+
1139
+ Uses the attribute index for efficient lookups instead of
1140
+ filtering all nodes.
1141
+
1142
+ Args:
1143
+ node_type: Node type to filter by
1144
+
1145
+ Returns:
1146
+ List of nodes with the given type
1147
+ """
1148
+ self._ensure_loaded()
1149
+ self._attr_index.ensure_built(self._nodes)
1150
+ node_ids = self._attr_index.get_by_type(node_type)
1151
+ return [self._nodes[node_id] for node_id in node_ids if node_id in self._nodes]
310
1152
 
311
1153
  def by_priority(self, priority: str) -> list[Node]:
312
- """Get all nodes with given priority."""
313
- return self.filter(lambda n: n.priority == priority)
1154
+ """
1155
+ Get all nodes with given priority (O(1) lookup via attribute index).
1156
+
1157
+ Uses the attribute index for efficient lookups instead of
1158
+ filtering all nodes.
1159
+
1160
+ Args:
1161
+ priority: Priority value to filter by
1162
+
1163
+ Returns:
1164
+ List of nodes with the given priority
1165
+ """
1166
+ self._ensure_loaded()
1167
+ self._attr_index.ensure_built(self._nodes)
1168
+ node_ids = self._attr_index.get_by_priority(priority)
1169
+ return [self._nodes[node_id] for node_id in node_ids if node_id in self._nodes]
1170
+
1171
+ def get_by_status(self, status: str) -> list[Node]:
1172
+ """
1173
+ Get all nodes with given status (O(1) lookup via attribute index).
1174
+
1175
+ Alias for by_status() with explicit name for clarity.
1176
+
1177
+ Args:
1178
+ status: Status value to filter by
1179
+
1180
+ Returns:
1181
+ List of nodes with the given status
1182
+ """
1183
+ return self.by_status(status)
1184
+
1185
+ def get_by_type(self, node_type: str) -> list[Node]:
1186
+ """
1187
+ Get all nodes with given type (O(1) lookup via attribute index).
1188
+
1189
+ Alias for by_type() with explicit name for clarity.
1190
+
1191
+ Args:
1192
+ node_type: Node type to filter by
1193
+
1194
+ Returns:
1195
+ List of nodes with the given type
1196
+ """
1197
+ return self.by_type(node_type)
1198
+
1199
+ def get_by_priority(self, priority: str) -> list[Node]:
1200
+ """
1201
+ Get all nodes with given priority (O(1) lookup via attribute index).
1202
+
1203
+ Alias for by_priority() with explicit name for clarity.
1204
+
1205
+ Args:
1206
+ priority: Priority value to filter by
1207
+
1208
+ Returns:
1209
+ List of nodes with the given priority
1210
+ """
1211
+ return self.by_priority(priority)
314
1212
 
315
1213
  def query_builder(self) -> QueryBuilder:
316
1214
  """
@@ -348,7 +1246,7 @@ class HtmlGraph:
  """
  return QueryBuilder(_graph=self)

- def find(self, type: str | None = None, **kwargs) -> Node | None:
+ def find(self, type: str | None = None, **kwargs: Any) -> Node | None:
  """
  Find the first node matching the given criteria.

@@ -374,7 +1272,9 @@ class HtmlGraph:
  """
  return FindAPI(self).find(type=type, **kwargs)

- def find_all(self, type: str | None = None, limit: int | None = None, **kwargs) -> list[Node]:
+ def find_all(
+ self, type: str | None = None, limit: int | None = None, **kwargs: Any
+ ) -> list[Node]:
  """
  Find all nodes matching the given criteria.

@@ -405,10 +1305,7 @@ class HtmlGraph:
  return FindAPI(self).find_all(type=type, limit=limit, **kwargs)

  def find_related(
- self,
- node_id: str,
- relationship: str | None = None,
- direction: str = "outgoing"
+ self, node_id: str, relationship: str | None = None, direction: str = "outgoing"
  ) -> list[Node]:
  """
  Find nodes related to a given node.
@@ -428,9 +1325,7 @@ class HtmlGraph:
  # =========================================================================

  def get_incoming_edges(
- self,
- node_id: str,
- relationship: str | None = None
+ self, node_id: str, relationship: str | None = None
  ) -> list[EdgeRef]:
  """
  Get all edges pointing TO a node (O(1) lookup).
@@ -455,9 +1350,7 @@ class HtmlGraph:
  return self._edge_index.get_incoming(node_id, relationship)

  def get_outgoing_edges(
- self,
- node_id: str,
- relationship: str | None = None
+ self, node_id: str, relationship: str | None = None
  ) -> list[EdgeRef]:
  """
  Get all edges pointing FROM a node (O(1) lookup).
@@ -472,10 +1365,7 @@ class HtmlGraph:
         return self._edge_index.get_outgoing(node_id, relationship)
 
     def get_neighbors(
-        self,
-        node_id: str,
-        relationship: str | None = None,
-        direction: str = "both"
+        self, node_id: str, relationship: str | None = None, direction: str = "both"
     ) -> set[str]:
         """
         Get all neighboring node IDs connected to a node (O(1) lookup).
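The three edge accessors above are thin wrappers over `self._edge_index`, so each call is an O(1) index lookup rather than a scan. An illustrative traversal (the node ID and relationship name are placeholders):

```python
# "feature-001" and "blocked_by" are placeholders, not guaranteed to exist in your graph.
node_id = "feature-001"
incoming = graph.get_incoming_edges(node_id, relationship="blocked_by")   # list[EdgeRef]
outgoing = graph.get_outgoing_edges(node_id, relationship="blocked_by")   # list[EdgeRef]
neighbors = graph.get_neighbors(node_id, direction="both")                # set[str]
print(f"{node_id}: {len(incoming)} in, {len(outgoing)} out, {len(neighbors)} neighbors")
```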
@@ -495,10 +1385,119 @@ class HtmlGraph:
         """Access the edge index for advanced queries."""
         return self._edge_index
 
+    @property
+    def attribute_index(self) -> AttributeIndex:
+        """
+        Access the attribute index for advanced queries.
+
+        The attribute index is lazy-built on first access.
+
+        Returns:
+            AttributeIndex instance
+
+        Example:
+            >>> stats = graph.attribute_index.stats()
+            >>> print(stats)
+        """
+        self._ensure_loaded()
+        self._attr_index.ensure_built(self._nodes)
+        return self._attr_index
+
+    @property
+    def cache_stats(self) -> dict:
+        """Get cache statistics."""
+        return {
+            "cached_queries": len(self._query_cache),
+            "cache_enabled": self._cache_enabled,
+        }
+
+    @property
+    def metrics(self) -> dict:
+        """
+        Get performance metrics.
+
+        Returns:
+            Dict with query counts, cache stats, timing info
+
+        Example:
+            >>> graph.metrics
+            {
+                'query_count': 42,
+                'cache_hits': 38,
+                'cache_hit_rate': '90.5%',
+                'avg_query_time_ms': 12.3,
+                ...
+            }
+        """
+        m = self._metrics.copy()
+
+        # Calculate derived metrics
+        query_count = cast(int, m["query_count"])
+        if query_count > 0:
+            cache_hits = cast(int, m["cache_hits"])
+            total_query_time_ms = cast(float, m["total_query_time_ms"])
+            m["cache_hit_rate"] = f"{cache_hits / query_count * 100:.1f}%"
+            m["avg_query_time_ms"] = total_query_time_ms / query_count
+        else:
+            m["cache_hit_rate"] = "N/A"
+            m["avg_query_time_ms"] = 0.0
+
+        # Add current state
+        m["nodes_loaded"] = len(self._nodes)
+        m["cached_queries"] = len(self._query_cache)
+        m["compiled_queries_cached"] = len(self._compiled_queries)
+
+        # Calculate compilation hit rate
+        compiled_queries = cast(int, m["compiled_queries"])
+        compiled_query_hits = cast(int, m["compiled_query_hits"])
+        total_compilations = compiled_queries + compiled_query_hits
+        if total_compilations > 0:
+            m["compilation_hit_rate"] = (
+                f"{compiled_query_hits / total_compilations * 100:.1f}%"
+            )
+        else:
+            m["compilation_hit_rate"] = "N/A"
+
+        return m
+
+    def reset_metrics(self) -> None:
+        """Reset all performance metrics to zero."""
+        for key in self._metrics:
+            if isinstance(self._metrics[key], (int, float)):
+                self._metrics[key] = 0 if isinstance(self._metrics[key], int) else 0.0
+            else:
+                self._metrics[key] = ""
+
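Together, `attribute_index`, `cache_stats`, `metrics`, and `reset_metrics()` form the new observability surface; only the reset mutates state. A read-only inspection sketch, assuming `graph` already exists (which operations bump the underlying counters is not shown in this hunk):

```python
m = graph.metrics
print(m["query_count"], m["cache_hit_rate"], m["avg_query_time_ms"])
print(graph.cache_stats)               # {'cached_queries': ..., 'cache_enabled': ...}
print(graph.attribute_index.stats())   # per the docstring example above
graph.reset_metrics()                  # zero the counters before a measurement run
```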
     # =========================================================================
     # Graph Algorithms
     # =========================================================================
 
+    def _get_adjacency_cache(self) -> dict[str, dict[str, list[str]]]:
+        """
+        Get or build the persistent adjacency cache.
+
+        Builds the cache on first access and returns it on subsequent calls.
+        Cache structure: {node_id: {"outgoing": [ids], "incoming": [ids]}}
+
+        Returns:
+            Dict mapping node_id to dict with "outgoing" and "incoming" neighbor lists
+        """
+        if self._adjacency_cache is None:
+            self._adjacency_cache = {}
+            for node_id in self._nodes:
+                # Use edge index for efficient O(1) lookups
+                outgoing = self._edge_index.get_neighbors(
+                    node_id, relationship=None, direction="outgoing"
+                )
+                incoming = self._edge_index.get_neighbors(
+                    node_id, relationship=None, direction="incoming"
+                )
+                self._adjacency_cache[node_id] = {
+                    "outgoing": list(outgoing),
+                    "incoming": list(incoming),
+                }
+        return self._adjacency_cache
+
     def _build_adjacency(self, relationship: str | None = None) -> dict[str, set[str]]:
         """
         Build adjacency list from edges.
@@ -521,10 +1520,7 @@ class HtmlGraph:
         return adj
 
     def shortest_path(
-        self,
-        from_id: str,
-        to_id: str,
-        relationship: str | None = None
+        self, from_id: str, to_id: str, relationship: str | None = None
     ) -> list[str] | None:
         """
         Find shortest path between two nodes using BFS.
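Only the signature is reflowed here; `shortest_path()` is still a BFS over the adjacency structure and returns `None` when no path exists. An illustrative call (node IDs are placeholders):

```python
path = graph.shortest_path("feature-001", "epic-010", relationship="blocked_by")
print(" -> ".join(path) if path else "no dependency path between the two nodes")
```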
@@ -563,9 +1559,7 @@ class HtmlGraph:
         return None
 
     def transitive_deps(
-        self,
-        node_id: str,
-        relationship: str = "blocked_by"
+        self, node_id: str, relationship: str = "blocked_by"
     ) -> set[str]:
         """
         Get all transitive dependencies of a node.
@@ -600,11 +1594,7 @@ class HtmlGraph:
 
         return deps
 
-    def dependents(
-        self,
-        node_id: str,
-        relationship: str = "blocked_by"
-    ) -> set[str]:
+    def dependents(self, node_id: str, relationship: str = "blocked_by") -> set[str]:
         """
         Find all nodes that depend on this node (O(1) lookup).
 
@@ -621,7 +1611,9 @@ class HtmlGraph:
         incoming = self._edge_index.get_incoming(node_id, relationship)
         return {ref.source_id for ref in incoming}
 
-    def find_bottlenecks(self, relationship: str = "blocked_by", top_n: int = 5) -> list[tuple[str, int]]:
+    def find_bottlenecks(
+        self, relationship: str = "blocked_by", top_n: int = 5
+    ) -> list[tuple[str, int]]:
         """
         Find nodes that block the most other nodes.
 
@@ -639,9 +1631,7 @@ class HtmlGraph:
                 blocked_count[edge.target_id] += 1
 
         sorted_bottlenecks = sorted(
-            blocked_count.items(),
-            key=lambda x: x[1],
-            reverse=True
+            blocked_count.items(), key=lambda x: x[1], reverse=True
        )
 
         return sorted_bottlenecks[:top_n]
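`transitive_deps()`, `dependents()`, and `find_bottlenecks()` compose into a small dependency-analysis toolkit over the default `blocked_by` relationship. A hedged sketch (the node ID is a placeholder):

```python
deps = graph.transitive_deps("feature-001")   # everything this node transitively waits on
blocked = graph.dependents("feature-001")     # everything waiting on this node
for node_id, count in graph.find_bottlenecks(top_n=3):
    print(f"{node_id} blocks {count} other nodes")
print(len(deps), len(blocked))
```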
@@ -725,7 +1715,7 @@ class HtmlGraph:
         self,
         node_id: str,
         relationship: str = "blocked_by",
-        max_depth: int | None = None
+        max_depth: int | None = None,
     ) -> list[str]:
         """
         Get all ancestor nodes (nodes that this node depends on).
@@ -773,7 +1763,7 @@ class HtmlGraph:
         self,
         node_id: str,
         relationship: str = "blocked_by",
-        max_depth: int | None = None
+        max_depth: int | None = None,
     ) -> list[str]:
         """
         Get all descendant nodes (nodes that depend on this node).
@@ -814,10 +1804,8 @@ class HtmlGraph:
         return descendants
 
     def subgraph(
-        self,
-        node_ids: list[str] | set[str],
-        include_edges: bool = True
-    ) -> 'HtmlGraph':
+        self, node_ids: list[str] | set[str], include_edges: bool = True
+    ) -> "HtmlGraph":
         """
         Extract a subgraph containing only the specified nodes.
 
@@ -835,7 +1823,6 @@ class HtmlGraph:
             sub = graph.subgraph(deps)
         """
         import tempfile
-        from htmlgraph.models import Edge
 
         # Create new graph in temp directory
         temp_dir = tempfile.mkdtemp(prefix="htmlgraph_subgraph_")
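Besides the reflowed signature and the dropped unused `Edge` import, `subgraph()` still materialises the selection into a temporary directory. A sketch that combines it with `transitive_deps()`, extending the docstring's own `graph.subgraph(deps)` example (the node ID is a placeholder):

```python
deps = graph.transitive_deps("feature-001")
sub = graph.subgraph(deps | {"feature-001"}, include_edges=True)
print(sub.stats()["total"], "nodes in the extracted subgraph")
```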
@@ -865,9 +1852,7 @@ class HtmlGraph:
         return subgraph
 
     def connected_component(
-        self,
-        node_id: str,
-        relationship: str | None = None
+        self, node_id: str, relationship: str | None = None
     ) -> set[str]:
         """
         Get all nodes in the same connected component as the given node.
@@ -907,19 +1892,30 @@ class HtmlGraph:
         from_id: str,
         to_id: str,
         relationship: str | None = None,
-        max_length: int | None = None
+        max_length: int | None = None,
+        max_paths: int = 100,
+        timeout_seconds: float = 5.0,
     ) -> list[list[str]]:
         """
         Find all paths between two nodes.
 
+        WARNING: This method has O(V!) worst-case complexity in dense graphs.
+        Use max_paths and timeout_seconds parameters to limit execution.
+        For most use cases, prefer shortest_path() instead.
+
         Args:
-            from_id: Starting node ID
+            from_id: Source node ID
             to_id: Target node ID
-            relationship: Optional filter to specific edge type
-            max_length: Maximum path length (None = unlimited, but recommended)
+            relationship: Optional edge type filter
+            max_length: Maximum path length
+            max_paths: Maximum number of paths to return (default 100)
+            timeout_seconds: Maximum execution time (default 5.0)
 
         Returns:
-            List of paths, each path is a list of node IDs
+            List of paths (each path is list of node IDs)
+
+        Raises:
+            TimeoutError: If execution exceeds timeout_seconds
         """
         if from_id not in self._nodes or to_id not in self._nodes:
             return []
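`all_paths()` now documents its exponential worst case and gains `max_paths` and `timeout_seconds` guards, raising `TimeoutError` instead of running unbounded. A hedged calling sketch (node IDs are placeholders; partial results are discarded when the timeout fires, per the Raises note above):

```python
try:
    paths = graph.all_paths(
        "feature-001", "epic-010",
        relationship="blocked_by",
        max_length=6, max_paths=50, timeout_seconds=2.0,
    )
    print(f"{len(paths)} paths found (capped at 50)")
except TimeoutError as exc:
    print(f"gave up: {exc}")
```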
@@ -929,8 +1925,20 @@ class HtmlGraph:
 
         paths: list[list[str]] = []
         adj = self._build_adjacency(relationship)
+        start_time = time.time()
+
+        def dfs(current: str, target: str, path: list[str], visited: set[str]) -> None:
+            # Check timeout periodically (every recursive call)
+            if time.time() - start_time > timeout_seconds:
+                raise TimeoutError(
+                    f"all_paths() exceeded timeout of {timeout_seconds}s "
+                    f"(found {len(paths)} paths so far)"
+                )
+
+            # Check if we've hit the max_paths limit
+            if len(paths) >= max_paths:
+                return
 
-        def dfs(current: str, target: str, path: list[str], visited: set[str]):
             if max_length and len(path) > max_length:
                 return
 
@@ -965,35 +1973,40 @@ class HtmlGraph:
             - completion_rate: Overall completion percentage
             - edge_count: Total number of edges
         """
-        stats = {
+        by_status: defaultdict[str, int] = defaultdict(int)
+        by_type: defaultdict[str, int] = defaultdict(int)
+        by_priority: defaultdict[str, int] = defaultdict(int)
+        edge_count = 0
+
+        stats: dict[str, Any] = {
             "total": len(self._nodes),
-            "by_status": defaultdict(int),
-            "by_type": defaultdict(int),
-            "by_priority": defaultdict(int),
-            "edge_count": 0,
+            "by_status": by_status,
+            "by_type": by_type,
+            "by_priority": by_priority,
+            "edge_count": edge_count,
         }
 
         done_count = 0
         for node in self._nodes.values():
-            stats["by_status"][node.status] += 1
-            stats["by_type"][node.type] += 1
-            stats["by_priority"][node.priority] += 1
+            by_status[node.status] += 1
+            by_type[node.type] += 1
+            by_priority[node.priority] += 1
 
             for edges in node.edges.values():
-                stats["edge_count"] += len(edges)
+                edge_count += len(edges)
 
             if node.status == "done":
                 done_count += 1
 
+        stats["edge_count"] = edge_count
         stats["completion_rate"] = (
-            round(done_count / len(self._nodes) * 100, 1)
-            if self._nodes else 0
+            round(done_count / len(self._nodes) * 100, 1) if self._nodes else 0
         )
 
         # Convert defaultdicts to regular dicts
-        stats["by_status"] = dict(stats["by_status"])
-        stats["by_type"] = dict(stats["by_type"])
-        stats["by_priority"] = dict(stats["by_priority"])
+        stats["by_status"] = dict(by_status)
+        stats["by_type"] = dict(by_type)
+        stats["by_priority"] = dict(by_priority)
 
         return stats
 
@@ -1009,7 +2022,9 @@ class HtmlGraph:
         """
         lines = ["# Graph Summary"]
         stats = self.stats()
-        lines.append(f"Total: {stats['total']} nodes | Done: {stats['completion_rate']}%")
+        lines.append(
+            f"Total: {stats['total']} nodes | Done: {stats['completion_rate']}%"
+        )
 
         # Status breakdown
         status_parts = [f"{s}: {c}" for s, c in stats["by_status"].items()]
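The `stats()` refactor swaps in local accumulators so the returned mapping holds plain dicts and an int rather than live `defaultdict`s; the report builder above is only reflowed. A consumption sketch using the keys documented in that hunk (the status names inside `by_status` depend on your data):

```python
stats = graph.stats()
print(f"{stats['total']} nodes, {stats['edge_count']} edges, "
      f"{stats['completion_rate']}% done")
for status, count in stats["by_status"].items():
    print(f"  {status}: {count}")
```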
@@ -1036,6 +2051,7 @@ class HtmlGraph:
     def to_json(self) -> list[dict[str, Any]]:
         """Export all nodes as JSON-serializable list."""
         from htmlgraph.converter import node_to_dict
+
         return [node_to_dict(node) for node in self._nodes.values()]
 
     def to_mermaid(self, relationship: str | None = None) -> str: