htmlgraph 0.9.3__py3-none-any.whl → 0.27.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (331) hide show
  1. htmlgraph/.htmlgraph/.session-warning-state.json +6 -0
  2. htmlgraph/.htmlgraph/agents.json +72 -0
  3. htmlgraph/.htmlgraph/htmlgraph.db +0 -0
  4. htmlgraph/__init__.py +173 -17
  5. htmlgraph/__init__.pyi +123 -0
  6. htmlgraph/agent_detection.py +127 -0
  7. htmlgraph/agent_registry.py +45 -30
  8. htmlgraph/agents.py +160 -107
  9. htmlgraph/analytics/__init__.py +9 -2
  10. htmlgraph/analytics/cli.py +190 -51
  11. htmlgraph/analytics/cost_analyzer.py +391 -0
  12. htmlgraph/analytics/cost_monitor.py +664 -0
  13. htmlgraph/analytics/cost_reporter.py +675 -0
  14. htmlgraph/analytics/cross_session.py +617 -0
  15. htmlgraph/analytics/dependency.py +192 -100
  16. htmlgraph/analytics/pattern_learning.py +771 -0
  17. htmlgraph/analytics/session_graph.py +707 -0
  18. htmlgraph/analytics/strategic/__init__.py +80 -0
  19. htmlgraph/analytics/strategic/cost_optimizer.py +611 -0
  20. htmlgraph/analytics/strategic/pattern_detector.py +876 -0
  21. htmlgraph/analytics/strategic/preference_manager.py +709 -0
  22. htmlgraph/analytics/strategic/suggestion_engine.py +747 -0
  23. htmlgraph/analytics/work_type.py +190 -14
  24. htmlgraph/analytics_index.py +135 -51
  25. htmlgraph/api/__init__.py +3 -0
  26. htmlgraph/api/cost_alerts_websocket.py +416 -0
  27. htmlgraph/api/main.py +2498 -0
  28. htmlgraph/api/static/htmx.min.js +1 -0
  29. htmlgraph/api/static/style-redesign.css +1344 -0
  30. htmlgraph/api/static/style.css +1079 -0
  31. htmlgraph/api/templates/dashboard-redesign.html +1366 -0
  32. htmlgraph/api/templates/dashboard.html +794 -0
  33. htmlgraph/api/templates/partials/activity-feed-hierarchical.html +326 -0
  34. htmlgraph/api/templates/partials/activity-feed.html +1100 -0
  35. htmlgraph/api/templates/partials/agents-redesign.html +317 -0
  36. htmlgraph/api/templates/partials/agents.html +317 -0
  37. htmlgraph/api/templates/partials/event-traces.html +373 -0
  38. htmlgraph/api/templates/partials/features-kanban-redesign.html +509 -0
  39. htmlgraph/api/templates/partials/features.html +578 -0
  40. htmlgraph/api/templates/partials/metrics-redesign.html +346 -0
  41. htmlgraph/api/templates/partials/metrics.html +346 -0
  42. htmlgraph/api/templates/partials/orchestration-redesign.html +443 -0
  43. htmlgraph/api/templates/partials/orchestration.html +198 -0
  44. htmlgraph/api/templates/partials/spawners.html +375 -0
  45. htmlgraph/api/templates/partials/work-items.html +613 -0
  46. htmlgraph/api/websocket.py +538 -0
  47. htmlgraph/archive/__init__.py +24 -0
  48. htmlgraph/archive/bloom.py +234 -0
  49. htmlgraph/archive/fts.py +297 -0
  50. htmlgraph/archive/manager.py +583 -0
  51. htmlgraph/archive/search.py +244 -0
  52. htmlgraph/atomic_ops.py +560 -0
  53. htmlgraph/attribute_index.py +208 -0
  54. htmlgraph/bounded_paths.py +539 -0
  55. htmlgraph/builders/__init__.py +14 -0
  56. htmlgraph/builders/base.py +118 -29
  57. htmlgraph/builders/bug.py +150 -0
  58. htmlgraph/builders/chore.py +119 -0
  59. htmlgraph/builders/epic.py +150 -0
  60. htmlgraph/builders/feature.py +31 -6
  61. htmlgraph/builders/insight.py +195 -0
  62. htmlgraph/builders/metric.py +217 -0
  63. htmlgraph/builders/pattern.py +202 -0
  64. htmlgraph/builders/phase.py +162 -0
  65. htmlgraph/builders/spike.py +52 -19
  66. htmlgraph/builders/track.py +148 -72
  67. htmlgraph/cigs/__init__.py +81 -0
  68. htmlgraph/cigs/autonomy.py +385 -0
  69. htmlgraph/cigs/cost.py +475 -0
  70. htmlgraph/cigs/messages_basic.py +472 -0
  71. htmlgraph/cigs/messaging.py +365 -0
  72. htmlgraph/cigs/models.py +771 -0
  73. htmlgraph/cigs/pattern_storage.py +427 -0
  74. htmlgraph/cigs/patterns.py +503 -0
  75. htmlgraph/cigs/posttool_analyzer.py +234 -0
  76. htmlgraph/cigs/reporter.py +818 -0
  77. htmlgraph/cigs/tracker.py +317 -0
  78. htmlgraph/cli/.htmlgraph/.session-warning-state.json +6 -0
  79. htmlgraph/cli/.htmlgraph/agents.json +72 -0
  80. htmlgraph/cli/.htmlgraph/htmlgraph.db +0 -0
  81. htmlgraph/cli/__init__.py +42 -0
  82. htmlgraph/cli/__main__.py +6 -0
  83. htmlgraph/cli/analytics.py +1424 -0
  84. htmlgraph/cli/base.py +685 -0
  85. htmlgraph/cli/constants.py +206 -0
  86. htmlgraph/cli/core.py +954 -0
  87. htmlgraph/cli/main.py +147 -0
  88. htmlgraph/cli/models.py +475 -0
  89. htmlgraph/cli/templates/__init__.py +1 -0
  90. htmlgraph/cli/templates/cost_dashboard.py +399 -0
  91. htmlgraph/cli/work/__init__.py +239 -0
  92. htmlgraph/cli/work/browse.py +115 -0
  93. htmlgraph/cli/work/features.py +568 -0
  94. htmlgraph/cli/work/orchestration.py +676 -0
  95. htmlgraph/cli/work/report.py +728 -0
  96. htmlgraph/cli/work/sessions.py +466 -0
  97. htmlgraph/cli/work/snapshot.py +559 -0
  98. htmlgraph/cli/work/tracks.py +486 -0
  99. htmlgraph/cli_commands/__init__.py +1 -0
  100. htmlgraph/cli_commands/feature.py +195 -0
  101. htmlgraph/cli_framework.py +115 -0
  102. htmlgraph/collections/__init__.py +18 -0
  103. htmlgraph/collections/base.py +415 -98
  104. htmlgraph/collections/bug.py +53 -0
  105. htmlgraph/collections/chore.py +53 -0
  106. htmlgraph/collections/epic.py +53 -0
  107. htmlgraph/collections/feature.py +12 -26
  108. htmlgraph/collections/insight.py +100 -0
  109. htmlgraph/collections/metric.py +92 -0
  110. htmlgraph/collections/pattern.py +97 -0
  111. htmlgraph/collections/phase.py +53 -0
  112. htmlgraph/collections/session.py +194 -0
  113. htmlgraph/collections/spike.py +56 -16
  114. htmlgraph/collections/task_delegation.py +241 -0
  115. htmlgraph/collections/todo.py +511 -0
  116. htmlgraph/collections/traces.py +487 -0
  117. htmlgraph/config/cost_models.json +56 -0
  118. htmlgraph/config.py +190 -0
  119. htmlgraph/context_analytics.py +344 -0
  120. htmlgraph/converter.py +216 -28
  121. htmlgraph/cost_analysis/__init__.py +5 -0
  122. htmlgraph/cost_analysis/analyzer.py +438 -0
  123. htmlgraph/dashboard.html +2406 -307
  124. htmlgraph/dashboard.html.backup +6592 -0
  125. htmlgraph/dashboard.html.bak +7181 -0
  126. htmlgraph/dashboard.html.bak2 +7231 -0
  127. htmlgraph/dashboard.html.bak3 +7232 -0
  128. htmlgraph/db/__init__.py +38 -0
  129. htmlgraph/db/queries.py +790 -0
  130. htmlgraph/db/schema.py +1788 -0
  131. htmlgraph/decorators.py +317 -0
  132. htmlgraph/dependency_models.py +19 -2
  133. htmlgraph/deploy.py +142 -125
  134. htmlgraph/deployment_models.py +474 -0
  135. htmlgraph/docs/API_REFERENCE.md +841 -0
  136. htmlgraph/docs/HTTP_API.md +750 -0
  137. htmlgraph/docs/INTEGRATION_GUIDE.md +752 -0
  138. htmlgraph/docs/ORCHESTRATION_PATTERNS.md +717 -0
  139. htmlgraph/docs/README.md +532 -0
  140. htmlgraph/docs/__init__.py +77 -0
  141. htmlgraph/docs/docs_version.py +55 -0
  142. htmlgraph/docs/metadata.py +93 -0
  143. htmlgraph/docs/migrations.py +232 -0
  144. htmlgraph/docs/template_engine.py +143 -0
  145. htmlgraph/docs/templates/_sections/cli_reference.md.j2 +52 -0
  146. htmlgraph/docs/templates/_sections/core_concepts.md.j2 +29 -0
  147. htmlgraph/docs/templates/_sections/sdk_basics.md.j2 +69 -0
  148. htmlgraph/docs/templates/base_agents.md.j2 +78 -0
  149. htmlgraph/docs/templates/example_user_override.md.j2 +47 -0
  150. htmlgraph/docs/version_check.py +163 -0
  151. htmlgraph/edge_index.py +182 -27
  152. htmlgraph/error_handler.py +544 -0
  153. htmlgraph/event_log.py +100 -52
  154. htmlgraph/event_migration.py +13 -4
  155. htmlgraph/exceptions.py +49 -0
  156. htmlgraph/file_watcher.py +101 -28
  157. htmlgraph/find_api.py +75 -63
  158. htmlgraph/git_events.py +145 -63
  159. htmlgraph/graph.py +1122 -106
  160. htmlgraph/hooks/.htmlgraph/.session-warning-state.json +6 -0
  161. htmlgraph/hooks/.htmlgraph/agents.json +72 -0
  162. htmlgraph/hooks/.htmlgraph/index.sqlite +0 -0
  163. htmlgraph/hooks/__init__.py +45 -0
  164. htmlgraph/hooks/bootstrap.py +169 -0
  165. htmlgraph/hooks/cigs_pretool_enforcer.py +354 -0
  166. htmlgraph/hooks/concurrent_sessions.py +208 -0
  167. htmlgraph/hooks/context.py +350 -0
  168. htmlgraph/hooks/drift_handler.py +525 -0
  169. htmlgraph/hooks/event_tracker.py +1314 -0
  170. htmlgraph/hooks/git_commands.py +175 -0
  171. htmlgraph/hooks/hooks-config.example.json +12 -0
  172. htmlgraph/hooks/installer.py +343 -0
  173. htmlgraph/hooks/orchestrator.py +674 -0
  174. htmlgraph/hooks/orchestrator_reflector.py +223 -0
  175. htmlgraph/hooks/post-checkout.sh +28 -0
  176. htmlgraph/hooks/post-commit.sh +24 -0
  177. htmlgraph/hooks/post-merge.sh +26 -0
  178. htmlgraph/hooks/post_tool_use_failure.py +273 -0
  179. htmlgraph/hooks/post_tool_use_handler.py +257 -0
  180. htmlgraph/hooks/posttooluse.py +408 -0
  181. htmlgraph/hooks/pre-commit.sh +94 -0
  182. htmlgraph/hooks/pre-push.sh +28 -0
  183. htmlgraph/hooks/pretooluse.py +819 -0
  184. htmlgraph/hooks/prompt_analyzer.py +637 -0
  185. htmlgraph/hooks/session_handler.py +668 -0
  186. htmlgraph/hooks/session_summary.py +395 -0
  187. htmlgraph/hooks/state_manager.py +504 -0
  188. htmlgraph/hooks/subagent_detection.py +202 -0
  189. htmlgraph/hooks/subagent_stop.py +369 -0
  190. htmlgraph/hooks/task_enforcer.py +255 -0
  191. htmlgraph/hooks/task_validator.py +177 -0
  192. htmlgraph/hooks/validator.py +628 -0
  193. htmlgraph/ids.py +41 -27
  194. htmlgraph/index.d.ts +286 -0
  195. htmlgraph/learning.py +767 -0
  196. htmlgraph/mcp_server.py +69 -23
  197. htmlgraph/models.py +1586 -87
  198. htmlgraph/operations/README.md +62 -0
  199. htmlgraph/operations/__init__.py +79 -0
  200. htmlgraph/operations/analytics.py +339 -0
  201. htmlgraph/operations/bootstrap.py +289 -0
  202. htmlgraph/operations/events.py +244 -0
  203. htmlgraph/operations/fastapi_server.py +231 -0
  204. htmlgraph/operations/hooks.py +350 -0
  205. htmlgraph/operations/initialization.py +597 -0
  206. htmlgraph/operations/initialization.py.backup +228 -0
  207. htmlgraph/operations/server.py +303 -0
  208. htmlgraph/orchestration/__init__.py +58 -0
  209. htmlgraph/orchestration/claude_launcher.py +179 -0
  210. htmlgraph/orchestration/command_builder.py +72 -0
  211. htmlgraph/orchestration/headless_spawner.py +281 -0
  212. htmlgraph/orchestration/live_events.py +377 -0
  213. htmlgraph/orchestration/model_selection.py +327 -0
  214. htmlgraph/orchestration/plugin_manager.py +140 -0
  215. htmlgraph/orchestration/prompts.py +137 -0
  216. htmlgraph/orchestration/spawner_event_tracker.py +383 -0
  217. htmlgraph/orchestration/spawners/__init__.py +16 -0
  218. htmlgraph/orchestration/spawners/base.py +194 -0
  219. htmlgraph/orchestration/spawners/claude.py +173 -0
  220. htmlgraph/orchestration/spawners/codex.py +435 -0
  221. htmlgraph/orchestration/spawners/copilot.py +294 -0
  222. htmlgraph/orchestration/spawners/gemini.py +471 -0
  223. htmlgraph/orchestration/subprocess_runner.py +36 -0
  224. htmlgraph/orchestration/task_coordination.py +343 -0
  225. htmlgraph/orchestration.md +563 -0
  226. htmlgraph/orchestrator-system-prompt-optimized.txt +863 -0
  227. htmlgraph/orchestrator.py +669 -0
  228. htmlgraph/orchestrator_config.py +357 -0
  229. htmlgraph/orchestrator_mode.py +328 -0
  230. htmlgraph/orchestrator_validator.py +133 -0
  231. htmlgraph/parallel.py +646 -0
  232. htmlgraph/parser.py +160 -35
  233. htmlgraph/path_query.py +608 -0
  234. htmlgraph/pattern_matcher.py +636 -0
  235. htmlgraph/planning.py +147 -52
  236. htmlgraph/pydantic_models.py +476 -0
  237. htmlgraph/quality_gates.py +350 -0
  238. htmlgraph/query_builder.py +109 -72
  239. htmlgraph/query_composer.py +509 -0
  240. htmlgraph/reflection.py +443 -0
  241. htmlgraph/refs.py +344 -0
  242. htmlgraph/repo_hash.py +512 -0
  243. htmlgraph/repositories/__init__.py +292 -0
  244. htmlgraph/repositories/analytics_repository.py +455 -0
  245. htmlgraph/repositories/analytics_repository_standard.py +628 -0
  246. htmlgraph/repositories/feature_repository.py +581 -0
  247. htmlgraph/repositories/feature_repository_htmlfile.py +668 -0
  248. htmlgraph/repositories/feature_repository_memory.py +607 -0
  249. htmlgraph/repositories/feature_repository_sqlite.py +858 -0
  250. htmlgraph/repositories/filter_service.py +620 -0
  251. htmlgraph/repositories/filter_service_standard.py +445 -0
  252. htmlgraph/repositories/shared_cache.py +621 -0
  253. htmlgraph/repositories/shared_cache_memory.py +395 -0
  254. htmlgraph/repositories/track_repository.py +552 -0
  255. htmlgraph/repositories/track_repository_htmlfile.py +619 -0
  256. htmlgraph/repositories/track_repository_memory.py +508 -0
  257. htmlgraph/repositories/track_repository_sqlite.py +711 -0
  258. htmlgraph/routing.py +8 -19
  259. htmlgraph/scripts/deploy.py +1 -2
  260. htmlgraph/sdk/__init__.py +398 -0
  261. htmlgraph/sdk/__init__.pyi +14 -0
  262. htmlgraph/sdk/analytics/__init__.py +19 -0
  263. htmlgraph/sdk/analytics/engine.py +155 -0
  264. htmlgraph/sdk/analytics/helpers.py +178 -0
  265. htmlgraph/sdk/analytics/registry.py +109 -0
  266. htmlgraph/sdk/base.py +484 -0
  267. htmlgraph/sdk/constants.py +216 -0
  268. htmlgraph/sdk/core.pyi +308 -0
  269. htmlgraph/sdk/discovery.py +120 -0
  270. htmlgraph/sdk/help/__init__.py +12 -0
  271. htmlgraph/sdk/help/mixin.py +699 -0
  272. htmlgraph/sdk/mixins/__init__.py +15 -0
  273. htmlgraph/sdk/mixins/attribution.py +113 -0
  274. htmlgraph/sdk/mixins/mixin.py +410 -0
  275. htmlgraph/sdk/operations/__init__.py +12 -0
  276. htmlgraph/sdk/operations/mixin.py +427 -0
  277. htmlgraph/sdk/orchestration/__init__.py +17 -0
  278. htmlgraph/sdk/orchestration/coordinator.py +203 -0
  279. htmlgraph/sdk/orchestration/spawner.py +204 -0
  280. htmlgraph/sdk/planning/__init__.py +19 -0
  281. htmlgraph/sdk/planning/bottlenecks.py +93 -0
  282. htmlgraph/sdk/planning/mixin.py +211 -0
  283. htmlgraph/sdk/planning/parallel.py +186 -0
  284. htmlgraph/sdk/planning/queue.py +210 -0
  285. htmlgraph/sdk/planning/recommendations.py +87 -0
  286. htmlgraph/sdk/planning/smart_planning.py +319 -0
  287. htmlgraph/sdk/session/__init__.py +19 -0
  288. htmlgraph/sdk/session/continuity.py +57 -0
  289. htmlgraph/sdk/session/handoff.py +110 -0
  290. htmlgraph/sdk/session/info.py +309 -0
  291. htmlgraph/sdk/session/manager.py +103 -0
  292. htmlgraph/sdk/strategic/__init__.py +26 -0
  293. htmlgraph/sdk/strategic/mixin.py +563 -0
  294. htmlgraph/server.py +685 -180
  295. htmlgraph/services/__init__.py +10 -0
  296. htmlgraph/services/claiming.py +199 -0
  297. htmlgraph/session_hooks.py +300 -0
  298. htmlgraph/session_manager.py +1392 -175
  299. htmlgraph/session_registry.py +587 -0
  300. htmlgraph/session_state.py +436 -0
  301. htmlgraph/session_warning.py +201 -0
  302. htmlgraph/sessions/__init__.py +23 -0
  303. htmlgraph/sessions/handoff.py +756 -0
  304. htmlgraph/setup.py +34 -17
  305. htmlgraph/spike_index.py +143 -0
  306. htmlgraph/sync_docs.py +12 -15
  307. htmlgraph/system_prompts.py +450 -0
  308. htmlgraph/templates/AGENTS.md.template +366 -0
  309. htmlgraph/templates/CLAUDE.md.template +97 -0
  310. htmlgraph/templates/GEMINI.md.template +87 -0
  311. htmlgraph/templates/orchestration-view.html +350 -0
  312. htmlgraph/track_builder.py +146 -15
  313. htmlgraph/track_manager.py +69 -21
  314. htmlgraph/transcript.py +890 -0
  315. htmlgraph/transcript_analytics.py +699 -0
  316. htmlgraph/types.py +323 -0
  317. htmlgraph/validation.py +115 -0
  318. htmlgraph/watch.py +8 -5
  319. htmlgraph/work_type_utils.py +3 -2
  320. {htmlgraph-0.9.3.data → htmlgraph-0.27.5.data}/data/htmlgraph/dashboard.html +2406 -307
  321. htmlgraph-0.27.5.data/data/htmlgraph/templates/AGENTS.md.template +366 -0
  322. htmlgraph-0.27.5.data/data/htmlgraph/templates/CLAUDE.md.template +97 -0
  323. htmlgraph-0.27.5.data/data/htmlgraph/templates/GEMINI.md.template +87 -0
  324. {htmlgraph-0.9.3.dist-info → htmlgraph-0.27.5.dist-info}/METADATA +97 -64
  325. htmlgraph-0.27.5.dist-info/RECORD +337 -0
  326. {htmlgraph-0.9.3.dist-info → htmlgraph-0.27.5.dist-info}/entry_points.txt +1 -1
  327. htmlgraph/cli.py +0 -2688
  328. htmlgraph/sdk.py +0 -709
  329. htmlgraph-0.9.3.dist-info/RECORD +0 -61
  330. {htmlgraph-0.9.3.data → htmlgraph-0.27.5.data}/data/htmlgraph/styles.css +0 -0
  331. {htmlgraph-0.9.3.dist-info → htmlgraph-0.27.5.dist-info}/WHEEL +0 -0
@@ -0,0 +1,244 @@
1
+ """
2
+ Archive search engine with three-tier optimization.
3
+
4
+ Tier 1: Bloom filters (skip 70-90% of archives)
5
+ Tier 2: SQLite FTS5 with BM25 ranking (O(log n) search)
6
+ Tier 3: Snippet extraction and highlighting
7
+
8
+ Target: 67x faster than naive multi-file search.
9
+ """
10
+
11
+ from dataclasses import dataclass
12
+ from functools import lru_cache
13
+ from pathlib import Path
14
+ from typing import Any
15
+
16
+ from htmlgraph.archive.bloom import BloomFilter
17
+ from htmlgraph.archive.fts import ArchiveFTS5Index
18
+
19
+
20
+ @dataclass
21
+ class SearchResult:
22
+ """
23
+ Search result from archive search.
24
+
25
+ Attributes:
26
+ entity_id: Entity identifier
27
+ archive_file: Archive file containing entity
28
+ entity_type: Type of entity (feature, bug, etc.)
29
+ status: Entity status
30
+ title_snippet: Title with highlighted matches
31
+ description_snippet: Description with highlighted matches
32
+ rank: BM25 relevance score (lower is better)
33
+ """
34
+
35
+ entity_id: str
36
+ archive_file: str
37
+ entity_type: str
38
+ status: str
39
+ title_snippet: str
40
+ description_snippet: str
41
+ rank: float
42
+
43
+
44
+ class ArchiveSearchEngine:
45
+ """
46
+ Orchestrates three-tier archive search.
47
+
48
+ Workflow:
49
+ 1. Check Bloom filters to skip irrelevant archives (70-90% filtered)
50
+ 2. Search remaining archives with FTS5 + BM25 ranking
51
+ 3. Extract and highlight snippets for top results
52
+ """
53
+
54
+ def __init__(self, archive_dir: Path, index_dir: Path) -> None:
55
+ """
56
+ Initialize search engine.
57
+
58
+ Args:
59
+ archive_dir: Directory containing archive HTML files
60
+ index_dir: Directory for Bloom filters and FTS5 index
61
+ """
62
+ self.archive_dir = archive_dir
63
+ self.index_dir = index_dir
64
+ self.index_dir.mkdir(parents=True, exist_ok=True)
65
+
66
+ # Initialize FTS5 index
67
+ self.fts_index = ArchiveFTS5Index(index_dir / "archives.db")
68
+
69
+ # Cache for Bloom filters (avoid reloading)
70
+ self._bloom_cache: dict[str, BloomFilter] = {}
71
+
72
+ def _get_bloom_filter(self, archive_file: str) -> BloomFilter | None:
73
+ """
74
+ Get Bloom filter for archive (with caching).
75
+
76
+ Args:
77
+ archive_file: Archive filename
78
+
79
+ Returns:
80
+ BloomFilter or None if not indexed
81
+ """
82
+ if archive_file in self._bloom_cache:
83
+ return self._bloom_cache[archive_file]
84
+
85
+ bloom_path = self.index_dir / f"{archive_file}.bloom"
86
+ if bloom_path.exists():
87
+ bloom = BloomFilter.load(bloom_path)
88
+ self._bloom_cache[archive_file] = bloom
89
+ return bloom
90
+
91
+ return None
92
+
93
+ def _filter_archives_with_bloom(
94
+ self, query: str, archive_files: list[str]
95
+ ) -> list[str]:
96
+ """
97
+ Filter archive files using Bloom filters.
98
+
99
+ Args:
100
+ query: Search query
101
+ archive_files: List of all archive files
102
+
103
+ Returns:
104
+ Filtered list of archives that might contain query
105
+ """
106
+ # Tokenize query into words
107
+ query_tokens = query.lower().split()
108
+
109
+ candidates = []
110
+
111
+ for archive_file in archive_files:
112
+ bloom = self._get_bloom_filter(archive_file)
113
+
114
+ if bloom is None:
115
+ # No Bloom filter - include archive
116
+ candidates.append(archive_file)
117
+ continue
118
+
119
+ # Check if any query token might be in archive
120
+ might_match = any(bloom.might_contain(token) for token in query_tokens)
121
+
122
+ if might_match:
123
+ candidates.append(archive_file)
124
+
125
+ return candidates
126
+
127
+ @lru_cache(maxsize=100)
128
+ def _cached_search(self, query: str, limit: int) -> tuple[SearchResult, ...]:
129
+ """
130
+ Cached search implementation.
131
+
132
+ Args:
133
+ query: Search query
134
+ limit: Maximum results
135
+
136
+ Returns:
137
+ Tuple of SearchResult (immutable for caching)
138
+ """
139
+ # Get all archive files
140
+ archive_files = [f.name for f in self.archive_dir.glob("*.html")]
141
+
142
+ if not archive_files:
143
+ return tuple()
144
+
145
+ # Tier 1: Filter with Bloom filters
146
+ candidate_archives = self._filter_archives_with_bloom(query, archive_files)
147
+
148
+ if not candidate_archives:
149
+ return tuple()
150
+
151
+ # Tier 2 & 3: Search with FTS5 (includes snippet highlighting)
152
+ results = self.fts_index.search(
153
+ query, limit=limit, archive_files=candidate_archives
154
+ )
155
+
156
+ # Convert to SearchResult objects
157
+ search_results = [
158
+ SearchResult(
159
+ entity_id=r["entity_id"],
160
+ archive_file=r["archive_file"],
161
+ entity_type=r["entity_type"],
162
+ status=r["status"],
163
+ title_snippet=r["title_snippet"],
164
+ description_snippet=r["description_snippet"],
165
+ rank=r["rank"],
166
+ )
167
+ for r in results
168
+ ]
169
+
170
+ return tuple(search_results)
171
+
172
+ def search(
173
+ self, query: str, include_archived: bool = True, limit: int = 10
174
+ ) -> list[SearchResult]:
175
+ """
176
+ Search archives with three-tier optimization.
177
+
178
+ Args:
179
+ query: Search query
180
+ include_archived: Whether to search archives (future: also search active)
181
+ limit: Maximum results to return
182
+
183
+ Returns:
184
+ List of SearchResult objects sorted by relevance
185
+ """
186
+ if not include_archived:
187
+ return []
188
+
189
+ # Use cached search
190
+ results = self._cached_search(query, limit)
191
+ return list(results)
192
+
193
+ def get_search_stats(self, query: str) -> dict[str, Any]:
194
+ """
195
+ Get statistics about a search query.
196
+
197
+ Args:
198
+ query: Search query
199
+
200
+ Returns:
201
+ Dictionary with bloom_filtered_count, searched_count, etc.
202
+ """
203
+ archive_files = [f.name for f in self.archive_dir.glob("*.html")]
204
+ total_archives = len(archive_files)
205
+
206
+ candidate_archives = self._filter_archives_with_bloom(query, archive_files)
207
+ searched_count = len(candidate_archives)
208
+
209
+ bloom_filtered = total_archives - searched_count
210
+
211
+ return {
212
+ "total_archives": total_archives,
213
+ "bloom_filtered": bloom_filtered,
214
+ "searched_count": searched_count,
215
+ "filter_rate": bloom_filtered / total_archives if total_archives > 0 else 0,
216
+ }
217
+
218
+ def rebuild_bloom_filters(self) -> None:
219
+ """
220
+ Rebuild all Bloom filters from scratch.
221
+
222
+ Useful after archiving new entities.
223
+ """
224
+ # This will be implemented by ArchiveManager when creating archives
225
+ # For now, this is a placeholder
226
+ pass
227
+
228
+ def clear_cache(self) -> None:
229
+ """Clear the search cache."""
230
+ self._cached_search.cache_clear()
231
+ self._bloom_cache.clear()
232
+
233
+ def close(self) -> None:
234
+ """Close all resources."""
235
+ self.fts_index.close()
236
+ self._bloom_cache.clear()
237
+
238
+ def __enter__(self) -> "ArchiveSearchEngine":
239
+ """Context manager entry."""
240
+ return self
241
+
242
+ def __exit__(self, *args: Any) -> None:
243
+ """Context manager exit."""
244
+ self.close()