@smilintux/skcapstone 0.10.0 → 0.12.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (279) hide show
  1. package/.env.example +10 -4
  2. package/.github/workflows/ci.yml +2 -2
  3. package/.github/workflows/publish.yml +9 -2
  4. package/.openclaw-workspace.json +2 -2
  5. package/CLAUDE.md +37 -0
  6. package/MISSION.md +17 -2
  7. package/README.md +282 -3
  8. package/docker/Dockerfile +7 -7
  9. package/docker/compose-templates/dev-team.yml +12 -12
  10. package/docker/compose-templates/mini-team.yml +9 -9
  11. package/docker/compose-templates/ops-team.yml +10 -10
  12. package/docker/compose-templates/research-team.yml +10 -10
  13. package/docker/entrypoint.sh +4 -4
  14. package/docs/ADR-optional-integration-backbone.md +181 -0
  15. package/docs/ARCHITECTURE.md +186 -43
  16. package/docs/BOND_WITH_GROK.md +6 -6
  17. package/docs/CUSTOM_AGENT.md +123 -30
  18. package/docs/DREAMING.md +70 -0
  19. package/docs/GETTING_STARTED.md +7 -7
  20. package/docs/QUICKSTART.md +10 -6
  21. package/docs/SKJOULE_ARCHITECTURE.md +3 -3
  22. package/docs/SOUL_SWAPPER.md +5 -5
  23. package/docs/hammertime-audit.md +402 -0
  24. package/docs/sk-integration-HANDOFF.md +117 -0
  25. package/docs/skscheduler.md +155 -0
  26. package/docs/superpowers/examples/jobs.yaml +31 -0
  27. package/docs/superpowers/plans/2026-06-08-skscheduler.md +1265 -0
  28. package/docs/superpowers/specs/2026-06-08-skscheduler-design.md +186 -0
  29. package/examples/custom-bond-template.json +1 -1
  30. package/examples/grok-feb.json +1 -1
  31. package/examples/queen-ava-feb.json +1 -1
  32. package/launchd/{com.skcapstone.skcomm-heartbeat.plist → com.skcapstone.skcomms-heartbeat.plist} +4 -4
  33. package/launchd/{com.skcapstone.skcomm-queue-drain.plist → com.skcapstone.skcomms-queue-drain.plist} +4 -4
  34. package/launchd/install-launchd.sh +6 -6
  35. package/{openclaw-plugin → openclaw-plugin.archived-2026-04-23}/src/index.ts +3 -2
  36. package/package.json +1 -1
  37. package/pyproject.toml +16 -10
  38. package/scripts/archive-sessions.sh +7 -0
  39. package/scripts/check-updates.py +4 -4
  40. package/scripts/install-bundle.sh +8 -8
  41. package/scripts/install.ps1 +12 -11
  42. package/scripts/install.sh +159 -5
  43. package/scripts/model-fallback-monitor.sh +102 -0
  44. package/scripts/nvidia-proxy.mjs +78 -26
  45. package/scripts/refresh-anthropic-token.sh +172 -0
  46. package/scripts/release.sh +98 -0
  47. package/scripts/session-to-memory.py +219 -0
  48. package/scripts/skgateway.mjs +3 -3
  49. package/scripts/telegram-catchup-all.sh +12 -1
  50. package/scripts/verify_install.sh +2 -2
  51. package/scripts/wargov-ufo-capture/README.md +43 -0
  52. package/scripts/wargov-ufo-capture/cdp_capture_release2.py +273 -0
  53. package/scripts/wargov-ufo-capture/cdp_capture_splc_doj.py +246 -0
  54. package/scripts/wargov-ufo-capture/cdp_finish.py +271 -0
  55. package/scripts/wargov-ufo-capture/cdp_probe.py +188 -0
  56. package/scripts/wargov-ufo-capture/cdp_splc_pressrelease.py +101 -0
  57. package/scripts/wargov-ufo-capture/parse_csv.py +95 -0
  58. package/scripts/wargov-ufo-capture/pull_dvids.sh +107 -0
  59. package/scripts/watch-anthropic-token.sh +212 -0
  60. package/scripts/windows/install-tasks.ps1 +7 -7
  61. package/scripts/windows/skcapstone-task.xml +1 -1
  62. package/src/skcapstone/__init__.py +45 -3
  63. package/src/skcapstone/_cli_monolith.py +20 -15
  64. package/src/skcapstone/activity.py +5 -1
  65. package/src/skcapstone/agent_card.py +3 -2
  66. package/src/skcapstone/api.py +41 -40
  67. package/src/skcapstone/auction.py +14 -11
  68. package/src/skcapstone/backup.py +2 -1
  69. package/src/skcapstone/blueprint_registry.py +4 -3
  70. package/src/skcapstone/brain_first.py +238 -0
  71. package/src/skcapstone/changelog.py +1 -1
  72. package/src/skcapstone/chat.py +22 -17
  73. package/src/skcapstone/cli/__init__.py +9 -1
  74. package/src/skcapstone/cli/_common.py +1 -0
  75. package/src/skcapstone/cli/agents_spawner.py +5 -2
  76. package/src/skcapstone/cli/alerts.py +25 -4
  77. package/src/skcapstone/cli/bench.py +15 -15
  78. package/src/skcapstone/cli/chat.py +7 -4
  79. package/src/skcapstone/cli/consciousness.py +5 -2
  80. package/src/skcapstone/cli/context_cmd.py +18 -4
  81. package/src/skcapstone/cli/daemon.py +11 -7
  82. package/src/skcapstone/cli/gtd.py +26 -1
  83. package/src/skcapstone/cli/housekeeping.py +3 -3
  84. package/src/skcapstone/cli/identity_cmd.py +378 -0
  85. package/src/skcapstone/cli/joule_cmd.py +7 -3
  86. package/src/skcapstone/cli/memory.py +8 -6
  87. package/src/skcapstone/cli/peers_dir.py +1 -1
  88. package/src/skcapstone/cli/register_cmd.py +29 -3
  89. package/src/skcapstone/cli/scheduler_cmd.py +167 -0
  90. package/src/skcapstone/cli/session.py +25 -0
  91. package/src/skcapstone/cli/setup.py +96 -29
  92. package/src/skcapstone/cli/shell_cmd.py +53 -1
  93. package/src/skcapstone/cli/skills_cmd.py +2 -2
  94. package/src/skcapstone/cli/soul.py +8 -5
  95. package/src/skcapstone/cli/status.py +37 -11
  96. package/src/skcapstone/cli/telegram.py +21 -0
  97. package/src/skcapstone/cli/test_cmd.py +5 -5
  98. package/src/skcapstone/cli/test_connection.py +2 -2
  99. package/src/skcapstone/cli/upgrade_cmd.py +23 -14
  100. package/src/skcapstone/cli/version_cmd.py +1 -1
  101. package/src/skcapstone/cli/watch_cmd.py +9 -6
  102. package/src/skcapstone/cloud9_bridge.py +14 -14
  103. package/src/skcapstone/codex_setup.py +255 -0
  104. package/src/skcapstone/config_validator.py +7 -4
  105. package/src/skcapstone/consciousness_config.py +5 -1
  106. package/src/skcapstone/consciousness_loop.py +313 -273
  107. package/src/skcapstone/context_loader.py +121 -0
  108. package/src/skcapstone/coord_federation.py +2 -1
  109. package/src/skcapstone/coordination.py +23 -6
  110. package/src/skcapstone/crush_integration.py +2 -1
  111. package/src/skcapstone/daemon.py +132 -77
  112. package/src/skcapstone/dashboard.py +10 -10
  113. package/src/skcapstone/data/sk-agent-picker.sh +421 -0
  114. package/src/skcapstone/data/systemd/skcapstone-api.socket +9 -0
  115. package/src/skcapstone/data/systemd/skcapstone-memory-compress.service +18 -0
  116. package/src/skcapstone/data/systemd/skcapstone-memory-compress.timer +11 -0
  117. package/src/skcapstone/data/systemd/skcapstone.service +37 -0
  118. package/src/skcapstone/data/systemd/skcapstone@.service +50 -0
  119. package/src/skcapstone/data/systemd/skcomms-heartbeat.service +18 -0
  120. package/{systemd/skcomm-heartbeat.timer → src/skcapstone/data/systemd/skcomms-heartbeat.timer} +2 -2
  121. package/src/skcapstone/data/systemd/skcomms-queue-drain.service +17 -0
  122. package/{systemd/skcomm-queue-drain.timer → src/skcapstone/data/systemd/skcomms-queue-drain.timer} +2 -2
  123. package/src/skcapstone/defaults/claude/CLAUDE.md +67 -0
  124. package/src/skcapstone/defaults/claude/settings.json +74 -0
  125. package/src/skcapstone/defaults/lumina/config/claude-hooks.md +57 -0
  126. package/src/skcapstone/defaults/lumina/config/skgraph.yaml +55 -10
  127. package/src/skcapstone/defaults/lumina/config/skmemory.yaml +79 -13
  128. package/src/skcapstone/defaults/lumina/config/skvector.yaml +60 -9
  129. package/src/skcapstone/defaults/lumina/memory/long-term/18b9c0d1e2f3-cloud9-protocol.json +2 -2
  130. package/src/skcapstone/defaults/lumina/memory/long-term/a1b2c3d4e5f6-ecosystem-overview.json +2 -2
  131. package/src/skcapstone/defaults/lumina/memory/long-term/b2c3d4e5f6a7-five-pillars.json +9 -9
  132. package/src/skcapstone/defaults/lumina/memory/long-term/d4e5f6a7b8c9-site-directory.json +2 -2
  133. package/src/skcapstone/defaults/unhinged.json +13 -0
  134. package/src/skcapstone/discovery.py +43 -20
  135. package/src/skcapstone/doctor.py +941 -22
  136. package/src/skcapstone/dreaming.py +1183 -109
  137. package/src/skcapstone/emotion_tracker.py +2 -2
  138. package/src/skcapstone/export.py +4 -3
  139. package/src/skcapstone/fuse_mount.py +14 -12
  140. package/src/skcapstone/gui_installer.py +2 -2
  141. package/src/skcapstone/heartbeat.py +1 -1
  142. package/src/skcapstone/housekeeping.py +14 -14
  143. package/src/skcapstone/install_wizard.py +209 -7
  144. package/src/skcapstone/itil.py +13 -4
  145. package/src/skcapstone/kms_scheduler.py +10 -8
  146. package/src/skcapstone/launchd.py +19 -19
  147. package/src/skcapstone/mcp_launcher.py +15 -1
  148. package/src/skcapstone/mcp_server.py +83 -49
  149. package/src/skcapstone/mcp_tools/__init__.py +2 -0
  150. package/src/skcapstone/mcp_tools/_helpers.py +2 -2
  151. package/src/skcapstone/mcp_tools/ansible_tools.py +7 -4
  152. package/src/skcapstone/mcp_tools/brain_first_tools.py +90 -0
  153. package/src/skcapstone/mcp_tools/capauth_tools.py +7 -4
  154. package/src/skcapstone/mcp_tools/comm_tools.py +10 -10
  155. package/src/skcapstone/mcp_tools/coord_tools.py +8 -4
  156. package/src/skcapstone/mcp_tools/did_tools.py +11 -8
  157. package/src/skcapstone/mcp_tools/gtd_tools.py +4 -4
  158. package/src/skcapstone/mcp_tools/memory_tools.py +6 -2
  159. package/src/skcapstone/mcp_tools/notification_tools.py +22 -6
  160. package/src/skcapstone/mcp_tools/{skcomm_tools.py → skcomms_tools.py} +14 -14
  161. package/src/skcapstone/mcp_tools/soul_tools.py +8 -2
  162. package/src/skcapstone/mdns_discovery.py +2 -2
  163. package/src/skcapstone/memory_curator.py +1 -1
  164. package/src/skcapstone/memory_engine.py +10 -3
  165. package/src/skcapstone/metrics.py +30 -16
  166. package/src/skcapstone/migrate_memories.py +4 -3
  167. package/src/skcapstone/migrate_multi_agent.py +8 -7
  168. package/src/skcapstone/models.py +47 -5
  169. package/src/skcapstone/notifications.py +42 -18
  170. package/src/skcapstone/onboard.py +875 -121
  171. package/src/skcapstone/operator_link.py +170 -0
  172. package/src/skcapstone/peer_directory.py +4 -4
  173. package/src/skcapstone/peers.py +19 -19
  174. package/src/skcapstone/pillars/__init__.py +7 -5
  175. package/src/skcapstone/pillars/consciousness.py +191 -0
  176. package/src/skcapstone/pillars/identity.py +51 -7
  177. package/src/skcapstone/pillars/memory.py +9 -3
  178. package/src/skcapstone/pillars/sync.py +2 -2
  179. package/src/skcapstone/preflight.py +3 -3
  180. package/src/skcapstone/providers/docker.py +28 -28
  181. package/src/skcapstone/register.py +6 -6
  182. package/src/skcapstone/registry_client.py +5 -4
  183. package/src/skcapstone/runtime.py +14 -3
  184. package/src/skcapstone/scheduled_tasks.py +254 -19
  185. package/src/skcapstone/scheduler_jobs.py +456 -0
  186. package/src/skcapstone/scheduler_runner.py +239 -0
  187. package/src/skcapstone/scheduler_state.py +162 -0
  188. package/src/skcapstone/sdk.py +310 -0
  189. package/src/skcapstone/service_health.py +279 -39
  190. package/src/skcapstone/session_briefing.py +108 -0
  191. package/src/skcapstone/session_capture.py +1 -1
  192. package/src/skcapstone/shell.py +7 -1
  193. package/src/skcapstone/soul.py +3 -1
  194. package/src/skcapstone/soul_switch.py +3 -1
  195. package/src/skcapstone/summary.py +6 -6
  196. package/src/skcapstone/sync_engine.py +15 -15
  197. package/src/skcapstone/sync_watcher.py +2 -2
  198. package/src/skcapstone/systemd.py +55 -21
  199. package/src/skcapstone/team_comms.py +8 -8
  200. package/src/skcapstone/team_engine.py +1 -1
  201. package/src/skcapstone/testrunner.py +3 -3
  202. package/src/skcapstone/trust_graph.py +40 -5
  203. package/src/skcapstone/unified_search.py +15 -6
  204. package/src/skcapstone/uninstall_wizard.py +11 -3
  205. package/src/skcapstone/version_check.py +8 -4
  206. package/src/skcapstone/warmth_anchor.py +4 -2
  207. package/src/skcapstone/whoami.py +4 -4
  208. package/systemd/skcapstone.service +4 -6
  209. package/systemd/skcapstone@.service +7 -8
  210. package/systemd/skcomms-heartbeat.service +21 -0
  211. package/systemd/skcomms-heartbeat.timer +12 -0
  212. package/systemd/skcomms-queue-drain.service +17 -0
  213. package/systemd/skcomms-queue-drain.timer +12 -0
  214. package/tests/conftest.py +39 -0
  215. package/tests/integration/test_consciousness_e2e.py +39 -39
  216. package/tests/test_agent_card.py +1 -1
  217. package/tests/test_agent_home_scaffold.py +34 -0
  218. package/tests/test_alerts_consumer_topics.py +27 -0
  219. package/tests/test_backup.py +2 -1
  220. package/tests/test_chat.py +6 -6
  221. package/tests/test_claude_md.py +2 -2
  222. package/tests/test_cli_skills.py +10 -10
  223. package/tests/test_cli_test_cmd.py +4 -4
  224. package/tests/test_cli_test_connection.py +1 -1
  225. package/tests/test_cloud9_bridge.py +6 -6
  226. package/tests/test_consciousness_e2e.py +1 -1
  227. package/tests/test_consciousness_loop.py +10 -10
  228. package/tests/test_coordination.py +25 -0
  229. package/tests/test_cross_package.py +21 -21
  230. package/tests/test_daemon.py +4 -4
  231. package/tests/test_daemon_shutdown.py +1 -1
  232. package/tests/test_docker_provider.py +29 -29
  233. package/tests/test_doctor.py +400 -0
  234. package/tests/test_doctor_skscheduler.py +50 -0
  235. package/tests/test_dreaming_engine.py +147 -0
  236. package/tests/test_dreaming_gtd_capture.py +35 -0
  237. package/tests/test_e2e_automated.py +8 -5
  238. package/tests/test_fuse_mount.py +10 -10
  239. package/tests/test_gtd_brief.py +46 -0
  240. package/tests/test_gtd_malformed_tolerance.py +31 -0
  241. package/tests/test_housekeeping.py +15 -15
  242. package/tests/test_identity_migrate.py +251 -0
  243. package/tests/test_integration_backbone.py +598 -0
  244. package/tests/test_itil_gtd_lifecycle.py +37 -0
  245. package/tests/test_jobs_dropins.py +84 -0
  246. package/tests/test_mcp_server.py +82 -37
  247. package/tests/test_models.py +48 -4
  248. package/tests/test_multi_agent.py +31 -29
  249. package/tests/test_notifications.py +122 -32
  250. package/tests/test_onboard.py +63 -75
  251. package/tests/test_operator_link.py +78 -0
  252. package/tests/test_peers.py +14 -14
  253. package/tests/test_pillars.py +98 -0
  254. package/tests/test_preflight.py +3 -3
  255. package/tests/test_runtime.py +21 -0
  256. package/tests/test_scheduled_tasks.py +11 -6
  257. package/tests/test_scheduler_cli.py +47 -0
  258. package/tests/test_scheduler_features.py +133 -0
  259. package/tests/test_scheduler_integration.py +87 -0
  260. package/tests/test_scheduler_jobs.py +155 -0
  261. package/tests/test_scheduler_runner.py +64 -0
  262. package/tests/test_scheduler_state.py +57 -0
  263. package/tests/test_sdk.py +70 -0
  264. package/tests/test_service_health_incidents.py +34 -0
  265. package/tests/test_service_registry.py +52 -0
  266. package/tests/test_session_briefing.py +130 -0
  267. package/tests/test_snapshots.py +4 -4
  268. package/tests/test_sync_pipeline.py +26 -26
  269. package/tests/test_team_comms.py +2 -2
  270. package/tests/test_testrunner.py +2 -2
  271. package/tests/test_trust_graph.py +18 -0
  272. package/tests/test_unified_search.py +2 -2
  273. package/tests/test_version_check.py +10 -0
  274. package/tests/test_version_cmd.py +8 -8
  275. package/tests/test_whoami.py +1 -1
  276. package/systemd/skcomm-heartbeat.service +0 -18
  277. package/systemd/skcomm-queue-drain.service +0 -17
  278. /package/{openclaw-plugin → openclaw-plugin.archived-2026-04-23}/package.json +0 -0
  279. /package/{openclaw-plugin → openclaw-plugin.archived-2026-04-23}/src/openclaw.plugin.json +0 -0
@@ -0,0 +1,273 @@
1
+ #!/usr/bin/env python3
2
+ """Capture war.gov/UFO/ Release 02 via Lumina Chrome CDP.
3
+
4
+ Strategy (Release 02 is bundled into one ZIP, plus a fresh CSV + press release):
5
+ 1. Open a tab on war.gov/UFO/ to seed Akamai cookies in the Chrome session.
6
+ 2. Set Page.setDownloadBehavior to allow downloads to our target dir.
7
+ 3. Trigger ZIP download by injecting <a download href=...> and clicking it.
8
+ 4. Poll for .crdownload to drain and the final file to appear.
9
+ 5. Also fetch the new CSV in-page (text response — simpler than download).
10
+ 6. Fetch the press release HTML the same way.
11
+
12
+ Output → ~/nextcloud/cbrd21-share/reference/war-gov-UFO-PURSUE-2026/{docs/release-02, release-02-zip}/
13
+ """
14
+ from __future__ import annotations
15
+
16
+ import hashlib
17
+ import json
18
+ import sys
19
+ import time
20
+ import urllib.request
21
+ from pathlib import Path
22
+
23
+ import websocket
24
+
25
# Chrome DevTools HTTP endpoint and the page opened first to seed the session.
CDP_HTTP = "http://127.0.0.1:9222"
SEED_URL = "https://www.war.gov/UFO/"

# Release 02 artifacts: document bundle, data CSV and press release.
ZIP_URL = "https://www.war.gov/medialink/ufo/052226/release_02/release_02_document_bundle.zip"
CSV_URL = "https://www.war.gov/Portals/1/Interactive/2026/UFO/uap-data.csv"
PRESS_URL = "https://www.war.gov/News/Releases/Release/Article/4499305/department-of-war-publishes-second-release-of-unidentified-anomalous-phenomena/"

# Output locations. Both directories are created as an import-time side effect.
BASE = Path("/home/cbrd21/nextcloud/cbrd21-share/reference/war-gov-UFO-PURSUE-2026")
DOC_DIR = BASE / "docs" / "release-02"
ZIP_DIR = BASE / "release-02-zip"
for _out_dir in (DOC_DIR, ZIP_DIR):
    _out_dir.mkdir(parents=True, exist_ok=True)
37
+
38
+
39
def cdp_get(path: str, timeout: float = 10.0) -> dict | list:
    """Fetch and decode a JSON document from the DevTools HTTP endpoint.

    Args:
        path: Path appended verbatim to ``CDP_HTTP``.
        timeout: Socket timeout in seconds. Without one, a debugger endpoint
            that accepts the connection but never answers would block the
            script forever; the other HTTP helpers in this file already pass
            a timeout.

    Returns:
        The decoded JSON payload.
    """
    with urllib.request.urlopen(f"{CDP_HTTP}{path}", timeout=timeout) as response:
        return json.loads(response.read())
42
+
43
+
44
def open_tab(url: str) -> dict:
    """Open a new browser tab on *url* through the DevTools HTTP endpoint.

    Sends a PUT to ``/json/new`` with the URL as the raw query string and
    returns the decoded JSON reply.
    """
    endpoint = f"{CDP_HTTP}/json/new?{url}"
    request = urllib.request.Request(endpoint, method="PUT")
    response = urllib.request.urlopen(request, timeout=10)
    with response:
        payload = response.read()
    return json.loads(payload)
48
+
49
+
50
def close_tab(target_id: str) -> None:
    """Close the tab identified by *target_id*; any failure is ignored.

    Cleanup is best-effort: whatever goes wrong while talking to the
    endpoint is swallowed so it cannot mask the caller's own result.
    """
    endpoint = f"{CDP_HTTP}/json/close/{target_id}"
    try:
        response = urllib.request.urlopen(endpoint, timeout=5)
        response.close()
    except Exception:
        pass
56
+
57
+
58
class CDP:
    """Minimal synchronous Chrome DevTools Protocol client over one WebSocket.

    ``call`` and ``wait_event`` both read from the same socket and drop every
    frame they are not waiting for.
    """

    def __init__(self, ws_url: str):
        """Connect to the target's WebSocket debugger URL."""
        self.ws = websocket.create_connection(ws_url, timeout=120)
        self.mid = 0  # id of the most recently sent command

    def call(self, method: str, params: dict | None = None, timeout: float = 60.0) -> dict:
        """Send one command and block until its response arrives.

        Args:
            method: Protocol method name.
            params: Command parameters; ``None`` is sent as ``{}``.
            timeout: Overall budget in seconds for the matching response.

        Returns:
            The ``result`` member of the response, or ``{}`` when absent.

        Raises:
            RuntimeError: The response carries an ``error`` member.
            TimeoutError: Other frames kept arriving but the matching
                response did not show up within *timeout*.

        If the socket goes completely silent, the timeout exception raised by
        ``recv`` propagates unchanged.
        """
        self.mid += 1
        msg_id = self.mid
        self.ws.send(json.dumps({"id": msg_id, "method": method, "params": params or {}}))
        # Track an overall deadline. A per-recv timeout alone never expires
        # while unrelated events keep streaming in, so a lost response would
        # block forever.
        deadline = time.monotonic() + timeout
        while (remaining := deadline - time.monotonic()) > 0:
            self.ws.settimeout(remaining)
            msg = json.loads(self.ws.recv())
            if msg.get("id") != msg_id:
                continue
            if "error" in msg:
                raise RuntimeError(f"{method}: {msg['error']}")
            return msg.get("result", {})
        raise TimeoutError(f"{method}: no response within {timeout}s")

    def wait_event(self, name: str, timeout: float = 30.0) -> dict:
        """Block until an event called *name* arrives and return its params.

        Raises:
            TimeoutError: The event did not arrive within *timeout* seconds.
        """
        # Monotonic clock: a wall-clock adjustment must not stretch or cut
        # the wait.
        deadline = time.monotonic() + timeout
        while (remaining := deadline - time.monotonic()) > 0:
            self.ws.settimeout(max(0.1, remaining))
            try:
                raw = self.ws.recv()
            except websocket.WebSocketTimeoutException:
                continue
            msg = json.loads(raw)
            if msg.get("method") == name:
                return msg.get("params", {})
        raise TimeoutError(f"event {name} did not fire within {timeout}s")

    def close(self) -> None:
        """Close the socket, ignoring any error raised while doing so."""
        try:
            self.ws.close()
        except Exception:
            pass
94
+
95
+
96
def fetch_text_in_page(cdp: CDP, url: str) -> tuple[int, str]:
    """Fetch *url* with ``fetch()`` inside the page and return ``(status, text)``.

    The request runs in page context with ``credentials: 'include'`` and
    ``cache: 'no-store'``.

    Returns:
        ``(http_status, body_text)`` on a completed request;
        ``(-1, error_message)`` when ``fetch`` itself rejects;
        ``(0, "")`` when the evaluation produced no usable value.
    """
    # The try/catch turns a rejected fetch into a reportable value. Without
    # it the rejection collapses to (0, "") and the reason is lost.
    expr = (
        "(async () => {"
        " try {"
        f" const r = await fetch({json.dumps(url)}, {{credentials: 'include', cache: 'no-store'}});"
        " return {status: r.status, text: await r.text()};"
        " } catch (e) { return {status: -1, text: String(e)}; }"
        "})()"
    )
    res = cdp.call("Runtime.evaluate", {
        "expression": expr,
        "awaitPromise": True,
        "returnByValue": True,
    }, timeout=180)
    val = res.get("result", {}).get("value")
    if not isinstance(val, dict):
        return 0, ""
    return val.get("status", 0), val.get("text", "")
110
+
111
+
112
def trigger_download(cdp: CDP, url: str) -> None:
    """Start a browser download of *url* by clicking a temporary ``<a download>``.

    Raises:
        RuntimeError: The injected script threw inside the page, so no click
            happened. Raising here stops the caller from polling for a
            download that was never started.
    """
    expr = (
        "(() => {"
        " const a = document.createElement('a');"
        f" a.href = {json.dumps(url)};"
        " a.download = '';"
        # document.body can still be null if the page has not finished
        # loading; fall back to the root element.
        " (document.body || document.documentElement).appendChild(a);"
        " a.click();"
        " a.remove();"
        " return 'click-triggered';"
        "})()"
    )
    res = cdp.call("Runtime.evaluate", {"expression": expr, "returnByValue": True})
    details = res.get("exceptionDetails")
    if details:
        reason = (details.get("exception") or {}).get("description") or details.get("text")
        raise RuntimeError(f"download trigger failed in page: {reason}")
125
+
126
+
127
def _file_state(p: Path) -> tuple[int, float] | None:
    """Return ``(size, mtime)`` for *p*, or ``None`` if it no longer exists."""
    try:
        st = p.stat()
    except FileNotFoundError:
        # The browser renames the partial file when it finishes, so a path
        # seen by glob() can be gone by the time it is stat()ed.
        return None
    return st.st_size, st.st_mtime


def wait_for_file(path: Path, partial_glob: str, timeout: float = 1800.0, idle_threshold: float = 5.0) -> Path | None:
    """Wait for a download in *path* to finish and return the completed file.

    While any ``*.crdownload`` file exists the download counts as in progress.
    Once none remain, the newest file matching *partial_glob* is returned
    after its size has stayed unchanged for *idle_threshold* seconds.

    Args:
        path: Directory the browser downloads into.
        partial_glob: Glob pattern for the final file name.
        timeout: Overall budget in seconds.
        idle_threshold: Seconds the finished file's size must stay unchanged.

    Returns:
        The finished file, or ``None`` if *timeout* elapsed first.
    """
    deadline = time.monotonic() + timeout
    last_size = -1
    last_change = time.monotonic()
    while time.monotonic() < deadline:
        partial = [s for s in map(_file_state, path.glob("*.crdownload")) if s is not None]
        if partial:
            size = sum(sz for sz, _ in partial)
            if size != last_size:
                last_size = size
                last_change = time.monotonic()
                print(f"[download] in-progress {size/1e6:.1f} MB", flush=True)
            time.sleep(2.0)
            continue

        done = []
        for candidate in path.glob(partial_glob):
            if candidate.name.endswith(".crdownload"):
                continue
            state = _file_state(candidate)
            if state is not None:
                done.append((state, candidate))
        if not done:
            time.sleep(2.0)
            continue

        # Several files can match (e.g. a bundle left by an earlier run);
        # the most recently modified one is the download just completed.
        (size, _mtime), newest = max(done, key=lambda item: item[0][1])
        if size != last_size:
            last_size = size
            last_change = time.monotonic()
        if time.monotonic() - last_change >= idle_threshold:
            return newest
        time.sleep(1.0)
    return None
156
+
157
+
158
def sha256_file(p: Path) -> str:
    """Return the hex SHA-256 digest of the file at *p*, read in 8 MiB pieces."""
    block_size = 8 * 1024 * 1024
    digest = hashlib.sha256()
    with p.open("rb") as stream:
        for piece in iter(lambda: stream.read(block_size), b""):
            digest.update(piece)
    return digest.hexdigest()
164
+
165
+
166
def main() -> int:
    """Capture the Release 02 CSV, press release and ZIP bundle.

    Opens a tab on ``SEED_URL``, fetches the CSV and press release in page
    context, downloads the ZIP through the browser, and writes a manifest
    with the ZIP's size and SHA-256.

    Returns:
        ``0`` on success, ``2`` if the ZIP download did not complete in time.
    """
    print(f"[capture] seeding tab → {SEED_URL}", flush=True)
    tab = open_tab(SEED_URL)
    target_id = tab["id"]
    cdp = None
    try:
        # Connect inside the try so the tab is still closed when the
        # WebSocket handshake itself fails.
        cdp = CDP(tab["webSocketDebuggerUrl"])
        cdp.call("Page.enable")
        cdp.call("Runtime.enable")
        cdp.call("Network.enable", {"maxPostDataSize": 0})
        cdp.call("Page.navigate", {"url": SEED_URL})
        try:
            cdp.wait_event("Page.loadEventFired", timeout=30.0)
        except TimeoutError:
            pass  # carry on; the fetches below report their own status
        time.sleep(3.0)  # let Vue settle and cookies stick

        # ---- 1. Fetch CSV (small, text)
        print(f"[capture] fetching CSV → {CSV_URL}", flush=True)
        status, text = fetch_text_in_page(cdp, CSV_URL)
        print(f"[capture] CSV status={status} len={len(text)}", flush=True)
        # Explicit UTF-8: the text comes from the web and must not depend on
        # the locale's default encoding.
        if status == 200 and text:
            (DOC_DIR / "uap-data.csv").write_text(text, encoding="utf-8")
        else:
            (DOC_DIR / "uap-data-error.json").write_text(
                json.dumps({"status": status, "preview": text[:1000]}, indent=2),
                encoding="utf-8",
            )

        # ---- 2. Fetch press release HTML
        print(f"[capture] fetching press release → {PRESS_URL}", flush=True)
        status, text = fetch_text_in_page(cdp, PRESS_URL)
        print(f"[capture] press release status={status} len={len(text)}", flush=True)
        if status == 200 and text:
            (DOC_DIR / "press-release-2026-05-22.html").write_text(text, encoding="utf-8")
            # Try to extract clean text via DOM
            txt_expr = (
                f"(async () => {{"
                f" const r = await fetch({json.dumps(PRESS_URL)}, {{credentials: 'include'}});"
                f" const html = await r.text();"
                f" const doc = new DOMParser().parseFromString(html, 'text/html');"
                f" const article = doc.querySelector('.body-text') || doc.querySelector('article') || doc.querySelector('main') || doc.body;"
                f" return article ? article.innerText : '';"
                f"}})()"
            )
            res = cdp.call("Runtime.evaluate", {
                "expression": txt_expr,
                "awaitPromise": True,
                "returnByValue": True,
            }, timeout=60)
            article_text = res.get("result", {}).get("value", "") or ""
            if article_text:
                (DOC_DIR / "press-release-2026-05-22.txt").write_text(article_text, encoding="utf-8")
                print(f"[capture] extracted {len(article_text)} chars of article text", flush=True)

        # ---- 3. Download the ZIP bundle via download behavior + <a download> click
        print(f"[capture] setting download dir → {ZIP_DIR}", flush=True)
        cdp.call("Page.setDownloadBehavior", {
            "behavior": "allow",
            "downloadPath": str(ZIP_DIR),
        })
        # Also try Browser.setDownloadBehavior which is the newer API
        try:
            cdp.call("Browser.setDownloadBehavior", {
                "behavior": "allow",
                "downloadPath": str(ZIP_DIR),
                "eventsEnabled": True,
            })
        except Exception as e:
            print(f"[capture] Browser.setDownloadBehavior not supported: {e}", flush=True)

        print(f"[capture] triggering ZIP download → {ZIP_URL}", flush=True)
        trigger_download(cdp, ZIP_URL)

        # Poll for completion
        zip_file = wait_for_file(ZIP_DIR, "release_02_document_bundle*.zip", timeout=1800.0, idle_threshold=5.0)
        if not zip_file:
            print("[capture] ZIP download did NOT complete in 30 min — check ZIP_DIR manually", flush=True)
            # Diagnostic: list what's in there
            for f in ZIP_DIR.iterdir():
                print(f" {f.name} {f.stat().st_size}", flush=True)
            return 2

        zip_size = zip_file.stat().st_size
        size_mb = zip_size / 1e6
        sha = sha256_file(zip_file)
        print(f"[capture] ZIP done: {zip_file.name} {size_mb:.1f} MB sha256={sha}", flush=True)

        # Write manifest
        manifest = {
            "release": "02",
            "release_date": "2026-05-22",
            "captured_at": time.strftime("%Y-%m-%dT%H:%M:%S%z"),
            "zip_url": ZIP_URL,
            "zip_path": str(zip_file),
            "zip_size_bytes": zip_size,
            "zip_sha256": sha,
            "csv_url": CSV_URL,
            "press_url": PRESS_URL,
            "capture_method": "Lumina Chrome CDP (port 9222) — page-context fetch + <a download> click",
        }
        (DOC_DIR / "release-02-manifest.json").write_text(json.dumps(manifest, indent=2), encoding="utf-8")
        print("[capture] manifest written", flush=True)

        return 0
    finally:
        if cdp is not None:
            cdp.close()
        close_tab(target_id)


if __name__ == "__main__":
    sys.exit(main())
@@ -0,0 +1,246 @@
1
+ #!/usr/bin/env python3
2
+ """Capture the PRIMARY DOJ sources for the SPLC superseding-indictment finding via Lumina Chrome CDP.
3
+
4
+ Both targets returned HTTP 403 to WebFetch (Akamai TLS-fingerprint/bot gate). Driving
5
+ Lumina's already-authenticated Chrome (port 9222) in page context bypasses the gate.
6
+
7
+ Targets:
8
+ 1. Indictment PDF -> https://www.justice.gov/opa/media/1437146/dl (download)
9
+ 2. DOJ press release -> discovered via justice.gov news search for "Southern Poverty Law Center"
10
+
11
+ Goal (per finding 2026-06-04_splc-doj-superseding-indictment-oneill-thread.md):
12
+ resolve the 2010-vs-2014 conduct window, the F-30 "$70K" figure, and confirm count
13
+ language first-hand rather than via secondary quotation.
14
+
15
+ Output -> ~/clawd/skills/substance-lens/captures/splc-doj-2026-06-03/
16
+ """
17
+ from __future__ import annotations
18
+
19
+ import hashlib
20
+ import json
21
+ import re
22
+ import sys
23
+ import time
24
+ import urllib.request
25
+ from pathlib import Path
26
+
27
+ import websocket
28
+
29
# DevTools HTTP endpoint plus the justice.gov pages this script touches.
CDP_HTTP = "http://127.0.0.1:9222"
SEED_URL = "https://www.justice.gov/"
PDF_URL = "https://www.justice.gov/opa/media/1437146/dl"
SEARCH_URL = "https://www.justice.gov/news?search_api_fulltext=Southern+Poverty+Law+Center"

# Capture directory, created as an import-time side effect if missing.
OUT = Path("/home/cbrd21/clawd/skills/substance-lens/captures/splc-doj-2026-06-03")
OUT.mkdir(exist_ok=True, parents=True)
36
+
37
+
38
def open_tab(url: str) -> dict:
    """Create a browser tab pointed at *url* and return its JSON description.

    The DevTools HTTP endpoint receives a PUT on ``/json/new`` with the URL
    as the raw query string; the reply body is decoded as JSON.
    """
    new_tab_request = urllib.request.Request(
        f"{CDP_HTTP}/json/new?{url}",
        method="PUT",
    )
    with urllib.request.urlopen(new_tab_request, timeout=10) as reply:
        body = reply.read()
    return json.loads(body)
42
+
43
+
44
def close_tab(target_id: str) -> None:
    """Ask the DevTools endpoint to close tab *target_id*, ignoring any failure.

    This is best-effort cleanup, so every exception is swallowed.
    """
    close_url = f"{CDP_HTTP}/json/close/{target_id}"
    try:
        reply = urllib.request.urlopen(close_url, timeout=5)
        reply.close()
    except Exception:
        pass
50
+
51
+
52
class CDP:
    """Small blocking Chrome DevTools Protocol client bound to one WebSocket.

    ``call`` and ``wait_event`` share the socket; each discards frames it is
    not waiting for.
    """

    def __init__(self, ws_url: str):
        """Open the WebSocket connection to *ws_url*."""
        self.ws = websocket.create_connection(ws_url, timeout=120)
        self.mid = 0  # last command id handed out

    def call(self, method: str, params: dict | None = None, timeout: float = 60.0) -> dict:
        """Issue a protocol command and wait for the reply with the same id.

        Args:
            method: Protocol method name.
            params: Command parameters; ``None`` is sent as ``{}``.
            timeout: Total seconds to wait for the matching reply.

        Returns:
            The reply's ``result`` member, or ``{}`` if it has none.

        Raises:
            RuntimeError: The reply contains an ``error`` member.
            TimeoutError: Frames kept arriving but none was the matching
                reply before *timeout* ran out.

        When no frame arrives at all, the timeout exception raised by
        ``recv`` propagates unchanged.
        """
        self.mid += 1
        msg_id = self.mid
        self.ws.send(json.dumps({"id": msg_id, "method": method, "params": params or {}}))
        # Enforce one overall deadline. A per-recv timeout is reset by every
        # unrelated event, so it can never fire while events keep flowing.
        deadline = time.monotonic() + timeout
        while (remaining := deadline - time.monotonic()) > 0:
            self.ws.settimeout(remaining)
            msg = json.loads(self.ws.recv())
            if msg.get("id") != msg_id:
                continue
            if "error" in msg:
                raise RuntimeError(f"{method}: {msg['error']}")
            return msg.get("result", {})
        raise TimeoutError(f"{method}: no response within {timeout}s")

    def wait_event(self, name: str, timeout: float = 30.0) -> dict:
        """Wait for the event *name* and return its ``params``.

        Raises:
            TimeoutError: No such event arrived within *timeout* seconds.
        """
        # Monotonic clock so wall-clock adjustments cannot distort the wait.
        deadline = time.monotonic() + timeout
        while (remaining := deadline - time.monotonic()) > 0:
            self.ws.settimeout(max(0.1, remaining))
            try:
                raw = self.ws.recv()
            except websocket.WebSocketTimeoutException:
                continue
            msg = json.loads(raw)
            if msg.get("method") == name:
                return msg.get("params", {})
        raise TimeoutError(f"event {name} did not fire within {timeout}s")

    def close(self) -> None:
        """Close the WebSocket; errors during close are ignored."""
        try:
            self.ws.close()
        except Exception:
            pass
88
+
89
+
90
def fetch_text_in_page(cdp: CDP, url: str) -> tuple[int, str]:
    """Run ``fetch(url)`` inside the page and return ``(status, text)``.

    The page-side script catches its own failures and reports them as status
    ``-1`` with the error message as text. A missing or empty evaluation
    value yields ``(0, "")``.
    """
    js_url = json.dumps(url)
    script = "".join([
        "(async () => {",
        " try {",
        f" const r = await fetch({js_url}, {{credentials: 'include', cache: 'no-store'}});",
        " return {status: r.status, text: await r.text()};",
        " } catch (e) { return {status: -1, text: String(e)}; }",
        "})()",
    ])
    evaluate_params = {
        "expression": script,
        "awaitPromise": True,
        "returnByValue": True,
    }
    reply = cdp.call("Runtime.evaluate", evaluate_params, timeout=180)
    payload = reply.get("result", {}).get("value", {}) or {}
    return payload.get("status", 0), payload.get("text", "")
106
+
107
+
108
def fetch_pdf_b64_in_page(cdp: CDP, url: str) -> tuple[int, str, int]:
    """Fetch a binary resource in page context and return it base64-encoded.

    Returns ``(status, base64_text, byte_length)``. The page-side script
    reports its own failures as status ``-1`` with an empty payload. A
    missing or empty evaluation value yields ``(0, "", 0)``.
    """
    js_url = json.dumps(url)
    script = "".join([
        "(async () => {",
        " try {",
        f" const r = await fetch({js_url}, {{credentials: 'include', cache: 'no-store'}});",
        " const buf = await r.arrayBuffer();",
        " const bytes = new Uint8Array(buf);",
        " let bin = '';",
        " const chunk = 0x8000;",
        " for (let i = 0; i < bytes.length; i += chunk) {",
        " bin += String.fromCharCode.apply(null, bytes.subarray(i, i + chunk));",
        " }",
        " return {status: r.status, b64: btoa(bin), len: bytes.length};",
        " } catch (e) { return {status: -1, b64: '', len: 0, err: String(e)}; }",
        "})()",
    ])
    evaluate_params = {
        "expression": script,
        "awaitPromise": True,
        "returnByValue": True,
    }
    reply = cdp.call("Runtime.evaluate", evaluate_params, timeout=240)
    payload = reply.get("result", {}).get("value", {}) or {}
    return payload.get("status", 0), payload.get("b64", ""), payload.get("len", 0)
132
+
133
+
134
def sha256_bytes(b: bytes) -> str:
    """Return the hex SHA-256 digest of the in-memory byte string *b*."""
    return hashlib.sha256(b).hexdigest()
138
+
139
+
140
def main() -> int:
    """Capture the indictment PDF and the DOJ press release.

    Opens a tab on ``SEED_URL``, fetches the PDF in page context, searches
    the DOJ news listing for a press-release link, fetches that page, and
    writes a manifest describing what was captured.

    Returns:
        ``0``. Per-target failures are recorded in the manifest and do not
        change the exit code.
    """
    import base64

    print(f"[capture] seeding tab -> {SEED_URL}", flush=True)
    tab = open_tab(SEED_URL)
    target_id = tab["id"]
    manifest: dict = {
        "finding": "2026-06-04_splc-doj-superseding-indictment-oneill-thread.md",
        "capture_method": "Lumina Chrome CDP (port 9222) — page-context fetch (Akamai bypass)",
        "targets": {},
    }
    cdp = None
    try:
        # Connect inside the try so the tab is still closed when the
        # WebSocket handshake itself fails.
        cdp = CDP(tab["webSocketDebuggerUrl"])
        cdp.call("Page.enable")
        cdp.call("Runtime.enable")
        cdp.call("Network.enable", {"maxPostDataSize": 0})
        cdp.call("Page.navigate", {"url": SEED_URL})
        try:
            cdp.wait_event("Page.loadEventFired", timeout=30.0)
        except TimeoutError:
            pass  # carry on; each fetch below records its own status
        time.sleep(3.0)  # let Akamai cookies stick

        # ---- 1. Indictment PDF (primary) ----
        print(f"[capture] fetching PDF -> {PDF_URL}", flush=True)
        status, b64, length = fetch_pdf_b64_in_page(cdp, PDF_URL)
        print(f"[capture] PDF status={status} bytes={length}", flush=True)
        if status == 200 and b64:
            data = base64.b64decode(b64)
            is_pdf = data[:5] == b"%PDF-"
            pdf_path = OUT / "splc-superseding-indictment-1437146.pdf"
            pdf_path.write_bytes(data)
            sha = sha256_bytes(data)
            print(f"[capture] PDF written {len(data)} bytes is_pdf={is_pdf} sha256={sha}", flush=True)
            manifest["targets"]["indictment_pdf"] = {
                "url": PDF_URL, "status": status, "path": str(pdf_path),
                "bytes": len(data), "is_pdf_magic": is_pdf, "sha256": sha,
            }
        else:
            manifest["targets"]["indictment_pdf"] = {"url": PDF_URL, "status": status, "error": True}
            print("[capture] PDF FAILED — page-context fetch did not return 200", flush=True)

        # ---- 2. Discover + fetch DOJ press release ----
        print(f"[capture] searching DOJ news -> {SEARCH_URL}", flush=True)
        status, html = fetch_text_in_page(cdp, SEARCH_URL)
        print(f"[capture] search status={status} len={len(html)}", flush=True)
        pr_url = None
        if status == 200 and html:
            # Explicit UTF-8: the text comes from the web and must not depend
            # on the locale's default encoding.
            (OUT / "doj-news-search.html").write_text(html, encoding="utf-8")
            # Find press-release links; prefer /opa/pr/ slugs mentioning the charge
            cands = re.findall(r'href="(/opa/pr/[^"#?]+)"', html)
            # Order-preserving de-duplication.
            uniq = list(dict.fromkeys(cands))
            print(f"[capture] press-release candidates: {uniq[:10]}", flush=True)
            scored = [c for c in uniq if "southern-poverty" in c.lower()
                      or "splc" in c.lower()
                      or ("wire-fraud" in c.lower() and "law-center" in c.lower())]
            if scored:
                pr_url = "https://www.justice.gov" + scored[0]
            elif uniq:
                # No slug looks specific; fall back to the first result.
                pr_url = "https://www.justice.gov" + uniq[0]
            manifest["press_release_candidates"] = uniq[:15]

        if pr_url:
            print(f"[capture] fetching press release -> {pr_url}", flush=True)
            status, prhtml = fetch_text_in_page(cdp, pr_url)
            print(f"[capture] press release status={status} len={len(prhtml)}", flush=True)
            if status == 200 and prhtml:
                (OUT / "doj-press-release.html").write_text(prhtml, encoding="utf-8")
                txt_expr = (
                    f"(async () => {{"
                    f" const r = await fetch({json.dumps(pr_url)}, {{credentials: 'include'}});"
                    f" const html = await r.text();"
                    f" const doc = new DOMParser().parseFromString(html, 'text/html');"
                    f" const a = doc.querySelector('.field--name-body') || doc.querySelector('article')"
                    f" || doc.querySelector('main') || doc.body;"
                    f" return a ? a.innerText : '';"
                    f"}})()"
                )
                res = cdp.call("Runtime.evaluate", {
                    "expression": txt_expr, "awaitPromise": True, "returnByValue": True,
                }, timeout=60)
                txt = res.get("result", {}).get("value", "") or ""
                if txt:
                    (OUT / "doj-press-release.txt").write_text(txt, encoding="utf-8")
                    print(f"[capture] extracted {len(txt)} chars of press-release text", flush=True)
                manifest["targets"]["press_release"] = {"url": pr_url, "status": status,
                                                        "txt_chars": len(txt)}
            else:
                manifest["targets"]["press_release"] = {"url": pr_url, "status": status, "error": True}
        else:
            print("[capture] no press-release URL discovered from search", flush=True)
            manifest["targets"]["press_release"] = {"discovered": False}

        manifest["captured_at"] = time.strftime("%Y-%m-%dT%H:%M:%S%z")
        (OUT / "manifest.json").write_text(json.dumps(manifest, indent=2), encoding="utf-8")
        print(f"[capture] manifest written -> {OUT/'manifest.json'}", flush=True)
        return 0
    finally:
        if cdp is not None:
            cdp.close()
        close_tab(target_id)


if __name__ == "__main__":
    sys.exit(main())