@smilintux/skcapstone 0.9.0 → 0.12.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (284) hide show
  1. package/.env.example +10 -4
  2. package/.github/workflows/ci.yml +2 -2
  3. package/.github/workflows/publish.yml +9 -2
  4. package/.openclaw-workspace.json +2 -2
  5. package/CLAUDE.md +37 -0
  6. package/MISSION.md +17 -2
  7. package/README.md +282 -3
  8. package/docker/Dockerfile +7 -7
  9. package/docker/compose-templates/dev-team.yml +12 -12
  10. package/docker/compose-templates/mini-team.yml +9 -9
  11. package/docker/compose-templates/ops-team.yml +10 -10
  12. package/docker/compose-templates/research-team.yml +10 -10
  13. package/docker/entrypoint.sh +4 -4
  14. package/docs/ADR-optional-integration-backbone.md +181 -0
  15. package/docs/ARCHITECTURE.md +186 -43
  16. package/docs/BOND_WITH_GROK.md +6 -6
  17. package/docs/CUSTOM_AGENT.md +278 -1
  18. package/docs/DREAMING.md +70 -0
  19. package/docs/GETTING_STARTED.md +10 -7
  20. package/docs/QUICKSTART.md +10 -6
  21. package/docs/SKJOULE_ARCHITECTURE.md +3 -3
  22. package/docs/SOUL_SWAPPER.md +5 -5
  23. package/docs/hammertime-audit.md +402 -0
  24. package/docs/sk-integration-HANDOFF.md +117 -0
  25. package/docs/skscheduler.md +155 -0
  26. package/docs/superpowers/examples/jobs.yaml +31 -0
  27. package/docs/superpowers/plans/2026-06-08-skscheduler.md +1265 -0
  28. package/docs/superpowers/specs/2026-06-08-skscheduler-design.md +186 -0
  29. package/examples/custom-bond-template.json +1 -1
  30. package/examples/grok-feb.json +1 -1
  31. package/examples/queen-ava-feb.json +1 -1
  32. package/launchd/com.skcapstone.daemon.plist +52 -0
  33. package/launchd/com.skcapstone.memory-compress.plist +45 -0
  34. package/launchd/com.skcapstone.skcomms-heartbeat.plist +33 -0
  35. package/launchd/com.skcapstone.skcomms-queue-drain.plist +34 -0
  36. package/launchd/install-launchd.sh +156 -0
  37. package/{openclaw-plugin → openclaw-plugin.archived-2026-04-23}/src/index.ts +3 -2
  38. package/package.json +1 -1
  39. package/pyproject.toml +16 -10
  40. package/scripts/archive-sessions.sh +95 -0
  41. package/scripts/check-updates.py +4 -4
  42. package/scripts/install-bundle.sh +8 -8
  43. package/scripts/install.ps1 +12 -11
  44. package/scripts/install.sh +196 -11
  45. package/scripts/model-fallback-monitor.sh +102 -0
  46. package/scripts/notion-api.py +259 -0
  47. package/scripts/nvidia-proxy.mjs +908 -0
  48. package/scripts/proxy-monitor.sh +89 -0
  49. package/scripts/refresh-anthropic-token.sh +172 -0
  50. package/scripts/release.sh +98 -0
  51. package/scripts/session-to-memory.py +219 -0
  52. package/scripts/skgateway.mjs +856 -0
  53. package/scripts/telegram-catchup-all.sh +147 -0
  54. package/scripts/verify_install.sh +2 -2
  55. package/scripts/wargov-ufo-capture/README.md +43 -0
  56. package/scripts/wargov-ufo-capture/cdp_capture_release2.py +273 -0
  57. package/scripts/wargov-ufo-capture/cdp_capture_splc_doj.py +246 -0
  58. package/scripts/wargov-ufo-capture/cdp_finish.py +271 -0
  59. package/scripts/wargov-ufo-capture/cdp_probe.py +188 -0
  60. package/scripts/wargov-ufo-capture/cdp_splc_pressrelease.py +101 -0
  61. package/scripts/wargov-ufo-capture/parse_csv.py +95 -0
  62. package/scripts/wargov-ufo-capture/pull_dvids.sh +107 -0
  63. package/scripts/watch-anthropic-token.sh +212 -0
  64. package/scripts/windows/install-tasks.ps1 +7 -7
  65. package/scripts/windows/skcapstone-task.xml +1 -1
  66. package/src/skcapstone/__init__.py +45 -3
  67. package/src/skcapstone/_cli_monolith.py +20 -15
  68. package/src/skcapstone/activity.py +5 -1
  69. package/src/skcapstone/agent_card.py +3 -2
  70. package/src/skcapstone/api.py +41 -40
  71. package/src/skcapstone/auction.py +14 -11
  72. package/src/skcapstone/backup.py +2 -1
  73. package/src/skcapstone/blueprint_registry.py +4 -3
  74. package/src/skcapstone/blueprints/builtins/itil-operations.yaml +40 -0
  75. package/src/skcapstone/brain_first.py +238 -0
  76. package/src/skcapstone/changelog.py +1 -1
  77. package/src/skcapstone/chat.py +22 -17
  78. package/src/skcapstone/cli/__init__.py +9 -1
  79. package/src/skcapstone/cli/_common.py +1 -0
  80. package/src/skcapstone/cli/agents_spawner.py +5 -2
  81. package/src/skcapstone/cli/alerts.py +25 -4
  82. package/src/skcapstone/cli/bench.py +15 -15
  83. package/src/skcapstone/cli/chat.py +7 -4
  84. package/src/skcapstone/cli/consciousness.py +5 -2
  85. package/src/skcapstone/cli/context_cmd.py +18 -4
  86. package/src/skcapstone/cli/daemon.py +121 -42
  87. package/src/skcapstone/cli/gtd.py +26 -1
  88. package/src/skcapstone/cli/housekeeping.py +3 -3
  89. package/src/skcapstone/cli/identity_cmd.py +378 -0
  90. package/src/skcapstone/cli/joule_cmd.py +7 -3
  91. package/src/skcapstone/cli/memory.py +8 -6
  92. package/src/skcapstone/cli/peers_dir.py +1 -1
  93. package/src/skcapstone/cli/register_cmd.py +29 -3
  94. package/src/skcapstone/cli/scheduler_cmd.py +167 -0
  95. package/src/skcapstone/cli/session.py +25 -0
  96. package/src/skcapstone/cli/setup.py +96 -29
  97. package/src/skcapstone/cli/shell_cmd.py +53 -1
  98. package/src/skcapstone/cli/skills_cmd.py +2 -2
  99. package/src/skcapstone/cli/soul.py +8 -5
  100. package/src/skcapstone/cli/status.py +37 -11
  101. package/src/skcapstone/cli/telegram.py +21 -0
  102. package/src/skcapstone/cli/test_cmd.py +5 -5
  103. package/src/skcapstone/cli/test_connection.py +2 -2
  104. package/src/skcapstone/cli/upgrade_cmd.py +23 -14
  105. package/src/skcapstone/cli/version_cmd.py +1 -1
  106. package/src/skcapstone/cli/watch_cmd.py +9 -6
  107. package/src/skcapstone/cloud9_bridge.py +14 -14
  108. package/src/skcapstone/codex_setup.py +255 -0
  109. package/src/skcapstone/config_validator.py +7 -4
  110. package/src/skcapstone/consciousness_config.py +5 -1
  111. package/src/skcapstone/consciousness_loop.py +313 -273
  112. package/src/skcapstone/context_loader.py +121 -0
  113. package/src/skcapstone/coord_federation.py +2 -1
  114. package/src/skcapstone/coordination.py +23 -6
  115. package/src/skcapstone/crush_integration.py +2 -1
  116. package/src/skcapstone/daemon.py +151 -88
  117. package/src/skcapstone/dashboard.py +10 -10
  118. package/src/skcapstone/data/sk-agent-picker.sh +421 -0
  119. package/src/skcapstone/data/systemd/skcapstone-api.socket +9 -0
  120. package/src/skcapstone/data/systemd/skcapstone-memory-compress.service +18 -0
  121. package/src/skcapstone/data/systemd/skcapstone-memory-compress.timer +11 -0
  122. package/src/skcapstone/data/systemd/skcapstone.service +37 -0
  123. package/src/skcapstone/data/systemd/skcapstone@.service +50 -0
  124. package/src/skcapstone/data/systemd/skcomms-heartbeat.service +18 -0
  125. package/{systemd/skcomm-heartbeat.timer → src/skcapstone/data/systemd/skcomms-heartbeat.timer} +2 -2
  126. package/src/skcapstone/data/systemd/skcomms-queue-drain.service +17 -0
  127. package/{systemd/skcomm-queue-drain.timer → src/skcapstone/data/systemd/skcomms-queue-drain.timer} +2 -2
  128. package/src/skcapstone/defaults/claude/CLAUDE.md +67 -0
  129. package/src/skcapstone/defaults/claude/settings.json +74 -0
  130. package/src/skcapstone/defaults/lumina/config/claude-hooks.md +57 -0
  131. package/src/skcapstone/defaults/lumina/config/skgraph.yaml +55 -10
  132. package/src/skcapstone/defaults/lumina/config/skmemory.yaml +79 -13
  133. package/src/skcapstone/defaults/lumina/config/skvector.yaml +60 -9
  134. package/src/skcapstone/defaults/lumina/memory/long-term/18b9c0d1e2f3-cloud9-protocol.json +2 -2
  135. package/src/skcapstone/defaults/lumina/memory/long-term/a1b2c3d4e5f6-ecosystem-overview.json +2 -2
  136. package/src/skcapstone/defaults/lumina/memory/long-term/b2c3d4e5f6a7-five-pillars.json +9 -9
  137. package/src/skcapstone/defaults/lumina/memory/long-term/d4e5f6a7b8c9-site-directory.json +2 -2
  138. package/src/skcapstone/defaults/unhinged.json +13 -0
  139. package/src/skcapstone/discovery.py +43 -20
  140. package/src/skcapstone/doctor.py +941 -22
  141. package/src/skcapstone/dreaming.py +1183 -109
  142. package/src/skcapstone/emotion_tracker.py +2 -2
  143. package/src/skcapstone/export.py +4 -3
  144. package/src/skcapstone/fuse_mount.py +35 -25
  145. package/src/skcapstone/gui_installer.py +2 -2
  146. package/src/skcapstone/heartbeat.py +34 -30
  147. package/src/skcapstone/housekeeping.py +14 -14
  148. package/src/skcapstone/install_wizard.py +209 -7
  149. package/src/skcapstone/itil.py +13 -4
  150. package/src/skcapstone/kms_scheduler.py +10 -8
  151. package/src/skcapstone/launchd.py +426 -0
  152. package/src/skcapstone/mcp_launcher.py +15 -1
  153. package/src/skcapstone/mcp_server.py +341 -49
  154. package/src/skcapstone/mcp_tools/__init__.py +2 -0
  155. package/src/skcapstone/mcp_tools/_helpers.py +2 -2
  156. package/src/skcapstone/mcp_tools/ansible_tools.py +7 -4
  157. package/src/skcapstone/mcp_tools/brain_first_tools.py +90 -0
  158. package/src/skcapstone/mcp_tools/capauth_tools.py +7 -4
  159. package/src/skcapstone/mcp_tools/comm_tools.py +10 -10
  160. package/src/skcapstone/mcp_tools/coord_tools.py +8 -4
  161. package/src/skcapstone/mcp_tools/did_tools.py +11 -8
  162. package/src/skcapstone/mcp_tools/gtd_tools.py +4 -4
  163. package/src/skcapstone/mcp_tools/memory_tools.py +6 -2
  164. package/src/skcapstone/mcp_tools/notification_tools.py +22 -6
  165. package/src/skcapstone/mcp_tools/{skcomm_tools.py → skcomms_tools.py} +14 -14
  166. package/src/skcapstone/mcp_tools/soul_tools.py +8 -2
  167. package/src/skcapstone/mdns_discovery.py +2 -2
  168. package/src/skcapstone/memory_curator.py +1 -1
  169. package/src/skcapstone/memory_engine.py +10 -3
  170. package/src/skcapstone/metrics.py +30 -16
  171. package/src/skcapstone/migrate_memories.py +4 -3
  172. package/src/skcapstone/migrate_multi_agent.py +8 -7
  173. package/src/skcapstone/models.py +47 -5
  174. package/src/skcapstone/notifications.py +42 -18
  175. package/src/skcapstone/onboard.py +1000 -126
  176. package/src/skcapstone/operator_link.py +170 -0
  177. package/src/skcapstone/peer_directory.py +4 -4
  178. package/src/skcapstone/peers.py +19 -19
  179. package/src/skcapstone/pillars/__init__.py +7 -5
  180. package/src/skcapstone/pillars/consciousness.py +191 -0
  181. package/src/skcapstone/pillars/identity.py +51 -7
  182. package/src/skcapstone/pillars/memory.py +9 -3
  183. package/src/skcapstone/pillars/sync.py +2 -2
  184. package/src/skcapstone/preflight.py +3 -3
  185. package/src/skcapstone/providers/docker.py +28 -28
  186. package/src/skcapstone/register.py +6 -6
  187. package/src/skcapstone/registry_client.py +5 -4
  188. package/src/skcapstone/runtime.py +14 -3
  189. package/src/skcapstone/scheduled_tasks.py +254 -19
  190. package/src/skcapstone/scheduler_jobs.py +456 -0
  191. package/src/skcapstone/scheduler_runner.py +239 -0
  192. package/src/skcapstone/scheduler_state.py +162 -0
  193. package/src/skcapstone/sdk.py +310 -0
  194. package/src/skcapstone/service_health.py +279 -39
  195. package/src/skcapstone/session_briefing.py +108 -0
  196. package/src/skcapstone/session_capture.py +1 -1
  197. package/src/skcapstone/shell.py +7 -1
  198. package/src/skcapstone/soul.py +3 -1
  199. package/src/skcapstone/soul_switch.py +3 -1
  200. package/src/skcapstone/summary.py +6 -6
  201. package/src/skcapstone/sync_engine.py +15 -15
  202. package/src/skcapstone/sync_watcher.py +2 -2
  203. package/src/skcapstone/systemd.py +72 -21
  204. package/src/skcapstone/team_comms.py +8 -8
  205. package/src/skcapstone/team_engine.py +1 -1
  206. package/src/skcapstone/testrunner.py +3 -3
  207. package/src/skcapstone/trust_graph.py +40 -5
  208. package/src/skcapstone/unified_search.py +15 -6
  209. package/src/skcapstone/uninstall_wizard.py +11 -3
  210. package/src/skcapstone/version_check.py +8 -4
  211. package/src/skcapstone/warmth_anchor.py +4 -2
  212. package/src/skcapstone/whoami.py +4 -4
  213. package/systemd/skcapstone.service +4 -6
  214. package/systemd/skcapstone@.service +7 -8
  215. package/systemd/skcomms-heartbeat.service +21 -0
  216. package/systemd/skcomms-heartbeat.timer +12 -0
  217. package/systemd/skcomms-queue-drain.service +17 -0
  218. package/systemd/skcomms-queue-drain.timer +12 -0
  219. package/tests/conftest.py +39 -0
  220. package/tests/integration/test_consciousness_e2e.py +39 -39
  221. package/tests/test_agent_card.py +1 -1
  222. package/tests/test_agent_home_scaffold.py +34 -0
  223. package/tests/test_alerts_consumer_topics.py +27 -0
  224. package/tests/test_backup.py +2 -1
  225. package/tests/test_chat.py +6 -6
  226. package/tests/test_claude_md.py +2 -2
  227. package/tests/test_cli_skills.py +10 -10
  228. package/tests/test_cli_test_cmd.py +4 -4
  229. package/tests/test_cli_test_connection.py +1 -1
  230. package/tests/test_cloud9_bridge.py +6 -6
  231. package/tests/test_consciousness_e2e.py +1 -1
  232. package/tests/test_consciousness_loop.py +10 -10
  233. package/tests/test_coordination.py +25 -0
  234. package/tests/test_cross_package.py +21 -21
  235. package/tests/test_daemon.py +4 -4
  236. package/tests/test_daemon_shutdown.py +1 -1
  237. package/tests/test_docker_provider.py +29 -29
  238. package/tests/test_doctor.py +400 -0
  239. package/tests/test_doctor_skscheduler.py +50 -0
  240. package/tests/test_dreaming_engine.py +147 -0
  241. package/tests/test_dreaming_gtd_capture.py +35 -0
  242. package/tests/test_e2e_automated.py +8 -5
  243. package/tests/test_fuse_mount.py +10 -10
  244. package/tests/test_gtd_brief.py +46 -0
  245. package/tests/test_gtd_malformed_tolerance.py +31 -0
  246. package/tests/test_housekeeping.py +15 -15
  247. package/tests/test_identity_migrate.py +251 -0
  248. package/tests/test_integration_backbone.py +598 -0
  249. package/tests/test_itil_gtd_lifecycle.py +37 -0
  250. package/tests/test_jobs_dropins.py +84 -0
  251. package/tests/test_mcp_server.py +82 -37
  252. package/tests/test_models.py +48 -4
  253. package/tests/test_multi_agent.py +31 -29
  254. package/tests/test_notifications.py +122 -32
  255. package/tests/test_onboard.py +63 -75
  256. package/tests/test_operator_link.py +78 -0
  257. package/tests/test_peers.py +14 -14
  258. package/tests/test_pillars.py +98 -0
  259. package/tests/test_preflight.py +3 -3
  260. package/tests/test_runtime.py +21 -0
  261. package/tests/test_scheduled_tasks.py +11 -6
  262. package/tests/test_scheduler_cli.py +47 -0
  263. package/tests/test_scheduler_features.py +133 -0
  264. package/tests/test_scheduler_integration.py +87 -0
  265. package/tests/test_scheduler_jobs.py +155 -0
  266. package/tests/test_scheduler_runner.py +64 -0
  267. package/tests/test_scheduler_state.py +57 -0
  268. package/tests/test_sdk.py +70 -0
  269. package/tests/test_service_health_incidents.py +34 -0
  270. package/tests/test_service_registry.py +52 -0
  271. package/tests/test_session_briefing.py +130 -0
  272. package/tests/test_snapshots.py +4 -4
  273. package/tests/test_sync_pipeline.py +26 -26
  274. package/tests/test_team_comms.py +2 -2
  275. package/tests/test_testrunner.py +2 -2
  276. package/tests/test_trust_graph.py +18 -0
  277. package/tests/test_unified_search.py +2 -2
  278. package/tests/test_version_check.py +10 -0
  279. package/tests/test_version_cmd.py +8 -8
  280. package/tests/test_whoami.py +1 -1
  281. package/systemd/skcomm-heartbeat.service +0 -18
  282. package/systemd/skcomm-queue-drain.service +0 -17
  283. /package/{openclaw-plugin → openclaw-plugin.archived-2026-04-23}/package.json +0 -0
  284. /package/{openclaw-plugin → openclaw-plugin.archived-2026-04-23}/src/openclaw.plugin.json +0 -0
@@ -0,0 +1,246 @@
1
+ #!/usr/bin/env python3
2
+ """Capture the PRIMARY DOJ sources for the SPLC superseding-indictment finding via Lumina Chrome CDP.
3
+
4
+ Both targets returned HTTP 403 to WebFetch (Akamai TLS-fingerprint/bot gate). Driving
5
+ Lumina's already-authenticated Chrome (port 9222) in page context bypasses the gate.
6
+
7
+ Targets:
8
+ 1. Indictment PDF -> https://www.justice.gov/opa/media/1437146/dl (download)
9
+ 2. DOJ press release -> discovered via justice.gov news search for "Southern Poverty Law Center"
10
+
11
+ Goal (per finding 2026-06-04_splc-doj-superseding-indictment-oneill-thread.md):
12
+ resolve the 2010-vs-2014 conduct window, the F-30 "$70K" figure, and confirm count
13
+ language first-hand rather than via secondary quotation.
14
+
15
+ Output -> ~/clawd/skills/substance-lens/captures/splc-doj-2026-06-03/
16
+ """
17
+ from __future__ import annotations
18
+
19
+ import hashlib
20
+ import json
21
+ import re
22
+ import sys
23
+ import time
24
+ import urllib.request
25
+ from pathlib import Path
26
+
27
+ import websocket
28
+
29
+ CDP_HTTP = "http://127.0.0.1:9222"
30
+ SEED_URL = "https://www.justice.gov/"
31
+ PDF_URL = "https://www.justice.gov/opa/media/1437146/dl"
32
+ SEARCH_URL = "https://www.justice.gov/news?search_api_fulltext=Southern+Poverty+Law+Center"
33
+
34
+ OUT = Path("/home/cbrd21/clawd/skills/substance-lens/captures/splc-doj-2026-06-03")
35
+ OUT.mkdir(parents=True, exist_ok=True)
36
+
37
+
38
def open_tab(url: str) -> dict:
    """Open a new browser tab at *url* via the DevTools HTTP endpoint.

    Sends a PUT to ``/json/new`` on ``CDP_HTTP`` and returns the decoded JSON
    body of the response (the caller reads ``id`` and
    ``webSocketDebuggerUrl`` from it).
    """
    endpoint = f"{CDP_HTTP}/json/new?{url}"
    request = urllib.request.Request(endpoint, method="PUT")
    with urllib.request.urlopen(request, timeout=10) as response:
        payload = response.read()
    return json.loads(payload)
42
+
43
+
44
def close_tab(target_id: str) -> None:
    """Ask the DevTools HTTP endpoint to close the tab *target_id*.

    This is best-effort cleanup: any failure is deliberately ignored so it
    can never mask the outcome of the capture itself.
    """
    try:
        response = urllib.request.urlopen(
            f"{CDP_HTTP}/json/close/{target_id}", timeout=5
        )
        response.close()
    except Exception:
        pass
50
+
51
+
52
class CDP:
    """Minimal synchronous Chrome DevTools Protocol client over one WebSocket.

    Requests are numbered with an increasing integer id; ``call`` blocks until
    the reply carrying the same id arrives. Messages that are not the awaited
    reply (or the awaited event, in ``wait_event``) are read and discarded.
    """

    def __init__(self, ws_url: str):
        self.ws = websocket.create_connection(ws_url, timeout=120)
        self.mid = 0  # id of the most recently sent request

    def call(self, method: str, params: dict | None = None, timeout: float = 60.0) -> dict:
        """Send one CDP command and return the ``result`` of its reply.

        Args:
            method: CDP method name, e.g. ``"Runtime.evaluate"``.
            params: Command parameters; ``None`` is sent as an empty object.
            timeout: Overall budget in seconds for the reply to arrive.

        Raises:
            RuntimeError: the reply carried an ``error`` member.
            TimeoutError: unrelated messages kept arriving but the reply did
                not show up within *timeout* seconds.
        """
        self.mid += 1
        msg_id = self.mid
        self.ws.send(json.dumps({"id": msg_id, "method": method, "params": params or {}}))
        # The socket timeout only bounds a single recv(). With event-producing
        # domains enabled, unrelated events can arrive indefinitely, so an
        # overall deadline is enforced as well.
        deadline = time.monotonic() + timeout
        while True:
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                raise TimeoutError(f"{method}: no response within {timeout}s")
            self.ws.settimeout(remaining)
            msg = json.loads(self.ws.recv())
            if msg.get("id") != msg_id:
                continue  # an event or a reply to some other request
            if "error" in msg:
                raise RuntimeError(f"{method}: {msg['error']}")
            return msg.get("result", {})

    def wait_event(self, name: str, timeout: float = 30.0) -> dict:
        """Block until the event *name* arrives and return its ``params``.

        Raises:
            TimeoutError: the event did not arrive within *timeout* seconds.
        """
        # Monotonic clock: wall-clock adjustments must not stretch the wait.
        deadline = time.monotonic() + timeout
        while time.monotonic() < deadline:
            self.ws.settimeout(max(0.1, deadline - time.monotonic()))
            try:
                raw = self.ws.recv()
            except websocket.WebSocketTimeoutException:
                continue  # the loop condition decides whether to give up
            msg = json.loads(raw)
            if msg.get("method") == name:
                return msg.get("params", {})
        raise TimeoutError(f"event {name} did not fire within {timeout}s")

    def close(self) -> None:
        """Close the WebSocket, ignoring any error raised while doing so."""
        try:
            self.ws.close()
        except Exception:
            pass
88
+
89
+
90
def fetch_text_in_page(cdp: CDP, url: str) -> tuple[int, str]:
    """Fetch *url* with the page's own ``fetch`` and return ``(status, text)``.

    The request runs inside the tab via ``Runtime.evaluate``, with credentials
    included and caching disabled. A JavaScript-side failure is reported as
    status ``-1`` with the error string as text; a missing value yields
    ``(0, "")``.
    """
    js_url = json.dumps(url)
    script = "".join([
        "(async () => {",
        " try {",
        " const r = await fetch(" + js_url + ", {credentials: 'include', cache: 'no-store'});",
        " return {status: r.status, text: await r.text()};",
        " } catch (e) { return {status: -1, text: String(e)}; }",
        "})()",
    ])
    evaluate_params = {
        "expression": script,
        "awaitPromise": True,
        "returnByValue": True,
    }
    reply = cdp.call("Runtime.evaluate", evaluate_params, timeout=180)
    value = reply.get("result", {}).get("value", {})
    if not value:
        value = {}
    return value.get("status", 0), value.get("text", "")
106
+
107
+
108
def fetch_pdf_b64_in_page(cdp: CDP, url: str) -> tuple[int, str, int]:
    """Fetch a binary in page context, return base64 (works for PDFs under a few MB).

    Returns ``(status, b64, length)``. The page-side script converts the
    response bytes to a binary string in 0x8000-byte chunks before ``btoa``.
    A JavaScript-side failure yields status ``-1`` with an empty payload; a
    missing value yields ``(0, "", 0)``.
    """
    js_url = json.dumps(url)
    script = "".join([
        "(async () => {",
        " try {",
        " const r = await fetch(" + js_url + ", {credentials: 'include', cache: 'no-store'});",
        " const buf = await r.arrayBuffer();",
        " const bytes = new Uint8Array(buf);",
        " let bin = '';",
        " const chunk = 0x8000;",
        " for (let i = 0; i < bytes.length; i += chunk) {",
        " bin += String.fromCharCode.apply(null, bytes.subarray(i, i + chunk));",
        " }",
        " return {status: r.status, b64: btoa(bin), len: bytes.length};",
        " } catch (e) { return {status: -1, b64: '', len: 0, err: String(e)}; }",
        "})()",
    ])
    evaluate_params = {
        "expression": script,
        "awaitPromise": True,
        "returnByValue": True,
    }
    reply = cdp.call("Runtime.evaluate", evaluate_params, timeout=240)
    value = reply.get("result", {}).get("value", {})
    if not value:
        value = {}
    return value.get("status", 0), value.get("b64", ""), value.get("len", 0)
132
+
133
+
134
def sha256_bytes(b: bytes) -> str:
    """Return the SHA-256 digest of *b* as a lowercase hex string."""
    return hashlib.sha256(b).hexdigest()
138
+
139
+
140
def main() -> int:
    """Capture the indictment PDF and the DOJ press release via Chrome CDP.

    Steps:
      1. Open a tab on ``SEED_URL`` and wait for it to load.
      2. Fetch ``PDF_URL`` in page context and write it under ``OUT``.
      3. Fetch ``SEARCH_URL``, pick a ``/opa/pr/`` link, then save that press
         release as HTML and as extracted text.
      4. Write ``manifest.json`` describing what was captured.

    Per-target failures are recorded in the manifest rather than raised.

    Returns:
        0 once the manifest has been written.
    """
    import base64

    print(f"[capture] seeding tab -> {SEED_URL}", flush=True)
    tab = open_tab(SEED_URL)
    target_id = tab["id"]
    manifest: dict = {
        "finding": "2026-06-04_splc-doj-superseding-indictment-oneill-thread.md",
        "capture_method": "Lumina Chrome CDP (port 9222) — page-context fetch (Akamai bypass)",
        "targets": {},
    }
    cdp = None
    try:
        # Connect inside the try so a failed WebSocket handshake still closes
        # the tab that was just opened.
        cdp = CDP(tab["webSocketDebuggerUrl"])
        cdp.call("Page.enable")
        cdp.call("Runtime.enable")
        cdp.call("Network.enable", {"maxPostDataSize": 0})
        cdp.call("Page.navigate", {"url": SEED_URL})
        try:
            cdp.wait_event("Page.loadEventFired", timeout=30.0)
        except TimeoutError:
            pass  # a slow load is not fatal; carry on with what is there
        time.sleep(3.0)  # let Akamai cookies stick

        # ---- 1. Indictment PDF (primary) ----
        print(f"[capture] fetching PDF -> {PDF_URL}", flush=True)
        status, b64, length = fetch_pdf_b64_in_page(cdp, PDF_URL)
        print(f"[capture] PDF status={status} bytes={length}", flush=True)
        if status == 200 and b64:
            data = base64.b64decode(b64)
            # An HTML block page returned with status 200 would fail this check.
            is_pdf = data[:5] == b"%PDF-"
            pdf_path = OUT / "splc-superseding-indictment-1437146.pdf"
            pdf_path.write_bytes(data)
            sha = sha256_bytes(data)
            print(f"[capture] PDF written {len(data)} bytes is_pdf={is_pdf} sha256={sha}", flush=True)
            manifest["targets"]["indictment_pdf"] = {
                "url": PDF_URL, "status": status, "path": str(pdf_path),
                "bytes": len(data), "is_pdf_magic": is_pdf, "sha256": sha,
            }
        else:
            manifest["targets"]["indictment_pdf"] = {"url": PDF_URL, "status": status, "error": True}
            print("[capture] PDF FAILED — page-context fetch did not return 200", flush=True)

        # ---- 2. Discover + fetch DOJ press release ----
        print(f"[capture] searching DOJ news -> {SEARCH_URL}", flush=True)
        status, html = fetch_text_in_page(cdp, SEARCH_URL)
        print(f"[capture] search status={status} len={len(html)}", flush=True)
        pr_url = None
        if status == 200 and html:
            # Explicit UTF-8: scraped HTML must not depend on the locale encoding.
            (OUT / "doj-news-search.html").write_text(html, encoding="utf-8")
            # Find press-release links; prefer /opa/pr/ slugs mentioning the charge
            cands = re.findall(r'href="(/opa/pr/[^"#?]+)"', html)
            uniq = list(dict.fromkeys(cands))  # de-duplicate, keeping first-seen order
            print(f"[capture] press-release candidates: {uniq[:10]}", flush=True)
            scored = []
            for c in uniq:
                slug = c.lower()
                if ("southern-poverty" in slug
                        or "splc" in slug
                        or ("wire-fraud" in slug and "law-center" in slug)):
                    scored.append(c)
            if scored:
                pr_url = "https://www.justice.gov" + scored[0]
            elif uniq:
                # No slug matched; fall back to the first search hit.
                pr_url = "https://www.justice.gov" + uniq[0]
            manifest["press_release_candidates"] = uniq[:15]

        if pr_url:
            print(f"[capture] fetching press release -> {pr_url}", flush=True)
            status, prhtml = fetch_text_in_page(cdp, pr_url)
            print(f"[capture] press release status={status} len={len(prhtml)}", flush=True)
            if status == 200 and prhtml:
                (OUT / "doj-press-release.html").write_text(prhtml, encoding="utf-8")
                # Re-fetch inside the page so DOMParser can isolate the article body.
                txt_expr = (
                    f"(async () => {{"
                    f" const r = await fetch({json.dumps(pr_url)}, {{credentials: 'include'}});"
                    f" const html = await r.text();"
                    f" const doc = new DOMParser().parseFromString(html, 'text/html');"
                    f" const a = doc.querySelector('.field--name-body') || doc.querySelector('article')"
                    f" || doc.querySelector('main') || doc.body;"
                    f" return a ? a.innerText : '';"
                    f"}})()"
                )
                res = cdp.call("Runtime.evaluate", {
                    "expression": txt_expr, "awaitPromise": True, "returnByValue": True,
                }, timeout=60)
                txt = res.get("result", {}).get("value", "") or ""
                if txt:
                    (OUT / "doj-press-release.txt").write_text(txt, encoding="utf-8")
                print(f"[capture] extracted {len(txt)} chars of press-release text", flush=True)
                manifest["targets"]["press_release"] = {"url": pr_url, "status": status,
                                                        "txt_chars": len(txt)}
            else:
                manifest["targets"]["press_release"] = {"url": pr_url, "status": status, "error": True}
        else:
            print("[capture] no press-release URL discovered from search", flush=True)
            manifest["targets"]["press_release"] = {"discovered": False}

        manifest["captured_at"] = time.strftime("%Y-%m-%dT%H:%M:%S%z")
        (OUT / "manifest.json").write_text(json.dumps(manifest, indent=2), encoding="utf-8")
        print(f"[capture] manifest written -> {OUT/'manifest.json'}", flush=True)
        return 0
    finally:
        if cdp is not None:
            cdp.close()
        close_tab(target_id)
243
+
244
+
245
+ if __name__ == "__main__":
246
+ sys.exit(main())
@@ -0,0 +1,271 @@
1
+ #!/usr/bin/env python3
2
+ """CDP run 2: re-extract press release text, pull thumbnails, grab FBI Vault Part 15.
3
+
4
+ Three sub-tasks:
5
+ A. Re-render the press release in a Chrome tab and pull the article-body innerText.
6
+ B. Page-context-fetch the 6 thumbnail JPGs for Release 02.
7
+ C. Navigate to FBI Vault and pull Part 15 of 16 from the 62-HQ-83894 series.
8
+ """
9
+ from __future__ import annotations
10
+
11
+ import base64
12
+ import json
13
+ import sys
14
+ import time
15
+ import urllib.request
16
+ from pathlib import Path
17
+
18
+ import websocket
19
+
20
+ CDP_HTTP = "http://127.0.0.1:9222"
21
+
22
+ PRESS_URL = "https://www.war.gov/News/Releases/Release/Article/4499305/department-of-war-publishes-second-release-of-unidentified-anomalous-phenomena/"
23
+ THUMB_BASE = "https://www.war.gov/medialink/ufo/052226/release_02/thumbnails"
24
+ THUMB_NAMES = [
25
+ "CIA-UAP-D001_Intelligence_Information_Report_USSR_1973",
26
+ "DOE-UAP-D001_PANTEX_Image",
27
+ "DOE-UAP-D002_JamesTuck_Correspondence",
28
+ "DOE-UAP-D003_Pajarito_Astronomers",
29
+ "DOW-UAP-D017_General_Correspondence_Of_Sandia",
30
+ "ODNI-UAP-D001_USPER_Narrative_Senior_USIC",
31
+ ]
32
+
33
+ FBI_VAULT_BASE = "https://vault.fbi.gov"
34
+ # FBI Vault organizes the 62-HQ-83894 UFO file as "Unidentified Flying Objects (UFO)" — Part X of Y
35
+ # Known canonical layout has Parts 1-16. Tweet referenced Part 15.
36
+ FBI_PART_PAGE = "https://vault.fbi.gov/UFO/UFO%20Part%2015%20of%2016/view"
37
+ FBI_PART_PDF_GUESS = "https://vault.fbi.gov/UFO/UFO%20Part%2015%20of%2016/at_download/file"
38
+
39
+ BASE = Path("/home/cbrd21/nextcloud/cbrd21-share/reference/war-gov-UFO-PURSUE-2026")
40
+ DOC_DIR = BASE / "docs" / "release-02"
41
+ THUMB_DIR = DOC_DIR / "thumbnails"
42
+ THUMB_DIR.mkdir(parents=True, exist_ok=True)
43
+
44
+ FBI_DIR = Path("/home/cbrd21/nextcloud/cbrd21-share/reference/fbi-vault-ufo-62-HQ-83894")
45
+ FBI_DIR.mkdir(parents=True, exist_ok=True)
46
+
47
+
48
def open_tab(url: str) -> dict:
    """Create a new browser tab showing *url* and return its descriptor.

    The tab is requested with a PUT to the ``/json/new`` DevTools HTTP
    endpoint under ``CDP_HTTP``; the JSON reply is decoded and returned.
    """
    new_tab_request = urllib.request.Request(
        f"{CDP_HTTP}/json/new?{url}",
        method="PUT",
    )
    with urllib.request.urlopen(new_tab_request, timeout=10) as reply:
        raw = reply.read()
    return json.loads(raw)
52
+
53
+
54
def close_tab(target_id: str) -> None:
    """Close the tab identified by *target_id*, ignoring every failure.

    Used for cleanup only, so errors here are intentionally swallowed.
    """
    try:
        reply = urllib.request.urlopen(
            f"{CDP_HTTP}/json/close/{target_id}", timeout=5
        )
        reply.close()
    except Exception:
        pass
60
+
61
+
62
class CDP:
    """Small blocking Chrome DevTools Protocol client bound to one tab.

    Each command gets the next integer id and ``call`` waits for the reply
    with that id. Any other message read while waiting is dropped.
    """

    def __init__(self, ws_url: str):
        self.ws = websocket.create_connection(ws_url, timeout=120)
        self.mid = 0  # id of the most recently sent command

    def call(self, method: str, params: dict | None = None, timeout: float = 60.0) -> dict:
        """Send a CDP command and return the ``result`` member of its reply.

        Args:
            method: CDP method name such as ``"Page.navigate"``.
            params: Parameters for the command; ``None`` becomes ``{}``.
            timeout: Total number of seconds to wait for the reply.

        Raises:
            RuntimeError: the reply contains an ``error`` member.
            TimeoutError: other messages kept arriving but the reply did not
                come within *timeout* seconds.
        """
        self.mid += 1
        msg_id = self.mid
        self.ws.send(json.dumps({"id": msg_id, "method": method, "params": params or {}}))
        # A socket timeout alone only limits one recv(). A steady stream of
        # events would otherwise keep this loop running forever.
        deadline = time.monotonic() + timeout
        while True:
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                raise TimeoutError(f"{method}: no response within {timeout}s")
            self.ws.settimeout(remaining)
            msg = json.loads(self.ws.recv())
            if msg.get("id") != msg_id:
                continue  # not the reply being waited for
            if "error" in msg:
                raise RuntimeError(f"{method}: {msg['error']}")
            return msg.get("result", {})

    def wait_event(self, name: str, timeout: float = 30.0) -> dict:
        """Wait for the event *name* and return its ``params``.

        Raises:
            TimeoutError: the event was not seen within *timeout* seconds.
        """
        # Use the monotonic clock so system time changes cannot skew the wait.
        deadline = time.monotonic() + timeout
        while time.monotonic() < deadline:
            self.ws.settimeout(max(0.1, deadline - time.monotonic()))
            try:
                raw = self.ws.recv()
            except websocket.WebSocketTimeoutException:
                continue  # re-check the deadline in the loop condition
            msg = json.loads(raw)
            if msg.get("method") == name:
                return msg.get("params", {})
        raise TimeoutError(f"event {name} did not fire within {timeout}s")

    def close(self) -> None:
        """Close the WebSocket; errors during close are ignored."""
        try:
            self.ws.close()
        except Exception:
            pass
98
+
99
+
100
def fetch_binary_in_page(cdp: CDP, url: str) -> tuple[int, bytes | None]:
    """Fetch a binary resource in page context and return as bytes.

    The request is made by the page's own ``fetch`` (credentials included,
    cache bypassed) and the body is shipped back base64-encoded through
    ``Runtime.evaluate``.

    Returns:
        ``(status, data)``. ``data`` is ``None`` unless the status is 200 and
        a non-empty body was returned. A missing value gives status 0.
    """
    # Convert to a binary string in 0x8000-byte chunks rather than one
    # character at a time; per-byte concatenation is very slow on multi-MB
    # PDFs. The chunk size keeps the apply() argument count small.
    expr = (
        f"(async () => {{"
        f" const r = await fetch({json.dumps(url)}, {{credentials: 'include', cache: 'no-store'}});"
        f" if (!r.ok) return {{status: r.status, b64: null}};"
        f" const buf = await r.arrayBuffer();"
        f" const bytes = new Uint8Array(buf);"
        f" let bin = '';"
        f" const chunk = 0x8000;"
        f" for (let i = 0; i < bytes.length; i += chunk) {{"
        f" bin += String.fromCharCode.apply(null, bytes.subarray(i, i + chunk));"
        f" }}"
        f" return {{status: r.status, b64: btoa(bin), bytes: bytes.length}};"
        f"}})()"
    )
    res = cdp.call("Runtime.evaluate", {
        "expression": expr,
        "awaitPromise": True,
        "returnByValue": True,
    }, timeout=300)
    val = res.get("result", {}).get("value", {}) or {}
    status = val.get("status", 0)
    b64 = val.get("b64")
    if status == 200 and b64:
        return status, base64.b64decode(b64)
    return status, None
124
+
125
+
126
def fetch_text_in_page(cdp: CDP, url: str) -> tuple[int, str]:
    """Fetch *url* from inside the page and return ``(status, body_text)``.

    Runs the page's ``fetch`` through ``Runtime.evaluate`` with credentials
    included and caching disabled. When no value comes back the result is
    ``(0, "")``.
    """
    js_url = json.dumps(url)
    script = "".join([
        "(async () => {",
        " const r = await fetch(" + js_url + ", {credentials: 'include', cache: 'no-store'});",
        " return {status: r.status, text: await r.text()};",
        "})()",
    ])
    evaluate_params = {
        "expression": script,
        "awaitPromise": True,
        "returnByValue": True,
    }
    reply = cdp.call("Runtime.evaluate", evaluate_params, timeout=120)
    value = reply.get("result", {}).get("value", {})
    if not value:
        value = {}
    return value.get("status", 0), value.get("text", "")
140
+
141
+
142
def task_a_press_release(cdp: CDP) -> None:
    """Navigate to press release, extract innerText from main article.

    Loads ``PRESS_URL`` in the tab, then evaluates a script that returns the
    ``innerText`` of the first candidate element holding more than 500
    characters, falling back to the whole ``<body>``. Non-empty text is
    written to ``press-release-2026-05-22.txt`` under ``DOC_DIR``.
    """
    print("[A] navigating → press release", flush=True)
    cdp.call("Page.navigate", {"url": PRESS_URL})
    try:
        cdp.wait_event("Page.loadEventFired", timeout=30.0)
    except TimeoutError:
        pass  # a slow load is not fatal; extract whatever has rendered
    time.sleep(3.0)

    # Try multiple candidate selectors; press releases on DoW use various article wrappers.
    # The fragments below are joined into ONE line of JavaScript, so a `//`
    # comment would swallow everything after it (the fallback return and the
    # closing `})()`) and make the expression a syntax error. Use a block
    # comment for the in-script note.
    extract_js = (
        "(() => {"
        " const candidates = ["
        " document.querySelector('.body-text'),"
        " document.querySelector('.article-body'),"
        " document.querySelector('.article-content'),"
        " document.querySelector('.press-release'),"
        " document.querySelector('main article'),"
        " document.querySelector('main .content'),"
        " document.querySelector('main'),"
        " document.querySelector('article'),"
        " ];"
        " for (const el of candidates) {"
        " if (el && el.innerText && el.innerText.length > 500) {"
        " return {selector: el.tagName + (el.className ? '.' + el.className.split(' ').join('.') : ''), text: el.innerText, len: el.innerText.length};"
        " }"
        " }"
        " /* Last resort: full body innerText */"
        " return {selector: 'body', text: document.body.innerText, len: document.body.innerText.length};"
        "})()"
    )
    res = cdp.call("Runtime.evaluate", {"expression": extract_js, "returnByValue": True})
    val = res.get("result", {}).get("value", {}) or {}
    text = val.get("text", "")
    print(f"[A] selector={val.get('selector')!r} len={val.get('len')}", flush=True)
    if text:
        # Explicit UTF-8: scraped text must not depend on the locale encoding.
        (DOC_DIR / "press-release-2026-05-22.txt").write_text(text, encoding="utf-8")
        print(f"[A] wrote press-release-2026-05-22.txt ({len(text)} chars)", flush=True)
181
+
182
+
183
def task_b_thumbnails(cdp: CDP) -> None:
    """Page-context-fetch all 6 PDF thumbnails.

    Navigates to the war.gov UFO page first, then downloads every name in
    ``THUMB_NAMES`` from ``THUMB_BASE`` into ``THUMB_DIR``, printing one
    OK/FAIL line per file.
    """
    total = len(THUMB_NAMES)
    print(f"[B] pulling {total} thumbnails via in-page fetch", flush=True)
    # Make sure we're on a war.gov tab so credentials/Akamai cookies apply
    cdp.call("Page.navigate", {"url": "https://www.war.gov/UFO/"})
    try:
        cdp.wait_event("Page.loadEventFired", timeout=30.0)
    except TimeoutError:
        pass
    time.sleep(2.0)
    for thumb in THUMB_NAMES:
        status, payload = fetch_binary_in_page(cdp, f"{THUMB_BASE}/{thumb}.jpg")
        destination = THUMB_DIR / f"{thumb}.jpg"
        if not (status == 200 and payload):
            print(f"[B] FAIL {thumb}.jpg status={status}", flush=True)
            continue
        destination.write_bytes(payload)
        print(f"[B] OK {thumb}.jpg {len(payload)} bytes", flush=True)
202
+
203
+
204
def task_c_fbi_vault_part_15(cdp: CDP) -> None:
    """Try to fetch FBI Vault UFO Part 15 of 16.

    Loads ``FBI_PART_PAGE``, looks for a PDF-like link on it (falling back to
    ``FBI_PART_PDF_GUESS``) and downloads that in page context into
    ``FBI_DIR``. If the download fails, the page HTML is saved instead so the
    layout can be inspected by hand.
    """
    print("[C] navigating → FBI Vault Part 15 page", flush=True)
    cdp.call("Page.navigate", {"url": FBI_PART_PAGE})
    try:
        cdp.wait_event("Page.loadEventFired", timeout=30.0)
    except TimeoutError:
        pass  # a slow load is not fatal; inspect whatever has rendered
    time.sleep(3.0)

    # Try to find the PDF link on the page (Plone reading-room standard pattern)
    link_js = (
        "(() => {"
        " const links = Array.from(document.querySelectorAll('a[href]')).map(a => a.href);"
        " const pdfish = links.filter(h => /\\.pdf(\\?|$)|at_download\\/file/i.test(h));"
        " return {title: document.title, total: links.length, pdfish: pdfish.slice(0, 10)};"
        "})()"
    )
    res = cdp.call("Runtime.evaluate", {"expression": link_js, "returnByValue": True})
    link_val = res.get("result", {}).get("value", {}) or {}
    print(f"[C] page info: {json.dumps(link_val)}", flush=True)

    # First link that is a download endpoint or ends in .pdf, if any.
    pdf_url = next(
        (h for h in link_val.get("pdfish", [])
         if "at_download/file" in h or h.lower().endswith(".pdf")),
        None,
    )
    if not pdf_url:
        pdf_url = FBI_PART_PDF_GUESS
        print(f"[C] using guess URL → {pdf_url}", flush=True)

    print(f"[C] page-context fetch → {pdf_url}", flush=True)
    status, content = fetch_binary_in_page(cdp, pdf_url)
    if status == 200 and content:
        out_path = FBI_DIR / "UFO-Part-15-of-16.pdf"
        out_path.write_bytes(content)
        print(f"[C] OK {out_path.name} {len(content)/1e6:.1f} MB", flush=True)
    else:
        # Maybe the page itself IS the PDF (some Vault items)
        print(f"[C] direct fetch failed status={status}; trying alternate URLs", flush=True)
        # Save the page HTML for inspection; the HTTP status is not used here.
        _, html_text = fetch_text_in_page(cdp, FBI_PART_PAGE)
        html_text = html_text or ""
        # Explicit UTF-8: scraped HTML must not depend on the locale encoding.
        (FBI_DIR / "part-15-page.html").write_text(html_text, encoding="utf-8")
        print(f"[C] saved page HTML for inspection ({len(html_text)} chars)", flush=True)
248
+
249
+
250
def main() -> int:
    """Run tasks A, B and C in one freshly opened tab, then clean up.

    Opens a blank tab, enables the Page, Runtime and Network domains and runs
    the three capture tasks in order. The WebSocket and the tab are closed
    whether or not a task raises.

    Returns:
        0 when all three tasks ran without raising.
    """
    tab = open_tab("about:blank")
    target_id = tab["id"]
    ws_url = tab["webSocketDebuggerUrl"]
    cdp = None
    try:
        # Connect inside the try so a failed WebSocket handshake still closes
        # the tab that was just opened.
        cdp = CDP(ws_url)
        cdp.call("Page.enable")
        cdp.call("Runtime.enable")
        cdp.call("Network.enable", {"maxPostDataSize": 0})

        task_a_press_release(cdp)
        task_b_thumbnails(cdp)
        task_c_fbi_vault_part_15(cdp)

        return 0
    finally:
        if cdp is not None:
            cdp.close()
        close_tab(target_id)
268
+
269
+
270
+ if __name__ == "__main__":
271
+ sys.exit(main())