@smilintux/skcapstone 0.9.0 → 0.12.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (284) hide show
  1. package/.env.example +10 -4
  2. package/.github/workflows/ci.yml +2 -2
  3. package/.github/workflows/publish.yml +9 -2
  4. package/.openclaw-workspace.json +2 -2
  5. package/CLAUDE.md +37 -0
  6. package/MISSION.md +17 -2
  7. package/README.md +282 -3
  8. package/docker/Dockerfile +7 -7
  9. package/docker/compose-templates/dev-team.yml +12 -12
  10. package/docker/compose-templates/mini-team.yml +9 -9
  11. package/docker/compose-templates/ops-team.yml +10 -10
  12. package/docker/compose-templates/research-team.yml +10 -10
  13. package/docker/entrypoint.sh +4 -4
  14. package/docs/ADR-optional-integration-backbone.md +181 -0
  15. package/docs/ARCHITECTURE.md +186 -43
  16. package/docs/BOND_WITH_GROK.md +6 -6
  17. package/docs/CUSTOM_AGENT.md +278 -1
  18. package/docs/DREAMING.md +70 -0
  19. package/docs/GETTING_STARTED.md +10 -7
  20. package/docs/QUICKSTART.md +10 -6
  21. package/docs/SKJOULE_ARCHITECTURE.md +3 -3
  22. package/docs/SOUL_SWAPPER.md +5 -5
  23. package/docs/hammertime-audit.md +402 -0
  24. package/docs/sk-integration-HANDOFF.md +117 -0
  25. package/docs/skscheduler.md +155 -0
  26. package/docs/superpowers/examples/jobs.yaml +31 -0
  27. package/docs/superpowers/plans/2026-06-08-skscheduler.md +1265 -0
  28. package/docs/superpowers/specs/2026-06-08-skscheduler-design.md +186 -0
  29. package/examples/custom-bond-template.json +1 -1
  30. package/examples/grok-feb.json +1 -1
  31. package/examples/queen-ava-feb.json +1 -1
  32. package/launchd/com.skcapstone.daemon.plist +52 -0
  33. package/launchd/com.skcapstone.memory-compress.plist +45 -0
  34. package/launchd/com.skcapstone.skcomms-heartbeat.plist +33 -0
  35. package/launchd/com.skcapstone.skcomms-queue-drain.plist +34 -0
  36. package/launchd/install-launchd.sh +156 -0
  37. package/{openclaw-plugin → openclaw-plugin.archived-2026-04-23}/src/index.ts +3 -2
  38. package/package.json +1 -1
  39. package/pyproject.toml +16 -10
  40. package/scripts/archive-sessions.sh +95 -0
  41. package/scripts/check-updates.py +4 -4
  42. package/scripts/install-bundle.sh +8 -8
  43. package/scripts/install.ps1 +12 -11
  44. package/scripts/install.sh +196 -11
  45. package/scripts/model-fallback-monitor.sh +102 -0
  46. package/scripts/notion-api.py +259 -0
  47. package/scripts/nvidia-proxy.mjs +908 -0
  48. package/scripts/proxy-monitor.sh +89 -0
  49. package/scripts/refresh-anthropic-token.sh +172 -0
  50. package/scripts/release.sh +98 -0
  51. package/scripts/session-to-memory.py +219 -0
  52. package/scripts/skgateway.mjs +856 -0
  53. package/scripts/telegram-catchup-all.sh +147 -0
  54. package/scripts/verify_install.sh +2 -2
  55. package/scripts/wargov-ufo-capture/README.md +43 -0
  56. package/scripts/wargov-ufo-capture/cdp_capture_release2.py +273 -0
  57. package/scripts/wargov-ufo-capture/cdp_capture_splc_doj.py +246 -0
  58. package/scripts/wargov-ufo-capture/cdp_finish.py +271 -0
  59. package/scripts/wargov-ufo-capture/cdp_probe.py +188 -0
  60. package/scripts/wargov-ufo-capture/cdp_splc_pressrelease.py +101 -0
  61. package/scripts/wargov-ufo-capture/parse_csv.py +95 -0
  62. package/scripts/wargov-ufo-capture/pull_dvids.sh +107 -0
  63. package/scripts/watch-anthropic-token.sh +212 -0
  64. package/scripts/windows/install-tasks.ps1 +7 -7
  65. package/scripts/windows/skcapstone-task.xml +1 -1
  66. package/src/skcapstone/__init__.py +45 -3
  67. package/src/skcapstone/_cli_monolith.py +20 -15
  68. package/src/skcapstone/activity.py +5 -1
  69. package/src/skcapstone/agent_card.py +3 -2
  70. package/src/skcapstone/api.py +41 -40
  71. package/src/skcapstone/auction.py +14 -11
  72. package/src/skcapstone/backup.py +2 -1
  73. package/src/skcapstone/blueprint_registry.py +4 -3
  74. package/src/skcapstone/blueprints/builtins/itil-operations.yaml +40 -0
  75. package/src/skcapstone/brain_first.py +238 -0
  76. package/src/skcapstone/changelog.py +1 -1
  77. package/src/skcapstone/chat.py +22 -17
  78. package/src/skcapstone/cli/__init__.py +9 -1
  79. package/src/skcapstone/cli/_common.py +1 -0
  80. package/src/skcapstone/cli/agents_spawner.py +5 -2
  81. package/src/skcapstone/cli/alerts.py +25 -4
  82. package/src/skcapstone/cli/bench.py +15 -15
  83. package/src/skcapstone/cli/chat.py +7 -4
  84. package/src/skcapstone/cli/consciousness.py +5 -2
  85. package/src/skcapstone/cli/context_cmd.py +18 -4
  86. package/src/skcapstone/cli/daemon.py +121 -42
  87. package/src/skcapstone/cli/gtd.py +26 -1
  88. package/src/skcapstone/cli/housekeeping.py +3 -3
  89. package/src/skcapstone/cli/identity_cmd.py +378 -0
  90. package/src/skcapstone/cli/joule_cmd.py +7 -3
  91. package/src/skcapstone/cli/memory.py +8 -6
  92. package/src/skcapstone/cli/peers_dir.py +1 -1
  93. package/src/skcapstone/cli/register_cmd.py +29 -3
  94. package/src/skcapstone/cli/scheduler_cmd.py +167 -0
  95. package/src/skcapstone/cli/session.py +25 -0
  96. package/src/skcapstone/cli/setup.py +96 -29
  97. package/src/skcapstone/cli/shell_cmd.py +53 -1
  98. package/src/skcapstone/cli/skills_cmd.py +2 -2
  99. package/src/skcapstone/cli/soul.py +8 -5
  100. package/src/skcapstone/cli/status.py +37 -11
  101. package/src/skcapstone/cli/telegram.py +21 -0
  102. package/src/skcapstone/cli/test_cmd.py +5 -5
  103. package/src/skcapstone/cli/test_connection.py +2 -2
  104. package/src/skcapstone/cli/upgrade_cmd.py +23 -14
  105. package/src/skcapstone/cli/version_cmd.py +1 -1
  106. package/src/skcapstone/cli/watch_cmd.py +9 -6
  107. package/src/skcapstone/cloud9_bridge.py +14 -14
  108. package/src/skcapstone/codex_setup.py +255 -0
  109. package/src/skcapstone/config_validator.py +7 -4
  110. package/src/skcapstone/consciousness_config.py +5 -1
  111. package/src/skcapstone/consciousness_loop.py +313 -273
  112. package/src/skcapstone/context_loader.py +121 -0
  113. package/src/skcapstone/coord_federation.py +2 -1
  114. package/src/skcapstone/coordination.py +23 -6
  115. package/src/skcapstone/crush_integration.py +2 -1
  116. package/src/skcapstone/daemon.py +151 -88
  117. package/src/skcapstone/dashboard.py +10 -10
  118. package/src/skcapstone/data/sk-agent-picker.sh +421 -0
  119. package/src/skcapstone/data/systemd/skcapstone-api.socket +9 -0
  120. package/src/skcapstone/data/systemd/skcapstone-memory-compress.service +18 -0
  121. package/src/skcapstone/data/systemd/skcapstone-memory-compress.timer +11 -0
  122. package/src/skcapstone/data/systemd/skcapstone.service +37 -0
  123. package/src/skcapstone/data/systemd/skcapstone@.service +50 -0
  124. package/src/skcapstone/data/systemd/skcomms-heartbeat.service +18 -0
  125. package/{systemd/skcomm-heartbeat.timer → src/skcapstone/data/systemd/skcomms-heartbeat.timer} +2 -2
  126. package/src/skcapstone/data/systemd/skcomms-queue-drain.service +17 -0
  127. package/{systemd/skcomm-queue-drain.timer → src/skcapstone/data/systemd/skcomms-queue-drain.timer} +2 -2
  128. package/src/skcapstone/defaults/claude/CLAUDE.md +67 -0
  129. package/src/skcapstone/defaults/claude/settings.json +74 -0
  130. package/src/skcapstone/defaults/lumina/config/claude-hooks.md +57 -0
  131. package/src/skcapstone/defaults/lumina/config/skgraph.yaml +55 -10
  132. package/src/skcapstone/defaults/lumina/config/skmemory.yaml +79 -13
  133. package/src/skcapstone/defaults/lumina/config/skvector.yaml +60 -9
  134. package/src/skcapstone/defaults/lumina/memory/long-term/18b9c0d1e2f3-cloud9-protocol.json +2 -2
  135. package/src/skcapstone/defaults/lumina/memory/long-term/a1b2c3d4e5f6-ecosystem-overview.json +2 -2
  136. package/src/skcapstone/defaults/lumina/memory/long-term/b2c3d4e5f6a7-five-pillars.json +9 -9
  137. package/src/skcapstone/defaults/lumina/memory/long-term/d4e5f6a7b8c9-site-directory.json +2 -2
  138. package/src/skcapstone/defaults/unhinged.json +13 -0
  139. package/src/skcapstone/discovery.py +43 -20
  140. package/src/skcapstone/doctor.py +941 -22
  141. package/src/skcapstone/dreaming.py +1183 -109
  142. package/src/skcapstone/emotion_tracker.py +2 -2
  143. package/src/skcapstone/export.py +4 -3
  144. package/src/skcapstone/fuse_mount.py +35 -25
  145. package/src/skcapstone/gui_installer.py +2 -2
  146. package/src/skcapstone/heartbeat.py +34 -30
  147. package/src/skcapstone/housekeeping.py +14 -14
  148. package/src/skcapstone/install_wizard.py +209 -7
  149. package/src/skcapstone/itil.py +13 -4
  150. package/src/skcapstone/kms_scheduler.py +10 -8
  151. package/src/skcapstone/launchd.py +426 -0
  152. package/src/skcapstone/mcp_launcher.py +15 -1
  153. package/src/skcapstone/mcp_server.py +341 -49
  154. package/src/skcapstone/mcp_tools/__init__.py +2 -0
  155. package/src/skcapstone/mcp_tools/_helpers.py +2 -2
  156. package/src/skcapstone/mcp_tools/ansible_tools.py +7 -4
  157. package/src/skcapstone/mcp_tools/brain_first_tools.py +90 -0
  158. package/src/skcapstone/mcp_tools/capauth_tools.py +7 -4
  159. package/src/skcapstone/mcp_tools/comm_tools.py +10 -10
  160. package/src/skcapstone/mcp_tools/coord_tools.py +8 -4
  161. package/src/skcapstone/mcp_tools/did_tools.py +11 -8
  162. package/src/skcapstone/mcp_tools/gtd_tools.py +4 -4
  163. package/src/skcapstone/mcp_tools/memory_tools.py +6 -2
  164. package/src/skcapstone/mcp_tools/notification_tools.py +22 -6
  165. package/src/skcapstone/mcp_tools/{skcomm_tools.py → skcomms_tools.py} +14 -14
  166. package/src/skcapstone/mcp_tools/soul_tools.py +8 -2
  167. package/src/skcapstone/mdns_discovery.py +2 -2
  168. package/src/skcapstone/memory_curator.py +1 -1
  169. package/src/skcapstone/memory_engine.py +10 -3
  170. package/src/skcapstone/metrics.py +30 -16
  171. package/src/skcapstone/migrate_memories.py +4 -3
  172. package/src/skcapstone/migrate_multi_agent.py +8 -7
  173. package/src/skcapstone/models.py +47 -5
  174. package/src/skcapstone/notifications.py +42 -18
  175. package/src/skcapstone/onboard.py +1000 -126
  176. package/src/skcapstone/operator_link.py +170 -0
  177. package/src/skcapstone/peer_directory.py +4 -4
  178. package/src/skcapstone/peers.py +19 -19
  179. package/src/skcapstone/pillars/__init__.py +7 -5
  180. package/src/skcapstone/pillars/consciousness.py +191 -0
  181. package/src/skcapstone/pillars/identity.py +51 -7
  182. package/src/skcapstone/pillars/memory.py +9 -3
  183. package/src/skcapstone/pillars/sync.py +2 -2
  184. package/src/skcapstone/preflight.py +3 -3
  185. package/src/skcapstone/providers/docker.py +28 -28
  186. package/src/skcapstone/register.py +6 -6
  187. package/src/skcapstone/registry_client.py +5 -4
  188. package/src/skcapstone/runtime.py +14 -3
  189. package/src/skcapstone/scheduled_tasks.py +254 -19
  190. package/src/skcapstone/scheduler_jobs.py +456 -0
  191. package/src/skcapstone/scheduler_runner.py +239 -0
  192. package/src/skcapstone/scheduler_state.py +162 -0
  193. package/src/skcapstone/sdk.py +310 -0
  194. package/src/skcapstone/service_health.py +279 -39
  195. package/src/skcapstone/session_briefing.py +108 -0
  196. package/src/skcapstone/session_capture.py +1 -1
  197. package/src/skcapstone/shell.py +7 -1
  198. package/src/skcapstone/soul.py +3 -1
  199. package/src/skcapstone/soul_switch.py +3 -1
  200. package/src/skcapstone/summary.py +6 -6
  201. package/src/skcapstone/sync_engine.py +15 -15
  202. package/src/skcapstone/sync_watcher.py +2 -2
  203. package/src/skcapstone/systemd.py +72 -21
  204. package/src/skcapstone/team_comms.py +8 -8
  205. package/src/skcapstone/team_engine.py +1 -1
  206. package/src/skcapstone/testrunner.py +3 -3
  207. package/src/skcapstone/trust_graph.py +40 -5
  208. package/src/skcapstone/unified_search.py +15 -6
  209. package/src/skcapstone/uninstall_wizard.py +11 -3
  210. package/src/skcapstone/version_check.py +8 -4
  211. package/src/skcapstone/warmth_anchor.py +4 -2
  212. package/src/skcapstone/whoami.py +4 -4
  213. package/systemd/skcapstone.service +4 -6
  214. package/systemd/skcapstone@.service +7 -8
  215. package/systemd/skcomms-heartbeat.service +21 -0
  216. package/systemd/skcomms-heartbeat.timer +12 -0
  217. package/systemd/skcomms-queue-drain.service +17 -0
  218. package/systemd/skcomms-queue-drain.timer +12 -0
  219. package/tests/conftest.py +39 -0
  220. package/tests/integration/test_consciousness_e2e.py +39 -39
  221. package/tests/test_agent_card.py +1 -1
  222. package/tests/test_agent_home_scaffold.py +34 -0
  223. package/tests/test_alerts_consumer_topics.py +27 -0
  224. package/tests/test_backup.py +2 -1
  225. package/tests/test_chat.py +6 -6
  226. package/tests/test_claude_md.py +2 -2
  227. package/tests/test_cli_skills.py +10 -10
  228. package/tests/test_cli_test_cmd.py +4 -4
  229. package/tests/test_cli_test_connection.py +1 -1
  230. package/tests/test_cloud9_bridge.py +6 -6
  231. package/tests/test_consciousness_e2e.py +1 -1
  232. package/tests/test_consciousness_loop.py +10 -10
  233. package/tests/test_coordination.py +25 -0
  234. package/tests/test_cross_package.py +21 -21
  235. package/tests/test_daemon.py +4 -4
  236. package/tests/test_daemon_shutdown.py +1 -1
  237. package/tests/test_docker_provider.py +29 -29
  238. package/tests/test_doctor.py +400 -0
  239. package/tests/test_doctor_skscheduler.py +50 -0
  240. package/tests/test_dreaming_engine.py +147 -0
  241. package/tests/test_dreaming_gtd_capture.py +35 -0
  242. package/tests/test_e2e_automated.py +8 -5
  243. package/tests/test_fuse_mount.py +10 -10
  244. package/tests/test_gtd_brief.py +46 -0
  245. package/tests/test_gtd_malformed_tolerance.py +31 -0
  246. package/tests/test_housekeeping.py +15 -15
  247. package/tests/test_identity_migrate.py +251 -0
  248. package/tests/test_integration_backbone.py +598 -0
  249. package/tests/test_itil_gtd_lifecycle.py +37 -0
  250. package/tests/test_jobs_dropins.py +84 -0
  251. package/tests/test_mcp_server.py +82 -37
  252. package/tests/test_models.py +48 -4
  253. package/tests/test_multi_agent.py +31 -29
  254. package/tests/test_notifications.py +122 -32
  255. package/tests/test_onboard.py +63 -75
  256. package/tests/test_operator_link.py +78 -0
  257. package/tests/test_peers.py +14 -14
  258. package/tests/test_pillars.py +98 -0
  259. package/tests/test_preflight.py +3 -3
  260. package/tests/test_runtime.py +21 -0
  261. package/tests/test_scheduled_tasks.py +11 -6
  262. package/tests/test_scheduler_cli.py +47 -0
  263. package/tests/test_scheduler_features.py +133 -0
  264. package/tests/test_scheduler_integration.py +87 -0
  265. package/tests/test_scheduler_jobs.py +155 -0
  266. package/tests/test_scheduler_runner.py +64 -0
  267. package/tests/test_scheduler_state.py +57 -0
  268. package/tests/test_sdk.py +70 -0
  269. package/tests/test_service_health_incidents.py +34 -0
  270. package/tests/test_service_registry.py +52 -0
  271. package/tests/test_session_briefing.py +130 -0
  272. package/tests/test_snapshots.py +4 -4
  273. package/tests/test_sync_pipeline.py +26 -26
  274. package/tests/test_team_comms.py +2 -2
  275. package/tests/test_testrunner.py +2 -2
  276. package/tests/test_trust_graph.py +18 -0
  277. package/tests/test_unified_search.py +2 -2
  278. package/tests/test_version_check.py +10 -0
  279. package/tests/test_version_cmd.py +8 -8
  280. package/tests/test_whoami.py +1 -1
  281. package/systemd/skcomm-heartbeat.service +0 -18
  282. package/systemd/skcomm-queue-drain.service +0 -17
  283. /package/{openclaw-plugin → openclaw-plugin.archived-2026-04-23}/package.json +0 -0
  284. /package/{openclaw-plugin → openclaw-plugin.archived-2026-04-23}/src/openclaw.plugin.json +0 -0
@@ -0,0 +1,188 @@
1
+ #!/usr/bin/env python3
2
+ """Probe war.gov/UFO/ via Lumina Chrome CDP (port 9222).
3
+
4
+ Steps:
5
+ 1. Open a new tab on war.gov/UFO/
6
+ 2. Wait for Vue mount to load (CSV must be reachable)
7
+ 3. Pull the CSV via in-page fetch
8
+ 4. Inspect inline scripts for any release_2 link patterns
9
+ 5. Save raw CSV + script index to ~/clawd/tmp/wargov-capture/probe-out/
10
+
11
+ Output:
12
+ probe-out/uap-csv.csv fresh CSV from the site
13
+ probe-out/inline-scripts.json inline-script link probe
14
+ probe-out/page-meta.json URL/title/page render check
15
+ """
16
+ from __future__ import annotations
17
+
18
+ import json
19
+ import sys
20
+ import time
21
+ import urllib.request
22
+ from pathlib import Path
23
+
24
+ import websocket # websocket-client
25
+
26
+ CDP_HTTP = "http://127.0.0.1:9222"
27
+ TARGET = "https://www.war.gov/UFO/"
28
+ OUT_DIR = Path("/home/cbrd21/clawd/tmp/wargov-capture/probe-out")
29
+ OUT_DIR.mkdir(parents=True, exist_ok=True)
30
+
31
+
32
def cdp_get(path: str, timeout: float = 10.0) -> dict | list:
    """Fetch a DevTools HTTP endpoint and return its decoded JSON body.

    Args:
        path: Endpoint path appended to ``CDP_HTTP``.
        timeout: Seconds to wait for the DevTools server. Without a bound an
            unresponsive browser would block the script indefinitely.

    Returns:
        The parsed JSON document (an object or an array).
    """
    with urllib.request.urlopen(f"{CDP_HTTP}{path}", timeout=timeout) as resp:
        return json.loads(resp.read())
35
+
36
+
37
def open_tab(url: str) -> dict:
    """Ask the DevTools HTTP endpoint to open *url* in a new tab.

    Returns:
        The decoded JSON reply describing the new tab.
    """
    endpoint = f"{CDP_HTTP}/json/new?{url}"
    # Newer Chrome only accepts PUT on /json/new
    request = urllib.request.Request(endpoint, method="PUT")
    with urllib.request.urlopen(request, timeout=10) as response:
        payload = response.read()
    return json.loads(payload)
42
+
43
+
44
def close_tab(target_id: str) -> None:
    """Request that the tab identified by *target_id* be closed.

    This is cleanup code: any failure, whether while sending the request or
    while releasing the response, is ignored.
    """
    close_url = f"{CDP_HTTP}/json/close/{target_id}"
    try:
        response = urllib.request.urlopen(close_url, timeout=5)
    except Exception:
        return
    try:
        response.close()
    except Exception:
        pass
50
+
51
+
52
class CDP:
    """Minimal blocking Chrome DevTools Protocol client over one WebSocket.

    Commands are sent with :meth:`call` and matched to their reply by ``id``.
    Events that arrive while :meth:`call` is still waiting for its reply are
    kept, so a :meth:`wait_event` issued right afterwards can still see them
    instead of timing out on an event that was already delivered.
    """

    # Upper bound on events retained between a call() and the next wait_event().
    _MAX_PENDING_EVENTS = 1024

    def __init__(self, ws_url: str):
        self.ws = websocket.create_connection(ws_url, timeout=60)
        self.mid = 0
        # Event messages received by the most recent call(), oldest first.
        self._pending_events = []

    def call(self, method: str, params: dict | None = None, timeout: float = 30.0) -> dict:
        """Send one command and return the ``result`` object of its reply.

        Args:
            method: Protocol method name, e.g. ``"Page.navigate"``.
            params: Method parameters; ``None`` is sent as an empty object.
            timeout: Overall number of seconds to wait for the reply.

        Raises:
            RuntimeError: The reply carried an ``error`` member.
            websocket.WebSocketTimeoutException: No reply arrived in time.
        """
        self.mid += 1
        msg_id = self.mid
        # Anything buffered for an earlier command is stale once a new
        # command goes out.
        self._pending_events.clear()
        self.ws.send(json.dumps({"id": msg_id, "method": method, "params": params or {}}))
        deadline = time.monotonic() + timeout
        while True:
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                # A steady stream of unrelated events must not keep this
                # loop alive beyond the requested timeout.
                raise websocket.WebSocketTimeoutException(
                    f"{method}: no reply within {timeout}s"
                )
            self.ws.settimeout(remaining)
            msg = json.loads(self.ws.recv())
            if msg.get("id") == msg_id:
                if "error" in msg:
                    raise RuntimeError(f"{method}: {msg['error']}")
                return msg.get("result", {})
            if "method" in msg:
                self._pending_events.append(msg)
                if len(self._pending_events) > self._MAX_PENDING_EVENTS:
                    del self._pending_events[0]

    def wait_event(self, name: str, timeout: float = 30.0) -> dict:
        """Return the ``params`` of the next event called *name*.

        Events buffered by the preceding :meth:`call` are checked first.

        Raises:
            TimeoutError: The event did not arrive within *timeout* seconds.
        """
        for index, buffered in enumerate(self._pending_events):
            if buffered.get("method") == name:
                del self._pending_events[index]
                return buffered.get("params", {})

        deadline = time.monotonic() + timeout
        while time.monotonic() < deadline:
            self.ws.settimeout(max(0.1, deadline - time.monotonic()))
            try:
                raw = self.ws.recv()
            except websocket.WebSocketTimeoutException:
                continue
            msg = json.loads(raw)
            if msg.get("method") == name:
                return msg.get("params", {})
        raise TimeoutError(f"event {name} did not fire within {timeout}s")

    def close(self) -> None:
        """Close the WebSocket, ignoring errors (used from cleanup paths)."""
        try:
            self.ws.close()
        except Exception:
            pass
88
+
89
+
90
def main() -> int:
    """Probe the target page through Chrome DevTools and save the findings.

    Opens a tab on ``TARGET``, waits for the load event (continuing anyway if
    it does not fire in time), then evaluates four in-page scripts and writes
    their results into ``OUT_DIR``: ``page-meta.json``, ``uap-csv.csv`` (or
    ``uap-csv-error.json`` when the fetch does not report status 200),
    ``inline-scripts.json`` and ``link-probe.json``.

    Returns:
        ``0`` once every probe step has run; failures surface as exceptions.
    """
    print(f"[probe] opening tab → {TARGET}", flush=True)
    tab = open_tab(TARGET)
    target_id = tab["id"]
    ws_url = tab["webSocketDebuggerUrl"]
    print(f"[probe] tab id={target_id}", flush=True)

    cdp = None
    try:
        # Connect inside the try block so the tab is still closed when the
        # WebSocket handshake itself fails.
        cdp = CDP(ws_url)
        cdp.call("Page.enable")
        cdp.call("Runtime.enable")
        cdp.call("Network.enable", {"maxPostDataSize": 0})
        cdp.call("Page.navigate", {"url": TARGET})
        try:
            cdp.wait_event("Page.loadEventFired", timeout=30.0)
        except TimeoutError:
            print("[probe] Page.loadEventFired timeout — proceeding anyway", flush=True)

        # Give the Vue mount a chance to render the CSV view
        time.sleep(5.0)

        # Page meta
        meta_js = (
            "({"
            " url: location.href,"
            " title: document.title,"
            " hasMainContent: !!document.querySelector('main'),"
            " scriptInlineCount: document.querySelectorAll('script:not([src])').length,"
            " ufoMentions: (document.body.innerText.match(/UAP|UFO|PURSUE/g) || []).length,"
            " releaseDateGuesses: Array.from(new Set((document.body.innerText.match(/\\b\\d{1,2}\\/\\d{1,2}\\/\\d{2,4}\\b/g) || []))),"
            " release2HrefCount: document.querySelectorAll('a[href*=\"release_2\"]').length,"
            " release2InHtml: (document.documentElement.outerHTML.match(/release_2/gi) || []).length"
            "})"
        )
        meta = cdp.call("Runtime.evaluate", {"expression": meta_js, "returnByValue": True})
        meta_val = meta.get("result", {}).get("value", {})
        (OUT_DIR / "page-meta.json").write_text(json.dumps(meta_val, indent=2), encoding="utf-8")
        print(f"[probe] page-meta: {json.dumps(meta_val)}", flush=True)

        # Pull the CSV via in-page fetch
        csv_js = (
            "(async () => {"
            " const u = '/Portals/1/Interactive/2026/UFO/uap-csv.csv';"
            " const r = await fetch(u, {credentials: 'include', cache: 'no-store'});"
            " return {status: r.status, len: (await r.clone().text()).length, text: await r.text()};"
            "})()"
        )
        csv_res = cdp.call("Runtime.evaluate", {
            "expression": csv_js,
            "awaitPromise": True,
            "returnByValue": True,
        }, timeout=60)
        csv_val = csv_res.get("result", {}).get("value", {})
        if isinstance(csv_val, dict) and csv_val.get("status") == 200:
            # The text comes from a remote site; write it as UTF-8 rather than
            # in whatever the locale encoding happens to be.
            (OUT_DIR / "uap-csv.csv").write_text(csv_val["text"], encoding="utf-8")
            print(f"[probe] CSV pulled, {csv_val['len']} bytes", flush=True)
        else:
            print(f"[probe] CSV fetch failed: {csv_val}", flush=True)
            (OUT_DIR / "uap-csv-error.json").write_text(
                json.dumps(csv_val, indent=2, default=str), encoding="utf-8"
            )

        # Inspect inline scripts for release_2 hints
        scripts_js = (
            "(() => {"
            " const out = [];"
            " document.querySelectorAll('script:not([src])').forEach((s, i) => {"
            " const t = s.textContent || '';"
            " out.push({idx: i, len: t.length, hasRelease2: /release_2/i.test(t), hasFetch: /fetch\\(/.test(t), hasCsv: /\\.csv/.test(t), preview: t.slice(0, 400)});"
            " });"
            " return out;"
            "})()"
        )
        scripts_res = cdp.call("Runtime.evaluate", {"expression": scripts_js, "returnByValue": True})
        scripts_val = scripts_res.get("result", {}).get("value", [])
        (OUT_DIR / "inline-scripts.json").write_text(json.dumps(scripts_val, indent=2), encoding="utf-8")
        release2_mentions = sum(1 for s in scripts_val if s.get("hasRelease2"))
        print(f"[probe] inline scripts: {len(scripts_val)} ({release2_mentions} mention release_2)", flush=True)

        # Probe for press release link
        pr_js = (
            "(() => {"
            " const links = Array.from(document.querySelectorAll('a[href]')).map(a => a.href);"
            " const press = links.filter(h => /News\\/Releases/i.test(h));"
            " const medialink = links.filter(h => /medialink\\/ufo/i.test(h));"
            " return {pressCount: press.length, press: press.slice(0, 20), medialinkCount: medialink.length, medialinkSample: medialink.slice(0, 20)};"
            "})()"
        )
        pr_res = cdp.call("Runtime.evaluate", {"expression": pr_js, "returnByValue": True})
        pr_val = pr_res.get("result", {}).get("value", {})
        (OUT_DIR / "link-probe.json").write_text(json.dumps(pr_val, indent=2), encoding="utf-8")
        print(f"[probe] link probe: press={pr_val.get('pressCount')} medialink={pr_val.get('medialinkCount')}", flush=True)

        print("[probe] DONE", flush=True)
        return 0
    finally:
        if cdp is not None:
            cdp.close()
        close_tab(target_id)


if __name__ == "__main__":
    sys.exit(main())
@@ -0,0 +1,101 @@
1
+ #!/usr/bin/env python3
2
+ """Discover + capture the DOJ SPLC press release by NAVIGATING the search page
3
+ (so JS renders the result list) then reading the rendered DOM. Falls back to
4
+ scraping any /opa/pr/ or /news/ links the rendered page exposes.
5
+ """
6
+ from __future__ import annotations
7
+ import json, re, sys, time, urllib.request
8
+ from pathlib import Path
9
+ import websocket
10
+
11
+ CDP_HTTP = "http://127.0.0.1:9222"
12
+ SEARCH_URL = "https://www.justice.gov/news?search_api_fulltext=Southern%20Poverty%20Law%20Center"
13
+ OUT = Path("/home/cbrd21/clawd/skills/substance-lens/captures/splc-doj-2026-06-03")
14
+
15
+
16
def open_tab(url):
    """Open *url* in a new browser tab via the DevTools HTTP endpoint.

    Returns the decoded JSON reply describing the new tab.
    """
    new_tab_request = urllib.request.Request(f"{CDP_HTTP}/json/new?{url}", method="PUT")
    with urllib.request.urlopen(new_tab_request, timeout=10) as resp:
        payload = resp.read()
    return json.loads(payload)
20
+
21
def close_tab(tid):
    """Ask the DevTools HTTP endpoint to close tab *tid*, ignoring failures.

    This runs from cleanup code, so errors are deliberately swallowed.
    """
    try:
        # urlopen() returns an open HTTP response; the with-block releases its
        # socket instead of leaving it to the garbage collector.
        with urllib.request.urlopen(f"{CDP_HTTP}/json/close/{tid}", timeout=5):
            pass
    except Exception:
        pass
26
+
27
class CDP:
    """Minimal blocking Chrome DevTools Protocol client over one WebSocket.

    Events that arrive while :meth:`call` waits for its reply are kept, so a
    :meth:`wait` issued right afterwards can still observe them.
    """

    # Upper bound on events retained between a call() and the next wait().
    _MAX_PENDING_EVENTS = 1024

    def __init__(self, ws):
        self.ws = websocket.create_connection(ws, timeout=120)
        self.mid = 0
        # Event messages received by the most recent call(), oldest first.
        self._pending_events = []

    def call(self, m, p=None, t=60.0):
        """Send command *m* with params *p* and return the reply's ``result``.

        *t* bounds the whole wait in seconds. Raises ``RuntimeError`` when the
        reply carries an ``error`` member, and
        ``websocket.WebSocketTimeoutException`` when no reply arrives in time.
        """
        self.mid += 1
        i = self.mid
        # Anything buffered for an earlier command is stale once a new
        # command goes out.
        self._pending_events.clear()
        self.ws.send(json.dumps({"id": i, "method": m, "params": p or {}}))
        deadline = time.monotonic() + t
        while True:
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                # Unrelated traffic must not keep the loop alive past *t*.
                raise websocket.WebSocketTimeoutException(f"{m}: no reply within {t}s")
            self.ws.settimeout(remaining)
            msg = json.loads(self.ws.recv())
            if msg.get("id") == i:
                if "error" in msg:
                    raise RuntimeError(f"{m}: {msg['error']}")
                return msg.get("result", {})
            if "method" in msg:
                self._pending_events.append(msg)
                if len(self._pending_events) > self._MAX_PENDING_EVENTS:
                    del self._pending_events[0]

    def wait(self, name, t=30.0):
        """Return the ``params`` of the next *name* event, or ``{}`` on timeout.

        Events buffered by the preceding :meth:`call` are checked first.
        """
        for index, buffered in enumerate(self._pending_events):
            if buffered.get("method") == name:
                del self._pending_events[index]
                return buffered.get("params", {})

        end = time.monotonic() + t
        while time.monotonic() < end:
            self.ws.settimeout(max(0.1, end - time.monotonic()))
            try:
                msg = json.loads(self.ws.recv())
            except websocket.WebSocketTimeoutException:
                continue
            if msg.get("method") == name:
                return msg.get("params", {})
        return {}

    def close(self):
        """Close the WebSocket, ignoring errors (used from cleanup paths)."""
        try:
            self.ws.close()
        except Exception:
            pass
48
+
49
def jseval(cdp, expr, t=60):
    """Evaluate JavaScript *expr* in the page and return its value.

    The expression runs through ``Runtime.evaluate`` with promise awaiting
    and by-value results turned on. Returns ``None`` when the reply has no
    ``value`` member.
    """
    evaluate_params = {
        "expression": expr,
        "awaitPromise": True,
        "returnByValue": True,
    }
    reply = cdp.call("Runtime.evaluate", evaluate_params, t)
    remote_object = reply.get("result", {})
    return remote_object.get("value")
52
+
53
def fetch_text(cdp, url):
    """Fetch *url* from inside the page and return ``(status, text)``.

    The in-page script reports a thrown fetch error as status ``-1`` with the
    error string as text. If the evaluation yields nothing, the result is
    ``(0, "")``.
    """
    url_literal = json.dumps(url)
    script = (
        "(async()=>{try{const r=await fetch(" + url_literal + ",{credentials:'include',cache:'no-store'});"
        "return {status:r.status,text:await r.text()};}catch(e){return{status:-1,text:String(e)};}})()"
    )
    outcome = jseval(cdp, script, 180)
    if not outcome:
        outcome = {}
    status = outcome.get("status", 0)
    text = outcome.get("text", "")
    return status, text
58
+
59
def main():
    """Find the press release on the rendered search page and capture it.

    Navigates a new tab to ``SEARCH_URL``, collects candidate links from the
    rendered DOM, picks the first one matching the SPLC keywords (falling back
    to the first candidate), fetches it in-page, and writes the HTML, its
    extracted text and a discovery manifest into ``OUT``.

    Returns:
        ``0`` when the run completes; failures surface as exceptions.
    """
    # Nothing else creates the capture directory, and write_text() does not
    # create parents on its own.
    OUT.mkdir(parents=True, exist_ok=True)

    tab = open_tab(SEARCH_URL)
    tid = tab["id"]
    cdp = None
    try:
        # Connect inside the try block so the tab is still closed when the
        # WebSocket handshake itself fails.
        cdp = CDP(tab["webSocketDebuggerUrl"])
        cdp.call("Page.enable")
        cdp.call("Runtime.enable")
        cdp.call("Page.navigate", {"url": SEARCH_URL})
        cdp.wait("Page.loadEventFired", 30.0)
        time.sleep(6.0)  # let result JS render
        links = jseval(
            cdp,
            "JSON.stringify(Array.from(document.querySelectorAll('a[href]'))"
            ".map(a=>({h:a.getAttribute('href'),t:(a.innerText||'').trim()}))"
            ".filter(x=>x.h&&(x.h.includes('/opa/pr/')||x.h.includes('/usao-mdal/pr/')||/southern.poverty|law.center|splc/i.test(x.t))))",
        )
        cands = json.loads(links) if links else []
        print(f"[pr] rendered candidates: {len(cands)}", flush=True)
        for c in cands[:15]:
            print(" ", c["h"], "::", c["t"][:70], flush=True)

        # pick best: first keyword match, otherwise the first candidate
        keyword = re.compile(r"southern.poverty|law.center|splc|wire.fraud")
        pr = next(
            (c["h"] for c in cands if keyword.search((c["h"] + c["t"]).lower())),
            None,
        )
        if not pr and cands:
            pr = cands[0]["h"]
        if pr and pr.startswith("/"):
            pr = "https://www.justice.gov" + pr

        manifest = {"search_url": SEARCH_URL, "rendered_candidates": cands[:15], "chosen": pr}
        if pr:
            print(f"[pr] fetching -> {pr}", flush=True)
            st, html = fetch_text(cdp, pr)
            print(f"[pr] status={st} len={len(html)}", flush=True)
            # Default so the manifest can be filled in even when the fetch
            # did not return a usable page.
            txt = ""
            if st == 200 and html:
                (OUT / "doj-press-release.html").write_text(html, encoding="utf-8")
                txt = jseval(
                    cdp,
                    f"(async()=>{{const r=await fetch({json.dumps(pr)},{{credentials:'include'}});"
                    f"const h=await r.text();const d=new DOMParser().parseFromString(h,'text/html');"
                    f"const a=d.querySelector('.field--name-body')||d.querySelector('article')||d.querySelector('main')||d.body;"
                    f"return a?a.innerText:'';}})()",
                    60,
                ) or ""
                if txt:
                    (OUT / "doj-press-release.txt").write_text(txt, encoding="utf-8")
                    print(f"[pr] {len(txt)} chars text", flush=True)
            manifest["status"] = st
            manifest["txt_chars"] = len(txt)
        (OUT / "press-release-discovery.json").write_text(
            json.dumps(manifest, indent=2), encoding="utf-8"
        )
        print("[pr] done", flush=True)
        return 0
    finally:
        if cdp is not None:
            cdp.close()
        close_tab(tid)


if __name__ == "__main__":
    sys.exit(main())
@@ -0,0 +1,95 @@
1
+ #!/usr/bin/env python3
2
+ """Parse the new uap-data.csv, tally records per release date, and extract the Release 02 records.
3
+
4
+ The CSV has multi-line quoted fields (newlines inside Title and Description Blurb),
5
+ so we use Python's csv module rather than naive line counting.
6
+ """
7
+ from __future__ import annotations
8
+
9
+ import csv
10
+ import json
11
+ from collections import Counter, defaultdict
12
+ from pathlib import Path
13
+
14
+ CSV_PATH = Path("/home/cbrd21/nextcloud/cbrd21-share/reference/war-gov-UFO-PURSUE-2026/docs/release-02/uap-data.csv")
15
+ OUT_DIR = Path("/home/cbrd21/clawd/tmp/wargov-capture/probe-out")
16
+ OUT_DIR.mkdir(parents=True, exist_ok=True)
17
+
18
# Value of the "Release Date" column that marks a Release 02 record.
RELEASE_02_DATE = "5/22/26"


def _field(row, name):
    """Return column *name* of *row* stripped of whitespace ("" if missing)."""
    return (row.get(name) or "").strip()


# utf-8-sig drops a leading byte-order mark if the export has one; otherwise
# the mark would become part of the first header name and that column would
# silently read as empty. Files without a mark decode exactly as with utf-8.
with CSV_PATH.open(newline="", encoding="utf-8-sig") as f:
    rows = list(csv.DictReader(f))

print(f"Total records: {len(rows)}")

date_counter = Counter(_field(r, "Release Date") for r in rows)
print("Release dates:")
for d, c in sorted(date_counter.items(), key=lambda x: -x[1]):
    print(f" {d!r:15} → {c}")

# Filter for Release 02
release2 = [r for r in rows if _field(r, "Release Date") == RELEASE_02_DATE]
print(f"\nRelease 02 records: {len(release2)}")

# Bucket by type
type_counter = Counter(_field(r, "Type") for r in release2)
agency_counter = Counter(_field(r, "Agency") for r in release2)
print("Types:")
for t, c in type_counter.most_common():
    print(f" {t!r:15} → {c}")
print("Agencies:")
for a, c in agency_counter.most_common():
    print(f" {a!r:15} → {c}")

# Extract download links
links = [
    {
        # Titles may span several lines inside a quoted field; flatten them.
        "title": _field(r, "Title").replace("\n", " ").replace("\r", ""),
        "type": _field(r, "Type"),
        "agency": _field(r, "Agency"),
        "incident_date": _field(r, "Incident Date"),
        "incident_location": _field(r, "Incident Location"),
        "pdf_link": _field(r, "PDF | Image Link"),
        "modal_image": _field(r, "Modal Image"),
        "dvids_id": _field(r, "DVIDS Video ID"),
    }
    for r in release2
]

# Save full inventory
(OUT_DIR / "release-02-records.json").write_text(json.dumps(links, indent=2), encoding="utf-8")
print(f"\nSaved inventory: {OUT_DIR / 'release-02-records.json'}")

# Unique direct-fetchable URLs
urls = {
    rec[key]
    for rec in links
    for key in ("pdf_link", "modal_image")
    if rec[key]
}
urls_list = sorted(urls)
print(f"\nUnique direct URLs: {len(urls_list)}")
for u in urls_list[:15]:
    print(f" {u}")
if len(urls_list) > 15:
    print(f" ... and {len(urls_list) - 15} more")

(OUT_DIR / "release-02-urls.json").write_text(json.dumps(urls_list, indent=2), encoding="utf-8")

# DVIDS-only records (videos hosted exclusively on DVIDS)
dvids_only = [rec for rec in links if rec["dvids_id"] and not rec["pdf_link"]]
print(f"\nDVIDS-only video records: {len(dvids_only)}")
for rec in dvids_only[:10]:
    print(f" DVIDS {rec['dvids_id']}: {rec['title'][:80]}")
(OUT_DIR / "release-02-dvids.json").write_text(json.dumps(dvids_only, indent=2), encoding="utf-8")
@@ -0,0 +1,107 @@
1
+ #!/usr/bin/env bash
2
+ # Pull DVIDS media for Release 02 in parallel (up to 4 at a time).
3
+ # Reads inventory from probe-out/release-02-records.json.
4
+
5
+ set -u
6
+ OUT_DIR="$HOME/nextcloud/cbrd21-share/reference/war-gov-UFO-PURSUE-2026/release-02"
7
+ INVENTORY="$HOME/clawd/tmp/wargov-capture/probe-out/release-02-records.json"
8
+ LOG="$HOME/clawd/tmp/wargov-capture/probe-out/dvids-pull.log"
9
+ mkdir -p "$OUT_DIR"
10
+ : > "$LOG"
11
+
12
# pull_one DVIDS_ID TITLE_SLUG KIND
#   Download one DVIDS asset into $OUT_DIR and append the outcome to $LOG.
#   KIND is "audio" or anything else (treated as video).
#   Returns 0 on success or when the file is already present, 1 otherwise.
pull_one() {
  local dvids_id="$1"
  local title_slug="$2"
  local kind="$3" # video or audio
  local out_path="$OUT_DIR/dvids-${kind}-${dvids_id}-${title_slug}"

  # Pick the right URL prefix
  local page_url
  if [[ "$kind" == "audio" ]]; then
    page_url="https://www.dvidshub.net/audio/${dvids_id}"
  else
    page_url="https://www.dvidshub.net/video/${dvids_id}"
  fi

  local page_html
  page_html=$(curl -sSL --max-time 60 "$page_url" 2>/dev/null) || {
    echo "[FAIL fetch page] dvids=$dvids_id" | tee -a "$LOG"
    return 1
  }

  # Extract mp4 (video) or mp3 (audio) CDN URL
  local media_url ext
  if [[ "$kind" == "audio" ]]; then
    media_url=$(grep -oE 'https?://[^"]+\.mp3[^"]*' <<<"$page_html" | head -1)
    ext="mp3"
    # DVIDS audio sometimes is .m4a or hosted via a different path; fallback to grepping for asset URL
    if [[ -z "$media_url" ]]; then
      media_url=$(grep -oE 'https?://[^"]+\.m4a[^"]*' <<<"$page_html" | head -1)
      ext="m4a"
    fi
    if [[ -z "$media_url" ]]; then
      media_url=$(grep -oE 'https?://d34w7g4gy10iej\.cloudfront\.net/[^"]+' <<<"$page_html" | head -1)
      ext="${media_url##*.}"
      # Keep a query string out of the file extension.
      ext="${ext%%\?*}"
    fi
  else
    media_url=$(grep -oE 'https?://[^"]+\.mp4[^"]*' <<<"$page_html" | head -1)
    ext="mp4"
  fi

  if [[ -z "$media_url" ]]; then
    echo "[FAIL no-media-url] dvids=$dvids_id kind=$kind" | tee -a "$LOG"
    # Save the page HTML for inspection
    echo "$page_html" > "${out_path}.html"
    return 1
  fi

  local final_path="${out_path}.${ext}"
  if [[ -f "$final_path" ]] && [[ -s "$final_path" ]]; then
    echo "[SKIP already-have] $final_path" | tee -a "$LOG"
    return 0
  fi

  # Download to a side file and rename only on success. Otherwise a failed or
  # truncated transfer would sit at $final_path and be reported as
  # "already-have" on the next run.
  local part_path="${final_path}.part"
  if ! curl -fsSL --max-time 600 -o "$part_path" "$media_url" 2>/dev/null; then
    rm -f "$part_path"
    echo "[FAIL download curl-error] dvids=$dvids_id url=$media_url" | tee -a "$LOG"
    return 1
  fi

  local size
  # wc -c is portable where `stat -c` is GNU-only; strip the padding some
  # implementations add.
  size=$(wc -c < "$part_path" 2>/dev/null || echo 0)
  size="${size//[[:space:]]/}"
  if [[ "${size:-0}" -lt 1024 ]]; then
    rm -f "$part_path"
    echo "[FAIL download too-small=$size] dvids=$dvids_id url=$media_url" | tee -a "$LOG"
    return 1
  fi

  mv -f "$part_path" "$final_path"
  echo "[OK] dvids=$dvids_id kind=$kind size=$((size/1024))KB → $(basename "$final_path")" | tee -a "$LOG"
}
73
+
74
+ # Generate worker commands from the inventory using python
75
# Job list: one "id<TAB>slug<TAB>kind" line per DVIDS record. A private temp
# file avoids clashing with another run on a fixed /tmp path.
JOBS_FILE=$(mktemp "${TMPDIR:-/tmp}/wargov-dvids-jobs.XXXXXX") || exit 1
trap 'rm -f "$JOBS_FILE"' EXIT

# Generate worker commands from the inventory using python. The inventory path
# is passed as an argument so $INVENTORY is the single source of truth.
python3 - "$INVENTORY" <<'PY' > "$JOBS_FILE" || { echo "[FAIL inventory] could not read $INVENTORY" | tee -a "$LOG"; exit 1; }
import json, re, sys
with open(sys.argv[1], encoding="utf-8") as f:
    records = json.load(f)
for r in records:
    dvids_id = str(r["dvids_id"] or "").strip()
    if not dvids_id:
        continue
    # The id ends up in a URL, a file name and a worker argument; refuse
    # anything that is not a plain token instead of passing it along.
    if not re.fullmatch(r"[A-Za-z0-9_-]+", dvids_id):
        print(f"skipping record with unexpected DVIDS id {dvids_id!r}", file=sys.stderr)
        continue
    kind = "audio" if r["type"] == "AUD" else "video"
    # Slugify title: strip quotes, collapse non-alnum to dashes, limit length
    title = r["title"]
    slug = re.sub(r'[^a-zA-Z0-9]+', '-', title).strip('-').lower()[:60]
    # Use the DOW-UAP-PR id from the title if available (more durable)
    m = re.match(r'([A-Z]+-UAP-(?:PR|D)[0-9]+[a-z]?)', title)
    if m:
        slug = m.group(1).lower() + "-" + slug[:30]
    # An empty slug would shift the three fields read by the workers.
    slug = slug or "untitled"
    print(f"{dvids_id}\t{slug}\t{kind}")
PY

JOB_COUNT=$(wc -l < "$JOBS_FILE")
echo "[plan] $JOB_COUNT DVIDS jobs queued" | tee -a "$LOG"

export -f pull_one
export OUT_DIR LOG

# Run up to 4 workers in parallel. -n 3 hands each worker one
# (id, slug, kind) triple as real arguments, so no inventory text is ever
# re-parsed as shell code.
xargs -P 4 -n 3 bash -c 'pull_one "$@"' _ < "$JOBS_FILE"

OK_COUNT=$(grep -c '^\[OK\]' "$LOG" || true)
FAIL_COUNT=$(grep -cE '^\[FAIL' "$LOG" || true)
SKIP_COUNT=$(grep -c '^\[SKIP' "$LOG" || true)
echo "[done] OK=$OK_COUNT FAIL=$FAIL_COUNT SKIP=$SKIP_COUNT" | tee -a "$LOG"