@smilintux/skcapstone 0.10.0 → 0.12.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (279) hide show
  1. package/.env.example +10 -4
  2. package/.github/workflows/ci.yml +2 -2
  3. package/.github/workflows/publish.yml +9 -2
  4. package/.openclaw-workspace.json +2 -2
  5. package/CLAUDE.md +37 -0
  6. package/MISSION.md +17 -2
  7. package/README.md +282 -3
  8. package/docker/Dockerfile +7 -7
  9. package/docker/compose-templates/dev-team.yml +12 -12
  10. package/docker/compose-templates/mini-team.yml +9 -9
  11. package/docker/compose-templates/ops-team.yml +10 -10
  12. package/docker/compose-templates/research-team.yml +10 -10
  13. package/docker/entrypoint.sh +4 -4
  14. package/docs/ADR-optional-integration-backbone.md +181 -0
  15. package/docs/ARCHITECTURE.md +186 -43
  16. package/docs/BOND_WITH_GROK.md +6 -6
  17. package/docs/CUSTOM_AGENT.md +123 -30
  18. package/docs/DREAMING.md +70 -0
  19. package/docs/GETTING_STARTED.md +7 -7
  20. package/docs/QUICKSTART.md +10 -6
  21. package/docs/SKJOULE_ARCHITECTURE.md +3 -3
  22. package/docs/SOUL_SWAPPER.md +5 -5
  23. package/docs/hammertime-audit.md +402 -0
  24. package/docs/sk-integration-HANDOFF.md +117 -0
  25. package/docs/skscheduler.md +155 -0
  26. package/docs/superpowers/examples/jobs.yaml +31 -0
  27. package/docs/superpowers/plans/2026-06-08-skscheduler.md +1265 -0
  28. package/docs/superpowers/specs/2026-06-08-skscheduler-design.md +186 -0
  29. package/examples/custom-bond-template.json +1 -1
  30. package/examples/grok-feb.json +1 -1
  31. package/examples/queen-ava-feb.json +1 -1
  32. package/launchd/{com.skcapstone.skcomm-heartbeat.plist → com.skcapstone.skcomms-heartbeat.plist} +4 -4
  33. package/launchd/{com.skcapstone.skcomm-queue-drain.plist → com.skcapstone.skcomms-queue-drain.plist} +4 -4
  34. package/launchd/install-launchd.sh +6 -6
  35. package/{openclaw-plugin → openclaw-plugin.archived-2026-04-23}/src/index.ts +3 -2
  36. package/package.json +1 -1
  37. package/pyproject.toml +16 -10
  38. package/scripts/archive-sessions.sh +7 -0
  39. package/scripts/check-updates.py +4 -4
  40. package/scripts/install-bundle.sh +8 -8
  41. package/scripts/install.ps1 +12 -11
  42. package/scripts/install.sh +159 -5
  43. package/scripts/model-fallback-monitor.sh +102 -0
  44. package/scripts/nvidia-proxy.mjs +78 -26
  45. package/scripts/refresh-anthropic-token.sh +172 -0
  46. package/scripts/release.sh +98 -0
  47. package/scripts/session-to-memory.py +219 -0
  48. package/scripts/skgateway.mjs +3 -3
  49. package/scripts/telegram-catchup-all.sh +12 -1
  50. package/scripts/verify_install.sh +2 -2
  51. package/scripts/wargov-ufo-capture/README.md +43 -0
  52. package/scripts/wargov-ufo-capture/cdp_capture_release2.py +273 -0
  53. package/scripts/wargov-ufo-capture/cdp_capture_splc_doj.py +246 -0
  54. package/scripts/wargov-ufo-capture/cdp_finish.py +271 -0
  55. package/scripts/wargov-ufo-capture/cdp_probe.py +188 -0
  56. package/scripts/wargov-ufo-capture/cdp_splc_pressrelease.py +101 -0
  57. package/scripts/wargov-ufo-capture/parse_csv.py +95 -0
  58. package/scripts/wargov-ufo-capture/pull_dvids.sh +107 -0
  59. package/scripts/watch-anthropic-token.sh +212 -0
  60. package/scripts/windows/install-tasks.ps1 +7 -7
  61. package/scripts/windows/skcapstone-task.xml +1 -1
  62. package/src/skcapstone/__init__.py +45 -3
  63. package/src/skcapstone/_cli_monolith.py +20 -15
  64. package/src/skcapstone/activity.py +5 -1
  65. package/src/skcapstone/agent_card.py +3 -2
  66. package/src/skcapstone/api.py +41 -40
  67. package/src/skcapstone/auction.py +14 -11
  68. package/src/skcapstone/backup.py +2 -1
  69. package/src/skcapstone/blueprint_registry.py +4 -3
  70. package/src/skcapstone/brain_first.py +238 -0
  71. package/src/skcapstone/changelog.py +1 -1
  72. package/src/skcapstone/chat.py +22 -17
  73. package/src/skcapstone/cli/__init__.py +9 -1
  74. package/src/skcapstone/cli/_common.py +1 -0
  75. package/src/skcapstone/cli/agents_spawner.py +5 -2
  76. package/src/skcapstone/cli/alerts.py +25 -4
  77. package/src/skcapstone/cli/bench.py +15 -15
  78. package/src/skcapstone/cli/chat.py +7 -4
  79. package/src/skcapstone/cli/consciousness.py +5 -2
  80. package/src/skcapstone/cli/context_cmd.py +18 -4
  81. package/src/skcapstone/cli/daemon.py +11 -7
  82. package/src/skcapstone/cli/gtd.py +26 -1
  83. package/src/skcapstone/cli/housekeeping.py +3 -3
  84. package/src/skcapstone/cli/identity_cmd.py +378 -0
  85. package/src/skcapstone/cli/joule_cmd.py +7 -3
  86. package/src/skcapstone/cli/memory.py +8 -6
  87. package/src/skcapstone/cli/peers_dir.py +1 -1
  88. package/src/skcapstone/cli/register_cmd.py +29 -3
  89. package/src/skcapstone/cli/scheduler_cmd.py +167 -0
  90. package/src/skcapstone/cli/session.py +25 -0
  91. package/src/skcapstone/cli/setup.py +96 -29
  92. package/src/skcapstone/cli/shell_cmd.py +53 -1
  93. package/src/skcapstone/cli/skills_cmd.py +2 -2
  94. package/src/skcapstone/cli/soul.py +8 -5
  95. package/src/skcapstone/cli/status.py +37 -11
  96. package/src/skcapstone/cli/telegram.py +21 -0
  97. package/src/skcapstone/cli/test_cmd.py +5 -5
  98. package/src/skcapstone/cli/test_connection.py +2 -2
  99. package/src/skcapstone/cli/upgrade_cmd.py +23 -14
  100. package/src/skcapstone/cli/version_cmd.py +1 -1
  101. package/src/skcapstone/cli/watch_cmd.py +9 -6
  102. package/src/skcapstone/cloud9_bridge.py +14 -14
  103. package/src/skcapstone/codex_setup.py +255 -0
  104. package/src/skcapstone/config_validator.py +7 -4
  105. package/src/skcapstone/consciousness_config.py +5 -1
  106. package/src/skcapstone/consciousness_loop.py +313 -273
  107. package/src/skcapstone/context_loader.py +121 -0
  108. package/src/skcapstone/coord_federation.py +2 -1
  109. package/src/skcapstone/coordination.py +23 -6
  110. package/src/skcapstone/crush_integration.py +2 -1
  111. package/src/skcapstone/daemon.py +132 -77
  112. package/src/skcapstone/dashboard.py +10 -10
  113. package/src/skcapstone/data/sk-agent-picker.sh +421 -0
  114. package/src/skcapstone/data/systemd/skcapstone-api.socket +9 -0
  115. package/src/skcapstone/data/systemd/skcapstone-memory-compress.service +18 -0
  116. package/src/skcapstone/data/systemd/skcapstone-memory-compress.timer +11 -0
  117. package/src/skcapstone/data/systemd/skcapstone.service +37 -0
  118. package/src/skcapstone/data/systemd/skcapstone@.service +50 -0
  119. package/src/skcapstone/data/systemd/skcomms-heartbeat.service +18 -0
  120. package/{systemd/skcomm-heartbeat.timer → src/skcapstone/data/systemd/skcomms-heartbeat.timer} +2 -2
  121. package/src/skcapstone/data/systemd/skcomms-queue-drain.service +17 -0
  122. package/{systemd/skcomm-queue-drain.timer → src/skcapstone/data/systemd/skcomms-queue-drain.timer} +2 -2
  123. package/src/skcapstone/defaults/claude/CLAUDE.md +67 -0
  124. package/src/skcapstone/defaults/claude/settings.json +74 -0
  125. package/src/skcapstone/defaults/lumina/config/claude-hooks.md +57 -0
  126. package/src/skcapstone/defaults/lumina/config/skgraph.yaml +55 -10
  127. package/src/skcapstone/defaults/lumina/config/skmemory.yaml +79 -13
  128. package/src/skcapstone/defaults/lumina/config/skvector.yaml +60 -9
  129. package/src/skcapstone/defaults/lumina/memory/long-term/18b9c0d1e2f3-cloud9-protocol.json +2 -2
  130. package/src/skcapstone/defaults/lumina/memory/long-term/a1b2c3d4e5f6-ecosystem-overview.json +2 -2
  131. package/src/skcapstone/defaults/lumina/memory/long-term/b2c3d4e5f6a7-five-pillars.json +9 -9
  132. package/src/skcapstone/defaults/lumina/memory/long-term/d4e5f6a7b8c9-site-directory.json +2 -2
  133. package/src/skcapstone/defaults/unhinged.json +13 -0
  134. package/src/skcapstone/discovery.py +43 -20
  135. package/src/skcapstone/doctor.py +941 -22
  136. package/src/skcapstone/dreaming.py +1183 -109
  137. package/src/skcapstone/emotion_tracker.py +2 -2
  138. package/src/skcapstone/export.py +4 -3
  139. package/src/skcapstone/fuse_mount.py +14 -12
  140. package/src/skcapstone/gui_installer.py +2 -2
  141. package/src/skcapstone/heartbeat.py +1 -1
  142. package/src/skcapstone/housekeeping.py +14 -14
  143. package/src/skcapstone/install_wizard.py +209 -7
  144. package/src/skcapstone/itil.py +13 -4
  145. package/src/skcapstone/kms_scheduler.py +10 -8
  146. package/src/skcapstone/launchd.py +19 -19
  147. package/src/skcapstone/mcp_launcher.py +15 -1
  148. package/src/skcapstone/mcp_server.py +83 -49
  149. package/src/skcapstone/mcp_tools/__init__.py +2 -0
  150. package/src/skcapstone/mcp_tools/_helpers.py +2 -2
  151. package/src/skcapstone/mcp_tools/ansible_tools.py +7 -4
  152. package/src/skcapstone/mcp_tools/brain_first_tools.py +90 -0
  153. package/src/skcapstone/mcp_tools/capauth_tools.py +7 -4
  154. package/src/skcapstone/mcp_tools/comm_tools.py +10 -10
  155. package/src/skcapstone/mcp_tools/coord_tools.py +8 -4
  156. package/src/skcapstone/mcp_tools/did_tools.py +11 -8
  157. package/src/skcapstone/mcp_tools/gtd_tools.py +4 -4
  158. package/src/skcapstone/mcp_tools/memory_tools.py +6 -2
  159. package/src/skcapstone/mcp_tools/notification_tools.py +22 -6
  160. package/src/skcapstone/mcp_tools/{skcomm_tools.py → skcomms_tools.py} +14 -14
  161. package/src/skcapstone/mcp_tools/soul_tools.py +8 -2
  162. package/src/skcapstone/mdns_discovery.py +2 -2
  163. package/src/skcapstone/memory_curator.py +1 -1
  164. package/src/skcapstone/memory_engine.py +10 -3
  165. package/src/skcapstone/metrics.py +30 -16
  166. package/src/skcapstone/migrate_memories.py +4 -3
  167. package/src/skcapstone/migrate_multi_agent.py +8 -7
  168. package/src/skcapstone/models.py +47 -5
  169. package/src/skcapstone/notifications.py +42 -18
  170. package/src/skcapstone/onboard.py +875 -121
  171. package/src/skcapstone/operator_link.py +170 -0
  172. package/src/skcapstone/peer_directory.py +4 -4
  173. package/src/skcapstone/peers.py +19 -19
  174. package/src/skcapstone/pillars/__init__.py +7 -5
  175. package/src/skcapstone/pillars/consciousness.py +191 -0
  176. package/src/skcapstone/pillars/identity.py +51 -7
  177. package/src/skcapstone/pillars/memory.py +9 -3
  178. package/src/skcapstone/pillars/sync.py +2 -2
  179. package/src/skcapstone/preflight.py +3 -3
  180. package/src/skcapstone/providers/docker.py +28 -28
  181. package/src/skcapstone/register.py +6 -6
  182. package/src/skcapstone/registry_client.py +5 -4
  183. package/src/skcapstone/runtime.py +14 -3
  184. package/src/skcapstone/scheduled_tasks.py +254 -19
  185. package/src/skcapstone/scheduler_jobs.py +456 -0
  186. package/src/skcapstone/scheduler_runner.py +239 -0
  187. package/src/skcapstone/scheduler_state.py +162 -0
  188. package/src/skcapstone/sdk.py +310 -0
  189. package/src/skcapstone/service_health.py +279 -39
  190. package/src/skcapstone/session_briefing.py +108 -0
  191. package/src/skcapstone/session_capture.py +1 -1
  192. package/src/skcapstone/shell.py +7 -1
  193. package/src/skcapstone/soul.py +3 -1
  194. package/src/skcapstone/soul_switch.py +3 -1
  195. package/src/skcapstone/summary.py +6 -6
  196. package/src/skcapstone/sync_engine.py +15 -15
  197. package/src/skcapstone/sync_watcher.py +2 -2
  198. package/src/skcapstone/systemd.py +55 -21
  199. package/src/skcapstone/team_comms.py +8 -8
  200. package/src/skcapstone/team_engine.py +1 -1
  201. package/src/skcapstone/testrunner.py +3 -3
  202. package/src/skcapstone/trust_graph.py +40 -5
  203. package/src/skcapstone/unified_search.py +15 -6
  204. package/src/skcapstone/uninstall_wizard.py +11 -3
  205. package/src/skcapstone/version_check.py +8 -4
  206. package/src/skcapstone/warmth_anchor.py +4 -2
  207. package/src/skcapstone/whoami.py +4 -4
  208. package/systemd/skcapstone.service +4 -6
  209. package/systemd/skcapstone@.service +7 -8
  210. package/systemd/skcomms-heartbeat.service +21 -0
  211. package/systemd/skcomms-heartbeat.timer +12 -0
  212. package/systemd/skcomms-queue-drain.service +17 -0
  213. package/systemd/skcomms-queue-drain.timer +12 -0
  214. package/tests/conftest.py +39 -0
  215. package/tests/integration/test_consciousness_e2e.py +39 -39
  216. package/tests/test_agent_card.py +1 -1
  217. package/tests/test_agent_home_scaffold.py +34 -0
  218. package/tests/test_alerts_consumer_topics.py +27 -0
  219. package/tests/test_backup.py +2 -1
  220. package/tests/test_chat.py +6 -6
  221. package/tests/test_claude_md.py +2 -2
  222. package/tests/test_cli_skills.py +10 -10
  223. package/tests/test_cli_test_cmd.py +4 -4
  224. package/tests/test_cli_test_connection.py +1 -1
  225. package/tests/test_cloud9_bridge.py +6 -6
  226. package/tests/test_consciousness_e2e.py +1 -1
  227. package/tests/test_consciousness_loop.py +10 -10
  228. package/tests/test_coordination.py +25 -0
  229. package/tests/test_cross_package.py +21 -21
  230. package/tests/test_daemon.py +4 -4
  231. package/tests/test_daemon_shutdown.py +1 -1
  232. package/tests/test_docker_provider.py +29 -29
  233. package/tests/test_doctor.py +400 -0
  234. package/tests/test_doctor_skscheduler.py +50 -0
  235. package/tests/test_dreaming_engine.py +147 -0
  236. package/tests/test_dreaming_gtd_capture.py +35 -0
  237. package/tests/test_e2e_automated.py +8 -5
  238. package/tests/test_fuse_mount.py +10 -10
  239. package/tests/test_gtd_brief.py +46 -0
  240. package/tests/test_gtd_malformed_tolerance.py +31 -0
  241. package/tests/test_housekeeping.py +15 -15
  242. package/tests/test_identity_migrate.py +251 -0
  243. package/tests/test_integration_backbone.py +598 -0
  244. package/tests/test_itil_gtd_lifecycle.py +37 -0
  245. package/tests/test_jobs_dropins.py +84 -0
  246. package/tests/test_mcp_server.py +82 -37
  247. package/tests/test_models.py +48 -4
  248. package/tests/test_multi_agent.py +31 -29
  249. package/tests/test_notifications.py +122 -32
  250. package/tests/test_onboard.py +63 -75
  251. package/tests/test_operator_link.py +78 -0
  252. package/tests/test_peers.py +14 -14
  253. package/tests/test_pillars.py +98 -0
  254. package/tests/test_preflight.py +3 -3
  255. package/tests/test_runtime.py +21 -0
  256. package/tests/test_scheduled_tasks.py +11 -6
  257. package/tests/test_scheduler_cli.py +47 -0
  258. package/tests/test_scheduler_features.py +133 -0
  259. package/tests/test_scheduler_integration.py +87 -0
  260. package/tests/test_scheduler_jobs.py +155 -0
  261. package/tests/test_scheduler_runner.py +64 -0
  262. package/tests/test_scheduler_state.py +57 -0
  263. package/tests/test_sdk.py +70 -0
  264. package/tests/test_service_health_incidents.py +34 -0
  265. package/tests/test_service_registry.py +52 -0
  266. package/tests/test_session_briefing.py +130 -0
  267. package/tests/test_snapshots.py +4 -4
  268. package/tests/test_sync_pipeline.py +26 -26
  269. package/tests/test_team_comms.py +2 -2
  270. package/tests/test_testrunner.py +2 -2
  271. package/tests/test_trust_graph.py +18 -0
  272. package/tests/test_unified_search.py +2 -2
  273. package/tests/test_version_check.py +10 -0
  274. package/tests/test_version_cmd.py +8 -8
  275. package/tests/test_whoami.py +1 -1
  276. package/systemd/skcomm-heartbeat.service +0 -18
  277. package/systemd/skcomm-queue-drain.service +0 -17
  278. /package/{openclaw-plugin → openclaw-plugin.archived-2026-04-23}/package.json +0 -0
  279. /package/{openclaw-plugin → openclaw-plugin.archived-2026-04-23}/src/openclaw.plugin.json +0 -0
@@ -0,0 +1,271 @@
1
+ #!/usr/bin/env python3
2
+ """CDP run 2: re-extract press release text, pull thumbnails, grab FBI Vault Part 15.
3
+
4
+ Three sub-tasks:
5
+ A. Re-render the press release in a Chrome tab and pull the article-body innerText.
6
+ B. Page-context-fetch the 6 thumbnail JPGs for Release 02.
7
+ C. Navigate to FBI Vault and pull Part 15 of 16 from the 62-HQ-83894 series.
8
+ """
9
+ from __future__ import annotations
10
+
11
+ import base64
12
+ import json
13
+ import sys
14
+ import time
15
+ import urllib.request
16
+ from pathlib import Path
17
+
18
+ import websocket
19
+
20
# HTTP endpoint of the local Chrome DevTools instance this script drives.
CDP_HTTP = "http://127.0.0.1:9222"

# Release 02 press release and the thumbnail folder for its documents.
PRESS_URL = (
    "https://www.war.gov/News/Releases/Release/Article/4499305/"
    "department-of-war-publishes-second-release-of-unidentified-anomalous-phenomena/"
)
THUMB_BASE = "https://www.war.gov/medialink/ufo/052226/release_02/thumbnails"
THUMB_NAMES = [
    "CIA-UAP-D001_Intelligence_Information_Report_USSR_1973",
    "DOE-UAP-D001_PANTEX_Image",
    "DOE-UAP-D002_JamesTuck_Correspondence",
    "DOE-UAP-D003_Pajarito_Astronomers",
    "DOW-UAP-D017_General_Correspondence_Of_Sandia",
    "ODNI-UAP-D001_USPER_Narrative_Senior_USIC",
]

FBI_VAULT_BASE = "https://vault.fbi.gov"
# FBI Vault organizes the 62-HQ-83894 UFO file as "Unidentified Flying Objects (UFO)" — Part X of Y
# Known canonical layout has Parts 1-16. Tweet referenced Part 15.
FBI_PART_PAGE = f"{FBI_VAULT_BASE}/UFO/UFO%20Part%2015%20of%2016/view"
FBI_PART_PDF_GUESS = f"{FBI_VAULT_BASE}/UFO/UFO%20Part%2015%20of%2016/at_download/file"

# Output locations; the directories are created as a side effect of importing this module.
BASE = Path("/home/cbrd21/nextcloud/cbrd21-share/reference/war-gov-UFO-PURSUE-2026")
DOC_DIR = BASE.joinpath("docs", "release-02")
THUMB_DIR = DOC_DIR.joinpath("thumbnails")
THUMB_DIR.mkdir(parents=True, exist_ok=True)  # also creates DOC_DIR as a parent

FBI_DIR = Path("/home/cbrd21/nextcloud/cbrd21-share/reference/fbi-vault-ufo-62-HQ-83894")
FBI_DIR.mkdir(parents=True, exist_ok=True)
46
+
47
+
48
def open_tab(url: str) -> dict:
    """Open *url* in a new browser tab through the DevTools HTTP endpoint.

    Sends a ``PUT`` to ``/json/new?<url>`` and returns the decoded JSON reply.
    """
    endpoint = f"{CDP_HTTP}/json/new?{url}"
    request = urllib.request.Request(endpoint, method="PUT")
    with urllib.request.urlopen(request, timeout=10) as response:
        body = response.read()
    return json.loads(body)
52
+
53
+
54
def close_tab(target_id: str) -> None:
    """Ask the DevTools HTTP endpoint to close the tab *target_id*.

    Best-effort cleanup: every failure is deliberately ignored.
    """
    try:
        response = urllib.request.urlopen(f"{CDP_HTTP}/json/close/{target_id}", timeout=5)
        response.close()
    except Exception:
        pass
60
+
61
+
62
class CDP:
    """Small synchronous DevTools client speaking JSON over one WebSocket."""

    def __init__(self, ws_url: str):
        """Connect to the tab's debugger WebSocket at *ws_url*."""
        self.ws = websocket.create_connection(ws_url, timeout=120)
        self.mid = 0  # id of the most recently sent command

    def call(self, method: str, params: dict | None = None, timeout: float = 60.0) -> dict:
        """Send one command and return the ``result`` of its matching reply.

        Frames that are not the reply to this command (events, other replies)
        are discarded while waiting.

        Raises:
            RuntimeError: the reply carried an ``error`` member.
            TimeoutError: no matching reply arrived within *timeout* seconds.
            Socket-level timeouts raised by ``recv()`` propagate unchanged.
        """
        self.mid += 1
        msg_id = self.mid
        self.ws.send(json.dumps({"id": msg_id, "method": method, "params": params or {}}))
        # One overall deadline: a per-recv timeout alone never expires while
        # unrelated events keep arriving, which made this loop unbounded.
        deadline = time.monotonic() + timeout
        while True:
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                raise TimeoutError(f"{method}: no reply within {timeout}s")
            self.ws.settimeout(remaining)
            msg = json.loads(self.ws.recv())
            if msg.get("id") != msg_id:
                continue
            if "error" in msg:
                raise RuntimeError(f"{method}: {msg['error']}")
            return msg.get("result", {})

    def wait_event(self, name: str, timeout: float = 30.0) -> dict:
        """Block until an event called *name* arrives and return its ``params``.

        Every other frame received meanwhile is discarded.

        Raises:
            TimeoutError: the event did not arrive within *timeout* seconds.
        """
        deadline = time.monotonic() + timeout
        while time.monotonic() < deadline:
            self.ws.settimeout(max(0.1, deadline - time.monotonic()))
            try:
                raw = self.ws.recv()
            except websocket.WebSocketTimeoutException:
                continue  # loop condition decides whether time is up
            msg = json.loads(raw)
            if msg.get("method") == name:
                return msg.get("params", {})
        raise TimeoutError(f"event {name} did not fire within {timeout}s")

    def close(self) -> None:
        """Close the WebSocket, ignoring any error (best-effort cleanup)."""
        try:
            self.ws.close()
        except Exception:
            pass
98
+
99
+
100
def fetch_binary_in_page(cdp: CDP, url: str) -> tuple[int, bytes | None]:
    """Download *url* from inside the page and hand the body back as bytes.

    The page-side script fetches the resource, base64-encodes it and returns
    it by value. Returns ``(status, data)``; ``data`` is ``None`` unless the
    status is 200 and a non-empty base64 payload came back.
    """
    script = "".join([
        "(async () => {",
        " const r = await fetch(" + json.dumps(url) + ", {credentials: 'include', cache: 'no-store'});",
        " if (!r.ok) return {status: r.status, b64: null};",
        " const buf = await r.arrayBuffer();",
        " const bytes = new Uint8Array(buf);",
        " let bin = '';",
        " for (let i = 0; i < bytes.length; i++) bin += String.fromCharCode(bytes[i]);",
        " return {status: r.status, b64: btoa(bin), bytes: bytes.length};",
        "})()",
    ])
    evaluate_params = {
        "expression": script,
        "awaitPromise": True,
        "returnByValue": True,
    }
    reply = cdp.call("Runtime.evaluate", evaluate_params, timeout=300)
    payload = reply.get("result", {}).get("value", {}) or {}
    status = payload.get("status", 0)
    encoded = payload.get("b64")
    if status != 200 or not encoded:
        return status, None
    return status, base64.b64decode(encoded)
124
+
125
+
126
def fetch_text_in_page(cdp: CDP, url: str) -> tuple[int, str]:
    """Fetch *url* from inside the page and return ``(status, body_text)``.

    Falls back to ``(0, "")`` for whichever piece the evaluation did not return.
    """
    script = "".join([
        "(async () => {",
        " const r = await fetch(" + json.dumps(url) + ", {credentials: 'include', cache: 'no-store'});",
        " return {status: r.status, text: await r.text()};",
        "})()",
    ])
    evaluate_params = {
        "expression": script,
        "awaitPromise": True,
        "returnByValue": True,
    }
    reply = cdp.call("Runtime.evaluate", evaluate_params, timeout=120)
    payload = reply.get("result", {}).get("value", {}) or {}
    status = payload.get("status", 0)
    body = payload.get("text", "")
    return status, body
140
+
141
+
142
def task_a_press_release(cdp: CDP) -> None:
    """Navigate to the press release and save the article's ``innerText``.

    Tries a list of candidate selectors and keeps the first element whose text
    is longer than 500 characters, falling back to the whole ``<body>``. The
    text is written to ``press-release-2026-05-22.txt`` under ``DOC_DIR``.
    """
    print("[A] navigating → press release", flush=True)
    cdp.call("Page.navigate", {"url": PRESS_URL})
    try:
        cdp.wait_event("Page.loadEventFired", timeout=30.0)
    except TimeoutError:
        pass  # no load event in time; try the extraction anyway
    time.sleep(3.0)

    # Try multiple candidate selectors; press releases on DoW use various article wrappers
    # NOTE: these fragments are joined WITHOUT newlines, so the script must not
    # contain `//` comments: one would swallow the rest of the expression
    # (the fallback return and the closing `})()`), making it a SyntaxError.
    extract_js = (
        "(() => {"
        " const candidates = ["
        " document.querySelector('.body-text'),"
        " document.querySelector('.article-body'),"
        " document.querySelector('.article-content'),"
        " document.querySelector('.press-release'),"
        " document.querySelector('main article'),"
        " document.querySelector('main .content'),"
        " document.querySelector('main'),"
        " document.querySelector('article'),"
        " ];"
        " for (const el of candidates) {"
        " if (el && el.innerText && el.innerText.length > 500) {"
        " return {selector: el.tagName + (el.className ? '.' + el.className.split(' ').join('.') : ''), text: el.innerText, len: el.innerText.length};"
        " }"
        " }"
        " /* Last resort: full body innerText */"
        " return {selector: 'body', text: document.body.innerText, len: document.body.innerText.length};"
        "})()"
    )
    res = cdp.call("Runtime.evaluate", {"expression": extract_js, "returnByValue": True})
    if "exceptionDetails" in res:
        # Surface page-side failures instead of silently writing nothing.
        print(f"[A] extraction script raised: {json.dumps(res['exceptionDetails'], default=str)}", flush=True)
    val = res.get("result", {}).get("value", {}) or {}
    text = val.get("text", "")
    print(f"[A] selector={val.get('selector')!r} len={val.get('len')}", flush=True)
    if text:
        # Explicit UTF-8: page text routinely contains non-ASCII punctuation,
        # which the locale default encoding may not be able to encode.
        (DOC_DIR / "press-release-2026-05-22.txt").write_text(text, encoding="utf-8")
        print(f"[A] wrote press-release-2026-05-22.txt ({len(text)} chars)", flush=True)
181
+
182
+
183
def task_b_thumbnails(cdp: CDP) -> None:
    """Download every thumbnail in ``THUMB_NAMES`` through an in-page fetch.

    Each image that comes back with status 200 and a body is written to
    ``THUMB_DIR``; any other outcome is reported and skipped.
    """
    print(f"[B] pulling {len(THUMB_NAMES)} thumbnails via in-page fetch", flush=True)
    # Make sure we're on a war.gov tab so credentials/Akamai cookies apply
    cdp.call("Page.navigate", {"url": "https://www.war.gov/UFO/"})
    try:
        cdp.wait_event("Page.loadEventFired", timeout=30.0)
    except TimeoutError:
        pass
    time.sleep(2.0)

    for name in THUMB_NAMES:
        filename = f"{name}.jpg"
        status, content = fetch_binary_in_page(cdp, f"{THUMB_BASE}/{filename}")
        if not (status == 200 and content):
            print(f"[B] FAIL {filename} status={status}", flush=True)
            continue
        (THUMB_DIR / filename).write_bytes(content)
        print(f"[B] OK {filename} {len(content)} bytes", flush=True)
202
+
203
+
204
def task_c_fbi_vault_part_15(cdp: CDP) -> None:
    """Try to fetch FBI Vault UFO Part 15 of 16.

    Loads the part's page, collects PDF-looking links from the rendered DOM
    and downloads the first one (or ``FBI_PART_PDF_GUESS`` when none is
    found) into ``FBI_DIR``. If the download fails, the page HTML is saved
    as ``part-15-page.html`` for manual inspection.
    """
    print("[C] navigating → FBI Vault Part 15 page", flush=True)
    cdp.call("Page.navigate", {"url": FBI_PART_PAGE})
    try:
        cdp.wait_event("Page.loadEventFired", timeout=30.0)
    except TimeoutError:
        pass  # no load event in time; inspect whatever rendered
    time.sleep(3.0)

    # Try to find the PDF link on the page (Plone reading-room standard pattern)
    link_js = (
        "(() => {"
        " const links = Array.from(document.querySelectorAll('a[href]')).map(a => a.href);"
        " const pdfish = links.filter(h => /\\.pdf(\\?|$)|at_download\\/file/i.test(h));"
        " return {title: document.title, total: links.length, pdfish: pdfish.slice(0, 10)};"
        "})()"
    )
    res = cdp.call("Runtime.evaluate", {"expression": link_js, "returnByValue": True})
    link_val = res.get("result", {}).get("value", {}) or {}
    print(f"[C] page info: {json.dumps(link_val)}", flush=True)

    # The page-side regex already selected the PDF-looking links, so take the
    # first one. Re-filtering here with endswith(".pdf") used to reject
    # ".pdf?query" links that the regex had deliberately accepted.
    pdf_url = next(iter(link_val.get("pdfish") or []), None)
    if not pdf_url:
        pdf_url = FBI_PART_PDF_GUESS
        print(f"[C] using guess URL → {pdf_url}", flush=True)

    print(f"[C] page-context fetch → {pdf_url}", flush=True)
    status, content = fetch_binary_in_page(cdp, pdf_url)
    if status == 200 and content:
        out_path = FBI_DIR / "UFO-Part-15-of-16.pdf"
        out_path.write_bytes(content)
        print(f"[C] OK {out_path.name} {len(content)/1e6:.1f} MB", flush=True)
        return

    # Maybe the page itself IS the PDF (some Vault items)
    print(f"[C] direct fetch failed status={status}; trying alternate URLs", flush=True)
    # Save the page HTML for inspection
    _, html_text = fetch_text_in_page(cdp, FBI_PART_PAGE)
    html_text = html_text or ""  # normalise once so write and len() agree
    (FBI_DIR / "part-15-page.html").write_text(html_text, encoding="utf-8")
    print(f"[C] saved page HTML for inspection ({len(html_text)} chars)", flush=True)
248
+
249
+
250
def main() -> int:
    """Open a scratch tab, run tasks A, B and C in order, then clean up.

    Returns 0 when all three tasks ran without raising. The tab is closed
    even when connecting to its debugger WebSocket fails.
    """
    tab = open_tab("about:blank")
    target_id = tab["id"]
    cdp = None
    try:
        # Connect inside the try so a failed connection cannot leak the tab.
        cdp = CDP(tab["webSocketDebuggerUrl"])
        cdp.call("Page.enable")
        cdp.call("Runtime.enable")
        cdp.call("Network.enable", {"maxPostDataSize": 0})

        task_a_press_release(cdp)
        task_b_thumbnails(cdp)
        task_c_fbi_vault_part_15(cdp)

        return 0
    finally:
        if cdp is not None:
            cdp.close()
        close_tab(target_id)
268
+
269
+
270
# Script entry point: the process exit status is whatever main() returns.
if __name__ == "__main__":
    exit_code = main()
    sys.exit(exit_code)
@@ -0,0 +1,188 @@
1
+ #!/usr/bin/env python3
2
+ """Probe war.gov/UFO/ via Lumina Chrome CDP (port 9222).
3
+
4
+ Steps:
5
+ 1. Open a new tab on war.gov/UFO/
6
+ 2. Wait for Vue mount to load (CSV must be reachable)
7
+ 3. Pull the CSV via in-page fetch
8
+ 4. Inspect inline scripts for any release_2 link patterns
9
+ 5. Save raw CSV + script index to ~/clawd/tmp/wargov-capture/probe-out/
10
+
11
+ Output:
12
+ probe-out/uap-csv.csv fresh CSV from the site
13
+ probe-out/inline-scripts.json inline-script probe
+ probe-out/link-probe.json press-release / medialink link probe
14
+ probe-out/page-meta.json URL/title/page render check
15
+ """
16
+ from __future__ import annotations
17
+
18
+ import json
19
+ import sys
20
+ import time
21
+ import urllib.request
22
+ from pathlib import Path
23
+
24
+ import websocket # websocket-client
25
+
26
# HTTP endpoint of the local Chrome DevTools instance.
CDP_HTTP = "http://127.0.0.1:9222"
# Page being probed.
TARGET = "https://www.war.gov/UFO/"
# Probe results land here; the directory is created when the module is imported.
OUT_DIR = Path("/home/cbrd21/clawd/tmp/wargov-capture") / "probe-out"
OUT_DIR.mkdir(parents=True, exist_ok=True)
30
+
31
+
32
def cdp_get(path: str, timeout: float = 10.0) -> dict | list:
    """GET *path* from the DevTools HTTP endpoint and return the decoded JSON.

    Args:
        path: Path appended to ``CDP_HTTP`` (e.g. ``"/json"``).
        timeout: Seconds to wait for the endpoint; without one, an
            unresponsive browser would block this call indefinitely.
    """
    with urllib.request.urlopen(f"{CDP_HTTP}{path}", timeout=timeout) as r:
        return json.loads(r.read())
35
+
36
+
37
def open_tab(url: str) -> dict:
    """Open *url* in a new tab and return the endpoint's decoded JSON reply."""
    # Newer Chrome only accepts PUT on /json/new
    request = urllib.request.Request(f"{CDP_HTTP}/json/new?{url}", method="PUT")
    with urllib.request.urlopen(request, timeout=10) as response:
        raw_reply = response.read()
    return json.loads(raw_reply)
42
+
43
+
44
def close_tab(target_id: str) -> None:
    """Request that the tab *target_id* be closed; failures are ignored on purpose."""
    try:
        reply = urllib.request.urlopen(f"{CDP_HTTP}/json/close/{target_id}", timeout=5)
        reply.close()
    except Exception:
        pass
50
+
51
+
52
class CDP:
    """Small synchronous DevTools client speaking JSON over one WebSocket."""

    def __init__(self, ws_url: str):
        """Connect to the tab's debugger WebSocket at *ws_url*."""
        self.ws = websocket.create_connection(ws_url, timeout=60)
        self.mid = 0  # id of the most recently sent command

    def call(self, method: str, params: dict | None = None, timeout: float = 30.0) -> dict:
        """Send one command and return the ``result`` of its matching reply.

        Frames that are not the reply to this command are discarded.

        Raises:
            RuntimeError: the reply carried an ``error`` member.
            TimeoutError: no matching reply arrived within *timeout* seconds.
            Socket-level timeouts raised by ``recv()`` propagate unchanged.
        """
        self.mid += 1
        msg_id = self.mid
        self.ws.send(json.dumps({"id": msg_id, "method": method, "params": params or {}}))
        # Enforce a single overall deadline; the per-recv timeout alone is
        # reset by every unrelated event and so could wait forever.
        deadline = time.monotonic() + timeout
        while True:
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                raise TimeoutError(f"{method}: no reply within {timeout}s")
            self.ws.settimeout(remaining)
            msg = json.loads(self.ws.recv())
            if msg.get("id") != msg_id:
                continue
            if "error" in msg:
                raise RuntimeError(f"{method}: {msg['error']}")
            return msg.get("result", {})

    def wait_event(self, name: str, timeout: float = 30.0) -> dict:
        """Block until an event called *name* arrives and return its ``params``.

        Raises:
            TimeoutError: the event did not arrive within *timeout* seconds.
        """
        deadline = time.monotonic() + timeout
        while time.monotonic() < deadline:
            self.ws.settimeout(max(0.1, deadline - time.monotonic()))
            try:
                raw = self.ws.recv()
            except websocket.WebSocketTimeoutException:
                continue  # loop condition decides whether time is up
            msg = json.loads(raw)
            if msg.get("method") == name:
                return msg.get("params", {})
        raise TimeoutError(f"event {name} did not fire within {timeout}s")

    def close(self) -> None:
        """Close the WebSocket, ignoring any error (best-effort cleanup)."""
        try:
            self.ws.close()
        except Exception:
            pass
88
+
89
+
90
def main() -> int:
    """Probe ``TARGET`` in a fresh tab and dump the findings into ``OUT_DIR``.

    Writes ``page-meta.json``, ``uap-csv.csv`` (or ``uap-csv-error.json`` when
    the CSV fetch does not return status 200), ``inline-scripts.json`` and
    ``link-probe.json``. Returns 0 on completion. The tab is closed even when
    connecting to its debugger WebSocket fails.
    """
    print(f"[probe] opening tab → {TARGET}", flush=True)
    tab = open_tab(TARGET)
    target_id = tab["id"]
    print(f"[probe] tab id={target_id}", flush=True)

    cdp = None
    try:
        # Connect inside the try so a failed connection cannot leak the tab.
        cdp = CDP(tab["webSocketDebuggerUrl"])
        cdp.call("Page.enable")
        cdp.call("Runtime.enable")
        cdp.call("Network.enable", {"maxPostDataSize": 0})
        cdp.call("Page.navigate", {"url": TARGET})
        try:
            cdp.wait_event("Page.loadEventFired", timeout=30.0)
        except TimeoutError:
            print("[probe] Page.loadEventFired timeout — proceeding anyway", flush=True)

        # Give the Vue mount a chance to render the CSV view
        time.sleep(5.0)

        # Page meta
        meta_js = (
            "({"
            " url: location.href,"
            " title: document.title,"
            " hasMainContent: !!document.querySelector('main'),"
            " scriptInlineCount: document.querySelectorAll('script:not([src])').length,"
            " ufoMentions: (document.body.innerText.match(/UAP|UFO|PURSUE/g) || []).length,"
            " releaseDateGuesses: Array.from(new Set((document.body.innerText.match(/\\b\\d{1,2}\\/\\d{1,2}\\/\\d{2,4}\\b/g) || []))),"
            " release2HrefCount: document.querySelectorAll('a[href*=\"release_2\"]').length,"
            " release2InHtml: (document.documentElement.outerHTML.match(/release_2/gi) || []).length"
            "})"
        )
        meta = cdp.call("Runtime.evaluate", {"expression": meta_js, "returnByValue": True})
        meta_val = meta.get("result", {}).get("value", {})
        (OUT_DIR / "page-meta.json").write_text(json.dumps(meta_val, indent=2))
        print(f"[probe] page-meta: {json.dumps(meta_val)}", flush=True)

        # Pull the CSV via in-page fetch. The body is read once and reused
        # for both `len` and `text` (previously it was read twice via clone()).
        csv_js = (
            "(async () => {"
            " const u = '/Portals/1/Interactive/2026/UFO/uap-csv.csv';"
            " const r = await fetch(u, {credentials: 'include', cache: 'no-store'});"
            " const t = await r.text();"
            " return {status: r.status, len: t.length, text: t};"
            "})()"
        )
        csv_res = cdp.call("Runtime.evaluate", {
            "expression": csv_js,
            "awaitPromise": True,
            "returnByValue": True,
        }, timeout=60)
        csv_val = csv_res.get("result", {}).get("value", {})
        if isinstance(csv_val, dict) and csv_val.get("status") == 200:
            # Explicit UTF-8 so non-ASCII cells cannot fail on the locale default.
            (OUT_DIR / "uap-csv.csv").write_text(csv_val["text"], encoding="utf-8")
            print(f"[probe] CSV pulled, {csv_val['len']} bytes", flush=True)
        else:
            print(f"[probe] CSV fetch failed: {csv_val}", flush=True)
            (OUT_DIR / "uap-csv-error.json").write_text(json.dumps(csv_val, indent=2, default=str))

        # Inspect inline scripts for release_2 hints
        scripts_js = (
            "(() => {"
            " const out = [];"
            " document.querySelectorAll('script:not([src])').forEach((s, i) => {"
            " const t = s.textContent || '';"
            " out.push({idx: i, len: t.length, hasRelease2: /release_2/i.test(t), hasFetch: /fetch\\(/.test(t), hasCsv: /\\.csv/.test(t), preview: t.slice(0, 400)});"
            " });"
            " return out;"
            "})()"
        )
        scripts_res = cdp.call("Runtime.evaluate", {"expression": scripts_js, "returnByValue": True})
        scripts_val = scripts_res.get("result", {}).get("value", [])
        (OUT_DIR / "inline-scripts.json").write_text(json.dumps(scripts_val, indent=2))
        print(f"[probe] inline scripts: {len(scripts_val)} ({sum(1 for s in scripts_val if s.get('hasRelease2'))} mention release_2)", flush=True)

        # Probe for press release link
        pr_js = (
            "(() => {"
            " const links = Array.from(document.querySelectorAll('a[href]')).map(a => a.href);"
            " const press = links.filter(h => /News\\/Releases/i.test(h));"
            " const medialink = links.filter(h => /medialink\\/ufo/i.test(h));"
            " return {pressCount: press.length, press: press.slice(0, 20), medialinkCount: medialink.length, medialinkSample: medialink.slice(0, 20)};"
            "})()"
        )
        pr_res = cdp.call("Runtime.evaluate", {"expression": pr_js, "returnByValue": True})
        pr_val = pr_res.get("result", {}).get("value", {})
        (OUT_DIR / "link-probe.json").write_text(json.dumps(pr_val, indent=2))
        print(f"[probe] link probe: press={pr_val.get('pressCount')} medialink={pr_val.get('medialinkCount')}", flush=True)

        print("[probe] DONE", flush=True)
        return 0
    finally:
        if cdp is not None:
            cdp.close()
        close_tab(target_id)
185
+
186
+
187
# Script entry point: the process exit status is whatever main() returns.
if __name__ == "__main__":
    exit_code = main()
    sys.exit(exit_code)
@@ -0,0 +1,101 @@
1
+ #!/usr/bin/env python3
2
+ """Discover + capture the DOJ SPLC press release by NAVIGATING the search page
3
+ (so JS renders the result list) then reading the rendered DOM. Falls back to
4
+ scraping any /opa/pr/ or /news/ links the rendered page exposes.
5
+ """
6
+ from __future__ import annotations
7
+ import json, re, sys, time, urllib.request
8
+ from pathlib import Path
9
+ import websocket
10
+
11
+ CDP_HTTP = "http://127.0.0.1:9222"
12
+ SEARCH_URL = "https://www.justice.gov/news?search_api_fulltext=Southern%20Poverty%20Law%20Center"
13
+ OUT = Path("/home/cbrd21/clawd/skills/substance-lens/captures/splc-doj-2026-06-03")
14
+
15
+
16
def open_tab(url):
    """Ask the DevTools HTTP endpoint to open a new tab on *url*.

    Sends ``PUT {CDP_HTTP}/json/new?{url}`` with a 10 second timeout and
    returns the JSON-decoded response body.
    """
    endpoint = "{}/json/new?{}".format(CDP_HTTP, url)
    request = urllib.request.Request(endpoint, method="PUT")
    with urllib.request.urlopen(request, timeout=10) as response:
        raw = response.read()
    return json.loads(raw)
20
+
21
def close_tab(tid):
    """Best-effort close of the browser tab whose target id is *tid*.

    Requests ``{CDP_HTTP}/json/close/{tid}`` with a 5 second timeout. Any
    failure is ignored so the function never raises.
    """
    try:
        # Use the response as a context manager so its socket is released
        # deterministically instead of whenever the object is collected.
        with urllib.request.urlopen(f"{CDP_HTTP}/json/close/{tid}", timeout=5):
            pass
    except Exception:
        # Deliberately swallowed: this runs during cleanup, where a failed
        # close must not mask the result (or exception) of the real work.
        pass
26
+
27
class CDP:
    """Minimal blocking Chrome DevTools Protocol client over one WebSocket.

    Commands are numbered with a per-connection counter (``mid``); replies are
    matched to commands by that id. Messages read while looking for something
    else are discarded, not queued.
    """

    def __init__(self, ws):
        """Connect to the DevTools WebSocket URL *ws* (120 s socket timeout)."""
        self.ws = websocket.create_connection(ws, timeout=120)
        self.mid = 0  # id of the most recently sent command

    def call(self, m, p=None, t=60.0):
        """Send command *m* with params *p* and return the reply's ``result``.

        Args:
            m: protocol method name, e.g. ``"Page.enable"``.
            p: params dict; ``None`` is sent as an empty object.
            t: overall time budget in seconds for the reply to arrive;
                ``None`` waits without a deadline.

        Returns:
            The ``result`` member of the matching reply (``{}`` if absent).

        Raises:
            RuntimeError: the matching reply carried an ``error`` member.
            websocket.WebSocketTimeoutException: no matching reply within *t*.
        """
        self.mid += 1
        call_id = self.mid
        self.ws.send(json.dumps({"id": call_id, "method": m, "params": p or {}}))
        # The budget covers the whole call. A per-recv timeout alone would be
        # reset by every unrelated event, letting a chatty page block forever.
        deadline = None if t is None else time.monotonic() + t
        while True:
            if deadline is None:
                self.ws.settimeout(None)
            else:
                remaining = deadline - time.monotonic()
                if remaining <= 0:
                    raise websocket.WebSocketTimeoutException(
                        f"{m}: no reply within {t}s"
                    )
                self.ws.settimeout(remaining)
            msg = json.loads(self.ws.recv())
            if msg.get("id") != call_id:
                continue  # event or reply to another command: dropped
            if "error" in msg:
                raise RuntimeError(f"{m}: {msg['error']}")
            return msg.get("result", {})

    def wait(self, name, t=30.0):
        """Wait up to *t* seconds for an event whose method is *name*.

        Returns the event's ``params`` (``{}`` if absent), or ``{}`` when the
        time runs out. Other messages read in the meantime are discarded.
        """
        end = time.monotonic() + t
        while time.monotonic() < end:
            # Never arm a timeout below 0.1 s, even when almost out of time.
            self.ws.settimeout(max(0.1, end - time.monotonic()))
            try:
                msg = json.loads(self.ws.recv())
            except websocket.WebSocketTimeoutException:
                continue
            if msg.get("method") == name:
                return msg.get("params", {})
        return {}

    def close(self):
        """Close the WebSocket, ignoring any error raised while doing so."""
        try:
            self.ws.close()
        except Exception:
            # Best-effort: called during cleanup.
            pass
48
+
49
def jseval(cdp, expr, t=60):
    """Evaluate the JavaScript expression *expr* and return its value.

    Issues ``Runtime.evaluate`` through ``cdp.call`` with ``awaitPromise`` and
    ``returnByValue`` set, passing *t* as the call timeout. Returns the
    ``value`` of the reply's ``result`` object, or ``None`` when absent.
    """
    params = {"expression": expr, "awaitPromise": True, "returnByValue": True}
    reply = cdp.call("Runtime.evaluate", params, t)
    remote_object = reply.get("result", {})
    return remote_object.get("value")
52
+
53
def fetch_text(cdp, url):
    """Fetch *url* from inside the page and return ``(status, text)``.

    The request is made by a JavaScript ``fetch`` evaluated via ``jseval``
    (180 s timeout). A failed fetch is reported by the script as status -1
    with the error string as text. If the evaluation yields nothing, the
    result is ``(0, "")``.
    """
    script = (
        f"(async()=>{{try{{const r=await fetch({json.dumps(url)},{{credentials:'include',cache:'no-store'}});"
        f"return {{status:r.status,text:await r.text()}};}}catch(e){{return{{status:-1,text:String(e)}};}}}})()"
    )
    outcome = jseval(cdp, script, 180)
    if not outcome:
        outcome = {}
    status = outcome.get("status", 0)
    body = outcome.get("text", "")
    return status, body
58
+
59
def _choose_release(cands):
    """Pick the press-release URL from the rendered link candidates.

    Prefers the first candidate whose href plus link text matches the
    SPLC / wire-fraud pattern; otherwise falls back to the first candidate.

    Returns the chosen href resolved against SEARCH_URL, or ``None`` when
    there is nothing to choose from.
    """
    # Local import: the module's top-level imports only bring in urllib.request.
    from urllib.parse import urljoin

    chosen = None
    for c in cands:
        if re.search(r"southern.poverty|law.center|splc|wire.fraud", (c["h"] + c["t"]).lower()):
            chosen = c["h"]
            break
    if not chosen and cands:
        chosen = cands[0]["h"]
    if not chosen:
        return None
    # urljoin resolves root-relative, protocol-relative ("//host/...") and
    # already-absolute hrefs correctly; plain prefixing breaks the second.
    return urljoin(SEARCH_URL, chosen)


def main():
    """Render the search page in a new tab and capture the best-matching release.

    Steps: open a tab on SEARCH_URL, navigate and wait for the load event,
    collect candidate links from the rendered DOM, choose one, then fetch it
    through the page. On a 200 response the HTML and extracted text are
    written under OUT; a JSON manifest of the run is always written.

    Returns:
        0 when the run completes.
    """
    tab = open_tab(SEARCH_URL)
    tid = tab["id"]
    cdp = None
    try:
        # Connect inside the try so the tab is still closed if this fails.
        cdp = CDP(tab["webSocketDebuggerUrl"])
        # The capture directory may not exist yet on a first run.
        OUT.mkdir(parents=True, exist_ok=True)

        cdp.call("Page.enable")
        cdp.call("Runtime.enable")
        cdp.call("Page.navigate", {"url": SEARCH_URL})
        cdp.wait("Page.loadEventFired", 30.0)
        time.sleep(6.0)  # let result JS render

        links = jseval(
            cdp,
            "JSON.stringify(Array.from(document.querySelectorAll('a[href]'))"
            ".map(a=>({h:a.getAttribute('href'),t:(a.innerText||'').trim()}))"
            ".filter(x=>x.h&&(x.h.includes('/opa/pr/')||x.h.includes('/usao-mdal/pr/')||/southern.poverty|law.center|splc/i.test(x.t))))",
        )
        cands = json.loads(links) if links else []
        print(f"[pr] rendered candidates: {len(cands)}", flush=True)
        for c in cands[:15]:
            print(" ", c["h"], "::", c["t"][:70], flush=True)

        pr = _choose_release(cands)
        manifest = {"search_url": SEARCH_URL, "rendered_candidates": cands[:15], "chosen": pr}
        if pr:
            print(f"[pr] fetching -> {pr}", flush=True)
            st, html = fetch_text(cdp, pr)
            print(f"[pr] status={st} len={len(html)}", flush=True)
            txt = ""  # stays empty unless the page was fetched and parsed
            if st == 200 and html:
                (OUT / "doj-press-release.html").write_text(html, encoding="utf-8")
                # Re-fetch and parse in the page to get the text of the
                # article body, falling back to broader containers.
                txt = jseval(
                    cdp,
                    f"(async()=>{{const r=await fetch({json.dumps(pr)},{{credentials:'include'}});"
                    f"const h=await r.text();const d=new DOMParser().parseFromString(h,'text/html');"
                    f"const a=d.querySelector('.field--name-body')||d.querySelector('article')||d.querySelector('main')||d.body;"
                    f"return a?a.innerText:'';}})()",
                    60,
                ) or ""
                if txt:
                    (OUT / "doj-press-release.txt").write_text(txt, encoding="utf-8")
                    print(f"[pr] {len(txt)} chars text", flush=True)
            manifest["status"] = st
            manifest["txt_chars"] = len(txt)
        (OUT / "press-release-discovery.json").write_text(
            json.dumps(manifest, indent=2), encoding="utf-8"
        )
        print("[pr] done", flush=True)
        return 0
    finally:
        if cdp is not None:
            cdp.close()
        close_tab(tid)
99
+
100
# Script entry point: the process exit status is whatever main() returns.
if __name__ == "__main__":
    raise SystemExit(main())
@@ -0,0 +1,95 @@
1
+ #!/usr/bin/env python3
2
+ """Parse the new uap-data.csv and split Release 01 vs Release 02 records.
3
+
4
+ The CSV has multi-line quoted fields (newlines inside Title and Description Blurb),
5
+ so we use Python's csv module rather than naive line counting.
6
+ """
7
+ from __future__ import annotations
8
+
9
+ import csv
10
+ import json
11
+ from collections import Counter, defaultdict
12
+ from pathlib import Path
13
+
14
CSV_PATH = Path("/home/cbrd21/nextcloud/cbrd21-share/reference/war-gov-UFO-PURSUE-2026/docs/release-02/uap-data.csv")
OUT_DIR = Path("/home/cbrd21/clawd/tmp/wargov-capture/probe-out")
OUT_DIR.mkdir(parents=True, exist_ok=True)


def _cell(record, column):
    """Return the stripped text of *column* in *record* ("" if missing or None)."""
    return (record.get(column) or "").strip()


# newline="" hands raw line endings to the csv module, which is what lets it
# parse quoted fields that contain embedded newlines.
with CSV_PATH.open(newline="", encoding="utf-8") as handle:
    rows = list(csv.DictReader(handle))

print(f"Total records: {len(rows)}")

# Tally records per release date, most frequent first.
date_counter = Counter(_cell(row, "Release Date") for row in rows)
print("Release dates:")
for release_date, count in sorted(date_counter.items(), key=lambda item: -item[1]):
    print(f" {release_date!r:15} → {count}")

# Keep only the records dated 5/22/26 (Release 02).
release2 = [row for row in rows if _cell(row, "Release Date") == "5/22/26"]
print(f"\nRelease 02 records: {len(release2)}")

# Break the Release 02 records down by type and by agency.
type_counter = Counter(_cell(row, "Type") for row in release2)
agency_counter = Counter(_cell(row, "Agency") for row in release2)
print("Types:")
for type_name, count in type_counter.most_common():
    print(f" {type_name!r:15} → {count}")
print("Agencies:")
for agency_name, count in agency_counter.most_common():
    print(f" {agency_name!r:15} → {count}")

# One flat inventory entry per Release 02 record; titles are collapsed onto a
# single line because the CSV allows newlines inside them.
links = [
    {
        "title": _cell(row, "Title").replace("\n", " ").replace("\r", ""),
        "type": _cell(row, "Type"),
        "agency": _cell(row, "Agency"),
        "incident_date": _cell(row, "Incident Date"),
        "incident_location": _cell(row, "Incident Location"),
        "pdf_link": _cell(row, "PDF | Image Link"),
        "modal_image": _cell(row, "Modal Image"),
        "dvids_id": _cell(row, "DVIDS Video ID"),
    }
    for row in release2
]

# Save full inventory
inventory_path = OUT_DIR / "release-02-records.json"
inventory_path.write_text(json.dumps(links, indent=2))
print(f"\nSaved inventory: {inventory_path}")

# Unique direct-fetchable URLs (non-empty PDF/image and modal-image links).
urls = {
    entry[field]
    for entry in links
    for field in ("pdf_link", "modal_image")
    if entry[field]
}
urls_list = sorted(urls)
print(f"\nUnique direct URLs: {len(urls_list)}")
for url in urls_list[:15]:
    print(f" {url}")
hidden = len(urls_list) - 15
if hidden > 0:
    print(f" ... and {hidden} more")

(OUT_DIR / "release-02-urls.json").write_text(json.dumps(urls_list, indent=2))

# DVIDS-only records: a DVIDS video id is present but there is no PDF/image link.
dvids_only = [entry for entry in links if entry["dvids_id"] and not entry["pdf_link"]]
print(f"\nDVIDS-only video records: {len(dvids_only)}")
for entry in dvids_only[:10]:
    print(f" DVIDS {entry['dvids_id']}: {entry['title'][:80]}")
(OUT_DIR / "release-02-dvids.json").write_text(json.dumps(dvids_only, indent=2))