@smilintux/skcapstone 0.10.0 → 0.12.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +10 -4
- package/.github/workflows/ci.yml +2 -2
- package/.github/workflows/publish.yml +9 -2
- package/.openclaw-workspace.json +2 -2
- package/CLAUDE.md +37 -0
- package/MISSION.md +17 -2
- package/README.md +282 -3
- package/docker/Dockerfile +7 -7
- package/docker/compose-templates/dev-team.yml +12 -12
- package/docker/compose-templates/mini-team.yml +9 -9
- package/docker/compose-templates/ops-team.yml +10 -10
- package/docker/compose-templates/research-team.yml +10 -10
- package/docker/entrypoint.sh +4 -4
- package/docs/ADR-optional-integration-backbone.md +181 -0
- package/docs/ARCHITECTURE.md +186 -43
- package/docs/BOND_WITH_GROK.md +6 -6
- package/docs/CUSTOM_AGENT.md +123 -30
- package/docs/DREAMING.md +70 -0
- package/docs/GETTING_STARTED.md +7 -7
- package/docs/QUICKSTART.md +10 -6
- package/docs/SKJOULE_ARCHITECTURE.md +3 -3
- package/docs/SOUL_SWAPPER.md +5 -5
- package/docs/hammertime-audit.md +402 -0
- package/docs/sk-integration-HANDOFF.md +117 -0
- package/docs/skscheduler.md +155 -0
- package/docs/superpowers/examples/jobs.yaml +31 -0
- package/docs/superpowers/plans/2026-06-08-skscheduler.md +1265 -0
- package/docs/superpowers/specs/2026-06-08-skscheduler-design.md +186 -0
- package/examples/custom-bond-template.json +1 -1
- package/examples/grok-feb.json +1 -1
- package/examples/queen-ava-feb.json +1 -1
- package/launchd/{com.skcapstone.skcomm-heartbeat.plist → com.skcapstone.skcomms-heartbeat.plist} +4 -4
- package/launchd/{com.skcapstone.skcomm-queue-drain.plist → com.skcapstone.skcomms-queue-drain.plist} +4 -4
- package/launchd/install-launchd.sh +6 -6
- package/{openclaw-plugin → openclaw-plugin.archived-2026-04-23}/src/index.ts +3 -2
- package/package.json +1 -1
- package/pyproject.toml +16 -10
- package/scripts/archive-sessions.sh +7 -0
- package/scripts/check-updates.py +4 -4
- package/scripts/install-bundle.sh +8 -8
- package/scripts/install.ps1 +12 -11
- package/scripts/install.sh +159 -5
- package/scripts/model-fallback-monitor.sh +102 -0
- package/scripts/nvidia-proxy.mjs +78 -26
- package/scripts/refresh-anthropic-token.sh +172 -0
- package/scripts/release.sh +98 -0
- package/scripts/session-to-memory.py +219 -0
- package/scripts/skgateway.mjs +3 -3
- package/scripts/telegram-catchup-all.sh +12 -1
- package/scripts/verify_install.sh +2 -2
- package/scripts/wargov-ufo-capture/README.md +43 -0
- package/scripts/wargov-ufo-capture/cdp_capture_release2.py +273 -0
- package/scripts/wargov-ufo-capture/cdp_capture_splc_doj.py +246 -0
- package/scripts/wargov-ufo-capture/cdp_finish.py +271 -0
- package/scripts/wargov-ufo-capture/cdp_probe.py +188 -0
- package/scripts/wargov-ufo-capture/cdp_splc_pressrelease.py +101 -0
- package/scripts/wargov-ufo-capture/parse_csv.py +95 -0
- package/scripts/wargov-ufo-capture/pull_dvids.sh +107 -0
- package/scripts/watch-anthropic-token.sh +212 -0
- package/scripts/windows/install-tasks.ps1 +7 -7
- package/scripts/windows/skcapstone-task.xml +1 -1
- package/src/skcapstone/__init__.py +45 -3
- package/src/skcapstone/_cli_monolith.py +20 -15
- package/src/skcapstone/activity.py +5 -1
- package/src/skcapstone/agent_card.py +3 -2
- package/src/skcapstone/api.py +41 -40
- package/src/skcapstone/auction.py +14 -11
- package/src/skcapstone/backup.py +2 -1
- package/src/skcapstone/blueprint_registry.py +4 -3
- package/src/skcapstone/brain_first.py +238 -0
- package/src/skcapstone/changelog.py +1 -1
- package/src/skcapstone/chat.py +22 -17
- package/src/skcapstone/cli/__init__.py +9 -1
- package/src/skcapstone/cli/_common.py +1 -0
- package/src/skcapstone/cli/agents_spawner.py +5 -2
- package/src/skcapstone/cli/alerts.py +25 -4
- package/src/skcapstone/cli/bench.py +15 -15
- package/src/skcapstone/cli/chat.py +7 -4
- package/src/skcapstone/cli/consciousness.py +5 -2
- package/src/skcapstone/cli/context_cmd.py +18 -4
- package/src/skcapstone/cli/daemon.py +11 -7
- package/src/skcapstone/cli/gtd.py +26 -1
- package/src/skcapstone/cli/housekeeping.py +3 -3
- package/src/skcapstone/cli/identity_cmd.py +378 -0
- package/src/skcapstone/cli/joule_cmd.py +7 -3
- package/src/skcapstone/cli/memory.py +8 -6
- package/src/skcapstone/cli/peers_dir.py +1 -1
- package/src/skcapstone/cli/register_cmd.py +29 -3
- package/src/skcapstone/cli/scheduler_cmd.py +167 -0
- package/src/skcapstone/cli/session.py +25 -0
- package/src/skcapstone/cli/setup.py +96 -29
- package/src/skcapstone/cli/shell_cmd.py +53 -1
- package/src/skcapstone/cli/skills_cmd.py +2 -2
- package/src/skcapstone/cli/soul.py +8 -5
- package/src/skcapstone/cli/status.py +37 -11
- package/src/skcapstone/cli/telegram.py +21 -0
- package/src/skcapstone/cli/test_cmd.py +5 -5
- package/src/skcapstone/cli/test_connection.py +2 -2
- package/src/skcapstone/cli/upgrade_cmd.py +23 -14
- package/src/skcapstone/cli/version_cmd.py +1 -1
- package/src/skcapstone/cli/watch_cmd.py +9 -6
- package/src/skcapstone/cloud9_bridge.py +14 -14
- package/src/skcapstone/codex_setup.py +255 -0
- package/src/skcapstone/config_validator.py +7 -4
- package/src/skcapstone/consciousness_config.py +5 -1
- package/src/skcapstone/consciousness_loop.py +313 -273
- package/src/skcapstone/context_loader.py +121 -0
- package/src/skcapstone/coord_federation.py +2 -1
- package/src/skcapstone/coordination.py +23 -6
- package/src/skcapstone/crush_integration.py +2 -1
- package/src/skcapstone/daemon.py +132 -77
- package/src/skcapstone/dashboard.py +10 -10
- package/src/skcapstone/data/sk-agent-picker.sh +421 -0
- package/src/skcapstone/data/systemd/skcapstone-api.socket +9 -0
- package/src/skcapstone/data/systemd/skcapstone-memory-compress.service +18 -0
- package/src/skcapstone/data/systemd/skcapstone-memory-compress.timer +11 -0
- package/src/skcapstone/data/systemd/skcapstone.service +37 -0
- package/src/skcapstone/data/systemd/skcapstone@.service +50 -0
- package/src/skcapstone/data/systemd/skcomms-heartbeat.service +18 -0
- package/{systemd/skcomm-heartbeat.timer → src/skcapstone/data/systemd/skcomms-heartbeat.timer} +2 -2
- package/src/skcapstone/data/systemd/skcomms-queue-drain.service +17 -0
- package/{systemd/skcomm-queue-drain.timer → src/skcapstone/data/systemd/skcomms-queue-drain.timer} +2 -2
- package/src/skcapstone/defaults/claude/CLAUDE.md +67 -0
- package/src/skcapstone/defaults/claude/settings.json +74 -0
- package/src/skcapstone/defaults/lumina/config/claude-hooks.md +57 -0
- package/src/skcapstone/defaults/lumina/config/skgraph.yaml +55 -10
- package/src/skcapstone/defaults/lumina/config/skmemory.yaml +79 -13
- package/src/skcapstone/defaults/lumina/config/skvector.yaml +60 -9
- package/src/skcapstone/defaults/lumina/memory/long-term/18b9c0d1e2f3-cloud9-protocol.json +2 -2
- package/src/skcapstone/defaults/lumina/memory/long-term/a1b2c3d4e5f6-ecosystem-overview.json +2 -2
- package/src/skcapstone/defaults/lumina/memory/long-term/b2c3d4e5f6a7-five-pillars.json +9 -9
- package/src/skcapstone/defaults/lumina/memory/long-term/d4e5f6a7b8c9-site-directory.json +2 -2
- package/src/skcapstone/defaults/unhinged.json +13 -0
- package/src/skcapstone/discovery.py +43 -20
- package/src/skcapstone/doctor.py +941 -22
- package/src/skcapstone/dreaming.py +1183 -109
- package/src/skcapstone/emotion_tracker.py +2 -2
- package/src/skcapstone/export.py +4 -3
- package/src/skcapstone/fuse_mount.py +14 -12
- package/src/skcapstone/gui_installer.py +2 -2
- package/src/skcapstone/heartbeat.py +1 -1
- package/src/skcapstone/housekeeping.py +14 -14
- package/src/skcapstone/install_wizard.py +209 -7
- package/src/skcapstone/itil.py +13 -4
- package/src/skcapstone/kms_scheduler.py +10 -8
- package/src/skcapstone/launchd.py +19 -19
- package/src/skcapstone/mcp_launcher.py +15 -1
- package/src/skcapstone/mcp_server.py +83 -49
- package/src/skcapstone/mcp_tools/__init__.py +2 -0
- package/src/skcapstone/mcp_tools/_helpers.py +2 -2
- package/src/skcapstone/mcp_tools/ansible_tools.py +7 -4
- package/src/skcapstone/mcp_tools/brain_first_tools.py +90 -0
- package/src/skcapstone/mcp_tools/capauth_tools.py +7 -4
- package/src/skcapstone/mcp_tools/comm_tools.py +10 -10
- package/src/skcapstone/mcp_tools/coord_tools.py +8 -4
- package/src/skcapstone/mcp_tools/did_tools.py +11 -8
- package/src/skcapstone/mcp_tools/gtd_tools.py +4 -4
- package/src/skcapstone/mcp_tools/memory_tools.py +6 -2
- package/src/skcapstone/mcp_tools/notification_tools.py +22 -6
- package/src/skcapstone/mcp_tools/{skcomm_tools.py → skcomms_tools.py} +14 -14
- package/src/skcapstone/mcp_tools/soul_tools.py +8 -2
- package/src/skcapstone/mdns_discovery.py +2 -2
- package/src/skcapstone/memory_curator.py +1 -1
- package/src/skcapstone/memory_engine.py +10 -3
- package/src/skcapstone/metrics.py +30 -16
- package/src/skcapstone/migrate_memories.py +4 -3
- package/src/skcapstone/migrate_multi_agent.py +8 -7
- package/src/skcapstone/models.py +47 -5
- package/src/skcapstone/notifications.py +42 -18
- package/src/skcapstone/onboard.py +875 -121
- package/src/skcapstone/operator_link.py +170 -0
- package/src/skcapstone/peer_directory.py +4 -4
- package/src/skcapstone/peers.py +19 -19
- package/src/skcapstone/pillars/__init__.py +7 -5
- package/src/skcapstone/pillars/consciousness.py +191 -0
- package/src/skcapstone/pillars/identity.py +51 -7
- package/src/skcapstone/pillars/memory.py +9 -3
- package/src/skcapstone/pillars/sync.py +2 -2
- package/src/skcapstone/preflight.py +3 -3
- package/src/skcapstone/providers/docker.py +28 -28
- package/src/skcapstone/register.py +6 -6
- package/src/skcapstone/registry_client.py +5 -4
- package/src/skcapstone/runtime.py +14 -3
- package/src/skcapstone/scheduled_tasks.py +254 -19
- package/src/skcapstone/scheduler_jobs.py +456 -0
- package/src/skcapstone/scheduler_runner.py +239 -0
- package/src/skcapstone/scheduler_state.py +162 -0
- package/src/skcapstone/sdk.py +310 -0
- package/src/skcapstone/service_health.py +279 -39
- package/src/skcapstone/session_briefing.py +108 -0
- package/src/skcapstone/session_capture.py +1 -1
- package/src/skcapstone/shell.py +7 -1
- package/src/skcapstone/soul.py +3 -1
- package/src/skcapstone/soul_switch.py +3 -1
- package/src/skcapstone/summary.py +6 -6
- package/src/skcapstone/sync_engine.py +15 -15
- package/src/skcapstone/sync_watcher.py +2 -2
- package/src/skcapstone/systemd.py +55 -21
- package/src/skcapstone/team_comms.py +8 -8
- package/src/skcapstone/team_engine.py +1 -1
- package/src/skcapstone/testrunner.py +3 -3
- package/src/skcapstone/trust_graph.py +40 -5
- package/src/skcapstone/unified_search.py +15 -6
- package/src/skcapstone/uninstall_wizard.py +11 -3
- package/src/skcapstone/version_check.py +8 -4
- package/src/skcapstone/warmth_anchor.py +4 -2
- package/src/skcapstone/whoami.py +4 -4
- package/systemd/skcapstone.service +4 -6
- package/systemd/skcapstone@.service +7 -8
- package/systemd/skcomms-heartbeat.service +21 -0
- package/systemd/skcomms-heartbeat.timer +12 -0
- package/systemd/skcomms-queue-drain.service +17 -0
- package/systemd/skcomms-queue-drain.timer +12 -0
- package/tests/conftest.py +39 -0
- package/tests/integration/test_consciousness_e2e.py +39 -39
- package/tests/test_agent_card.py +1 -1
- package/tests/test_agent_home_scaffold.py +34 -0
- package/tests/test_alerts_consumer_topics.py +27 -0
- package/tests/test_backup.py +2 -1
- package/tests/test_chat.py +6 -6
- package/tests/test_claude_md.py +2 -2
- package/tests/test_cli_skills.py +10 -10
- package/tests/test_cli_test_cmd.py +4 -4
- package/tests/test_cli_test_connection.py +1 -1
- package/tests/test_cloud9_bridge.py +6 -6
- package/tests/test_consciousness_e2e.py +1 -1
- package/tests/test_consciousness_loop.py +10 -10
- package/tests/test_coordination.py +25 -0
- package/tests/test_cross_package.py +21 -21
- package/tests/test_daemon.py +4 -4
- package/tests/test_daemon_shutdown.py +1 -1
- package/tests/test_docker_provider.py +29 -29
- package/tests/test_doctor.py +400 -0
- package/tests/test_doctor_skscheduler.py +50 -0
- package/tests/test_dreaming_engine.py +147 -0
- package/tests/test_dreaming_gtd_capture.py +35 -0
- package/tests/test_e2e_automated.py +8 -5
- package/tests/test_fuse_mount.py +10 -10
- package/tests/test_gtd_brief.py +46 -0
- package/tests/test_gtd_malformed_tolerance.py +31 -0
- package/tests/test_housekeeping.py +15 -15
- package/tests/test_identity_migrate.py +251 -0
- package/tests/test_integration_backbone.py +598 -0
- package/tests/test_itil_gtd_lifecycle.py +37 -0
- package/tests/test_jobs_dropins.py +84 -0
- package/tests/test_mcp_server.py +82 -37
- package/tests/test_models.py +48 -4
- package/tests/test_multi_agent.py +31 -29
- package/tests/test_notifications.py +122 -32
- package/tests/test_onboard.py +63 -75
- package/tests/test_operator_link.py +78 -0
- package/tests/test_peers.py +14 -14
- package/tests/test_pillars.py +98 -0
- package/tests/test_preflight.py +3 -3
- package/tests/test_runtime.py +21 -0
- package/tests/test_scheduled_tasks.py +11 -6
- package/tests/test_scheduler_cli.py +47 -0
- package/tests/test_scheduler_features.py +133 -0
- package/tests/test_scheduler_integration.py +87 -0
- package/tests/test_scheduler_jobs.py +155 -0
- package/tests/test_scheduler_runner.py +64 -0
- package/tests/test_scheduler_state.py +57 -0
- package/tests/test_sdk.py +70 -0
- package/tests/test_service_health_incidents.py +34 -0
- package/tests/test_service_registry.py +52 -0
- package/tests/test_session_briefing.py +130 -0
- package/tests/test_snapshots.py +4 -4
- package/tests/test_sync_pipeline.py +26 -26
- package/tests/test_team_comms.py +2 -2
- package/tests/test_testrunner.py +2 -2
- package/tests/test_trust_graph.py +18 -0
- package/tests/test_unified_search.py +2 -2
- package/tests/test_version_check.py +10 -0
- package/tests/test_version_cmd.py +8 -8
- package/tests/test_whoami.py +1 -1
- package/systemd/skcomm-heartbeat.service +0 -18
- package/systemd/skcomm-queue-drain.service +0 -17
- /package/{openclaw-plugin → openclaw-plugin.archived-2026-04-23}/package.json +0 -0
- /package/{openclaw-plugin → openclaw-plugin.archived-2026-04-23}/src/openclaw.plugin.json +0 -0
|
@@ -0,0 +1,271 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""CDP run 2: re-extract press release text, pull thumbnails, grab FBI Vault Part 15.
|
|
3
|
+
|
|
4
|
+
Three sub-tasks:
|
|
5
|
+
A. Re-render the press release in a Chrome tab and pull the article-body innerText.
|
|
6
|
+
B. Page-context-fetch the 6 thumbnail JPGs for Release 02.
|
|
7
|
+
C. Navigate to FBI Vault and pull Part 15 of 16 from the 62-HQ-83894 series.
|
|
8
|
+
"""
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import base64
|
|
12
|
+
import json
|
|
13
|
+
import sys
|
|
14
|
+
import time
|
|
15
|
+
import urllib.request
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
|
|
18
|
+
import websocket
|
|
19
|
+
|
|
20
|
+
CDP_HTTP = "http://127.0.0.1:9222"
|
|
21
|
+
|
|
22
|
+
PRESS_URL = "https://www.war.gov/News/Releases/Release/Article/4499305/department-of-war-publishes-second-release-of-unidentified-anomalous-phenomena/"
|
|
23
|
+
THUMB_BASE = "https://www.war.gov/medialink/ufo/052226/release_02/thumbnails"
|
|
24
|
+
THUMB_NAMES = [
|
|
25
|
+
"CIA-UAP-D001_Intelligence_Information_Report_USSR_1973",
|
|
26
|
+
"DOE-UAP-D001_PANTEX_Image",
|
|
27
|
+
"DOE-UAP-D002_JamesTuck_Correspondence",
|
|
28
|
+
"DOE-UAP-D003_Pajarito_Astronomers",
|
|
29
|
+
"DOW-UAP-D017_General_Correspondence_Of_Sandia",
|
|
30
|
+
"ODNI-UAP-D001_USPER_Narrative_Senior_USIC",
|
|
31
|
+
]
|
|
32
|
+
|
|
33
|
+
FBI_VAULT_BASE = "https://vault.fbi.gov"
|
|
34
|
+
# FBI Vault organizes the 62-HQ-83894 UFO file as "Unidentified Flying Objects (UFO)" — Part X of Y
|
|
35
|
+
# Known canonical layout has Parts 1-16. Tweet referenced Part 15.
|
|
36
|
+
FBI_PART_PAGE = "https://vault.fbi.gov/UFO/UFO%20Part%2015%20of%2016/view"
|
|
37
|
+
FBI_PART_PDF_GUESS = "https://vault.fbi.gov/UFO/UFO%20Part%2015%20of%2016/at_download/file"
|
|
38
|
+
|
|
39
|
+
BASE = Path("/home/cbrd21/nextcloud/cbrd21-share/reference/war-gov-UFO-PURSUE-2026")
|
|
40
|
+
DOC_DIR = BASE / "docs" / "release-02"
|
|
41
|
+
THUMB_DIR = DOC_DIR / "thumbnails"
|
|
42
|
+
THUMB_DIR.mkdir(parents=True, exist_ok=True)
|
|
43
|
+
|
|
44
|
+
FBI_DIR = Path("/home/cbrd21/nextcloud/cbrd21-share/reference/fbi-vault-ufo-62-HQ-83894")
|
|
45
|
+
FBI_DIR.mkdir(parents=True, exist_ok=True)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def open_tab(url: str) -> dict:
    """Open a new Chrome tab on *url* through the DevTools HTTP endpoint.

    Issues ``PUT {CDP_HTTP}/json/new?<url>`` and returns the decoded JSON
    description of the new target.  ``main`` reads the ``id`` and
    ``webSocketDebuggerUrl`` fields of that description.

    Args:
        url: Address the new tab should load (e.g. ``"about:blank"``).

    Returns:
        The JSON object sent back by the browser.

    Raises:
        urllib.error.URLError: if the DevTools endpoint cannot be reached.
    """
    from urllib.parse import quote

    # The target URL travels as the raw query string.  Escape only the
    # characters that would break the request: '#' would start a fragment
    # that urllib strips before sending, and spaces / non-ASCII are illegal
    # in a request line.  Ordinary URL punctuation and existing %-escapes are
    # kept, so plain URLs such as "about:blank" are sent unchanged.
    target = quote(url, safe=":/?&=%~@!$'()*+,;")
    req = urllib.request.Request(f"{CDP_HTTP}/json/new?{target}", method="PUT")
    with urllib.request.urlopen(req, timeout=10) as r:
        return json.loads(r.read())
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def close_tab(target_id: str) -> None:
    """Ask the browser to close the tab identified by *target_id*.

    This is best-effort cleanup: any failure while contacting the
    ``/json/close`` endpoint (or while releasing the response) is ignored.
    """
    endpoint = f"{CDP_HTTP}/json/close/{target_id}"
    try:
        response = urllib.request.urlopen(endpoint, timeout=5)
    except Exception:
        return
    try:
        response.close()
    except Exception:
        pass
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class CDP:
    """Minimal synchronous Chrome DevTools Protocol client over one websocket.

    Commands are sent as JSON objects with an incrementing ``id``; the
    matching response is picked out of the incoming message stream.  Events
    (messages carrying a ``method``) that arrive while a command is waiting
    for its response are kept so that a following ``wait_event`` can still
    see them.
    """

    def __init__(self, ws_url: str):
        """Connect to the tab's ``webSocketDebuggerUrl``."""
        self.ws = websocket.create_connection(ws_url, timeout=120)
        self.mid = 0
        # Events received while the most recent call() awaited its response.
        self._events = []

    def call(self, method: str, params: dict | None = None, timeout: float = 60.0) -> dict:
        """Send one CDP command and return its ``result`` object.

        Args:
            method: CDP method name, e.g. ``"Page.navigate"``.
            params: Command parameters; ``None`` is sent as ``{}``.
            timeout: Overall time budget in seconds for the response.

        Raises:
            RuntimeError: if the browser answers with an ``error`` member.
            websocket.WebSocketTimeoutException: if no response arrives
                within *timeout* seconds.
        """
        self.mid += 1
        msg_id = self.mid
        # Events buffered by an earlier call belong to an earlier step;
        # keeping them could satisfy a later wait_event() with stale data.
        self._events = []
        self.ws.send(json.dumps({"id": msg_id, "method": method, "params": params or {}}))
        # Enforce one overall deadline.  A per-recv timeout alone is reset by
        # every unrelated event, so a busy page could block this forever.
        deadline = time.monotonic() + timeout
        while True:
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                raise websocket.WebSocketTimeoutException(
                    f"{method}: no response within {timeout}s"
                )
            self.ws.settimeout(remaining)
            msg = json.loads(self.ws.recv())
            if msg.get("id") == msg_id:
                if "error" in msg:
                    raise RuntimeError(f"{method}: {msg['error']}")
                return msg.get("result", {})
            if "method" in msg:
                self._events.append(msg)

    def wait_event(self, name: str, timeout: float = 30.0) -> dict:
        """Block until the event *name* is seen and return its ``params``.

        Events captured during the preceding ``call`` are checked first;
        after that, messages are read from the socket and any that do not
        match are discarded.

        Raises:
            TimeoutError: if the event is not seen within *timeout* seconds.
        """
        for idx, pending in enumerate(self._events):
            if pending.get("method") == name:
                del self._events[idx]
                return pending.get("params", {})

        deadline = time.monotonic() + timeout
        while time.monotonic() < deadline:
            self.ws.settimeout(max(0.1, deadline - time.monotonic()))
            try:
                raw = self.ws.recv()
            except websocket.WebSocketTimeoutException:
                continue
            msg = json.loads(raw)
            if msg.get("method") == name:
                return msg.get("params", {})
        raise TimeoutError(f"event {name} did not fire within {timeout}s")

    def close(self) -> None:
        """Close the websocket, ignoring any error raised while doing so."""
        try:
            self.ws.close()
        except Exception:
            pass
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def fetch_binary_in_page(cdp: CDP, url: str) -> tuple[int, bytes | None]:
    """Fetch a binary resource from inside the page and return its bytes.

    The download runs as ``fetch()`` in the page's JavaScript context with
    ``credentials: 'include'``, and the body is shipped back to Python as
    base64 through ``Runtime.evaluate``.

    Args:
        cdp: Client whose ``call`` method evaluates the script in the page.
        url: Resource to download.

    Returns:
        ``(status, data)``.  ``data`` is the decoded body for a successful
        (2xx) response with a non-empty body, otherwise ``None``.  ``status``
        is ``0`` when the evaluation produced no value.
    """
    # json.dumps yields a correctly escaped JavaScript string literal.
    url_literal = json.dumps(url)
    expr = (
        "(async () => {"
        f" const r = await fetch({url_literal}, {{credentials: 'include', cache: 'no-store'}});"
        " if (!r.ok) return {status: r.status, b64: null};"
        " const bytes = new Uint8Array(await r.arrayBuffer());"
        # Convert in 32 KiB slices: one-character-at-a-time concatenation is
        # very slow for multi-megabyte files, while a single apply() over the
        # whole array can exceed the engine's argument limit.
        " const parts = [];"
        " for (let i = 0; i < bytes.length; i += 0x8000) {"
        " parts.push(String.fromCharCode.apply(null, bytes.subarray(i, i + 0x8000)));"
        " }"
        " return {status: r.status, b64: btoa(parts.join('')), bytes: bytes.length};"
        "})()"
    )
    res = cdp.call("Runtime.evaluate", {
        "expression": expr,
        "awaitPromise": True,
        "returnByValue": True,
    }, timeout=300)
    val = res.get("result", {}).get("value", {}) or {}
    status = val.get("status", 0)
    b64 = val.get("b64")
    # The script reports a payload for every 2xx response (r.ok), so accept
    # the same range here instead of only status 200.
    if 200 <= status < 300 and b64:
        return status, base64.b64decode(b64)
    return status, None
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def fetch_text_in_page(cdp: CDP, url: str) -> tuple[int, str]:
    """Fetch *url* as text from inside the page.

    The request runs as ``fetch()`` in the page's JavaScript context through
    ``Runtime.evaluate``.

    Returns:
        ``(status, text)``; ``(0, "")`` when the evaluation yields no value.
    """
    script = "".join([
        "(async () => {",
        " const r = await fetch(" + json.dumps(url) + ", {credentials: 'include', cache: 'no-store'});",
        " return {status: r.status, text: await r.text()};",
        "})()",
    ])
    evaluate_args = {
        "expression": script,
        "awaitPromise": True,
        "returnByValue": True,
    }
    reply = cdp.call("Runtime.evaluate", evaluate_args, timeout=120)
    payload = reply.get("result", {}).get("value", {})
    if not payload:
        payload = {}
    return payload.get("status", 0), payload.get("text", "")
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def task_a_press_release(cdp: CDP) -> None:
    """Navigate to the press release and save the article's ``innerText``.

    Loads ``PRESS_URL``, waits for the load event (continuing regardless on
    timeout), then evaluates a script that returns the text of the first
    candidate container holding more than 500 characters, falling back to the
    whole ``<body>``.  Non-empty text is written to
    ``DOC_DIR / "press-release-2026-05-22.txt"``.
    """
    print("[A] navigating → press release", flush=True)
    cdp.call("Page.navigate", {"url": PRESS_URL})
    try:
        cdp.wait_event("Page.loadEventFired", timeout=30.0)
    except TimeoutError:
        # Best effort: try the extraction even if `load` was never observed.
        pass
    time.sleep(3.0)

    # Try multiple candidate selectors; press releases on DoW use various article wrappers.
    # NOTE: these fragments are concatenated onto ONE line, so the script must
    # not contain `//` comments -- a line comment would swallow everything
    # after it (including the closing `})()`) and make the script a syntax
    # error.
    extract_js = (
        "(() => {"
        " const candidates = ["
        " document.querySelector('.body-text'),"
        " document.querySelector('.article-body'),"
        " document.querySelector('.article-content'),"
        " document.querySelector('.press-release'),"
        " document.querySelector('main article'),"
        " document.querySelector('main .content'),"
        " document.querySelector('main'),"
        " document.querySelector('article'),"
        " ];"
        " for (const el of candidates) {"
        " if (el && el.innerText && el.innerText.length > 500) {"
        " return {selector: el.tagName + (el.className ? '.' + el.className.split(' ').join('.') : ''), text: el.innerText, len: el.innerText.length};"
        " }"
        " }"
        # Last resort: full body innerText
        " return {selector: 'body', text: document.body.innerText, len: document.body.innerText.length};"
        "})()"
    )
    res = cdp.call("Runtime.evaluate", {"expression": extract_js, "returnByValue": True})
    if "exceptionDetails" in res:
        # A script failure comes back as a normal result carrying
        # exceptionDetails rather than a protocol error; surface it instead
        # of silently producing no output.
        details = res["exceptionDetails"]
        print(f"[A] extraction script failed: {details.get('text')}", flush=True)
    val = res.get("result", {}).get("value", {}) or {}
    text = val.get("text", "")
    print(f"[A] selector={val.get('selector')!r} len={val.get('len')}", flush=True)
    if text:
        # Explicit UTF-8: the default locale encoding may be unable to
        # represent characters that appear in the article text.
        (DOC_DIR / "press-release-2026-05-22.txt").write_text(text, encoding="utf-8")
        print(f"[A] wrote press-release-2026-05-22.txt ({len(text)} chars)", flush=True)
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def task_b_thumbnails(cdp: CDP) -> None:
    """Download every thumbnail named in ``THUMB_NAMES`` via in-page fetch.

    Each ``<name>.jpg`` under ``THUMB_BASE`` is fetched from the page context
    and written to ``THUMB_DIR``; one OK/FAIL line is printed per file.
    """
    print(f"[B] pulling {len(THUMB_NAMES)} thumbnails via in-page fetch", flush=True)
    # Land on a war.gov page first so credentials/Akamai cookies apply to the fetches.
    cdp.call("Page.navigate", {"url": "https://www.war.gov/UFO/"})
    try:
        cdp.wait_event("Page.loadEventFired", timeout=30.0)
    except TimeoutError:
        pass
    time.sleep(2.0)
    for name in THUMB_NAMES:
        filename = f"{name}.jpg"
        status, content = fetch_binary_in_page(cdp, f"{THUMB_BASE}/{filename}")
        destination = THUMB_DIR / filename
        if status != 200 or not content:
            print(f"[B] FAIL {name}.jpg status={status}", flush=True)
            continue
        destination.write_bytes(content)
        print(f"[B] OK {name}.jpg {len(content)} bytes", flush=True)
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
def task_c_fbi_vault_part_15(cdp: CDP) -> None:
    """Try to download FBI Vault "UFO Part 15 of 16" as a PDF.

    Navigates to ``FBI_PART_PAGE``, collects links that look like PDF
    downloads, and fetches the first one from the page context (or
    ``FBI_PART_PDF_GUESS`` when none is found).  On success the bytes are
    written to ``FBI_DIR / "UFO-Part-15-of-16.pdf"``; otherwise the page's
    HTML is saved to ``FBI_DIR / "part-15-page.html"`` for inspection.
    """
    print("[C] navigating → FBI Vault Part 15 page", flush=True)
    cdp.call("Page.navigate", {"url": FBI_PART_PAGE})
    try:
        cdp.wait_event("Page.loadEventFired", timeout=30.0)
    except TimeoutError:
        # Best effort: inspect the page even if `load` was never observed.
        pass
    time.sleep(3.0)

    # Try to find the PDF link on the page (Plone reading-room standard pattern)
    link_js = (
        "(() => {"
        " const links = Array.from(document.querySelectorAll('a[href]')).map(a => a.href);"
        " const pdfish = links.filter(h => /\\.pdf(\\?|$)|at_download\\/file/i.test(h));"
        " return {title: document.title, total: links.length, pdfish: pdfish.slice(0, 10)};"
        "})()"
    )
    res = cdp.call("Runtime.evaluate", {"expression": link_js, "returnByValue": True})
    link_val = res.get("result", {}).get("value", {}) or {}
    print(f"[C] page info: {json.dumps(link_val)}", flush=True)

    # First candidate that is a download endpoint or ends in ".pdf".
    pdf_url = next(
        (
            h
            for h in link_val.get("pdfish", [])
            if "at_download/file" in h or h.lower().endswith(".pdf")
        ),
        None,
    )
    if not pdf_url:
        pdf_url = FBI_PART_PDF_GUESS
        print(f"[C] using guess URL → {pdf_url}", flush=True)

    print(f"[C] page-context fetch → {pdf_url}", flush=True)
    status, content = fetch_binary_in_page(cdp, pdf_url)
    if status == 200 and content:
        out_path = FBI_DIR / "UFO-Part-15-of-16.pdf"
        out_path.write_bytes(content)
        print(f"[C] OK {out_path.name} {len(content)/1e6:.1f} MB", flush=True)
    else:
        # Maybe the page itself IS the PDF (some Vault items).
        # NOTE: despite the message, no alternate URL is attempted here; only
        # the page HTML is saved for manual inspection.
        print(f"[C] direct fetch failed status={status}; trying alternate URLs", flush=True)
        _, html_text = fetch_text_in_page(cdp, FBI_PART_PAGE)
        # Normalise once so the write and the length report agree even if the
        # fetch produced no text.
        html_text = html_text or ""
        # Explicit UTF-8: the default locale encoding may be unable to
        # represent characters present in the page.
        (FBI_DIR / "part-15-page.html").write_text(html_text, encoding="utf-8")
        print(f"[C] saved page HTML for inspection ({len(html_text)} chars)", flush=True)
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
def main() -> int:
    """Run sub-tasks A, B and C in one freshly opened tab.

    Opens a blank tab, attaches a CDP client, enables the Page, Runtime and
    Network domains, runs the three tasks in order, and always closes the
    websocket and the tab afterwards.

    Returns:
        ``0`` when all tasks ran without raising.
    """
    tab = open_tab("about:blank")
    target_id = tab["id"]
    try:
        # Connect inside the outer try so a failed websocket connect still
        # closes the tab that was just opened.
        cdp = CDP(tab["webSocketDebuggerUrl"])
        try:
            cdp.call("Page.enable")
            cdp.call("Runtime.enable")
            cdp.call("Network.enable", {"maxPostDataSize": 0})

            task_a_press_release(cdp)
            task_b_thumbnails(cdp)
            task_c_fbi_vault_part_15(cdp)

            return 0
        finally:
            cdp.close()
    finally:
        close_tab(target_id)
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
# Script entry point: the process exit status is main()'s return value.
if __name__ == "__main__":
    exit_code = main()
    sys.exit(exit_code)
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Probe war.gov/UFO/ via Lumina Chrome CDP (port 9222).
|
|
3
|
+
|
|
4
|
+
Steps:
|
|
5
|
+
1. Open a new tab on war.gov/UFO/
|
|
6
|
+
2. Wait for Vue mount to load (CSV must be reachable)
|
|
7
|
+
3. Pull the CSV via in-page fetch
|
|
8
|
+
4. Inspect inline scripts for any release_2 link patterns
|
|
9
|
+
5. Save raw CSV + script index to ~/clawd/tmp/wargov-capture/probe-out/
|
|
10
|
+
|
|
11
|
+
Output:
|
|
12
|
+
probe-out/uap-csv.csv fresh CSV from the site
|
|
13
|
+
probe-out/file-index.json inline-script link probe
|
|
14
|
+
probe-out/page-meta.json URL/title/page render check
|
|
15
|
+
"""
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import json
|
|
19
|
+
import sys
|
|
20
|
+
import time
|
|
21
|
+
import urllib.request
|
|
22
|
+
from pathlib import Path
|
|
23
|
+
|
|
24
|
+
import websocket # websocket-client
|
|
25
|
+
|
|
26
|
+
CDP_HTTP = "http://127.0.0.1:9222"
|
|
27
|
+
TARGET = "https://www.war.gov/UFO/"
|
|
28
|
+
OUT_DIR = Path("/home/cbrd21/clawd/tmp/wargov-capture/probe-out")
|
|
29
|
+
OUT_DIR.mkdir(parents=True, exist_ok=True)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def cdp_get(path: str, timeout: float = 10.0) -> dict | list:
    """GET ``{CDP_HTTP}{path}`` and return the decoded JSON body.

    Args:
        path: Endpoint path appended to ``CDP_HTTP``; it is concatenated
            as-is, so it must carry its own leading slash.
        timeout: Seconds to wait for the browser before giving up.  Without
            a timeout an unresponsive endpoint would block the script
            indefinitely.

    Raises:
        urllib.error.URLError: if the DevTools endpoint cannot be reached.
    """
    with urllib.request.urlopen(f"{CDP_HTTP}{path}", timeout=timeout) as r:
        return json.loads(r.read())
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def open_tab(url: str) -> dict:
    """Open a new Chrome tab on *url* through the DevTools HTTP endpoint.

    Issues ``PUT {CDP_HTTP}/json/new?<url>`` and returns the decoded JSON
    description of the new target.  ``main`` reads the ``id`` and
    ``webSocketDebuggerUrl`` fields of that description.

    Args:
        url: Address the new tab should load.

    Returns:
        The JSON object sent back by the browser.

    Raises:
        urllib.error.URLError: if the DevTools endpoint cannot be reached.
    """
    from urllib.parse import quote

    # The target URL travels as the raw query string.  Escape only the
    # characters that would break the request: '#' would start a fragment
    # that urllib strips before sending, and spaces / non-ASCII are illegal
    # in a request line.  Ordinary URL punctuation and existing %-escapes are
    # kept, so plain URLs are sent unchanged.
    target = quote(url, safe=":/?&=%~@!$'()*+,;")
    # Newer Chrome only accepts PUT on /json/new
    req = urllib.request.Request(f"{CDP_HTTP}/json/new?{target}", method="PUT")
    with urllib.request.urlopen(req, timeout=10) as r:
        return json.loads(r.read())
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def close_tab(target_id: str) -> None:
    """Ask the browser to close the tab identified by *target_id*.

    This is best-effort cleanup: any failure while contacting the
    ``/json/close`` endpoint (or while releasing the response) is ignored.
    """
    endpoint = f"{CDP_HTTP}/json/close/{target_id}"
    try:
        response = urllib.request.urlopen(endpoint, timeout=5)
    except Exception:
        return
    try:
        response.close()
    except Exception:
        pass
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class CDP:
    """Minimal synchronous Chrome DevTools Protocol client over one websocket.

    Commands are sent as JSON objects with an incrementing ``id``; the
    matching response is picked out of the incoming message stream.  Events
    (messages carrying a ``method``) that arrive while a command is waiting
    for its response are kept so that a following ``wait_event`` can still
    see them.
    """

    def __init__(self, ws_url: str):
        """Connect to the tab's ``webSocketDebuggerUrl``."""
        self.ws = websocket.create_connection(ws_url, timeout=60)
        self.mid = 0
        # Events received while the most recent call() awaited its response.
        self._events = []

    def call(self, method: str, params: dict | None = None, timeout: float = 30.0) -> dict:
        """Send one CDP command and return its ``result`` object.

        Args:
            method: CDP method name, e.g. ``"Page.navigate"``.
            params: Command parameters; ``None`` is sent as ``{}``.
            timeout: Overall time budget in seconds for the response.

        Raises:
            RuntimeError: if the browser answers with an ``error`` member.
            websocket.WebSocketTimeoutException: if no response arrives
                within *timeout* seconds.
        """
        self.mid += 1
        msg_id = self.mid
        # Events buffered by an earlier call belong to an earlier step;
        # keeping them could satisfy a later wait_event() with stale data.
        self._events = []
        self.ws.send(json.dumps({"id": msg_id, "method": method, "params": params or {}}))
        # Enforce one overall deadline.  A per-recv timeout alone is reset by
        # every unrelated event, so a busy page could block this forever.
        deadline = time.monotonic() + timeout
        while True:
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                raise websocket.WebSocketTimeoutException(
                    f"{method}: no response within {timeout}s"
                )
            self.ws.settimeout(remaining)
            msg = json.loads(self.ws.recv())
            if msg.get("id") == msg_id:
                if "error" in msg:
                    raise RuntimeError(f"{method}: {msg['error']}")
                return msg.get("result", {})
            if "method" in msg:
                self._events.append(msg)

    def wait_event(self, name: str, timeout: float = 30.0) -> dict:
        """Block until the event *name* is seen and return its ``params``.

        Events captured during the preceding ``call`` are checked first;
        after that, messages are read from the socket and any that do not
        match are discarded.

        Raises:
            TimeoutError: if the event is not seen within *timeout* seconds.
        """
        for idx, pending in enumerate(self._events):
            if pending.get("method") == name:
                del self._events[idx]
                return pending.get("params", {})

        deadline = time.monotonic() + timeout
        while time.monotonic() < deadline:
            self.ws.settimeout(max(0.1, deadline - time.monotonic()))
            try:
                raw = self.ws.recv()
            except websocket.WebSocketTimeoutException:
                continue
            msg = json.loads(raw)
            if msg.get("method") == name:
                return msg.get("params", {})
        raise TimeoutError(f"event {name} did not fire within {timeout}s")

    def close(self) -> None:
        """Close the websocket, ignoring any error raised while doing so."""
        try:
            self.ws.close()
        except Exception:
            pass
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def main() -> int:
    """Open TARGET in a fresh tab, probe the rendered page, and save the results.

    Steps: open a tab, attach over the DevTools WebSocket, enable the Page,
    Runtime and Network domains, navigate, wait for the load event (a timeout
    is reported and ignored), pause so the page can render, then evaluate
    four JavaScript probes and write their results under OUT_DIR:
    ``page-meta.json``, ``uap-csv.csv`` (or ``uap-csv-error.json``),
    ``inline-scripts.json`` and ``link-probe.json``.

    Returns 0 on completion. The tab is closed on every exit path once it
    has been opened, including when the WebSocket connection fails.
    """
    print(f"[probe] opening tab → {TARGET}", flush=True)
    tab = open_tab(TARGET)
    target_id = tab["id"]
    print(f"[probe] tab id={target_id}", flush=True)

    cdp = None
    try:
        # Connect inside the try block: if the WebSocket connection fails,
        # the finally clause still closes the tab opened above.
        cdp = CDP(tab["webSocketDebuggerUrl"])
        cdp.call("Page.enable")
        cdp.call("Runtime.enable")
        cdp.call("Network.enable", {"maxPostDataSize": 0})
        cdp.call("Page.navigate", {"url": TARGET})
        try:
            cdp.wait_event("Page.loadEventFired", timeout=30.0)
        except TimeoutError:
            print("[probe] Page.loadEventFired timeout — proceeding anyway", flush=True)

        # Give the Vue mount a chance to render the CSV view
        time.sleep(5.0)

        # Page meta
        meta_js = (
            "({"
            " url: location.href,"
            " title: document.title,"
            " hasMainContent: !!document.querySelector('main'),"
            " scriptInlineCount: document.querySelectorAll('script:not([src])').length,"
            " ufoMentions: (document.body.innerText.match(/UAP|UFO|PURSUE/g) || []).length,"
            " releaseDateGuesses: Array.from(new Set((document.body.innerText.match(/\\b\\d{1,2}\\/\\d{1,2}\\/\\d{2,4}\\b/g) || []))),"
            " release2HrefCount: document.querySelectorAll('a[href*=\"release_2\"]').length,"
            " release2InHtml: (document.documentElement.outerHTML.match(/release_2/gi) || []).length"
            "})"
        )
        meta = cdp.call("Runtime.evaluate", {"expression": meta_js, "returnByValue": True})
        meta_val = meta.get("result", {}).get("value", {})
        (OUT_DIR / "page-meta.json").write_text(json.dumps(meta_val, indent=2))
        print(f"[probe] page-meta: {json.dumps(meta_val)}", flush=True)

        # Pull the CSV via in-page fetch
        csv_js = (
            "(async () => {"
            " const u = '/Portals/1/Interactive/2026/UFO/uap-csv.csv';"
            " const r = await fetch(u, {credentials: 'include', cache: 'no-store'});"
            " return {status: r.status, len: (await r.clone().text()).length, text: await r.text()};"
            "})()"
        )
        csv_res = cdp.call("Runtime.evaluate", {
            "expression": csv_js,
            "awaitPromise": True,
            "returnByValue": True,
        }, timeout=60)
        csv_val = csv_res.get("result", {}).get("value", {})
        if isinstance(csv_val, dict) and csv_val.get("status") == 200:
            # The fetched text is arbitrary page content; pin the encoding so
            # the write does not depend on the locale's default.
            (OUT_DIR / "uap-csv.csv").write_text(csv_val["text"], encoding="utf-8")
            print(f"[probe] CSV pulled, {csv_val['len']} bytes", flush=True)
        else:
            print(f"[probe] CSV fetch failed: {csv_val}", flush=True)
            (OUT_DIR / "uap-csv-error.json").write_text(json.dumps(csv_val, indent=2, default=str))

        # Inspect inline scripts for release_2 hints
        scripts_js = (
            "(() => {"
            " const out = [];"
            " document.querySelectorAll('script:not([src])').forEach((s, i) => {"
            " const t = s.textContent || '';"
            " out.push({idx: i, len: t.length, hasRelease2: /release_2/i.test(t), hasFetch: /fetch\\(/.test(t), hasCsv: /\\.csv/.test(t), preview: t.slice(0, 400)});"
            " });"
            " return out;"
            "})()"
        )
        scripts_res = cdp.call("Runtime.evaluate", {"expression": scripts_js, "returnByValue": True})
        scripts_val = scripts_res.get("result", {}).get("value", [])
        (OUT_DIR / "inline-scripts.json").write_text(json.dumps(scripts_val, indent=2))
        print(f"[probe] inline scripts: {len(scripts_val)} ({sum(1 for s in scripts_val if s.get('hasRelease2'))} mention release_2)", flush=True)

        # Probe for press release link
        pr_js = (
            "(() => {"
            " const links = Array.from(document.querySelectorAll('a[href]')).map(a => a.href);"
            " const press = links.filter(h => /News\\/Releases/i.test(h));"
            " const medialink = links.filter(h => /medialink\\/ufo/i.test(h));"
            " return {pressCount: press.length, press: press.slice(0, 20), medialinkCount: medialink.length, medialinkSample: medialink.slice(0, 20)};"
            "})()"
        )
        pr_res = cdp.call("Runtime.evaluate", {"expression": pr_js, "returnByValue": True})
        pr_val = pr_res.get("result", {}).get("value", {})
        (OUT_DIR / "link-probe.json").write_text(json.dumps(pr_val, indent=2))
        print(f"[probe] link probe: press={pr_val.get('pressCount')} medialink={pr_val.get('medialinkCount')}", flush=True)

        print("[probe] DONE", flush=True)
        return 0
    finally:
        if cdp is not None:
            cdp.close()
        close_tab(target_id)
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
# Script entry point: run the probe and use main()'s return value as the exit status.
if __name__ == "__main__":
    sys.exit(main())
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Discover + capture the DOJ SPLC press release by NAVIGATING the search page
|
|
3
|
+
(so JS renders the result list) then reading the rendered DOM. Falls back to
|
|
4
|
+
scraping any /opa/pr/ or /usao-mdal/pr/ links the rendered page exposes.
|
|
5
|
+
"""
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
import json, re, sys, time, urllib.request
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
import websocket
|
|
10
|
+
|
|
11
|
+
CDP_HTTP = "http://127.0.0.1:9222"
|
|
12
|
+
SEARCH_URL = "https://www.justice.gov/news?search_api_fulltext=Southern%20Poverty%20Law%20Center"
|
|
13
|
+
OUT = Path("/home/cbrd21/clawd/skills/substance-lens/captures/splc-doj-2026-06-03")
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def open_tab(url):
    """Open a new browser tab on ``url`` via the DevTools HTTP endpoint.

    Issues a PUT to ``/json/new`` and returns the JSON reply decoded into a dict.
    """
    endpoint = f"{CDP_HTTP}/json/new?{url}"
    request = urllib.request.Request(endpoint, method="PUT")
    with urllib.request.urlopen(request, timeout=10) as response:
        payload = response.read()
    return json.loads(payload)
|
|
20
|
+
|
|
21
|
+
def close_tab(tid):
    """Ask the DevTools HTTP endpoint to close the tab ``tid``.

    Best-effort cleanup: failures are ignored. The HTTP response is closed
    explicitly so its connection is released straight away rather than
    whenever the response object happens to be garbage-collected.
    """
    try:
        with urllib.request.urlopen(f"{CDP_HTTP}/json/close/{tid}", timeout=5):
            pass
    except Exception:
        # Deliberately ignored; there is nothing useful to do if the close fails.
        pass
|
|
26
|
+
|
|
27
|
+
class CDP:
    """Small synchronous client for a single DevTools WebSocket connection."""

    def __init__(self, ws):
        """Connect to the WebSocket URL ``ws`` and start the id counter at zero."""
        self.ws = websocket.create_connection(ws, timeout=120)
        self.mid = 0

    def call(self, m, p=None, t=60.0):
        """Send method ``m`` with params ``p`` and wait for the matching reply.

        ``t`` is applied as the socket timeout before reading. Frames whose
        ``id`` differs from this request's id are read and dropped.

        Returns the reply's ``result`` entry (``{}`` when absent).
        Raises RuntimeError when the reply carries an ``error`` entry.
        """
        self.mid += 1
        request_id = self.mid
        request = {"id": request_id, "method": m, "params": p or {}}
        self.ws.send(json.dumps(request))
        self.ws.settimeout(t)
        while True:
            reply = json.loads(self.ws.recv())
            if reply.get("id") != request_id:
                continue
            if "error" in reply:
                raise RuntimeError(f"{m}: {reply['error']}")
            return reply.get("result", {})

    def wait(self, name, t=30.0):
        """Read frames until one whose ``method`` equals ``name`` arrives.

        Returns that frame's ``params`` (``{}`` when absent), or ``{}`` if
        ``t`` seconds pass without seeing it. Other frames are dropped.
        """
        deadline = time.time() + t
        while time.time() < deadline:
            remaining = deadline - time.time()
            # Never hand the socket a timeout below 0.1 s.
            self.ws.settimeout(max(0.1, remaining))
            try:
                event = json.loads(self.ws.recv())
            except websocket.WebSocketTimeoutException:
                # Re-check the deadline in the loop condition.
                continue
            if event.get("method") == name:
                return event.get("params", {})
        return {}

    def close(self):
        """Close the WebSocket, ignoring any exception raised while doing so."""
        try:
            self.ws.close()
        except Exception:
            pass
|
|
48
|
+
|
|
49
|
+
def jseval(cdp, expr, t=60):
    """Evaluate the JavaScript ``expr`` in the page and return its value.

    Issues ``Runtime.evaluate`` with ``awaitPromise`` and ``returnByValue``
    set, passing ``t`` through as the call timeout. Returns the ``value``
    entry of the reply's ``result`` object, or None when it is absent.
    """
    params = {"expression": expr, "awaitPromise": True, "returnByValue": True}
    reply = cdp.call("Runtime.evaluate", params, t)
    remote_object = reply.get("result", {})
    return remote_object.get("value")
|
|
52
|
+
|
|
53
|
+
def fetch_text(cdp,url):
    """Fetch ``url`` from inside the page and return ``(status, text)``.

    The in-page script reports a failed fetch as status -1 with the error
    message as text. If the evaluation yields nothing at all, ``(0, "")``
    is returned.
    """
    quoted_url = json.dumps(url)  # JSON-encode the URL into a JS string literal
    expr = (
        "(async()=>{try{const r=await fetch("
        + quoted_url
        + ",{credentials:'include',cache:'no-store'});"
        "return {status:r.status,text:await r.text()};}catch(e){return{status:-1,text:String(e)};}})()"
    )
    outcome = jseval(cdp, expr, 180) or {}
    status = outcome.get("status", 0)
    body = outcome.get("text", "")
    return status, body
|
|
58
|
+
|
|
59
|
+
def main():
    """Render the DOJ search page, pick a press-release link, and capture it.

    Steps: open SEARCH_URL in a new tab, attach over the DevTools WebSocket,
    navigate and wait for the page to render, collect candidate links from
    the rendered DOM, choose the first one whose href or text matches the
    SPLC / wire-fraud pattern (falling back to the first candidate), then
    fetch it in-page and write under OUT:

    * ``doj-press-release.html`` - raw HTML, when the fetch returns 200
    * ``doj-press-release.txt``  - extracted body text, when non-empty
    * ``press-release-discovery.json`` - manifest of what was found

    Returns 0 on completion. The tab is closed on every exit path once it
    has been opened, including when the WebSocket connection fails.
    """
    # Nothing else in this script creates OUT, and every artefact below is
    # written into it; create it up front so the writes cannot fail with
    # FileNotFoundError after the capture work has already been done.
    OUT.mkdir(parents=True, exist_ok=True)

    tab = open_tab(SEARCH_URL)
    tid = tab["id"]
    cdp = None
    try:
        # Connect inside the try block so a failed WebSocket connection still
        # closes the tab opened above.
        cdp = CDP(tab["webSocketDebuggerUrl"])
        cdp.call("Page.enable")
        cdp.call("Runtime.enable")
        cdp.call("Page.navigate", {"url": SEARCH_URL})
        cdp.wait("Page.loadEventFired", 30.0)
        time.sleep(6.0)  # let result JS render

        links = jseval(cdp,
            "JSON.stringify(Array.from(document.querySelectorAll('a[href]'))"
            ".map(a=>({h:a.getAttribute('href'),t:(a.innerText||'').trim()}))"
            ".filter(x=>x.h&&(x.h.includes('/opa/pr/')||x.h.includes('/usao-mdal/pr/')||/southern.poverty|law.center|splc/i.test(x.t))))")
        cands = json.loads(links) if links else []
        print(f"[pr] rendered candidates: {len(cands)}", flush=True)
        for c in cands[:15]:
            print(" ", c["h"], "::", c["t"][:70], flush=True)

        # pick best: first candidate matching the pattern, else the first one
        pr = None
        for c in cands:
            if re.search(r"southern.poverty|law.center|splc|wire.fraud", (c["h"] + c["t"]).lower()):
                pr = c["h"]
                break
        if not pr and cands:
            pr = cands[0]["h"]
        # Site-relative hrefs need the origin prepended before fetching.
        if pr and pr.startswith("/"):
            pr = "https://www.justice.gov" + pr

        manifest = {"search_url": SEARCH_URL, "rendered_candidates": cands[:15], "chosen": pr}
        if pr:
            print(f"[pr] fetching -> {pr}", flush=True)
            st, html = fetch_text(cdp, pr)
            print(f"[pr] status={st} len={len(html)}", flush=True)
            # Bound before the status check so the manifest update below is
            # safe even when the fetch did not return 200.
            txt = ""
            if st == 200 and html:
                (OUT / "doj-press-release.html").write_text(html, encoding="utf-8")
                txt = jseval(cdp,
                    f"(async()=>{{const r=await fetch({json.dumps(pr)},{{credentials:'include'}});"
                    f"const h=await r.text();const d=new DOMParser().parseFromString(h,'text/html');"
                    f"const a=d.querySelector('.field--name-body')||d.querySelector('article')||d.querySelector('main')||d.body;"
                    f"return a?a.innerText:'';}})()", 60) or ""
                if txt:
                    (OUT / "doj-press-release.txt").write_text(txt, encoding="utf-8")
                    print(f"[pr] {len(txt)} chars text", flush=True)
            # Record the outcome of every attempted fetch, including failures.
            manifest["status"] = st
            manifest["txt_chars"] = len(txt)

        (OUT / "press-release-discovery.json").write_text(json.dumps(manifest, indent=2))
        print("[pr] done", flush=True)
        return 0
    finally:
        if cdp is not None:
            cdp.close()
        close_tab(tid)
|
|
99
|
+
|
|
100
|
+
# Script entry point: run the capture and use main()'s return value as the exit status.
if __name__=="__main__":
    sys.exit(main())
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Parse the new uap-data.csv and split Release 01 vs Release 02 records.
|
|
3
|
+
|
|
4
|
+
The CSV has multi-line quoted fields (newlines inside Title and Description Blurb),
|
|
5
|
+
so we use Python's csv module rather than naive line counting.
|
|
6
|
+
"""
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import csv
|
|
10
|
+
import json
|
|
11
|
+
from collections import Counter, defaultdict
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
|
|
14
|
+
CSV_PATH = Path("/home/cbrd21/nextcloud/cbrd21-share/reference/war-gov-UFO-PURSUE-2026/docs/release-02/uap-data.csv")
OUT_DIR = Path("/home/cbrd21/clawd/tmp/wargov-capture/probe-out")
OUT_DIR.mkdir(parents=True, exist_ok=True)


def _cell(record, column):
    """Return the stripped text of ``column`` in ``record`` ('' when missing or empty)."""
    return (record.get(column) or "").strip()


# Load every record; csv.DictReader copes with the quoted multi-line fields.
with CSV_PATH.open(newline="", encoding="utf-8") as f:
    rows = list(csv.DictReader(f))

print(f"Total records: {len(rows)}")

# Tally records per release date, most frequent first.
date_counter = Counter(_cell(record, "Release Date") for record in rows)
print("Release dates:")
for release_date, count in date_counter.most_common():
    print(f" {release_date!r:15} → {count}")

# Filter for Release 02
release2 = [record for record in rows if _cell(record, "Release Date") == "5/22/26"]
print(f"\nRelease 02 records: {len(release2)}")

# Bucket by type
type_counter = Counter(_cell(record, "Type") for record in release2)
agency_counter = Counter(_cell(record, "Agency") for record in release2)
print("Types:")
for type_name, count in type_counter.most_common():
    print(f" {type_name!r:15} → {count}")
print("Agencies:")
for agency_name, count in agency_counter.most_common():
    print(f" {agency_name!r:15} → {count}")

# Extract download links
links = [
    {
        # Titles may span several lines; flatten them onto one.
        "title": _cell(record, "Title").replace("\n", " ").replace("\r", ""),
        "type": _cell(record, "Type"),
        "agency": _cell(record, "Agency"),
        "incident_date": _cell(record, "Incident Date"),
        "incident_location": _cell(record, "Incident Location"),
        "pdf_link": _cell(record, "PDF | Image Link"),
        "modal_image": _cell(record, "Modal Image"),
        "dvids_id": _cell(record, "DVIDS Video ID"),
    }
    for record in release2
]

# Save full inventory
(OUT_DIR / "release-02-records.json").write_text(json.dumps(links, indent=2))
print(f"\nSaved inventory: {OUT_DIR / 'release-02-records.json'}")

# Unique direct-fetchable URLs
urls = {
    entry[key]
    for entry in links
    for key in ("pdf_link", "modal_image")
    if entry[key]
}
urls_list = sorted(urls)
print(f"\nUnique direct URLs: {len(urls_list)}")
for url in urls_list[:15]:
    print(f" {url}")
if len(urls_list) > 15:
    print(f" ... and {len(urls_list) - 15} more")

(OUT_DIR / "release-02-urls.json").write_text(json.dumps(urls_list, indent=2))

# DVIDS-only records (videos hosted exclusively on DVIDS)
dvids_only = [entry for entry in links if entry["dvids_id"] and not entry["pdf_link"]]
print(f"\nDVIDS-only video records: {len(dvids_only)}")
for entry in dvids_only[:10]:
    print(f" DVIDS {entry['dvids_id']}: {entry['title'][:80]}")
(OUT_DIR / "release-02-dvids.json").write_text(json.dumps(dvids_only, indent=2))
|