@smilintux/skcapstone 0.10.0 → 0.12.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +10 -4
- package/.github/workflows/ci.yml +2 -2
- package/.github/workflows/publish.yml +9 -2
- package/.openclaw-workspace.json +2 -2
- package/CLAUDE.md +37 -0
- package/MISSION.md +17 -2
- package/README.md +282 -3
- package/docker/Dockerfile +7 -7
- package/docker/compose-templates/dev-team.yml +12 -12
- package/docker/compose-templates/mini-team.yml +9 -9
- package/docker/compose-templates/ops-team.yml +10 -10
- package/docker/compose-templates/research-team.yml +10 -10
- package/docker/entrypoint.sh +4 -4
- package/docs/ADR-optional-integration-backbone.md +181 -0
- package/docs/ARCHITECTURE.md +186 -43
- package/docs/BOND_WITH_GROK.md +6 -6
- package/docs/CUSTOM_AGENT.md +123 -30
- package/docs/DREAMING.md +70 -0
- package/docs/GETTING_STARTED.md +7 -7
- package/docs/QUICKSTART.md +10 -6
- package/docs/SKJOULE_ARCHITECTURE.md +3 -3
- package/docs/SOUL_SWAPPER.md +5 -5
- package/docs/hammertime-audit.md +402 -0
- package/docs/sk-integration-HANDOFF.md +117 -0
- package/docs/skscheduler.md +155 -0
- package/docs/superpowers/examples/jobs.yaml +31 -0
- package/docs/superpowers/plans/2026-06-08-skscheduler.md +1265 -0
- package/docs/superpowers/specs/2026-06-08-skscheduler-design.md +186 -0
- package/examples/custom-bond-template.json +1 -1
- package/examples/grok-feb.json +1 -1
- package/examples/queen-ava-feb.json +1 -1
- package/launchd/{com.skcapstone.skcomm-heartbeat.plist → com.skcapstone.skcomms-heartbeat.plist} +4 -4
- package/launchd/{com.skcapstone.skcomm-queue-drain.plist → com.skcapstone.skcomms-queue-drain.plist} +4 -4
- package/launchd/install-launchd.sh +6 -6
- package/{openclaw-plugin → openclaw-plugin.archived-2026-04-23}/src/index.ts +3 -2
- package/package.json +1 -1
- package/pyproject.toml +16 -10
- package/scripts/archive-sessions.sh +7 -0
- package/scripts/check-updates.py +4 -4
- package/scripts/install-bundle.sh +8 -8
- package/scripts/install.ps1 +12 -11
- package/scripts/install.sh +159 -5
- package/scripts/model-fallback-monitor.sh +102 -0
- package/scripts/nvidia-proxy.mjs +78 -26
- package/scripts/refresh-anthropic-token.sh +172 -0
- package/scripts/release.sh +98 -0
- package/scripts/session-to-memory.py +219 -0
- package/scripts/skgateway.mjs +3 -3
- package/scripts/telegram-catchup-all.sh +12 -1
- package/scripts/verify_install.sh +2 -2
- package/scripts/wargov-ufo-capture/README.md +43 -0
- package/scripts/wargov-ufo-capture/cdp_capture_release2.py +273 -0
- package/scripts/wargov-ufo-capture/cdp_capture_splc_doj.py +246 -0
- package/scripts/wargov-ufo-capture/cdp_finish.py +271 -0
- package/scripts/wargov-ufo-capture/cdp_probe.py +188 -0
- package/scripts/wargov-ufo-capture/cdp_splc_pressrelease.py +101 -0
- package/scripts/wargov-ufo-capture/parse_csv.py +95 -0
- package/scripts/wargov-ufo-capture/pull_dvids.sh +107 -0
- package/scripts/watch-anthropic-token.sh +212 -0
- package/scripts/windows/install-tasks.ps1 +7 -7
- package/scripts/windows/skcapstone-task.xml +1 -1
- package/src/skcapstone/__init__.py +45 -3
- package/src/skcapstone/_cli_monolith.py +20 -15
- package/src/skcapstone/activity.py +5 -1
- package/src/skcapstone/agent_card.py +3 -2
- package/src/skcapstone/api.py +41 -40
- package/src/skcapstone/auction.py +14 -11
- package/src/skcapstone/backup.py +2 -1
- package/src/skcapstone/blueprint_registry.py +4 -3
- package/src/skcapstone/brain_first.py +238 -0
- package/src/skcapstone/changelog.py +1 -1
- package/src/skcapstone/chat.py +22 -17
- package/src/skcapstone/cli/__init__.py +9 -1
- package/src/skcapstone/cli/_common.py +1 -0
- package/src/skcapstone/cli/agents_spawner.py +5 -2
- package/src/skcapstone/cli/alerts.py +25 -4
- package/src/skcapstone/cli/bench.py +15 -15
- package/src/skcapstone/cli/chat.py +7 -4
- package/src/skcapstone/cli/consciousness.py +5 -2
- package/src/skcapstone/cli/context_cmd.py +18 -4
- package/src/skcapstone/cli/daemon.py +11 -7
- package/src/skcapstone/cli/gtd.py +26 -1
- package/src/skcapstone/cli/housekeeping.py +3 -3
- package/src/skcapstone/cli/identity_cmd.py +378 -0
- package/src/skcapstone/cli/joule_cmd.py +7 -3
- package/src/skcapstone/cli/memory.py +8 -6
- package/src/skcapstone/cli/peers_dir.py +1 -1
- package/src/skcapstone/cli/register_cmd.py +29 -3
- package/src/skcapstone/cli/scheduler_cmd.py +167 -0
- package/src/skcapstone/cli/session.py +25 -0
- package/src/skcapstone/cli/setup.py +96 -29
- package/src/skcapstone/cli/shell_cmd.py +53 -1
- package/src/skcapstone/cli/skills_cmd.py +2 -2
- package/src/skcapstone/cli/soul.py +8 -5
- package/src/skcapstone/cli/status.py +37 -11
- package/src/skcapstone/cli/telegram.py +21 -0
- package/src/skcapstone/cli/test_cmd.py +5 -5
- package/src/skcapstone/cli/test_connection.py +2 -2
- package/src/skcapstone/cli/upgrade_cmd.py +23 -14
- package/src/skcapstone/cli/version_cmd.py +1 -1
- package/src/skcapstone/cli/watch_cmd.py +9 -6
- package/src/skcapstone/cloud9_bridge.py +14 -14
- package/src/skcapstone/codex_setup.py +255 -0
- package/src/skcapstone/config_validator.py +7 -4
- package/src/skcapstone/consciousness_config.py +5 -1
- package/src/skcapstone/consciousness_loop.py +313 -273
- package/src/skcapstone/context_loader.py +121 -0
- package/src/skcapstone/coord_federation.py +2 -1
- package/src/skcapstone/coordination.py +23 -6
- package/src/skcapstone/crush_integration.py +2 -1
- package/src/skcapstone/daemon.py +132 -77
- package/src/skcapstone/dashboard.py +10 -10
- package/src/skcapstone/data/sk-agent-picker.sh +421 -0
- package/src/skcapstone/data/systemd/skcapstone-api.socket +9 -0
- package/src/skcapstone/data/systemd/skcapstone-memory-compress.service +18 -0
- package/src/skcapstone/data/systemd/skcapstone-memory-compress.timer +11 -0
- package/src/skcapstone/data/systemd/skcapstone.service +37 -0
- package/src/skcapstone/data/systemd/skcapstone@.service +50 -0
- package/src/skcapstone/data/systemd/skcomms-heartbeat.service +18 -0
- package/{systemd/skcomm-heartbeat.timer → src/skcapstone/data/systemd/skcomms-heartbeat.timer} +2 -2
- package/src/skcapstone/data/systemd/skcomms-queue-drain.service +17 -0
- package/{systemd/skcomm-queue-drain.timer → src/skcapstone/data/systemd/skcomms-queue-drain.timer} +2 -2
- package/src/skcapstone/defaults/claude/CLAUDE.md +67 -0
- package/src/skcapstone/defaults/claude/settings.json +74 -0
- package/src/skcapstone/defaults/lumina/config/claude-hooks.md +57 -0
- package/src/skcapstone/defaults/lumina/config/skgraph.yaml +55 -10
- package/src/skcapstone/defaults/lumina/config/skmemory.yaml +79 -13
- package/src/skcapstone/defaults/lumina/config/skvector.yaml +60 -9
- package/src/skcapstone/defaults/lumina/memory/long-term/18b9c0d1e2f3-cloud9-protocol.json +2 -2
- package/src/skcapstone/defaults/lumina/memory/long-term/a1b2c3d4e5f6-ecosystem-overview.json +2 -2
- package/src/skcapstone/defaults/lumina/memory/long-term/b2c3d4e5f6a7-five-pillars.json +9 -9
- package/src/skcapstone/defaults/lumina/memory/long-term/d4e5f6a7b8c9-site-directory.json +2 -2
- package/src/skcapstone/defaults/unhinged.json +13 -0
- package/src/skcapstone/discovery.py +43 -20
- package/src/skcapstone/doctor.py +941 -22
- package/src/skcapstone/dreaming.py +1183 -109
- package/src/skcapstone/emotion_tracker.py +2 -2
- package/src/skcapstone/export.py +4 -3
- package/src/skcapstone/fuse_mount.py +14 -12
- package/src/skcapstone/gui_installer.py +2 -2
- package/src/skcapstone/heartbeat.py +1 -1
- package/src/skcapstone/housekeeping.py +14 -14
- package/src/skcapstone/install_wizard.py +209 -7
- package/src/skcapstone/itil.py +13 -4
- package/src/skcapstone/kms_scheduler.py +10 -8
- package/src/skcapstone/launchd.py +19 -19
- package/src/skcapstone/mcp_launcher.py +15 -1
- package/src/skcapstone/mcp_server.py +83 -49
- package/src/skcapstone/mcp_tools/__init__.py +2 -0
- package/src/skcapstone/mcp_tools/_helpers.py +2 -2
- package/src/skcapstone/mcp_tools/ansible_tools.py +7 -4
- package/src/skcapstone/mcp_tools/brain_first_tools.py +90 -0
- package/src/skcapstone/mcp_tools/capauth_tools.py +7 -4
- package/src/skcapstone/mcp_tools/comm_tools.py +10 -10
- package/src/skcapstone/mcp_tools/coord_tools.py +8 -4
- package/src/skcapstone/mcp_tools/did_tools.py +11 -8
- package/src/skcapstone/mcp_tools/gtd_tools.py +4 -4
- package/src/skcapstone/mcp_tools/memory_tools.py +6 -2
- package/src/skcapstone/mcp_tools/notification_tools.py +22 -6
- package/src/skcapstone/mcp_tools/{skcomm_tools.py → skcomms_tools.py} +14 -14
- package/src/skcapstone/mcp_tools/soul_tools.py +8 -2
- package/src/skcapstone/mdns_discovery.py +2 -2
- package/src/skcapstone/memory_curator.py +1 -1
- package/src/skcapstone/memory_engine.py +10 -3
- package/src/skcapstone/metrics.py +30 -16
- package/src/skcapstone/migrate_memories.py +4 -3
- package/src/skcapstone/migrate_multi_agent.py +8 -7
- package/src/skcapstone/models.py +47 -5
- package/src/skcapstone/notifications.py +42 -18
- package/src/skcapstone/onboard.py +875 -121
- package/src/skcapstone/operator_link.py +170 -0
- package/src/skcapstone/peer_directory.py +4 -4
- package/src/skcapstone/peers.py +19 -19
- package/src/skcapstone/pillars/__init__.py +7 -5
- package/src/skcapstone/pillars/consciousness.py +191 -0
- package/src/skcapstone/pillars/identity.py +51 -7
- package/src/skcapstone/pillars/memory.py +9 -3
- package/src/skcapstone/pillars/sync.py +2 -2
- package/src/skcapstone/preflight.py +3 -3
- package/src/skcapstone/providers/docker.py +28 -28
- package/src/skcapstone/register.py +6 -6
- package/src/skcapstone/registry_client.py +5 -4
- package/src/skcapstone/runtime.py +14 -3
- package/src/skcapstone/scheduled_tasks.py +254 -19
- package/src/skcapstone/scheduler_jobs.py +456 -0
- package/src/skcapstone/scheduler_runner.py +239 -0
- package/src/skcapstone/scheduler_state.py +162 -0
- package/src/skcapstone/sdk.py +310 -0
- package/src/skcapstone/service_health.py +279 -39
- package/src/skcapstone/session_briefing.py +108 -0
- package/src/skcapstone/session_capture.py +1 -1
- package/src/skcapstone/shell.py +7 -1
- package/src/skcapstone/soul.py +3 -1
- package/src/skcapstone/soul_switch.py +3 -1
- package/src/skcapstone/summary.py +6 -6
- package/src/skcapstone/sync_engine.py +15 -15
- package/src/skcapstone/sync_watcher.py +2 -2
- package/src/skcapstone/systemd.py +55 -21
- package/src/skcapstone/team_comms.py +8 -8
- package/src/skcapstone/team_engine.py +1 -1
- package/src/skcapstone/testrunner.py +3 -3
- package/src/skcapstone/trust_graph.py +40 -5
- package/src/skcapstone/unified_search.py +15 -6
- package/src/skcapstone/uninstall_wizard.py +11 -3
- package/src/skcapstone/version_check.py +8 -4
- package/src/skcapstone/warmth_anchor.py +4 -2
- package/src/skcapstone/whoami.py +4 -4
- package/systemd/skcapstone.service +4 -6
- package/systemd/skcapstone@.service +7 -8
- package/systemd/skcomms-heartbeat.service +21 -0
- package/systemd/skcomms-heartbeat.timer +12 -0
- package/systemd/skcomms-queue-drain.service +17 -0
- package/systemd/skcomms-queue-drain.timer +12 -0
- package/tests/conftest.py +39 -0
- package/tests/integration/test_consciousness_e2e.py +39 -39
- package/tests/test_agent_card.py +1 -1
- package/tests/test_agent_home_scaffold.py +34 -0
- package/tests/test_alerts_consumer_topics.py +27 -0
- package/tests/test_backup.py +2 -1
- package/tests/test_chat.py +6 -6
- package/tests/test_claude_md.py +2 -2
- package/tests/test_cli_skills.py +10 -10
- package/tests/test_cli_test_cmd.py +4 -4
- package/tests/test_cli_test_connection.py +1 -1
- package/tests/test_cloud9_bridge.py +6 -6
- package/tests/test_consciousness_e2e.py +1 -1
- package/tests/test_consciousness_loop.py +10 -10
- package/tests/test_coordination.py +25 -0
- package/tests/test_cross_package.py +21 -21
- package/tests/test_daemon.py +4 -4
- package/tests/test_daemon_shutdown.py +1 -1
- package/tests/test_docker_provider.py +29 -29
- package/tests/test_doctor.py +400 -0
- package/tests/test_doctor_skscheduler.py +50 -0
- package/tests/test_dreaming_engine.py +147 -0
- package/tests/test_dreaming_gtd_capture.py +35 -0
- package/tests/test_e2e_automated.py +8 -5
- package/tests/test_fuse_mount.py +10 -10
- package/tests/test_gtd_brief.py +46 -0
- package/tests/test_gtd_malformed_tolerance.py +31 -0
- package/tests/test_housekeeping.py +15 -15
- package/tests/test_identity_migrate.py +251 -0
- package/tests/test_integration_backbone.py +598 -0
- package/tests/test_itil_gtd_lifecycle.py +37 -0
- package/tests/test_jobs_dropins.py +84 -0
- package/tests/test_mcp_server.py +82 -37
- package/tests/test_models.py +48 -4
- package/tests/test_multi_agent.py +31 -29
- package/tests/test_notifications.py +122 -32
- package/tests/test_onboard.py +63 -75
- package/tests/test_operator_link.py +78 -0
- package/tests/test_peers.py +14 -14
- package/tests/test_pillars.py +98 -0
- package/tests/test_preflight.py +3 -3
- package/tests/test_runtime.py +21 -0
- package/tests/test_scheduled_tasks.py +11 -6
- package/tests/test_scheduler_cli.py +47 -0
- package/tests/test_scheduler_features.py +133 -0
- package/tests/test_scheduler_integration.py +87 -0
- package/tests/test_scheduler_jobs.py +155 -0
- package/tests/test_scheduler_runner.py +64 -0
- package/tests/test_scheduler_state.py +57 -0
- package/tests/test_sdk.py +70 -0
- package/tests/test_service_health_incidents.py +34 -0
- package/tests/test_service_registry.py +52 -0
- package/tests/test_session_briefing.py +130 -0
- package/tests/test_snapshots.py +4 -4
- package/tests/test_sync_pipeline.py +26 -26
- package/tests/test_team_comms.py +2 -2
- package/tests/test_testrunner.py +2 -2
- package/tests/test_trust_graph.py +18 -0
- package/tests/test_unified_search.py +2 -2
- package/tests/test_version_check.py +10 -0
- package/tests/test_version_cmd.py +8 -8
- package/tests/test_whoami.py +1 -1
- package/systemd/skcomm-heartbeat.service +0 -18
- package/systemd/skcomm-queue-drain.service +0 -17
- /package/{openclaw-plugin → openclaw-plugin.archived-2026-04-23}/package.json +0 -0
- /package/{openclaw-plugin → openclaw-plugin.archived-2026-04-23}/src/openclaw.plugin.json +0 -0
|
@@ -0,0 +1,273 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Capture war.gov/UFO/ Release 02 via Lumina Chrome CDP.
|
|
3
|
+
|
|
4
|
+
Strategy (Release 02 is bundled into one ZIP, plus a fresh CSV + press release):
|
|
5
|
+
1. Open a tab on war.gov/UFO/ to seed Akamai cookies in the Chrome session.
|
|
6
|
+
2. Set Page.setDownloadBehavior to allow downloads to our target dir.
|
|
7
|
+
3. Trigger ZIP download by injecting <a download href=...> and clicking it.
|
|
8
|
+
4. Poll for .crdownload to drain and the final file to appear.
|
|
9
|
+
5. Also fetch the new CSV in-page (text response — simpler than download).
|
|
10
|
+
6. Fetch the press release HTML the same way.
|
|
11
|
+
|
|
12
|
+
Output → ~/nextcloud/cbrd21-share/reference/war-gov-UFO-PURSUE-2026/{docs/release-02, release-02-zip}/
|
|
13
|
+
"""
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import hashlib
|
|
17
|
+
import json
|
|
18
|
+
import sys
|
|
19
|
+
import time
|
|
20
|
+
import urllib.request
|
|
21
|
+
from pathlib import Path
|
|
22
|
+
|
|
23
|
+
import websocket
|
|
24
|
+
|
|
25
|
+
# Base URL of the Chrome DevTools HTTP endpoint (loopback, port 9222).
CDP_HTTP = "http://127.0.0.1:9222"
# Page the capture tab is pointed at first; per the module docstring this
# seeds the Akamai cookies in the Chrome session.
SEED_URL = "https://www.war.gov/UFO/"

# Release 02 artifacts: the document bundle ZIP, the data CSV, and the press release.
ZIP_URL = "https://www.war.gov/medialink/ufo/052226/release_02/release_02_document_bundle.zip"
CSV_URL = "https://www.war.gov/Portals/1/Interactive/2026/UFO/uap-data.csv"
PRESS_URL = "https://www.war.gov/News/Releases/Release/Article/4499305/department-of-war-publishes-second-release-of-unidentified-anomalous-phenomena/"

# Output locations (absolute, user-specific path).
BASE = Path("/home/cbrd21/nextcloud/cbrd21-share/reference/war-gov-UFO-PURSUE-2026")
DOC_DIR = BASE / "docs" / "release-02"  # CSV, press release, manifest
ZIP_DIR = BASE / "release-02-zip"  # browser download directory for the ZIP
# NOTE: both directories are created as a side effect of importing this module.
DOC_DIR.mkdir(parents=True, exist_ok=True)
ZIP_DIR.mkdir(parents=True, exist_ok=True)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def cdp_get(path: str, timeout: float = 10.0) -> dict | list:
    """GET a path on the Chrome DevTools HTTP endpoint and decode the JSON reply.

    Args:
        path: Path appended verbatim to ``CDP_HTTP``.
        timeout: Seconds to wait for the HTTP request. Bounded by default so an
            unresponsive debugging port cannot block the script indefinitely,
            matching the other HTTP helpers in this file.

    Returns:
        The decoded JSON body (an object or an array).

    Raises:
        OSError: If the endpoint is unreachable or the request times out
            (``urllib.error.URLError`` is a subclass).
        json.JSONDecodeError: If the response body is not valid JSON.
    """
    with urllib.request.urlopen(f"{CDP_HTTP}{path}", timeout=timeout) as r:
        return json.loads(r.read())
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def open_tab(url: str) -> dict:
    """Open a new browser tab on *url* through the DevTools HTTP endpoint.

    Sends ``PUT {CDP_HTTP}/json/new?{url}`` and returns the decoded JSON reply
    describing the new target.
    """
    endpoint = f"{CDP_HTTP}/json/new?{url}"
    request = urllib.request.Request(endpoint, method="PUT")
    with urllib.request.urlopen(request, timeout=10) as response:
        payload = response.read()
    return json.loads(payload)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def close_tab(target_id: str) -> None:
    """Best-effort close of the tab identified by *target_id*.

    This is cleanup code, so a failure must never replace the real outcome of
    the capture run: errors are reported on stderr instead of being raised or
    silently dropped.
    """
    try:
        with urllib.request.urlopen(f"{CDP_HTTP}/json/close/{target_id}", timeout=5):
            pass
    except Exception as exc:  # deliberate catch-all: cleanup must not raise
        print(f"[capture] could not close tab {target_id}: {exc}", file=sys.stderr, flush=True)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class CDP:
    """Minimal synchronous Chrome DevTools Protocol client over one WebSocket.

    Incoming messages that are not the awaited reply or event are read and
    discarded; nothing is buffered for later.
    """

    def __init__(self, ws_url: str):
        """Connect to the WebSocket at *ws_url* with a 120 s socket timeout."""
        self.ws = websocket.create_connection(ws_url, timeout=120)
        self.mid = 0  # id of the most recently sent command

    def call(self, method: str, params: dict | None = None, timeout: float = 60.0) -> dict:
        """Send one command and block until its reply arrives.

        Args:
            method: Protocol method name.
            params: Method parameters; ``None`` is sent as an empty object.
            timeout: Overall budget in seconds for the matching reply.

        Returns:
            The reply's ``result`` object (``{}`` when absent).

        Raises:
            RuntimeError: If the reply carries an ``error`` member.
            TimeoutError: If unrelated messages keep arriving and no matching
                reply shows up within *timeout* seconds.
        """
        self.mid += 1
        msg_id = self.mid
        self.ws.send(json.dumps({"id": msg_id, "method": method, "params": params or {}}))
        # A per-recv timeout alone never fires while unrelated events keep
        # streaming in, so track an overall deadline as wait_event() does.
        deadline = time.monotonic() + timeout
        while True:
            self.ws.settimeout(max(0.1, deadline - time.monotonic()))
            msg = json.loads(self.ws.recv())
            if msg.get("id") == msg_id:
                if "error" in msg:
                    raise RuntimeError(f"{method}: {msg['error']}")
                return msg.get("result", {})
            if time.monotonic() >= deadline:
                raise TimeoutError(f"{method}: no reply within {timeout}s")

    def wait_event(self, name: str, timeout: float = 30.0) -> dict:
        """Block until an event whose ``method`` equals *name* arrives.

        Returns:
            The event's ``params`` object (``{}`` when absent).

        Raises:
            TimeoutError: If the event is not seen within *timeout* seconds.
        """
        deadline = time.monotonic() + timeout
        while time.monotonic() < deadline:
            self.ws.settimeout(max(0.1, deadline - time.monotonic()))
            try:
                raw = self.ws.recv()
            except websocket.WebSocketTimeoutException:
                # Socket-level timeout: loop so the deadline check decides.
                continue
            msg = json.loads(raw)
            if msg.get("method") == name:
                return msg.get("params", {})
        raise TimeoutError(f"event {name} did not fire within {timeout}s")

    def close(self) -> None:
        """Close the WebSocket, ignoring any error (best-effort cleanup)."""
        try:
            self.ws.close()
        except Exception:
            pass
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def fetch_text_in_page(cdp: CDP, url: str) -> tuple[int, str]:
    """Fetch *url* with ``fetch()`` inside the page and return ``(status, text)``.

    The request is evaluated in the page context with ``credentials: 'include'``
    and ``cache: 'no-store'``.

    Returns:
        ``(http_status, body_text)`` when the fetch resolves;
        ``(-1, error_message)`` when ``fetch()`` itself rejects, so the caller
        can record why it failed;
        ``(0, "")`` when the evaluation yields no value.
    """
    expr = (
        "(async () => {"
        " try {"
        f" const r = await fetch({json.dumps(url)}, {{credentials: 'include', cache: 'no-store'}});"
        " return {status: r.status, text: await r.text()};"
        # Without this catch a rejected fetch surfaces only as an evaluation
        # exception and the Python side sees an empty result.
        " } catch (e) { return {status: -1, text: String(e)}; }"
        "})()"
    )
    res = cdp.call("Runtime.evaluate", {
        "expression": expr,
        "awaitPromise": True,
        "returnByValue": True,
    }, timeout=180)
    val = res.get("result", {}).get("value", {}) or {}
    return val.get("status", 0), val.get("text", "")
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def trigger_download(cdp: CDP, url: str) -> None:
    """Start a browser download of *url* from inside the page.

    Evaluates a script that appends a temporary ``<a download>`` element
    pointing at *url*, clicks it, and removes it again.
    """
    script_parts = [
        "(() => {",
        " const a = document.createElement('a');",
        f" a.href = {json.dumps(url)};",
        " a.download = '';",
        " document.body.appendChild(a);",
        " a.click();",
        " a.remove();",
        " return 'click-triggered';",
        "})()",
    ]
    evaluate_params = {"expression": "".join(script_parts), "returnByValue": True}
    cdp.call("Runtime.evaluate", evaluate_params)
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def wait_for_file(path: Path, partial_glob: str, timeout: float = 1800.0, idle_threshold: float = 5.0) -> Path | None:
    """Poll directory *path* until a finished download matching *partial_glob* is stable.

    While any ``*.crdownload`` file exists in *path* the download is treated as
    in progress. Once none remain, the first match of *partial_glob* is
    returned after its size has stayed unchanged for *idle_threshold* seconds.

    Args:
        path: Directory to watch.
        partial_glob: Glob pattern for the finished file's name.
        timeout: Maximum number of seconds to wait overall.
        idle_threshold: Seconds the finished file's size must stay unchanged.

    Returns:
        The finished file's path, or ``None`` if *timeout* elapsed first.
    """
    deadline = time.monotonic() + timeout
    last_size = -1
    last_change = time.monotonic()
    while time.monotonic() < deadline:
        crfiles = list(path.glob("*.crdownload"))
        finished = [p for p in path.glob(partial_glob) if not p.name.endswith(".crdownload")]
        try:
            if crfiles:
                size = sum(f.stat().st_size for f in crfiles)
                if size != last_size:
                    last_size = size
                    last_change = time.monotonic()
                    print(f"[download] in-progress {size/1e6:.1f} MB", flush=True)
                time.sleep(2.0)
            elif finished:
                # No partial file left; require a quiet period before trusting it.
                f = finished[0]
                size = f.stat().st_size
                if size != last_size:
                    last_size = size
                    last_change = time.monotonic()
                if time.monotonic() - last_change >= idle_threshold:
                    return f
                time.sleep(1.0)
            else:
                time.sleep(2.0)
        except FileNotFoundError:
            # A file listed by glob() was renamed or removed before stat()
            # (e.g. a .crdownload being moved to its final name); rescan
            # instead of aborting the whole capture.
            continue
    return None
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def sha256_file(p: Path) -> str:
    """Return the hexadecimal SHA-256 digest of the file at *p*.

    The file is read in 8 MiB pieces so large downloads are never held in
    memory at once.
    """
    piece_size = 8 * 1024 * 1024
    digest = hashlib.sha256()
    with p.open("rb") as stream:
        for piece in iter(lambda: stream.read(piece_size), b""):
            digest.update(piece)
    return digest.hexdigest()
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def main() -> int:
    """Capture the Release 02 CSV, press release and ZIP bundle, then write a manifest.

    Opens a tab on ``SEED_URL``, fetches the CSV and press release in page
    context, triggers the ZIP download into ``ZIP_DIR`` and waits for it to
    finish. The tab and WebSocket are always cleaned up on exit.

    Returns:
        0 once the manifest has been written; 2 if the ZIP download did not
        complete within the wait window.
    """
    print(f"[capture] seeding tab → {SEED_URL}", flush=True)
    tab = open_tab(SEED_URL)
    target_id = tab["id"]
    ws_url = tab["webSocketDebuggerUrl"]
    cdp = None
    try:
        # Connect inside the ``try`` so the freshly opened tab is still closed
        # when the WebSocket connection cannot be established.
        cdp = CDP(ws_url)
        cdp.call("Page.enable")
        cdp.call("Runtime.enable")
        cdp.call("Network.enable", {"maxPostDataSize": 0})
        cdp.call("Page.navigate", {"url": SEED_URL})
        try:
            cdp.wait_event("Page.loadEventFired", timeout=30.0)
        except TimeoutError:
            # Proceed anyway; the fetches below report their own status.
            pass
        time.sleep(3.0)  # let Vue settle and cookies stick

        # ---- 1. Fetch CSV (small, text)
        print(f"[capture] fetching CSV → {CSV_URL}", flush=True)
        status, text = fetch_text_in_page(cdp, CSV_URL)
        print(f"[capture] CSV status={status} len={len(text)}", flush=True)
        # Files are written as UTF-8 explicitly: the default encoding is
        # locale-dependent and can fail or mis-encode non-ASCII content.
        if status == 200 and text:
            (DOC_DIR / "uap-data.csv").write_text(text, encoding="utf-8")
        else:
            (DOC_DIR / "uap-data-error.json").write_text(
                json.dumps({"status": status, "preview": text[:1000]}, indent=2),
                encoding="utf-8",
            )

        # ---- 2. Fetch press release HTML
        print(f"[capture] fetching press release → {PRESS_URL}", flush=True)
        status, text = fetch_text_in_page(cdp, PRESS_URL)
        print(f"[capture] press release status={status} len={len(text)}", flush=True)
        if status == 200 and text:
            (DOC_DIR / "press-release-2026-05-22.html").write_text(text, encoding="utf-8")
            # Try to extract clean text via DOM
            txt_expr = (
                f"(async () => {{"
                f" const r = await fetch({json.dumps(PRESS_URL)}, {{credentials: 'include'}});"
                f" const html = await r.text();"
                f" const doc = new DOMParser().parseFromString(html, 'text/html');"
                f" const article = doc.querySelector('.body-text') || doc.querySelector('article') || doc.querySelector('main') || doc.body;"
                f" return article ? article.innerText : '';"
                f"}})()"
            )
            res = cdp.call("Runtime.evaluate", {
                "expression": txt_expr,
                "awaitPromise": True,
                "returnByValue": True,
            }, timeout=60)
            article_text = res.get("result", {}).get("value", "") or ""
            if article_text:
                (DOC_DIR / "press-release-2026-05-22.txt").write_text(article_text, encoding="utf-8")
                print(f"[capture] extracted {len(article_text)} chars of article text", flush=True)

        # ---- 3. Download the ZIP bundle via download behavior + <a download> click
        print(f"[capture] setting download dir → {ZIP_DIR}", flush=True)
        cdp.call("Page.setDownloadBehavior", {
            "behavior": "allow",
            "downloadPath": str(ZIP_DIR),
        })
        # Also try Browser.setDownloadBehavior which is the newer API
        try:
            cdp.call("Browser.setDownloadBehavior", {
                "behavior": "allow",
                "downloadPath": str(ZIP_DIR),
                "eventsEnabled": True,
            })
        except Exception as e:
            print(f"[capture] Browser.setDownloadBehavior not supported: {e}", flush=True)

        print(f"[capture] triggering ZIP download → {ZIP_URL}", flush=True)
        trigger_download(cdp, ZIP_URL)

        # Poll for completion
        zip_file = wait_for_file(ZIP_DIR, "release_02_document_bundle*.zip", timeout=1800.0, idle_threshold=5.0)
        if not zip_file:
            print("[capture] ZIP download did NOT complete in 30 min — check ZIP_DIR manually", flush=True)
            # Diagnostic: list what's in there
            for f in ZIP_DIR.iterdir():
                print(f" {f.name} {f.stat().st_size}", flush=True)
            return 2

        size_mb = zip_file.stat().st_size / 1e6
        sha = sha256_file(zip_file)
        print(f"[capture] ZIP done: {zip_file.name} {size_mb:.1f} MB sha256={sha}", flush=True)

        # Write manifest
        manifest = {
            "release": "02",
            "release_date": "2026-05-22",
            "captured_at": time.strftime("%Y-%m-%dT%H:%M:%S%z"),
            "zip_url": ZIP_URL,
            "zip_path": str(zip_file),
            "zip_size_bytes": zip_file.stat().st_size,
            "zip_sha256": sha,
            "csv_url": CSV_URL,
            "press_url": PRESS_URL,
            "capture_method": "Lumina Chrome CDP (port 9222) — page-context fetch + <a download> click",
        }
        (DOC_DIR / "release-02-manifest.json").write_text(json.dumps(manifest, indent=2), encoding="utf-8")
        print(f"[capture] manifest written", flush=True)

        return 0
    finally:
        if cdp is not None:
            cdp.close()
        close_tab(target_id)
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
if __name__ == "__main__":
|
|
273
|
+
sys.exit(main())
|
|
@@ -0,0 +1,246 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Capture the PRIMARY DOJ sources for the SPLC superseding-indictment finding via Lumina Chrome CDP.
|
|
3
|
+
|
|
4
|
+
Both targets returned HTTP 403 to WebFetch (Akamai TLS-fingerprint/bot gate). Driving
|
|
5
|
+
Lumina's already-authenticated Chrome (port 9222) in page context bypasses the gate.
|
|
6
|
+
|
|
7
|
+
Targets:
|
|
8
|
+
1. Indictment PDF -> https://www.justice.gov/opa/media/1437146/dl (download)
|
|
9
|
+
2. DOJ press release -> discovered via justice.gov news search for "Southern Poverty Law Center"
|
|
10
|
+
|
|
11
|
+
Goal (per finding 2026-06-04_splc-doj-superseding-indictment-oneill-thread.md):
|
|
12
|
+
resolve the 2010-vs-2014 conduct window, the F-30 "$70K" figure, and confirm count
|
|
13
|
+
language first-hand rather than via secondary quotation.
|
|
14
|
+
|
|
15
|
+
Output -> ~/clawd/skills/substance-lens/captures/splc-doj-2026-06-03/
|
|
16
|
+
"""
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
import hashlib
|
|
20
|
+
import json
|
|
21
|
+
import re
|
|
22
|
+
import sys
|
|
23
|
+
import time
|
|
24
|
+
import urllib.request
|
|
25
|
+
from pathlib import Path
|
|
26
|
+
|
|
27
|
+
import websocket
|
|
28
|
+
|
|
29
|
+
# Base URL of the Chrome DevTools HTTP endpoint (loopback, port 9222).
CDP_HTTP = "http://127.0.0.1:9222"
# Page the capture tab is pointed at before any target is fetched.
SEED_URL = "https://www.justice.gov/"
# Target 1 from the module docstring: the indictment PDF.
PDF_URL = "https://www.justice.gov/opa/media/1437146/dl"
# justice.gov news search used to discover the press release (target 2).
SEARCH_URL = "https://www.justice.gov/news?search_api_fulltext=Southern+Poverty+Law+Center"

# Output directory (absolute, user-specific path).
OUT = Path("/home/cbrd21/clawd/skills/substance-lens/captures/splc-doj-2026-06-03")
# NOTE: the directory is created as a side effect of importing this module.
OUT.mkdir(parents=True, exist_ok=True)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def open_tab(url: str) -> dict:
    """Open a new browser tab on *url* through the DevTools HTTP endpoint.

    Sends ``PUT {CDP_HTTP}/json/new?{url}`` and returns the decoded JSON reply
    describing the new target.
    """
    endpoint = f"{CDP_HTTP}/json/new?{url}"
    request = urllib.request.Request(endpoint, method="PUT")
    with urllib.request.urlopen(request, timeout=10) as response:
        payload = response.read()
    return json.loads(payload)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def close_tab(target_id: str) -> None:
    """Best-effort close of the tab identified by *target_id*.

    This is cleanup code, so a failure must never replace the real outcome of
    the capture run: errors are reported on stderr instead of being raised or
    silently dropped.
    """
    try:
        with urllib.request.urlopen(f"{CDP_HTTP}/json/close/{target_id}", timeout=5):
            pass
    except Exception as exc:  # deliberate catch-all: cleanup must not raise
        print(f"[capture] could not close tab {target_id}: {exc}", file=sys.stderr, flush=True)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class CDP:
    """Minimal synchronous Chrome DevTools Protocol client over one WebSocket.

    Incoming messages that are not the awaited reply or event are read and
    discarded; nothing is buffered for later.
    """

    def __init__(self, ws_url: str):
        """Connect to the WebSocket at *ws_url* with a 120 s socket timeout."""
        self.ws = websocket.create_connection(ws_url, timeout=120)
        self.mid = 0  # id of the most recently sent command

    def call(self, method: str, params: dict | None = None, timeout: float = 60.0) -> dict:
        """Send one command and block until its reply arrives.

        Args:
            method: Protocol method name.
            params: Method parameters; ``None`` is sent as an empty object.
            timeout: Overall budget in seconds for the matching reply.

        Returns:
            The reply's ``result`` object (``{}`` when absent).

        Raises:
            RuntimeError: If the reply carries an ``error`` member.
            TimeoutError: If unrelated messages keep arriving and no matching
                reply shows up within *timeout* seconds.
        """
        self.mid += 1
        msg_id = self.mid
        self.ws.send(json.dumps({"id": msg_id, "method": method, "params": params or {}}))
        # A per-recv timeout alone never fires while unrelated events keep
        # streaming in, so track an overall deadline as wait_event() does.
        deadline = time.monotonic() + timeout
        while True:
            self.ws.settimeout(max(0.1, deadline - time.monotonic()))
            msg = json.loads(self.ws.recv())
            if msg.get("id") == msg_id:
                if "error" in msg:
                    raise RuntimeError(f"{method}: {msg['error']}")
                return msg.get("result", {})
            if time.monotonic() >= deadline:
                raise TimeoutError(f"{method}: no reply within {timeout}s")

    def wait_event(self, name: str, timeout: float = 30.0) -> dict:
        """Block until an event whose ``method`` equals *name* arrives.

        Returns:
            The event's ``params`` object (``{}`` when absent).

        Raises:
            TimeoutError: If the event is not seen within *timeout* seconds.
        """
        deadline = time.monotonic() + timeout
        while time.monotonic() < deadline:
            self.ws.settimeout(max(0.1, deadline - time.monotonic()))
            try:
                raw = self.ws.recv()
            except websocket.WebSocketTimeoutException:
                # Socket-level timeout: loop so the deadline check decides.
                continue
            msg = json.loads(raw)
            if msg.get("method") == name:
                return msg.get("params", {})
        raise TimeoutError(f"event {name} did not fire within {timeout}s")

    def close(self) -> None:
        """Close the WebSocket, ignoring any error (best-effort cleanup)."""
        try:
            self.ws.close()
        except Exception:
            pass
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def fetch_text_in_page(cdp: CDP, url: str) -> tuple[int, str]:
    """Fetch *url* with ``fetch()`` inside the page and return ``(status, text)``.

    The in-page script catches a rejected fetch and reports it as status ``-1``
    with the error message as text. ``(0, "")`` is returned when the
    evaluation yields no value.
    """
    quoted_url = json.dumps(url)
    script = "".join([
        "(async () => {",
        " try {",
        f" const r = await fetch({quoted_url}, {{credentials: 'include', cache: 'no-store'}});",
        " return {status: r.status, text: await r.text()};",
        " } catch (e) { return {status: -1, text: String(e)}; }",
        "})()",
    ])
    evaluate_params = {
        "expression": script,
        "awaitPromise": True,
        "returnByValue": True,
    }
    reply = cdp.call("Runtime.evaluate", evaluate_params, timeout=180)
    payload = reply.get("result", {}).get("value", {}) or {}
    status = payload.get("status", 0)
    body = payload.get("text", "")
    return status, body
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def fetch_pdf_b64_in_page(cdp: CDP, url: str) -> tuple[int, str, int]:
    """Fetch a binary in page context and return it base64-encoded.

    Works for PDFs under a few MB (the whole payload travels through one
    evaluation result).

    Returns:
        ``(http_status, base64_body, byte_length)``. When the in-page fetch
        throws, the tuple is ``(-1, "", 0)`` and the JavaScript error message
        is printed to stderr so the failure can be diagnosed.
    """
    expr = (
        "(async () => {"
        " try {"
        f" const r = await fetch({json.dumps(url)}, {{credentials: 'include', cache: 'no-store'}});"
        " const buf = await r.arrayBuffer();"
        " const bytes = new Uint8Array(buf);"
        " let bin = '';"
        # Convert in 0x8000-byte slices rather than one apply() over the whole buffer.
        " const chunk = 0x8000;"
        " for (let i = 0; i < bytes.length; i += chunk) {"
        " bin += String.fromCharCode.apply(null, bytes.subarray(i, i + chunk));"
        " }"
        " return {status: r.status, b64: btoa(bin), len: bytes.length};"
        " } catch (e) { return {status: -1, b64: '', len: 0, err: String(e)}; }"
        "})()"
    )
    res = cdp.call("Runtime.evaluate", {
        "expression": expr,
        "awaitPromise": True,
        "returnByValue": True,
    }, timeout=240)
    val = res.get("result", {}).get("value", {}) or {}
    err = val.get("err")
    if err:
        # The script already captures the error; surface it instead of
        # dropping it, since the return shape has no slot for it.
        print(f"[capture] in-page fetch failed for {url}: {err}", file=sys.stderr, flush=True)
    return val.get("status", 0), val.get("b64", ""), val.get("len", 0)
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def sha256_bytes(b: bytes) -> str:
    """Return the hexadecimal SHA-256 digest of the in-memory bytes *b*."""
    return hashlib.sha256(b).hexdigest()
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def main() -> int:
    """Capture the indictment PDF and the matching DOJ press release.

    Opens a browser tab on ``SEED_URL`` and then, from that page's context:

    1. fetches ``PDF_URL`` and writes the bytes under ``OUT`` together with
       their SHA-256 and a ``%PDF-`` magic check;
    2. fetches ``SEARCH_URL``, picks a ``/opa/pr/`` link (preferring slugs
       that mention the case) and stores that press release as HTML plus
       extracted body text.

    A ``manifest.json`` describing every target is written to ``OUT``.

    Returns:
        ``0`` whenever the run completes; per-target failures are printed and
        recorded in the manifest rather than reflected in the exit status.
    """
    import base64

    print(f"[capture] seeding tab -> {SEED_URL}", flush=True)
    tab = open_tab(SEED_URL)
    target_id = tab["id"]
    manifest: dict = {
        "finding": "2026-06-04_splc-doj-superseding-indictment-oneill-thread.md",
        "capture_method": "Lumina Chrome CDP (port 9222) — page-context fetch (Akamai bypass)",
        "targets": {},
    }
    # The connection is created inside the try so that a failed websocket
    # connect still closes the tab that was just opened.
    cdp = None
    try:
        cdp = CDP(tab["webSocketDebuggerUrl"])
        cdp.call("Page.enable")
        cdp.call("Runtime.enable")
        cdp.call("Network.enable", {"maxPostDataSize": 0})
        cdp.call("Page.navigate", {"url": SEED_URL})
        try:
            cdp.wait_event("Page.loadEventFired", timeout=30.0)
        except TimeoutError:
            # Best effort: the fetches below may still work on a partial load.
            print("[capture] load event not seen within 30s; continuing", flush=True)
        time.sleep(3.0)  # let Akamai cookies stick

        # ---- 1. Indictment PDF (primary) ----
        print(f"[capture] fetching PDF -> {PDF_URL}", flush=True)
        status, b64, length = fetch_pdf_b64_in_page(cdp, PDF_URL)
        print(f"[capture] PDF status={status} bytes={length}", flush=True)
        if status == 200 and b64:
            data = base64.b64decode(b64)
            # Recorded, not enforced: a 200 that is not a PDF is still saved
            # so the manifest shows what was actually served.
            is_pdf = data[:5] == b"%PDF-"
            pdf_path = OUT / "splc-superseding-indictment-1437146.pdf"
            pdf_path.write_bytes(data)
            sha = sha256_bytes(data)
            print(f"[capture] PDF written {len(data)} bytes is_pdf={is_pdf} sha256={sha}", flush=True)
            manifest["targets"]["indictment_pdf"] = {
                "url": PDF_URL, "status": status, "path": str(pdf_path),
                "bytes": len(data), "is_pdf_magic": is_pdf, "sha256": sha,
            }
        else:
            manifest["targets"]["indictment_pdf"] = {"url": PDF_URL, "status": status, "error": True}
            print("[capture] PDF FAILED — page-context fetch did not return 200", flush=True)

        # ---- 2. Discover + fetch DOJ press release ----
        print(f"[capture] searching DOJ news -> {SEARCH_URL}", flush=True)
        status, html = fetch_text_in_page(cdp, SEARCH_URL)
        print(f"[capture] search status={status} len={len(html)}", flush=True)
        pr_url = None
        if status == 200 and html:
            # Explicit UTF-8: the locale default may not encode the page text.
            (OUT / "doj-news-search.html").write_text(html, encoding="utf-8")
            # Find press-release links; prefer /opa/pr/ slugs mentioning the charge
            cands = re.findall(r'href="(/opa/pr/[^"#?]+)"', html)
            uniq = list(dict.fromkeys(cands))  # de-duplicate, keep first-seen order
            print(f"[capture] press-release candidates: {uniq[:10]}", flush=True)
            scored = [
                c for c in uniq
                if "southern-poverty" in c.lower()
                or "splc" in c.lower()
                or ("wire-fraud" in c.lower() and "law-center" in c.lower())
            ]
            if scored:
                pr_url = "https://www.justice.gov" + scored[0]
            elif uniq:
                # No slug matched the case; fall back to the first result.
                pr_url = "https://www.justice.gov" + uniq[0]
            manifest["press_release_candidates"] = uniq[:15]

        if pr_url:
            print(f"[capture] fetching press release -> {pr_url}", flush=True)
            status, prhtml = fetch_text_in_page(cdp, pr_url)
            print(f"[capture] press release status={status} len={len(prhtml)}", flush=True)
            if status == 200 and prhtml:
                (OUT / "doj-press-release.html").write_text(prhtml, encoding="utf-8")
                # Second in-page fetch: parse the HTML in the browser and pull
                # the text of the first matching body container.
                txt_expr = (
                    "(async () => {"
                    f" const r = await fetch({json.dumps(pr_url)}, {{credentials: 'include'}});"
                    " const html = await r.text();"
                    " const doc = new DOMParser().parseFromString(html, 'text/html');"
                    " const a = doc.querySelector('.field--name-body') || doc.querySelector('article')"
                    " || doc.querySelector('main') || doc.body;"
                    " return a ? a.innerText : '';"
                    "})()"
                )
                res = cdp.call("Runtime.evaluate", {
                    "expression": txt_expr, "awaitPromise": True, "returnByValue": True,
                }, timeout=60)
                txt = res.get("result", {}).get("value", "") or ""
                if txt:
                    (OUT / "doj-press-release.txt").write_text(txt, encoding="utf-8")
                    print(f"[capture] extracted {len(txt)} chars of press-release text", flush=True)
                manifest["targets"]["press_release"] = {"url": pr_url, "status": status,
                                                        "txt_chars": len(txt)}
            else:
                manifest["targets"]["press_release"] = {"url": pr_url, "status": status, "error": True}
        else:
            print("[capture] no press-release URL discovered from search", flush=True)
            manifest["targets"]["press_release"] = {"discovered": False}

        manifest["captured_at"] = time.strftime("%Y-%m-%dT%H:%M:%S%z")
        manifest_path = OUT / "manifest.json"
        manifest_path.write_text(json.dumps(manifest, indent=2), encoding="utf-8")
        print(f"[capture] manifest written -> {manifest_path}", flush=True)
        return 0
    finally:
        # Close the tab even when shutting down the CDP connection fails.
        try:
            if cdp is not None:
                cdp.close()
        finally:
            close_tab(target_id)
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
# Script entry point: propagate main()'s return value as the exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|