ultimate-pi 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agents/skills/harness-decisions/SKILL.md +37 -0
- package/.agents/skills/harness-governor/SKILL.md +1 -1
- package/.agents/skills/harness-orchestration/SKILL.md +54 -0
- package/.agents/skills/harness-plan/SKILL.md +4 -3
- package/.agents/skills/harness-sentrux-setup/SKILL.md +57 -0
- package/.agents/skills/scrapling-web/SKILL.md +93 -0
- package/.pi/PACKAGING.md +2 -2
- package/.pi/SYSTEM.md +13 -15
- package/.pi/agents/harness/adversary.md +3 -0
- package/.pi/agents/harness/evaluator.md +3 -0
- package/.pi/agents/harness/executor.md +4 -1
- package/.pi/agents/harness/meta-optimizer.md +2 -1
- package/.pi/agents/harness/planner.md +22 -1
- package/.pi/agents/harness/sentrux-bootstrap.md +42 -0
- package/.pi/agents/harness/tie-breaker.md +2 -0
- package/.pi/extensions/harness-ask-user.ts +74 -0
- package/.pi/extensions/harness-subagents.ts +9 -0
- package/.pi/extensions/lib/ask-user/dialog.ts +260 -0
- package/.pi/extensions/lib/ask-user/fallback.ts +78 -0
- package/.pi/extensions/lib/ask-user/render.ts +66 -0
- package/.pi/extensions/lib/ask-user/schema.ts +69 -0
- package/.pi/extensions/lib/ask-user/types.ts +41 -0
- package/.pi/extensions/lib/ask-user/validate-core.mjs +79 -0
- package/.pi/extensions/lib/ask-user/validate.ts +92 -0
- package/.pi/extensions/lib/harness-subagents/agent-loader.ts +126 -0
- package/.pi/extensions/lib/harness-subagents/agent-manifest.ts +119 -0
- package/.pi/extensions/lib/harness-subagents/agent-parser.ts +87 -0
- package/.pi/extensions/lib/harness-subagents/blackboard-tool.ts +118 -0
- package/.pi/extensions/lib/harness-subagents/blackboard.ts +175 -0
- package/.pi/extensions/lib/harness-subagents/spawn-policy.ts +27 -0
- package/.pi/extensions/lib/harness-subagents/types-blackboard.ts +27 -0
- package/.pi/extensions/lib/harness-subagents/vendored/agent-manager.ts +553 -0
- package/.pi/extensions/lib/harness-subagents/vendored/agent-runner.ts +637 -0
- package/.pi/extensions/lib/harness-subagents/vendored/agent-types.ts +175 -0
- package/.pi/extensions/lib/harness-subagents/vendored/context.ts +59 -0
- package/.pi/extensions/lib/harness-subagents/vendored/cross-extension-rpc.ts +134 -0
- package/.pi/extensions/lib/harness-subagents/vendored/custom-agents.ts +5 -0
- package/.pi/extensions/lib/harness-subagents/vendored/default-agents.ts +123 -0
- package/.pi/extensions/lib/harness-subagents/vendored/env.ts +43 -0
- package/.pi/extensions/lib/harness-subagents/vendored/group-join.ts +144 -0
- package/.pi/extensions/lib/harness-subagents/vendored/index.ts +2447 -0
- package/.pi/extensions/lib/harness-subagents/vendored/invocation-config.ts +52 -0
- package/.pi/extensions/lib/harness-subagents/vendored/memory.ts +182 -0
- package/.pi/extensions/lib/harness-subagents/vendored/model-resolver.ts +92 -0
- package/.pi/extensions/lib/harness-subagents/vendored/output-file.ts +115 -0
- package/.pi/extensions/lib/harness-subagents/vendored/prompts.ts +103 -0
- package/.pi/extensions/lib/harness-subagents/vendored/schedule-store.ts +177 -0
- package/.pi/extensions/lib/harness-subagents/vendored/schedule.ts +416 -0
- package/.pi/extensions/lib/harness-subagents/vendored/settings.ts +210 -0
- package/.pi/extensions/lib/harness-subagents/vendored/skill-loader.ts +108 -0
- package/.pi/extensions/lib/harness-subagents/vendored/types.ts +187 -0
- package/.pi/extensions/lib/harness-subagents/vendored/ui/agent-widget.ts +637 -0
- package/.pi/extensions/lib/harness-subagents/vendored/ui/conversation-viewer.ts +324 -0
- package/.pi/extensions/lib/harness-subagents/vendored/ui/schedule-menu.ts +110 -0
- package/.pi/extensions/lib/harness-subagents/vendored/usage.ts +71 -0
- package/.pi/extensions/lib/harness-subagents/vendored/worktree.ts +195 -0
- package/.pi/extensions/policy-gate.ts +18 -0
- package/.pi/extensions/provider-payload-sanitize.ts +66 -0
- package/.pi/harness/README.md +2 -1
- package/.pi/harness/agents.manifest.json +80 -0
- package/.pi/harness/docs/adrs/0009-sentrux-rules-lifecycle.md +9 -5
- package/.pi/harness/env.harness.template +28 -0
- package/.pi/harness/sentrux/architecture.manifest.json +6 -1
- package/.pi/prompts/harness-auto.md +2 -2
- package/.pi/prompts/harness-plan.md +2 -2
- package/.pi/prompts/harness-router-tune.md +2 -2
- package/.pi/prompts/harness-run.md +1 -0
- package/.pi/prompts/harness-setup.md +182 -339
- package/.pi/scripts/README.md +6 -1
- package/.pi/scripts/harness-agents-manifest.mjs +123 -0
- package/.pi/scripts/harness-cli-verify.sh +60 -11
- package/.pi/scripts/harness-generate-model-router.mjs +242 -0
- package/.pi/scripts/harness-graphify-bootstrap.sh +1 -6
- package/.pi/scripts/harness-resolve-up-pkg.mjs +71 -0
- package/.pi/scripts/harness-seed-project-contracts.mjs +81 -0
- package/.pi/scripts/harness-sentrux-bootstrap.mjs +146 -0
- package/.pi/scripts/harness-sync-env.mjs +148 -0
- package/.pi/scripts/harness-verify.mjs +19 -0
- package/.pi/scripts/harness-web-search.md +33 -0
- package/.pi/scripts/harness-web.py +177 -0
- package/.pi/scripts/harness_web/__init__.py +1 -0
- package/.pi/scripts/harness_web/config.py +80 -0
- package/.pi/scripts/harness_web/output.py +55 -0
- package/.pi/scripts/harness_web/scrape.py +120 -0
- package/.pi/scripts/harness_web/search_ddg.py +106 -0
- package/.pi/scripts/release.sh +338 -0
- package/.pi/scripts/sentrux-rules-sync.mjs +29 -7
- package/.pi/settings.example.json +0 -1
- package/.sentrux/rules.toml +1 -1
- package/AGENTS.md +1 -1
- package/CHANGELOG.md +20 -0
- package/THIRD_PARTY_NOTICES.md +22 -0
- package/package.json +12 -9
- package/.agents/skills/firecrawl/SKILL.md +0 -150
- package/.agents/skills/firecrawl/rules/install.md +0 -82
- package/.agents/skills/firecrawl/rules/security.md +0 -26
- package/.agents/skills/firecrawl-agent/SKILL.md +0 -57
- package/.agents/skills/firecrawl-build-interact/SKILL.md +0 -67
- package/.agents/skills/firecrawl-build-onboarding/SKILL.md +0 -102
- package/.agents/skills/firecrawl-build-onboarding/references/auth-flow.md +0 -39
- package/.agents/skills/firecrawl-build-onboarding/references/project-setup.md +0 -20
- package/.agents/skills/firecrawl-build-onboarding/references/sdk-installation.md +0 -17
- package/.agents/skills/firecrawl-build-scrape/SKILL.md +0 -68
- package/.agents/skills/firecrawl-build-search/SKILL.md +0 -68
- package/.agents/skills/firecrawl-crawl/SKILL.md +0 -58
- package/.agents/skills/firecrawl-download/SKILL.md +0 -69
- package/.agents/skills/firecrawl-interact/SKILL.md +0 -83
- package/.agents/skills/firecrawl-map/SKILL.md +0 -50
- package/.agents/skills/firecrawl-parse/SKILL.md +0 -61
- package/.agents/skills/firecrawl-scrape/SKILL.md +0 -68
- package/.agents/skills/firecrawl-search/SKILL.md +0 -59
- package/firecrawl/.env.template +0 -62
- package/firecrawl/README.md +0 -49
- package/firecrawl/docker-compose.yaml +0 -201
- package/firecrawl/searxng/searxng.env +0 -3
- package/firecrawl/searxng/settings.yml +0 -85
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Idempotent Sentrux rules bootstrap for harness projects.
|
|
4
|
+
*
|
|
5
|
+
* 1. Seeds `.pi/harness/sentrux/architecture.manifest.json` from the package template when missing
|
|
6
|
+
* 2. Personalizes `project` on first seed from target package.json / directory name
|
|
7
|
+
* 3. Runs `sentrux-rules-sync.mjs` (merge-safe; preserves custom TOML outside managed markers)
|
|
8
|
+
*
|
|
9
|
+
* Usage:
|
|
10
|
+
* node "$UP_PKG/.pi/scripts/harness-sentrux-bootstrap.mjs" [PROJECT_ROOT] [--force] [--check]
|
|
11
|
+
*
|
|
12
|
+
* Bootstrap vs force:
|
|
13
|
+
* - Default (harness-setup): no --force — skips write when manifest hash unchanged
|
|
14
|
+
* - After editing architecture.manifest.json: `--force` or `/harness-sentrux-sync`
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
import { readFile, writeFile, mkdir, access, copyFile } from "node:fs/promises";
|
|
18
|
+
import { constants } from "node:fs";
|
|
19
|
+
import { join, dirname, basename } from "node:path";
|
|
20
|
+
import { fileURLToPath } from "node:url";
|
|
21
|
+
import { spawn } from "node:child_process";
|
|
22
|
+
|
|
23
|
+
const SCRIPT_DIR = dirname(fileURLToPath(import.meta.url));
|
|
24
|
+
const UP_PKG = join(SCRIPT_DIR, "..", "..");
|
|
25
|
+
|
|
26
|
+
const args = process.argv.slice(2).filter((a) => !a.startsWith("-"));
|
|
27
|
+
const flags = process.argv.slice(2).filter((a) => a.startsWith("-"));
|
|
28
|
+
const force = flags.includes("--force");
|
|
29
|
+
const checkOnly = flags.includes("--check");
|
|
30
|
+
|
|
31
|
+
const PROJECT_ROOT = args[0] || process.cwd();
|
|
32
|
+
const MANIFEST = join(
|
|
33
|
+
PROJECT_ROOT,
|
|
34
|
+
".pi",
|
|
35
|
+
"harness",
|
|
36
|
+
"sentrux",
|
|
37
|
+
"architecture.manifest.json",
|
|
38
|
+
);
|
|
39
|
+
const MANIFEST_TEMPLATE = join(
|
|
40
|
+
UP_PKG,
|
|
41
|
+
".pi",
|
|
42
|
+
"harness",
|
|
43
|
+
"sentrux",
|
|
44
|
+
"architecture.manifest.json",
|
|
45
|
+
);
|
|
46
|
+
const SYNC_SCRIPT = join(SCRIPT_DIR, "sentrux-rules-sync.mjs");
|
|
47
|
+
|
|
48
|
+
/**
 * Report whether `path` can be read by the current process.
 *
 * @param {string} path Filesystem path to probe.
 * @returns {Promise<boolean>} `true` when `access(path, R_OK)` succeeds,
 *   `false` when it rejects for any reason.
 */
async function fileExists(path) {
	return access(path, constants.R_OK).then(
		() => true,
		() => false,
	);
}
|
|
56
|
+
|
|
57
|
+
/**
 * Pick a project name for the directory `root`.
 *
 * Uses the trimmed `name` field of `<root>/package.json` when that file is
 * readable, parses as JSON, and holds a non-blank string name. Otherwise
 * falls back to the last path segment of `root`, or `"project"` when that
 * segment is empty.
 *
 * @param {string} root Project root directory.
 * @returns {Promise<string>} The resolved project name.
 */
async function resolveProjectName(root) {
	const directoryName = basename(root) || "project";
	const manifestPath = join(root, "package.json");

	if (!(await fileExists(manifestPath))) {
		return directoryName;
	}

	try {
		const { name } = JSON.parse(await readFile(manifestPath, "utf-8"));
		const usable = typeof name === "string" && name.trim();
		if (usable) {
			return name.trim();
		}
	} catch {
		/* unreadable or malformed package.json: use the directory name */
	}
	return directoryName;
}
|
|
71
|
+
|
|
72
|
+
/**
 * Create the project's architecture manifest from the package template when
 * the project does not have one yet.
 *
 * An existing manifest is never touched. When the template itself is missing
 * the process logs an error and exits with code 1. After copying, the
 * manifest's `project` field is set to the name from `resolveProjectName`.
 *
 * @returns {Promise<{ seeded: boolean, projectName?: string }>}
 *   `{ seeded: false }` when the manifest already existed, otherwise
 *   `{ seeded: true, projectName }`.
 */
async function seedManifestIfMissing() {
	const alreadyPresent = await fileExists(MANIFEST);
	if (alreadyPresent) {
		return { seeded: false };
	}

	const templateAvailable = await fileExists(MANIFEST_TEMPLATE);
	if (!templateAvailable) {
		console.error(
			"harness-sentrux-bootstrap: missing package template",
			MANIFEST_TEMPLATE,
		);
		process.exit(1);
	}

	// Copy first, then rewrite the copy with the personalized project name.
	await mkdir(dirname(MANIFEST), { recursive: true });
	await copyFile(MANIFEST_TEMPLATE, MANIFEST);

	const projectName = await resolveProjectName(PROJECT_ROOT);
	const seededManifest = JSON.parse(await readFile(MANIFEST, "utf-8"));
	seededManifest.project = projectName;
	const serialized = `${JSON.stringify(seededManifest, null, 2)}\n`;
	await writeFile(MANIFEST, serialized, "utf-8");

	console.log(
		`harness-sentrux-bootstrap: seeded manifest -> ${MANIFEST} (project: ${projectName})`,
	);
	return { seeded: true, projectName };
}
|
|
94
|
+
|
|
95
|
+
/**
 * Run `sentrux-rules-sync.mjs` as a child Node process in the project root.
 *
 * stdout and stderr are captured together in arrival order. The returned
 * promise never rejects: a spawn error resolves with code 1 and the error
 * message as output.
 *
 * @param {string[]} extraArgs Flags placed before the project root argument.
 * @returns {Promise<{ code: number, out: string }>} Exit code (1 when the
 *   child reports none) and the trimmed combined output.
 */
function runSync(extraArgs) {
	return new Promise((resolve) => {
		const captured = [];
		const collect = (chunk) => {
			captured.push(chunk.toString());
		};

		const child = spawn(process.execPath, [SYNC_SCRIPT, ...extraArgs, PROJECT_ROOT], {
			cwd: PROJECT_ROOT,
			stdio: ["ignore", "pipe", "pipe"],
			env: process.env,
		});

		child.stdout?.on("data", collect);
		child.stderr?.on("data", collect);
		child.on("close", (exitCode) => {
			resolve({ code: exitCode ?? 1, out: captured.join("").trim() });
		});
		child.on("error", (err) => {
			resolve({ code: 1, out: String(err.message) });
		});
	});
}
|
|
119
|
+
|
|
120
|
+
/**
 * Entry point: seed the manifest if needed, then run the rules sync.
 *
 * `--check` takes precedence over `--force` when both flags are given. The
 * sync output is echoed, and a non-zero sync exit code becomes this
 * process's exit code. The closing hint is printed only for a plain run
 * with neither flag.
 *
 * @returns {Promise<void>}
 */
async function main() {
	const seedResult = await seedManifestIfMissing();
	if (!seedResult.seeded) {
		console.log(
			"harness-sentrux-bootstrap: manifest present (edit layers/boundaries there, then re-run with --force)",
		);
	}

	let syncArgs = [];
	if (checkOnly) {
		syncArgs = ["--check"];
	} else if (force) {
		syncArgs = ["--force"];
	}

	const result = await runSync(syncArgs);
	if (result.out) console.log(result.out);
	if (result.code !== 0) process.exit(result.code);

	if (checkOnly || force) return;
	console.log(
		"harness-sentrux-bootstrap: done (idempotent). After manifest edits: node \"$UP_PKG/.pi/scripts/harness-sentrux-bootstrap.mjs\" --force",
	);
}
|
|
142
|
+
|
|
143
|
+
main().catch((err) => {
|
|
144
|
+
console.error(err);
|
|
145
|
+
process.exit(1);
|
|
146
|
+
});
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Sync project-root `.env` with harness-required keys (non-destructive).
|
|
4
|
+
*
|
|
5
|
+
* - Never overwrites existing keys or values.
|
|
6
|
+
* - If `.env` is missing: exit 2 and print instructions (use --create-missing after user confirms).
|
|
7
|
+
* - If `.env` exists: append only missing keys inside a managed block at EOF.
|
|
8
|
+
*
|
|
9
|
+
* Usage:
|
|
10
|
+
* node harness-sync-env.mjs [--create-missing] [--dry-run]
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import { existsSync, readFileSync, writeFileSync } from "node:fs";
|
|
14
|
+
import { dirname, join } from "node:path";
|
|
15
|
+
import { fileURLToPath } from "node:url";
|
|
16
|
+
|
|
17
|
+
const SCRIPT_DIR = dirname(fileURLToPath(import.meta.url));
|
|
18
|
+
const UP_PKG = join(SCRIPT_DIR, "..", "..");
|
|
19
|
+
const TEMPLATE_PATH = join(UP_PKG, ".pi", "harness", "env.harness.template");
|
|
20
|
+
const ENV_PATH = join(process.cwd(), ".env");
|
|
21
|
+
|
|
22
|
+
const MANAGED_START = "# --- harness:env:start ---";
|
|
23
|
+
const MANAGED_END = "# --- harness:env:end ---";
|
|
24
|
+
|
|
25
|
+
/**
 * Collect the variable names defined in the text of a `.env` file.
 *
 * Blank lines, `#` comments, and lines without `=` are ignored. A leading
 * `export ` (shell-style dotenv syntax) is stripped before the name is
 * validated, so `export FOO=bar` counts as defining `FOO`. Without that,
 * such keys would be reported missing and appended a second time.
 *
 * @param {string} content Raw `.env` text (LF or CRLF line endings).
 * @returns {Set<string>} Names matching `[A-Za-z_][A-Za-z0-9_]*`.
 */
function parseEnvKeys(content) {
	const keys = new Set();
	for (const raw of content.split(/\r?\n/)) {
		const line = raw.trim();
		if (!line || line.startsWith("#")) continue;
		const eq = line.indexOf("=");
		if (eq === -1) continue;
		// Drop an optional `export` prefix so exported keys are recognized.
		const key = line
			.slice(0, eq)
			.replace(/^export\s+/, "")
			.trim();
		if (/^[A-Za-z_][A-Za-z0-9_]*$/.test(key)) keys.add(key);
	}
	return keys;
}
|
|
37
|
+
|
|
38
|
+
/**
 * Split the env template into ordered entries.
 *
 * Comment lines are kept with `key: null`. `KEY=value` lines with a valid
 * name are kept with their key. Blank lines, lines without `=`, and lines
 * whose name is invalid are dropped. `line` is always the untrimmed
 * original text.
 *
 * @param {string} templateText Raw template contents.
 * @returns {{ key: string | null, line: string }[]}
 */
function parseTemplateEntries(templateText) {
	const validName = /^[A-Za-z_][A-Za-z0-9_]*$/;
	return templateText.split("\n").flatMap((raw) => {
		const text = raw.trim();
		if (text === "") return [];
		if (text.startsWith("#")) return [{ key: null, line: raw }];

		const eqAt = text.indexOf("=");
		if (eqAt < 0) return [];

		const key = text.slice(0, eqAt).trim();
		return validName.test(key) ? [{ key, line: raw }] : [];
	});
}
|
|
55
|
+
|
|
56
|
+
/**
 * Render a complete managed block for the given entries.
 *
 * The text starts with an empty line, then the start marker, an explanatory
 * comment, one line per entry, and the end marker, and it ends with a
 * newline.
 *
 * @param {{ line: string }[]} missingEntries Entries whose `line` is emitted verbatim.
 * @returns {string} The block text.
 */
function buildManagedBlock(missingEntries) {
	const notice =
		"# Added by /harness-setup — edit values; existing keys elsewhere in .env are never changed.";
	const entryLines = missingEntries.map((entry) => entry.line);
	return ["", MANAGED_START, notice, ...entryLines, MANAGED_END, ""].join("\n");
}
|
|
69
|
+
|
|
70
|
+
/**
 * Return `content` with the missing entries added to the managed block.
 *
 * - No complete managed block (no start marker, or no end marker after it):
 *   a fresh block is appended at the end of the file.
 * - Managed block present: the new lines are inserted immediately before
 *   the end marker, so they stay inside the block. Inserting after the
 *   marker would leave them outside the managed region.
 *
 * Text outside the inserted lines is returned unchanged.
 *
 * @param {string} content Current `.env` text.
 * @param {{ line: string }[]} missingEntries Entries to add, in order.
 * @returns {string} Updated `.env` text.
 */
function insertIntoManagedBlock(content, missingEntries) {
	const start = content.indexOf(MANAGED_START);
	// Only an end marker that follows the start marker closes the block.
	const end =
		start === -1
			? -1
			: content.indexOf(MANAGED_END, start + MANAGED_START.length);

	if (end === -1) {
		const block = buildManagedBlock(missingEntries).trimEnd();
		const sep = content.length === 0 || content.endsWith("\n") ? "" : "\n";
		return `${content}${sep}${block}\n`;
	}

	const before = content.slice(0, end);
	const after = content.slice(end);
	// Keep the first added key on its own line even if the marker is not.
	const sep = before.endsWith("\n") ? "" : "\n";
	const additions = missingEntries.map((e) => e.line).join("\n");
	return `${before}${sep}${additions}\n${after}`;
}
|
|
87
|
+
|
|
88
|
+
/**
 * Entry point for the `.env` sync.
 *
 * Exit codes: 1 when the harness template is missing; 2 when `.env` is
 * missing and `--create-missing` was not passed (instructions are printed);
 * 0 for the explicit exits on the create, dry-run, and nothing-to-do paths.
 * With `--dry-run` nothing is written. An existing `.env` only ever gains
 * keys that `parseEnvKeys` does not already find in it.
 */
function main() {
	const cliArgs = process.argv;
	const createMissing = cliArgs.includes("--create-missing");
	const dryRun = cliArgs.includes("--dry-run");

	if (!existsSync(TEMPLATE_PATH)) {
		console.error(`harness-sync-env: missing template ${TEMPLATE_PATH}`);
		process.exit(1);
	}

	const templateText = readFileSync(TEMPLATE_PATH, "utf8");
	// Comment entries carry `key: null`; only real keys can be missing.
	const keyedEntries = parseTemplateEntries(templateText).filter(
		(entry) => entry.key,
	);

	if (!existsSync(ENV_PATH)) {
		if (createMissing) {
			const body = `${templateText.trimEnd()}\n`;
			if (dryRun) {
				console.log("[dry-run] would create .env from harness template");
				process.stdout.write(body);
				process.exit(0);
			}
			writeFileSync(ENV_PATH, body, "utf8");
			console.log("✓ Created .env from harness template (edit secrets locally)");
			process.exit(0);
		}

		const selfPath = join(UP_PKG, ".pi", "scripts", "harness-sync-env.mjs");
		const instructions = [
			"✗ No .env at project root",
			"",
			"Create one, then re-run harness env sync:",
			` cp "${TEMPLATE_PATH}" .env`,
			" # edit .env with your API keys",
			` node "${selfPath}"`,
			"",
			"Or, after user confirms:",
			` node "${selfPath}" --create-missing`,
		];
		for (const text of instructions) {
			console.log(text);
		}
		process.exit(2);
	}

	const currentEnv = readFileSync(ENV_PATH, "utf8");
	const presentKeys = parseEnvKeys(currentEnv);
	const missing = keyedEntries.filter(({ key }) => !presentKeys.has(key));

	if (missing.length === 0) {
		console.log("✓ .env contains all harness template keys — no changes");
		process.exit(0);
	}

	const updated = insertIntoManagedBlock(currentEnv, missing);
	const missingNames = missing.map((m) => m.key).join(", ");
	if (dryRun) {
		console.log(`[dry-run] would append ${missing.length} key(s): ${missingNames}`);
		process.exit(0);
	}

	writeFileSync(ENV_PATH, updated, "utf8");
	console.log(
		`✓ Appended ${missing.length} harness env key(s) to .env (existing values preserved): ${missingNames}`,
	);
}
|
|
147
|
+
|
|
148
|
+
main();
|
|
@@ -40,8 +40,11 @@ const REQUIRED_EXTENSIONS = [
|
|
|
40
40
|
"observation-bus.ts",
|
|
41
41
|
"drift-monitor.ts",
|
|
42
42
|
"sentrux-rules-sync.ts",
|
|
43
|
+
"harness-subagents.ts",
|
|
43
44
|
];
|
|
44
45
|
|
|
46
|
+
const AGENTS_MANIFEST = join(ROOT, ".pi", "harness", "agents.manifest.json");
|
|
47
|
+
|
|
45
48
|
const SENTRUX_MANIFEST = join(
|
|
46
49
|
ROOT,
|
|
47
50
|
".pi",
|
|
@@ -203,6 +206,22 @@ async function main() {
|
|
|
203
206
|
|
|
204
207
|
await checkSentruxGate();
|
|
205
208
|
|
|
209
|
+
if (!(await fileExists(AGENTS_MANIFEST))) {
|
|
210
|
+
fail(
|
|
211
|
+
"missing .pi/harness/agents.manifest.json — run node \"$UP_PKG/.pi/scripts/harness-agents-manifest.mjs\" --write",
|
|
212
|
+
);
|
|
213
|
+
}
|
|
214
|
+
ok("agents.manifest.json present");
|
|
215
|
+
|
|
216
|
+
const { code: manifestCode, out: manifestOut } = await runNodeScript(
|
|
217
|
+
join(ROOT, ".pi", "scripts", "harness-agents-manifest.mjs"),
|
|
218
|
+
["--check"],
|
|
219
|
+
);
|
|
220
|
+
if (manifestCode !== 0) {
|
|
221
|
+
fail(manifestOut.trim() || "agents.manifest.json drift — regenerate with --write");
|
|
222
|
+
}
|
|
223
|
+
ok("agents.manifest.json in sync");
|
|
224
|
+
|
|
206
225
|
console.log("\nharness:verify PASS");
|
|
207
226
|
}
|
|
208
227
|
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# harness-web search (internal)
|
|
2
|
+
|
|
3
|
+
## Engine
|
|
4
|
+
|
|
5
|
+
Default: DuckDuckGo static HTML — `GET https://html.duckduckgo.com/html/?q=…`
|
|
6
|
+
|
|
7
|
+
Implemented in `harness_web/search_ddg.py` via `Fetcher.get` (HTTP, not a browser per query).
|
|
8
|
+
|
|
9
|
+
## Selectors
|
|
10
|
+
|
|
11
|
+
| Field | CSS |
|
|
12
|
+
|-------|-----|
|
|
13
|
+
| Result block | `.result` |
|
|
14
|
+
| Title + link | `.result__a` |
|
|
15
|
+
| Snippet | `.result__snippet` |
|
|
16
|
+
|
|
17
|
+
DDG redirect URLs (`//duckduckgo.com/l/?uddg=…`) are unwrapped to the target `uddg` parameter.
|
|
18
|
+
|
|
19
|
+
## Challenge detection
|
|
20
|
+
|
|
21
|
+
If status 403 or HTML contains challenge markers (`anomaly-modal`, etc.), retry **once** with `StealthyFetcher`, then exit with a clear “search engine blocked” message.
|
|
22
|
+
|
|
23
|
+
## Output
|
|
24
|
+
|
|
25
|
+
`.web/search.json` — envelope compatible with legacy Firecrawl skills:
|
|
26
|
+
|
|
27
|
+
```json
|
|
28
|
+
{
|
|
29
|
+
"query": "...",
|
|
30
|
+
"engine": "ddg_html",
|
|
31
|
+
"data": { "web": [{ "url", "title", "description" }] }
|
|
32
|
+
}
|
|
33
|
+
```
|
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""harness-web — Scrapling-backed web search and scrape for ultimate-pi harness agents."""
|
|
3
|
+
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
6
|
+
import argparse
|
|
7
|
+
import os
|
|
8
|
+
import shutil
|
|
9
|
+
import sys
|
|
10
|
+
import time
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
|
|
13
|
+
# Re-exec with scrapling's uv-tool Python when the library is not on default python3.
|
|
14
|
+
def _shebang_command(first_line: str) -> list[str]:
    """Return the interpreter argv encoded in a ``#!`` line, or ``[]``.

    Follows the kernel convention for interpreter scripts: the interpreter
    path, optionally followed by a single argument made of the rest of the
    line (``#!/usr/bin/env python3`` gives ``["/usr/bin/env", "python3"]``).
    """
    line = first_line.strip()
    if not line.startswith("#!"):
        return []
    return line[2:].strip().split(None, 1)


def _bootstrap_scrapling() -> None:
    """Make sure ``scrapling`` is importable, re-executing if necessary.

    When the import works this returns immediately. Otherwise the
    ``scrapling`` launcher on ``PATH`` is inspected and the current script is
    re-executed with the interpreter named in the launcher's shebang. The
    process exits with a readable message when scrapling is not installed,
    when the launcher has no usable Python shebang, when the interpreter
    cannot be started, or when a previous re-exec still could not import
    scrapling (which would otherwise loop forever).
    """
    try:
        import scrapling  # noqa: F401
    except ImportError:
        pass
    else:
        return

    reexec_flag = "HARNESS_WEB_REEXEC"
    if os.environ.get(reexec_flag):
        sys.exit(
            "harness-web: scrapling is still not importable after re-exec "
            f"under {sys.executable}"
        )

    scrapling_bin = shutil.which("scrapling")
    if not scrapling_bin:
        sys.exit(
            'scrapling not installed. Run: uv tool install "scrapling[fetchers]" && scrapling install'
        )

    # Read as bytes: the launcher may be a native executable, not a script.
    try:
        with open(scrapling_bin, "rb") as fh:
            first_line = fh.readline(4096).decode("utf-8", errors="replace")
    except OSError as err:
        sys.exit(f"harness-web: cannot read {scrapling_bin}: {err}")

    command = _shebang_command(first_line)
    interpreter_name = os.path.basename(command[-1]).lower() if command else ""
    if "python" not in interpreter_name and "pypy" not in interpreter_name:
        sys.exit(
            "harness-web: could not find a Python interpreter in the shebang "
            f"of {scrapling_bin}"
        )

    os.environ[reexec_flag] = "1"  # inherited by the re-executed process
    try:
        os.execv(command[0], [*command, *sys.argv])
    except OSError as err:
        sys.exit(f"harness-web: cannot execute {command[0]}: {err}")
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
_bootstrap_scrapling()
|
|
30
|
+
|
|
31
|
+
# Imports after bootstrap (scrapling must be available).
|
|
32
|
+
SCRIPT_DIR = Path(__file__).resolve().parent
|
|
33
|
+
if str(SCRIPT_DIR) not in sys.path:
|
|
34
|
+
sys.path.insert(0, str(SCRIPT_DIR))
|
|
35
|
+
|
|
36
|
+
from harness_web.config import HarnessWebConfig, load_config # noqa: E402
|
|
37
|
+
from harness_web.output import write_search_results # noqa: E402
|
|
38
|
+
from harness_web.scrape import bulk_scrape, map_url, scrape_url # noqa: E402
|
|
39
|
+
from harness_web.search_ddg import search_ddg # noqa: E402
|
|
40
|
+
|
|
41
|
+
DEFAULT_WEB_DIR = ".web"
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _default_out(sub: str) -> Path:
    """Return the default artifact path for *sub* under ``DEFAULT_WEB_DIR``."""
    return Path(DEFAULT_WEB_DIR).joinpath(sub)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def cmd_search(args: argparse.Namespace, config: HarnessWebConfig) -> int:
    """Handle ``search``: run the query and write the results as JSON.

    Writes to ``args.output`` when given, otherwise to the default
    ``search.json`` location, prints a one-line summary, and returns 0.
    """
    target = args.output if args.output else _default_out("search.json")
    destination = Path(target)
    hits = search_ddg(args.query, limit=args.limit, config=config)
    write_search_results(destination, hits, args.query)
    print(f"wrote {destination} ({len(hits)} results)")
    return 0
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def cmd_scrape(args: argparse.Namespace, config: HarnessWebConfig) -> int:
    """Handle ``scrape``: fetch ``args.url`` and write it to a markdown file.

    The fetch mode comes from ``config.use_fast_for_url`` combined with the
    ``--fast`` flag. Prints the output path and mode, then returns 0.
    """
    target = args.output if args.output else _default_out("page.md")
    destination = Path(target)
    use_fast = config.use_fast_for_url(args.url, args.fast)
    scrape_url(
        args.url,
        str(destination),
        config=config,
        fast=use_fast,
        wait_ms=args.wait_for,
    )
    label = "fast" if use_fast else "stealth"
    print(f"wrote {destination} ({label})")
    return 0
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def cmd_map(args: argparse.Namespace, config: HarnessWebConfig) -> int:
    """Handle ``map``: write the link map for ``args.url`` as JSON.

    Writes to ``args.output`` or the default ``map.json`` location, prints
    the output path, and returns 0.
    """
    target = args.output if args.output else _default_out("map.json")
    destination = Path(target)
    use_fast = config.use_fast_for_url(args.url, args.fast)
    map_url(
        args.url,
        str(destination),
        config=config,
        fast=use_fast,
        limit=args.limit,
    )
    print(f"wrote {destination}")
    return 0
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def cmd_bulk_scrape(args: argparse.Namespace, config: HarnessWebConfig) -> int:
    """Handle ``bulk-scrape``: scrape several URLs into an output directory.

    URL sources, in priority order:

    1. explicit positional URLs;
    2. the ``data.web[].url`` entries of the ``--from-search`` JSON file;
    3. a fresh search for ``args.query``.

    At most ``args.limit`` URLs are attempted.

    Returns:
        0 when at least one attempted page was written; 1 when there were no
        URLs or every attempted URL failed; 2 when no query, URLs, or
        ``--from-search`` file was supplied.
    """
    import json

    sleep_sec = args.sleep if args.sleep is not None else config.rate_limit_ms / 1000.0

    if args.urls:
        urls = list(args.urls)
        # The parser binds the first positional to ``query``, so with
        # ``bulk-scrape URL1 URL2`` the first URL arrives there; keep it.
        if args.query and args.query.startswith(("http://", "https://")):
            urls.insert(0, args.query)
    elif args.from_search:
        data = json.loads(Path(args.from_search).read_text(encoding="utf-8"))
        urls = [item["url"] for item in data.get("data", {}).get("web", []) if item.get("url")]
    elif args.query:
        hits = search_ddg(args.query, limit=args.limit, config=config)
        urls = [hit["url"] for hit in hits]
    else:
        print(
            "bulk-scrape: provide a search query, explicit URLs, or --from-search",
            file=sys.stderr,
        )
        return 2

    if not urls:
        print("bulk-scrape: no URLs to fetch", file=sys.stderr)
        return 1

    selected = urls[: args.limit]
    out_dir = Path(args.output or _default_out("bulk"))
    fast = args.fast or config.fetch_mode == "fast"
    failures = bulk_scrape(
        selected,
        str(out_dir),
        config=config,
        fast=fast,
        sleep_sec=sleep_sec,
    )
    print(f"wrote {len(selected) - len(failures)} pages to {out_dir}")
    for fail in failures:
        print(f"  failed: {fail}", file=sys.stderr)
    # Compare against what was attempted, not every candidate URL, so a
    # total failure is still reported when the list was cut by --limit.
    return 1 if failures and len(failures) >= len(selected) else 0
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def build_parser() -> argparse.ArgumentParser:
    """Build the ``harness-web`` CLI parser.

    A subcommand is required (stored in ``command``); each of ``search``,
    ``scrape``, ``bulk-scrape`` and ``map`` sets ``func`` to its handler.
    """
    parser = argparse.ArgumentParser(
        prog="harness-web",
        description="Harness web layer: search (DDG HTML) and scrape (Scrapling).",
    )
    commands = parser.add_subparsers(dest="command", required=True)

    # search
    search_cmd = commands.add_parser("search", help="Search via DuckDuckGo HTML SERP")
    search_cmd.add_argument("query", help="Search query")
    search_cmd.add_argument(
        "-o", "--output", help="JSON output path (default: .web/search.json)"
    )
    search_cmd.add_argument("--limit", type=int, default=5)
    search_cmd.set_defaults(func=cmd_search)

    # scrape
    scrape_cmd = commands.add_parser("scrape", help="Scrape a URL to markdown")
    scrape_cmd.add_argument("url")
    scrape_cmd.add_argument(
        "-o", "--output", help="Markdown output (default: .web/page.md)"
    )
    scrape_cmd.add_argument(
        "--fast",
        action="store_true",
        help="HTTP get + ai-targeted extract (skip stealth browser)",
    )
    scrape_cmd.add_argument(
        "--wait-for",
        type=int,
        default=None,
        metavar="MS",
        help="Extra wait after load (stealth mode, milliseconds)",
    )
    scrape_cmd.set_defaults(func=cmd_scrape)

    # bulk-scrape
    bulk_cmd = commands.add_parser("bulk-scrape", help="Search then scrape multiple URLs")
    bulk_cmd.add_argument(
        "query", nargs="?", help="Search query when not using --from-search"
    )
    bulk_cmd.add_argument("-o", "--output", help="Output directory (default: .web/bulk)")
    bulk_cmd.add_argument("--limit", type=int, default=3, help="Max URLs to scrape")
    bulk_cmd.add_argument(
        "--from-search", metavar="JSON", help="Use URLs from search JSON file"
    )
    bulk_cmd.add_argument("urls", nargs="*", help="Explicit URLs to scrape")
    bulk_cmd.add_argument("--fast", action="store_true")
    bulk_cmd.add_argument(
        "--sleep",
        type=float,
        default=None,
        help="Seconds between scrapes (default: HARNESS_WEB_RATE_LIMIT_MS)",
    )
    bulk_cmd.set_defaults(func=cmd_bulk_scrape)

    # map
    map_cmd = commands.add_parser("map", help="List same-host links from a page")
    map_cmd.add_argument("url")
    map_cmd.add_argument("-o", "--output", help="JSON output (default: .web/map.json)")
    map_cmd.add_argument("--limit", type=int, default=100)
    map_cmd.add_argument("--fast", action="store_true")
    map_cmd.set_defaults(func=cmd_map)

    return parser
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def main(argv: list[str] | None = None) -> int:
    """Parse *argv*, load the configuration, and run the chosen subcommand.

    Args:
        argv: Argument list without the program name; ``None`` makes argparse
            read ``sys.argv``.

    Returns:
        The exit status returned by the subcommand handler.
    """
    args = build_parser().parse_args(argv)
    # The previous elapsed-time measurement was computed and then discarded.
    return args.func(args, load_config())
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
if __name__ == "__main__":
|
|
177
|
+
raise SystemExit(main())
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Harness web facade — Scrapling-backed search and scrape for ultimate-pi agents."""
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
"""Environment and defaults for harness-web."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from urllib.parse import urlparse
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def _int_env(name: str, default: int) -> int:
    """Read environment variable *name* as an integer.

    Returns *default* when the variable is unset, blank, or not parseable by
    ``int()``.
    """
    text = os.environ.get(name, "").strip()
    try:
        return int(text) if text else default
    except ValueError:
        return default
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _fetch_mode() -> str:
    """Return the fetch mode requested through ``HARNESS_WEB_FETCH_MODE``.

    The value is trimmed and lower-cased. Anything other than ``stealth``,
    ``fast`` or ``auto`` (including an unset variable) yields ``stealth``.
    """
    requested = os.environ.get("HARNESS_WEB_FETCH_MODE", "stealth").strip().lower()
    return requested if requested in {"stealth", "fast", "auto"} else "stealth"
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
# Hostnames that host_is_static() reports as static by exact match.
_STATIC_HOSTS = frozenset(
    (
        "example.com",
        "www.example.com",
        "localhost",
        "127.0.0.1",
    )
)


def host_is_static(url: str) -> bool:
    """Return True when *url*'s hostname is a known static or local host.

    A hostname qualifies when it is listed in ``_STATIC_HOSTS`` or ends with
    ``.localhost``; the comparison is case-insensitive. URLs that cannot be
    parsed, or that have no hostname, yield False.
    """
    try:
        hostname = urlparse(url).hostname
    except ValueError:
        return False
    name = (hostname or "").lower()
    return name in _STATIC_HOSTS or name.endswith(".localhost")
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
@dataclass(frozen=True)
class HarnessWebConfig:
    """Immutable settings for the harness-web commands."""

    # Fetch strategy; use_fast_for_url() gives meaning to "fast" and "auto".
    fetch_mode: str
    # Search engine identifier.
    search_engine: str
    # Proxy setting, or None when no proxy is configured.
    proxy: str | None
    # Rate limit, in milliseconds.
    rate_limit_ms: int
    # Timeout, in milliseconds.
    timeout_ms: int

    @property
    def timeout_sec(self) -> int:
        """``timeout_ms`` floored to whole seconds, never less than 1."""
        whole_seconds = self.timeout_ms // 1000
        return whole_seconds if whole_seconds > 1 else 1

    def use_fast_for_url(self, url: str, cli_fast: bool) -> bool:
        """Decide whether *url* should be fetched in fast mode.

        Fast mode is chosen when *cli_fast* is set, when ``fetch_mode`` is
        ``"fast"``, or when ``fetch_mode`` is ``"auto"`` and
        ``host_is_static(url)`` is true.
        """
        if cli_fast or self.fetch_mode == "fast":
            return True
        return self.fetch_mode == "auto" and host_is_static(url)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def load_config() -> HarnessWebConfig:
    """Build a ``HarnessWebConfig`` from ``HARNESS_WEB_*`` environment variables.

    Blank values fall back to the defaults: no proxy, the ``ddg_html`` search
    engine, a 2000 ms rate limit and a 30000 ms timeout.
    """
    env = os.environ
    proxy_value = env.get("HARNESS_WEB_PROXY", "").strip()
    engine = env.get("HARNESS_WEB_SEARCH_ENGINE", "ddg_html").strip()
    return HarnessWebConfig(
        fetch_mode=_fetch_mode(),
        search_engine=engine if engine else "ddg_html",
        proxy=proxy_value if proxy_value else None,
        rate_limit_ms=_int_env("HARNESS_WEB_RATE_LIMIT_MS", 2000),
        timeout_ms=_int_env("HARNESS_WEB_TIMEOUT_MS", 30000),
    )
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
"""Write harness-web artifacts (JSON + markdown)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from scrapling.core.shell import Convertor
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def ensure_parent(path: Path) -> None:
    """Create the directory that contains *path*, with any missing ancestors.

    An already existing directory is not an error.
    """
    directory = path.parent
    directory.mkdir(parents=True, exist_ok=True)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def write_json(path: Path, payload: Any) -> None:
    """Serialize *payload* to *path* as UTF-8 JSON.

    Output uses two-space indentation, keeps non-ASCII characters unescaped,
    and ends with a newline. The parent directory is created first.
    """
    ensure_parent(path)
    text = json.dumps(payload, indent=2, ensure_ascii=False)
    path.write_text(f"{text}\n", encoding="utf-8")
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def write_search_results(path: Path, results: list[dict[str, str]], query: str) -> None:
    """Write search hits in the Firecrawl-compatible envelope.

    The JSON document has ``query``, ``engine`` (always ``"ddg_html"``) and
    ``data.web``, a list of ``url``/``title``/``description`` objects. Every
    result must carry ``url``; a missing title or description becomes ``""``.
    """
    web_entries = [
        {
            "url": hit["url"],
            "title": hit.get("title", ""),
            "description": hit.get("description", ""),
        }
        for hit in results
    ]
    envelope = {
        "query": query,
        "engine": "ddg_html",
        "data": {"web": web_entries},
    }
    write_json(path, envelope)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def write_page_markdown(path: Path, page: Any, *, main_content_only: bool = True) -> None:
    """Write *page* to *path* through Scrapling's ``Convertor``.

    If the conversion raises ``ModuleNotFoundError`` mentioning
    ``markdownify``, the page's plain text is written instead:
    ``page.get_all_text(strip=True)`` when that method exists, otherwise
    ``str(page)``. Any other ``ModuleNotFoundError`` is re-raised.
    """
    ensure_parent(path)
    try:
        Convertor.write_content_to_file(
            page,
            str(path.resolve()),
            css_selector=None,
            main_content_only=main_content_only,
        )
    except ModuleNotFoundError as err:
        markdownify_missing = "markdownify" in str(err)
        if not markdownify_missing:
            raise
        # Plain-text fallback when markdownify is unavailable.
        if hasattr(page, "get_all_text"):
            text = page.get_all_text(strip=True)
        else:
            text = str(page)
        path.write_text(text + "\n", encoding="utf-8")
|