unbrowse 2.8.4 → 2.8.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +392 -645
- package/package.json +1 -1
- package/runtime-src/api/routes.ts +42 -7
- package/runtime-src/cli.ts +248 -356
- package/runtime-src/execution/index.ts +21 -35
- package/runtime-src/orchestrator/index.ts +73 -41
- package/runtime-src/server.ts +2 -8
- package/runtime-src/version.ts +1 -6
- package/vendor/kuri/darwin-arm64/kuri +0 -0
- package/vendor/kuri/darwin-x64/kuri +0 -0
- package/vendor/kuri/linux-arm64/kuri +0 -0
- package/vendor/kuri/linux-x64/kuri +0 -0
- package/runtime-src/transform/schema-hints.ts +0 -358
package/dist/cli.js
CHANGED
|
@@ -624,14 +624,6 @@ async function executeInPageFetch(tabId, url, method, headers, body) {
|
|
|
624
624
|
return { status: 0, data: result };
|
|
625
625
|
}
|
|
626
626
|
}
|
|
627
|
-
async function health() {
|
|
628
|
-
try {
|
|
629
|
-
const result = await kuriGet("/health");
|
|
630
|
-
return { ok: result?.ok === true || result?.status === "ok", tabs: result?.tabs };
|
|
631
|
-
} catch {
|
|
632
|
-
return { ok: false };
|
|
633
|
-
}
|
|
634
|
-
}
|
|
635
627
|
async function action(tabId, actionType, ref, value) {
|
|
636
628
|
const params = { tab_id: tabId, action: actionType, ref };
|
|
637
629
|
if (value !== undefined)
|
|
@@ -5289,7 +5281,6 @@ var init_marketplace = __esm(() => {
|
|
|
5289
5281
|
import { createHash as createHash3 } from "crypto";
|
|
5290
5282
|
import { readFileSync as readFileSync3, readdirSync as readdirSync3 } from "fs";
|
|
5291
5283
|
import { dirname, join as join4 } from "path";
|
|
5292
|
-
import { execSync } from "child_process";
|
|
5293
5284
|
import { fileURLToPath as fileURLToPath2 } from "url";
|
|
5294
5285
|
function collectTsFiles(dir) {
|
|
5295
5286
|
const results = [];
|
|
@@ -5318,11 +5309,7 @@ function computeCodeHash() {
|
|
|
5318
5309
|
}
|
|
5319
5310
|
}
|
|
5320
5311
|
function getGitSha() {
|
|
5321
|
-
|
|
5322
|
-
return execSync("git rev-parse --short HEAD", { encoding: "utf-8", cwd: MODULE_DIR }).trim();
|
|
5323
|
-
} catch {
|
|
5324
|
-
return "unknown";
|
|
5325
|
-
}
|
|
5312
|
+
return "unknown";
|
|
5326
5313
|
}
|
|
5327
5314
|
var MODULE_DIR, CODE_HASH, GIT_SHA, TRACE_VERSION;
|
|
5328
5315
|
var init_version = __esm(() => {
|
|
@@ -6342,259 +6329,6 @@ var init_drift = __esm(() => {
|
|
|
6342
6329
|
init_transform();
|
|
6343
6330
|
});
|
|
6344
6331
|
|
|
6345
|
-
// ../../src/transform/schema-hints.ts
|
|
6346
|
-
function buildIntentProfile(intent) {
|
|
6347
|
-
const text = intent?.toLowerCase() ?? "";
|
|
6348
|
-
const profile = {
|
|
6349
|
-
preferredPaths: [],
|
|
6350
|
-
discouragedPaths: [],
|
|
6351
|
-
preferredFields: [],
|
|
6352
|
-
discouragedFields: [],
|
|
6353
|
-
wantsStructuredRecords: /\b(search|list|find|get|fetch|timeline|feed|trending)\b/.test(text)
|
|
6354
|
-
};
|
|
6355
|
-
if (/\b(repo|repos|repository|repositories|code|projects?)\b/.test(text)) {
|
|
6356
|
-
profile.preferredPaths.push("repositories", "repos", "results", "items", "data");
|
|
6357
|
-
profile.preferredFields.push("full_name", "name", "description", "stargazers_count", "stars", "language", "owner", "url");
|
|
6358
|
-
profile.discouragedPaths.push("accounts", "users", "hashtags", "topics");
|
|
6359
|
-
}
|
|
6360
|
-
if (/\b(post|posts|tweet|tweets|status|statuses|timeline|feed|thread|threads)\b/.test(text)) {
|
|
6361
|
-
profile.preferredPaths.push("statuses", "posts", "tweets", "timeline", "entries", "results");
|
|
6362
|
-
profile.preferredFields.push("content", "text", "body", "created_at", "url", "account", "username", "replies_count", "reblogs_count", "favourites_count");
|
|
6363
|
-
profile.discouragedPaths.push("accounts", "users", "people", "profiles", "hashtags");
|
|
6364
|
-
}
|
|
6365
|
-
if (/\b(person|people|user|users|profile|profiles|member|members|account|accounts)\b/.test(text)) {
|
|
6366
|
-
profile.preferredPaths.push("people", "users", "accounts", "profiles", "included", "elements");
|
|
6367
|
-
profile.preferredFields.push("name", "headline", "title", "public_identifier", "username", "handle", "url");
|
|
6368
|
-
profile.discouragedPaths.push("hashtags", "statuses", "posts");
|
|
6369
|
-
}
|
|
6370
|
-
if (/\b(trend|trending|topic|topics)\b/.test(text)) {
|
|
6371
|
-
profile.preferredPaths.push("trends", "topics", "timeline", "entries", "results", "data");
|
|
6372
|
-
profile.preferredFields.push("name", "query", "topic", "post_count", "tweet_volume", "url");
|
|
6373
|
-
profile.discouragedPaths.push("accounts", "users");
|
|
6374
|
-
}
|
|
6375
|
-
return profile;
|
|
6376
|
-
}
|
|
6377
|
-
function findArrayCandidates(schema, path5, depth, results) {
|
|
6378
|
-
if (schema.type === "array" && schema.items) {
|
|
6379
|
-
const items = schema.items;
|
|
6380
|
-
if (items.type === "object" && items.properties) {
|
|
6381
|
-
const fieldCount = Object.keys(items.properties).length;
|
|
6382
|
-
results.push({ path: path5 ? `${path5}[]` : "[]", itemSchema: items, fieldCount, depth });
|
|
6383
|
-
for (const [key, prop] of Object.entries(items.properties)) {
|
|
6384
|
-
const childPath = path5 ? `${path5}[].${key}` : `[].${key}`;
|
|
6385
|
-
findArrayCandidates(prop, childPath, depth + 1, results);
|
|
6386
|
-
}
|
|
6387
|
-
return;
|
|
6388
|
-
}
|
|
6389
|
-
if (items.type === "array") {
|
|
6390
|
-
findArrayCandidates(items, path5 ? `${path5}[]` : "[]", depth + 1, results);
|
|
6391
|
-
}
|
|
6392
|
-
return;
|
|
6393
|
-
}
|
|
6394
|
-
if (schema.type === "object" && schema.properties) {
|
|
6395
|
-
for (const [key, prop] of Object.entries(schema.properties)) {
|
|
6396
|
-
const childPath = path5 ? `${path5}.${key}` : key;
|
|
6397
|
-
findArrayCandidates(prop, childPath, depth + 1, results);
|
|
6398
|
-
}
|
|
6399
|
-
}
|
|
6400
|
-
}
|
|
6401
|
-
function scoreField(name, schema) {
|
|
6402
|
-
let score = 0;
|
|
6403
|
-
const lower = name.toLowerCase();
|
|
6404
|
-
if (/^(id|name|title|label|slug)$/i.test(name))
|
|
6405
|
-
score += 10;
|
|
6406
|
-
if (/^(url|link|href|uri)$/i.test(name))
|
|
6407
|
-
score += 8;
|
|
6408
|
-
if (/^(description|text|content|body|summary|bio)$/i.test(name))
|
|
6409
|
-
score += 7;
|
|
6410
|
-
if (/^(email|username|handle|screen.?name)$/i.test(name))
|
|
6411
|
-
score += 7;
|
|
6412
|
-
if (/date|time|created|updated|start|end/i.test(lower))
|
|
6413
|
-
score += 5;
|
|
6414
|
-
if (/count|total|price|amount|score|rating|likes|views|followers/i.test(lower))
|
|
6415
|
-
score += 5;
|
|
6416
|
-
if (/status|state|type|category|kind|tag/i.test(lower))
|
|
6417
|
-
score += 4;
|
|
6418
|
-
if (/city|address|location|lat|lng|geo|place|venue/i.test(lower))
|
|
6419
|
-
score += 4;
|
|
6420
|
-
if (/image|photo|avatar|thumbnail|cover|logo|icon/i.test(lower))
|
|
6421
|
-
score += 3;
|
|
6422
|
-
if (schema.type === "string" || schema.type === "integer" || schema.type === "number" || schema.type === "boolean") {
|
|
6423
|
-
score += 2;
|
|
6424
|
-
}
|
|
6425
|
-
if (/urn|tracking|internal|hash|token|cursor|pagination|__/i.test(lower))
|
|
6426
|
-
score -= 5;
|
|
6427
|
-
if (name.startsWith("$") || name.startsWith("_"))
|
|
6428
|
-
score -= 3;
|
|
6429
|
-
return score;
|
|
6430
|
-
}
|
|
6431
|
-
function selectBestArray(candidates, intent) {
|
|
6432
|
-
if (candidates.length === 0)
|
|
6433
|
-
return null;
|
|
6434
|
-
const profile = buildIntentProfile(intent);
|
|
6435
|
-
const scored = candidates.map((c) => {
|
|
6436
|
-
let score = c.fieldCount * 2;
|
|
6437
|
-
if (c.depth >= 1 && c.depth <= 3)
|
|
6438
|
-
score += 5;
|
|
6439
|
-
if (c.depth === 0)
|
|
6440
|
-
score += 2;
|
|
6441
|
-
const pathLower = c.path.toLowerCase();
|
|
6442
|
-
if (/data|results|items|entries|elements|records|list|feed|posts|events|users/i.test(pathLower)) {
|
|
6443
|
-
score += 8;
|
|
6444
|
-
}
|
|
6445
|
-
if (/included|nodes|edges/i.test(pathLower))
|
|
6446
|
-
score += 6;
|
|
6447
|
-
for (const token of profile.preferredPaths) {
|
|
6448
|
-
if (pathLower.includes(token))
|
|
6449
|
-
score += 14;
|
|
6450
|
-
}
|
|
6451
|
-
for (const token of profile.discouragedPaths) {
|
|
6452
|
-
if (pathLower.includes(token))
|
|
6453
|
-
score -= 18;
|
|
6454
|
-
}
|
|
6455
|
-
const fieldNames = Object.keys(c.itemSchema.properties ?? {}).map((name) => name.toLowerCase());
|
|
6456
|
-
for (const token of profile.preferredFields) {
|
|
6457
|
-
if (fieldNames.includes(token.toLowerCase()))
|
|
6458
|
-
score += 7;
|
|
6459
|
-
}
|
|
6460
|
-
for (const token of profile.discouragedFields) {
|
|
6461
|
-
if (fieldNames.includes(token.toLowerCase()))
|
|
6462
|
-
score -= 8;
|
|
6463
|
-
}
|
|
6464
|
-
if (profile.wantsStructuredRecords && c.fieldCount < 3)
|
|
6465
|
-
score -= 12;
|
|
6466
|
-
if (fieldNames.length > 0 && fieldNames.every((name) => /^(link|title|label|text|value)$/i.test(name))) {
|
|
6467
|
-
score -= 16;
|
|
6468
|
-
}
|
|
6469
|
-
if (c.fieldCount < 3)
|
|
6470
|
-
score -= 5;
|
|
6471
|
-
return { candidate: c, score };
|
|
6472
|
-
});
|
|
6473
|
-
scored.sort((a, b) => b.score - a.score);
|
|
6474
|
-
return scored[0]?.candidate ?? null;
|
|
6475
|
-
}
|
|
6476
|
-
function schemaToTree(schema, maxDepth = 3) {
|
|
6477
|
-
const tree = {};
|
|
6478
|
-
function walk(s, path5, depth) {
|
|
6479
|
-
if (depth > maxDepth)
|
|
6480
|
-
return;
|
|
6481
|
-
if (s.type === "object" && s.properties) {
|
|
6482
|
-
for (const [key, prop] of Object.entries(s.properties)) {
|
|
6483
|
-
const childPath = path5 ? `${path5}.${key}` : key;
|
|
6484
|
-
if (prop.type === "array" && prop.items) {
|
|
6485
|
-
if (prop.items.type === "object" && prop.items.properties) {
|
|
6486
|
-
const count = Object.keys(prop.items.properties).length;
|
|
6487
|
-
tree[`${childPath}[]`] = `array<object> (${count} fields)`;
|
|
6488
|
-
walk(prop.items, `${childPath}[]`, depth + 1);
|
|
6489
|
-
} else {
|
|
6490
|
-
tree[`${childPath}[]`] = `array<${prop.items.type}>`;
|
|
6491
|
-
}
|
|
6492
|
-
} else if (prop.type === "object" && prop.properties) {
|
|
6493
|
-
const count = Object.keys(prop.properties).length;
|
|
6494
|
-
tree[childPath] = `object (${count} fields)`;
|
|
6495
|
-
walk(prop, childPath, depth + 1);
|
|
6496
|
-
} else {
|
|
6497
|
-
tree[childPath] = prop.type;
|
|
6498
|
-
}
|
|
6499
|
-
}
|
|
6500
|
-
}
|
|
6501
|
-
}
|
|
6502
|
-
if (schema.type === "array" && schema.items) {
|
|
6503
|
-
tree["[]"] = `array<${schema.items.type}>`;
|
|
6504
|
-
if (schema.items.type === "object") {
|
|
6505
|
-
walk(schema.items, "[]", 1);
|
|
6506
|
-
}
|
|
6507
|
-
} else {
|
|
6508
|
-
walk(schema, "", 0);
|
|
6509
|
-
}
|
|
6510
|
-
return tree;
|
|
6511
|
-
}
|
|
6512
|
-
function generateExtractionHints(schema, intent) {
|
|
6513
|
-
if (schema.type !== "object" && schema.type !== "array")
|
|
6514
|
-
return null;
|
|
6515
|
-
const profile = buildIntentProfile(intent);
|
|
6516
|
-
if (schema.type === "object" && schema.properties) {
|
|
6517
|
-
for (const token of profile.preferredPaths) {
|
|
6518
|
-
const prop = schema.properties[token];
|
|
6519
|
-
if (prop?.type === "array" && (!prop.items || prop.items.type !== "object")) {
|
|
6520
|
-
return finalize({
|
|
6521
|
-
path: `${token}[]`,
|
|
6522
|
-
fields: [],
|
|
6523
|
-
item_field_count: 0,
|
|
6524
|
-
confidence: "medium"
|
|
6525
|
-
}, schema);
|
|
6526
|
-
}
|
|
6527
|
-
}
|
|
6528
|
-
}
|
|
6529
|
-
const candidates = [];
|
|
6530
|
-
findArrayCandidates(schema, "", 0, candidates);
|
|
6531
|
-
if (candidates.length === 0) {
|
|
6532
|
-
if (schema.type === "object" && schema.properties) {
|
|
6533
|
-
const propCount = Object.keys(schema.properties).length;
|
|
6534
|
-
if (propCount <= 5)
|
|
6535
|
-
return null;
|
|
6536
|
-
}
|
|
6537
|
-
if (schema.type === "object" && schema.properties) {
|
|
6538
|
-
const fields = Object.entries(schema.properties).map(([name, prop]) => ({ name, score: scoreField(name, prop) })).filter((f) => f.score > 0).sort((a, b) => b.score - a.score).slice(0, 8).map((f) => f.name);
|
|
6539
|
-
if (fields.length >= 2) {
|
|
6540
|
-
return finalize({ path: "", fields, item_field_count: Object.keys(schema.properties).length, confidence: "low" }, schema);
|
|
6541
|
-
}
|
|
6542
|
-
}
|
|
6543
|
-
return null;
|
|
6544
|
-
}
|
|
6545
|
-
const best = selectBestArray(candidates, intent);
|
|
6546
|
-
if (!best)
|
|
6547
|
-
return null;
|
|
6548
|
-
const itemProps = best.itemSchema.properties ?? {};
|
|
6549
|
-
const scoredFields = Object.entries(itemProps).map(([name, prop]) => ({ name, score: scoreField(name, prop), type: prop.type })).sort((a, b) => b.score - a.score);
|
|
6550
|
-
const topFields = scoredFields.filter((f) => f.score > 0).slice(0, 10).map((f) => f.name);
|
|
6551
|
-
if (intent) {
|
|
6552
|
-
const intentWords = intent.toLowerCase().split(/\s+/);
|
|
6553
|
-
for (const field of scoredFields) {
|
|
6554
|
-
if (topFields.includes(field.name))
|
|
6555
|
-
continue;
|
|
6556
|
-
const fieldLower = field.name.toLowerCase();
|
|
6557
|
-
if (intentWords.some((w) => fieldLower.includes(w) || w.includes(fieldLower))) {
|
|
6558
|
-
topFields.push(field.name);
|
|
6559
|
-
}
|
|
6560
|
-
}
|
|
6561
|
-
}
|
|
6562
|
-
if (topFields.length < 2) {
|
|
6563
|
-
const primitiveFields = scoredFields.filter((f) => f.type === "string" || f.type === "integer" || f.type === "number").slice(0, 5).map((f) => f.name);
|
|
6564
|
-
if (primitiveFields.length < 2)
|
|
6565
|
-
return null;
|
|
6566
|
-
return finalize({
|
|
6567
|
-
path: best.path,
|
|
6568
|
-
fields: primitiveFields,
|
|
6569
|
-
item_field_count: best.fieldCount,
|
|
6570
|
-
confidence: "low"
|
|
6571
|
-
}, schema);
|
|
6572
|
-
}
|
|
6573
|
-
const confidence = best.fieldCount >= 5 ? "high" : best.fieldCount >= 3 ? "medium" : "low";
|
|
6574
|
-
return finalize({
|
|
6575
|
-
path: best.path,
|
|
6576
|
-
fields: topFields,
|
|
6577
|
-
item_field_count: best.fieldCount,
|
|
6578
|
-
confidence
|
|
6579
|
-
}, schema);
|
|
6580
|
-
}
|
|
6581
|
-
function finalize(hint, schema) {
|
|
6582
|
-
hint.cli_args = hintsToCliArgs(hint);
|
|
6583
|
-
hint.schema_tree = schemaToTree(schema, 2);
|
|
6584
|
-
return hint;
|
|
6585
|
-
}
|
|
6586
|
-
function hintsToCliArgs(hints) {
|
|
6587
|
-
const parts = [];
|
|
6588
|
-
if (hints.path) {
|
|
6589
|
-
parts.push(`--path "${hints.path}"`);
|
|
6590
|
-
}
|
|
6591
|
-
if (hints.fields.length > 0) {
|
|
6592
|
-
parts.push(`--extract "${hints.fields.join(",")}"`);
|
|
6593
|
-
}
|
|
6594
|
-
parts.push("--limit 10");
|
|
6595
|
-
return parts.join(" ");
|
|
6596
|
-
}
|
|
6597
|
-
|
|
6598
6332
|
// ../../src/execution/retry.ts
|
|
6599
6333
|
async function withRetry(fn, isRetryable, opts) {
|
|
6600
6334
|
const maxRetries = opts?.maxRetries ?? MAX_RETRIES;
|
|
@@ -9760,7 +9494,7 @@ function sanitizeNavigationQueryParams(url) {
|
|
|
9760
9494
|
return out;
|
|
9761
9495
|
}
|
|
9762
9496
|
function restoreTemplatePlaceholderEncoding(url) {
|
|
9763
|
-
return url.replace(/%7B
|
|
9497
|
+
return url.replace(/%7B(\w+)%7D/gi, "{$1}");
|
|
9764
9498
|
}
|
|
9765
9499
|
function compactSchemaSample(value, depth = 0) {
|
|
9766
9500
|
if (depth >= 4)
|
|
@@ -10807,6 +10541,15 @@ async function tryHttpFetch(url, authHeaders, cookies) {
|
|
|
10807
10541
|
return null;
|
|
10808
10542
|
}
|
|
10809
10543
|
}
|
|
10544
|
+
function flattenExtracted(data) {
|
|
10545
|
+
if (!Array.isArray(data))
|
|
10546
|
+
return data;
|
|
10547
|
+
const first = data[0];
|
|
10548
|
+
if (first && typeof first === "object" && "type" in first && "data" in first && "relevance_score" in first) {
|
|
10549
|
+
return data.reduce((best, cur) => (cur.relevance_score ?? 0) > (best.relevance_score ?? 0) ? cur : best).data;
|
|
10550
|
+
}
|
|
10551
|
+
return data;
|
|
10552
|
+
}
|
|
10810
10553
|
async function executeDomExtractionEndpoint(endpoint, url, intent, authHeaders, cookies) {
|
|
10811
10554
|
const ssrResult = await tryHttpFetch(url, authHeaders, cookies);
|
|
10812
10555
|
if (ssrResult) {
|
|
@@ -10818,16 +10561,7 @@ async function executeDomExtractionEndpoint(endpoint, url, intent, authHeaders,
|
|
|
10818
10561
|
if (ssrSemantic.verdict !== "fail") {
|
|
10819
10562
|
console.log(`[ssr-fast] hit — extracted via HTTP fetch`);
|
|
10820
10563
|
return {
|
|
10821
|
-
data:
|
|
10822
|
-
data: ssrExtracted.data,
|
|
10823
|
-
_extraction: {
|
|
10824
|
-
method: ssrExtracted.extraction_method,
|
|
10825
|
-
confidence: ssrExtracted.confidence,
|
|
10826
|
-
source: "ssr-fast",
|
|
10827
|
-
final_url: ssrResult.final_url,
|
|
10828
|
-
...ssrExtracted.selector ? { selector: ssrExtracted.selector } : {}
|
|
10829
|
-
}
|
|
10830
|
-
},
|
|
10564
|
+
data: flattenExtracted(ssrExtracted.data),
|
|
10831
10565
|
status: 200,
|
|
10832
10566
|
trace_id: nanoid5()
|
|
10833
10567
|
};
|
|
@@ -10865,16 +10599,7 @@ async function executeDomExtractionEndpoint(endpoint, url, intent, authHeaders,
|
|
|
10865
10599
|
};
|
|
10866
10600
|
}
|
|
10867
10601
|
return {
|
|
10868
|
-
data:
|
|
10869
|
-
data: extracted.data,
|
|
10870
|
-
_extraction: {
|
|
10871
|
-
method: extracted.extraction_method,
|
|
10872
|
-
confidence: extracted.confidence,
|
|
10873
|
-
source: "rendered-dom",
|
|
10874
|
-
final_url: captured.final_url,
|
|
10875
|
-
...extracted.selector ? { selector: extracted.selector } : {}
|
|
10876
|
-
}
|
|
10877
|
-
},
|
|
10602
|
+
data: flattenExtracted(extracted.data),
|
|
10878
10603
|
status: 200,
|
|
10879
10604
|
trace_id: nanoid5()
|
|
10880
10605
|
};
|
|
@@ -10933,9 +10658,7 @@ async function executeEndpoint(skill, endpoint, params = {}, projection, options
|
|
|
10933
10658
|
}
|
|
10934
10659
|
return {
|
|
10935
10660
|
trace: trace2,
|
|
10936
|
-
result: resultData2
|
|
10937
|
-
...endpoint.response_schema ? { response_schema: endpoint.response_schema } : {},
|
|
10938
|
-
...endpoint.response_schema ? { extraction_hints: generateExtractionHints(endpoint.response_schema, skill.intent_signature) ?? undefined } : {}
|
|
10661
|
+
result: resultData2
|
|
10939
10662
|
};
|
|
10940
10663
|
} catch (err) {
|
|
10941
10664
|
const trace2 = stampTrace({
|
|
@@ -11183,6 +10906,7 @@ async function executeEndpoint(skill, endpoint, params = {}, projection, options
|
|
|
11183
10906
|
let last = { data: null, status: 0 };
|
|
11184
10907
|
for (const replayUrl of replayUrls) {
|
|
11185
10908
|
const replayHeaders = buildStructuredReplayHeaders(url, replayUrl, headers);
|
|
10909
|
+
log("exec", `server-fetch: ${endpoint.method} ${replayUrl.substring(0, 80)} csrf=${replayHeaders["x-csrf-token"]?.substring(0, 10)}... cookies=${replayHeaders["cookie"]?.length ?? 0}chars`);
|
|
11186
10910
|
const res = await fetch(replayUrl, {
|
|
11187
10911
|
method: endpoint.method,
|
|
11188
10912
|
headers: replayHeaders,
|
|
@@ -11260,7 +10984,8 @@ async function executeEndpoint(skill, endpoint, params = {}, projection, options
|
|
|
11260
10984
|
try {
|
|
11261
10985
|
result = await serverFetch();
|
|
11262
10986
|
if (result.status >= 200 && result.status < 400) {
|
|
11263
|
-
|
|
10987
|
+
const isApiEndpoint = /\/(api|graphql)\b/i.test(endpoint.url_template) || /\.(json)(\?|$)/.test(endpoint.url_template);
|
|
10988
|
+
if (!isApiEndpoint && shouldFallbackToBrowserReplay(result.data, endpoint, options?.intent ?? skill.intent_signature, options?.contextUrl)) {
|
|
11264
10989
|
result = await withRetry(browserCall, (r) => isRetryableStatus(r.status));
|
|
11265
10990
|
strategy = "browser";
|
|
11266
10991
|
} else {
|
|
@@ -11453,12 +11178,9 @@ async function executeEndpoint(skill, endpoint, params = {}, projection, options
|
|
|
11453
11178
|
} else if (trace.success) {
|
|
11454
11179
|
resultData = projectResultForIntent(data, effectiveIntent);
|
|
11455
11180
|
}
|
|
11456
|
-
const rawResultShape = resultData === data;
|
|
11457
11181
|
return {
|
|
11458
11182
|
trace,
|
|
11459
|
-
result: resultData
|
|
11460
|
-
...endpoint.response_schema && rawResultShape ? { response_schema: endpoint.response_schema } : {},
|
|
11461
|
-
...endpoint.response_schema && rawResultShape ? { extraction_hints: generateExtractionHints(endpoint.response_schema, effectiveIntent) ?? undefined } : {}
|
|
11183
|
+
result: resultData
|
|
11462
11184
|
};
|
|
11463
11185
|
}
|
|
11464
11186
|
function templatizeQueryParams(url) {
|
|
@@ -13012,15 +12734,84 @@ import { nanoid as nanoid7 } from "nanoid";
|
|
|
13012
12734
|
import { existsSync as existsSync9, writeFileSync as writeFileSync7, readFileSync as readFileSync6, mkdirSync as mkdirSync8, readdirSync as readdirSync5 } from "node:fs";
|
|
13013
12735
|
import { dirname as dirname2, join as join9 } from "node:path";
|
|
13014
12736
|
import { createHash as createHash6 } from "node:crypto";
|
|
13015
|
-
function summarizeSchema(schema) {
|
|
13016
|
-
|
|
13017
|
-
|
|
13018
|
-
|
|
13019
|
-
|
|
13020
|
-
|
|
12737
|
+
function summarizeSchema(schema, maxDepth = 3) {
|
|
12738
|
+
function walk(s, depth) {
|
|
12739
|
+
if (depth <= 0)
|
|
12740
|
+
return s.type;
|
|
12741
|
+
if (s.type === "array" && s.items) {
|
|
12742
|
+
const inner = walk(s.items, depth - 1);
|
|
12743
|
+
return inner && typeof inner === "object" ? [inner] : [`${s.items.type ?? "unknown"}`];
|
|
12744
|
+
}
|
|
12745
|
+
if (s.properties) {
|
|
12746
|
+
const out = {};
|
|
12747
|
+
for (const [k, v] of Object.entries(s.properties)) {
|
|
12748
|
+
out[k] = walk(v, depth - 1);
|
|
12749
|
+
}
|
|
12750
|
+
return out;
|
|
12751
|
+
}
|
|
12752
|
+
return s.type;
|
|
13021
12753
|
}
|
|
12754
|
+
if (schema.properties)
|
|
12755
|
+
return walk(schema, maxDepth);
|
|
12756
|
+
if (schema.type === "array" && schema.items)
|
|
12757
|
+
return { "[]": walk(schema.items, maxDepth - 1) };
|
|
13022
12758
|
return null;
|
|
13023
12759
|
}
|
|
12760
|
+
function extractSampleValues(sample, maxLeaves = 12) {
|
|
12761
|
+
if (sample == null)
|
|
12762
|
+
return null;
|
|
12763
|
+
const SKIP_KEYS = new Set([
|
|
12764
|
+
"__typename",
|
|
12765
|
+
"entryType",
|
|
12766
|
+
"itemType",
|
|
12767
|
+
"clientEventInfo",
|
|
12768
|
+
"feedbackInfo",
|
|
12769
|
+
"controllerData",
|
|
12770
|
+
"injectionType",
|
|
12771
|
+
"sortIndex",
|
|
12772
|
+
"cursor",
|
|
12773
|
+
"cursorType",
|
|
12774
|
+
"displayTreatment",
|
|
12775
|
+
"socialContext",
|
|
12776
|
+
"promotedMetadata",
|
|
12777
|
+
"feedbackKeys",
|
|
12778
|
+
"tweetDisplayType",
|
|
12779
|
+
"element",
|
|
12780
|
+
"component",
|
|
12781
|
+
"details"
|
|
12782
|
+
]);
|
|
12783
|
+
const out = {};
|
|
12784
|
+
let count = 0;
|
|
12785
|
+
function walk(obj, path5, depth) {
|
|
12786
|
+
if (count >= maxLeaves || depth > 10)
|
|
12787
|
+
return;
|
|
12788
|
+
if (obj == null)
|
|
12789
|
+
return;
|
|
12790
|
+
if (Array.isArray(obj)) {
|
|
12791
|
+
if (obj.length > 0)
|
|
12792
|
+
walk(obj[0], path5 + "[]", depth + 1);
|
|
12793
|
+
return;
|
|
12794
|
+
}
|
|
12795
|
+
if (typeof obj === "object") {
|
|
12796
|
+
for (const [k, v] of Object.entries(obj)) {
|
|
12797
|
+
if (count >= maxLeaves)
|
|
12798
|
+
break;
|
|
12799
|
+
if (SKIP_KEYS.has(k))
|
|
12800
|
+
continue;
|
|
12801
|
+
const p = path5 ? `${path5}.${k}` : k;
|
|
12802
|
+
if (v != null && typeof v === "object") {
|
|
12803
|
+
walk(v, p, depth + 1);
|
|
12804
|
+
} else if (v != null && v !== "" && v !== 0 && v !== false) {
|
|
12805
|
+
out[p] = typeof v === "string" && v.length > 80 ? v.slice(0, 77) + "..." : v;
|
|
12806
|
+
count++;
|
|
12807
|
+
}
|
|
12808
|
+
}
|
|
12809
|
+
return;
|
|
12810
|
+
}
|
|
12811
|
+
}
|
|
12812
|
+
walk(sample, "", 0);
|
|
12813
|
+
return count > 0 ? out : null;
|
|
12814
|
+
}
|
|
13024
12815
|
function persistDomainCache() {
|
|
13025
12816
|
try {
|
|
13026
12817
|
const dir = dirname2(DOMAIN_CACHE_FILE);
|
|
@@ -13274,14 +13065,12 @@ function cacheResolvedSkill(cacheKey, skill, endpointId) {
|
|
|
13274
13065
|
});
|
|
13275
13066
|
persistRouteCache();
|
|
13276
13067
|
}
|
|
13277
|
-
function promoteResultSnapshot(cacheKey, skill, endpointId, result, trace
|
|
13068
|
+
function promoteResultSnapshot(cacheKey, skill, endpointId, result, trace) {
|
|
13278
13069
|
routeResultCache.set(cacheKey, {
|
|
13279
13070
|
skill,
|
|
13280
13071
|
endpointId,
|
|
13281
13072
|
result,
|
|
13282
13073
|
trace,
|
|
13283
|
-
response_schema,
|
|
13284
|
-
extraction_hints,
|
|
13285
13074
|
expires: Date.now() + ROUTE_CACHE_TTL
|
|
13286
13075
|
});
|
|
13287
13076
|
}
|
|
@@ -13299,9 +13088,7 @@ function buildCachedResultResponse(cached, source, timing) {
|
|
|
13299
13088
|
},
|
|
13300
13089
|
source,
|
|
13301
13090
|
skill: cached.skill,
|
|
13302
|
-
timing
|
|
13303
|
-
response_schema: cached.response_schema,
|
|
13304
|
-
extraction_hints: cached.extraction_hints
|
|
13091
|
+
timing
|
|
13305
13092
|
};
|
|
13306
13093
|
}
|
|
13307
13094
|
function invalidateResolveCacheEntries(cacheKeys, domainKeys = []) {
|
|
@@ -14349,7 +14136,7 @@ async function resolveAndExecute(intent, params = {}, context, projection, optio
|
|
|
14349
14136
|
routeResultCache.delete(k);
|
|
14350
14137
|
}
|
|
14351
14138
|
}
|
|
14352
|
-
function
|
|
14139
|
+
function finalize(source, result2, skillId, skill, trace2) {
|
|
14353
14140
|
timing.total_ms = Date.now() - t0;
|
|
14354
14141
|
timing.source = source;
|
|
14355
14142
|
timing.skill_id = skillId;
|
|
@@ -14497,6 +14284,15 @@ async function resolveAndExecute(intent, params = {}, context, projection, optio
|
|
|
14497
14284
|
url: r.endpoint.url_template.length > 120 ? r.endpoint.url_template.slice(0, 120) + "..." : r.endpoint.url_template,
|
|
14498
14285
|
score: Math.round(r.score * 10) / 10,
|
|
14499
14286
|
schema_summary: r.endpoint.response_schema ? summarizeSchema(r.endpoint.response_schema) : null,
|
|
14287
|
+
input_params: r.endpoint.semantic?.requires?.map((b) => ({
|
|
14288
|
+
key: b.key,
|
|
14289
|
+
type: b.type ?? b.semantic_type,
|
|
14290
|
+
required: b.required ?? false,
|
|
14291
|
+
example: b.example_value
|
|
14292
|
+
})) ?? [],
|
|
14293
|
+
description_in: r.endpoint.semantic?.description_in,
|
|
14294
|
+
example_fields: r.endpoint.semantic?.example_fields?.slice(0, 12),
|
|
14295
|
+
sample_values: extractSampleValues(r.endpoint.semantic?.example_response_compact),
|
|
14500
14296
|
dom_extraction: !!r.endpoint.dom_extraction,
|
|
14501
14297
|
trigger_url: r.endpoint.trigger_url
|
|
14502
14298
|
})),
|
|
@@ -14505,7 +14301,7 @@ async function resolveAndExecute(intent, params = {}, context, projection, optio
|
|
|
14505
14301
|
trace: deferTrace,
|
|
14506
14302
|
source,
|
|
14507
14303
|
skill: resolvedSkill,
|
|
14508
|
-
timing:
|
|
14304
|
+
timing: finalize(source, null, resolvedSkill.skill_id, resolvedSkill, deferTrace)
|
|
14509
14305
|
};
|
|
14510
14306
|
}
|
|
14511
14307
|
function missingTemplateParams(endpoint, boundParams) {
|
|
@@ -14835,7 +14631,7 @@ async function resolveAndExecute(intent, params = {}, context, projection, optio
|
|
|
14835
14631
|
skill_id: skill.skill_id,
|
|
14836
14632
|
selected_endpoint_id: candidate.endpoint.endpoint_id
|
|
14837
14633
|
});
|
|
14838
|
-
promoteResultSnapshot(cacheKey, skill, candidate.endpoint.endpoint_id, execOut.result, execOut.trace
|
|
14634
|
+
promoteResultSnapshot(cacheKey, skill, candidate.endpoint.endpoint_id, execOut.result, execOut.trace);
|
|
14839
14635
|
try {
|
|
14840
14636
|
const endpointSeq = decisionTrace.autoexec_attempts.map((a) => a.endpoint_id);
|
|
14841
14637
|
storeExecutionTrace({
|
|
@@ -14892,7 +14688,7 @@ async function resolveAndExecute(intent, params = {}, context, projection, optio
|
|
|
14892
14688
|
trace: execOut.trace,
|
|
14893
14689
|
source,
|
|
14894
14690
|
skill,
|
|
14895
|
-
timing:
|
|
14691
|
+
timing: finalize(source, null, skill.skill_id, skill, execOut.trace)
|
|
14896
14692
|
};
|
|
14897
14693
|
}
|
|
14898
14694
|
}
|
|
@@ -14913,9 +14709,7 @@ async function resolveAndExecute(intent, params = {}, context, projection, optio
|
|
|
14913
14709
|
trace: execOut.trace,
|
|
14914
14710
|
source,
|
|
14915
14711
|
skill,
|
|
14916
|
-
timing:
|
|
14917
|
-
response_schema: execOut.response_schema,
|
|
14918
|
-
extraction_hints: execOut.extraction_hints
|
|
14712
|
+
timing: finalize(source, execOut.result, skill.skill_id, skill, execOut.trace)
|
|
14919
14713
|
};
|
|
14920
14714
|
}
|
|
14921
14715
|
decisionTrace.autoexec_attempts.push({
|
|
@@ -14979,7 +14773,7 @@ async function resolveAndExecute(intent, params = {}, context, projection, optio
|
|
|
14979
14773
|
skill_id: cachedResult.skill.skill_id,
|
|
14980
14774
|
selected_endpoint_id: cachedResult.endpointId ?? cachedResult.trace.endpoint_id
|
|
14981
14775
|
});
|
|
14982
|
-
return buildCachedResultResponse(cachedResult, "marketplace",
|
|
14776
|
+
return buildCachedResultResponse(cachedResult, "marketplace", finalize("route-cache", cachedResult.result, cachedResult.skill.skill_id, cachedResult.skill, cachedResult.trace));
|
|
14983
14777
|
}
|
|
14984
14778
|
}
|
|
14985
14779
|
}
|
|
@@ -15093,15 +14887,13 @@ async function resolveAndExecute(intent, params = {}, context, projection, optio
|
|
|
15093
14887
|
timing.execute_ms = Date.now() - te02;
|
|
15094
14888
|
if (execOut.trace.success && isAcceptableIntentResult(execOut.result, queryIntent)) {
|
|
15095
14889
|
timing.cache_hit = true;
|
|
15096
|
-
promoteResultSnapshot(cacheKey, skill, params.endpoint_id ?? cached.entry.endpointId, execOut.result, execOut.trace
|
|
14890
|
+
promoteResultSnapshot(cacheKey, skill, params.endpoint_id ?? cached.entry.endpointId, execOut.result, execOut.trace);
|
|
15097
14891
|
return {
|
|
15098
14892
|
result: execOut.result,
|
|
15099
14893
|
trace: execOut.trace,
|
|
15100
14894
|
source: "marketplace",
|
|
15101
14895
|
skill,
|
|
15102
|
-
timing:
|
|
15103
|
-
response_schema: execOut.response_schema,
|
|
15104
|
-
extraction_hints: execOut.extraction_hints
|
|
14896
|
+
timing: finalize("route-cache", execOut.result, cached.entry.skillId, skill, execOut.trace)
|
|
15105
14897
|
};
|
|
15106
14898
|
}
|
|
15107
14899
|
} catch {
|
|
@@ -15208,15 +15000,13 @@ async function resolveAndExecute(intent, params = {}, context, projection, optio
|
|
|
15208
15000
|
])));
|
|
15209
15001
|
timing.execute_ms = Date.now() - te02;
|
|
15210
15002
|
cacheResolvedSkill(cacheKey, winner.candidate.skill, winner.trace.endpoint_id);
|
|
15211
|
-
promoteResultSnapshot(cacheKey, winner.candidate.skill, winner.trace.endpoint_id, winner.result, winner.trace
|
|
15003
|
+
promoteResultSnapshot(cacheKey, winner.candidate.skill, winner.trace.endpoint_id, winner.result, winner.trace);
|
|
15212
15004
|
return {
|
|
15213
15005
|
result: winner.result,
|
|
15214
15006
|
trace: winner.trace,
|
|
15215
15007
|
source: "marketplace",
|
|
15216
15008
|
skill: winner.candidate.skill,
|
|
15217
|
-
timing:
|
|
15218
|
-
response_schema: winner.response_schema,
|
|
15219
|
-
extraction_hints: winner.extraction_hints
|
|
15009
|
+
timing: finalize("marketplace", winner.result, winner.candidate.skill.skill_id, winner.candidate.skill, winner.trace)
|
|
15220
15010
|
};
|
|
15221
15011
|
} catch (err) {
|
|
15222
15012
|
console.log(`[race] all candidates failed after ${Date.now() - te02}ms: ${err.message}`);
|
|
@@ -15253,7 +15043,7 @@ async function resolveAndExecute(intent, params = {}, context, projection, optio
|
|
|
15253
15043
|
completed_at: new Date().toISOString(),
|
|
15254
15044
|
success: true
|
|
15255
15045
|
};
|
|
15256
|
-
const t =
|
|
15046
|
+
const t = finalize("direct-fetch", data, "direct-fetch", undefined, trace2);
|
|
15257
15047
|
console.log(`[direct-fetch] ${context.url} returned JSON directly — skipping browser`);
|
|
15258
15048
|
return {
|
|
15259
15049
|
result: data,
|
|
@@ -15285,7 +15075,7 @@ async function resolveAndExecute(intent, params = {}, context, projection, optio
|
|
|
15285
15075
|
success: true,
|
|
15286
15076
|
network_events: firstPassResult.interceptedEntries
|
|
15287
15077
|
};
|
|
15288
|
-
const t =
|
|
15078
|
+
const t = finalize("first-pass", firstPassResult.result, firstPassResult.miniSkill.skill_id, firstPassResult.miniSkill, trace2);
|
|
15289
15079
|
return {
|
|
15290
15080
|
result: firstPassResult.result,
|
|
15291
15081
|
trace: trace2,
|
|
@@ -15321,7 +15111,7 @@ async function resolveAndExecute(intent, params = {}, context, projection, optio
|
|
|
15321
15111
|
completed_at: fpNow,
|
|
15322
15112
|
success: true
|
|
15323
15113
|
};
|
|
15324
|
-
const t =
|
|
15114
|
+
const t = finalize("browse-session", null, "browse-session", undefined, trace2);
|
|
15325
15115
|
return {
|
|
15326
15116
|
result: {
|
|
15327
15117
|
status: "browse_session_open",
|
|
@@ -15359,15 +15149,13 @@ async function resolveAndExecute(intent, params = {}, context, projection, optio
|
|
|
15359
15149
|
if (agentChoseEndpoint) {
|
|
15360
15150
|
const execOut = await executeSkill(domainHit.skill, { ...params, endpoint_id: params.endpoint_id ?? domainHit.endpointId, ...queryIntent !== intent ? { intent: queryIntent } : {} }, projection, { ...options, intent: queryIntent, contextUrl: context?.url });
|
|
15361
15151
|
if (execOut.trace.success && isAcceptableIntentResult(execOut.result, queryIntent)) {
|
|
15362
|
-
promoteResultSnapshot(cacheKey, domainHit.skill, params.endpoint_id ?? domainHit.endpointId, execOut.result, execOut.trace
|
|
15152
|
+
promoteResultSnapshot(cacheKey, domainHit.skill, params.endpoint_id ?? domainHit.endpointId, execOut.result, execOut.trace);
|
|
15363
15153
|
return {
|
|
15364
15154
|
result: execOut.result,
|
|
15365
15155
|
trace: execOut.trace,
|
|
15366
15156
|
source: "marketplace",
|
|
15367
15157
|
skill: domainHit.skill,
|
|
15368
|
-
timing:
|
|
15369
|
-
response_schema: execOut.response_schema,
|
|
15370
|
-
extraction_hints: execOut.extraction_hints
|
|
15158
|
+
timing: finalize("marketplace", execOut.result, domainHit.skill.skill_id, domainHit.skill, execOut.trace)
|
|
15371
15159
|
};
|
|
15372
15160
|
}
|
|
15373
15161
|
invalidateResolveCacheEntries([cacheKey], requestedDomainCacheKey ? [requestedDomainCacheKey] : []);
|
|
@@ -15403,7 +15191,7 @@ async function resolveAndExecute(intent, params = {}, context, projection, optio
|
|
|
15403
15191
|
trace,
|
|
15404
15192
|
source: "live-capture",
|
|
15405
15193
|
skill: await getOrCreateBrowserCaptureSkill(),
|
|
15406
|
-
timing:
|
|
15194
|
+
timing: finalize("live-capture", result, undefined, undefined, trace)
|
|
15407
15195
|
};
|
|
15408
15196
|
}
|
|
15409
15197
|
if (learned_skill) {
|
|
@@ -15422,7 +15210,7 @@ async function resolveAndExecute(intent, params = {}, context, projection, optio
|
|
|
15422
15210
|
trace,
|
|
15423
15211
|
source: "live-capture",
|
|
15424
15212
|
skill: await getOrCreateBrowserCaptureSkill(),
|
|
15425
|
-
timing:
|
|
15213
|
+
timing: finalize("live-capture", result, undefined, undefined, trace)
|
|
15426
15214
|
};
|
|
15427
15215
|
}
|
|
15428
15216
|
}
|
|
@@ -15512,7 +15300,7 @@ async function resolveAndExecute(intent, params = {}, context, projection, optio
|
|
|
15512
15300
|
trace: rejectedTrace,
|
|
15513
15301
|
source: "live-capture",
|
|
15514
15302
|
skill: captureSkill,
|
|
15515
|
-
timing:
|
|
15303
|
+
timing: finalize("live-capture", result, undefined, undefined, rejectedTrace)
|
|
15516
15304
|
};
|
|
15517
15305
|
}
|
|
15518
15306
|
if (learned_skill && learnedSkillUsable) {
|
|
@@ -15546,7 +15334,7 @@ async function resolveAndExecute(intent, params = {}, context, projection, optio
|
|
|
15546
15334
|
trace,
|
|
15547
15335
|
source: "live-capture",
|
|
15548
15336
|
skill: captureSkill,
|
|
15549
|
-
timing:
|
|
15337
|
+
timing: finalize("live-capture", result, undefined, undefined, trace)
|
|
15550
15338
|
};
|
|
15551
15339
|
}
|
|
15552
15340
|
const hasNonDomApiEndpoints = !!learned_skill?.endpoints?.some((ep) => !ep.dom_extraction && ep.method !== "WS");
|
|
@@ -15560,7 +15348,7 @@ async function resolveAndExecute(intent, params = {}, context, projection, optio
|
|
|
15560
15348
|
trace,
|
|
15561
15349
|
source: directExtractionSource === "html-embedded" ? "live-capture" : "dom-fallback",
|
|
15562
15350
|
skill: learned_skill,
|
|
15563
|
-
timing:
|
|
15351
|
+
timing: finalize(directExtractionSource === "html-embedded" ? "live-capture" : "dom-fallback", result, learned_skill.skill_id, learned_skill, trace)
|
|
15564
15352
|
};
|
|
15565
15353
|
queuePassivePublishIfExecuted(learned_skill, direct, parityBaseline);
|
|
15566
15354
|
return direct;
|
|
@@ -15570,7 +15358,7 @@ async function resolveAndExecute(intent, params = {}, context, projection, optio
|
|
|
15570
15358
|
trace,
|
|
15571
15359
|
source: "dom-fallback",
|
|
15572
15360
|
skill: captureSkill,
|
|
15573
|
-
timing:
|
|
15361
|
+
timing: finalize("dom-fallback", result, undefined, undefined, trace)
|
|
15574
15362
|
};
|
|
15575
15363
|
}
|
|
15576
15364
|
if (!learned_skill) {
|
|
@@ -15579,7 +15367,7 @@ async function resolveAndExecute(intent, params = {}, context, projection, optio
|
|
|
15579
15367
|
trace,
|
|
15580
15368
|
source: "live-capture",
|
|
15581
15369
|
skill: captureSkill,
|
|
15582
|
-
timing:
|
|
15370
|
+
timing: finalize("live-capture", result, undefined, undefined, trace)
|
|
15583
15371
|
};
|
|
15584
15372
|
}
|
|
15585
15373
|
if (agentChoseEndpoint && learned_skill) {
|
|
@@ -15598,22 +15386,18 @@ async function resolveAndExecute(intent, params = {}, context, projection, optio
|
|
|
15598
15386
|
trace: execOut.trace,
|
|
15599
15387
|
source: "live-capture",
|
|
15600
15388
|
skill: learned_skill,
|
|
15601
|
-
timing:
|
|
15602
|
-
response_schema: execOut.response_schema,
|
|
15603
|
-
extraction_hints: execOut.extraction_hints
|
|
15389
|
+
timing: finalize("live-capture", execOut.result, learned_skill.skill_id, learned_skill, execOut.trace)
|
|
15604
15390
|
}, parityBaseline);
|
|
15605
15391
|
}
|
|
15606
15392
|
if (execOut.trace.success && isAcceptableIntentResult(execOut.result, queryIntent)) {
|
|
15607
|
-
promoteResultSnapshot(cacheKey, learned_skill, execOut.trace.endpoint_id, execOut.result, execOut.trace
|
|
15393
|
+
promoteResultSnapshot(cacheKey, learned_skill, execOut.trace.endpoint_id, execOut.result, execOut.trace);
|
|
15608
15394
|
}
|
|
15609
15395
|
return {
|
|
15610
15396
|
result: execOut.result,
|
|
15611
15397
|
trace: execOut.trace,
|
|
15612
15398
|
source: "live-capture",
|
|
15613
15399
|
skill: learned_skill,
|
|
15614
|
-
timing:
|
|
15615
|
-
response_schema: execOut.response_schema,
|
|
15616
|
-
extraction_hints: execOut.extraction_hints
|
|
15400
|
+
timing: finalize("live-capture", execOut.result, learned_skill.skill_id, learned_skill, execOut.trace)
|
|
15617
15401
|
};
|
|
15618
15402
|
}
|
|
15619
15403
|
const deferred = await buildDeferralWithAutoExec(learned_skill, "live-capture", authRecommended ? {
|
|
@@ -16699,7 +16483,19 @@ async function registerRoutes(app) {
|
|
|
16699
16483
|
app.get("/v1/skills/:skill_id", async (req, reply) => {
|
|
16700
16484
|
const clientScope = clientScopeFor(req);
|
|
16701
16485
|
const { skill_id } = req.params;
|
|
16702
|
-
|
|
16486
|
+
let skill = getRecentLocalSkill(skill_id, clientScope);
|
|
16487
|
+
if (!skill) {
|
|
16488
|
+
for (const [, entry] of domainSkillCache) {
|
|
16489
|
+
if (entry.skillId === skill_id && entry.localSkillPath) {
|
|
16490
|
+
try {
|
|
16491
|
+
skill = JSON.parse(__require("fs").readFileSync(entry.localSkillPath, "utf-8"));
|
|
16492
|
+
} catch {}
|
|
16493
|
+
break;
|
|
16494
|
+
}
|
|
16495
|
+
}
|
|
16496
|
+
}
|
|
16497
|
+
if (!skill)
|
|
16498
|
+
skill = await getSkill2(skill_id, clientScope);
|
|
16703
16499
|
if (!skill)
|
|
16704
16500
|
return reply.code(404).send({ error: "Skill not found" });
|
|
16705
16501
|
return reply.send(skill);
|
|
@@ -16710,18 +16506,46 @@ async function registerRoutes(app) {
|
|
|
16710
16506
|
const { endpoints: reviews } = req.body;
|
|
16711
16507
|
if (!reviews?.length)
|
|
16712
16508
|
return reply.code(400).send({ error: "endpoints[] required" });
|
|
16713
|
-
|
|
16509
|
+
let skill = getRecentLocalSkill(skill_id, clientScope);
|
|
16510
|
+
if (!skill) {
|
|
16511
|
+
for (const [, entry] of domainSkillCache) {
|
|
16512
|
+
if (entry.skillId === skill_id && entry.localSkillPath) {
|
|
16513
|
+
try {
|
|
16514
|
+
skill = JSON.parse(__require("fs").readFileSync(entry.localSkillPath, "utf-8"));
|
|
16515
|
+
} catch {}
|
|
16516
|
+
break;
|
|
16517
|
+
}
|
|
16518
|
+
}
|
|
16519
|
+
}
|
|
16520
|
+
if (!skill)
|
|
16521
|
+
skill = await getSkill2(skill_id, clientScope);
|
|
16714
16522
|
if (!skill)
|
|
16715
16523
|
return reply.code(404).send({ error: "Skill not found" });
|
|
16716
16524
|
const updated = mergeAgentReview(skill.endpoints, reviews);
|
|
16717
16525
|
skill.endpoints = updated;
|
|
16718
16526
|
skill.updated_at = new Date().toISOString();
|
|
16719
16527
|
try {
|
|
16720
|
-
|
|
16721
|
-
|
|
16722
|
-
|
|
16723
|
-
|
|
16528
|
+
cachePublishedSkill(skill);
|
|
16529
|
+
} catch {}
|
|
16530
|
+
const domain = skill.domain;
|
|
16531
|
+
if (domain) {
|
|
16532
|
+
const revCacheKey = buildResolveCacheKey(domain, skill.intent_signature ?? `browse ${domain}`, undefined);
|
|
16533
|
+
const revScopedKey = scopedCacheKey(clientScope, revCacheKey);
|
|
16534
|
+
writeSkillSnapshot(revScopedKey, skill);
|
|
16535
|
+
const revDomainKey = getDomainReuseKey(domain);
|
|
16536
|
+
if (revDomainKey) {
|
|
16537
|
+
domainSkillCache.set(revDomainKey, {
|
|
16538
|
+
skillId: skill.skill_id,
|
|
16539
|
+
localSkillPath: snapshotPathForCacheKey(revScopedKey),
|
|
16540
|
+
ts: Date.now()
|
|
16541
|
+
});
|
|
16542
|
+
persistDomainCache();
|
|
16543
|
+
}
|
|
16724
16544
|
}
|
|
16545
|
+
try {
|
|
16546
|
+
await publishSkill2(skill);
|
|
16547
|
+
} catch {}
|
|
16548
|
+
return reply.send({ ok: true, endpoints_updated: reviews.length });
|
|
16725
16549
|
});
|
|
16726
16550
|
app.post("/v1/skills/:skill_id/chunk", async (req, reply) => {
|
|
16727
16551
|
const clientScope = clientScopeFor(req);
|
|
@@ -17229,7 +17053,7 @@ __export(exports_server, {
|
|
|
17229
17053
|
startUnbrowseServer: () => startUnbrowseServer,
|
|
17230
17054
|
installServerExitCleanup: () => installServerExitCleanup
|
|
17231
17055
|
});
|
|
17232
|
-
import { execSync as
|
|
17056
|
+
import { execSync as execSync2 } from "node:child_process";
|
|
17233
17057
|
import { mkdirSync as mkdirSync10, unlinkSync, writeFileSync as writeFileSync9 } from "node:fs";
|
|
17234
17058
|
import path5 from "node:path";
|
|
17235
17059
|
import Fastify from "fastify";
|
|
@@ -17259,15 +17083,8 @@ async function startUnbrowseServer(options = {}) {
|
|
|
17259
17083
|
const pidFile = options.pidFile ?? process.env.UNBROWSE_PID_FILE;
|
|
17260
17084
|
updatePidFile(pidFile, host, port);
|
|
17261
17085
|
try {
|
|
17262
|
-
|
|
17086
|
+
execSync2("pkill -f chrome-headless-shell", { stdio: "ignore" });
|
|
17263
17087
|
} catch {}
|
|
17264
|
-
try {
|
|
17265
|
-
await start();
|
|
17266
|
-
const h = await health();
|
|
17267
|
-
console.log(`[startup] Kuri ready — ${h.tabs ?? 0} tabs`);
|
|
17268
|
-
} catch (err) {
|
|
17269
|
-
console.warn(`[startup] WARNING: Kuri not available. Capture will start it on demand. ${err instanceof Error ? err.message : err}`);
|
|
17270
|
-
}
|
|
17271
17088
|
await ensureRegistered2();
|
|
17272
17089
|
const app = Fastify({ logger: options.logger ?? true });
|
|
17273
17090
|
await app.register(cors, { origin: true });
|
|
@@ -17298,7 +17115,6 @@ var init_server = __esm(async () => {
|
|
|
17298
17115
|
init_verification();
|
|
17299
17116
|
init_client2();
|
|
17300
17117
|
init_capture();
|
|
17301
|
-
init_client();
|
|
17302
17118
|
await init_routes();
|
|
17303
17119
|
});
|
|
17304
17120
|
|
|
@@ -18294,184 +18110,6 @@ function normalizeSetupScope(value) {
|
|
|
18294
18110
|
return normalized;
|
|
18295
18111
|
return "auto";
|
|
18296
18112
|
}
|
|
18297
|
-
function buildEntityIndex(items) {
|
|
18298
|
-
const index = new Map;
|
|
18299
|
-
for (const item of items) {
|
|
18300
|
-
if (item != null && typeof item === "object") {
|
|
18301
|
-
const urn = item.entityUrn;
|
|
18302
|
-
if (typeof urn === "string")
|
|
18303
|
-
index.set(urn, item);
|
|
18304
|
-
}
|
|
18305
|
-
}
|
|
18306
|
-
return index;
|
|
18307
|
-
}
|
|
18308
|
-
function detectEntityIndex(data) {
|
|
18309
|
-
if (data == null || typeof data !== "object")
|
|
18310
|
-
return null;
|
|
18311
|
-
let best = null;
|
|
18312
|
-
const check = (arr) => {
|
|
18313
|
-
if (arr.length < 2)
|
|
18314
|
-
return;
|
|
18315
|
-
const sample = arr.slice(0, 10);
|
|
18316
|
-
const withUrn = sample.filter((i) => i != null && typeof i === "object" && typeof i.entityUrn === "string").length;
|
|
18317
|
-
if (withUrn >= sample.length * 0.5 && (!best || arr.length > best.length)) {
|
|
18318
|
-
best = arr;
|
|
18319
|
-
}
|
|
18320
|
-
};
|
|
18321
|
-
const obj = data;
|
|
18322
|
-
for (const val of Object.values(obj)) {
|
|
18323
|
-
if (Array.isArray(val)) {
|
|
18324
|
-
check(val);
|
|
18325
|
-
} else if (val != null && typeof val === "object" && !Array.isArray(val)) {
|
|
18326
|
-
for (const nested of Object.values(val)) {
|
|
18327
|
-
if (Array.isArray(nested))
|
|
18328
|
-
check(nested);
|
|
18329
|
-
}
|
|
18330
|
-
}
|
|
18331
|
-
}
|
|
18332
|
-
return best ? buildEntityIndex(best) : null;
|
|
18333
|
-
}
|
|
18334
|
-
function resolvePath2(obj, path9, entityIndex) {
|
|
18335
|
-
if (!path9 || obj == null)
|
|
18336
|
-
return obj;
|
|
18337
|
-
const segments = path9.split(".");
|
|
18338
|
-
let cur = obj;
|
|
18339
|
-
for (let i = 0;i < segments.length; i++) {
|
|
18340
|
-
if (cur == null)
|
|
18341
|
-
return;
|
|
18342
|
-
const seg = segments[i];
|
|
18343
|
-
if (seg.endsWith("[]")) {
|
|
18344
|
-
const key = seg.slice(0, -2);
|
|
18345
|
-
const arr = key ? cur[key] : cur;
|
|
18346
|
-
if (!Array.isArray(arr))
|
|
18347
|
-
return;
|
|
18348
|
-
const remaining = segments.slice(i + 1).join(".");
|
|
18349
|
-
if (!remaining)
|
|
18350
|
-
return arr;
|
|
18351
|
-
return arr.flatMap((item) => {
|
|
18352
|
-
const v = resolvePath2(item, remaining, entityIndex);
|
|
18353
|
-
return v === undefined ? [] : Array.isArray(v) ? v : [v];
|
|
18354
|
-
});
|
|
18355
|
-
}
|
|
18356
|
-
const indexMatch = seg.match(/^(.+?)\[(\d+)\]$/);
|
|
18357
|
-
if (indexMatch) {
|
|
18358
|
-
const key = indexMatch[1];
|
|
18359
|
-
const idx = parseInt(indexMatch[2], 10);
|
|
18360
|
-
const arr = key ? cur[key] : cur;
|
|
18361
|
-
if (!Array.isArray(arr) || idx >= arr.length)
|
|
18362
|
-
return;
|
|
18363
|
-
cur = arr[idx];
|
|
18364
|
-
continue;
|
|
18365
|
-
}
|
|
18366
|
-
const rec = cur;
|
|
18367
|
-
let val = rec[seg];
|
|
18368
|
-
if (val == null && entityIndex) {
|
|
18369
|
-
const ref = rec[`*${seg}`];
|
|
18370
|
-
if (typeof ref === "string") {
|
|
18371
|
-
val = entityIndex.get(ref);
|
|
18372
|
-
}
|
|
18373
|
-
}
|
|
18374
|
-
cur = val;
|
|
18375
|
-
}
|
|
18376
|
-
return cur;
|
|
18377
|
-
}
|
|
18378
|
-
function extractFields(data, fields, entityIndex) {
|
|
18379
|
-
if (data == null)
|
|
18380
|
-
return data;
|
|
18381
|
-
function mapItem(item) {
|
|
18382
|
-
const out = {};
|
|
18383
|
-
for (const f of fields) {
|
|
18384
|
-
const colonIdx = f.indexOf(":");
|
|
18385
|
-
const alias = colonIdx >= 0 ? f.slice(0, colonIdx) : f.split(".").pop();
|
|
18386
|
-
const path9 = colonIdx >= 0 ? f.slice(colonIdx + 1) : f;
|
|
18387
|
-
const resolved = resolvePath2(item, path9, entityIndex ?? undefined) ?? [];
|
|
18388
|
-
out[alias] = Array.isArray(resolved) ? resolved.length === 0 ? null : resolved.length === 1 ? resolved[0] : resolved : resolved;
|
|
18389
|
-
}
|
|
18390
|
-
return out;
|
|
18391
|
-
}
|
|
18392
|
-
function hasValue(v) {
|
|
18393
|
-
if (v == null)
|
|
18394
|
-
return false;
|
|
18395
|
-
if (Array.isArray(v))
|
|
18396
|
-
return v.length > 0;
|
|
18397
|
-
return true;
|
|
18398
|
-
}
|
|
18399
|
-
if (Array.isArray(data)) {
|
|
18400
|
-
return data.map(mapItem).filter((row) => Object.values(row).some(hasValue));
|
|
18401
|
-
}
|
|
18402
|
-
return mapItem(data);
|
|
18403
|
-
}
|
|
18404
|
-
function hasMeaningfulValue(value) {
|
|
18405
|
-
if (value == null)
|
|
18406
|
-
return false;
|
|
18407
|
-
if (typeof value === "string")
|
|
18408
|
-
return value.trim().length > 0;
|
|
18409
|
-
if (typeof value === "number" || typeof value === "boolean")
|
|
18410
|
-
return true;
|
|
18411
|
-
if (Array.isArray(value))
|
|
18412
|
-
return value.some((item) => hasMeaningfulValue(item));
|
|
18413
|
-
if (typeof value === "object")
|
|
18414
|
-
return Object.values(value).some((item) => hasMeaningfulValue(item));
|
|
18415
|
-
return false;
|
|
18416
|
-
}
|
|
18417
|
-
function isPlainRecord(value) {
|
|
18418
|
-
return value != null && typeof value === "object" && !Array.isArray(value);
|
|
18419
|
-
}
|
|
18420
|
-
function isScalarLike(value) {
|
|
18421
|
-
if (value == null)
|
|
18422
|
-
return false;
|
|
18423
|
-
if (typeof value === "string")
|
|
18424
|
-
return value.trim().length > 0;
|
|
18425
|
-
if (typeof value === "number" || typeof value === "boolean")
|
|
18426
|
-
return true;
|
|
18427
|
-
if (Array.isArray(value)) {
|
|
18428
|
-
return value.length > 0 && value.every((item) => item == null || typeof item === "string" || typeof item === "number" || typeof item === "boolean");
|
|
18429
|
-
}
|
|
18430
|
-
return false;
|
|
18431
|
-
}
|
|
18432
|
-
function looksStructuredForDirectOutput(value) {
|
|
18433
|
-
if (Array.isArray(value)) {
|
|
18434
|
-
const sample = value.filter(isPlainRecord).slice(0, 3);
|
|
18435
|
-
if (sample.length === 0)
|
|
18436
|
-
return false;
|
|
18437
|
-
const simpleRows = sample.filter((row) => {
|
|
18438
|
-
const keys2 = Object.keys(row);
|
|
18439
|
-
const scalarFields2 = Object.values(row).filter(isScalarLike).length;
|
|
18440
|
-
return keys2.length > 0 && keys2.length <= 20 && scalarFields2 >= 2;
|
|
18441
|
-
});
|
|
18442
|
-
return simpleRows.length >= Math.ceil(sample.length / 2);
|
|
18443
|
-
}
|
|
18444
|
-
if (!isPlainRecord(value))
|
|
18445
|
-
return false;
|
|
18446
|
-
const keys = Object.keys(value);
|
|
18447
|
-
if (keys.length === 0 || keys.length > 20)
|
|
18448
|
-
return false;
|
|
18449
|
-
const scalarFields = Object.values(value).filter(isScalarLike).length;
|
|
18450
|
-
return scalarFields >= 2;
|
|
18451
|
-
}
|
|
18452
|
-
function applyTransforms(result, flags) {
|
|
18453
|
-
let data = result;
|
|
18454
|
-
const entityIndex = detectEntityIndex(result);
|
|
18455
|
-
const pathFlag = flags.path;
|
|
18456
|
-
if (pathFlag) {
|
|
18457
|
-
data = resolvePath2(data, pathFlag, entityIndex);
|
|
18458
|
-
if (data === undefined) {
|
|
18459
|
-
process.stderr.write(`[unbrowse] warning: --path "${pathFlag}" resolved to undefined. Check path against response structure.
|
|
18460
|
-
`);
|
|
18461
|
-
return [];
|
|
18462
|
-
}
|
|
18463
|
-
}
|
|
18464
|
-
const extractFlag = flags.extract;
|
|
18465
|
-
if (extractFlag) {
|
|
18466
|
-
const fields = extractFlag.split(",").map((f) => f.trim());
|
|
18467
|
-
data = extractFields(data, fields, entityIndex);
|
|
18468
|
-
}
|
|
18469
|
-
const limitFlag = flags.limit;
|
|
18470
|
-
if (limitFlag && Array.isArray(data)) {
|
|
18471
|
-
data = data.slice(0, Number(limitFlag));
|
|
18472
|
-
}
|
|
18473
|
-
return data;
|
|
18474
|
-
}
|
|
18475
18113
|
function slimTrace(obj) {
|
|
18476
18114
|
const trace = obj.trace;
|
|
18477
18115
|
const out = {
|
|
@@ -18487,67 +18125,14 @@ function slimTrace(obj) {
|
|
|
18487
18125
|
};
|
|
18488
18126
|
if ("result" in obj)
|
|
18489
18127
|
out.result = obj.result;
|
|
18128
|
+
if (obj.available_endpoints)
|
|
18129
|
+
out.available_endpoints = obj.available_endpoints;
|
|
18130
|
+
if (obj.source)
|
|
18131
|
+
out.source = obj.source;
|
|
18132
|
+
if (obj.skill)
|
|
18133
|
+
out.skill = obj.skill;
|
|
18490
18134
|
return out;
|
|
18491
18135
|
}
|
|
18492
|
-
function wrapWithHints(obj) {
|
|
18493
|
-
const hints = obj.extraction_hints;
|
|
18494
|
-
if (!hints)
|
|
18495
|
-
return obj;
|
|
18496
|
-
const resultStr = JSON.stringify(obj.result ?? "");
|
|
18497
|
-
if (resultStr.length < 2000)
|
|
18498
|
-
return obj;
|
|
18499
|
-
const trace = obj.trace;
|
|
18500
|
-
return {
|
|
18501
|
-
trace: trace ? {
|
|
18502
|
-
trace_id: trace.trace_id,
|
|
18503
|
-
skill_id: trace.skill_id,
|
|
18504
|
-
endpoint_id: trace.endpoint_id,
|
|
18505
|
-
success: trace.success,
|
|
18506
|
-
status_code: trace.status_code
|
|
18507
|
-
} : undefined,
|
|
18508
|
-
_response_too_large: `${resultStr.length} bytes \u2014 use extraction flags below to get structured data`,
|
|
18509
|
-
extraction_hints: hints
|
|
18510
|
-
};
|
|
18511
|
-
}
|
|
18512
|
-
function schemaOnly(obj) {
|
|
18513
|
-
const trace = obj.trace;
|
|
18514
|
-
return {
|
|
18515
|
-
trace: trace ? { trace_id: trace.trace_id, skill_id: trace.skill_id, endpoint_id: trace.endpoint_id, success: trace.success } : undefined,
|
|
18516
|
-
extraction_hints: obj.extraction_hints ?? null,
|
|
18517
|
-
response_schema: obj.response_schema ?? null
|
|
18518
|
-
};
|
|
18519
|
-
}
|
|
18520
|
-
function autoExtractOrWrap(obj) {
|
|
18521
|
-
const hints = obj.extraction_hints;
|
|
18522
|
-
const resultStr = JSON.stringify(obj.result ?? "");
|
|
18523
|
-
if (resultStr.length < 2000)
|
|
18524
|
-
return obj;
|
|
18525
|
-
if (looksStructuredForDirectOutput(obj.result)) {
|
|
18526
|
-
return slimTrace({ ...obj, extraction_hints: undefined, response_schema: undefined });
|
|
18527
|
-
}
|
|
18528
|
-
if (!hints)
|
|
18529
|
-
return obj;
|
|
18530
|
-
if (hints.confidence === "high") {
|
|
18531
|
-
const syntheticFlags = {};
|
|
18532
|
-
if (hints.path)
|
|
18533
|
-
syntheticFlags.path = hints.path;
|
|
18534
|
-
if (hints.fields.length > 0)
|
|
18535
|
-
syntheticFlags.extract = hints.fields.join(",");
|
|
18536
|
-
syntheticFlags.limit = "20";
|
|
18537
|
-
const extracted = applyTransforms(obj.result, syntheticFlags);
|
|
18538
|
-
if (!hasMeaningfulValue(extracted))
|
|
18539
|
-
return wrapWithHints(obj);
|
|
18540
|
-
const slimmed = slimTrace({ ...obj, result: extracted });
|
|
18541
|
-
slimmed._auto_extracted = {
|
|
18542
|
-
applied: hints.cli_args,
|
|
18543
|
-
confidence: hints.confidence,
|
|
18544
|
-
all_fields: hints.schema_tree,
|
|
18545
|
-
note: "Auto-extracted using response_schema. Add/remove fields with --extract, change array with --path, or use --raw for full response."
|
|
18546
|
-
};
|
|
18547
|
-
return slimmed;
|
|
18548
|
-
}
|
|
18549
|
-
return wrapWithHints(obj);
|
|
18550
|
-
}
|
|
18551
18136
|
async function cmdHealth(flags) {
|
|
18552
18137
|
output(await api3("GET", "/health"), !!flags.pretty);
|
|
18553
18138
|
}
|
|
@@ -18559,6 +18144,8 @@ async function cmdResolve(flags) {
|
|
|
18559
18144
|
const url = flags.url;
|
|
18560
18145
|
const domain = flags.domain;
|
|
18561
18146
|
const explicitEndpointId = flags["endpoint-id"];
|
|
18147
|
+
const autoExecute = !!flags.execute;
|
|
18148
|
+
const extraParams = flags.params ? JSON.parse(flags.params) : {};
|
|
18562
18149
|
if (url) {
|
|
18563
18150
|
body.params = { url };
|
|
18564
18151
|
body.context = { url };
|
|
@@ -18570,14 +18157,19 @@ async function cmdResolve(flags) {
|
|
|
18570
18157
|
body.params = { ...body.params ?? {}, endpoint_id: explicitEndpointId };
|
|
18571
18158
|
}
|
|
18572
18159
|
if (flags.params) {
|
|
18573
|
-
body.params = { ...body.params ?? {}, ...
|
|
18160
|
+
body.params = { ...body.params ?? {}, ...extraParams };
|
|
18574
18161
|
}
|
|
18575
18162
|
if (flags["dry-run"])
|
|
18576
18163
|
body.dry_run = true;
|
|
18577
18164
|
if (flags["force-capture"])
|
|
18578
18165
|
body.force_capture = true;
|
|
18579
|
-
const hasTransforms = !!(flags.path || flags.extract);
|
|
18580
18166
|
body.projection = { raw: true };
|
|
18167
|
+
function execBody(endpointId) {
|
|
18168
|
+
return { params: { endpoint_id: endpointId, ...extraParams }, intent, projection: { raw: true } };
|
|
18169
|
+
}
|
|
18170
|
+
function resolveSkillId() {
|
|
18171
|
+
return result.skill?.skill_id ?? result.skill_id;
|
|
18172
|
+
}
|
|
18581
18173
|
const startedAt = Date.now();
|
|
18582
18174
|
let result = await withPendingNotice(api3("POST", "/v1/intent/resolve", body), "Still working. First-time capture/indexing for a site can take 20-80s. Waiting is usually better than falling back.");
|
|
18583
18175
|
const resultError = result.result?.error ?? result.error;
|
|
@@ -18595,14 +18187,18 @@ async function cmdResolve(flags) {
|
|
|
18595
18187
|
}
|
|
18596
18188
|
}
|
|
18597
18189
|
if (explicitEndpointId && result.available_endpoints) {
|
|
18598
|
-
const skillId =
|
|
18190
|
+
const skillId = resolveSkillId();
|
|
18599
18191
|
if (skillId) {
|
|
18600
|
-
|
|
18601
|
-
|
|
18602
|
-
|
|
18603
|
-
|
|
18604
|
-
|
|
18605
|
-
|
|
18192
|
+
result = await withPendingNotice(api3("POST", `/v1/skills/${skillId}/execute`, execBody(explicitEndpointId)), "Executing selected endpoint...");
|
|
18193
|
+
}
|
|
18194
|
+
}
|
|
18195
|
+
if (autoExecute && result.available_endpoints && !result.result) {
|
|
18196
|
+
const endpoints = result.available_endpoints;
|
|
18197
|
+
const skillId = resolveSkillId();
|
|
18198
|
+
if (skillId && endpoints.length > 0) {
|
|
18199
|
+
const bestEndpoint = endpoints[0];
|
|
18200
|
+
info(`Auto-executing endpoint: ${bestEndpoint.description ?? bestEndpoint.endpoint_id}`);
|
|
18201
|
+
result = await withPendingNotice(api3("POST", `/v1/skills/${skillId}/execute`, execBody(bestEndpoint.endpoint_id)), "Executing best endpoint...");
|
|
18606
18202
|
}
|
|
18607
18203
|
}
|
|
18608
18204
|
const resultObj = result.result;
|
|
@@ -18619,13 +18215,7 @@ async function cmdResolve(flags) {
|
|
|
18619
18215
|
if (Date.now() - startedAt > 3000 && result.source === "live-capture") {
|
|
18620
18216
|
info("Live capture finished. Future runs against this site should be much faster.");
|
|
18621
18217
|
}
|
|
18622
|
-
|
|
18623
|
-
output(schemaOnly(result), !!flags.pretty);
|
|
18624
|
-
return;
|
|
18625
|
-
}
|
|
18626
|
-
if (hasTransforms && result.result != null) {
|
|
18627
|
-
result = slimTrace({ ...result, result: applyTransforms(result.result, flags) });
|
|
18628
|
-
}
|
|
18218
|
+
result = slimTrace(result);
|
|
18629
18219
|
const skill = result.skill;
|
|
18630
18220
|
const trace = result.trace;
|
|
18631
18221
|
if (skill?.skill_id && trace) {
|
|
@@ -18633,6 +18223,81 @@ async function cmdResolve(flags) {
|
|
|
18633
18223
|
}
|
|
18634
18224
|
output(result, !!flags.pretty);
|
|
18635
18225
|
}
|
|
18226
|
+
function drillPath(data, path9) {
|
|
18227
|
+
const segments = path9.split(/\./).flatMap((s) => {
|
|
18228
|
+
const m = s.match(/^(.+)\[\]$/);
|
|
18229
|
+
return m ? [m[1], "[]"] : [s];
|
|
18230
|
+
});
|
|
18231
|
+
let values = [data];
|
|
18232
|
+
for (const seg of segments) {
|
|
18233
|
+
if (values.length === 0)
|
|
18234
|
+
return [];
|
|
18235
|
+
if (seg === "[]") {
|
|
18236
|
+
values = values.flatMap((v) => Array.isArray(v) ? v : [v]);
|
|
18237
|
+
continue;
|
|
18238
|
+
}
|
|
18239
|
+
values = values.flatMap((v) => {
|
|
18240
|
+
if (v == null)
|
|
18241
|
+
return [];
|
|
18242
|
+
if (Array.isArray(v)) {
|
|
18243
|
+
return v.map((item) => item?.[seg]).filter((x) => x !== undefined);
|
|
18244
|
+
}
|
|
18245
|
+
if (typeof v === "object") {
|
|
18246
|
+
const val = v[seg];
|
|
18247
|
+
return val !== undefined ? [val] : [];
|
|
18248
|
+
}
|
|
18249
|
+
return [];
|
|
18250
|
+
});
|
|
18251
|
+
}
|
|
18252
|
+
return values;
|
|
18253
|
+
}
|
|
18254
|
+
function resolveDotPath(obj, path9) {
|
|
18255
|
+
let cur = obj;
|
|
18256
|
+
for (const key of path9.split(".")) {
|
|
18257
|
+
if (cur == null || typeof cur !== "object")
|
|
18258
|
+
return;
|
|
18259
|
+
cur = cur[key];
|
|
18260
|
+
}
|
|
18261
|
+
return cur;
|
|
18262
|
+
}
|
|
18263
|
+
function applyExtract(items, extractSpec) {
|
|
18264
|
+
const fields = extractSpec.split(",").map((f) => {
|
|
18265
|
+
const colon = f.indexOf(":");
|
|
18266
|
+
if (colon > 0)
|
|
18267
|
+
return { alias: f.slice(0, colon), path: f.slice(colon + 1) };
|
|
18268
|
+
return { alias: f, path: f };
|
|
18269
|
+
});
|
|
18270
|
+
return items.map((item) => {
|
|
18271
|
+
const row = {};
|
|
18272
|
+
let hasValue = false;
|
|
18273
|
+
for (const { alias, path: path9 } of fields) {
|
|
18274
|
+
const val = resolveDotPath(item, path9);
|
|
18275
|
+
row[alias] = val ?? null;
|
|
18276
|
+
if (val != null)
|
|
18277
|
+
hasValue = true;
|
|
18278
|
+
}
|
|
18279
|
+
return hasValue ? row : null;
|
|
18280
|
+
}).filter((row) => row !== null);
|
|
18281
|
+
}
|
|
18282
|
+
function schemaOf(value, depth = 4) {
|
|
18283
|
+
if (value == null)
|
|
18284
|
+
return "null";
|
|
18285
|
+
if (Array.isArray(value)) {
|
|
18286
|
+
if (value.length === 0)
|
|
18287
|
+
return ["unknown"];
|
|
18288
|
+
return [schemaOf(value[0], depth - 1)];
|
|
18289
|
+
}
|
|
18290
|
+
if (typeof value === "object") {
|
|
18291
|
+
if (depth <= 0)
|
|
18292
|
+
return "object";
|
|
18293
|
+
const out = {};
|
|
18294
|
+
for (const [k, v] of Object.entries(value)) {
|
|
18295
|
+
out[k] = schemaOf(v, depth - 1);
|
|
18296
|
+
}
|
|
18297
|
+
return out;
|
|
18298
|
+
}
|
|
18299
|
+
return typeof value;
|
|
18300
|
+
}
|
|
18636
18301
|
async function cmdExecute(flags) {
|
|
18637
18302
|
const skillId = flags.skill;
|
|
18638
18303
|
if (!skillId)
|
|
@@ -18654,15 +18319,41 @@ async function cmdExecute(flags) {
|
|
|
18654
18319
|
body.dry_run = true;
|
|
18655
18320
|
if (flags["confirm-unsafe"])
|
|
18656
18321
|
body.confirm_unsafe = true;
|
|
18657
|
-
const hasTransforms = !!(flags.path || flags.extract);
|
|
18658
18322
|
body.projection = { raw: true };
|
|
18659
18323
|
let result = await withPendingNotice(api3("POST", `/v1/skills/${skillId}/execute`, body), "Still working. This endpoint may require browser replay or first-time auth/capture setup.");
|
|
18660
|
-
|
|
18661
|
-
|
|
18324
|
+
result = slimTrace(result);
|
|
18325
|
+
const pathFlag = flags.path;
|
|
18326
|
+
const extractFlag = flags.extract;
|
|
18327
|
+
const limitFlag = flags.limit ? Number(flags.limit) : undefined;
|
|
18328
|
+
const schemaFlag = !!flags.schema;
|
|
18329
|
+
const rawFlag = !!flags.raw;
|
|
18330
|
+
if (schemaFlag && !rawFlag) {
|
|
18331
|
+
const data = result.result;
|
|
18332
|
+
output({ trace: result.trace, schema: schemaOf(data) }, !!flags.pretty);
|
|
18662
18333
|
return;
|
|
18663
18334
|
}
|
|
18664
|
-
if (
|
|
18665
|
-
|
|
18335
|
+
if (!rawFlag && (pathFlag || extractFlag || limitFlag)) {
|
|
18336
|
+
let data = pathFlag ? drillPath(result.result, pathFlag) : result.result;
|
|
18337
|
+
const items = Array.isArray(data) ? data : data != null ? [data] : [];
|
|
18338
|
+
const extracted = extractFlag ? applyExtract(items, extractFlag) : items;
|
|
18339
|
+
const limited = limitFlag ? extracted.slice(0, limitFlag) : extracted;
|
|
18340
|
+
output({ trace: result.trace, data: limited, count: limited.length }, !!flags.pretty);
|
|
18341
|
+
return;
|
|
18342
|
+
}
|
|
18343
|
+
if (!rawFlag && !pathFlag && !extractFlag && !schemaFlag) {
|
|
18344
|
+
const raw = JSON.stringify(result.result);
|
|
18345
|
+
if (raw && raw.length > 2048) {
|
|
18346
|
+
const schema = schemaOf(result.result);
|
|
18347
|
+
output({
|
|
18348
|
+
trace: result.trace,
|
|
18349
|
+
extraction_hints: {
|
|
18350
|
+
message: "Response is large. Use --path/--extract/--limit to filter, or --schema to see structure, or --raw for full response.",
|
|
18351
|
+
schema_tree: schema,
|
|
18352
|
+
response_bytes: raw.length
|
|
18353
|
+
}
|
|
18354
|
+
}, !!flags.pretty);
|
|
18355
|
+
return;
|
|
18356
|
+
}
|
|
18666
18357
|
}
|
|
18667
18358
|
output(result, !!flags.pretty);
|
|
18668
18359
|
}
|
|
@@ -18683,6 +18374,18 @@ async function cmdFeedback(flags) {
|
|
|
18683
18374
|
body.diagnostics = JSON.parse(flags.diagnostics);
|
|
18684
18375
|
output(await api3("POST", "/v1/feedback", body), !!flags.pretty);
|
|
18685
18376
|
}
|
|
18377
|
+
/**
 * CLI handler for `unbrowse review`: pushes reviewed endpoint metadata back to a skill.
 *
 * Validates `--skill` and `--endpoints`, POSTs `{ endpoints }` to
 * `/v1/skills/<skill>/review` through `api3`, and prints the response with `output`.
 *
 * @param {object} flags - Parsed CLI flags. Reads `skill`, `endpoints` (a JSON-encoded,
 *   non-empty array) and `pretty`.
 * @returns {Promise<void>}
 */
async function cmdReview(flags) {
  const skillId = flags.skill;
  // NOTE(review): the checks below rely on `die` not returning (presumably it exits
  // the process) — confirm against its definition elsewhere in this file.
  if (!skillId)
    die("--skill is required");
  const endpointsJson = flags.endpoints;
  if (!endpointsJson)
    die("--endpoints is required (JSON array of {endpoint_id, description?, action_kind?, resource_kind?})");
  let endpoints;
  try {
    endpoints = JSON.parse(endpointsJson);
  } catch (err) {
    // The value comes straight from the command line; report malformed JSON through
    // the same usage-error path as the other checks instead of a raw SyntaxError.
    die(`--endpoints must be valid JSON: ${err.message}`);
  }
  if (!Array.isArray(endpoints) || endpoints.length === 0)
    die("--endpoints must be a non-empty JSON array");
  output(await api3("POST", `/v1/skills/${skillId}/review`, { endpoints }), !!flags.pretty);
}
|
|
18686
18389
|
async function cmdLogin(flags) {
|
|
18687
18390
|
const url = flags.url;
|
|
18688
18391
|
if (!url)
|
|
@@ -18761,6 +18464,7 @@ var CLI_REFERENCE = {
|
|
|
18761
18464
|
{ name: "resolve", usage: '--intent "..." --url "..." [opts]', desc: "Resolve intent \u2192 search/capture/execute" },
|
|
18762
18465
|
{ name: "execute", usage: "--skill ID --endpoint ID [opts]", desc: "Execute a specific endpoint" },
|
|
18763
18466
|
{ name: "feedback", usage: "--skill ID --endpoint ID --rating N", desc: "Submit feedback (mandatory after resolve)" },
|
|
18467
|
+
{ name: "review", usage: "--skill ID --endpoints '[...]'", desc: "Push reviewed descriptions/metadata back to skill" },
|
|
18764
18468
|
{ name: "login", usage: '--url "..."', desc: "Interactive browser login" },
|
|
18765
18469
|
{ name: "skills", usage: "", desc: "List all skills" },
|
|
18766
18470
|
{ name: "skill", usage: "<id>", desc: "Get skill details" },
|
|
@@ -18802,11 +18506,13 @@ var CLI_REFERENCE = {
|
|
|
18802
18506
|
],
|
|
18803
18507
|
examples: [
|
|
18804
18508
|
"unbrowse setup",
|
|
18509
|
+
'unbrowse resolve --intent "top stories" --url "https://news.ycombinator.com" --execute',
|
|
18805
18510
|
'unbrowse resolve --intent "get timeline" --url "https://x.com"',
|
|
18806
18511
|
"unbrowse execute --skill abc --endpoint def --pretty",
|
|
18807
|
-
|
|
18808
|
-
'unbrowse execute --skill abc --endpoint def --path "data.
|
|
18809
|
-
"unbrowse feedback --skill abc --endpoint def --rating 5"
|
|
18512
|
+
"unbrowse execute --skill abc --endpoint def --schema --pretty",
|
|
18513
|
+
'unbrowse execute --skill abc --endpoint def --path "data.items[]" --extract "name,url" --limit 10 --pretty',
|
|
18514
|
+
"unbrowse feedback --skill abc --endpoint def --rating 5",
|
|
18515
|
+
`unbrowse review --skill abc --endpoints '[{"endpoint_id":"def","description":"..."}]'`
|
|
18810
18516
|
]
|
|
18811
18517
|
};
|
|
18812
18518
|
function printHelp() {
|
|
@@ -18860,9 +18566,9 @@ function cmdStop(flags) {
|
|
|
18860
18566
|
}
|
|
18861
18567
|
async function cmdUpgrade(flags) {
|
|
18862
18568
|
info("Checking for updates...");
|
|
18863
|
-
const { execSync:
|
|
18569
|
+
const { execSync: execSync3 } = await import("child_process");
|
|
18864
18570
|
try {
|
|
18865
|
-
const result =
|
|
18571
|
+
const result = execSync3("npm view unbrowse version", { encoding: "utf-8", timeout: 1e4 }).trim();
|
|
18866
18572
|
const versionInfo = checkServerVersion(BASE_URL, import.meta.url);
|
|
18867
18573
|
const installed = versionInfo?.installed ?? "unknown";
|
|
18868
18574
|
if (result === installed) {
|
|
@@ -18926,9 +18632,7 @@ async function cmdSiteTask(pack, taskName, flags) {
|
|
|
18926
18632
|
body.dry_run = true;
|
|
18927
18633
|
if (flags["force-capture"])
|
|
18928
18634
|
body.force_capture = true;
|
|
18929
|
-
|
|
18930
|
-
if (flags.raw || hasTransforms)
|
|
18931
|
-
body.projection = { raw: true };
|
|
18635
|
+
body.projection = { raw: true };
|
|
18932
18636
|
const startedAt = Date.now();
|
|
18933
18637
|
let result = await withPendingNotice(api3("POST", "/v1/intent/resolve", body), "Still working. First-time capture/indexing for a site can take 20-80s.");
|
|
18934
18638
|
if (result && typeof result === "object" && result.error === "auth_required") {
|
|
@@ -18937,15 +18641,7 @@ async function cmdSiteTask(pack, taskName, flags) {
|
|
|
18937
18641
|
output({ ...result, _deps: { ...deps2, requires: ["login"] }, _next: [`unbrowse ${pack.site} login`] }, !!flags.pretty);
|
|
18938
18642
|
process.exit(2);
|
|
18939
18643
|
}
|
|
18940
|
-
|
|
18941
|
-
output(schemaOnly(result), !!flags.pretty);
|
|
18942
|
-
return;
|
|
18943
|
-
}
|
|
18944
|
-
if (hasTransforms && result.result != null) {
|
|
18945
|
-
result = slimTrace({ ...result, result: applyTransforms(result.result, flags) });
|
|
18946
|
-
} else if (!flags.raw && result.result != null) {
|
|
18947
|
-
result = autoExtractOrWrap(result);
|
|
18948
|
-
}
|
|
18644
|
+
result = slimTrace(result);
|
|
18949
18645
|
const deps = buildDepsMetadata(pack, taskName);
|
|
18950
18646
|
result._deps = deps;
|
|
18951
18647
|
result._shortcut = `${pack.site} ${taskName}`;
|
|
@@ -18977,14 +18673,9 @@ async function cmdSiteBatch(pack, batchArg, flags) {
|
|
|
18977
18673
|
};
|
|
18978
18674
|
if (flags["force-capture"])
|
|
18979
18675
|
body.force_capture = true;
|
|
18980
|
-
|
|
18981
|
-
|
|
18982
|
-
|
|
18983
|
-
let res = await api3("POST", "/v1/intent/resolve", body);
|
|
18984
|
-
if (!flags.raw && res.result != null) {
|
|
18985
|
-
res = autoExtractOrWrap(res);
|
|
18986
|
-
}
|
|
18987
|
-
return { task, result: res };
|
|
18676
|
+
body.projection = { raw: true };
|
|
18677
|
+
const res = await api3("POST", "/v1/intent/resolve", body);
|
|
18678
|
+
return { task, result: slimTrace(res) };
|
|
18988
18679
|
});
|
|
18989
18680
|
const waveResult = await Promise.all(promises);
|
|
18990
18681
|
waveResults.push({
|
|
@@ -19085,6 +18776,54 @@ async function cmdForward() {
|
|
|
19085
18776
|
/**
 * CLI handler for `unbrowse close`: POSTs to `/v1/browse/close` and prints the
 * response without pretty-printing.
 *
 * @returns {Promise<void>}
 */
async function cmdClose() {
  const response = await api3("POST", "/v1/browse/close");
  const pretty = false;
  output(response, pretty);
}
|
|
18779
|
+
/**
 * CLI handler for `unbrowse connect-chrome`: makes the user's own Google Chrome
 * reachable over the DevTools protocol on port 9222.
 *
 * Steps:
 *  1. If a non-headless browser already answers on 127.0.0.1:9222, report that and return.
 *  2. Otherwise stop processes matching `kuri/chrome-profile`, quit Chrome, wait two
 *     seconds, and relaunch Chrome with `--remote-debugging-port=9222`.
 *  3. Poll the `/json/version` endpoint for up to 15 seconds.
 *
 * On failure an error is printed and `process.exitCode` is set to 1.
 * NOTE(review): only macOS and a `google-chrome` binary on PATH are handled; every
 * other platform takes the non-macOS branch — confirm whether Windows needs support.
 *
 * @returns {Promise<void>}
 */
async function cmdConnectChrome() {
  const { execSync: execSync3, spawn: spawnProc } = __require("child_process");
  const versionUrl = "http://127.0.0.1:9222/json/version";
  const isMac = process.platform === "darwin";
  const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
  const fetchVersion = (timeoutMs) => fetch(versionUrl, { signal: AbortSignal.timeout(timeoutMs) });
  // Runs a shell command on a best-effort basis: a non-zero exit (e.g. nothing
  // matched) or a missing tool must not abort the reconnect flow.
  const tryExec = (cmd, extraOpts = {}) => {
    try {
      execSync3(cmd, { stdio: "ignore", ...extraOpts });
    } catch {}
  };

  // Any failure here (connection refused, timeout, unparsable body) just means no
  // usable browser is listening yet, so fall through to the relaunch.
  try {
    const res = await fetchVersion(1000);
    if (res.ok) {
      const data = await res.json();
      if (!data["User-Agent"]?.includes("Headless")) {
        console.log("Your Chrome is already connected with CDP on port 9222.");
        console.log("Browse commands will use your real browser with all your sessions.");
        return;
      }
    }
  } catch {}

  // presumably the managed headless Chrome started by kuri — confirm against the kuri launcher
  tryExec("pkill -f kuri/chrome-profile");
  console.log("Quitting Chrome to relaunch with remote debugging...");
  if (isMac) {
    tryExec('osascript -e "quit app \\"Google Chrome\\""', { timeout: 5000 });
  } else {
    // Match the exact process name. `pkill -f chrome` matches full command lines and
    // would also signal this CLI (its argv contains "connect-chrome") and any
    // unrelated process that merely mentions "chrome".
    tryExec("pkill -x chrome");
  }
  await sleep(2000);

  console.log("Launching Chrome with remote debugging on port 9222...");
  const chromeBin = isMac ? "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome" : "google-chrome";
  const chromeArgs = isMac
    ? ["--remote-debugging-port=9222", "--no-first-run", "--no-default-browser-check"]
    : ["--remote-debugging-port=9222"];
  const child = spawnProc(chromeBin, chromeArgs, { stdio: "ignore", detached: true });
  // A missing binary is reported asynchronously through the 'error' event; with no
  // listener attached Node would throw it as an uncaught exception.
  let launchError = null;
  child.on("error", (err) => {
    launchError = err;
  });
  child.unref();

  const deadline = Date.now() + 15000;
  while (Date.now() < deadline && launchError === null) {
    try {
      const res = await fetchVersion(500);
      if (res.ok) {
        console.log("Connected. Your real Chrome is now available for browse commands.");
        console.log("All your logged-in sessions (LinkedIn, X, etc.) will work.");
        console.log('Run: unbrowse go "https://linkedin.com/feed/"');
        return;
      }
    } catch {}
    await sleep(500);
  }

  if (launchError !== null) {
    console.error(`Could not launch Chrome (${chromeBin}): ${launchError.message}`);
  } else {
    console.error("Could not connect to Chrome. Make sure all Chrome windows are closed and try again.");
  }
  // Signal the failure to calling scripts without cutting off pending output.
  process.exitCode = 1;
}
|
|
19088
18827
|
async function main() {
|
|
19089
18828
|
const { command, args, flags } = parseArgs(process.argv);
|
|
19090
18829
|
const noAutoStart = !!flags["no-auto-start"];
|
|
@@ -19106,6 +18845,8 @@ async function main() {
|
|
|
19106
18845
|
return cmdRestart(flags);
|
|
19107
18846
|
if (command === "upgrade" || command === "update")
|
|
19108
18847
|
return cmdUpgrade(flags);
|
|
18848
|
+
if (command === "connect-chrome")
|
|
18849
|
+
return cmdConnectChrome();
|
|
19109
18850
|
const KNOWN_COMMANDS = new Set([
|
|
19110
18851
|
"health",
|
|
19111
18852
|
"setup",
|
|
@@ -19114,6 +18855,7 @@ async function main() {
|
|
|
19114
18855
|
"exec",
|
|
19115
18856
|
"feedback",
|
|
19116
18857
|
"fb",
|
|
18858
|
+
"review",
|
|
19117
18859
|
"login",
|
|
19118
18860
|
"skills",
|
|
19119
18861
|
"skill",
|
|
@@ -19139,7 +18881,8 @@ async function main() {
|
|
|
19139
18881
|
"eval",
|
|
19140
18882
|
"back",
|
|
19141
18883
|
"forward",
|
|
19142
|
-
"close"
|
|
18884
|
+
"close",
|
|
18885
|
+
"connect-chrome"
|
|
19143
18886
|
]);
|
|
19144
18887
|
if (!KNOWN_COMMANDS.has(command)) {
|
|
19145
18888
|
const pack = findSitePack(command);
|
|
@@ -19173,6 +18916,8 @@ async function main() {
|
|
|
19173
18916
|
case "feedback":
|
|
19174
18917
|
case "fb":
|
|
19175
18918
|
return cmdFeedback(flags);
|
|
18919
|
+
case "review":
|
|
18920
|
+
return cmdReview(flags);
|
|
19176
18921
|
case "login":
|
|
19177
18922
|
return cmdLogin(flags);
|
|
19178
18923
|
case "skills":
|
|
@@ -19215,6 +18960,8 @@ async function main() {
|
|
|
19215
18960
|
return cmdForward();
|
|
19216
18961
|
case "close":
|
|
19217
18962
|
return cmdClose();
|
|
18963
|
+
case "connect-chrome":
|
|
18964
|
+
return cmdConnectChrome();
|
|
19218
18965
|
default:
|
|
19219
18966
|
info(`Unknown command: ${command}`);
|
|
19220
18967
|
printHelp();
|