@vellumai/assistant 0.3.3 → 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -16
- package/package.json +1 -1
- package/src/__tests__/call-orchestrator.test.ts +321 -0
- package/src/__tests__/channel-approval-routes.test.ts +382 -124
- package/src/__tests__/channel-approvals.test.ts +51 -2
- package/src/__tests__/channel-delivery-store.test.ts +30 -4
- package/src/__tests__/channel-guardian.test.ts +187 -0
- package/src/__tests__/config-schema.test.ts +1 -1
- package/src/__tests__/daemon-lifecycle.test.ts +635 -0
- package/src/__tests__/gateway-only-enforcement.test.ts +19 -13
- package/src/__tests__/handlers-twilio-config.test.ts +73 -0
- package/src/__tests__/secret-scanner.test.ts +223 -0
- package/src/__tests__/shell-parser-property.test.ts +357 -2
- package/src/__tests__/system-prompt.test.ts +25 -1
- package/src/__tests__/tool-executor-lifecycle-events.test.ts +34 -1
- package/src/__tests__/user-reference.test.ts +68 -0
- package/src/calls/call-orchestrator.ts +63 -11
- package/src/cli/map.ts +6 -0
- package/src/commands/__tests__/cc-command-registry.test.ts +67 -0
- package/src/commands/cc-command-registry.ts +14 -1
- package/src/config/bundled-skills/claude-code/TOOLS.json +10 -3
- package/src/config/bundled-skills/messaging/SKILL.md +4 -0
- package/src/config/defaults.ts +1 -1
- package/src/config/schema.ts +3 -3
- package/src/config/skills.ts +5 -32
- package/src/config/system-prompt.ts +16 -0
- package/src/config/user-reference.ts +29 -0
- package/src/config/vellum-skills/catalog.json +52 -0
- package/src/config/vellum-skills/telegram-setup/SKILL.md +6 -1
- package/src/config/vellum-skills/twilio-setup/SKILL.md +38 -0
- package/src/daemon/auth-manager.ts +103 -0
- package/src/daemon/computer-use-session.ts +8 -1
- package/src/daemon/config-watcher.ts +253 -0
- package/src/daemon/handlers/config.ts +36 -13
- package/src/daemon/handlers/skills.ts +6 -7
- package/src/daemon/ipc-contract.ts +6 -0
- package/src/daemon/ipc-handler.ts +87 -0
- package/src/daemon/lifecycle.ts +16 -4
- package/src/daemon/ride-shotgun-handler.ts +11 -1
- package/src/daemon/server.ts +105 -502
- package/src/daemon/session-agent-loop.ts +5 -14
- package/src/daemon/session-runtime-assembly.ts +60 -44
- package/src/daemon/session.ts +8 -1
- package/src/memory/db-connection.ts +28 -0
- package/src/memory/db-init.ts +1019 -0
- package/src/memory/db.ts +2 -2007
- package/src/memory/embedding-backend.ts +79 -11
- package/src/memory/indexer.ts +2 -0
- package/src/memory/job-utils.ts +64 -4
- package/src/memory/jobs-worker.ts +7 -1
- package/src/memory/recall-cache.ts +107 -0
- package/src/memory/retriever.ts +30 -1
- package/src/memory/schema-migration.ts +984 -0
- package/src/memory/schema.ts +1 -0
- package/src/memory/search/types.ts +2 -0
- package/src/permissions/prompter.ts +14 -3
- package/src/permissions/trust-store.ts +7 -0
- package/src/runtime/channel-approvals.ts +17 -3
- package/src/runtime/gateway-client.ts +2 -1
- package/src/runtime/http-server.ts +15 -4
- package/src/runtime/routes/channel-routes.ts +172 -84
- package/src/runtime/routes/run-routes.ts +7 -1
- package/src/runtime/run-orchestrator.ts +8 -1
- package/src/security/secret-scanner.ts +218 -0
- package/src/skills/frontmatter.ts +63 -0
- package/src/skills/slash-commands.ts +23 -0
- package/src/skills/vellum-catalog-remote.ts +107 -0
- package/src/tools/browser/auto-navigate.ts +132 -24
- package/src/tools/browser/browser-manager.ts +67 -61
- package/src/tools/claude-code/claude-code.ts +55 -3
- package/src/tools/executor.ts +10 -2
- package/src/tools/skills/vellum-catalog.ts +61 -156
- package/src/tools/terminal/parser.ts +21 -5
- package/src/util/platform.ts +8 -1
- package/src/util/retry.ts +4 -4
|
@@ -457,6 +457,216 @@ function scanEntropy(
|
|
|
457
457
|
return matches;
|
|
458
458
|
}
|
|
459
459
|
|
|
460
|
+
// ---------------------------------------------------------------------------
|
|
461
|
+
// Encoded secret detection — decode + re-scan pass
|
|
462
|
+
// ---------------------------------------------------------------------------
|
|
463
|
+
|
|
464
|
+
/**
 * Locate runs of URL-safe characters and `%XX` escapes that contain at least
 * three escapes.
 *
 * Implemented as a single forward walk over the string rather than a regex
 * with nested quantifiers, which backtracked catastrophically on long
 * near-miss inputs.
 *
 * @param text The text to search.
 * @returns One entry per qualifying run: `start` (inclusive), `end`
 *          (exclusive) and the matched substring.
 */
function findPercentEncodedSegments(text: string): Array<{ start: number; end: number; match: string }> {
  const URL_SAFE = new Set('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_.~+/-');
  const HEX = new Set('0123456789ABCDEFabcdef');
  const size = text.length;
  const segments: Array<{ start: number; end: number; match: string }> = [];

  // True when a complete %XX escape (percent sign plus two hex digits) begins at `pos`.
  const isEscapeAt = (pos: number): boolean =>
    text[pos] === '%' && pos + 2 < size && HEX.has(text[pos + 1]) && HEX.has(text[pos + 2]);

  let cursor = 0;
  while (cursor < size) {
    const head = text[cursor];
    // Only '%' or a URL-safe character can open a candidate run.
    if (head !== '%' && !URL_SAFE.has(head)) {
      cursor += 1;
      continue;
    }

    const start = cursor;
    let escapes = 0;
    for (;;) {
      if (cursor >= size) break;
      if (isEscapeAt(cursor)) {
        escapes += 1;
        cursor += 3;
      } else if (URL_SAFE.has(text[cursor])) {
        cursor += 1;
      } else {
        break;
      }
    }

    if (escapes >= 3) {
      segments.push({ start, end: cursor, match: text.slice(start, cursor) });
    }

    // A '%' that does not start a valid escape consumes nothing; step past it.
    if (cursor === start) cursor += 1;
  }

  return segments;
}
|
|
502
|
+
|
|
503
|
+
/** Hex-escape sequences: \xHH patterns (3+ consecutive) */
|
|
504
|
+
// Global (`g`) regex: exec() advances `lastIndex`, so scanEncoded resets it to 0 before each scan.
const HEX_ESCAPE_RE = /(?:\\x[0-9A-Fa-f]{2}){3,}/g;
|
|
505
|
+
|
|
506
|
+
/** Candidate base64 segments — 24+ chars that could encode a secret (≥18 decoded bytes) */
|
|
507
|
+
// Capture group 1 is the candidate (standard or URL-safe alphabet plus optional `=` padding);
// the trailing lookahead keeps the following character out of the match.
const ENCODED_BASE64_RE = /\b([A-Za-z0-9+/\-_]{24,}={0,3})(?=\W|$)/g;
|
|
508
|
+
|
|
509
|
+
/** Continuous hex-encoded bytes — 32+ hex chars (16+ bytes decoded) */
|
|
510
|
+
// Word boundaries on both sides: a hex run embedded in a longer alphanumeric token is not matched.
const CONTINUOUS_HEX_RE = /\b([0-9a-fA-F]{32,})\b/g;
|
|
511
|
+
|
|
512
|
+
/**
 * Report whether `s` is non-empty and consists solely of printable ASCII
 * (0x20–0x7E) plus tab, line feed and carriage return.
 */
function isPrintableText(s: string): boolean {
  if (s.length === 0) return false;
  const printableOnly = /^[\x20-\x7E\t\n\r]+$/;
  return printableOnly.test(s);
}
|
|
516
|
+
|
|
517
|
+
/**
 * Attempt to decode `encoded` as standard or URL-safe base64.
 *
 * @param encoded Candidate base64 text, with or without `=` padding.
 * @returns The decoded UTF-8 text when it is printable and re-encodes to the
 *          same (padding-stripped) input; otherwise `null`.
 */
function tryDecodeBase64(encoded: string): string | null {
  const stripPadding = (s: string): string => s.replace(/=+$/, '');
  try {
    // Map the URL-safe alphabet onto the standard one before decoding.
    const canonical = encoded.replace(/-/g, '+').replace(/_/g, '/');
    const text = Buffer.from(canonical, 'base64').toString('utf-8');
    if (!isPrintableText(text)) return null;

    // Round-trip check: input that does not re-encode to itself is rejected as a garbage decode.
    const roundTripped = stripPadding(Buffer.from(text, 'utf-8').toString('base64'));
    return stripPadding(canonical) === roundTripped ? text : null;
  } catch {
    return null;
  }
}
|
|
531
|
+
|
|
532
|
+
/**
 * Attempt to percent-decode `encoded`.
 *
 * @param encoded Candidate text containing `%XX` escapes.
 * @returns The decoded text when decoding changed the input and the result is
 *          printable; `null` otherwise, including when `decodeURIComponent`
 *          throws on a malformed escape sequence.
 */
function tryDecodePercentEncoded(encoded: string): string | null {
  try {
    const plain = decodeURIComponent(encoded);
    const wasEncoded = plain !== encoded;
    return wasEncoded && isPrintableText(plain) ? plain : null;
  } catch {
    return null;
  }
}
|
|
542
|
+
|
|
543
|
+
/**
 * Attempt to expand `\xHH` escape sequences into the characters they denote.
 *
 * @param encoded Candidate text containing `\xHH` escapes.
 * @returns The expanded text when at least one escape was replaced and the
 *          result is printable; otherwise `null`.
 */
function tryDecodeHexEscapes(encoded: string): string | null {
  const expandEscape = (_whole: string, hexPair: string): string =>
    String.fromCharCode(parseInt(hexPair, 16));

  try {
    const expanded = encoded.replace(/\\x([0-9A-Fa-f]{2})/g, expandEscape);
    const unchanged = expanded === encoded;
    if (unchanged || !isPrintableText(expanded)) return null;
    return expanded;
  } catch {
    return null;
  }
}
|
|
555
|
+
|
|
556
|
+
/**
 * Attempt to decode a contiguous run of hex digits (two digits per byte) into
 * printable text.
 *
 * @param encoded Candidate string; must consist solely of hex-digit pairs.
 * @returns The decoded text when every byte is printable; `null` for empty,
 *          odd-length, non-hex or non-printable input.
 */
function tryDecodeContinuousHex(encoded: string): string | null {
  // Validate up front. parseInt() parses the longest valid hex prefix, so a pair
  // like "9z" would otherwise decode to 0x09 (tab) and be accepted as printable.
  // The pattern also rejects empty and odd-length input.
  if (!/^(?:[0-9A-Fa-f]{2})+$/.test(encoded)) return null;

  // Build the string incrementally instead of spreading a byte array into
  // String.fromCharCode(), which throws once the argument count gets very large.
  let decoded = '';
  for (let i = 0; i < encoded.length; i += 2) {
    decoded += String.fromCharCode(parseInt(encoded.slice(i, i + 2), 16));
  }

  return isPrintableText(decoded) ? decoded : null;
}
|
|
572
|
+
|
|
573
|
+
/**
 * Test whether the half-open span `[start, end)` intersects any recorded range.
 *
 * @param start  Inclusive start offset of the candidate span.
 * @param end    Exclusive end offset of the candidate span.
 * @param ranges Recorded ranges, each serialised as `"<start>:<end>"`.
 * @returns `true` as soon as one recorded range overlaps the span.
 */
function overlapsExisting(start: number, end: number, ranges: Set<string>): boolean {
  return Array.from(ranges).some((serialised) => {
    const colonAt = serialised.indexOf(':');
    const rangeStart = Number(serialised.slice(0, colonAt));
    const rangeEnd = Number(serialised.slice(colonAt + 1));
    // Two half-open intervals overlap when each begins before the other ends.
    return start < rangeEnd && end > rangeStart;
  });
}
|
|
583
|
+
|
|
584
|
+
/**
 * Second-pass scanner: decode candidate encoded segments of `text`
 * (percent-encoded, `\xHH`-escaped, base64, continuous hex) and run the known
 * secret patterns over the decoded content, catching secrets the raw-text
 * regexes cannot see.
 *
 * @param text           The text being scanned.
 * @param existingRanges `"start:end"` keys of spans already reported. Segments
 *                       overlapping one are skipped, and every span reported
 *                       here is added to the set (the set is mutated).
 * @returns One match per encoded segment whose decoded content hit a pattern.
 *          Offsets refer to the encoded segment in `text`, and the redacted
 *          value is derived from the encoded form.
 */
function scanEncoded(
  text: string,
  existingRanges: Set<string>,
): SecretMatch[] {
  const found: SecretMatch[] = [];

  // Run every known pattern over the decoded text; record at most one match per segment.
  function reportFirstSecret(
    encoded: string,
    decoded: string,
    startIndex: number,
    endIndex: number,
    encoding: string,
  ): void {
    for (const pattern of PATTERNS) {
      pattern.regex.lastIndex = 0;
      for (
        let hit = pattern.regex.exec(decoded);
        hit !== null;
        hit = pattern.regex.exec(decoded)
      ) {
        const value = hit[1] ?? hit[0];
        const rejected =
          isPlaceholder(value) ||
          isAllowlisted(value) ||
          (pattern.type === 'AWS Secret Key' && !isLikelyAwsSecret(value));
        if (rejected) continue;

        existingRanges.add(`${startIndex}:${endIndex}`);
        found.push({
          type: `${pattern.type} (${encoding})`,
          startIndex,
          endIndex,
          redactedValue: redact(encoded),
        });
        return;
      }
    }
  }

  // Percent-encoded segments come from the linear-time scanner, not a regex.
  const percentSegments = text.includes('%') ? findPercentEncodedSegments(text) : [];
  for (const segment of percentSegments) {
    if (segment.match.length > 1000) continue;
    if (overlapsExisting(segment.start, segment.end, existingRanges)) continue;
    const decoded = tryDecodePercentEncoded(segment.match);
    if (decoded) {
      reportFirstSecret(segment.match, decoded, segment.start, segment.end, 'percent-encoded');
    }
  }

  // The remaining encodings are located with regexes.
  const regexDecoders: Array<{
    regex: RegExp;
    decode: (s: string) => string | null;
    encoding: string;
    quickCheck?: (t: string) => boolean;
  }> = [
    {
      regex: HEX_ESCAPE_RE,
      decode: tryDecodeHexEscapes,
      encoding: 'hex-escaped',
      quickCheck: (t) => t.includes('\\x'),
    },
    { regex: ENCODED_BASE64_RE, decode: tryDecodeBase64, encoding: 'base64-encoded' },
    { regex: CONTINUOUS_HEX_RE, decode: tryDecodeContinuousHex, encoding: 'hex-encoded' },
  ];

  for (const decoder of regexDecoders) {
    // Cheap substring pre-check, where one exists, avoids a pointless regex pass.
    if (decoder.quickCheck !== undefined && !decoder.quickCheck(text)) continue;

    decoder.regex.lastIndex = 0;
    for (
      let m = decoder.regex.exec(text);
      m !== null;
      m = decoder.regex.exec(text)
    ) {
      const encoded = m[1] ?? m[0];
      if (encoded.length > 1000) continue;

      // Offsets of the candidate itself, which may be a capture group inside the full match.
      const startIndex = m.index + m[0].indexOf(encoded);
      const endIndex = startIndex + encoded.length;
      if (overlapsExisting(startIndex, endIndex, existingRanges)) continue;

      const decoded = decoder.decode(encoded);
      if (decoded) {
        reportFirstSecret(encoded, decoded, startIndex, endIndex, decoder.encoding);
      }
    }
  }

  return found;
}
|
|
669
|
+
|
|
460
670
|
// ---------------------------------------------------------------------------
|
|
461
671
|
// Scan function
|
|
462
672
|
// ---------------------------------------------------------------------------
|
|
@@ -508,6 +718,10 @@ export function scanText(text: string, entropyConfig?: Partial<EntropyConfig>):
|
|
|
508
718
|
const entropyMatches = scanEntropy(text, eConfig, seen);
|
|
509
719
|
matches.push(...entropyMatches);
|
|
510
720
|
|
|
721
|
+
// Encoded secret detection — decode candidate segments and re-scan
|
|
722
|
+
const encodedMatches = scanEncoded(text, seen);
|
|
723
|
+
matches.push(...encodedMatches);
|
|
724
|
+
|
|
511
725
|
// Sort by position; at same start, wider match first so redaction covers the full span
|
|
512
726
|
matches.sort((a, b) => a.startIndex - b.startIndex || b.endIndex - a.endIndex);
|
|
513
727
|
return matches;
|
|
@@ -547,4 +761,8 @@ export {
|
|
|
547
761
|
redact as _redact,
|
|
548
762
|
PATTERNS as _PATTERNS,
|
|
549
763
|
hasSecretContext as _hasSecretContext,
|
|
764
|
+
tryDecodeBase64 as _tryDecodeBase64,
|
|
765
|
+
tryDecodePercentEncoded as _tryDecodePercentEncoded,
|
|
766
|
+
tryDecodeHexEscapes as _tryDecodeHexEscapes,
|
|
767
|
+
tryDecodeContinuousHex as _tryDecodeContinuousHex,
|
|
550
768
|
};
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared frontmatter parsing for SKILL.md files.
|
|
3
|
+
*
|
|
4
|
+
* Frontmatter is a YAML-like block delimited by `---` at the top of a file.
|
|
5
|
+
* This module provides a single implementation used by the skill catalog loader,
|
|
6
|
+
* the Vellum catalog installer, and the CC command registry.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
/**
 * Matches a `---` delimited frontmatter block at the start of a file.
 * Capture group 1 is the text between the two delimiter lines.
 */
export const FRONTMATTER_REGEX = /^---\r?\n([\s\S]*?)\r?\n---(?:\r?\n|$)/;

/** Result of {@link parseFrontmatterFields}. */
export interface FrontmatterParseResult {
  /** Key-value pairs extracted from the frontmatter block. */
  fields: Record<string, string>;
  /** The remaining file content after the frontmatter block. */
  body: string;
}

/**
 * Parse frontmatter fields from file content.
 *
 * Each non-blank, non-`#` line of the leading `---` block is split at its
 * first `:` into a key and a value. Lines without a `:` are ignored, and a
 * repeated key keeps its last value. A value wrapped in matching single or
 * double quotes has the quotes removed. Double-quoted values additionally have
 * `\n`, `\r`, `\\` and `\"` unescaped.
 *
 * @param content Full file content.
 * @returns The parsed fields plus the content following the block, or `null`
 *          if no frontmatter block is found.
 */
export function parseFrontmatterFields(content: string): FrontmatterParseResult | null {
  const match = content.match(FRONTMATTER_REGEX);
  if (!match) return null;

  const fields: Record<string, string> = {};

  for (const line of match[1].split(/\r?\n/)) {
    const trimmed = line.trim();
    if (!trimmed || trimmed.startsWith('#')) continue;

    const separatorIndex = trimmed.indexOf(':');
    if (separatorIndex === -1) continue;

    const key = trimmed.slice(0, separatorIndex).trim();
    let value = trimmed.slice(separatorIndex + 1).trim();

    // A quoted value needs an opening AND a closing quote, so at least two
    // characters. Without this guard a lone `"` or `'` counts as both quotes
    // and is sliced down to an empty string.
    const hasQuotePair = value.length >= 2;
    const isDoubleQuoted = hasQuotePair && value.startsWith('"') && value.endsWith('"');
    const isSingleQuoted = hasQuotePair && value.startsWith("'") && value.endsWith("'");

    if (isDoubleQuoted || isSingleQuoted) {
      value = value.slice(1, -1);
      if (isDoubleQuoted) {
        // Unescape only double-quoted values; single-quoted YAML treats
        // backslashes literally. One pass, so that `\\n` (escaped backslash
        // followed by `n`) is not misread as a newline.
        value = value.replace(/\\(["\\nr])/g, (_, ch) => {
          if (ch === 'n') return '\n';
          if (ch === 'r') return '\r';
          return ch; // \\ → \ and \" → "
        });
      }
    }

    fields[key] = value;
  }

  return { fields, body: content.slice(match[0].length) };
}
|
|
@@ -155,6 +155,10 @@ export function formatUnknownSlashSkillMessage(
|
|
|
155
155
|
/**
|
|
156
156
|
* Rewrite user input for a known slash command into a model-facing prompt
|
|
157
157
|
* that explicitly instructs the model to invoke the skill.
|
|
158
|
+
*
|
|
159
|
+
* For the claude-code skill, trailing arguments are routed via the `command`
|
|
160
|
+
* input (not `prompt`) so that .claude/commands/*.md templates are loaded
|
|
161
|
+
* and $ARGUMENTS substitution is applied.
|
|
158
162
|
*/
|
|
159
163
|
export function rewriteKnownSlashCommandPrompt(params: {
|
|
160
164
|
rawInput: string;
|
|
@@ -162,6 +166,25 @@ export function rewriteKnownSlashCommandPrompt(params: {
|
|
|
162
166
|
skillName: string;
|
|
163
167
|
trailingArgs: string;
|
|
164
168
|
}): string {
|
|
169
|
+
// For the claude-code skill, route trailing args through the `command` input
|
|
170
|
+
// so CC command templates (.claude/commands/*.md) are loaded and $ARGUMENTS
|
|
171
|
+
// substitution is applied, rather than sending them as a raw prompt.
|
|
172
|
+
if (params.skillId === 'claude-code' && params.trailingArgs) {
|
|
173
|
+
// Extract the command name (first word of trailing args) and remaining arguments
|
|
174
|
+
const parts = params.trailingArgs.split(/\s+/);
|
|
175
|
+
const commandName = parts[0];
|
|
176
|
+
const commandArgs = parts.slice(1).join(' ');
|
|
177
|
+
|
|
178
|
+
const lines = [
|
|
179
|
+
`The user invoked the slash command \`/${params.skillId}\`.`,
|
|
180
|
+
`Execute the Claude Code command "${commandName}" using the claude_code tool with command="${commandName}".`,
|
|
181
|
+
];
|
|
182
|
+
if (commandArgs) {
|
|
183
|
+
lines.push(`Pass the following as the \`arguments\` input: ${commandArgs}`);
|
|
184
|
+
}
|
|
185
|
+
return lines.join('\n');
|
|
186
|
+
}
|
|
187
|
+
|
|
165
188
|
const lines = [
|
|
166
189
|
`The user invoked the slash command \`/${params.skillId}\`.`,
|
|
167
190
|
`Please invoke the "${params.skillName}" skill (ID: ${params.skillId}).`,
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
import { readFileSync } from 'node:fs';
|
|
2
|
+
import { join } from 'node:path';
|
|
3
|
+
|
|
4
|
+
import type { CatalogEntry } from '../tools/skills/vellum-catalog.js';
|
|
5
|
+
import { getLogger } from '../util/logger.js';
|
|
6
|
+
|
|
7
|
+
const log = getLogger('vellum-catalog-remote');
|
|
8
|
+
|
|
9
|
+
const GITHUB_RAW_BASE =
|
|
10
|
+
'https://raw.githubusercontent.com/vellum-ai/vellum-assistant/main/assistant/src/config/vellum-skills';
|
|
11
|
+
|
|
12
|
+
const CACHE_TTL_MS = 60 * 60 * 1000; // 1 hour
|
|
13
|
+
|
|
14
|
+
/** Shape of `catalog.json`, for both the remote and the bundled copy. */
interface CatalogManifest {
  /** Manifest version number. NOTE(review): not read anywhere in this module. */
  version: number;
  /** Skills listed in the catalog. */
  skills: CatalogEntry[];
}
|
|
18
|
+
|
|
19
|
+
// In-memory catalog cache: null until fetchCatalogEntries() stores a remote or bundled result.
// Freshness is judged against cacheTimestamp and CACHE_TTL_MS.
let cachedEntries: CatalogEntry[] | null = null;
|
|
20
|
+
let cacheTimestamp = 0;
|
|
21
|
+
|
|
22
|
+
/** Path of the bundled `catalog.json`, resolved relative to this module's directory. */
function getBundledCatalogPath(): string {
  const relativeSegments = ['..', 'config', 'vellum-skills', 'catalog.json'];
  return join(import.meta.dir, ...relativeSegments);
}
|
|
25
|
+
|
|
26
|
+
/**
 * Read the skill list from the bundled `catalog.json`.
 *
 * @returns The manifest's `skills` array, or an empty array when that field is
 *          missing or the file cannot be read or parsed (the failure is
 *          logged as a warning).
 */
function loadBundledCatalog(): CatalogEntry[] {
  try {
    const manifest: CatalogManifest = JSON.parse(
      readFileSync(getBundledCatalogPath(), 'utf-8'),
    );
    const { skills } = manifest;
    return skills ?? [];
  } catch (err) {
    log.warn({ err }, 'Failed to read bundled catalog.json');
    return [];
  }
}
|
|
36
|
+
|
|
37
|
+
/**
 * Read the bundled `SKILL.md` for a skill.
 *
 * @param skillId Skill identifier; must be a single directory name.
 * @returns The file content, or `null` when the id is not a plain directory
 *          name or the file cannot be read.
 */
function getBundledSkillContent(skillId: string): string | null {
  // The id becomes a path segment below. Reject anything that is not a single
  // segment, so ids such as "../../x" cannot read a SKILL.md outside the
  // bundled vellum-skills directory. (The remote lookup URL-encodes the id;
  // this local fallback had no equivalent protection.)
  const isSingleSegment =
    skillId !== '' && skillId !== '.' && skillId !== '..' && !/[\\/]/.test(skillId);
  if (!isSingleSegment) {
    log.warn({ skillId }, 'Refusing to read bundled SKILL.md for invalid skill id');
    return null;
  }

  try {
    const skillPath = join(import.meta.dir, '..', 'config', 'vellum-skills', skillId, 'SKILL.md');
    return readFileSync(skillPath, 'utf-8');
  } catch {
    // A missing or unreadable bundled copy is an expected outcome, not an error.
    return null;
  }
}
|
|
45
|
+
|
|
46
|
+
/**
 * Return the skill catalog, preferring the remote `catalog.json` on GitHub.
 *
 * A cached result younger than `CACHE_TTL_MS` is returned without any network
 * access. Otherwise the remote manifest is fetched with a 5-second timeout. If
 * the request fails, returns a non-OK status, or yields a missing or empty
 * `skills` array, the bundled catalog is used instead. Either outcome is
 * cached, so an outage does not trigger a fetch on every call.
 */
export async function fetchCatalogEntries(): Promise<CatalogEntry[]> {
  const requestedAt = Date.now();
  if (cachedEntries && requestedAt - cacheTimestamp < CACHE_TTL_MS) {
    return cachedEntries;
  }

  try {
    const response = await fetch(`${GITHUB_RAW_BASE}/catalog.json`, {
      signal: AbortSignal.timeout(5000),
    });
    if (!response.ok) {
      throw new Error(`HTTP ${response.status}: ${response.statusText}`);
    }

    const manifest: CatalogManifest = await response.json();
    const remoteSkills = manifest.skills;
    const usable = Array.isArray(remoteSkills) && remoteSkills.length > 0;
    if (!usable) {
      throw new Error('Remote catalog has invalid or empty skills array');
    }

    cachedEntries = remoteSkills;
    cacheTimestamp = requestedAt;
    log.info({ count: cachedEntries.length }, 'Fetched remote vellum-skills catalog');
    return cachedEntries;
  } catch (err) {
    log.warn({ err }, 'Failed to fetch remote catalog, falling back to bundled copy');
    const fallback = loadBundledCatalog();
    // Cache the fallback too, so calls during an outage skip the network until the TTL expires.
    cachedEntries = fallback;
    cacheTimestamp = requestedAt;
    return fallback;
  }
}
|
|
81
|
+
|
|
82
|
+
/**
 * Fetch a skill's `SKILL.md` from GitHub (10-second timeout).
 *
 * @param skillId Skill identifier; URL-encoded before being placed in the request path.
 * @returns The remote file content. On any failure (network error, timeout,
 *          non-OK status) the bundled copy is returned instead, or `null` if
 *          that is unavailable too.
 */
export async function fetchSkillContent(skillId: string): Promise<string | null> {
  const remoteUrl = `${GITHUB_RAW_BASE}/${encodeURIComponent(skillId)}/SKILL.md`;
  try {
    const response = await fetch(remoteUrl, { signal: AbortSignal.timeout(10000) });
    if (!response.ok) {
      throw new Error(`HTTP ${response.status}: ${response.statusText}`);
    }

    const markdown = await response.text();
    log.info({ skillId }, 'Fetched remote SKILL.md');
    return markdown;
  } catch (err) {
    log.warn({ err, skillId }, 'Failed to fetch remote SKILL.md, falling back to bundled copy');
    return getBundledSkillContent(skillId);
  }
}
|
|
102
|
+
|
|
103
|
+
/**
 * Report whether `skillId` appears in the catalog returned by
 * `fetchCatalogEntries()`, which may be the remote, cached, or bundled copy.
 */
export async function checkVellumSkill(skillId: string): Promise<boolean> {
  for (const entry of await fetchCatalogEntries()) {
    if (entry.id === skillId) return true;
  }
  return false;
}
|
|
@@ -10,9 +10,9 @@ import { getLogger } from '../../util/logger.js';
|
|
|
10
10
|
const log = getLogger('auto-navigate');
|
|
11
11
|
|
|
12
12
|
const CDP_BASE = 'http://localhost:9222';
|
|
13
|
-
const MAX_PAGES =
|
|
14
|
-
const PAGE_WAIT_MS =
|
|
15
|
-
const SCROLL_WAIT_MS =
|
|
13
|
+
// NOTE(review): presumably the cap on pages visited per crawl — the loop that reads it is not
// visible here. The autoNavigate doc comment still says "~15"; confirm and reconcile.
const MAX_PAGES = 10;
|
|
14
|
+
// Milliseconds autoNavigate sleeps after issuing Page.navigate for the root URL.
const PAGE_WAIT_MS = 2500;
|
|
15
|
+
// Milliseconds autoNavigate sleeps after each scrollPage() call.
const SCROLL_WAIT_MS = 1000;
|
|
16
16
|
|
|
17
17
|
/** Minimal CDP client — connects to one page tab. */
|
|
18
18
|
class MiniCDP {
|
|
@@ -57,15 +57,28 @@ class MiniCDP {
|
|
|
57
57
|
close() { this.ws?.close(); }
|
|
58
58
|
}
|
|
59
59
|
|
|
60
|
+
/**
 * Progress event passed to autoNavigate's `onProgress` callback.
 *
 * Which optional fields are set depends on `type`: `'visiting'` events carry
 * `url` and `pageNumber`, `'discovered'` events carry `totalDiscovered`, and
 * the final `'done'` event carries `visitedCount` and `totalDiscovered`.
 */
export interface AutoNavProgress {
  /** Kind of event: about to visit a page, links discovered, or crawl finished. */
  type: 'visiting' | 'discovered' | 'done';
  /** URL about to be visited. */
  url?: string;
  /** 1-based ordinal of the page about to be visited. */
  pageNumber?: number;
  /** Number of links in the ranked discovery list. */
  totalDiscovered?: number;
  /** Number of page URLs visited by the time the crawl finished. */
  visitedCount?: number;
}
|
|
67
|
+
|
|
60
68
|
/**
|
|
61
69
|
* Navigate Chrome through a domain's pages to trigger API calls.
|
|
62
70
|
* Discovers internal links from the DOM and visits up to ~15 unique paths.
|
|
63
71
|
*
|
|
64
72
|
* @param domain The domain to crawl (e.g. "example.com").
|
|
65
73
|
* @param abortSignal Optional signal to stop navigation early.
|
|
74
|
+
* @param onProgress Optional callback for live progress updates.
|
|
66
75
|
* @returns List of visited page URLs.
|
|
67
76
|
*/
|
|
68
|
-
export async function autoNavigate(
|
|
77
|
+
export async function autoNavigate(
|
|
78
|
+
domain: string,
|
|
79
|
+
abortSignal?: { aborted: boolean },
|
|
80
|
+
onProgress?: (p: AutoNavProgress) => void,
|
|
81
|
+
): Promise<string[]> {
|
|
69
82
|
let wsUrl: string | null = null;
|
|
70
83
|
try {
|
|
71
84
|
const res = await fetch(`${CDP_BASE}/json/list`);
|
|
@@ -108,6 +121,7 @@ export async function autoNavigate(domain: string, abortSignal?: { aborted: bool
|
|
|
108
121
|
|
|
109
122
|
// Navigate to the domain root first
|
|
110
123
|
try {
|
|
124
|
+
onProgress?.({ type: 'visiting', url: rootUrl, pageNumber: 1 });
|
|
111
125
|
await cdp.send('Page.navigate', { url: rootUrl });
|
|
112
126
|
await sleep(PAGE_WAIT_MS);
|
|
113
127
|
visited.add('/');
|
|
@@ -125,12 +139,11 @@ export async function autoNavigate(domain: string, abortSignal?: { aborted: bool
|
|
|
125
139
|
await scrollPage(cdp);
|
|
126
140
|
await sleep(SCROLL_WAIT_MS);
|
|
127
141
|
|
|
128
|
-
// Click common interactive elements on the root page
|
|
129
|
-
await clickInteractiveElements(cdp);
|
|
130
|
-
await sleep(SCROLL_WAIT_MS);
|
|
131
|
-
|
|
132
142
|
// Discover internal links from the current page
|
|
133
|
-
|
|
143
|
+
let discoveredLinks = await discoverInternalLinks(cdp, domain);
|
|
144
|
+
// Rank links: drop boilerplate paths (login, help, …), keep one link per ID-collapsed path pattern, then order high-value paths first and deeper paths next
|
|
145
|
+
discoveredLinks = rankLinks(discoveredLinks);
|
|
146
|
+
onProgress?.({ type: 'discovered', totalDiscovered: discoveredLinks.length });
|
|
134
147
|
log.info({ count: discoveredLinks.length }, 'Discovered internal links from root');
|
|
135
148
|
|
|
136
149
|
// Visit discovered pages
|
|
@@ -140,6 +153,7 @@ export async function autoNavigate(domain: string, abortSignal?: { aborted: bool
|
|
|
140
153
|
if (visited.has(link.key)) continue;
|
|
141
154
|
|
|
142
155
|
const url = link.url;
|
|
156
|
+
onProgress?.({ type: 'visiting', url, pageNumber: visited.size + 1, totalDiscovered: discoveredLinks.length });
|
|
143
157
|
log.info({ url }, 'Auto-navigate visiting page');
|
|
144
158
|
|
|
145
159
|
try {
|
|
@@ -152,9 +166,9 @@ export async function autoNavigate(domain: string, abortSignal?: { aborted: bool
|
|
|
152
166
|
await scrollPage(cdp);
|
|
153
167
|
await sleep(SCROLL_WAIT_MS);
|
|
154
168
|
|
|
155
|
-
// Click
|
|
156
|
-
await
|
|
157
|
-
await sleep(
|
|
169
|
+
// Click tabs/buttons within the page (NOT nav links — those navigate away)
|
|
170
|
+
await clickPageTabs(cdp);
|
|
171
|
+
await sleep(800);
|
|
158
172
|
|
|
159
173
|
// Discover more links from this page
|
|
160
174
|
const newLinks = await discoverInternalLinks(cdp, domain);
|
|
@@ -171,6 +185,7 @@ export async function autoNavigate(domain: string, abortSignal?: { aborted: bool
|
|
|
171
185
|
}
|
|
172
186
|
|
|
173
187
|
cdp.close();
|
|
188
|
+
onProgress?.({ type: 'done', visitedCount: visitedUrls.length, totalDiscovered: discoveredLinks.length });
|
|
174
189
|
log.info({ visited: visitedUrls.length, total: discoveredLinks.length + 1 }, 'Auto-navigation finished');
|
|
175
190
|
return visitedUrls;
|
|
176
191
|
}
|
|
@@ -180,6 +195,56 @@ interface DiscoveredLink {
|
|
|
180
195
|
url: string;
|
|
181
196
|
/** Deduplication key: origin + pathname. */
|
|
182
197
|
key: string;
|
|
198
|
+
/** Path depth (number of segments). */
|
|
199
|
+
depth: number;
|
|
200
|
+
}
|
|
201
|
+
|
|
202
|
+
/**
 * Exact paths (compared lower-cased, with or without a trailing slash) that
 * rankLinks drops as navigation chrome rather than content.
 */
const SKIP_PATHS = [
  // Landing and authentication
  '/home',
  '/login',
  '/signup',
  '/register',
  '/sign-up',
  '/sign-in',
  // Help and legal
  '/help',
  '/support',
  '/contact',
  '/about',
  '/terms',
  '/privacy',
  // Company and marketing
  '/careers',
  '/press',
  '/blog',
  '/faq',
  '/sitemap',
];
|
|
208
|
+
|
|
209
|
+
/**
 * Case-insensitive path fragments that mark purchase or content flows.
 * rankLinks sorts any link whose path matches one of these ahead of the rest.
 */
const HIGH_VALUE_PATTERNS = [
  // Ordering and account management
  /\/orders/i,
  /\/cart/i,
  /\/checkout/i,
  /\/account/i,
  /\/settings/i,
  // Storefront and payment
  /\/store\//i,
  /\/restaurant\//i,
  /\/menu/i,
  /\/payment/i,
  // Personal data
  /\/profile/i,
  /\/history/i,
  /\/favorites/i,
  /\/saved/i,
  // Browsing
  /\/search/i,
  /\/category/i,
  /\/collection/i,
];
|
|
216
|
+
|
|
217
|
+
/**
 * Filter, de-duplicate and order discovered links for crawling.
 *
 * 1. Drops links whose lower-cased path exactly equals a SKIP_PATHS entry
 *    (with or without a trailing slash).
 * 2. Collapses numeric and long-hex path segments to `{id}` and keeps only the
 *    first link seen per hostname + collapsed path, so `/store/123` and
 *    `/store/456` count once while different subdomains stay distinct.
 * 3. Sorts links matching HIGH_VALUE_PATTERNS first, then by path depth
 *    (capped at 4), deeper first.
 *
 * @param links Links as discovered; the array is not mutated.
 * @returns A new, ranked array.
 */
function rankLinks(links: DiscoveredLink[]): DiscoveredLink[] {
  const lowerPathOf = (link: DiscoveredLink): string => new URL(link.url).pathname.toLowerCase();
  const isBoilerplate = (path: string): boolean =>
    SKIP_PATHS.some(skip => path === skip || path === skip + '/');
  const valueScore = (path: string): number =>
    HIGH_VALUE_PATTERNS.some(p => p.test(path)) ? 1 : 0;
  const cappedDepth = (link: DiscoveredLink): number => Math.min(link.depth, 4);

  const contentLinks = links.filter(link => !isBoilerplate(lowerPathOf(link)));

  // First link seen for each host + ID-collapsed path wins.
  const representatives = new Map<string, DiscoveredLink>();
  for (const link of contentLinks) {
    const { hostname, pathname } = new URL(link.url);
    const collapsedPath = pathname
      .replace(/\/\d+/g, '/{id}')
      .replace(/\/[a-f0-9]{8,}/gi, '/{id}');
    const shape = hostname + collapsedPath;
    if (representatives.has(shape)) continue;
    representatives.set(shape, link);
  }

  const ranked = Array.from(representatives.values());
  ranked.sort((a, b) => {
    const aPath = lowerPathOf(a);
    const bPath = lowerPathOf(b);
    const valueDelta = valueScore(bPath) - valueScore(aPath);
    if (valueDelta !== 0) return valueDelta;
    return cappedDepth(b) - cappedDepth(a);
  });
  return ranked;
}
|
|
184
249
|
|
|
185
250
|
/** Extract internal links from the current page DOM, preserving subdomains. */
|
|
@@ -204,7 +269,11 @@ async function discoverInternalLinks(cdp: MiniCDP, domain: string): Promise<Disc
|
|
|
204
269
|
const key = url.origin + url.pathname;
|
|
205
270
|
if (!seen.has(key)) {
|
|
206
271
|
seen.add(key);
|
|
207
|
-
links.push({
|
|
272
|
+
links.push({
|
|
273
|
+
url: url.origin + url.pathname,
|
|
274
|
+
key,
|
|
275
|
+
depth: path.split('/').filter(Boolean).length,
|
|
276
|
+
});
|
|
208
277
|
}
|
|
209
278
|
} catch { /* skip malformed URLs */ }
|
|
210
279
|
}
|
|
@@ -222,25 +291,64 @@ async function discoverInternalLinks(cdp: MiniCDP, domain: string): Promise<Disc
|
|
|
222
291
|
|
|
223
292
|
/**
 * Scroll the page down in three 600-pixel steps, pausing 500 ms after each,
 * so that several lazy-load thresholds are crossed. A failed scroll command
 * is ignored.
 */
async function scrollPage(cdp: MiniCDP): Promise<void> {
  let stepsLeft = 3;
  while (stepsLeft > 0) {
    stepsLeft -= 1;
    await cdp
      .send('Runtime.evaluate', {
        expression: 'window.scrollBy(0, 600)',
        awaitPromise: false,
      })
      .catch(() => {});
    await sleep(500);
  }
}
|
|
230
303
|
|
|
231
|
-
/**
|
|
232
|
-
|
|
304
|
+
/**
 * Exercise in-page controls on the current page to trigger further requests.
 *
 * Clicks tab-like and collapsed-toggle selectors (none of which target anchor
 * elements, so the page is not expected to navigate away), pausing 600 ms
 * after each selector. It then clicks buttons labelled "Add to Cart",
 * "Add to Order" and "Add Item", which fire their requests even though no
 * purchase flow is completed.
 */
async function clickPageTabs(cdp: MiniCDP): Promise<void> {
  const inPageSelectors = [
    '[role="tab"]:not(:first-child)',
    '[role="tablist"] button:not(:first-child)',
    'button[data-tab]',
    '[data-testid*="tab"]',
    'button[aria-expanded="false"]',
  ];
  for (const css of inPageSelectors) {
    await clickInPage(cdp, css);
    await sleep(600);
  }

  const purchaseLabels = ['Add to Cart', 'Add to Order', 'Add Item'];
  for (const label of purchaseLabels) {
    await clickByText(cdp, label);
  }
}
|
|
328
|
+
|
|
329
|
+
/**
 * Click the first `button` or `[role="button"]` element whose trimmed,
 * lower-cased text content contains `text` (matched case-insensitively).
 *
 * @param cdp  CDP client used to evaluate the click script in the page.
 * @param text Visible label to look for.
 * @returns `true` when the in-page script reports a click; `false` when no
 *          element matched or the CDP call failed.
 */
async function clickByText(cdp: MiniCDP, text: string): Promise<boolean> {
  // JSON.stringify yields a safely quoted JS string literal for embedding in the script.
  const needleLiteral = JSON.stringify(text.toLowerCase());
  const script = `
(function() {
const buttons = document.querySelectorAll('button, [role="button"]');
for (const btn of buttons) {
if (btn.textContent && btn.textContent.trim().toLowerCase().includes(${needleLiteral})) {
btn.scrollIntoView({ block: 'center' });
btn.click();
return true;
}
}
return false;
})()
`;

  try {
    const evaluation = await cdp.send('Runtime.evaluate', {
      expression: script,
      awaitPromise: false,
      returnByValue: true,
    }) as { result?: { value?: boolean } };
    const clicked = evaluation?.result?.value;
    return clicked === true;
  } catch {
    return false;
  }
}
|
|
246
354
|
|