veryfront 0.1.73 → 0.1.75
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/esm/cli/commands/knowledge/command-help.d.ts.map +1 -1
- package/esm/cli/commands/knowledge/command-help.js +3 -1
- package/esm/cli/commands/knowledge/command.d.ts +34 -5
- package/esm/cli/commands/knowledge/command.d.ts.map +1 -1
- package/esm/cli/commands/knowledge/command.js +151 -22
- package/esm/cli/commands/knowledge/parser-source.d.ts.map +1 -1
- package/esm/cli/commands/knowledge/parser-source.js +110 -5
- package/esm/deno.d.ts +2 -0
- package/esm/deno.js +3 -1
- package/esm/src/data/data-fetcher.d.ts +11 -1
- package/esm/src/data/data-fetcher.d.ts.map +1 -1
- package/esm/src/data/data-fetcher.js +5 -2
- package/esm/src/data/index.d.ts +1 -1
- package/esm/src/data/index.d.ts.map +1 -1
- package/esm/src/data/server-data-fetcher.d.ts +14 -1
- package/esm/src/data/server-data-fetcher.d.ts.map +1 -1
- package/esm/src/data/server-data-fetcher.js +49 -3
- package/esm/src/rendering/orchestrator/lifecycle.d.ts +4 -0
- package/esm/src/rendering/orchestrator/lifecycle.d.ts.map +1 -1
- package/esm/src/rendering/orchestrator/lifecycle.js +8 -0
- package/esm/src/rendering/orchestrator/pipeline.d.ts.map +1 -1
- package/esm/src/rendering/orchestrator/pipeline.js +6 -1
- package/esm/src/rendering/orchestrator/ssr-orchestrator.d.ts +26 -1
- package/esm/src/rendering/orchestrator/ssr-orchestrator.d.ts.map +1 -1
- package/esm/src/rendering/orchestrator/ssr-orchestrator.js +77 -1
- package/esm/src/routing/api/handler.d.ts.map +1 -1
- package/esm/src/routing/api/handler.js +6 -2
- package/esm/src/routing/api/route-executor.d.ts +8 -2
- package/esm/src/routing/api/route-executor.d.ts.map +1 -1
- package/esm/src/routing/api/route-executor.js +131 -3
- package/esm/src/security/deno-permissions.d.ts +6 -0
- package/esm/src/security/deno-permissions.d.ts.map +1 -1
- package/esm/src/security/deno-permissions.js +10 -0
- package/esm/src/security/sandbox/project-worker.d.ts +61 -0
- package/esm/src/security/sandbox/project-worker.d.ts.map +1 -0
- package/esm/src/security/sandbox/project-worker.js +318 -0
- package/esm/src/security/sandbox/worker-permissions.d.ts +30 -0
- package/esm/src/security/sandbox/worker-permissions.d.ts.map +1 -0
- package/esm/src/security/sandbox/worker-permissions.js +60 -0
- package/esm/src/security/sandbox/worker-pool.d.ts +87 -0
- package/esm/src/security/sandbox/worker-pool.d.ts.map +1 -0
- package/esm/src/security/sandbox/worker-pool.js +356 -0
- package/esm/src/security/sandbox/worker-types.d.ts +165 -0
- package/esm/src/security/sandbox/worker-types.d.ts.map +1 -0
- package/esm/src/security/sandbox/worker-types.js +17 -0
- package/esm/src/server/handlers/request/ssr/ssr.handler.d.ts +2 -0
- package/esm/src/server/handlers/request/ssr/ssr.handler.d.ts.map +1 -1
- package/esm/src/server/handlers/request/ssr/ssr.handler.js +6 -2
- package/esm/src/server/project-env/storage.d.ts +6 -0
- package/esm/src/server/project-env/storage.d.ts.map +1 -1
- package/esm/src/server/project-env/storage.js +8 -0
- package/esm/src/server/runtime-handler/adapter-factory.d.ts +3 -0
- package/esm/src/server/runtime-handler/adapter-factory.d.ts.map +1 -1
- package/esm/src/server/runtime-handler/adapter-factory.js +6 -5
- package/esm/src/server/runtime-handler/index.d.ts +33 -0
- package/esm/src/server/runtime-handler/index.d.ts.map +1 -1
- package/esm/src/server/runtime-handler/index.js +103 -37
- package/esm/src/server/runtime-handler/local-project-discovery.d.ts +32 -4
- package/esm/src/server/runtime-handler/local-project-discovery.d.ts.map +1 -1
- package/esm/src/server/runtime-handler/local-project-discovery.js +46 -16
- package/esm/src/server/runtime-handler/project-isolation.d.ts +5 -0
- package/esm/src/server/runtime-handler/project-isolation.d.ts.map +1 -1
- package/esm/src/server/runtime-handler/project-isolation.js +44 -0
- package/esm/src/server/services/rendering/ssr.service.d.ts +19 -1
- package/esm/src/server/services/rendering/ssr.service.d.ts.map +1 -1
- package/esm/src/server/services/rendering/ssr.service.js +9 -1
- package/esm/src/server/shared/renderer/adapter.d.ts +25 -0
- package/esm/src/server/shared/renderer/adapter.d.ts.map +1 -1
- package/esm/src/server/shared/renderer/adapter.js +83 -10
- package/esm/src/server/shared/renderer/index.d.ts +1 -1
- package/esm/src/server/shared/renderer/index.d.ts.map +1 -1
- package/esm/src/server/shared/renderer/index.js +1 -1
- package/esm/src/server/shared/renderer/memory/pressure.d.ts +7 -0
- package/esm/src/server/shared/renderer/memory/pressure.d.ts.map +1 -1
- package/esm/src/server/shared/renderer/memory/pressure.js +7 -0
- package/esm/src/transforms/pipeline/stages/ssr-vf-modules/path-resolver.d.ts +4 -4
- package/esm/src/transforms/pipeline/stages/ssr-vf-modules/path-resolver.d.ts.map +1 -1
- package/esm/src/transforms/pipeline/stages/ssr-vf-modules/path-resolver.js +15 -15
- package/esm/src/utils/index.d.ts +10 -1
- package/esm/src/utils/index.d.ts.map +1 -1
- package/esm/src/utils/index.js +9 -1
- package/esm/src/utils/logger/index.d.ts +1 -1
- package/esm/src/utils/logger/index.d.ts.map +1 -1
- package/esm/src/utils/logger/index.js +1 -1
- package/esm/src/utils/logger/logger.d.ts +14 -0
- package/esm/src/utils/logger/logger.d.ts.map +1 -1
- package/esm/src/utils/logger/logger.js +17 -0
- package/esm/src/workflow/claude-code/tool.d.ts +5 -5
- package/package.json +4 -1
- package/src/cli/commands/knowledge/command-help.ts +3 -1
- package/src/cli/commands/knowledge/command.ts +180 -22
- package/src/cli/commands/knowledge/parser-source.ts +110 -5
- package/src/deno.js +3 -1
- package/src/src/data/data-fetcher.ts +18 -2
- package/src/src/data/index.ts +1 -1
- package/src/src/data/server-data-fetcher.ts +78 -3
- package/src/src/rendering/orchestrator/lifecycle.ts +11 -0
- package/src/src/rendering/orchestrator/pipeline.ts +7 -2
- package/src/src/rendering/orchestrator/ssr-orchestrator.ts +119 -0
- package/src/src/routing/api/handler.ts +16 -3
- package/src/src/routing/api/route-executor.ts +222 -1
- package/src/src/security/deno-permissions.ts +11 -0
- package/src/src/security/sandbox/project-worker.ts +416 -0
- package/src/src/security/sandbox/worker-permissions.ts +74 -0
- package/src/src/security/sandbox/worker-pool.ts +451 -0
- package/src/src/security/sandbox/worker-types.ts +209 -0
- package/src/src/server/handlers/request/ssr/ssr.handler.ts +11 -2
- package/src/src/server/project-env/storage.ts +9 -0
- package/src/src/server/runtime-handler/adapter-factory.ts +13 -5
- package/src/src/server/runtime-handler/index.ts +132 -39
- package/src/src/server/runtime-handler/local-project-discovery.ts +51 -17
- package/src/src/server/runtime-handler/project-isolation.ts +53 -0
- package/src/src/server/services/rendering/ssr.service.ts +34 -3
- package/src/src/server/shared/renderer/adapter.ts +107 -8
- package/src/src/server/shared/renderer/index.ts +7 -1
- package/src/src/server/shared/renderer/memory/pressure.ts +8 -0
- package/src/src/transforms/pipeline/stages/ssr-vf-modules/path-resolver.ts +18 -12
- package/src/src/utils/index.ts +11 -0
- package/src/src/utils/logger/index.ts +1 -0
- package/src/src/utils/logger/logger.ts +34 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import * as dntShim from "../../../_dnt.shims.js";
|
|
2
2
|
import { z } from "zod";
|
|
3
|
-
import { createFileSystem } from "../../../src/platform/index.js";
|
|
3
|
+
import { createFileSystem, getEnv } from "../../../src/platform/index.js";
|
|
4
4
|
import { basename, extname, join, normalize, relative } from "../../../src/platform/compat/path/index.js";
|
|
5
5
|
import { withSpan } from "../../../src/observability/tracing/otlp-setup.js";
|
|
6
6
|
import { cliLogger } from "../../utils/index.js";
|
|
@@ -9,6 +9,7 @@ import type { ParsedArgs } from "../../shared/types.js";
|
|
|
9
9
|
import { downloadUploadToFile, listAllUploads, type UploadItem } from "../uploads/command.js";
|
|
10
10
|
import { putRemoteFileFromLocal } from "../files/command.js";
|
|
11
11
|
import { knowledgeIngestPythonSource } from "./parser-source.js";
|
|
12
|
+
import { createJobUserLogger, type Logger, serverLogger } from "../../../src/utils/index.js";
|
|
12
13
|
|
|
13
14
|
const SUPPORTED_EXTENSIONS = new Set([
|
|
14
15
|
".pdf",
|
|
@@ -59,10 +60,12 @@ type KnowledgeSource =
|
|
|
59
60
|
|
|
60
61
|
type DownloadResult = { uploadPath: string; localPath: string; bytes?: number };
|
|
61
62
|
|
|
63
|
+
const knowledgeJobLogger = serverLogger.component("knowledge-ingest");
|
|
64
|
+
|
|
62
65
|
const KnowledgeIngestArgsSchema = z.object({
|
|
63
66
|
projectSlug: z.string().optional(),
|
|
64
67
|
projectDir: z.string().optional(),
|
|
65
|
-
|
|
68
|
+
sources: z.array(z.string()).default([]),
|
|
66
69
|
path: z.string().optional(),
|
|
67
70
|
all: z.boolean().default(false),
|
|
68
71
|
recursive: z.boolean().default(false),
|
|
@@ -72,6 +75,44 @@ const KnowledgeIngestArgsSchema = z.object({
|
|
|
72
75
|
slug: z.string().optional(),
|
|
73
76
|
json: z.boolean().default(false),
|
|
74
77
|
quiet: z.boolean().default(false),
|
|
78
|
+
}).superRefine((value, ctx) => {
|
|
79
|
+
const hasExplicitSources = value.sources.length > 0;
|
|
80
|
+
const hasPath = typeof value.path === "string" && value.path.length > 0;
|
|
81
|
+
|
|
82
|
+
if (hasExplicitSources && (hasPath || value.all)) {
|
|
83
|
+
ctx.addIssue({
|
|
84
|
+
code: z.ZodIssueCode.custom,
|
|
85
|
+
message: "Use either explicit source paths or --path with --all, not both.",
|
|
86
|
+
});
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
if (!hasExplicitSources && !hasPath && !value.all) {
|
|
90
|
+
ctx.addIssue({
|
|
91
|
+
code: z.ZodIssueCode.custom,
|
|
92
|
+
message: "Provide one or more source paths or use --path with --all.",
|
|
93
|
+
});
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
if (hasPath && !value.all) {
|
|
97
|
+
ctx.addIssue({
|
|
98
|
+
code: z.ZodIssueCode.custom,
|
|
99
|
+
message: "--path requires --all.",
|
|
100
|
+
});
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
if (!hasPath && value.all) {
|
|
104
|
+
ctx.addIssue({
|
|
105
|
+
code: z.ZodIssueCode.custom,
|
|
106
|
+
message: "--all requires --path.",
|
|
107
|
+
});
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
if (value.slug && value.sources.length !== 1) {
|
|
111
|
+
ctx.addIssue({
|
|
112
|
+
code: z.ZodIssueCode.custom,
|
|
113
|
+
message: "--slug can only be used with a single explicit source.",
|
|
114
|
+
});
|
|
115
|
+
}
|
|
75
116
|
});
|
|
76
117
|
|
|
77
118
|
export type KnowledgeIngestOptions = z.infer<typeof KnowledgeIngestArgsSchema>;
|
|
@@ -92,12 +133,33 @@ function printJson(value: unknown): void {
|
|
|
92
133
|
console.log(JSON.stringify(value, null, 2));
|
|
93
134
|
}
|
|
94
135
|
|
|
136
|
+
function createKnowledgeIngestEventLogger(): Logger | null {
|
|
137
|
+
const projectId = getEnv("TENANT_PROJECT_ID");
|
|
138
|
+
const jobId = getEnv("JOB_ID");
|
|
139
|
+
|
|
140
|
+
if (!projectId || !jobId) {
|
|
141
|
+
return null;
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
return createJobUserLogger(knowledgeJobLogger, {
|
|
145
|
+
projectId,
|
|
146
|
+
jobId,
|
|
147
|
+
batchId: getEnv("JOB_BATCH_ID") ?? undefined,
|
|
148
|
+
jobTarget: getEnv("JOB_TARGET") ?? undefined,
|
|
149
|
+
task: "knowledge-ingest",
|
|
150
|
+
});
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
function buildKnowledgeSourceName(source: KnowledgeSource): string {
|
|
154
|
+
return basename(source.kind === "upload" ? source.uploadPath : source.localPath);
|
|
155
|
+
}
|
|
156
|
+
|
|
95
157
|
function showKnowledgeUsage(): void {
|
|
96
158
|
console.log(`
|
|
97
159
|
Veryfront Knowledge
|
|
98
160
|
|
|
99
161
|
Usage:
|
|
100
|
-
veryfront knowledge ingest <source> [options]
|
|
162
|
+
veryfront knowledge ingest <source...> [options]
|
|
101
163
|
veryfront knowledge ingest --path <prefix-or-dir> --all [options]
|
|
102
164
|
|
|
103
165
|
Subcommands:
|
|
@@ -111,7 +173,7 @@ export function parseKnowledgeIngestArgs(
|
|
|
111
173
|
return KnowledgeIngestArgsSchema.safeParse({
|
|
112
174
|
projectSlug: getStringArg(args, "project", "p", "project-slug"),
|
|
113
175
|
projectDir: getStringArg(args, "project-dir", "dir", "d"),
|
|
114
|
-
|
|
176
|
+
sources: args._.slice(2).filter((value): value is string => typeof value === "string"),
|
|
115
177
|
path: getStringArg(args, "path"),
|
|
116
178
|
all: getBooleanArg(args, "all"),
|
|
117
179
|
recursive: getBooleanArg(args, "recursive"),
|
|
@@ -273,6 +335,7 @@ export async function runKnowledgeParser(input: {
|
|
|
273
335
|
description?: string;
|
|
274
336
|
slug?: string;
|
|
275
337
|
sourceReference?: string;
|
|
338
|
+
env?: Record<string, string>;
|
|
276
339
|
}): Promise<KnowledgeParserResult> {
|
|
277
340
|
const tempDir = await dntShim.Deno.makeTempDir({ prefix: "veryfront-knowledge-parser-" });
|
|
278
341
|
const inputJsonPath = `${tempDir}/input.json`;
|
|
@@ -296,6 +359,7 @@ export async function runKnowledgeParser(input: {
|
|
|
296
359
|
try {
|
|
297
360
|
result = await new dntShim.Deno.Command("python3", {
|
|
298
361
|
args: [scriptPath, "--input-json", inputJsonPath, "--output-json", outputJsonPath],
|
|
362
|
+
...(input.env ? { env: input.env } : {}),
|
|
299
363
|
stdout: "piped",
|
|
300
364
|
stderr: "piped",
|
|
301
365
|
}).output();
|
|
@@ -321,7 +385,7 @@ export async function runKnowledgeParser(input: {
|
|
|
321
385
|
}
|
|
322
386
|
|
|
323
387
|
export async function collectKnowledgeSources(
|
|
324
|
-
options: Pick<KnowledgeIngestOptions, "
|
|
388
|
+
options: Pick<KnowledgeIngestOptions, "sources" | "path" | "all" | "recursive">,
|
|
325
389
|
deps: {
|
|
326
390
|
client: ApiClient;
|
|
327
391
|
projectSlug: string;
|
|
@@ -330,29 +394,68 @@ export async function collectKnowledgeSources(
|
|
|
330
394
|
): Promise<KnowledgeSource[]> {
|
|
331
395
|
const fs = createFileSystem();
|
|
332
396
|
|
|
333
|
-
if (options.
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
397
|
+
if (options.sources.length > 0) {
|
|
398
|
+
const explicitSources: Array<
|
|
399
|
+
| { kind: "local"; sources: KnowledgeSource[] }
|
|
400
|
+
| { kind: "upload"; input: string; uploadPath: string }
|
|
401
|
+
> = [];
|
|
402
|
+
const uploadTargets: string[] = [];
|
|
403
|
+
|
|
404
|
+
for (const input of options.sources) {
|
|
405
|
+
if (!isProjectUploadReference(input) && await fs.exists(input)) {
|
|
406
|
+
const localFiles = await collectLocalFiles(input, options.recursive);
|
|
407
|
+
if (!localFiles.length) throw new Error(`No supported files found at ${input}`);
|
|
408
|
+
explicitSources.push({
|
|
409
|
+
kind: "local",
|
|
410
|
+
sources: localFiles.map((localPath) => ({ kind: "local", input, localPath })),
|
|
411
|
+
});
|
|
412
|
+
continue;
|
|
413
|
+
}
|
|
414
|
+
|
|
415
|
+
if (isLikelyLocalPath(input)) {
|
|
416
|
+
throw new Error(`Local file not found: ${input}`);
|
|
417
|
+
}
|
|
418
|
+
|
|
419
|
+
const uploadPath = normalizeProjectUploadPath(input);
|
|
420
|
+
explicitSources.push({ kind: "upload", input, uploadPath });
|
|
421
|
+
uploadTargets.push(uploadPath);
|
|
338
422
|
}
|
|
339
423
|
|
|
340
|
-
|
|
341
|
-
|
|
424
|
+
const downloads = uploadTargets.length > 0 ? await deps.downloadUploads(uploadTargets) : [];
|
|
425
|
+
const downloadsByPath = new Map<string, DownloadResult[]>();
|
|
426
|
+
|
|
427
|
+
for (const download of downloads) {
|
|
428
|
+
const existing = downloadsByPath.get(download.uploadPath) ?? [];
|
|
429
|
+
existing.push(download);
|
|
430
|
+
downloadsByPath.set(download.uploadPath, existing);
|
|
342
431
|
}
|
|
343
432
|
|
|
344
|
-
const
|
|
345
|
-
const
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
433
|
+
const resolvedSources: KnowledgeSource[] = [];
|
|
434
|
+
for (const source of explicitSources) {
|
|
435
|
+
if (source.kind === "local") {
|
|
436
|
+
resolvedSources.push(...source.sources);
|
|
437
|
+
continue;
|
|
438
|
+
}
|
|
439
|
+
|
|
440
|
+
const matchingDownloads = downloadsByPath.get(source.uploadPath);
|
|
441
|
+
const download = matchingDownloads?.shift();
|
|
442
|
+
if (!download) {
|
|
443
|
+
throw new Error(`Upload not found: ${formatKnowledgeUploadSource(source.uploadPath)}`);
|
|
444
|
+
}
|
|
445
|
+
|
|
446
|
+
resolvedSources.push({
|
|
447
|
+
kind: "upload",
|
|
448
|
+
input: source.input,
|
|
449
|
+
uploadPath: download.uploadPath,
|
|
450
|
+
localPath: download.localPath,
|
|
451
|
+
});
|
|
452
|
+
}
|
|
453
|
+
|
|
454
|
+
return resolvedSources;
|
|
352
455
|
}
|
|
353
456
|
|
|
354
457
|
if (!options.path || !options.all) {
|
|
355
|
-
throw new Error("Provide
|
|
458
|
+
throw new Error("Provide one or more source paths or use --path with --all.");
|
|
356
459
|
}
|
|
357
460
|
|
|
358
461
|
if (!isProjectUploadReference(options.path) && await fs.exists(options.path)) {
|
|
@@ -405,12 +508,24 @@ export async function ingestResolvedSources(
|
|
|
405
508
|
outputDir: string;
|
|
406
509
|
runParser: typeof runKnowledgeParser;
|
|
407
510
|
uploadKnowledgeFile: (remotePath: string, localPath: string) => Promise<{ path: string }>;
|
|
511
|
+
eventLogger?: Logger | null;
|
|
408
512
|
},
|
|
409
513
|
): Promise<KnowledgeIngestFileResult[]> {
|
|
410
|
-
|
|
514
|
+
if (options.slug && sources.length !== 1) {
|
|
515
|
+
throw new Error("--slug can only be used with a single explicit source.");
|
|
516
|
+
}
|
|
517
|
+
|
|
518
|
+
const slugs = options.slug ? [options.slug] : ensureUniqueSlugs(sources);
|
|
411
519
|
const results: KnowledgeIngestFileResult[] = [];
|
|
412
520
|
|
|
413
521
|
for (const [index, source] of sources.entries()) {
|
|
522
|
+
deps.eventLogger?.info("Processing knowledge source", {
|
|
523
|
+
phase: "file_processing",
|
|
524
|
+
progress_current: index + 1,
|
|
525
|
+
progress_total: sources.length,
|
|
526
|
+
source_name: buildKnowledgeSourceName(source),
|
|
527
|
+
});
|
|
528
|
+
|
|
414
529
|
const parser = await deps.runParser({
|
|
415
530
|
filePath: source.localPath,
|
|
416
531
|
outputDir: deps.outputDir,
|
|
@@ -424,6 +539,26 @@ export async function ingestResolvedSources(
|
|
|
424
539
|
options.knowledgePath,
|
|
425
540
|
);
|
|
426
541
|
const uploaded = await deps.uploadKnowledgeFile(remotePath, parser.sandbox_output_path);
|
|
542
|
+
|
|
543
|
+
deps.eventLogger?.info("Knowledge source ingested", {
|
|
544
|
+
phase: "file_completed",
|
|
545
|
+
progress_current: index + 1,
|
|
546
|
+
progress_total: sources.length,
|
|
547
|
+
source_name: buildKnowledgeSourceName(source),
|
|
548
|
+
remote_path: uploaded.path,
|
|
549
|
+
warning_count: parser.warnings.length,
|
|
550
|
+
});
|
|
551
|
+
|
|
552
|
+
if (parser.warnings.length > 0) {
|
|
553
|
+
deps.eventLogger?.warn("Knowledge source emitted warnings", {
|
|
554
|
+
phase: "file_warning",
|
|
555
|
+
progress_current: index + 1,
|
|
556
|
+
progress_total: sources.length,
|
|
557
|
+
source_name: buildKnowledgeSourceName(source),
|
|
558
|
+
warning_count: parser.warnings.length,
|
|
559
|
+
});
|
|
560
|
+
}
|
|
561
|
+
|
|
427
562
|
results.push(
|
|
428
563
|
createKnowledgeIngestResult({
|
|
429
564
|
source: buildSourceReference(source),
|
|
@@ -462,8 +597,14 @@ export async function knowledgeCommand(args: ParsedArgs): Promise<void> {
|
|
|
462
597
|
const outputDir = options.outputDir ?? await defaultOutputRoot();
|
|
463
598
|
const shouldCleanupOutputDir = options.outputDir === undefined;
|
|
464
599
|
const downloadOutputDir = resolveKnowledgeDownloadOutputDir(outputDir);
|
|
600
|
+
const eventLogger = createKnowledgeIngestEventLogger();
|
|
465
601
|
|
|
466
602
|
try {
|
|
603
|
+
eventLogger?.info("Starting knowledge ingest", {
|
|
604
|
+
phase: "started",
|
|
605
|
+
mode: options.path ? "path_prefix" : "explicit_sources",
|
|
606
|
+
});
|
|
607
|
+
|
|
467
608
|
const sources = await collectKnowledgeSources(options, {
|
|
468
609
|
client,
|
|
469
610
|
projectSlug: config.projectSlug,
|
|
@@ -475,15 +616,27 @@ export async function knowledgeCommand(args: ParsedArgs): Promise<void> {
|
|
|
475
616
|
),
|
|
476
617
|
});
|
|
477
618
|
|
|
619
|
+
eventLogger?.info("Resolved knowledge sources", {
|
|
620
|
+
phase: "sources_resolved",
|
|
621
|
+
progress_total: sources.length,
|
|
622
|
+
});
|
|
623
|
+
|
|
478
624
|
const results = await ingestResolvedSources(sources, options, {
|
|
479
625
|
client,
|
|
480
626
|
projectSlug: config.projectSlug,
|
|
481
627
|
outputDir,
|
|
482
628
|
runParser: runKnowledgeParser,
|
|
629
|
+
eventLogger,
|
|
483
630
|
uploadKnowledgeFile: (remotePath, localPath) =>
|
|
484
631
|
putRemoteFileFromLocal(client, config.projectSlug, remotePath, localPath),
|
|
485
632
|
});
|
|
486
633
|
|
|
634
|
+
eventLogger?.info("Completed knowledge ingest", {
|
|
635
|
+
phase: "completed",
|
|
636
|
+
progress_current: results.length,
|
|
637
|
+
progress_total: results.length,
|
|
638
|
+
});
|
|
639
|
+
|
|
487
640
|
if (options.json) {
|
|
488
641
|
printJson(results);
|
|
489
642
|
return;
|
|
@@ -495,6 +648,11 @@ export async function knowledgeCommand(args: ParsedArgs): Promise<void> {
|
|
|
495
648
|
cliLogger.info(` ${result.summary}`);
|
|
496
649
|
}
|
|
497
650
|
}
|
|
651
|
+
} catch (error) {
|
|
652
|
+
eventLogger?.error("Knowledge ingest failed", {
|
|
653
|
+
phase: "failed",
|
|
654
|
+
});
|
|
655
|
+
throw error;
|
|
498
656
|
} finally {
|
|
499
657
|
if (shouldCleanupOutputDir) {
|
|
500
658
|
await Promise.all([
|
|
@@ -3,6 +3,7 @@ import argparse
|
|
|
3
3
|
import csv
|
|
4
4
|
import json
|
|
5
5
|
import re
|
|
6
|
+
import subprocess
|
|
6
7
|
from datetime import date
|
|
7
8
|
from pathlib import Path
|
|
8
9
|
from typing import Any, Optional
|
|
@@ -71,6 +72,107 @@ def build_frontmatter(source: str, source_type: str, description: str) -> str:
|
|
|
71
72
|
])
|
|
72
73
|
|
|
73
74
|
|
|
75
|
+
def metadata_int(metadata: dict[str, Any], *keys: str) -> Optional[int]:
|
|
76
|
+
for key in keys:
|
|
77
|
+
value = metadata.get(key)
|
|
78
|
+
if isinstance(value, int) and not isinstance(value, bool):
|
|
79
|
+
return value
|
|
80
|
+
return None
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def metadata_string_list(metadata: dict[str, Any], *keys: str) -> Optional[list[str]]:
|
|
84
|
+
for key in keys:
|
|
85
|
+
value = metadata.get(key)
|
|
86
|
+
if isinstance(value, list) and all(isinstance(item, str) for item in value):
|
|
87
|
+
return value
|
|
88
|
+
return None
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def build_kreuzberg_stats(source_type: str, content: str, metadata: dict[str, Any]):
|
|
92
|
+
stats: dict[str, Any] = {
|
|
93
|
+
"characters": len(content),
|
|
94
|
+
"lines": len(content.splitlines()) if content else 0,
|
|
95
|
+
"engine": "kreuzberg",
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
if isinstance(metadata.get("mime_type"), str):
|
|
99
|
+
stats["mime_type"] = metadata["mime_type"]
|
|
100
|
+
|
|
101
|
+
if source_type == "pdf":
|
|
102
|
+
stats["pages"] = metadata_int(metadata, "page_count") or 0
|
|
103
|
+
stats["tables"] = metadata_int(metadata, "table_count") or 0
|
|
104
|
+
elif source_type in {"xlsx", "xls"}:
|
|
105
|
+
stats["sheets"] = metadata_int(metadata, "sheet_count") or 0
|
|
106
|
+
stats["rows"] = metadata_int(metadata, "row_count") or 0
|
|
107
|
+
stats["sheet_names"] = metadata_string_list(metadata, "sheet_names") or []
|
|
108
|
+
elif source_type == "docx":
|
|
109
|
+
stats["paragraphs"] = metadata_int(metadata, "paragraph_count") or 0
|
|
110
|
+
stats["tables"] = metadata_int(metadata, "table_count") or 0
|
|
111
|
+
elif source_type == "pptx":
|
|
112
|
+
stats["slides"] = metadata_int(metadata, "slide_count", "page_count") or 0
|
|
113
|
+
stats["tables"] = metadata_int(metadata, "table_count") or 0
|
|
114
|
+
elif source_type == "html":
|
|
115
|
+
stats["tables"] = metadata_int(metadata, "table_count") or 0
|
|
116
|
+
|
|
117
|
+
return stats
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def parse_with_kreuzberg(path: str, source_type: str):
|
|
121
|
+
warnings: list[str] = []
|
|
122
|
+
completed = subprocess.run(
|
|
123
|
+
[
|
|
124
|
+
"kreuzberg",
|
|
125
|
+
"extract",
|
|
126
|
+
path,
|
|
127
|
+
"--format",
|
|
128
|
+
"json",
|
|
129
|
+
"--output-format",
|
|
130
|
+
"markdown",
|
|
131
|
+
],
|
|
132
|
+
capture_output=True,
|
|
133
|
+
text=True,
|
|
134
|
+
check=False,
|
|
135
|
+
)
|
|
136
|
+
|
|
137
|
+
if completed.returncode != 0:
|
|
138
|
+
detail = completed.stderr.strip() or completed.stdout.strip() or f"exit code {completed.returncode}"
|
|
139
|
+
raise RuntimeError(f"kreuzberg extract failed: {detail}")
|
|
140
|
+
|
|
141
|
+
try:
|
|
142
|
+
payload = json.loads(completed.stdout)
|
|
143
|
+
except json.JSONDecodeError as error:
|
|
144
|
+
raise RuntimeError(f"kreuzberg extract returned invalid JSON: {error}") from error
|
|
145
|
+
|
|
146
|
+
content = payload.get("content", "")
|
|
147
|
+
if not isinstance(content, str):
|
|
148
|
+
raise RuntimeError("kreuzberg extract did not return string content")
|
|
149
|
+
|
|
150
|
+
metadata = payload.get("metadata") if isinstance(payload.get("metadata"), dict) else {}
|
|
151
|
+
normalized_content = clean_text(content)
|
|
152
|
+
stats = build_kreuzberg_stats(source_type, normalized_content, metadata)
|
|
153
|
+
|
|
154
|
+
return normalized_content or "_No extractable text found in document._", stats, warnings
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def prefer_kreuzberg(source_type: str, fallback_parser):
|
|
158
|
+
def parser(path: str):
|
|
159
|
+
try:
|
|
160
|
+
return parse_with_kreuzberg(path, source_type)
|
|
161
|
+
except FileNotFoundError as error:
|
|
162
|
+
if getattr(error, "filename", "") == "kreuzberg":
|
|
163
|
+
return fallback_parser(path)
|
|
164
|
+
raise
|
|
165
|
+
except RuntimeError as error:
|
|
166
|
+
content, stats, warnings = fallback_parser(path)
|
|
167
|
+
warnings.append(
|
|
168
|
+
"kreuzberg extraction failed; fell back to the built-in parser: "
|
|
169
|
+
+ str(error)
|
|
170
|
+
)
|
|
171
|
+
return content, stats, warnings
|
|
172
|
+
|
|
173
|
+
return parser
|
|
174
|
+
|
|
175
|
+
|
|
74
176
|
def parse_csv_like(path: str, delimiter: str = ","):
|
|
75
177
|
warnings: list[str] = []
|
|
76
178
|
with open(path, newline="", encoding="utf-8-sig") as file:
|
|
@@ -305,18 +407,19 @@ def parse_json(path: str):
|
|
|
305
407
|
def select_parser(path: Path):
|
|
306
408
|
ext = path.suffix.lower()
|
|
307
409
|
if ext == ".pdf":
|
|
308
|
-
return "pdf", parse_pdf
|
|
410
|
+
return "pdf", prefer_kreuzberg("pdf", parse_pdf)
|
|
309
411
|
if ext in {".csv", ".tsv"}:
|
|
310
412
|
delimiter = "\t" if ext == ".tsv" else ","
|
|
311
413
|
return ext.lstrip("."), lambda file_path: parse_csv_like(file_path, delimiter)
|
|
312
414
|
if ext in {".xlsx", ".xls"}:
|
|
313
|
-
|
|
415
|
+
source_type = ext.lstrip(".")
|
|
416
|
+
return source_type, prefer_kreuzberg(source_type, parse_excel)
|
|
314
417
|
if ext == ".docx":
|
|
315
|
-
return "docx", parse_docx
|
|
418
|
+
return "docx", prefer_kreuzberg("docx", parse_docx)
|
|
316
419
|
if ext == ".pptx":
|
|
317
|
-
return "pptx", parse_pptx
|
|
420
|
+
return "pptx", prefer_kreuzberg("pptx", parse_pptx)
|
|
318
421
|
if ext in {".html", ".htm"}:
|
|
319
|
-
return "html", parse_html
|
|
422
|
+
return "html", prefer_kreuzberg("html", parse_html)
|
|
320
423
|
if ext in {".txt", ".md", ".mdx"}:
|
|
321
424
|
return ext.lstrip("."), parse_text
|
|
322
425
|
if ext == ".json":
|
|
@@ -325,6 +428,8 @@ def select_parser(path: Path):
|
|
|
325
428
|
|
|
326
429
|
|
|
327
430
|
def build_summary(source_type: str, stats: dict[str, Any]) -> str:
|
|
431
|
+
if stats.get("engine") == "kreuzberg":
|
|
432
|
+
return f"Converted {source_type.upper()} to markdown ({stats.get('characters', 0)} chars)."
|
|
328
433
|
if source_type in {"csv", "tsv"}:
|
|
329
434
|
return f"Parsed {stats.get('rows', 0)} rows across {stats.get('columns', 0)} columns."
|
|
330
435
|
if source_type in {"xlsx", "xls"}:
|
package/src/deno.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
export default {
|
|
2
2
|
"name": "veryfront",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.75",
|
|
4
4
|
"license": "Apache-2.0",
|
|
5
5
|
"nodeModulesDir": "auto",
|
|
6
6
|
"exclude": [
|
|
@@ -34,6 +34,7 @@ export default {
|
|
|
34
34
|
"./resource": "./src/resource/index.ts",
|
|
35
35
|
"./mcp": "./src/mcp/index.ts",
|
|
36
36
|
"./middleware": "./src/middleware/index.ts",
|
|
37
|
+
"./utils": "./src/utils/index.ts",
|
|
37
38
|
"./oauth": "./src/oauth/index.ts",
|
|
38
39
|
"./provider": "./src/provider/index.ts",
|
|
39
40
|
"./fs": "./src/fs/index.ts",
|
|
@@ -71,6 +72,7 @@ export default {
|
|
|
71
72
|
"veryfront/workflow/claude-code": "./src/workflow/claude-code/index.ts",
|
|
72
73
|
"veryfront/workflow/claude-code/react": "./src/workflow/claude-code/react/index.ts",
|
|
73
74
|
"veryfront/workflow/discovery": "./src/workflow/discovery/index.ts",
|
|
75
|
+
"veryfront/utils": "./src/utils/index.ts",
|
|
74
76
|
"veryfront/utils/box": "./src/utils/box.ts",
|
|
75
77
|
"veryfront/utils/case-utils": "./src/utils/case-utils.ts",
|
|
76
78
|
"veryfront/utils/constants/server": "./src/utils/constants/server.ts",
|
|
@@ -1,11 +1,22 @@
|
|
|
1
1
|
import { withSpan } from "../observability/tracing/otlp-setup.js";
|
|
2
2
|
import { SpanNames } from "../observability/tracing/span-names.js";
|
|
3
3
|
import { CacheManager } from "./data-fetching-cache.js";
|
|
4
|
-
import { ServerDataFetcher } from "./server-data-fetcher.js";
|
|
4
|
+
import { ServerDataFetcher, type ServerDataFetchOptions } from "./server-data-fetcher.js";
|
|
5
5
|
import { StaticDataFetcher } from "./static-data-fetcher.js";
|
|
6
6
|
import { StaticPathsFetcher } from "./static-paths-fetcher.js";
|
|
7
7
|
import type { DataContext, DataResult, PageWithData, StaticPathsResult } from "./types.js";
|
|
8
8
|
|
|
9
|
+
/**
|
|
10
|
+
* Options for isolated data fetching. Passed through to ServerDataFetcher
|
|
11
|
+
* when worker isolation is enabled.
|
|
12
|
+
*/
|
|
13
|
+
export interface FetchDataOptions {
|
|
14
|
+
/** Absolute path to the module containing getServerData */
|
|
15
|
+
modulePath?: string;
|
|
16
|
+
/** Project directory for worker scoping */
|
|
17
|
+
projectDir?: string;
|
|
18
|
+
}
|
|
19
|
+
|
|
9
20
|
export class DataFetcher {
|
|
10
21
|
private cacheManager: CacheManager;
|
|
11
22
|
private serverFetcher: ServerDataFetcher;
|
|
@@ -23,6 +34,7 @@ export class DataFetcher {
|
|
|
23
34
|
pageModule: PageWithData,
|
|
24
35
|
context: DataContext,
|
|
25
36
|
mode: "development" | "production" = "development",
|
|
37
|
+
options?: FetchDataOptions,
|
|
26
38
|
): Promise<DataResult> {
|
|
27
39
|
const preferServerData = mode === "development" || !pageModule.getStaticData;
|
|
28
40
|
const useServer = preferServerData && !!pageModule.getServerData;
|
|
@@ -34,10 +46,14 @@ export class DataFetcher {
|
|
|
34
46
|
? "static"
|
|
35
47
|
: "none";
|
|
36
48
|
|
|
49
|
+
const isolationOptions: ServerDataFetchOptions | undefined = options
|
|
50
|
+
? { modulePath: options.modulePath, projectDir: options.projectDir }
|
|
51
|
+
: undefined;
|
|
52
|
+
|
|
37
53
|
return withSpan(
|
|
38
54
|
SpanNames.DATA_FETCH,
|
|
39
55
|
() => {
|
|
40
|
-
if (useServer) return this.serverFetcher.fetch(pageModule, context);
|
|
56
|
+
if (useServer) return this.serverFetcher.fetch(pageModule, context, isolationOptions);
|
|
41
57
|
if (useStatic) return this.staticFetcher.fetch(pageModule, context);
|
|
42
58
|
return Promise.resolve({ props: {} });
|
|
43
59
|
},
|
package/src/src/data/index.ts
CHANGED
|
@@ -1,12 +1,29 @@
|
|
|
1
|
+
import * as dntShim from "../../_dnt.shims.js";
|
|
1
2
|
import type { DataContext, DataResult, PageWithData } from "./types.js";
|
|
2
3
|
import { serverLogger } from "../utils/index.js";
|
|
3
4
|
import { DATA_FETCH_TIMEOUT_MS } from "../config/defaults.js";
|
|
4
5
|
import { TimeoutError, withTimeoutThrow } from "../rendering/utils/stream-utils.js";
|
|
5
6
|
import { withSpan } from "../observability/tracing/otlp-setup.js";
|
|
6
7
|
import { CircuitBreakerOpen, getCircuitBreaker } from "../utils/circuit-breaker.js";
|
|
8
|
+
import { getWorkerPool, isDataIsolationEnabled } from "../security/sandbox/worker-pool.js";
|
|
9
|
+
import type { WorkerResponse } from "../security/sandbox/worker-types.js";
|
|
10
|
+
|
|
11
|
+
/**
|
|
12
|
+
* Options for isolated data fetching through Worker pool.
|
|
13
|
+
*/
|
|
14
|
+
export interface ServerDataFetchOptions {
|
|
15
|
+
/** Absolute path to the module containing getServerData */
|
|
16
|
+
modulePath?: string;
|
|
17
|
+
/** Project directory for worker scoping */
|
|
18
|
+
projectDir?: string;
|
|
19
|
+
}
|
|
7
20
|
|
|
8
21
|
export class ServerDataFetcher {
|
|
9
|
-
fetch(
|
|
22
|
+
fetch(
|
|
23
|
+
pageModule: PageWithData,
|
|
24
|
+
context: DataContext,
|
|
25
|
+
options?: ServerDataFetchOptions,
|
|
26
|
+
): Promise<DataResult> {
|
|
10
27
|
if (typeof pageModule.getServerData !== "function") {
|
|
11
28
|
return Promise.resolve({ props: {} });
|
|
12
29
|
}
|
|
@@ -20,6 +37,11 @@ export class ServerDataFetcher {
|
|
|
20
37
|
successThreshold: 2,
|
|
21
38
|
});
|
|
22
39
|
|
|
40
|
+
// Choose isolated or direct execution
|
|
41
|
+
const useIsolation = isDataIsolationEnabled() &&
|
|
42
|
+
!!options?.modulePath &&
|
|
43
|
+
!!options?.projectDir;
|
|
44
|
+
|
|
23
45
|
return withSpan(
|
|
24
46
|
"data.fetch_server",
|
|
25
47
|
async () => {
|
|
@@ -28,7 +50,9 @@ export class ServerDataFetcher {
|
|
|
28
50
|
try {
|
|
29
51
|
const result = await circuitBreaker.execute(() =>
|
|
30
52
|
withTimeoutThrow(
|
|
31
|
-
|
|
53
|
+
useIsolation
|
|
54
|
+
? this.fetchIsolated(options!.modulePath!, options!.projectDir!, context)
|
|
55
|
+
: Promise.resolve(pageModule.getServerData!(context)),
|
|
32
56
|
DATA_FETCH_TIMEOUT_MS,
|
|
33
57
|
`getServerData for ${pathname}`,
|
|
34
58
|
)
|
|
@@ -59,7 +83,11 @@ export class ServerDataFetcher {
|
|
|
59
83
|
throw error;
|
|
60
84
|
}
|
|
61
85
|
|
|
62
|
-
this.logError("DATA_FETCH_ERROR getServerData failed", error, {
|
|
86
|
+
this.logError("DATA_FETCH_ERROR getServerData failed", error, {
|
|
87
|
+
pathname,
|
|
88
|
+
durationMs,
|
|
89
|
+
isolated: useIsolation,
|
|
90
|
+
});
|
|
63
91
|
throw error;
|
|
64
92
|
}
|
|
65
93
|
},
|
|
@@ -68,8 +96,55 @@ export class ServerDataFetcher {
|
|
|
68
96
|
"data.pathname": pathname,
|
|
69
97
|
"data.timeout_ms": DATA_FETCH_TIMEOUT_MS,
|
|
70
98
|
"data.project_id": projectId,
|
|
99
|
+
"data.isolated": useIsolation,
|
|
100
|
+
},
|
|
101
|
+
);
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
/**
|
|
105
|
+
* Execute getServerData in a per-project Worker.
|
|
106
|
+
*/
|
|
107
|
+
private async fetchIsolated(
|
|
108
|
+
modulePath: string,
|
|
109
|
+
projectDir: string,
|
|
110
|
+
context: DataContext,
|
|
111
|
+
): Promise<DataResult> {
|
|
112
|
+
const pool = getWorkerPool();
|
|
113
|
+
const body = context.request?.body ? new Uint8Array(await context.request.arrayBuffer()) : null;
|
|
114
|
+
|
|
115
|
+
const workerResponse: WorkerResponse = await pool.execute(
|
|
116
|
+
projectDir,
|
|
117
|
+
[projectDir],
|
|
118
|
+
{
|
|
119
|
+
type: "fetch-data",
|
|
120
|
+
id: dntShim.crypto.randomUUID(),
|
|
121
|
+
modulePath,
|
|
122
|
+
context: {
|
|
123
|
+
params: context.params,
|
|
124
|
+
query: context.query?.toString() ?? "",
|
|
125
|
+
request: {
|
|
126
|
+
url: context.request?.url ?? context.url?.toString() ?? "http://localhost",
|
|
127
|
+
method: context.request?.method ?? "GET",
|
|
128
|
+
headers: context.request ? [...context.request.headers.entries()] : [],
|
|
129
|
+
body,
|
|
130
|
+
},
|
|
131
|
+
url: context.url?.toString() ?? "http://localhost",
|
|
132
|
+
},
|
|
71
133
|
},
|
|
72
134
|
);
|
|
135
|
+
|
|
136
|
+
if (workerResponse.type === "error") {
|
|
137
|
+
const err = new Error(workerResponse.error.message);
|
|
138
|
+
err.name = workerResponse.error.name;
|
|
139
|
+
throw err;
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
if (workerResponse.type === "data-result") {
|
|
143
|
+
return workerResponse.result as DataResult;
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
// Unexpected response type — shouldn't happen but be defensive
|
|
147
|
+
throw new Error(`Unexpected worker response type: ${workerResponse.type}`);
|
|
73
148
|
}
|
|
74
149
|
|
|
75
150
|
/**
|