veryfront 0.1.73 → 0.1.75

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120) hide show
  1. package/esm/cli/commands/knowledge/command-help.d.ts.map +1 -1
  2. package/esm/cli/commands/knowledge/command-help.js +3 -1
  3. package/esm/cli/commands/knowledge/command.d.ts +34 -5
  4. package/esm/cli/commands/knowledge/command.d.ts.map +1 -1
  5. package/esm/cli/commands/knowledge/command.js +151 -22
  6. package/esm/cli/commands/knowledge/parser-source.d.ts.map +1 -1
  7. package/esm/cli/commands/knowledge/parser-source.js +110 -5
  8. package/esm/deno.d.ts +2 -0
  9. package/esm/deno.js +3 -1
  10. package/esm/src/data/data-fetcher.d.ts +11 -1
  11. package/esm/src/data/data-fetcher.d.ts.map +1 -1
  12. package/esm/src/data/data-fetcher.js +5 -2
  13. package/esm/src/data/index.d.ts +1 -1
  14. package/esm/src/data/index.d.ts.map +1 -1
  15. package/esm/src/data/server-data-fetcher.d.ts +14 -1
  16. package/esm/src/data/server-data-fetcher.d.ts.map +1 -1
  17. package/esm/src/data/server-data-fetcher.js +49 -3
  18. package/esm/src/rendering/orchestrator/lifecycle.d.ts +4 -0
  19. package/esm/src/rendering/orchestrator/lifecycle.d.ts.map +1 -1
  20. package/esm/src/rendering/orchestrator/lifecycle.js +8 -0
  21. package/esm/src/rendering/orchestrator/pipeline.d.ts.map +1 -1
  22. package/esm/src/rendering/orchestrator/pipeline.js +6 -1
  23. package/esm/src/rendering/orchestrator/ssr-orchestrator.d.ts +26 -1
  24. package/esm/src/rendering/orchestrator/ssr-orchestrator.d.ts.map +1 -1
  25. package/esm/src/rendering/orchestrator/ssr-orchestrator.js +77 -1
  26. package/esm/src/routing/api/handler.d.ts.map +1 -1
  27. package/esm/src/routing/api/handler.js +6 -2
  28. package/esm/src/routing/api/route-executor.d.ts +8 -2
  29. package/esm/src/routing/api/route-executor.d.ts.map +1 -1
  30. package/esm/src/routing/api/route-executor.js +131 -3
  31. package/esm/src/security/deno-permissions.d.ts +6 -0
  32. package/esm/src/security/deno-permissions.d.ts.map +1 -1
  33. package/esm/src/security/deno-permissions.js +10 -0
  34. package/esm/src/security/sandbox/project-worker.d.ts +61 -0
  35. package/esm/src/security/sandbox/project-worker.d.ts.map +1 -0
  36. package/esm/src/security/sandbox/project-worker.js +318 -0
  37. package/esm/src/security/sandbox/worker-permissions.d.ts +30 -0
  38. package/esm/src/security/sandbox/worker-permissions.d.ts.map +1 -0
  39. package/esm/src/security/sandbox/worker-permissions.js +60 -0
  40. package/esm/src/security/sandbox/worker-pool.d.ts +87 -0
  41. package/esm/src/security/sandbox/worker-pool.d.ts.map +1 -0
  42. package/esm/src/security/sandbox/worker-pool.js +356 -0
  43. package/esm/src/security/sandbox/worker-types.d.ts +165 -0
  44. package/esm/src/security/sandbox/worker-types.d.ts.map +1 -0
  45. package/esm/src/security/sandbox/worker-types.js +17 -0
  46. package/esm/src/server/handlers/request/ssr/ssr.handler.d.ts +2 -0
  47. package/esm/src/server/handlers/request/ssr/ssr.handler.d.ts.map +1 -1
  48. package/esm/src/server/handlers/request/ssr/ssr.handler.js +6 -2
  49. package/esm/src/server/project-env/storage.d.ts +6 -0
  50. package/esm/src/server/project-env/storage.d.ts.map +1 -1
  51. package/esm/src/server/project-env/storage.js +8 -0
  52. package/esm/src/server/runtime-handler/adapter-factory.d.ts +3 -0
  53. package/esm/src/server/runtime-handler/adapter-factory.d.ts.map +1 -1
  54. package/esm/src/server/runtime-handler/adapter-factory.js +6 -5
  55. package/esm/src/server/runtime-handler/index.d.ts +33 -0
  56. package/esm/src/server/runtime-handler/index.d.ts.map +1 -1
  57. package/esm/src/server/runtime-handler/index.js +103 -37
  58. package/esm/src/server/runtime-handler/local-project-discovery.d.ts +32 -4
  59. package/esm/src/server/runtime-handler/local-project-discovery.d.ts.map +1 -1
  60. package/esm/src/server/runtime-handler/local-project-discovery.js +46 -16
  61. package/esm/src/server/runtime-handler/project-isolation.d.ts +5 -0
  62. package/esm/src/server/runtime-handler/project-isolation.d.ts.map +1 -1
  63. package/esm/src/server/runtime-handler/project-isolation.js +44 -0
  64. package/esm/src/server/services/rendering/ssr.service.d.ts +19 -1
  65. package/esm/src/server/services/rendering/ssr.service.d.ts.map +1 -1
  66. package/esm/src/server/services/rendering/ssr.service.js +9 -1
  67. package/esm/src/server/shared/renderer/adapter.d.ts +25 -0
  68. package/esm/src/server/shared/renderer/adapter.d.ts.map +1 -1
  69. package/esm/src/server/shared/renderer/adapter.js +83 -10
  70. package/esm/src/server/shared/renderer/index.d.ts +1 -1
  71. package/esm/src/server/shared/renderer/index.d.ts.map +1 -1
  72. package/esm/src/server/shared/renderer/index.js +1 -1
  73. package/esm/src/server/shared/renderer/memory/pressure.d.ts +7 -0
  74. package/esm/src/server/shared/renderer/memory/pressure.d.ts.map +1 -1
  75. package/esm/src/server/shared/renderer/memory/pressure.js +7 -0
  76. package/esm/src/transforms/pipeline/stages/ssr-vf-modules/path-resolver.d.ts +4 -4
  77. package/esm/src/transforms/pipeline/stages/ssr-vf-modules/path-resolver.d.ts.map +1 -1
  78. package/esm/src/transforms/pipeline/stages/ssr-vf-modules/path-resolver.js +15 -15
  79. package/esm/src/utils/index.d.ts +10 -1
  80. package/esm/src/utils/index.d.ts.map +1 -1
  81. package/esm/src/utils/index.js +9 -1
  82. package/esm/src/utils/logger/index.d.ts +1 -1
  83. package/esm/src/utils/logger/index.d.ts.map +1 -1
  84. package/esm/src/utils/logger/index.js +1 -1
  85. package/esm/src/utils/logger/logger.d.ts +14 -0
  86. package/esm/src/utils/logger/logger.d.ts.map +1 -1
  87. package/esm/src/utils/logger/logger.js +17 -0
  88. package/esm/src/workflow/claude-code/tool.d.ts +5 -5
  89. package/package.json +4 -1
  90. package/src/cli/commands/knowledge/command-help.ts +3 -1
  91. package/src/cli/commands/knowledge/command.ts +180 -22
  92. package/src/cli/commands/knowledge/parser-source.ts +110 -5
  93. package/src/deno.js +3 -1
  94. package/src/src/data/data-fetcher.ts +18 -2
  95. package/src/src/data/index.ts +1 -1
  96. package/src/src/data/server-data-fetcher.ts +78 -3
  97. package/src/src/rendering/orchestrator/lifecycle.ts +11 -0
  98. package/src/src/rendering/orchestrator/pipeline.ts +7 -2
  99. package/src/src/rendering/orchestrator/ssr-orchestrator.ts +119 -0
  100. package/src/src/routing/api/handler.ts +16 -3
  101. package/src/src/routing/api/route-executor.ts +222 -1
  102. package/src/src/security/deno-permissions.ts +11 -0
  103. package/src/src/security/sandbox/project-worker.ts +416 -0
  104. package/src/src/security/sandbox/worker-permissions.ts +74 -0
  105. package/src/src/security/sandbox/worker-pool.ts +451 -0
  106. package/src/src/security/sandbox/worker-types.ts +209 -0
  107. package/src/src/server/handlers/request/ssr/ssr.handler.ts +11 -2
  108. package/src/src/server/project-env/storage.ts +9 -0
  109. package/src/src/server/runtime-handler/adapter-factory.ts +13 -5
  110. package/src/src/server/runtime-handler/index.ts +132 -39
  111. package/src/src/server/runtime-handler/local-project-discovery.ts +51 -17
  112. package/src/src/server/runtime-handler/project-isolation.ts +53 -0
  113. package/src/src/server/services/rendering/ssr.service.ts +34 -3
  114. package/src/src/server/shared/renderer/adapter.ts +107 -8
  115. package/src/src/server/shared/renderer/index.ts +7 -1
  116. package/src/src/server/shared/renderer/memory/pressure.ts +8 -0
  117. package/src/src/transforms/pipeline/stages/ssr-vf-modules/path-resolver.ts +18 -12
  118. package/src/src/utils/index.ts +11 -0
  119. package/src/src/utils/logger/index.ts +1 -0
  120. package/src/src/utils/logger/logger.ts +34 -0
@@ -1,6 +1,6 @@
1
1
  import * as dntShim from "../../../_dnt.shims.js";
2
2
  import { z } from "zod";
3
- import { createFileSystem } from "../../../src/platform/index.js";
3
+ import { createFileSystem, getEnv } from "../../../src/platform/index.js";
4
4
  import { basename, extname, join, normalize, relative } from "../../../src/platform/compat/path/index.js";
5
5
  import { withSpan } from "../../../src/observability/tracing/otlp-setup.js";
6
6
  import { cliLogger } from "../../utils/index.js";
@@ -9,6 +9,7 @@ import type { ParsedArgs } from "../../shared/types.js";
9
9
  import { downloadUploadToFile, listAllUploads, type UploadItem } from "../uploads/command.js";
10
10
  import { putRemoteFileFromLocal } from "../files/command.js";
11
11
  import { knowledgeIngestPythonSource } from "./parser-source.js";
12
+ import { createJobUserLogger, type Logger, serverLogger } from "../../../src/utils/index.js";
12
13
 
13
14
  const SUPPORTED_EXTENSIONS = new Set([
14
15
  ".pdf",
@@ -59,10 +60,12 @@ type KnowledgeSource =
59
60
 
60
61
  type DownloadResult = { uploadPath: string; localPath: string; bytes?: number };
61
62
 
63
+ const knowledgeJobLogger = serverLogger.component("knowledge-ingest");
64
+
62
65
  const KnowledgeIngestArgsSchema = z.object({
63
66
  projectSlug: z.string().optional(),
64
67
  projectDir: z.string().optional(),
65
- source: z.string().optional(),
68
+ sources: z.array(z.string()).default([]),
66
69
  path: z.string().optional(),
67
70
  all: z.boolean().default(false),
68
71
  recursive: z.boolean().default(false),
@@ -72,6 +75,44 @@ const KnowledgeIngestArgsSchema = z.object({
72
75
  slug: z.string().optional(),
73
76
  json: z.boolean().default(false),
74
77
  quiet: z.boolean().default(false),
78
+ }).superRefine((value, ctx) => {
79
+ const hasExplicitSources = value.sources.length > 0;
80
+ const hasPath = typeof value.path === "string" && value.path.length > 0;
81
+
82
+ if (hasExplicitSources && (hasPath || value.all)) {
83
+ ctx.addIssue({
84
+ code: z.ZodIssueCode.custom,
85
+ message: "Use either explicit source paths or --path with --all, not both.",
86
+ });
87
+ }
88
+
89
+ if (!hasExplicitSources && !hasPath && !value.all) {
90
+ ctx.addIssue({
91
+ code: z.ZodIssueCode.custom,
92
+ message: "Provide one or more source paths or use --path with --all.",
93
+ });
94
+ }
95
+
96
+ if (hasPath && !value.all) {
97
+ ctx.addIssue({
98
+ code: z.ZodIssueCode.custom,
99
+ message: "--path requires --all.",
100
+ });
101
+ }
102
+
103
+ if (!hasPath && value.all) {
104
+ ctx.addIssue({
105
+ code: z.ZodIssueCode.custom,
106
+ message: "--all requires --path.",
107
+ });
108
+ }
109
+
110
+ if (value.slug && value.sources.length !== 1) {
111
+ ctx.addIssue({
112
+ code: z.ZodIssueCode.custom,
113
+ message: "--slug can only be used with a single explicit source.",
114
+ });
115
+ }
75
116
  });
76
117
 
77
118
  export type KnowledgeIngestOptions = z.infer<typeof KnowledgeIngestArgsSchema>;
@@ -92,12 +133,33 @@ function printJson(value: unknown): void {
92
133
  console.log(JSON.stringify(value, null, 2));
93
134
  }
94
135
 
136
+ function createKnowledgeIngestEventLogger(): Logger | null {
137
+ const projectId = getEnv("TENANT_PROJECT_ID");
138
+ const jobId = getEnv("JOB_ID");
139
+
140
+ if (!projectId || !jobId) {
141
+ return null;
142
+ }
143
+
144
+ return createJobUserLogger(knowledgeJobLogger, {
145
+ projectId,
146
+ jobId,
147
+ batchId: getEnv("JOB_BATCH_ID") ?? undefined,
148
+ jobTarget: getEnv("JOB_TARGET") ?? undefined,
149
+ task: "knowledge-ingest",
150
+ });
151
+ }
152
+
153
+ function buildKnowledgeSourceName(source: KnowledgeSource): string {
154
+ return basename(source.kind === "upload" ? source.uploadPath : source.localPath);
155
+ }
156
+
95
157
  function showKnowledgeUsage(): void {
96
158
  console.log(`
97
159
  Veryfront Knowledge
98
160
 
99
161
  Usage:
100
- veryfront knowledge ingest <source> [options]
162
+ veryfront knowledge ingest <source...> [options]
101
163
  veryfront knowledge ingest --path <prefix-or-dir> --all [options]
102
164
 
103
165
  Subcommands:
@@ -111,7 +173,7 @@ export function parseKnowledgeIngestArgs(
111
173
  return KnowledgeIngestArgsSchema.safeParse({
112
174
  projectSlug: getStringArg(args, "project", "p", "project-slug"),
113
175
  projectDir: getStringArg(args, "project-dir", "dir", "d"),
114
- source: typeof args._[2] === "string" ? args._[2] : undefined,
176
+ sources: args._.slice(2).filter((value): value is string => typeof value === "string"),
115
177
  path: getStringArg(args, "path"),
116
178
  all: getBooleanArg(args, "all"),
117
179
  recursive: getBooleanArg(args, "recursive"),
@@ -273,6 +335,7 @@ export async function runKnowledgeParser(input: {
273
335
  description?: string;
274
336
  slug?: string;
275
337
  sourceReference?: string;
338
+ env?: Record<string, string>;
276
339
  }): Promise<KnowledgeParserResult> {
277
340
  const tempDir = await dntShim.Deno.makeTempDir({ prefix: "veryfront-knowledge-parser-" });
278
341
  const inputJsonPath = `${tempDir}/input.json`;
@@ -296,6 +359,7 @@ export async function runKnowledgeParser(input: {
296
359
  try {
297
360
  result = await new dntShim.Deno.Command("python3", {
298
361
  args: [scriptPath, "--input-json", inputJsonPath, "--output-json", outputJsonPath],
362
+ ...(input.env ? { env: input.env } : {}),
299
363
  stdout: "piped",
300
364
  stderr: "piped",
301
365
  }).output();
@@ -321,7 +385,7 @@ export async function runKnowledgeParser(input: {
321
385
  }
322
386
 
323
387
  export async function collectKnowledgeSources(
324
- options: Pick<KnowledgeIngestOptions, "source" | "path" | "all" | "recursive">,
388
+ options: Pick<KnowledgeIngestOptions, "sources" | "path" | "all" | "recursive">,
325
389
  deps: {
326
390
  client: ApiClient;
327
391
  projectSlug: string;
@@ -330,29 +394,68 @@ export async function collectKnowledgeSources(
330
394
  ): Promise<KnowledgeSource[]> {
331
395
  const fs = createFileSystem();
332
396
 
333
- if (options.source) {
334
- if (!isProjectUploadReference(options.source) && await fs.exists(options.source)) {
335
- const localFiles = await collectLocalFiles(options.source, options.recursive);
336
- if (!localFiles.length) throw new Error(`No supported files found at ${options.source}`);
337
- return localFiles.map((localPath) => ({ kind: "local", input: options.source!, localPath }));
397
+ if (options.sources.length > 0) {
398
+ const explicitSources: Array<
399
+ | { kind: "local"; sources: KnowledgeSource[] }
400
+ | { kind: "upload"; input: string; uploadPath: string }
401
+ > = [];
402
+ const uploadTargets: string[] = [];
403
+
404
+ for (const input of options.sources) {
405
+ if (!isProjectUploadReference(input) && await fs.exists(input)) {
406
+ const localFiles = await collectLocalFiles(input, options.recursive);
407
+ if (!localFiles.length) throw new Error(`No supported files found at ${input}`);
408
+ explicitSources.push({
409
+ kind: "local",
410
+ sources: localFiles.map((localPath) => ({ kind: "local", input, localPath })),
411
+ });
412
+ continue;
413
+ }
414
+
415
+ if (isLikelyLocalPath(input)) {
416
+ throw new Error(`Local file not found: ${input}`);
417
+ }
418
+
419
+ const uploadPath = normalizeProjectUploadPath(input);
420
+ explicitSources.push({ kind: "upload", input, uploadPath });
421
+ uploadTargets.push(uploadPath);
338
422
  }
339
423
 
340
- if (isLikelyLocalPath(options.source)) {
341
- throw new Error(`Local file not found: ${options.source}`);
424
+ const downloads = uploadTargets.length > 0 ? await deps.downloadUploads(uploadTargets) : [];
425
+ const downloadsByPath = new Map<string, DownloadResult[]>();
426
+
427
+ for (const download of downloads) {
428
+ const existing = downloadsByPath.get(download.uploadPath) ?? [];
429
+ existing.push(download);
430
+ downloadsByPath.set(download.uploadPath, existing);
342
431
  }
343
432
 
344
- const uploadPath = normalizeProjectUploadPath(options.source);
345
- const downloads = await deps.downloadUploads([uploadPath]);
346
- return downloads.map((download) => ({
347
- kind: "upload",
348
- input: options.source!,
349
- uploadPath: download.uploadPath,
350
- localPath: download.localPath,
351
- }));
433
+ const resolvedSources: KnowledgeSource[] = [];
434
+ for (const source of explicitSources) {
435
+ if (source.kind === "local") {
436
+ resolvedSources.push(...source.sources);
437
+ continue;
438
+ }
439
+
440
+ const matchingDownloads = downloadsByPath.get(source.uploadPath);
441
+ const download = matchingDownloads?.shift();
442
+ if (!download) {
443
+ throw new Error(`Upload not found: ${formatKnowledgeUploadSource(source.uploadPath)}`);
444
+ }
445
+
446
+ resolvedSources.push({
447
+ kind: "upload",
448
+ input: source.input,
449
+ uploadPath: download.uploadPath,
450
+ localPath: download.localPath,
451
+ });
452
+ }
453
+
454
+ return resolvedSources;
352
455
  }
353
456
 
354
457
  if (!options.path || !options.all) {
355
- throw new Error("Provide a source path or use --path with --all.");
458
+ throw new Error("Provide one or more source paths or use --path with --all.");
356
459
  }
357
460
 
358
461
  if (!isProjectUploadReference(options.path) && await fs.exists(options.path)) {
@@ -405,12 +508,24 @@ export async function ingestResolvedSources(
405
508
  outputDir: string;
406
509
  runParser: typeof runKnowledgeParser;
407
510
  uploadKnowledgeFile: (remotePath: string, localPath: string) => Promise<{ path: string }>;
511
+ eventLogger?: Logger | null;
408
512
  },
409
513
  ): Promise<KnowledgeIngestFileResult[]> {
410
- const slugs = options.slug && sources.length === 1 ? [options.slug] : ensureUniqueSlugs(sources);
514
+ if (options.slug && sources.length !== 1) {
515
+ throw new Error("--slug can only be used with a single explicit source.");
516
+ }
517
+
518
+ const slugs = options.slug ? [options.slug] : ensureUniqueSlugs(sources);
411
519
  const results: KnowledgeIngestFileResult[] = [];
412
520
 
413
521
  for (const [index, source] of sources.entries()) {
522
+ deps.eventLogger?.info("Processing knowledge source", {
523
+ phase: "file_processing",
524
+ progress_current: index + 1,
525
+ progress_total: sources.length,
526
+ source_name: buildKnowledgeSourceName(source),
527
+ });
528
+
414
529
  const parser = await deps.runParser({
415
530
  filePath: source.localPath,
416
531
  outputDir: deps.outputDir,
@@ -424,6 +539,26 @@ export async function ingestResolvedSources(
424
539
  options.knowledgePath,
425
540
  );
426
541
  const uploaded = await deps.uploadKnowledgeFile(remotePath, parser.sandbox_output_path);
542
+
543
+ deps.eventLogger?.info("Knowledge source ingested", {
544
+ phase: "file_completed",
545
+ progress_current: index + 1,
546
+ progress_total: sources.length,
547
+ source_name: buildKnowledgeSourceName(source),
548
+ remote_path: uploaded.path,
549
+ warning_count: parser.warnings.length,
550
+ });
551
+
552
+ if (parser.warnings.length > 0) {
553
+ deps.eventLogger?.warn("Knowledge source emitted warnings", {
554
+ phase: "file_warning",
555
+ progress_current: index + 1,
556
+ progress_total: sources.length,
557
+ source_name: buildKnowledgeSourceName(source),
558
+ warning_count: parser.warnings.length,
559
+ });
560
+ }
561
+
427
562
  results.push(
428
563
  createKnowledgeIngestResult({
429
564
  source: buildSourceReference(source),
@@ -462,8 +597,14 @@ export async function knowledgeCommand(args: ParsedArgs): Promise<void> {
462
597
  const outputDir = options.outputDir ?? await defaultOutputRoot();
463
598
  const shouldCleanupOutputDir = options.outputDir === undefined;
464
599
  const downloadOutputDir = resolveKnowledgeDownloadOutputDir(outputDir);
600
+ const eventLogger = createKnowledgeIngestEventLogger();
465
601
 
466
602
  try {
603
+ eventLogger?.info("Starting knowledge ingest", {
604
+ phase: "started",
605
+ mode: options.path ? "path_prefix" : "explicit_sources",
606
+ });
607
+
467
608
  const sources = await collectKnowledgeSources(options, {
468
609
  client,
469
610
  projectSlug: config.projectSlug,
@@ -475,15 +616,27 @@ export async function knowledgeCommand(args: ParsedArgs): Promise<void> {
475
616
  ),
476
617
  });
477
618
 
619
+ eventLogger?.info("Resolved knowledge sources", {
620
+ phase: "sources_resolved",
621
+ progress_total: sources.length,
622
+ });
623
+
478
624
  const results = await ingestResolvedSources(sources, options, {
479
625
  client,
480
626
  projectSlug: config.projectSlug,
481
627
  outputDir,
482
628
  runParser: runKnowledgeParser,
629
+ eventLogger,
483
630
  uploadKnowledgeFile: (remotePath, localPath) =>
484
631
  putRemoteFileFromLocal(client, config.projectSlug, remotePath, localPath),
485
632
  });
486
633
 
634
+ eventLogger?.info("Completed knowledge ingest", {
635
+ phase: "completed",
636
+ progress_current: results.length,
637
+ progress_total: results.length,
638
+ });
639
+
487
640
  if (options.json) {
488
641
  printJson(results);
489
642
  return;
@@ -495,6 +648,11 @@ export async function knowledgeCommand(args: ParsedArgs): Promise<void> {
495
648
  cliLogger.info(` ${result.summary}`);
496
649
  }
497
650
  }
651
+ } catch (error) {
652
+ eventLogger?.error("Knowledge ingest failed", {
653
+ phase: "failed",
654
+ });
655
+ throw error;
498
656
  } finally {
499
657
  if (shouldCleanupOutputDir) {
500
658
  await Promise.all([
@@ -3,6 +3,7 @@ import argparse
3
3
  import csv
4
4
  import json
5
5
  import re
6
+ import subprocess
6
7
  from datetime import date
7
8
  from pathlib import Path
8
9
  from typing import Any, Optional
@@ -71,6 +72,107 @@ def build_frontmatter(source: str, source_type: str, description: str) -> str:
71
72
  ])
72
73
 
73
74
 
75
+ def metadata_int(metadata: dict[str, Any], *keys: str) -> Optional[int]:
76
+ for key in keys:
77
+ value = metadata.get(key)
78
+ if isinstance(value, int) and not isinstance(value, bool):
79
+ return value
80
+ return None
81
+
82
+
83
+ def metadata_string_list(metadata: dict[str, Any], *keys: str) -> Optional[list[str]]:
84
+ for key in keys:
85
+ value = metadata.get(key)
86
+ if isinstance(value, list) and all(isinstance(item, str) for item in value):
87
+ return value
88
+ return None
89
+
90
+
91
+ def build_kreuzberg_stats(source_type: str, content: str, metadata: dict[str, Any]):
92
+ stats: dict[str, Any] = {
93
+ "characters": len(content),
94
+ "lines": len(content.splitlines()) if content else 0,
95
+ "engine": "kreuzberg",
96
+ }
97
+
98
+ if isinstance(metadata.get("mime_type"), str):
99
+ stats["mime_type"] = metadata["mime_type"]
100
+
101
+ if source_type == "pdf":
102
+ stats["pages"] = metadata_int(metadata, "page_count") or 0
103
+ stats["tables"] = metadata_int(metadata, "table_count") or 0
104
+ elif source_type in {"xlsx", "xls"}:
105
+ stats["sheets"] = metadata_int(metadata, "sheet_count") or 0
106
+ stats["rows"] = metadata_int(metadata, "row_count") or 0
107
+ stats["sheet_names"] = metadata_string_list(metadata, "sheet_names") or []
108
+ elif source_type == "docx":
109
+ stats["paragraphs"] = metadata_int(metadata, "paragraph_count") or 0
110
+ stats["tables"] = metadata_int(metadata, "table_count") or 0
111
+ elif source_type == "pptx":
112
+ stats["slides"] = metadata_int(metadata, "slide_count", "page_count") or 0
113
+ stats["tables"] = metadata_int(metadata, "table_count") or 0
114
+ elif source_type == "html":
115
+ stats["tables"] = metadata_int(metadata, "table_count") or 0
116
+
117
+ return stats
118
+
119
+
120
+ def parse_with_kreuzberg(path: str, source_type: str):
121
+ warnings: list[str] = []
122
+ completed = subprocess.run(
123
+ [
124
+ "kreuzberg",
125
+ "extract",
126
+ path,
127
+ "--format",
128
+ "json",
129
+ "--output-format",
130
+ "markdown",
131
+ ],
132
+ capture_output=True,
133
+ text=True,
134
+ check=False,
135
+ )
136
+
137
+ if completed.returncode != 0:
138
+ detail = completed.stderr.strip() or completed.stdout.strip() or f"exit code {completed.returncode}"
139
+ raise RuntimeError(f"kreuzberg extract failed: {detail}")
140
+
141
+ try:
142
+ payload = json.loads(completed.stdout)
143
+ except json.JSONDecodeError as error:
144
+ raise RuntimeError(f"kreuzberg extract returned invalid JSON: {error}") from error
145
+
146
+ content = payload.get("content", "")
147
+ if not isinstance(content, str):
148
+ raise RuntimeError("kreuzberg extract did not return string content")
149
+
150
+ metadata = payload.get("metadata") if isinstance(payload.get("metadata"), dict) else {}
151
+ normalized_content = clean_text(content)
152
+ stats = build_kreuzberg_stats(source_type, normalized_content, metadata)
153
+
154
+ return normalized_content or "_No extractable text found in document._", stats, warnings
155
+
156
+
157
+ def prefer_kreuzberg(source_type: str, fallback_parser):
158
+ def parser(path: str):
159
+ try:
160
+ return parse_with_kreuzberg(path, source_type)
161
+ except FileNotFoundError as error:
162
+ if getattr(error, "filename", "") == "kreuzberg":
163
+ return fallback_parser(path)
164
+ raise
165
+ except RuntimeError as error:
166
+ content, stats, warnings = fallback_parser(path)
167
+ warnings.append(
168
+ "kreuzberg extraction failed; fell back to the built-in parser: "
169
+ + str(error)
170
+ )
171
+ return content, stats, warnings
172
+
173
+ return parser
174
+
175
+
74
176
  def parse_csv_like(path: str, delimiter: str = ","):
75
177
  warnings: list[str] = []
76
178
  with open(path, newline="", encoding="utf-8-sig") as file:
@@ -305,18 +407,19 @@ def parse_json(path: str):
305
407
  def select_parser(path: Path):
306
408
  ext = path.suffix.lower()
307
409
  if ext == ".pdf":
308
- return "pdf", parse_pdf
410
+ return "pdf", prefer_kreuzberg("pdf", parse_pdf)
309
411
  if ext in {".csv", ".tsv"}:
310
412
  delimiter = "\t" if ext == ".tsv" else ","
311
413
  return ext.lstrip("."), lambda file_path: parse_csv_like(file_path, delimiter)
312
414
  if ext in {".xlsx", ".xls"}:
313
- return ext.lstrip("."), parse_excel
415
+ source_type = ext.lstrip(".")
416
+ return source_type, prefer_kreuzberg(source_type, parse_excel)
314
417
  if ext == ".docx":
315
- return "docx", parse_docx
418
+ return "docx", prefer_kreuzberg("docx", parse_docx)
316
419
  if ext == ".pptx":
317
- return "pptx", parse_pptx
420
+ return "pptx", prefer_kreuzberg("pptx", parse_pptx)
318
421
  if ext in {".html", ".htm"}:
319
- return "html", parse_html
422
+ return "html", prefer_kreuzberg("html", parse_html)
320
423
  if ext in {".txt", ".md", ".mdx"}:
321
424
  return ext.lstrip("."), parse_text
322
425
  if ext == ".json":
@@ -325,6 +428,8 @@ def select_parser(path: Path):
325
428
 
326
429
 
327
430
  def build_summary(source_type: str, stats: dict[str, Any]) -> str:
431
+ if stats.get("engine") == "kreuzberg":
432
+ return f"Converted {source_type.upper()} to markdown ({stats.get('characters', 0)} chars)."
328
433
  if source_type in {"csv", "tsv"}:
329
434
  return f"Parsed {stats.get('rows', 0)} rows across {stats.get('columns', 0)} columns."
330
435
  if source_type in {"xlsx", "xls"}:
package/src/deno.js CHANGED
@@ -1,6 +1,6 @@
1
1
  export default {
2
2
  "name": "veryfront",
3
- "version": "0.1.73",
3
+ "version": "0.1.75",
4
4
  "license": "Apache-2.0",
5
5
  "nodeModulesDir": "auto",
6
6
  "exclude": [
@@ -34,6 +34,7 @@ export default {
34
34
  "./resource": "./src/resource/index.ts",
35
35
  "./mcp": "./src/mcp/index.ts",
36
36
  "./middleware": "./src/middleware/index.ts",
37
+ "./utils": "./src/utils/index.ts",
37
38
  "./oauth": "./src/oauth/index.ts",
38
39
  "./provider": "./src/provider/index.ts",
39
40
  "./fs": "./src/fs/index.ts",
@@ -71,6 +72,7 @@ export default {
71
72
  "veryfront/workflow/claude-code": "./src/workflow/claude-code/index.ts",
72
73
  "veryfront/workflow/claude-code/react": "./src/workflow/claude-code/react/index.ts",
73
74
  "veryfront/workflow/discovery": "./src/workflow/discovery/index.ts",
75
+ "veryfront/utils": "./src/utils/index.ts",
74
76
  "veryfront/utils/box": "./src/utils/box.ts",
75
77
  "veryfront/utils/case-utils": "./src/utils/case-utils.ts",
76
78
  "veryfront/utils/constants/server": "./src/utils/constants/server.ts",
@@ -1,11 +1,22 @@
1
1
  import { withSpan } from "../observability/tracing/otlp-setup.js";
2
2
  import { SpanNames } from "../observability/tracing/span-names.js";
3
3
  import { CacheManager } from "./data-fetching-cache.js";
4
- import { ServerDataFetcher } from "./server-data-fetcher.js";
4
+ import { ServerDataFetcher, type ServerDataFetchOptions } from "./server-data-fetcher.js";
5
5
  import { StaticDataFetcher } from "./static-data-fetcher.js";
6
6
  import { StaticPathsFetcher } from "./static-paths-fetcher.js";
7
7
  import type { DataContext, DataResult, PageWithData, StaticPathsResult } from "./types.js";
8
8
 
9
+ /**
10
+ * Options for isolated data fetching. Passed through to ServerDataFetcher
11
+ * when worker isolation is enabled.
12
+ */
13
+ export interface FetchDataOptions {
14
+ /** Absolute path to the module containing getServerData */
15
+ modulePath?: string;
16
+ /** Project directory for worker scoping */
17
+ projectDir?: string;
18
+ }
19
+
9
20
  export class DataFetcher {
10
21
  private cacheManager: CacheManager;
11
22
  private serverFetcher: ServerDataFetcher;
@@ -23,6 +34,7 @@ export class DataFetcher {
23
34
  pageModule: PageWithData,
24
35
  context: DataContext,
25
36
  mode: "development" | "production" = "development",
37
+ options?: FetchDataOptions,
26
38
  ): Promise<DataResult> {
27
39
  const preferServerData = mode === "development" || !pageModule.getStaticData;
28
40
  const useServer = preferServerData && !!pageModule.getServerData;
@@ -34,10 +46,14 @@ export class DataFetcher {
34
46
  ? "static"
35
47
  : "none";
36
48
 
49
+ const isolationOptions: ServerDataFetchOptions | undefined = options
50
+ ? { modulePath: options.modulePath, projectDir: options.projectDir }
51
+ : undefined;
52
+
37
53
  return withSpan(
38
54
  SpanNames.DATA_FETCH,
39
55
  () => {
40
- if (useServer) return this.serverFetcher.fetch(pageModule, context);
56
+ if (useServer) return this.serverFetcher.fetch(pageModule, context, isolationOptions);
41
57
  if (useStatic) return this.staticFetcher.fetch(pageModule, context);
42
58
  return Promise.resolve({ props: {} });
43
59
  },
@@ -13,5 +13,5 @@ export type {
13
13
  PageWithData,
14
14
  StaticPathsResult,
15
15
  } from "./types.js";
16
- export { DataFetcher } from "./data-fetcher.js";
16
+ export { DataFetcher, type FetchDataOptions } from "./data-fetcher.js";
17
17
  export { notFound, redirect } from "./helpers.js";
@@ -1,12 +1,29 @@
1
+ import * as dntShim from "../../_dnt.shims.js";
1
2
  import type { DataContext, DataResult, PageWithData } from "./types.js";
2
3
  import { serverLogger } from "../utils/index.js";
3
4
  import { DATA_FETCH_TIMEOUT_MS } from "../config/defaults.js";
4
5
  import { TimeoutError, withTimeoutThrow } from "../rendering/utils/stream-utils.js";
5
6
  import { withSpan } from "../observability/tracing/otlp-setup.js";
6
7
  import { CircuitBreakerOpen, getCircuitBreaker } from "../utils/circuit-breaker.js";
8
+ import { getWorkerPool, isDataIsolationEnabled } from "../security/sandbox/worker-pool.js";
9
+ import type { WorkerResponse } from "../security/sandbox/worker-types.js";
10
+
11
+ /**
12
+ * Options for isolated data fetching through Worker pool.
13
+ */
14
+ export interface ServerDataFetchOptions {
15
+ /** Absolute path to the module containing getServerData */
16
+ modulePath?: string;
17
+ /** Project directory for worker scoping */
18
+ projectDir?: string;
19
+ }
7
20
 
8
21
  export class ServerDataFetcher {
9
- fetch(pageModule: PageWithData, context: DataContext): Promise<DataResult> {
22
+ fetch(
23
+ pageModule: PageWithData,
24
+ context: DataContext,
25
+ options?: ServerDataFetchOptions,
26
+ ): Promise<DataResult> {
10
27
  if (typeof pageModule.getServerData !== "function") {
11
28
  return Promise.resolve({ props: {} });
12
29
  }
@@ -20,6 +37,11 @@ export class ServerDataFetcher {
20
37
  successThreshold: 2,
21
38
  });
22
39
 
40
+ // Choose isolated or direct execution
41
+ const useIsolation = isDataIsolationEnabled() &&
42
+ !!options?.modulePath &&
43
+ !!options?.projectDir;
44
+
23
45
  return withSpan(
24
46
  "data.fetch_server",
25
47
  async () => {
@@ -28,7 +50,9 @@ export class ServerDataFetcher {
28
50
  try {
29
51
  const result = await circuitBreaker.execute(() =>
30
52
  withTimeoutThrow(
31
- Promise.resolve(pageModule.getServerData!(context)),
53
+ useIsolation
54
+ ? this.fetchIsolated(options!.modulePath!, options!.projectDir!, context)
55
+ : Promise.resolve(pageModule.getServerData!(context)),
32
56
  DATA_FETCH_TIMEOUT_MS,
33
57
  `getServerData for ${pathname}`,
34
58
  )
@@ -59,7 +83,11 @@ export class ServerDataFetcher {
59
83
  throw error;
60
84
  }
61
85
 
62
- this.logError("DATA_FETCH_ERROR getServerData failed", error, { pathname, durationMs });
86
+ this.logError("DATA_FETCH_ERROR getServerData failed", error, {
87
+ pathname,
88
+ durationMs,
89
+ isolated: useIsolation,
90
+ });
63
91
  throw error;
64
92
  }
65
93
  },
@@ -68,8 +96,55 @@ export class ServerDataFetcher {
68
96
  "data.pathname": pathname,
69
97
  "data.timeout_ms": DATA_FETCH_TIMEOUT_MS,
70
98
  "data.project_id": projectId,
99
+ "data.isolated": useIsolation,
100
+ },
101
+ );
102
+ }
103
+
104
+ /**
105
+ * Execute getServerData in a per-project Worker.
106
+ */
107
+ private async fetchIsolated(
108
+ modulePath: string,
109
+ projectDir: string,
110
+ context: DataContext,
111
+ ): Promise<DataResult> {
112
+ const pool = getWorkerPool();
113
+ const body = context.request?.body ? new Uint8Array(await context.request.arrayBuffer()) : null;
114
+
115
+ const workerResponse: WorkerResponse = await pool.execute(
116
+ projectDir,
117
+ [projectDir],
118
+ {
119
+ type: "fetch-data",
120
+ id: dntShim.crypto.randomUUID(),
121
+ modulePath,
122
+ context: {
123
+ params: context.params,
124
+ query: context.query?.toString() ?? "",
125
+ request: {
126
+ url: context.request?.url ?? context.url?.toString() ?? "http://localhost",
127
+ method: context.request?.method ?? "GET",
128
+ headers: context.request ? [...context.request.headers.entries()] : [],
129
+ body,
130
+ },
131
+ url: context.url?.toString() ?? "http://localhost",
132
+ },
71
133
  },
72
134
  );
135
+
136
+ if (workerResponse.type === "error") {
137
+ const err = new Error(workerResponse.error.message);
138
+ err.name = workerResponse.error.name;
139
+ throw err;
140
+ }
141
+
142
+ if (workerResponse.type === "data-result") {
143
+ return workerResponse.result as DataResult;
144
+ }
145
+
146
+ // Unexpected response type — shouldn't happen but be defensive
147
+ throw new Error(`Unexpected worker response type: ${workerResponse.type}`);
73
148
  }
74
149
 
75
150
  /**