veryfront 0.1.72 → 0.1.74
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/esm/cli/commands/knowledge/command-help.d.ts.map +1 -1
- package/esm/cli/commands/knowledge/command-help.js +3 -1
- package/esm/cli/commands/knowledge/command.d.ts +32 -5
- package/esm/cli/commands/knowledge/command.d.ts.map +1 -1
- package/esm/cli/commands/knowledge/command.js +87 -21
- package/esm/cli/commands/knowledge/parser-source.d.ts.map +1 -1
- package/esm/cli/commands/knowledge/parser-source.js +110 -5
- package/esm/deno.js +1 -1
- package/esm/src/html/html-shell-generator.d.ts.map +1 -1
- package/esm/src/html/html-shell-generator.js +6 -0
- package/esm/src/rendering/orchestrator/pipeline.d.ts.map +1 -1
- package/esm/src/rendering/orchestrator/pipeline.js +116 -105
- package/esm/src/server/dev-server/error-overlay/error-formatter.d.ts +4 -0
- package/esm/src/server/dev-server/error-overlay/error-formatter.d.ts.map +1 -1
- package/esm/src/server/dev-server/error-overlay/error-formatter.js +15 -0
- package/esm/src/server/dev-server/error-overlay/html-template.d.ts +1 -1
- package/esm/src/server/dev-server/error-overlay/html-template.d.ts.map +1 -1
- package/esm/src/server/dev-server/error-overlay/html-template.js +131 -8
- package/esm/src/server/dev-server/error-overlay/index.d.ts +1 -1
- package/esm/src/server/dev-server/error-overlay/index.d.ts.map +1 -1
- package/esm/src/server/dev-server/error-overlay/index.js +1 -1
- package/esm/src/server/dev-server/error-overlay/overlay-renderer.d.ts +1 -1
- package/esm/src/server/dev-server/error-overlay/overlay-renderer.d.ts.map +1 -1
- package/esm/src/server/dev-server/error-overlay/overlay-renderer.js +2 -2
- package/esm/src/server/dev-server/request-handler.d.ts.map +1 -1
- package/esm/src/server/dev-server/request-handler.js +6 -2
- package/esm/src/server/handlers/request/ssr/ssr.handler.d.ts +2 -0
- package/esm/src/server/handlers/request/ssr/ssr.handler.d.ts.map +1 -1
- package/esm/src/server/handlers/request/ssr/ssr.handler.js +6 -2
- package/esm/src/server/runtime-handler/adapter-factory.d.ts +3 -0
- package/esm/src/server/runtime-handler/adapter-factory.d.ts.map +1 -1
- package/esm/src/server/runtime-handler/adapter-factory.js +6 -5
- package/esm/src/server/runtime-handler/index.d.ts +33 -0
- package/esm/src/server/runtime-handler/index.d.ts.map +1 -1
- package/esm/src/server/runtime-handler/index.js +103 -37
- package/esm/src/server/runtime-handler/local-project-discovery.d.ts +32 -4
- package/esm/src/server/runtime-handler/local-project-discovery.d.ts.map +1 -1
- package/esm/src/server/runtime-handler/local-project-discovery.js +46 -16
- package/esm/src/server/services/rendering/ssr.service.d.ts +19 -1
- package/esm/src/server/services/rendering/ssr.service.d.ts.map +1 -1
- package/esm/src/server/services/rendering/ssr.service.js +18 -3
- package/esm/src/server/shared/renderer/adapter.d.ts +25 -0
- package/esm/src/server/shared/renderer/adapter.d.ts.map +1 -1
- package/esm/src/server/shared/renderer/adapter.js +83 -10
- package/esm/src/server/shared/renderer/index.d.ts +1 -1
- package/esm/src/server/shared/renderer/index.d.ts.map +1 -1
- package/esm/src/server/shared/renderer/index.js +1 -1
- package/esm/src/server/utils/error-html.d.ts.map +1 -1
- package/esm/src/server/utils/error-html.js +26 -6
- package/package.json +1 -1
- package/src/cli/commands/knowledge/command-help.ts +3 -1
- package/src/cli/commands/knowledge/command.ts +104 -21
- package/src/cli/commands/knowledge/parser-source.ts +110 -5
- package/src/deno.js +1 -1
- package/src/src/html/html-shell-generator.ts +9 -0
- package/src/src/rendering/orchestrator/pipeline.ts +186 -172
- package/src/src/server/dev-server/error-overlay/error-formatter.ts +21 -0
- package/src/src/server/dev-server/error-overlay/html-template.ts +139 -8
- package/src/src/server/dev-server/error-overlay/index.ts +1 -0
- package/src/src/server/dev-server/error-overlay/overlay-renderer.ts +2 -1
- package/src/src/server/dev-server/request-handler.ts +6 -2
- package/src/src/server/handlers/request/ssr/ssr.handler.ts +11 -2
- package/src/src/server/runtime-handler/adapter-factory.ts +13 -5
- package/src/src/server/runtime-handler/index.ts +132 -39
- package/src/src/server/runtime-handler/local-project-discovery.ts +51 -17
- package/src/src/server/services/rendering/ssr.service.ts +43 -5
- package/src/src/server/shared/renderer/adapter.ts +107 -8
- package/src/src/server/shared/renderer/index.ts +7 -1
- package/src/src/server/utils/error-html.ts +29 -6
|
@@ -10,12 +10,72 @@ const logger = rendererLogger.component("renderer-adapter");
|
|
|
10
10
|
const RENDER_CACHE_TTL_SECONDS = 3_600;
|
|
11
11
|
/** Maximum entries for the local render cache layer */
|
|
12
12
|
const RENDER_CACHE_LOCAL_MAX_ENTRIES = 200;
|
|
13
|
-
|
|
13
|
+
/**
|
|
14
|
+
* Default initializer that delegates to the real shared renderer
|
|
15
|
+
* singleton from `#veryfront/rendering/renderer.ts`.
|
|
16
|
+
*/
|
|
17
|
+
const defaultInitializer = {
|
|
18
|
+
initialize: initializeRenderer,
|
|
19
|
+
isInitialized: isRendererInitialized,
|
|
20
|
+
get: getRenderer,
|
|
21
|
+
destroy: destroySharedRenderer,
|
|
22
|
+
};
|
|
23
|
+
let activeInitializer = defaultInitializer;
|
|
24
|
+
let rendererInitState = null;
|
|
25
|
+
function scheduleInitializerDestroy(initializer, pendingPromise) {
|
|
26
|
+
const destroy = async () => {
|
|
27
|
+
try {
|
|
28
|
+
await initializer.destroy();
|
|
29
|
+
}
|
|
30
|
+
catch (error) {
|
|
31
|
+
logger.warn("Failed to destroy renderer initializer", {
|
|
32
|
+
error: error instanceof Error ? error.message : String(error),
|
|
33
|
+
});
|
|
34
|
+
}
|
|
35
|
+
};
|
|
36
|
+
if (pendingPromise) {
|
|
37
|
+
void pendingPromise
|
|
38
|
+
.catch(() => undefined)
|
|
39
|
+
.then(destroy);
|
|
40
|
+
return;
|
|
41
|
+
}
|
|
42
|
+
if (!initializer.isInitialized())
|
|
43
|
+
return;
|
|
44
|
+
void destroy();
|
|
45
|
+
}
|
|
46
|
+
/**
|
|
47
|
+
* Replace the renderer initializer used by the adapter layer.
|
|
48
|
+
* Pass `undefined` to restore the default (real) initializer.
|
|
49
|
+
*
|
|
50
|
+
* Returns a disposer that restores the previous initializer — use in
|
|
51
|
+
* `afterEach` or with `using` to prevent test pollution:
|
|
52
|
+
*
|
|
53
|
+
* ```ts
|
|
54
|
+
* afterEach(() => setRendererInitializer(undefined));
|
|
55
|
+
* ```
|
|
56
|
+
*
|
|
57
|
+
* @internal Test-only — not part of the public API.
|
|
58
|
+
*/
|
|
59
|
+
export function setRendererInitializer(initializer) {
|
|
60
|
+
const nextInitializer = initializer ?? defaultInitializer;
|
|
61
|
+
const previous = activeInitializer;
|
|
62
|
+
const previousPendingPromise = rendererInitState?.initializer === previous
|
|
63
|
+
? rendererInitState.promise
|
|
64
|
+
: undefined;
|
|
65
|
+
activeInitializer = nextInitializer;
|
|
66
|
+
if (rendererInitState?.initializer !== activeInitializer) {
|
|
67
|
+
rendererInitState = null;
|
|
68
|
+
}
|
|
69
|
+
if (previous !== activeInitializer) {
|
|
70
|
+
scheduleInitializerDestroy(previous, previousPendingPromise);
|
|
71
|
+
}
|
|
72
|
+
}
|
|
14
73
|
async function getOrInitRenderer() {
|
|
15
|
-
if (
|
|
16
|
-
return
|
|
17
|
-
if (
|
|
18
|
-
return
|
|
74
|
+
if (activeInitializer.isInitialized())
|
|
75
|
+
return activeInitializer.get();
|
|
76
|
+
if (rendererInitState?.initializer === activeInitializer) {
|
|
77
|
+
return rendererInitState.promise;
|
|
78
|
+
}
|
|
19
79
|
const isProxyMode = getEnvBoolean("PROXY_MODE", false, {
|
|
20
80
|
trueValues: ["1"],
|
|
21
81
|
trim: false,
|
|
@@ -42,12 +102,19 @@ async function getOrInitRenderer() {
|
|
|
42
102
|
hasApiUrl: !!apiBaseUrl,
|
|
43
103
|
cacheType: useApiCache ? "api-distributed" : "memory",
|
|
44
104
|
});
|
|
45
|
-
|
|
105
|
+
const initializer = activeInitializer;
|
|
106
|
+
const initPromise = initializer.initialize(options);
|
|
107
|
+
rendererInitState = {
|
|
108
|
+
initializer,
|
|
109
|
+
promise: initPromise,
|
|
110
|
+
};
|
|
46
111
|
try {
|
|
47
|
-
return await
|
|
112
|
+
return await initPromise;
|
|
48
113
|
}
|
|
49
114
|
finally {
|
|
50
|
-
|
|
115
|
+
if (rendererInitState?.promise === initPromise) {
|
|
116
|
+
rendererInitState = null;
|
|
117
|
+
}
|
|
51
118
|
}
|
|
52
119
|
}
|
|
53
120
|
function resolveEnvironment(ctx) {
|
|
@@ -210,6 +277,12 @@ export async function getRendererForProject(ctx) {
|
|
|
210
277
|
return new RendererAdapterImpl(renderer, renderCtx);
|
|
211
278
|
}
|
|
212
279
|
export async function destroyRendererAdapter() {
|
|
213
|
-
|
|
214
|
-
|
|
280
|
+
const pendingPromise = rendererInitState?.initializer === activeInitializer
|
|
281
|
+
? rendererInitState.promise
|
|
282
|
+
: undefined;
|
|
283
|
+
rendererInitState = null;
|
|
284
|
+
if (pendingPromise) {
|
|
285
|
+
await pendingPromise.catch(() => undefined);
|
|
286
|
+
}
|
|
287
|
+
await activeInitializer.destroy();
|
|
215
288
|
}
|
|
@@ -3,6 +3,6 @@
|
|
|
3
3
|
*
|
|
4
4
|
* @module server/shared/renderer
|
|
5
5
|
*/
|
|
6
|
-
export { destroyRendererAdapter, getRendererForProject, type RendererAdapter } from "./adapter.js";
|
|
6
|
+
export { destroyRendererAdapter, getRendererForProject, type RendererAdapter, type RendererInitializer, setRendererInitializer, } from "./adapter.js";
|
|
7
7
|
export { shouldRejectDueToMemory } from "./memory/pressure.js";
|
|
8
8
|
//# sourceMappingURL=index.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../../src/src/server/shared/renderer/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../../src/src/server/shared/renderer/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EACL,sBAAsB,EACtB,qBAAqB,EACrB,KAAK,eAAe,EACpB,KAAK,mBAAmB,EACxB,sBAAsB,GACvB,MAAM,cAAc,CAAC;AACtB,OAAO,EAAE,uBAAuB,EAAE,MAAM,sBAAsB,CAAC"}
|
|
@@ -3,5 +3,5 @@
|
|
|
3
3
|
*
|
|
4
4
|
* @module server/shared/renderer
|
|
5
5
|
*/
|
|
6
|
-
export { destroyRendererAdapter, getRendererForProject } from "./adapter.js";
|
|
6
|
+
export { destroyRendererAdapter, getRendererForProject, setRendererInitializer, } from "./adapter.js";
|
|
7
7
|
export { shouldRejectDueToMemory } from "./memory/pressure.js";
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"error-html.d.ts","sourceRoot":"","sources":["../../../../src/src/server/utils/error-html.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"error-html.d.ts","sourceRoot":"","sources":["../../../../src/src/server/utils/error-html.ts"],"names":[],"mappings":"AAEA,UAAU,gBAAgB;IACxB,UAAU,EAAE,MAAM,CAAC;IACnB,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,MAAM,CAAC;IAChB,gDAAgD;IAChD,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,sDAAsD;IACtD,OAAO,CAAC,EAAE,OAAO,CAAC;CACnB;AAED,wBAAgB,iBAAiB,CAAC,OAAO,EAAE,gBAAgB,GAAG,MAAM,CAQnE;AA4GD,eAAO,MAAM,UAAU;wBACD,MAAM,GAAG,MAAM;0BAUb,MAAM,GAAG,MAAM;kBAQvB,MAAM;sBAQF,MAAM;CAOzB,CAAC"}
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { escapeHTML } from "../../html/html-escape.js";
|
|
1
2
|
export function generateErrorHtml(options) {
|
|
2
3
|
const { statusCode, title, message, pathname, minimal } = options;
|
|
3
4
|
if (minimal) {
|
|
@@ -6,13 +7,16 @@ export function generateErrorHtml(options) {
|
|
|
6
7
|
return generateStyledErrorHtml(statusCode, title, message);
|
|
7
8
|
}
|
|
8
9
|
function generateStyledErrorHtml(statusCode, title, message) {
|
|
10
|
+
const errorMessage = title === "Not Found" ? `Page not found: ${message}` : message;
|
|
11
|
+
// 4xx = warning (routing/config issue), 5xx = error (something broke)
|
|
12
|
+
const errorType = statusCode >= 500 ? "error" : "warning";
|
|
9
13
|
return `<!DOCTYPE html>
|
|
10
14
|
<html lang="en">
|
|
11
15
|
<head>
|
|
12
16
|
<meta charset="utf-8">
|
|
13
17
|
<meta name="viewport" content="width=device-width">
|
|
14
18
|
<link rel="icon" type="image/png" href="https://cdn.veryfront.com/images/veryfront-favicon.png">
|
|
15
|
-
<title>${statusCode} ${title} — Veryfront</title>
|
|
19
|
+
<title>${statusCode} ${escapeHTML(title)} — Veryfront</title>
|
|
16
20
|
<style>
|
|
17
21
|
:root {
|
|
18
22
|
--bg: #ffffff;
|
|
@@ -61,9 +65,25 @@ function generateStyledErrorHtml(statusCode, title, message) {
|
|
|
61
65
|
</head>
|
|
62
66
|
<body>
|
|
63
67
|
<div class="container">
|
|
64
|
-
<h1 class="title">${title}</h1>
|
|
65
|
-
<p class="message">${message}</p>
|
|
68
|
+
<h1 class="title">${escapeHTML(title)}</h1>
|
|
69
|
+
<p class="message">${escapeHTML(message)}</p>
|
|
66
70
|
</div>
|
|
71
|
+
<script>
|
|
72
|
+
if (window.parent !== window) {
|
|
73
|
+
try {
|
|
74
|
+
window.parent.postMessage({
|
|
75
|
+
action: 'appUpdated',
|
|
76
|
+
isInitialLoad: true,
|
|
77
|
+
hasError: true,
|
|
78
|
+
url: window.location.href,
|
|
79
|
+
errors: [{
|
|
80
|
+
type: '${errorType}',
|
|
81
|
+
message: ${JSON.stringify(errorMessage).replace(/</g, "\\u003c")}
|
|
82
|
+
}]
|
|
83
|
+
}, '*');
|
|
84
|
+
} catch (e) { /* postMessage may fail in cross-origin iframes */ }
|
|
85
|
+
}
|
|
86
|
+
</script>
|
|
67
87
|
</body>
|
|
68
88
|
</html>`;
|
|
69
89
|
}
|
|
@@ -74,11 +94,11 @@ function generateMinimalErrorHtml(statusCode, title, message, pathname) {
|
|
|
74
94
|
<head>
|
|
75
95
|
<meta charset="utf-8"/>
|
|
76
96
|
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
|
77
|
-
<title>${statusCode} ${title}</title>
|
|
97
|
+
<title>${statusCode} ${escapeHTML(title)}</title>
|
|
78
98
|
</head>
|
|
79
99
|
<body>
|
|
80
|
-
<h1>${statusCode} ${title}</h1>
|
|
81
|
-
<p>${fullMessage}</p>
|
|
100
|
+
<h1>${statusCode} ${escapeHTML(title)}</h1>
|
|
101
|
+
<p>${escapeHTML(fullMessage)}</p>
|
|
82
102
|
</body>
|
|
83
103
|
</html>`;
|
|
84
104
|
}
|
package/package.json
CHANGED
|
@@ -3,7 +3,7 @@ import type { CommandHelp } from "../../help/types.js";
|
|
|
3
3
|
export const knowledgeHelp: CommandHelp = {
|
|
4
4
|
name: "knowledge",
|
|
5
5
|
description: "Ingest documents into the project knowledge base",
|
|
6
|
-
usage: "veryfront knowledge ingest <source
|
|
6
|
+
usage: "veryfront knowledge ingest <source...> [options]",
|
|
7
7
|
options: [
|
|
8
8
|
{
|
|
9
9
|
flag: "--project, -p <slug>",
|
|
@@ -28,6 +28,7 @@ export const knowledgeHelp: CommandHelp = {
|
|
|
28
28
|
],
|
|
29
29
|
examples: [
|
|
30
30
|
"veryfront knowledge ingest uploads/contracts/q1.pdf --json",
|
|
31
|
+
"veryfront knowledge ingest uploads/contracts/a.pdf uploads/contracts/b.pdf uploads/contracts/c.pdf --json",
|
|
31
32
|
"veryfront knowledge ingest /workspace/uploads/q1.pdf --json",
|
|
32
33
|
"veryfront knowledge ingest --path uploads/ --all --json",
|
|
33
34
|
],
|
|
@@ -36,5 +37,6 @@ export const knowledgeHelp: CommandHelp = {
|
|
|
36
37
|
"`uploads/...` means a remote project upload; use `./uploads/...` or `/workspace/uploads/...` to force a local file",
|
|
37
38
|
"`ingest` orchestrates upload resolution, parsing, and project file writes",
|
|
38
39
|
"Requires python3; non-text formats also require the supported parser packages unless you run inside the Veryfront sandbox",
|
|
40
|
+
"The Veryfront sandbox image includes `kreuzberg`, and knowledge ingest falls back to the built-in parser when `kreuzberg` is unavailable or extraction fails",
|
|
39
41
|
],
|
|
40
42
|
};
|
|
@@ -62,7 +62,7 @@ type DownloadResult = { uploadPath: string; localPath: string; bytes?: number };
|
|
|
62
62
|
const KnowledgeIngestArgsSchema = z.object({
|
|
63
63
|
projectSlug: z.string().optional(),
|
|
64
64
|
projectDir: z.string().optional(),
|
|
65
|
-
|
|
65
|
+
sources: z.array(z.string()).default([]),
|
|
66
66
|
path: z.string().optional(),
|
|
67
67
|
all: z.boolean().default(false),
|
|
68
68
|
recursive: z.boolean().default(false),
|
|
@@ -72,6 +72,44 @@ const KnowledgeIngestArgsSchema = z.object({
|
|
|
72
72
|
slug: z.string().optional(),
|
|
73
73
|
json: z.boolean().default(false),
|
|
74
74
|
quiet: z.boolean().default(false),
|
|
75
|
+
}).superRefine((value, ctx) => {
|
|
76
|
+
const hasExplicitSources = value.sources.length > 0;
|
|
77
|
+
const hasPath = typeof value.path === "string" && value.path.length > 0;
|
|
78
|
+
|
|
79
|
+
if (hasExplicitSources && (hasPath || value.all)) {
|
|
80
|
+
ctx.addIssue({
|
|
81
|
+
code: z.ZodIssueCode.custom,
|
|
82
|
+
message: "Use either explicit source paths or --path with --all, not both.",
|
|
83
|
+
});
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
if (!hasExplicitSources && !hasPath && !value.all) {
|
|
87
|
+
ctx.addIssue({
|
|
88
|
+
code: z.ZodIssueCode.custom,
|
|
89
|
+
message: "Provide one or more source paths or use --path with --all.",
|
|
90
|
+
});
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
if (hasPath && !value.all) {
|
|
94
|
+
ctx.addIssue({
|
|
95
|
+
code: z.ZodIssueCode.custom,
|
|
96
|
+
message: "--path requires --all.",
|
|
97
|
+
});
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
if (!hasPath && value.all) {
|
|
101
|
+
ctx.addIssue({
|
|
102
|
+
code: z.ZodIssueCode.custom,
|
|
103
|
+
message: "--all requires --path.",
|
|
104
|
+
});
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
if (value.slug && value.sources.length !== 1) {
|
|
108
|
+
ctx.addIssue({
|
|
109
|
+
code: z.ZodIssueCode.custom,
|
|
110
|
+
message: "--slug can only be used with a single explicit source.",
|
|
111
|
+
});
|
|
112
|
+
}
|
|
75
113
|
});
|
|
76
114
|
|
|
77
115
|
export type KnowledgeIngestOptions = z.infer<typeof KnowledgeIngestArgsSchema>;
|
|
@@ -97,7 +135,7 @@ function showKnowledgeUsage(): void {
|
|
|
97
135
|
Veryfront Knowledge
|
|
98
136
|
|
|
99
137
|
Usage:
|
|
100
|
-
veryfront knowledge ingest <source
|
|
138
|
+
veryfront knowledge ingest <source...> [options]
|
|
101
139
|
veryfront knowledge ingest --path <prefix-or-dir> --all [options]
|
|
102
140
|
|
|
103
141
|
Subcommands:
|
|
@@ -111,7 +149,7 @@ export function parseKnowledgeIngestArgs(
|
|
|
111
149
|
return KnowledgeIngestArgsSchema.safeParse({
|
|
112
150
|
projectSlug: getStringArg(args, "project", "p", "project-slug"),
|
|
113
151
|
projectDir: getStringArg(args, "project-dir", "dir", "d"),
|
|
114
|
-
|
|
152
|
+
sources: args._.slice(2).filter((value): value is string => typeof value === "string"),
|
|
115
153
|
path: getStringArg(args, "path"),
|
|
116
154
|
all: getBooleanArg(args, "all"),
|
|
117
155
|
recursive: getBooleanArg(args, "recursive"),
|
|
@@ -273,6 +311,7 @@ export async function runKnowledgeParser(input: {
|
|
|
273
311
|
description?: string;
|
|
274
312
|
slug?: string;
|
|
275
313
|
sourceReference?: string;
|
|
314
|
+
env?: Record<string, string>;
|
|
276
315
|
}): Promise<KnowledgeParserResult> {
|
|
277
316
|
const tempDir = await dntShim.Deno.makeTempDir({ prefix: "veryfront-knowledge-parser-" });
|
|
278
317
|
const inputJsonPath = `${tempDir}/input.json`;
|
|
@@ -296,6 +335,7 @@ export async function runKnowledgeParser(input: {
|
|
|
296
335
|
try {
|
|
297
336
|
result = await new dntShim.Deno.Command("python3", {
|
|
298
337
|
args: [scriptPath, "--input-json", inputJsonPath, "--output-json", outputJsonPath],
|
|
338
|
+
...(input.env ? { env: input.env } : {}),
|
|
299
339
|
stdout: "piped",
|
|
300
340
|
stderr: "piped",
|
|
301
341
|
}).output();
|
|
@@ -321,7 +361,7 @@ export async function runKnowledgeParser(input: {
|
|
|
321
361
|
}
|
|
322
362
|
|
|
323
363
|
export async function collectKnowledgeSources(
|
|
324
|
-
options: Pick<KnowledgeIngestOptions, "
|
|
364
|
+
options: Pick<KnowledgeIngestOptions, "sources" | "path" | "all" | "recursive">,
|
|
325
365
|
deps: {
|
|
326
366
|
client: ApiClient;
|
|
327
367
|
projectSlug: string;
|
|
@@ -330,29 +370,68 @@ export async function collectKnowledgeSources(
|
|
|
330
370
|
): Promise<KnowledgeSource[]> {
|
|
331
371
|
const fs = createFileSystem();
|
|
332
372
|
|
|
333
|
-
if (options.
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
373
|
+
if (options.sources.length > 0) {
|
|
374
|
+
const explicitSources: Array<
|
|
375
|
+
| { kind: "local"; sources: KnowledgeSource[] }
|
|
376
|
+
| { kind: "upload"; input: string; uploadPath: string }
|
|
377
|
+
> = [];
|
|
378
|
+
const uploadTargets: string[] = [];
|
|
379
|
+
|
|
380
|
+
for (const input of options.sources) {
|
|
381
|
+
if (!isProjectUploadReference(input) && await fs.exists(input)) {
|
|
382
|
+
const localFiles = await collectLocalFiles(input, options.recursive);
|
|
383
|
+
if (!localFiles.length) throw new Error(`No supported files found at ${input}`);
|
|
384
|
+
explicitSources.push({
|
|
385
|
+
kind: "local",
|
|
386
|
+
sources: localFiles.map((localPath) => ({ kind: "local", input, localPath })),
|
|
387
|
+
});
|
|
388
|
+
continue;
|
|
389
|
+
}
|
|
390
|
+
|
|
391
|
+
if (isLikelyLocalPath(input)) {
|
|
392
|
+
throw new Error(`Local file not found: ${input}`);
|
|
393
|
+
}
|
|
394
|
+
|
|
395
|
+
const uploadPath = normalizeProjectUploadPath(input);
|
|
396
|
+
explicitSources.push({ kind: "upload", input, uploadPath });
|
|
397
|
+
uploadTargets.push(uploadPath);
|
|
398
|
+
}
|
|
399
|
+
|
|
400
|
+
const downloads = uploadTargets.length > 0 ? await deps.downloadUploads(uploadTargets) : [];
|
|
401
|
+
const downloadsByPath = new Map<string, DownloadResult[]>();
|
|
402
|
+
|
|
403
|
+
for (const download of downloads) {
|
|
404
|
+
const existing = downloadsByPath.get(download.uploadPath) ?? [];
|
|
405
|
+
existing.push(download);
|
|
406
|
+
downloadsByPath.set(download.uploadPath, existing);
|
|
338
407
|
}
|
|
339
408
|
|
|
340
|
-
|
|
341
|
-
|
|
409
|
+
const resolvedSources: KnowledgeSource[] = [];
|
|
410
|
+
for (const source of explicitSources) {
|
|
411
|
+
if (source.kind === "local") {
|
|
412
|
+
resolvedSources.push(...source.sources);
|
|
413
|
+
continue;
|
|
414
|
+
}
|
|
415
|
+
|
|
416
|
+
const matchingDownloads = downloadsByPath.get(source.uploadPath);
|
|
417
|
+
const download = matchingDownloads?.shift();
|
|
418
|
+
if (!download) {
|
|
419
|
+
throw new Error(`Upload not found: ${formatKnowledgeUploadSource(source.uploadPath)}`);
|
|
420
|
+
}
|
|
421
|
+
|
|
422
|
+
resolvedSources.push({
|
|
423
|
+
kind: "upload",
|
|
424
|
+
input: source.input,
|
|
425
|
+
uploadPath: download.uploadPath,
|
|
426
|
+
localPath: download.localPath,
|
|
427
|
+
});
|
|
342
428
|
}
|
|
343
429
|
|
|
344
|
-
|
|
345
|
-
const downloads = await deps.downloadUploads([uploadPath]);
|
|
346
|
-
return downloads.map((download) => ({
|
|
347
|
-
kind: "upload",
|
|
348
|
-
input: options.source!,
|
|
349
|
-
uploadPath: download.uploadPath,
|
|
350
|
-
localPath: download.localPath,
|
|
351
|
-
}));
|
|
430
|
+
return resolvedSources;
|
|
352
431
|
}
|
|
353
432
|
|
|
354
433
|
if (!options.path || !options.all) {
|
|
355
|
-
throw new Error("Provide
|
|
434
|
+
throw new Error("Provide one or more source paths or use --path with --all.");
|
|
356
435
|
}
|
|
357
436
|
|
|
358
437
|
if (!isProjectUploadReference(options.path) && await fs.exists(options.path)) {
|
|
@@ -407,7 +486,11 @@ export async function ingestResolvedSources(
|
|
|
407
486
|
uploadKnowledgeFile: (remotePath: string, localPath: string) => Promise<{ path: string }>;
|
|
408
487
|
},
|
|
409
488
|
): Promise<KnowledgeIngestFileResult[]> {
|
|
410
|
-
|
|
489
|
+
if (options.slug && sources.length !== 1) {
|
|
490
|
+
throw new Error("--slug can only be used with a single explicit source.");
|
|
491
|
+
}
|
|
492
|
+
|
|
493
|
+
const slugs = options.slug ? [options.slug] : ensureUniqueSlugs(sources);
|
|
411
494
|
const results: KnowledgeIngestFileResult[] = [];
|
|
412
495
|
|
|
413
496
|
for (const [index, source] of sources.entries()) {
|
|
@@ -3,6 +3,7 @@ import argparse
|
|
|
3
3
|
import csv
|
|
4
4
|
import json
|
|
5
5
|
import re
|
|
6
|
+
import subprocess
|
|
6
7
|
from datetime import date
|
|
7
8
|
from pathlib import Path
|
|
8
9
|
from typing import Any, Optional
|
|
@@ -71,6 +72,107 @@ def build_frontmatter(source: str, source_type: str, description: str) -> str:
|
|
|
71
72
|
])
|
|
72
73
|
|
|
73
74
|
|
|
75
|
+
def metadata_int(metadata: dict[str, Any], *keys: str) -> Optional[int]:
|
|
76
|
+
for key in keys:
|
|
77
|
+
value = metadata.get(key)
|
|
78
|
+
if isinstance(value, int) and not isinstance(value, bool):
|
|
79
|
+
return value
|
|
80
|
+
return None
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def metadata_string_list(metadata: dict[str, Any], *keys: str) -> Optional[list[str]]:
|
|
84
|
+
for key in keys:
|
|
85
|
+
value = metadata.get(key)
|
|
86
|
+
if isinstance(value, list) and all(isinstance(item, str) for item in value):
|
|
87
|
+
return value
|
|
88
|
+
return None
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def build_kreuzberg_stats(source_type: str, content: str, metadata: dict[str, Any]):
|
|
92
|
+
stats: dict[str, Any] = {
|
|
93
|
+
"characters": len(content),
|
|
94
|
+
"lines": len(content.splitlines()) if content else 0,
|
|
95
|
+
"engine": "kreuzberg",
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
if isinstance(metadata.get("mime_type"), str):
|
|
99
|
+
stats["mime_type"] = metadata["mime_type"]
|
|
100
|
+
|
|
101
|
+
if source_type == "pdf":
|
|
102
|
+
stats["pages"] = metadata_int(metadata, "page_count") or 0
|
|
103
|
+
stats["tables"] = metadata_int(metadata, "table_count") or 0
|
|
104
|
+
elif source_type in {"xlsx", "xls"}:
|
|
105
|
+
stats["sheets"] = metadata_int(metadata, "sheet_count") or 0
|
|
106
|
+
stats["rows"] = metadata_int(metadata, "row_count") or 0
|
|
107
|
+
stats["sheet_names"] = metadata_string_list(metadata, "sheet_names") or []
|
|
108
|
+
elif source_type == "docx":
|
|
109
|
+
stats["paragraphs"] = metadata_int(metadata, "paragraph_count") or 0
|
|
110
|
+
stats["tables"] = metadata_int(metadata, "table_count") or 0
|
|
111
|
+
elif source_type == "pptx":
|
|
112
|
+
stats["slides"] = metadata_int(metadata, "slide_count", "page_count") or 0
|
|
113
|
+
stats["tables"] = metadata_int(metadata, "table_count") or 0
|
|
114
|
+
elif source_type == "html":
|
|
115
|
+
stats["tables"] = metadata_int(metadata, "table_count") or 0
|
|
116
|
+
|
|
117
|
+
return stats
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def parse_with_kreuzberg(path: str, source_type: str):
|
|
121
|
+
warnings: list[str] = []
|
|
122
|
+
completed = subprocess.run(
|
|
123
|
+
[
|
|
124
|
+
"kreuzberg",
|
|
125
|
+
"extract",
|
|
126
|
+
path,
|
|
127
|
+
"--format",
|
|
128
|
+
"json",
|
|
129
|
+
"--output-format",
|
|
130
|
+
"markdown",
|
|
131
|
+
],
|
|
132
|
+
capture_output=True,
|
|
133
|
+
text=True,
|
|
134
|
+
check=False,
|
|
135
|
+
)
|
|
136
|
+
|
|
137
|
+
if completed.returncode != 0:
|
|
138
|
+
detail = completed.stderr.strip() or completed.stdout.strip() or f"exit code {completed.returncode}"
|
|
139
|
+
raise RuntimeError(f"kreuzberg extract failed: {detail}")
|
|
140
|
+
|
|
141
|
+
try:
|
|
142
|
+
payload = json.loads(completed.stdout)
|
|
143
|
+
except json.JSONDecodeError as error:
|
|
144
|
+
raise RuntimeError(f"kreuzberg extract returned invalid JSON: {error}") from error
|
|
145
|
+
|
|
146
|
+
content = payload.get("content", "")
|
|
147
|
+
if not isinstance(content, str):
|
|
148
|
+
raise RuntimeError("kreuzberg extract did not return string content")
|
|
149
|
+
|
|
150
|
+
metadata = payload.get("metadata") if isinstance(payload.get("metadata"), dict) else {}
|
|
151
|
+
normalized_content = clean_text(content)
|
|
152
|
+
stats = build_kreuzberg_stats(source_type, normalized_content, metadata)
|
|
153
|
+
|
|
154
|
+
return normalized_content or "_No extractable text found in document._", stats, warnings
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def prefer_kreuzberg(source_type: str, fallback_parser):
|
|
158
|
+
def parser(path: str):
|
|
159
|
+
try:
|
|
160
|
+
return parse_with_kreuzberg(path, source_type)
|
|
161
|
+
except FileNotFoundError as error:
|
|
162
|
+
if getattr(error, "filename", "") == "kreuzberg":
|
|
163
|
+
return fallback_parser(path)
|
|
164
|
+
raise
|
|
165
|
+
except RuntimeError as error:
|
|
166
|
+
content, stats, warnings = fallback_parser(path)
|
|
167
|
+
warnings.append(
|
|
168
|
+
"kreuzberg extraction failed; fell back to the built-in parser: "
|
|
169
|
+
+ str(error)
|
|
170
|
+
)
|
|
171
|
+
return content, stats, warnings
|
|
172
|
+
|
|
173
|
+
return parser
|
|
174
|
+
|
|
175
|
+
|
|
74
176
|
def parse_csv_like(path: str, delimiter: str = ","):
|
|
75
177
|
warnings: list[str] = []
|
|
76
178
|
with open(path, newline="", encoding="utf-8-sig") as file:
|
|
@@ -305,18 +407,19 @@ def parse_json(path: str):
|
|
|
305
407
|
def select_parser(path: Path):
|
|
306
408
|
ext = path.suffix.lower()
|
|
307
409
|
if ext == ".pdf":
|
|
308
|
-
return "pdf", parse_pdf
|
|
410
|
+
return "pdf", prefer_kreuzberg("pdf", parse_pdf)
|
|
309
411
|
if ext in {".csv", ".tsv"}:
|
|
310
412
|
delimiter = "\t" if ext == ".tsv" else ","
|
|
311
413
|
return ext.lstrip("."), lambda file_path: parse_csv_like(file_path, delimiter)
|
|
312
414
|
if ext in {".xlsx", ".xls"}:
|
|
313
|
-
|
|
415
|
+
source_type = ext.lstrip(".")
|
|
416
|
+
return source_type, prefer_kreuzberg(source_type, parse_excel)
|
|
314
417
|
if ext == ".docx":
|
|
315
|
-
return "docx", parse_docx
|
|
418
|
+
return "docx", prefer_kreuzberg("docx", parse_docx)
|
|
316
419
|
if ext == ".pptx":
|
|
317
|
-
return "pptx", parse_pptx
|
|
420
|
+
return "pptx", prefer_kreuzberg("pptx", parse_pptx)
|
|
318
421
|
if ext in {".html", ".htm"}:
|
|
319
|
-
return "html", parse_html
|
|
422
|
+
return "html", prefer_kreuzberg("html", parse_html)
|
|
320
423
|
if ext in {".txt", ".md", ".mdx"}:
|
|
321
424
|
return ext.lstrip("."), parse_text
|
|
322
425
|
if ext == ".json":
|
|
@@ -325,6 +428,8 @@ def select_parser(path: Path):
|
|
|
325
428
|
|
|
326
429
|
|
|
327
430
|
def build_summary(source_type: str, stats: dict[str, Any]) -> str:
|
|
431
|
+
if stats.get("engine") == "kreuzberg":
|
|
432
|
+
return f"Converted {source_type.upper()} to markdown ({stats.get('characters', 0)} chars)."
|
|
328
433
|
if source_type in {"csv", "tsv"}:
|
|
329
434
|
return f"Parsed {stats.get('rows', 0)} rows across {stats.get('columns', 0)} columns."
|
|
330
435
|
if source_type in {"xlsx", "xls"}:
|
package/src/deno.js
CHANGED
|
@@ -208,6 +208,14 @@ async function generateHTMLShellPartsImpl(
|
|
|
208
208
|
|
|
209
209
|
const nonceAttr = nonce ? ` nonce="${nonce}"` : "";
|
|
210
210
|
|
|
211
|
+
// Expose project slug for runtime error overlay "Fix in Veryfront" button
|
|
212
|
+
const overlaySlug = options.projectId || meta.slug;
|
|
213
|
+
const slugForOverlay = useDevScripts && overlaySlug
|
|
214
|
+
? `<script${nonceAttr}>window.__VF_PROJECT_SLUG__=${
|
|
215
|
+
JSON.stringify(overlaySlug).replace(/</g, "\\u003c")
|
|
216
|
+
};</script>`
|
|
217
|
+
: "";
|
|
218
|
+
|
|
211
219
|
const hydrationErrorSuppression = useDevScripts ? "" : `<script${nonceAttr}>
|
|
212
220
|
(function(){
|
|
213
221
|
var origError = console.error;
|
|
@@ -303,6 +311,7 @@ async function generateHTMLShellPartsImpl(
|
|
|
303
311
|
${linkTags}
|
|
304
312
|
${styleTags}
|
|
305
313
|
${modeStyles}
|
|
314
|
+
${slugForOverlay}
|
|
306
315
|
</head>
|
|
307
316
|
<body${bodyClass ? ` class="${bodyClass}"` : ""} suppressHydrationWarning>
|
|
308
317
|
<div ${rootAttributes}>`;
|