veryfront 0.1.83 → 0.1.85
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/esm/cli/commands/knowledge/command.d.ts +10 -13
- package/esm/cli/commands/knowledge/command.d.ts.map +1 -1
- package/esm/cli/commands/knowledge/command.js +294 -110
- package/esm/cli/commands/knowledge/parser-source.d.ts.map +1 -1
- package/esm/cli/commands/knowledge/parser-source.js +52 -0
- package/esm/cli/commands/knowledge/result.d.ts +54 -0
- package/esm/cli/commands/knowledge/result.d.ts.map +1 -0
- package/esm/cli/commands/knowledge/result.js +22 -0
- package/esm/cli/commands/knowledge/source-policy.d.ts +11 -0
- package/esm/cli/commands/knowledge/source-policy.d.ts.map +1 -0
- package/esm/cli/commands/knowledge/source-policy.js +135 -0
- package/esm/deno.js +1 -1
- package/esm/src/jobs/index.d.ts +1 -1
- package/esm/src/jobs/index.d.ts.map +1 -1
- package/esm/src/jobs/index.js +1 -1
- package/esm/src/jobs/schemas.d.ts +1104 -185
- package/esm/src/jobs/schemas.d.ts.map +1 -1
- package/esm/src/jobs/schemas.js +81 -8
- package/esm/src/proxy/error-response.d.ts +7 -0
- package/esm/src/proxy/error-response.d.ts.map +1 -0
- package/esm/src/proxy/error-response.js +26 -0
- package/esm/src/proxy/handler.d.ts.map +1 -1
- package/esm/src/proxy/handler.js +25 -0
- package/esm/src/proxy/main.js +2 -23
- package/esm/src/rendering/orchestrator/css-candidate-manifest.d.ts +11 -0
- package/esm/src/rendering/orchestrator/css-candidate-manifest.d.ts.map +1 -1
- package/esm/src/rendering/orchestrator/css-candidate-manifest.js +23 -12
- package/esm/src/server/handlers/dev/styles-candidate-scanner.d.ts.map +1 -1
- package/esm/src/server/handlers/dev/styles-candidate-scanner.js +25 -8
- package/esm/src/server/handlers/dev/styles-css.handler.d.ts +1 -0
- package/esm/src/server/handlers/dev/styles-css.handler.d.ts.map +1 -1
- package/esm/src/server/handlers/dev/styles-css.handler.js +15 -5
- package/esm/src/transforms/esm/http-cache-helpers.d.ts.map +1 -1
- package/esm/src/transforms/esm/http-cache-helpers.js +7 -1
- package/esm/src/transforms/import-rewriter/strategies/bare-strategy.d.ts.map +1 -1
- package/esm/src/transforms/import-rewriter/strategies/bare-strategy.js +11 -8
- package/esm/src/transforms/shared/package-specifier.d.ts +7 -0
- package/esm/src/transforms/shared/package-specifier.d.ts.map +1 -0
- package/esm/src/transforms/shared/package-specifier.js +19 -0
- package/package.json +1 -1
- package/src/cli/commands/knowledge/command.ts +375 -139
- package/src/cli/commands/knowledge/parser-source.ts +52 -0
- package/src/cli/commands/knowledge/result.ts +88 -0
- package/src/cli/commands/knowledge/source-policy.ts +164 -0
- package/src/deno.js +1 -1
- package/src/src/jobs/index.ts +16 -0
- package/src/src/jobs/schemas.ts +105 -8
- package/src/src/proxy/error-response.ts +33 -0
- package/src/src/proxy/handler.ts +43 -0
- package/src/src/proxy/main.ts +2 -27
- package/src/src/rendering/orchestrator/css-candidate-manifest.ts +40 -14
- package/src/src/server/handlers/dev/styles-candidate-scanner.ts +37 -11
- package/src/src/server/handlers/dev/styles-css.handler.ts +25 -4
- package/src/src/transforms/esm/http-cache-helpers.ts +12 -1
- package/src/src/transforms/import-rewriter/strategies/bare-strategy.ts +11 -12
- package/src/src/transforms/shared/package-specifier.ts +29 -0
|
@@ -11,22 +11,14 @@ import { putRemoteFileFromLocal } from "../files/command.js";
|
|
|
11
11
|
import { knowledgeIngestPythonSource } from "./parser-source.js";
|
|
12
12
|
import { createJobUserLogger, type Logger, serverLogger } from "../../../src/utils/index.js";
|
|
13
13
|
import { writeJobResultIfConfigured } from "../../utils/write-job-result.js";
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
".pptx",
|
|
23
|
-
".html",
|
|
24
|
-
".htm",
|
|
25
|
-
".txt",
|
|
26
|
-
".json",
|
|
27
|
-
".md",
|
|
28
|
-
".mdx",
|
|
29
|
-
]);
|
|
14
|
+
import { classifyKnowledgeDirectoryPath, classifyKnowledgeSourcePath } from "./source-policy.js";
|
|
15
|
+
import {
|
|
16
|
+
buildKnowledgeIngestJobResult,
|
|
17
|
+
type KnowledgeIngestFailedFileResult,
|
|
18
|
+
type KnowledgeIngestFailureReason,
|
|
19
|
+
type KnowledgeIngestFileResult,
|
|
20
|
+
type KnowledgeIngestSkippedFileResult,
|
|
21
|
+
} from "./result.js";
|
|
30
22
|
|
|
31
23
|
export interface KnowledgeParserResult {
|
|
32
24
|
success: true;
|
|
@@ -43,22 +35,15 @@ export interface KnowledgeParserResult {
|
|
|
43
35
|
warnings: string[];
|
|
44
36
|
}
|
|
45
37
|
|
|
46
|
-
export interface KnowledgeIngestFileResult {
|
|
47
|
-
source: string;
|
|
48
|
-
localSourcePath: string;
|
|
49
|
-
outputPath: string;
|
|
50
|
-
remotePath: string;
|
|
51
|
-
slug: string;
|
|
52
|
-
sourceType: string;
|
|
53
|
-
summary: string;
|
|
54
|
-
stats: Record<string, unknown>;
|
|
55
|
-
warnings: string[];
|
|
56
|
-
}
|
|
57
|
-
|
|
58
38
|
type KnowledgeSource =
|
|
59
39
|
| { kind: "local"; input: string; localPath: string }
|
|
60
40
|
| { kind: "upload"; input: string; uploadPath: string; localPath: string };
|
|
61
41
|
|
|
42
|
+
export interface KnowledgeSourceCollection {
|
|
43
|
+
sources: KnowledgeSource[];
|
|
44
|
+
skipped: KnowledgeIngestSkippedFileResult[];
|
|
45
|
+
}
|
|
46
|
+
|
|
62
47
|
type DownloadResult = { uploadPath: string; localPath: string; bytes?: number };
|
|
63
48
|
|
|
64
49
|
const knowledgeJobLogger = serverLogger.component("knowledge-ingest");
|
|
@@ -207,6 +192,18 @@ export function formatKnowledgeUploadSource(uploadPath: string): string {
|
|
|
207
192
|
: `uploads/${normalizedPath}`;
|
|
208
193
|
}
|
|
209
194
|
|
|
195
|
+
function resolveExplicitUploadPath(inputPath: string): string {
|
|
196
|
+
const normalizedInput = normalizeKnowledgeInputPath(inputPath);
|
|
197
|
+
const displayInput = inputPath.replace(/\\/g, "/");
|
|
198
|
+
const uploadPath = normalizeProjectUploadPath(inputPath);
|
|
199
|
+
if (!uploadPath || normalizedInput.endsWith("/")) {
|
|
200
|
+
throw new Error(
|
|
201
|
+
`Directory upload references require --path <prefix> --all: ${displayInput}`,
|
|
202
|
+
);
|
|
203
|
+
}
|
|
204
|
+
return uploadPath;
|
|
205
|
+
}
|
|
206
|
+
|
|
210
207
|
export function isLikelyLocalPath(value: string): boolean {
|
|
211
208
|
return value.startsWith("/") || value.startsWith("./") || value.startsWith("../") ||
|
|
212
209
|
/^[A-Za-z]:[\\/]/.test(value);
|
|
@@ -218,10 +215,6 @@ function isProjectUploadReference(value: string): boolean {
|
|
|
218
215
|
return normalizedValue === "uploads" || normalizedValue.startsWith("uploads/");
|
|
219
216
|
}
|
|
220
217
|
|
|
221
|
-
function isSupportedKnowledgeFile(path: string): boolean {
|
|
222
|
-
return SUPPORTED_EXTENSIONS.has(extname(path).toLowerCase());
|
|
223
|
-
}
|
|
224
|
-
|
|
225
218
|
function slugify(value: string): string {
|
|
226
219
|
return value.toLowerCase().replace(/[^a-z0-9]+/g, "-").replace(/^-+|-+$/g, "") || "document";
|
|
227
220
|
}
|
|
@@ -234,32 +227,138 @@ export function resolveKnowledgeDownloadOutputDir(outputDir: string): string {
|
|
|
234
227
|
return join(outputDir, ".uploads");
|
|
235
228
|
}
|
|
236
229
|
|
|
237
|
-
|
|
230
|
+
function createSkippedKnowledgeSource(input: {
|
|
231
|
+
source: string;
|
|
232
|
+
localSourcePath?: string | null;
|
|
233
|
+
message: string;
|
|
234
|
+
reason: KnowledgeIngestSkippedFileResult["reason"];
|
|
235
|
+
}): KnowledgeIngestSkippedFileResult {
|
|
236
|
+
return {
|
|
237
|
+
source: input.source,
|
|
238
|
+
localSourcePath: input.localSourcePath ?? null,
|
|
239
|
+
message: input.message,
|
|
240
|
+
reason: input.reason,
|
|
241
|
+
};
|
|
242
|
+
}
|
|
243
|
+
|
|
244
|
+
function createFailedKnowledgeSource(input: {
|
|
245
|
+
source: string;
|
|
246
|
+
localSourcePath: string;
|
|
247
|
+
message: string;
|
|
248
|
+
reason: KnowledgeIngestFailedFileResult["reason"];
|
|
249
|
+
}): KnowledgeIngestFailedFileResult {
|
|
250
|
+
return {
|
|
251
|
+
source: input.source,
|
|
252
|
+
localSourcePath: input.localSourcePath,
|
|
253
|
+
message: input.message,
|
|
254
|
+
reason: input.reason,
|
|
255
|
+
};
|
|
256
|
+
}
|
|
257
|
+
|
|
258
|
+
function classifySourceOrSkip(input: {
|
|
259
|
+
source: string;
|
|
260
|
+
localSourcePath?: string | null;
|
|
261
|
+
}): KnowledgeIngestSkippedFileResult | null {
|
|
262
|
+
const decision = classifyKnowledgeSourcePath(input.source);
|
|
263
|
+
if (decision.kind === "ingest") {
|
|
264
|
+
return null;
|
|
265
|
+
}
|
|
266
|
+
|
|
267
|
+
return createSkippedKnowledgeSource({
|
|
268
|
+
source: input.source,
|
|
269
|
+
localSourcePath: input.localSourcePath,
|
|
270
|
+
message: decision.message,
|
|
271
|
+
reason: decision.reason,
|
|
272
|
+
});
|
|
273
|
+
}
|
|
274
|
+
|
|
275
|
+
function classifyDirectoryOrSkip(input: {
|
|
276
|
+
source: string;
|
|
277
|
+
}): KnowledgeIngestSkippedFileResult | null {
|
|
278
|
+
const decision = classifyKnowledgeDirectoryPath(input.source);
|
|
279
|
+
if (decision.kind === "ingest") {
|
|
280
|
+
return null;
|
|
281
|
+
}
|
|
282
|
+
|
|
283
|
+
return createSkippedKnowledgeSource({
|
|
284
|
+
source: input.source,
|
|
285
|
+
localSourcePath: null,
|
|
286
|
+
message: decision.message,
|
|
287
|
+
reason: decision.reason,
|
|
288
|
+
});
|
|
289
|
+
}
|
|
290
|
+
|
|
291
|
+
async function collectLocalFiles(
|
|
292
|
+
root: string,
|
|
293
|
+
recursive: boolean,
|
|
294
|
+
): Promise<KnowledgeSourceCollection> {
|
|
238
295
|
const fs = createFileSystem();
|
|
239
296
|
const stat = await fs.stat(root);
|
|
240
|
-
if (stat.isFile)
|
|
241
|
-
|
|
297
|
+
if (stat.isFile) {
|
|
298
|
+
const skipped = classifySourceOrSkip({ source: root, localSourcePath: root });
|
|
299
|
+
return skipped == null
|
|
300
|
+
? {
|
|
301
|
+
sources: [{ kind: "local", input: root, localPath: root }],
|
|
302
|
+
skipped: [],
|
|
303
|
+
}
|
|
304
|
+
: {
|
|
305
|
+
sources: [],
|
|
306
|
+
skipped: [skipped],
|
|
307
|
+
};
|
|
308
|
+
}
|
|
309
|
+
if (!stat.isDirectory) {
|
|
310
|
+
return { sources: [], skipped: [] };
|
|
311
|
+
}
|
|
312
|
+
|
|
313
|
+
const skippedRootDirectory = classifyDirectoryOrSkip({ source: root });
|
|
314
|
+
if (skippedRootDirectory != null) {
|
|
315
|
+
return {
|
|
316
|
+
sources: [],
|
|
317
|
+
skipped: [skippedRootDirectory],
|
|
318
|
+
};
|
|
319
|
+
}
|
|
242
320
|
|
|
243
|
-
const
|
|
321
|
+
const collection: KnowledgeSourceCollection = {
|
|
322
|
+
sources: [],
|
|
323
|
+
skipped: [],
|
|
324
|
+
};
|
|
244
325
|
async function walk(dir: string): Promise<void> {
|
|
245
326
|
for await (const entry of fs.readDir(dir)) {
|
|
246
327
|
const entryPath = join(dir, entry.name);
|
|
247
328
|
if (entry.isDirectory) {
|
|
329
|
+
const skipped = classifyDirectoryOrSkip({ source: entryPath });
|
|
330
|
+
if (skipped != null) {
|
|
331
|
+
collection.skipped.push(skipped);
|
|
332
|
+
continue;
|
|
333
|
+
}
|
|
248
334
|
if (recursive) await walk(entryPath);
|
|
249
335
|
continue;
|
|
250
336
|
}
|
|
251
|
-
|
|
252
|
-
|
|
337
|
+
|
|
338
|
+
if (!entry.isFile) {
|
|
339
|
+
continue;
|
|
340
|
+
}
|
|
341
|
+
|
|
342
|
+
const skipped = classifySourceOrSkip({ source: entryPath, localSourcePath: entryPath });
|
|
343
|
+
if (skipped != null) {
|
|
344
|
+
collection.skipped.push(skipped);
|
|
345
|
+
continue;
|
|
253
346
|
}
|
|
347
|
+
|
|
348
|
+
collection.sources.push({ kind: "local", input: root, localPath: entryPath });
|
|
254
349
|
}
|
|
255
350
|
}
|
|
256
351
|
|
|
257
352
|
await walk(root);
|
|
258
|
-
|
|
353
|
+
collection.sources.sort((left, right) => left.localPath.localeCompare(right.localPath));
|
|
354
|
+
collection.skipped.sort((left, right) => left.source.localeCompare(right.source));
|
|
355
|
+
return collection;
|
|
259
356
|
}
|
|
260
357
|
|
|
261
358
|
function buildSourceReference(source: KnowledgeSource): string {
|
|
262
|
-
return source.kind === "upload"
|
|
359
|
+
return source.kind === "upload"
|
|
360
|
+
? formatKnowledgeUploadSource(source.uploadPath)
|
|
361
|
+
: source.localPath;
|
|
263
362
|
}
|
|
264
363
|
|
|
265
364
|
function buildSuggestedSlug(source: KnowledgeSource, index: number): string {
|
|
@@ -344,42 +443,51 @@ export async function runKnowledgeParser(input: {
|
|
|
344
443
|
const scriptPath = `${tempDir}/ingest_document_to_knowledge.py`;
|
|
345
444
|
|
|
346
445
|
try {
|
|
347
|
-
await dntShim.Deno.writeTextFile(
|
|
348
|
-
inputJsonPath,
|
|
349
|
-
JSON.stringify({
|
|
350
|
-
file_path: input.filePath,
|
|
351
|
-
output_dir: input.outputDir,
|
|
352
|
-
description: input.description,
|
|
353
|
-
slug: input.slug,
|
|
354
|
-
source_reference: input.sourceReference,
|
|
355
|
-
}),
|
|
356
|
-
);
|
|
357
|
-
await dntShim.Deno.writeTextFile(scriptPath, knowledgeIngestPythonSource);
|
|
358
|
-
|
|
359
|
-
let result: dntShim.Deno.CommandOutput;
|
|
360
446
|
try {
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
447
|
+
await dntShim.Deno.writeTextFile(
|
|
448
|
+
inputJsonPath,
|
|
449
|
+
JSON.stringify({
|
|
450
|
+
file_path: input.filePath,
|
|
451
|
+
output_dir: input.outputDir,
|
|
452
|
+
description: input.description,
|
|
453
|
+
slug: input.slug,
|
|
454
|
+
source_reference: input.sourceReference,
|
|
455
|
+
}),
|
|
456
|
+
);
|
|
457
|
+
await dntShim.Deno.writeTextFile(scriptPath, knowledgeIngestPythonSource);
|
|
458
|
+
|
|
459
|
+
let result: dntShim.Deno.CommandOutput;
|
|
460
|
+
try {
|
|
461
|
+
result = await new dntShim.Deno.Command("python3", {
|
|
462
|
+
args: [scriptPath, "--input-json", inputJsonPath, "--output-json", outputJsonPath],
|
|
463
|
+
...(input.env ? { env: input.env } : {}),
|
|
464
|
+
stdout: "piped",
|
|
465
|
+
stderr: "piped",
|
|
466
|
+
}).output();
|
|
467
|
+
} catch (error) {
|
|
468
|
+
if (error instanceof dntShim.Deno.errors.NotFound) {
|
|
469
|
+
throw new Error(
|
|
470
|
+
"python3 is required. Install python3 and the supported parser packages, or run the command inside the Veryfront sandbox.",
|
|
471
|
+
);
|
|
472
|
+
}
|
|
473
|
+
throw error;
|
|
474
|
+
}
|
|
475
|
+
|
|
476
|
+
if (result.code !== 0) {
|
|
477
|
+
const stderr = new TextDecoder().decode(result.stderr).trim();
|
|
478
|
+
throw new Error(stderr || "parser exited unsuccessfully");
|
|
479
|
+
}
|
|
480
|
+
|
|
481
|
+
const raw = await dntShim.Deno.readTextFile(outputJsonPath);
|
|
482
|
+
return JSON.parse(raw) as KnowledgeParserResult;
|
|
367
483
|
} catch (error) {
|
|
368
|
-
if (error instanceof
|
|
369
|
-
throw
|
|
370
|
-
"knowledge ingest requires python3. Install python3 and the supported parser packages, or run the command inside the Veryfront sandbox.",
|
|
371
|
-
);
|
|
484
|
+
if (error instanceof Error && error.message.startsWith("knowledge ingest parser failed")) {
|
|
485
|
+
throw error;
|
|
372
486
|
}
|
|
373
|
-
throw error;
|
|
374
|
-
}
|
|
375
487
|
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
throw new Error(`knowledge ingest parser failed${stderr ? `: ${stderr}` : ""}`);
|
|
488
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
489
|
+
throw new Error(`knowledge ingest parser failed: ${message}`);
|
|
379
490
|
}
|
|
380
|
-
|
|
381
|
-
const raw = await dntShim.Deno.readTextFile(outputJsonPath);
|
|
382
|
-
return JSON.parse(raw) as KnowledgeParserResult;
|
|
383
491
|
} finally {
|
|
384
492
|
await dntShim.Deno.remove(tempDir, { recursive: true }).catch(() => undefined);
|
|
385
493
|
}
|
|
@@ -392,23 +500,23 @@ export async function collectKnowledgeSources(
|
|
|
392
500
|
projectSlug: string;
|
|
393
501
|
downloadUploads: (uploadPaths: string[]) => Promise<DownloadResult[]>;
|
|
394
502
|
},
|
|
395
|
-
): Promise<
|
|
503
|
+
): Promise<KnowledgeSourceCollection> {
|
|
396
504
|
const fs = createFileSystem();
|
|
397
505
|
|
|
398
506
|
if (options.sources.length > 0) {
|
|
399
507
|
const explicitSources: Array<
|
|
400
|
-
| { kind: "local";
|
|
508
|
+
| { kind: "local"; collection: KnowledgeSourceCollection }
|
|
401
509
|
| { kind: "upload"; input: string; uploadPath: string }
|
|
402
510
|
> = [];
|
|
403
511
|
const uploadTargets: string[] = [];
|
|
512
|
+
const skipped: KnowledgeIngestSkippedFileResult[] = [];
|
|
404
513
|
|
|
405
514
|
for (const input of options.sources) {
|
|
406
515
|
if (!isProjectUploadReference(input) && await fs.exists(input)) {
|
|
407
|
-
const
|
|
408
|
-
if (!localFiles.length) throw new Error(`No supported files found at ${input}`);
|
|
516
|
+
const collection = await collectLocalFiles(input, options.recursive);
|
|
409
517
|
explicitSources.push({
|
|
410
518
|
kind: "local",
|
|
411
|
-
|
|
519
|
+
collection,
|
|
412
520
|
});
|
|
413
521
|
continue;
|
|
414
522
|
}
|
|
@@ -417,7 +525,15 @@ export async function collectKnowledgeSources(
|
|
|
417
525
|
throw new Error(`Local file not found: ${input}`);
|
|
418
526
|
}
|
|
419
527
|
|
|
420
|
-
const uploadPath =
|
|
528
|
+
const uploadPath = resolveExplicitUploadPath(input);
|
|
529
|
+
const skippedUpload = classifySourceOrSkip({
|
|
530
|
+
source: formatKnowledgeUploadSource(uploadPath),
|
|
531
|
+
});
|
|
532
|
+
if (skippedUpload != null) {
|
|
533
|
+
skipped.push(skippedUpload);
|
|
534
|
+
continue;
|
|
535
|
+
}
|
|
536
|
+
|
|
421
537
|
explicitSources.push({ kind: "upload", input, uploadPath });
|
|
422
538
|
uploadTargets.push(uploadPath);
|
|
423
539
|
}
|
|
@@ -434,7 +550,14 @@ export async function collectKnowledgeSources(
|
|
|
434
550
|
const resolvedSources: KnowledgeSource[] = [];
|
|
435
551
|
for (const source of explicitSources) {
|
|
436
552
|
if (source.kind === "local") {
|
|
437
|
-
|
|
553
|
+
for (const localSource of source.collection.sources) {
|
|
554
|
+
resolvedSources.push({
|
|
555
|
+
kind: "local",
|
|
556
|
+
input: localSource.input,
|
|
557
|
+
localPath: localSource.localPath,
|
|
558
|
+
});
|
|
559
|
+
}
|
|
560
|
+
skipped.push(...source.collection.skipped);
|
|
438
561
|
continue;
|
|
439
562
|
}
|
|
440
563
|
|
|
@@ -452,7 +575,10 @@ export async function collectKnowledgeSources(
|
|
|
452
575
|
});
|
|
453
576
|
}
|
|
454
577
|
|
|
455
|
-
return
|
|
578
|
+
return {
|
|
579
|
+
sources: resolvedSources,
|
|
580
|
+
skipped,
|
|
581
|
+
};
|
|
456
582
|
}
|
|
457
583
|
|
|
458
584
|
if (!options.path || !options.all) {
|
|
@@ -460,9 +586,7 @@ export async function collectKnowledgeSources(
|
|
|
460
586
|
}
|
|
461
587
|
|
|
462
588
|
if (!isProjectUploadReference(options.path) && await fs.exists(options.path)) {
|
|
463
|
-
|
|
464
|
-
if (!localFiles.length) throw new Error(`No supported files found under ${options.path}`);
|
|
465
|
-
return localFiles.map((localPath) => ({ kind: "local", input: options.path!, localPath }));
|
|
589
|
+
return collectLocalFiles(options.path, options.recursive);
|
|
466
590
|
}
|
|
467
591
|
|
|
468
592
|
const displayUploadPrefix = normalizeKnowledgeInputPath(options.path);
|
|
@@ -476,28 +600,57 @@ export async function collectKnowledgeSources(
|
|
|
476
600
|
});
|
|
477
601
|
|
|
478
602
|
let uploads = await listUploadsForPrefix(uploadPrefix || undefined);
|
|
603
|
+
let skipped = uploads.flatMap((item: UploadItem) => {
|
|
604
|
+
if (item.type === "folder") {
|
|
605
|
+
return [];
|
|
606
|
+
}
|
|
607
|
+
|
|
608
|
+
const skippedUpload = classifySourceOrSkip({
|
|
609
|
+
source: formatKnowledgeUploadSource(item.path),
|
|
610
|
+
});
|
|
611
|
+
return skippedUpload == null ? [] : [skippedUpload];
|
|
612
|
+
});
|
|
479
613
|
let uploadTargets = uploads
|
|
480
|
-
.filter((item: UploadItem) => item.type !== "folder"
|
|
481
|
-
.map((item: UploadItem) => item.path)
|
|
614
|
+
.filter((item: UploadItem) => item.type !== "folder")
|
|
615
|
+
.map((item: UploadItem) => item.path)
|
|
616
|
+
.filter((uploadPath) =>
|
|
617
|
+
classifySourceOrSkip({ source: formatKnowledgeUploadSource(uploadPath) }) == null
|
|
618
|
+
);
|
|
482
619
|
|
|
483
620
|
if (!uploadTargets.length && uploadPrefix && !uploadPrefix.endsWith("/")) {
|
|
484
621
|
uploads = await listUploadsForPrefix(`${uploadPrefix}/`);
|
|
622
|
+
skipped = uploads.flatMap((item: UploadItem) => {
|
|
623
|
+
if (item.type === "folder") {
|
|
624
|
+
return [];
|
|
625
|
+
}
|
|
626
|
+
|
|
627
|
+
const skippedUpload = classifySourceOrSkip({
|
|
628
|
+
source: formatKnowledgeUploadSource(item.path),
|
|
629
|
+
});
|
|
630
|
+
return skippedUpload == null ? [] : [skippedUpload];
|
|
631
|
+
});
|
|
485
632
|
uploadTargets = uploads
|
|
486
|
-
.filter((item: UploadItem) => item.type !== "folder"
|
|
487
|
-
.map((item: UploadItem) => item.path)
|
|
633
|
+
.filter((item: UploadItem) => item.type !== "folder")
|
|
634
|
+
.map((item: UploadItem) => item.path)
|
|
635
|
+
.filter((uploadPath) =>
|
|
636
|
+
classifySourceOrSkip({ source: formatKnowledgeUploadSource(uploadPath) }) == null
|
|
637
|
+
);
|
|
488
638
|
}
|
|
489
639
|
|
|
490
|
-
if (!uploadTargets.length) {
|
|
640
|
+
if (!uploadTargets.length && skipped.length === 0) {
|
|
491
641
|
throw new Error(`No supported uploads found under ${displayUploadPrefix}`);
|
|
492
642
|
}
|
|
493
643
|
|
|
494
644
|
const downloads = await deps.downloadUploads(uploadTargets);
|
|
495
|
-
return
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
645
|
+
return {
|
|
646
|
+
sources: downloads.map((download) => ({
|
|
647
|
+
kind: "upload",
|
|
648
|
+
input: options.path!,
|
|
649
|
+
uploadPath: download.uploadPath,
|
|
650
|
+
localPath: download.localPath,
|
|
651
|
+
})),
|
|
652
|
+
skipped,
|
|
653
|
+
};
|
|
501
654
|
}
|
|
502
655
|
|
|
503
656
|
export async function ingestResolvedSources(
|
|
@@ -511,67 +664,111 @@ export async function ingestResolvedSources(
|
|
|
511
664
|
uploadKnowledgeFile: (remotePath: string, localPath: string) => Promise<{ path: string }>;
|
|
512
665
|
eventLogger?: Logger | null;
|
|
513
666
|
},
|
|
514
|
-
): Promise<
|
|
667
|
+
): Promise<{
|
|
668
|
+
ingested: KnowledgeIngestFileResult[];
|
|
669
|
+
failed: KnowledgeIngestFailedFileResult[];
|
|
670
|
+
}> {
|
|
515
671
|
if (options.slug && sources.length !== 1) {
|
|
516
672
|
throw new Error("--slug can only be used with a single explicit source.");
|
|
517
673
|
}
|
|
518
674
|
|
|
519
675
|
const slugs = options.slug ? [options.slug] : ensureUniqueSlugs(sources);
|
|
520
|
-
const
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
676
|
+
const ingested: KnowledgeIngestFileResult[] = [];
|
|
677
|
+
const failed: KnowledgeIngestFailedFileResult[] = [];
|
|
678
|
+
const recordSourceFailure = (
|
|
679
|
+
source: KnowledgeSource,
|
|
680
|
+
sourceReference: string,
|
|
681
|
+
index: number,
|
|
682
|
+
message: string,
|
|
683
|
+
reason: KnowledgeIngestFailureReason,
|
|
684
|
+
) => {
|
|
685
|
+
deps.eventLogger?.error("Knowledge source failed", {
|
|
686
|
+
phase: "file_failed",
|
|
525
687
|
progress_current: index + 1,
|
|
526
688
|
progress_total: sources.length,
|
|
527
689
|
source_name: buildKnowledgeSourceName(source),
|
|
690
|
+
error: message,
|
|
528
691
|
});
|
|
529
692
|
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
const remotePath = deriveKnowledgeRemotePath(
|
|
538
|
-
parser.sandbox_output_path,
|
|
539
|
-
deps.outputDir,
|
|
540
|
-
options.knowledgePath,
|
|
541
|
-
);
|
|
542
|
-
const uploaded = await deps.uploadKnowledgeFile(remotePath, parser.sandbox_output_path);
|
|
693
|
+
failed.push(createFailedKnowledgeSource({
|
|
694
|
+
source: sourceReference,
|
|
695
|
+
localSourcePath: source.localPath,
|
|
696
|
+
message,
|
|
697
|
+
reason,
|
|
698
|
+
}));
|
|
699
|
+
};
|
|
543
700
|
|
|
544
|
-
|
|
545
|
-
|
|
701
|
+
for (const [index, source] of sources.entries()) {
|
|
702
|
+
const sourceReference = buildSourceReference(source);
|
|
703
|
+
|
|
704
|
+
deps.eventLogger?.info("Processing knowledge source", {
|
|
705
|
+
phase: "file_processing",
|
|
546
706
|
progress_current: index + 1,
|
|
547
707
|
progress_total: sources.length,
|
|
548
708
|
source_name: buildKnowledgeSourceName(source),
|
|
549
|
-
remote_path: uploaded.path,
|
|
550
|
-
warning_count: parser.warnings.length,
|
|
551
709
|
});
|
|
552
710
|
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
711
|
+
let parser: KnowledgeParserResult;
|
|
712
|
+
try {
|
|
713
|
+
parser = await deps.runParser({
|
|
714
|
+
filePath: source.localPath,
|
|
715
|
+
outputDir: deps.outputDir,
|
|
716
|
+
description: options.description,
|
|
717
|
+
slug: slugs[index],
|
|
718
|
+
sourceReference,
|
|
719
|
+
});
|
|
720
|
+
} catch (error) {
|
|
721
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
722
|
+
recordSourceFailure(source, sourceReference, index, message, "parser_error");
|
|
723
|
+
continue;
|
|
724
|
+
}
|
|
725
|
+
|
|
726
|
+
try {
|
|
727
|
+
const remotePath = deriveKnowledgeRemotePath(
|
|
728
|
+
parser.sandbox_output_path,
|
|
729
|
+
deps.outputDir,
|
|
730
|
+
options.knowledgePath,
|
|
731
|
+
);
|
|
732
|
+
const uploaded = await deps.uploadKnowledgeFile(remotePath, parser.sandbox_output_path);
|
|
733
|
+
|
|
734
|
+
deps.eventLogger?.info("Knowledge source ingested", {
|
|
735
|
+
phase: "file_completed",
|
|
556
736
|
progress_current: index + 1,
|
|
557
737
|
progress_total: sources.length,
|
|
558
738
|
source_name: buildKnowledgeSourceName(source),
|
|
739
|
+
remote_path: uploaded.path,
|
|
559
740
|
warning_count: parser.warnings.length,
|
|
560
741
|
});
|
|
561
|
-
}
|
|
562
742
|
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
743
|
+
if (parser.warnings.length > 0) {
|
|
744
|
+
deps.eventLogger?.warn("Knowledge source emitted warnings", {
|
|
745
|
+
phase: "file_warning",
|
|
746
|
+
progress_current: index + 1,
|
|
747
|
+
progress_total: sources.length,
|
|
748
|
+
source_name: buildKnowledgeSourceName(source),
|
|
749
|
+
warning_count: parser.warnings.length,
|
|
750
|
+
});
|
|
751
|
+
}
|
|
752
|
+
|
|
753
|
+
ingested.push(
|
|
754
|
+
createKnowledgeIngestResult({
|
|
755
|
+
source: sourceReference,
|
|
756
|
+
localSourcePath: source.localPath,
|
|
757
|
+
outputPath: parser.sandbox_output_path,
|
|
758
|
+
remotePath: uploaded.path,
|
|
759
|
+
parser,
|
|
760
|
+
}),
|
|
761
|
+
);
|
|
762
|
+
} catch (error) {
|
|
763
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
764
|
+
recordSourceFailure(source, sourceReference, index, message, "upload_error");
|
|
765
|
+
}
|
|
572
766
|
}
|
|
573
767
|
|
|
574
|
-
return
|
|
768
|
+
return {
|
|
769
|
+
ingested,
|
|
770
|
+
failed,
|
|
771
|
+
};
|
|
575
772
|
}
|
|
576
773
|
|
|
577
774
|
export async function knowledgeCommand(args: ParsedArgs): Promise<void> {
|
|
@@ -601,12 +798,14 @@ export async function knowledgeCommand(args: ParsedArgs): Promise<void> {
|
|
|
601
798
|
const eventLogger = createKnowledgeIngestEventLogger();
|
|
602
799
|
|
|
603
800
|
try {
|
|
801
|
+
const sourceMode = options.path ? "path_prefix" : "explicit_sources";
|
|
802
|
+
|
|
604
803
|
eventLogger?.info("Starting knowledge ingest", {
|
|
605
804
|
phase: "started",
|
|
606
|
-
mode:
|
|
805
|
+
mode: sourceMode,
|
|
607
806
|
});
|
|
608
807
|
|
|
609
|
-
const
|
|
808
|
+
const collection = await collectKnowledgeSources(options, {
|
|
610
809
|
client,
|
|
611
810
|
projectSlug: config.projectSlug,
|
|
612
811
|
downloadUploads: (uploadPaths) =>
|
|
@@ -616,13 +815,25 @@ export async function knowledgeCommand(args: ParsedArgs): Promise<void> {
|
|
|
616
815
|
),
|
|
617
816
|
),
|
|
618
817
|
});
|
|
818
|
+
const requestedCount = collection.sources.length + collection.skipped.length;
|
|
819
|
+
if (requestedCount === 0) {
|
|
820
|
+
throw new Error("No supported knowledge sources were found.");
|
|
821
|
+
}
|
|
619
822
|
|
|
620
823
|
eventLogger?.info("Resolved knowledge sources", {
|
|
621
824
|
phase: "sources_resolved",
|
|
622
|
-
progress_total:
|
|
825
|
+
progress_total: requestedCount,
|
|
826
|
+
ingestable_count: collection.sources.length,
|
|
827
|
+
skipped_count: collection.skipped.length,
|
|
623
828
|
});
|
|
829
|
+
if (collection.skipped.length > 0) {
|
|
830
|
+
eventLogger?.warn("Skipped knowledge sources", {
|
|
831
|
+
phase: "sources_skipped",
|
|
832
|
+
skipped_count: collection.skipped.length,
|
|
833
|
+
});
|
|
834
|
+
}
|
|
624
835
|
|
|
625
|
-
const results = await ingestResolvedSources(sources, options, {
|
|
836
|
+
const results = await ingestResolvedSources(collection.sources, options, {
|
|
626
837
|
client,
|
|
627
838
|
projectSlug: config.projectSlug,
|
|
628
839
|
outputDir,
|
|
@@ -631,26 +842,51 @@ export async function knowledgeCommand(args: ParsedArgs): Promise<void> {
|
|
|
631
842
|
uploadKnowledgeFile: (remotePath, localPath) =>
|
|
632
843
|
putRemoteFileFromLocal(client, config.projectSlug, remotePath, localPath),
|
|
633
844
|
});
|
|
845
|
+
const jobResult = buildKnowledgeIngestJobResult({
|
|
846
|
+
requestedCount,
|
|
847
|
+
sourceMode,
|
|
848
|
+
knowledgePath: options.knowledgePath,
|
|
849
|
+
ingested: results.ingested,
|
|
850
|
+
skipped: collection.skipped,
|
|
851
|
+
failed: results.failed,
|
|
852
|
+
});
|
|
634
853
|
|
|
635
854
|
eventLogger?.info("Completed knowledge ingest", {
|
|
636
855
|
phase: "completed",
|
|
637
|
-
progress_current:
|
|
638
|
-
progress_total:
|
|
856
|
+
progress_current: requestedCount,
|
|
857
|
+
progress_total: requestedCount,
|
|
858
|
+
ingested_count: jobResult.summary.ingested_count,
|
|
859
|
+
skipped_count: jobResult.summary.skipped_count,
|
|
860
|
+
failed_count: jobResult.summary.failed_count,
|
|
639
861
|
});
|
|
640
862
|
|
|
641
|
-
await writeJobResultIfConfigured(
|
|
863
|
+
await writeJobResultIfConfigured(jobResult);
|
|
642
864
|
|
|
643
865
|
if (options.json) {
|
|
644
|
-
printJson(
|
|
866
|
+
printJson(jobResult);
|
|
645
867
|
return;
|
|
646
868
|
}
|
|
647
869
|
|
|
648
|
-
for (const result of
|
|
870
|
+
for (const result of jobResult.ingested) {
|
|
649
871
|
if (!options.quiet) {
|
|
650
872
|
cliLogger.info(`Ingested ${result.source} -> ${result.remotePath}`);
|
|
651
873
|
cliLogger.info(` ${result.summary}`);
|
|
652
874
|
}
|
|
653
875
|
}
|
|
876
|
+
|
|
877
|
+
for (const skipped of jobResult.skipped) {
|
|
878
|
+
if (!options.quiet) {
|
|
879
|
+
cliLogger.warn(`Skipped ${skipped.source}`);
|
|
880
|
+
cliLogger.warn(` ${skipped.message}`);
|
|
881
|
+
}
|
|
882
|
+
}
|
|
883
|
+
|
|
884
|
+
for (const failure of jobResult.failed) {
|
|
885
|
+
if (!options.quiet) {
|
|
886
|
+
cliLogger.error(`Failed ${failure.source}`);
|
|
887
|
+
cliLogger.error(` ${failure.message}`);
|
|
888
|
+
}
|
|
889
|
+
}
|
|
654
890
|
} catch (error) {
|
|
655
891
|
eventLogger?.error("Knowledge ingest failed", {
|
|
656
892
|
phase: "failed",
|