@paroicms/site-generator-plugin 0.9.1 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/gen-backend/ddl/site-generator.ddl.sql +57 -9
- package/gen-backend/dist/commands/execute-command.js +35 -9
- package/gen-backend/dist/commands/generator-session.js +49 -10
- package/gen-backend/dist/data-format.js +32 -4
- package/gen-backend/dist/db/db-init.js +3 -1
- package/gen-backend/dist/db/db-read.queries.js +142 -0
- package/gen-backend/dist/db/db-write.queries.js +144 -0
- package/gen-backend/dist/db/ddl-migration.js +8 -6
- package/gen-backend/dist/db/formatters.js +46 -0
- package/gen-backend/dist/generator/fake-content-generator.ts/content-report.js +9 -5
- package/gen-backend/dist/generator/fake-content-generator.ts/create-database-with-fake-content.js +18 -13
- package/gen-backend/dist/generator/fake-content-generator.ts/generate-fake-content.js +16 -12
- package/gen-backend/dist/generator/fake-content-generator.ts/invoke-generate-fake-content.js +26 -17
- package/gen-backend/dist/generator/lib/calling-llm-anthropic.js +33 -0
- package/gen-backend/dist/generator/lib/calling-llm-mistral.js +156 -0
- package/gen-backend/dist/generator/lib/create-prompt.js +2 -2
- package/gen-backend/dist/generator/lib/debug-utils.js +74 -48
- package/gen-backend/dist/generator/lib/llm-tokens.js +7 -9
- package/gen-backend/dist/generator/lib/llm-utils.js +8 -0
- package/gen-backend/dist/generator/lib/prompt-template.js +10 -0
- package/gen-backend/dist/generator/lib/session-utils.js +31 -0
- package/gen-backend/dist/generator/llm-queries/invoke-message-guard.js +20 -9
- package/gen-backend/dist/generator/llm-queries/invoke-new-site-analysis.js +73 -47
- package/gen-backend/dist/generator/llm-queries/invoke-update-site-schema.js +106 -43
- package/gen-backend/dist/generator/site-generator/site-generator.js +26 -18
- package/gen-backend/dist/lib/create-raw-context.js +31 -0
- package/gen-backend/dist/lib/site-remover.js +1 -1
- package/gen-backend/dist/plugin.js +8 -54
- package/gen-backend/prompts/generate-fake-content-multiple-documents.md +5 -5
- package/gen-backend/prompts/generate-fake-content-multiple-parts.md +5 -5
- package/gen-backend/prompts/generate-fake-content-single.md +4 -4
- package/gen-backend/prompts/{new-site-1-analysis.md → initial-1-analysis.md} +38 -29
- package/gen-backend/prompts/{new-site-2-fields.md → initial-2-fields.md} +3 -3
- package/gen-backend/prompts/message-guard.md +1 -1
- package/gen-backend/prompts/update-site-schema-1-write-details.md +5 -5
- package/gen-backend/prompts/update-site-schema-2-execute.md +29 -29
- package/gen-front/dist/gen-front.css +1 -1
- package/gen-front/dist/gen-front.mjs +137 -1175
- package/package.json +30 -32
- package/gen-backend/dist/db/db.queries.js +0 -60
- package/gen-backend/prompts/test-message1.txt +0 -1
- package/gen-front/dist/gen-front.eot +0 -0
- package/gen-front/dist/gen-front.svg +0 -345
- package/gen-front/dist/gen-front.ttf +0 -0
- package/gen-front/dist/gen-front.woff +0 -0
- package/gen-front/dist/gen-front.woff2 +0 -0
- package/gen-front/dist/gen-front2.woff2 +0 -0
- package/gen-front/dist/gen-front3.woff2 +0 -0
package/gen-backend/dist/generator/fake-content-generator.ts/content-report.js
CHANGED
@@ -1,11 +1,15 @@
 export function createGeneratedContentReport() {
-    let
+    let totalEntryCount = 0;
+    const llmReports = [];
     return {
-
-        return
+        getResults() {
+            return { entryCount: totalEntryCount, llmReports };
         },
-
-
+        add(entryCount, llmReport) {
+            totalEntryCount += entryCount;
+            if (llmReport) {
+                llmReports.push(llmReport);
+            }
         },
     };
 }
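Usage note: the report is now a small accumulator shared across generation tasks. A minimal sketch of the new API (the calling code and report values here are made up; only createGeneratedContentReport, add, and getResults come from the diff above):

const report = createGeneratedContentReport();
// One LLM call that produced 3 entries (illustrative report values):
report.add(3, { llmTaskName: "fake-content-article-3", modelName: "ministral-8b-latest", inputTokenCount: 1200, durationMs: 850, outputTokenCount: 900 });
report.add(1); // an entry produced without an LLM call carries no llmReport
const { entryCount, llmReports } = report.getResults();
// entryCount === 4, llmReports.length === 1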
package/gen-backend/dist/generator/fake-content-generator.ts/create-database-with-fake-content.js
CHANGED
@@ -1,12 +1,13 @@
 import { getPartTypeByName, getRegularDocumentTypeByName, getRoutingDocumentTypeByName, } from "@paroicms/internal-anywhere-lib";
 import { createSimpleTranslator, } from "@paroicms/public-server-lib";
+import { updateGeneratedSiteStepSetAsCompleted, } from "../../db/db-write.queries.js";
 import { getRandomImagePath } from "../lib/images-lib.js";
 import { createTaskCollector } from "../lib/tasks.js";
 import { dedupMessages } from "./content-helpers.js";
 import { createGeneratedContentReport } from "./content-report.js";
 import { generateLocalizedFooterMention } from "./create-node-contents.js";
 import { generateFieldSetContent, generateMultipleFieldSetContents, } from "./generate-fake-content.js";
-export async function fillSiteWithFakeContent(ctx, { regSite, siteTitle }) {
+export async function fillSiteWithFakeContent(ctx, stepHandle, { regSite, localizedValues }) {
     const { service } = ctx;
     const { fqdn } = regSite;
     const report = createGeneratedContentReport();
@@ -15,7 +16,7 @@ export async function fillSiteWithFakeContent(ctx, { regSite, siteTitle }) {
         labels: siteSchema.l10n,
         logger: ctx.logger,
     });
-    await updateSiteFields(ctx, report, { fqdn, siteSchema, siteTitle });
+    await updateSiteFields(ctx, report, { fqdn, siteSchema, siteTitle: localizedValues.siteTitle });
     const tasks = createTaskCollector(ctx);
     fillRoutingDocumentAndAddChildren(ctx, tasks, report, {
         fqdn,
@@ -28,10 +29,17 @@
     const { promise } = tasks.runAll({ maxParallel: 10, rateLimitPerSecond: 3 });
     const { doneCount, errorMessages } = await promise;
     if (errorMessages.length > 0) {
-        ctx.logger.warn(`Failed to generate
+        ctx.logger.warn(`Failed to generate documents:\n - ${errorMessages.join("\n - ")}`);
     }
     ctx.logger.debug(`… Executed ${doneCount} generating tasks`);
-
+    const results = report.getResults();
+    await updateGeneratedSiteStepSetAsCompleted(ctx, stepHandle, {
+        status: "completed",
+        contentEntryCount: results.entryCount,
+        contentInputTokenCount: results.llmReports.reduce((acc, r) => acc + r.inputTokenCount, 0),
+        contentOutputTokenCount: results.llmReports.reduce((acc, r) => acc + (r.outputTokenCount ?? 0), 0),
+        contentErrors: errorMessages.length > 0 ? errorMessages.join("\n - ") : null,
+    });
 }
 function fillRoutingDocumentAndAddChildren(ctx, tasks, report, siteOptions, nodeOptions) {
     const { routingIds, nodeType } = nodeOptions;
@@ -137,13 +145,12 @@ async function updateRoutingDocument(ctx, report, siteOptions, nodeOptions)
         siteSchema,
         schemaI18n,
         withTitle: false,
-
-    });
+        llmTaskName: nodeType.kebabName,
+    }, report);
     await ctx.service.connector.updateDocumentContent(fqdn, {
         nodeId: routingIds.nodeId,
         content: toRiDocumentContent(content, nodeType),
     });
-    report.addContentCount(1);
 }
 async function addRegularDocuments(ctx, report, siteOptions, nodeOptions) {
     ctx.logger.debug(`[TASK] Adding regular documents "${nodeOptions.nodeType.typeName}"…`);
@@ -160,8 +167,8 @@ async function addRegularDocuments(ctx, report, siteOptions, nodeOptions)
         count: getDefaultNodeContentCount(nodeType),
         withTitle: true,
         tolerateErrors,
-
-    });
+        llmTaskName: nodeType.kebabName,
+    }, report);
     const errorMessages = dedupMessages(tolerateErrors.errorMessages);
     if (errorMessages.length > 0) {
         ctx.logger.warn(`Error generating content for ${nodeType.typeName}:\n - ${errorMessages.join("\n - ")}`);
@@ -170,7 +177,6 @@ async function addRegularDocuments(ctx, report, siteOptions, nodeOptions)
         parentNodeId,
         contents: list.map((content) => toRiDocumentContent(content, nodeType)),
     });
-    report.addContentCount(list.length);
 }
 async function addParts(ctx, report, siteOptions, nodeOptions) {
     ctx.logger.debug(`[TASK] Adding parts "${nodeOptions.nodeType.typeName}"…`);
@@ -187,8 +193,8 @@ async function addParts(ctx, report, siteOptions, nodeOptions)
         count: getDefaultNodeContentCount(nodeType),
         withTitle: true,
         tolerateErrors,
-
-    });
+        llmTaskName: nodeType.kebabName,
+    }, report);
     const errorMessages = dedupMessages(tolerateErrors.errorMessages);
     if (errorMessages.length > 0) {
         ctx.logger.warn(`Error generating content for ${nodeType.typeName}:\n - ${errorMessages.join("\n - ")}`);
@@ -197,7 +203,6 @@ async function addParts(ctx, report, siteOptions, nodeOptions)
         parentNodeId,
         contents: list.map((content) => toRiPartContent(content, nodeType)),
     });
-    report.addContentCount(list.length);
 }
 function toRiDocumentContent(content, nodeType) {
     const { title, fields, featuredImage } = content;
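Worked example of the new step bookkeeping: the two reduce() calls above sum the per-task LLM reports into token totals. With made-up reports:

const llmReports = [
    { inputTokenCount: 1200, outputTokenCount: 900 },
    { inputTokenCount: 800 }, // no outputTokenCount: the ?? 0 fallback keeps the sum safe
];
llmReports.reduce((acc, r) => acc + r.inputTokenCount, 0); // 2000
llmReports.reduce((acc, r) => acc + (r.outputTokenCount ?? 0), 0); // 900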
package/gen-backend/dist/generator/fake-content-generator.ts/generate-fake-content.js
CHANGED
@@ -1,17 +1,17 @@
 import { camelToKebabCase, camelToTitleCase } from "../lib/utils.js";
 import { createNodeContents } from "./create-node-contents.js";
 import { invokeGenerateFakeContent, } from "./invoke-generate-fake-content.js";
-export async function generateFieldSetContent(ctx, options) {
+export async function generateFieldSetContent(ctx, options, report) {
     const list = await generateMultipleFieldSetContents(ctx, {
         ...options,
         count: 1,
-    });
+    }, report);
     if (list.length !== 1)
         throw new Error(`Expected one item, got ${list.length}`);
     return list[0];
 }
-export async function generateMultipleFieldSetContents(ctx, options) {
-    const { siteSchema, nodeType, documentType, schemaI18n, count, withTitle, tolerateErrors, } = options;
+export async function generateMultipleFieldSetContents(ctx, options, report) {
+    const { siteSchema, nodeType, documentType, schemaI18n, count, withTitle, tolerateErrors, llmTaskName, } = options;
     if (nodeType.kind === "site")
         throw new Error("Cannot generate content for site node type");
     // for a document, the LLM is best at generating the title, so we ask for it and remove it later
@@ -40,7 +40,7 @@ export async function generateMultipleFieldSetContents(ctx, options)
         defaultLanguage,
     });
     const language = defaultLanguage ?? "en";
-    let generatedContents = outputTags.length > 0
+    let output = outputTags.length > 0
         ? await invokeGenerateFakeContent(ctx, {
             count,
             typeKind: nodeType.kind,
@@ -49,18 +49,22 @@ export async function generateMultipleFieldSetContents(ctx, options)
             documentDescription,
             siteTheme,
             language,
-        }, outputTags, { tolerateErrors })
+        }, outputTags, { tolerateErrors, llmTaskName })
         : undefined;
-    if (skipTitle &&
-
-
-
-
+    if (skipTitle && output) {
+        output = {
+            contents: output.contents.map((content) => {
+                const { title, ...rest } = content;
+                return rest;
+            }),
+            llmReport: output.llmReport,
+        };
     }
+    report.add(count, output?.llmReport);
     return createNodeContents({
         nodeType,
         count,
-        generatedContents,
+        generatedContents: output?.contents,
        outputTags,
         language,
     });
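Callers now thread the shared report through as a third argument. A hypothetical call site (siteSchema, nodeType, documentType, schemaI18n, and report are assumed to be in scope; the signature itself is from the diff above):

const content = await generateFieldSetContent(ctx, {
    siteSchema,
    nodeType,
    documentType,
    schemaI18n,
    withTitle: false,
    llmTaskName: nodeType.kebabName, // kebab-case label, reused in debug file names
}, report);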
package/gen-backend/dist/generator/fake-content-generator.ts/invoke-generate-fake-content.js
CHANGED
@@ -1,15 +1,17 @@
-import { PromptTemplate } from "@langchain/core/prompts";
 import { languageLabelIn } from "@paroicms/public-anywhere-lib";
+import { batchInvokeMinistral } from "../lib/calling-llm-mistral.js";
 import { readPromptFile } from "../lib/create-prompt.js";
 import { debugBatchLlmOutputs } from "../lib/debug-utils.js";
 import { parseLlmResponseAsList } from "../lib/parse-llm-response.js";
-
-
-const
+import { buildPromptTemplate } from "../lib/prompt-template.js";
+// Load and create prompt templates
+const singlePromptTemplate = buildPromptTemplate(await readPromptFile("generate-fake-content-single.md"));
+const multipleDocumentsPromptTemplate = buildPromptTemplate(await readPromptFile("generate-fake-content-multiple-documents.md"));
+const multiplePartsPromptTemplate = buildPromptTemplate(await readPromptFile("generate-fake-content-multiple-parts.md"));
 export async function invokeGenerateFakeContent(ctx, input, outputTags, options) {
     const { language, typeKind } = input;
     const single = input.count === 1;
-    const
+    const llmTaskName = `fake-content-${options.llmTaskName}${single ? "" : `-${input.count}`}`;
     const tagAndDescriptions = outputTags
         .map(({ tagName, tagDescription }) => `<${tagName}>${tagDescription}</${tagName}>`)
         .join("\n\n");
@@ -36,22 +38,29 @@ export async function invokeGenerateFakeContent(ctx, input, outputTags, options)
         batchInputs.push(llmInput);
         startIndex = nextIndex;
     }
-    const debug = await debugBatchLlmOutputs(ctx,
-    let
-    if (!
-
-
+    const debug = await debugBatchLlmOutputs(ctx, llmTaskName, ctx.mistralModelName, undefined, batchInputs);
+    let llmOutput = debug.stored;
+    if (!llmOutput) {
+        // Select the appropriate prompt template
+        const promptTemplate = single
+            ? singlePromptTemplate
             : typeKind === "document"
-            ?
-            :
-
-
-
+                ? multipleDocumentsPromptTemplate
+                : multiplePartsPromptTemplate;
+        // Process batch inputs
+        const messages = batchInputs.map(promptTemplate);
+        const results = await batchInvokeMinistral(ctx, messages, {
+            llmTaskName,
+            temperature: 0.1,
+            maxTokens: 50_000,
+            timeoutMs: 60_000,
+        });
+        llmOutput = await debug.getMessageContents(results);
     }
     const results = [];
-    for (const llmMessageContent of
+    for (const llmMessageContent of llmOutput.outputs) {
         const list = parseLlmResponseAsList(llmMessageContent, outputTags, options);
         results.push(...list.map((fields) => Object.fromEntries(Object.entries(fields).map(([fieldName, value]) => [fieldName, { [language]: value }]))));
     }
-    return results;
+    return { contents: results, llmReport: llmOutput.llmReport };
 }
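For reference, the llmTaskName template above expands as follows (illustrative values): with options.llmTaskName = "article" and input.count = 5 it yields "fake-content-article-5"; with count = 1 the count suffix is dropped and the name is just "fake-content-article".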
package/gen-backend/dist/generator/lib/calling-llm-anthropic.js
ADDED
@@ -0,0 +1,33 @@
+import { LlmError } from "./llm-utils.js";
+export async function invokeClaude(ctx, options) {
+    const { anthropic, anthropicModelName } = ctx;
+    const { prompt, llmTaskName, maxTokens, systemInstruction, temperature } = options;
+    const system = systemInstruction === "beFast"
+        ? "Please provide brief, direct answers without extensive deliberation. Focus on giving the most immediately useful information quickly."
+        : "Take max 20 seconds to think and provide a thorough response.";
+    const startTime = Date.now();
+    const response = await anthropic.messages.create({
+        model: anthropicModelName,
+        system,
+        messages: [{ role: "user", content: prompt }],
+        max_tokens: maxTokens,
+        temperature: temperature,
+    });
+    const llmReport = {
+        llmTaskName,
+        modelName: anthropicModelName,
+        inputTokenCount: response.usage.input_tokens,
+        durationMs: Date.now() - startTime,
+        outputTokenCount: response.usage.output_tokens,
+    };
+    const [first] = response.content;
+    if (first.type === "text") {
+        return {
+            messageContent: first.text,
+            report: llmReport,
+        };
+    }
+    ctx.logger.error(`Unexpected response type from Claude: ${JSON.stringify(response.content, null, 2)}`);
+    llmReport.errorMessage = `Unexpected response type: ${first.type}`;
+    throw new LlmError(`Unexpected response type: "${first.type}"`, llmReport);
+}
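A hypothetical caller for the new Anthropic wrapper; it assumes ctx already carries an anthropic client and anthropicModelName, as destructured above, and the prompt and task label are made up:

const { messageContent, report } = await invokeClaude(ctx, {
    prompt: "Summarize the site schema in one short paragraph.",
    llmTaskName: "schema-summary", // hypothetical task label
    maxTokens: 1024,
    temperature: 0.2,
    systemInstruction: "beFast", // any other value selects the "thorough response" system prompt
});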
package/gen-backend/dist/generator/lib/calling-llm-mistral.js
ADDED
@@ -0,0 +1,156 @@
+import { messageOf } from "@paroi/data-formatters-lib";
+let seq = 0;
+export async function batchInvokeMinistral(ctx, prompts, options) {
+    const startTime = Date.now();
+    const responses = await execBatchInvokeMinistral(ctx, prompts, options);
+    const llmMessages = responses
+        .map((msg) => msg.response.body.choices[0]?.message.content)
+        .filter(Boolean);
+    const llmReport = {
+        llmTaskName: options.llmTaskName,
+        modelName: ctx.mistralModelName,
+        inputTokenCount: responses
+            .map((msg) => msg.response.body.usage.prompt_tokens)
+            .reduce((a, b) => a + b, 0),
+        durationMs: Date.now() - startTime,
+        outputTokenCount: responses
+            .map((msg) => msg.response.body.usage.completion_tokens)
+            .reduce((a, b) => a + b, 0),
+    };
+    return {
+        llmMessages,
+        llmReport,
+    };
+}
+async function execBatchInvokeMinistral(ctx, prompts, options) {
+    const { mistral, mistralModelName, logger } = ctx;
+    const uploadedFileIds = [];
+    try {
+        const messages = prompts
+            .map((prompt, index) => ({
+                custom_id: `${index}`,
+                body: {
+                    max_tokens: options.maxTokens,
+                    temperature: options.temperature,
+                    messages: [
+                        {
+                            role: "user",
+                            content: prompt,
+                        },
+                    ],
+                },
+            }))
+            .map((request) => JSON.stringify(request))
+            .join("\n");
+        const batchData = await mistral.files.upload({
+            file: {
+                fileName: `batch-input-${seq++}.jsonl`,
+                content: Buffer.from(messages),
+            },
+            purpose: "batch",
+        });
+        uploadedFileIds.push(batchData.id);
+        const createdJob = await mistral.batch.jobs.create({
+            inputFiles: [batchData.id],
+            model: mistralModelName,
+            endpoint: "/v1/chat/completions",
+            metadata: { jobType: "batchInvoke" },
+            timeoutHours: 1,
+        });
+        const outputFileId = await waitJobCompletion(ctx, {
+            jobId: createdJob.id,
+            timeoutMs: options.timeoutMs,
+        });
+        uploadedFileIds.push(outputFileId);
+        const outputFileStream = await mistral.files.download({ fileId: outputFileId });
+        const result = await readAsString(outputFileStream);
+        try {
+            return result
+                .trim()
+                .split("\n")
+                .map((line) => JSON.parse(line));
+        }
+        catch (error) {
+            logger.error("[Mistral] Error parsing batch job result:", error, result);
+            throw new Error("Failed to parse batch job result");
+        }
+    }
+    finally {
+        for (const fileId of uploadedFileIds) {
+            try {
+                await mistral.files.delete({ fileId });
+            }
+            catch (error) {
+                logger.error("[Mistral] Error deleting uploaded file:", error, fileId);
+            }
+        }
+    }
+}
+async function waitJobCompletion(ctx, options) {
+    const { mistral, logger } = ctx;
+    const { jobId, timeoutMs } = options;
+    const startTime = Date.now();
+    let jobStatus;
+    let timeoutOccurred = false;
+    try {
+        while (true) {
+            jobStatus = await mistral.batch.jobs.get({ jobId });
+            const { status } = jobStatus;
+            if (status === "QUEUED" || status === "RUNNING" || status === "CANCELLATION_REQUESTED") {
+                const elapsedTime = Date.now() - startTime;
+                if (elapsedTime > timeoutMs) {
+                    timeoutOccurred = true;
+                    break;
+                }
+                await new Promise((resolve) => setTimeout(resolve, 2_000));
+                continue;
+            }
+            if (status === "FAILED" ||
+                status === "CANCELLED" ||
+                status === "TIMEOUT_EXCEEDED" ||
+                status === "SUCCESS") {
+                break;
+            }
+            throw new Error(`Unexpected batch job "${jobStatus.id}" status: "${status}"`);
+        }
+    }
+    catch (error) {
+        if (!jobStatus) {
+            throw new Error(`[Mistral] Failed to wait for batch job "${jobId}" completion: ${messageOf(error)}`);
+        }
+        logger.error(`[Mistral] Error while waiting for job "${jobId}" completion:`, error);
+    }
+    if (!jobStatus)
+        throw new Error("[Mistral] Should have a job status here");
+    if (timeoutOccurred) {
+        logger.debug(`[Mistral] Batch job "${jobId}" timed out after ${timeoutMs}ms. Attempting to cancel…`);
+        try {
+            jobStatus = await mistral.batch.jobs.cancel({ jobId });
+        }
+        catch (error) {
+            throw new Error(`[Mistral] Failed to cancel batch job "${jobId}" after timeout: ${messageOf(error)}`);
+        }
+    }
+    const { status, errors } = jobStatus;
+    if (status !== "SUCCESS") {
+        const errMessages = errors.map((e) => e.message).join(", ");
+        throw new Error(`[Mistral] Batch job ${jobStatus.id} failed with status "${status}": ${errMessages}`);
+    }
+    if (!jobStatus?.outputFile)
+        throw new Error("[Mistral] Missing output file");
+    return jobStatus.outputFile;
+}
+function readAsString(stream) {
+    return new Promise((resolve, reject) => {
+        const output = [];
+        stream.pipeTo(new WritableStream({
+            write(chunk) {
+                output.push(new TextDecoder("utf-8").decode(chunk));
+            },
+            close() {
+                resolve(output.join(""));
+            },
+            abort: reject,
+        }));
+    });
+}
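The uploaded batch file is JSONL, one request object per line, exactly as assembled in execBatchInvokeMinistral above. With the maxTokens/temperature values used by invokeGenerateFakeContent, a single line would look like this (prompt text elided):

{"custom_id":"0","body":{"max_tokens":50000,"temperature":0.1,"messages":[{"role":"user","content":"…"}]}}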
package/gen-backend/dist/generator/lib/create-prompt.js
CHANGED
@@ -1,8 +1,8 @@
-import { PromptTemplate } from "@langchain/core/prompts";
 import { connectorPackageDir } from "@paroicms/connector";
 import { readFile } from "node:fs/promises";
 import { join } from "node:path";
 import { projectDir } from "../../context.js";
+import { buildPromptTemplate } from "./prompt-template.js";
 const contextContent = await readPromptFile("0-context.md");
 const siteSchemaTsDefs = await readFile(join(connectorPackageDir, "typeonly", "site-schema-json-types.d.ts"), "utf-8");
 const predefinedFields = JSON.parse(await readPromptFile("predefined-fields.json"));
@@ -27,7 +27,7 @@ ${contextContent}${schemaTypeDefTemplate}
 
 ${promptContent}
 `;
-    return
+    return buildPromptTemplate(template);
 }
 export async function readPromptFile(fileName) {
     return await readFile(join(projectDir, "prompts", fileName), "utf-8");
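prompt-template.js itself does not appear in this diff (the file list shows it as new, +10 lines). From its call sites, buildPromptTemplate(template) must return a function that batchInputs.map(promptTemplate) can apply to an input object. A speculative sketch of such a langchain-free replacement:

// Speculative sketch only; the real prompt-template.js is not shown in this diff.
export function buildPromptTemplate(template) {
    // Substitute {placeholder} occurrences with values from the input object
    return (values) => template.replace(/\{(\w+)\}/g, (match, key) => (key in values ? String(values[key]) : match));
}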
package/gen-backend/dist/generator/lib/debug-utils.js
CHANGED
@@ -1,62 +1,61 @@
 import { messageOf } from "@paroi/data-formatters-lib";
+import { ensureDirectory } from "@paroicms/internal-server-lib";
 import { readFile, writeFile } from "node:fs/promises";
 import { join } from "node:path";
 import { estimateTokenCount } from "./llm-tokens.js";
 const debugSep = "\n\n========================\n\n";
-export async function debugLlmOutput(ctx, debugName, llmModelName, llmInput) {
-    const
-    const
-
-
+export async function debugLlmOutput(ctx, llmTaskName, llmModelName, stepHandle, llmInput) {
+    const aggregatedInput = Object.values(llmInput).join("\n");
+    const inputTokenCount = aggregatedInput ? estimateTokenCount(aggregatedInput) : 0;
+    const stored = await readDebugLlmOutputs(ctx, { llmTaskName, inputTokenCount, llmModelName });
+    const singleStored = stored && stored.outputs.length === 1
+        ? {
+            output: stored.outputs[0],
+            llmReport: stored.llmReport,
+        }
+        : undefined;
+    if (singleStored) {
+        ctx.logger.info(`[${llmTaskName}][${llmModelName}] Found debug output (skip calling LLM)`);
     }
     else {
-
-        const tokenCount = aggregatedInput ? await estimateTokenCount(aggregatedInput) : 0;
-        ctx.logger.debug(`[${debugName}][${llmModelName}] Calling LLM… User tokens: ~${tokenCount}`);
+        ctx.logger.debug(`[${llmTaskName}][${llmModelName}] Calling LLM… User tokens: ~${inputTokenCount}`);
     }
     const startTs = Date.now();
     return {
-
-        async getMessageContent(llmMessage) {
-
-
-
-
-            ctx.logger.debug(`… done. Duration: ${duration} ms, Tokens: ~${llmMessageContent.length} - [${debugName}][${llmModelName}]`);
-            await writeDebugLlmInputOutputs(ctx, debugName, llmModelName, [
+        stored: singleStored,
+        async getMessageContent(llmMessage, llmReport) {
+            const llmMessageContent = llmMessage;
+            const totalTokens = llmReport.outputTokenCount ?? 0;
+            ctx.logger.debug(`… done. Duration: ${llmReport.durationMs} ms, Tokens: ~${totalTokens} - [${llmTaskName}][${llmModelName}]`);
+            await writeDebugLlmInputOutputs(ctx, stepHandle, [
                 {
                     llmInput,
                     llmMessageContent,
                 },
-            ]);
-            return llmMessageContent;
+            ], llmReport, startTs);
+            return { output: llmMessageContent, llmReport };
         },
     };
 }
-export async function debugBatchLlmOutputs(ctx, debugName, llmModelName, llmInputs) {
-    const
-
-
+export async function debugBatchLlmOutputs(ctx, llmTaskName, llmModelName, stepHandle, llmInputs) {
+    const aggregatedInput = llmInputs
+        .map((llmInput) => Object.values(llmInput).join("\n"))
+        .join("\n\n");
+    const inputTokenCount = aggregatedInput ? estimateTokenCount(aggregatedInput) : 0;
+    const stored = await readDebugLlmOutputs(ctx, { llmTaskName, inputTokenCount, llmModelName });
+    if (stored) {
+        ctx.logger.info(`[${llmTaskName}][${llmModelName}] Found debug output (skip calling LLM)`);
     }
     else {
-        const aggregatedInput = llmInputs
-            .map((llmInput) => Object.values(llmInput).join("\n"))
-            .join("\n\n");
-        const tokenCount = aggregatedInput ? await estimateTokenCount(aggregatedInput) : 0;
-        ctx.logger.debug(`[${debugName}][${llmModelName}] Calling LLM… User tokens: ~${tokenCount}`);
+        ctx.logger.debug(`[${llmTaskName}][${llmModelName}] Calling LLM… User tokens: ~${inputTokenCount}`);
     }
     const startTs = Date.now();
     return {
-
-        async getMessageContents(llmMessages) {
-            const llmMessageContents = llmMessages.map((llmMessage) => {
-                if (typeof llmMessage.content !== "string")
-                    throw new Error("Expected a string");
-                return llmMessage.content;
-            });
+        stored,
+        async getMessageContents({ llmMessages, llmReport }) {
+            const llmMessageContents = llmMessages;
             const duration = Date.now() - startTs;
-
-            ctx.logger.debug(`… done. Duration: ${duration} ms, Tokens: ~${totalTokens} - [${debugName}][${llmModelName}]`);
+            ctx.logger.debug(`… done. Duration: ${duration} ms, Tokens: ~${llmReport.outputTokenCount} - [${llmTaskName}][${llmModelName}]`);
             if (llmMessageContents.length !== llmInputs.length) {
                 throw new Error(`Expected ${llmInputs.length} LLM outputs, but got ${llmMessageContents.length}`);
             }
@@ -66,16 +65,17 @@ export async function debugBatchLlmOutputs(ctx, debugName, llmModelName, llmInputs)
                     llmMessageContent: llmMessageContents[i],
                 };
             });
-            await writeDebugLlmInputOutputs(ctx,
-            return llmMessageContents;
+            await writeDebugLlmInputOutputs(ctx, stepHandle, list, llmReport, startTs);
+            return { outputs: llmMessageContents, llmReport };
         },
     };
 }
-async function readDebugLlmOutputs(ctx, debugName) {
+async function readDebugLlmOutputs(ctx, options) {
     const { logger, debugDir } = ctx;
     if (!debugDir)
         return;
-    const debugFile = join(debugDir, `${debugName}.txt`);
+    const { llmTaskName, inputTokenCount, llmModelName } = options;
+    const debugFile = join(debugDir, `${llmTaskName}.txt`);
     try {
         const debugContent = await readFile(debugFile, "utf8");
         const list = debugContent.split(debugSep);
@@ -86,8 +86,15 @@ async function readDebugLlmOutputs(ctx, debugName) {
         for (let i = 1; i < list.length; i += 2) {
             outputs.push(list[i]);
         }
-
-
+        const llmReport = {
+            llmTaskName,
+            modelName: llmModelName,
+            inputTokenCount,
+            durationMs: 0,
+            outputTokenCount: estimateTokenCount(outputs.join(" ")),
+        };
+        logger.debug(`… found debug output for ${llmTaskName} (skip calling LLM)`);
+        return { outputs, llmReport };
     }
     catch (error) {
         if (error.code !== "ENOENT") {
@@ -95,17 +102,36 @@
         }
     }
 }
-async function writeDebugLlmInputOutputs(ctx, debugName, llmModelName, list) {
-    const { debugDir } = ctx;
+async function writeDebugLlmInputOutputs(ctx, stepHandle, list, llmReport, startTs) {
+    const { debugDir, sessionId } = ctx;
     if (!debugDir)
         return;
-    const dt = new Date().toISOString();
-    const
-
+    const dt = new Date(startTs).toISOString();
+    const nameParts = [
+        dt.substring(0, 19).replace(/:/g, "-"),
+        stepHandle?.stepNumber,
+        llmReport.llmTaskName,
+        llmReport.errorMessage ? "ERROR" : undefined,
+    ].filter(Boolean);
+    const baseName = nameParts.join("-");
+    const header = [
+        `Model: ${llmReport.modelName}`,
+        `Task: ${llmReport.llmTaskName}`,
+        `Input tokens: ~${llmReport.inputTokenCount}`,
+        `Output tokens: ~${llmReport.outputTokenCount}`,
+        `Duration: ${llmReport.durationMs} ms`,
+        `Date: ${dt}`,
+    ];
+    if (llmReport.errorMessage) {
+        header.push(`Error: ${llmReport.errorMessage}`);
+    }
+    const content = [header.join("\n")];
     for (const { llmInput, llmMessageContent } of list) {
         content.push(debugSep, llmInputToDebugMessage(llmInput), debugSep, llmMessageContent);
     }
-
+    const dir = join(debugDir, sessionId);
+    await ensureDirectory(dir);
+    await writeFile(join(dir, `${baseName}.txt`), content.join(""));
 }
 function llmInputToDebugMessage(input) {
     return Object.entries(input)
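With the new naming scheme in writeDebugLlmInputOutputs, a batch written at 2025-01-15T09:30:12.000Z for step 2 and task fake-content-article-5 would land in <debugDir>/<sessionId>/2025-01-15T09-30-12-2-fake-content-article-5.txt (all values illustrative), with -ERROR appended to the name when llmReport.errorMessage is set, and a header such as:

Model: ministral-8b-latest
Task: fake-content-article-5
Input tokens: ~1200
Output tokens: ~900
Duration: 850 ms
Date: 2025-01-15T09:30:12.000Z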
package/gen-backend/dist/generator/lib/llm-tokens.js
CHANGED
@@ -1,10 +1,8 @@
-
-
-
-
-
-
-
-    const chunks = await splitter.splitText(text);
-    return chunks.length;
+/**
+ * Estimates token count based on character count.
+ * Most LLMs use subword tokenization where 1 token is roughly 4 characters in English.
+ */
+export function estimateTokenCount(text) {
+    // Approximate token count using character count / 4 for English text
+    return Math.ceil(text.length / 4);
 }
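A quick check of the heuristic: a 100-character English string estimates to ceil(100 / 4) = 25 tokens, and "Hello, world!" (13 characters) to ceil(13 / 4) = 4. The previous implementation delegated to a text splitter and counted chunks; the character-based approximation drops that dependency and makes the function synchronous, which is why the await disappears from the call sites in debug-utils.js above.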