@paroicms/site-generator-plugin 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -0
- package/gen-backend/dist/context.js +2 -0
- package/gen-backend/dist/data-format.js +37 -0
- package/gen-backend/dist/generator/actions.js +35 -0
- package/gen-backend/dist/generator/fake-content-generator.ts/create-database-with-fake-content.js +227 -0
- package/gen-backend/dist/generator/fake-content-generator.ts/create-node-contents.js +156 -0
- package/gen-backend/dist/generator/fake-content-generator.ts/fake-content-types.js +1 -0
- package/gen-backend/dist/generator/fake-content-generator.ts/generate-fake-content.js +127 -0
- package/gen-backend/dist/generator/fake-content-generator.ts/invoke-generate-fake-content.js +49 -0
- package/gen-backend/dist/generator/generator-types.js +1 -0
- package/gen-backend/dist/generator/helpers/esm-module.helper.js +6 -0
- package/gen-backend/dist/generator/helpers/js-utils.js +14 -0
- package/gen-backend/dist/generator/lib/common-types.js +1 -0
- package/gen-backend/dist/generator/lib/create-prompt.js +44 -0
- package/gen-backend/dist/generator/lib/debug-utils.js +118 -0
- package/gen-backend/dist/generator/lib/images-lib.js +16 -0
- package/gen-backend/dist/generator/lib/llm-invoke-types.js +1 -0
- package/gen-backend/dist/generator/lib/llm-tokens.js +10 -0
- package/gen-backend/dist/generator/lib/markdown-bulleted-list-parser.js +147 -0
- package/gen-backend/dist/generator/lib/parse-llm-response.js +160 -0
- package/gen-backend/dist/generator/lib/tasks.js +112 -0
- package/gen-backend/dist/generator/lib/utils.js +13 -0
- package/gen-backend/dist/generator/llm-queries/invoke-message-guard.js +86 -0
- package/gen-backend/dist/generator/llm-queries/invoke-new-site-analysis.js +169 -0
- package/gen-backend/dist/generator/llm-queries/invoke-update-site-schema.js +94 -0
- package/gen-backend/dist/generator/site-generator/common-template-creator.js +108 -0
- package/gen-backend/dist/generator/site-generator/document-template-creator.js +329 -0
- package/gen-backend/dist/generator/site-generator/id-key-provider.js +14 -0
- package/gen-backend/dist/generator/site-generator/jt-site-schema-helpers.js +55 -0
- package/gen-backend/dist/generator/site-generator/site-generator.js +75 -0
- package/gen-backend/dist/generator/site-generator/template-creator-types.js +1 -0
- package/gen-backend/dist/generator/site-generator/template-helpers.js +26 -0
- package/gen-backend/dist/generator/site-generator/theme-creator.js +180 -0
- package/gen-backend/dist/generator/site-generator/theme-css.js +323 -0
- package/gen-backend/dist/generator/site-schema-generator/analysis-types.js +1 -0
- package/gen-backend/dist/generator/site-schema-generator/create-l10n.js +42 -0
- package/gen-backend/dist/generator/site-schema-generator/create-site-schema.js +240 -0
- package/gen-backend/dist/generator/site-schema-generator/default-pages.js +38 -0
- package/gen-backend/dist/plugin.js +86 -0
- package/gen-backend/prompts/0-context.md +9 -0
- package/gen-backend/prompts/generate-fake-content-multiple.md +22 -0
- package/gen-backend/prompts/generate-fake-content-single.md +16 -0
- package/gen-backend/prompts/message-guard.md +89 -0
- package/gen-backend/prompts/new-site-1-analysis.md +214 -0
- package/gen-backend/prompts/new-site-2-fields.md +50 -0
- package/gen-backend/prompts/predefined-fields.json +110 -0
- package/gen-backend/prompts/test-message1.txt +1 -0
- package/gen-backend/prompts/update-site-schema-1-write-details.md +57 -0
- package/gen-backend/prompts/update-site-schema-2-execute.md +77 -0
- package/gen-front/dist/gen-front.css +1 -0
- package/gen-front/dist/gen-front.eot +0 -0
- package/gen-front/dist/gen-front.mjs +998 -0
- package/gen-front/dist/gen-front.svg +345 -0
- package/gen-front/dist/gen-front.ttf +0 -0
- package/gen-front/dist/gen-front.woff +0 -0
- package/gen-front/dist/gen-front.woff2 +0 -0
- package/gen-front/dist/gen-front2.woff2 +0 -0
- package/gen-front/dist/gen-front3.woff2 +0 -0
- package/package.json +79 -0

package/gen-backend/dist/generator/lib/create-prompt.js
@@ -0,0 +1,44 @@
+import { PromptTemplate } from "@langchain/core/prompts";
+import { connectorPackageDir } from "@paroicms/connector";
+import { readFile } from "node:fs/promises";
+import { join } from "node:path";
+import { projectDir } from "../../context.js";
+const contextContent = await readPromptFile("0-context.md");
+const siteSchemaTsDefs = await readFile(join(connectorPackageDir, "typeonly", "site-schema-json-types.d.ts"), "utf-8");
+const predefinedFields = JSON.parse(await readPromptFile("predefined-fields.json"));
+export async function createPromptTemplate(options) {
+    const { fileName, withSiteSchemaTsDefs } = options;
+    const promptContent = await readPromptFile(fileName);
+    const schemaTypeDefTemplate = withSiteSchemaTsDefs
+        ? `
+
+All the site structure is described in a site-schema in JSON format. Here is the TypeScript definition for the site schema:
+
+<site_schema_ts_defs>
+{siteSchemaTsDefs}
+</site_schema_ts_defs>`
+        : "";
+    const template = `
+# Context
+
+${contextContent}${schemaTypeDefTemplate}
+
+# Task to do
+
+${promptContent}
+`;
+    return PromptTemplate.fromTemplate(template);
+}
+export async function readPromptFile(fileName) {
+    return await readFile(join(projectDir, "prompts", fileName), "utf-8");
+}
+export function getPredefinedFields() {
+    if (!predefinedFields)
+        throw new Error("Predefined fields not loaded");
+    return predefinedFields;
+}
+export function getSiteSchemaTsDefs() {
+    if (!siteSchemaTsDefs)
+        throw new Error("Site-schema defs not loaded");
+    return siteSchemaTsDefs;
+}
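
For orientation, a minimal usage sketch of the prompt helper above. The relative import, the format() call, and the absence of other prompt variables are assumptions: each prompt markdown file under gen-backend/prompts/ may declare its own placeholders.

// Hypothetical sketch, not part of the package.
import { createPromptTemplate, getSiteSchemaTsDefs } from "./create-prompt.js";

const promptTemplate = await createPromptTemplate({
    fileName: "new-site-1-analysis.md", // one of the files under gen-backend/prompts/
    withSiteSchemaTsDefs: true,
});
// The returned LangChain PromptTemplate still expects its variables,
// including the {siteSchemaTsDefs} placeholder added by the wrapper above.
const prompt = await promptTemplate.format({
    siteSchemaTsDefs: getSiteSchemaTsDefs(),
    // ...plus any placeholders declared inside new-site-1-analysis.md itself
});
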
package/gen-backend/dist/generator/lib/debug-utils.js
@@ -0,0 +1,118 @@
+import { messageOf } from "@paroi/data-formatters-lib";
+import { readFile, writeFile } from "node:fs/promises";
+import { join } from "node:path";
+import { estimateTokenCount } from "./llm-tokens.js";
+const debugSep = "\n\n========================\n\n";
+export async function debugLlmOutput(ctx, debugName, llmModelName, llmInput) {
+    const storedContents = await readDebugLlmOutputs(ctx, debugName);
+    const storedContent = storedContents?.[0];
+    if (storedContent) {
+        ctx.logger.info(`[${debugName}][${llmModelName}] Found debug output (skip calling LLM)`);
+    }
+    else {
+        const aggregatedInput = Object.values(llmInput).join("\n");
+        const tokenCount = aggregatedInput ? await estimateTokenCount(aggregatedInput) : 0;
+        ctx.logger.debug(`[${debugName}][${llmModelName}] Calling LLM… User tokens: ~${tokenCount}`);
+    }
+    const startTs = Date.now();
+    return {
+        storedContent,
+        async getMessageContent(llmMessage) {
+            if (typeof llmMessage.content !== "string")
+                throw new Error("Expected a string");
+            const llmMessageContent = llmMessage.content;
+            const duration = Date.now() - startTs;
+            ctx.logger.debug(`… done. Duration: ${duration} ms, Tokens: ~${llmMessageContent.length} - [${debugName}][${llmModelName}]`);
+            await writeDebugLlmInputOutputs(ctx, debugName, llmModelName, [
+                {
+                    llmInput,
+                    llmMessageContent,
+                },
+            ]);
+            return llmMessageContent;
+        },
+    };
+}
+export async function debugBatchLlmOutputs(ctx, debugName, llmModelName, llmInputs) {
+    const storedContents = await readDebugLlmOutputs(ctx, debugName);
+    if (storedContents) {
+        ctx.logger.info(`[${debugName}][${llmModelName}] Found debug output (skip calling LLM)`);
+    }
+    else {
+        const aggregatedInput = llmInputs
+            .map((llmInput) => Object.values(llmInput).join("\n"))
+            .join("\n\n");
+        const tokenCount = aggregatedInput ? await estimateTokenCount(aggregatedInput) : 0;
+        ctx.logger.debug(`[${debugName}][${llmModelName}] Calling LLM… User tokens: ~${tokenCount}`);
+    }
+    const startTs = Date.now();
+    return {
+        storedContents,
+        async getMessageContents(llmMessages) {
+            const llmMessageContents = llmMessages.map((llmMessage) => {
+                if (typeof llmMessage.content !== "string")
+                    throw new Error("Expected a string");
+                return llmMessage.content;
+            });
+            const duration = Date.now() - startTs;
+            const totalTokens = llmMessageContents.reduce((sum, content) => sum + content.length, 0);
+            ctx.logger.debug(`… done. Duration: ${duration} ms, Tokens: ~${totalTokens} - [${debugName}][${llmModelName}]`);
+            if (llmMessageContents.length !== llmInputs.length) {
+                throw new Error(`Expected ${llmInputs.length} LLM outputs, but got ${llmMessageContents.length}`);
+            }
+            const list = llmInputs.map((llmInput, i) => {
+                return {
+                    llmInput,
+                    llmMessageContent: llmMessageContents[i],
+                };
+            });
+            await writeDebugLlmInputOutputs(ctx, debugName, llmModelName, list);
+            return llmMessageContents;
+        },
+    };
+}
+async function readDebugLlmOutputs(ctx, debugName) {
+    const { logger, debugDir } = ctx;
+    if (!debugDir)
+        return;
+    const debugFile = join(debugDir, `${debugName}.txt`);
+    try {
+        const debugContent = await readFile(debugFile, "utf8");
+        const list = debugContent.split(debugSep);
+        if (list.length < 3)
+            return;
+        list.shift();
+        const outputs = [];
+        for (let i = 1; i < list.length; i += 2) {
+            outputs.push(list[i]);
+        }
+        logger.debug(`… found debug output for ${debugName} (skip calling LLM)`);
+        return outputs;
+    }
+    catch (error) {
+        if (error.code !== "ENOENT") {
+            logger.error(`Error reading debug output from "${debugFile}": ${messageOf(error)}`);
+        }
+    }
+}
+async function writeDebugLlmInputOutputs(ctx, debugName, llmModelName, list) {
+    const { debugDir } = ctx;
+    if (!debugDir)
+        return;
+    const dt = new Date().toISOString();
+    const baseName = `${dt.substring(0, 19)}-${debugName}`;
+    const content = [`${llmModelName ?? "Unamed model"} - ${debugName} - ${dt}`];
+    for (const { llmInput, llmMessageContent } of list) {
+        content.push(debugSep, llmInputToDebugMessage(llmInput), debugSep, llmMessageContent);
+    }
+    await writeFile(join(debugDir, `${baseName}.txt`), content.join(""));
+}
+function llmInputToDebugMessage(input) {
+    return Object.entries(input)
+        .map(([key, value]) => {
+            return `<${key}>
+${value}
+</${key}>`;
+        })
+        .join("\n\n");
+}
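
A sketch of how debugLlmOutput is presumably meant to wrap a single LLM call: when ctx.debugDir already holds a recorded output for the debug name, the call is skipped; otherwise the input/output pair is written there for later replay. The ctx shape, the relative import, and the ChatOpenAI model are assumptions for illustration.

// Hypothetical wiring, assuming ctx carries a logger and an optional debugDir.
import { ChatOpenAI } from "@langchain/openai";
import { debugLlmOutput } from "./debug-utils.js";

const ctx = { logger: console, debugDir: "/tmp/llm-debug" };
const llmInput = { user_message: "Describe a bakery site." };
const dbg = await debugLlmOutput(ctx, "site-analysis", "gpt-4o-mini", llmInput);
let content;
if (dbg.storedContent !== undefined) {
    content = dbg.storedContent; // replayed from an earlier run, no LLM call
}
else {
    const model = new ChatOpenAI({ model: "gpt-4o-mini" });
    const message = await model.invoke(llmInput.user_message);
    content = await dbg.getMessageContent(message); // logs timing, records to debugDir
}
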
package/gen-backend/dist/generator/lib/images-lib.js
@@ -0,0 +1,16 @@
+import { readdir } from "node:fs/promises";
+import { join } from "node:path";
+import { projectDir } from "../../context.js";
+const availableExtensions = new Set([".jpeg", ".jpg", ".png", ".gif", ".svg", ".webp"]);
+const imageFileNames = [];
+export async function initializeImageNames() {
+    for (const file of await readdir(join(projectDir, "images"))) {
+        const ext = file.slice(file.lastIndexOf("."));
+        if (!availableExtensions.has(ext))
+            continue;
+        imageFileNames.push(file);
+    }
+}
+export function getRandomImagePath() {
+    return join(projectDir, "images", imageFileNames[Math.floor(Math.random() * imageFileNames.length)]);
+}
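
Presumably initializeImageNames() is called once at startup so that getRandomImagePath() can serve placeholder images for fake content. A minimal sketch (the import path is assumed):

import { getRandomImagePath, initializeImageNames } from "./images-lib.js";

await initializeImageNames(); // scans <projectDir>/images for the supported extensions
const imagePath = getRandomImagePath(); // absolute path to one of the scanned files
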
package/gen-backend/dist/generator/lib/llm-invoke-types.js
@@ -0,0 +1 @@
+export {};
package/gen-backend/dist/generator/lib/llm-tokens.js
@@ -0,0 +1,10 @@
+import { TokenTextSplitter } from "langchain/text_splitter";
+export async function estimateTokenCount(text) {
+    const splitter = new TokenTextSplitter({
+        encodingName: "cl100k_base",
+        chunkSize: 1,
+        chunkOverlap: 0,
+    });
+    const chunks = await splitter.splitText(text);
+    return chunks.length;
+}
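
This is the estimator behind the "User tokens: ~N" log lines in debug-utils.js. A minimal sketch (the sample text is arbitrary):

import { estimateTokenCount } from "./llm-tokens.js";

const tokens = await estimateTokenCount("Generate a site schema for a small bakery.");
// Approximate cl100k_base token count, obtained by splitting into 1-token chunks.
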
package/gen-backend/dist/generator/lib/markdown-bulleted-list-parser.js
@@ -0,0 +1,147 @@
+export function parseMarkdownBulletedList(markdown) {
+    const lines = markdown.split(/\r?\n/);
+    const parsedLines = lines.map(parseLine).filter((line) => !!line);
+    return { home: convertParsedLinesToTree(parsedLines) };
+}
+export function convertParsedLinesToTree(lines) {
+    if (lines.length === 0) {
+        throw new Error("Input array cannot be empty");
+    }
+    const firstLine = lines[0];
+    // Ensure first line is a routing document with indent 0
+    if (firstLine.indent !== 0 || firstLine.kind !== "routingDocument") {
+        throw new Error("First line must be a routing document with zero indentation");
+    }
+    // Create root node
+    const root = {
+        kind: "routingDocument",
+        typeName: firstLine.typeName,
+    };
+    // Stack to keep track of parent nodes at each level
+    const stack = [root];
+    let currentDepth = 0;
+    // Process remaining lines
+    for (let i = 1; i < lines.length; ++i) {
+        const line = lines[i];
+        const node = createNode(line);
+        // If indent is greater than current level, add as child to last node in stack
+        if (line.indent > currentDepth) {
+            if (line.indent !== currentDepth + 1) {
+                throw new Error(`Invalid indentation level at line ${line.lineNumber}`);
+            }
+            const lastNode = stack[stack.length - 1];
+            appendChildToParentNode(node, lastNode);
+            stack.push(node);
+        }
+        // If indent is less than current level, pop stack until we reach correct level
+        else if (line.indent < currentDepth) {
+            while (line.indent <= currentDepth) {
+                stack.pop();
+                --currentDepth;
+            }
+            const lastNode = stack[stack.length - 1];
+            appendChildToParentNode(node, lastNode);
+            stack.push(node);
+        }
+        // If same indent, add as sibling
+        else {
+            stack.pop();
+            const lastNode = stack[stack.length - 1];
+            appendChildToParentNode(node, lastNode);
+            stack.push(node);
+        }
+        currentDepth = line.indent;
+    }
+    return root;
+}
+function appendChildToParentNode(child, parent) {
+    if (parent.kind === "routingDocument") {
+        parent.children ??= [];
+        parent.children.push(child);
+    }
+    else if (parent.kind === "regularDocument") {
+        if (child.kind === "routingDocument") {
+            throw new Error(`Regular document type "${parent.typeName}" cannot have ${child.kind} child "${child.typeName}"`);
+        }
+        parent.children ??= [];
+        parent.children.push(child);
+    }
+    else if (parent.kind === "part") {
+        if (child.kind !== "part") {
+            throw new Error(`Part type "${parent.typeName}" cannot have ${child.kind} child "${child.typeName}"`);
+        }
+        parent.children ??= [];
+        parent.children.push(child);
+    }
+}
+function createNode(line) {
+    switch (line.kind) {
+        case "routingDocument":
+            return {
+                kind: "routingDocument",
+                typeName: line.typeName,
+            };
+        case "regularDocument":
+            return {
+                kind: "regularDocument",
+                typeName: line.typeName,
+            };
+        case "part":
+            if (!line.listName) {
+                throw new Error("Part node must have a listName");
+            }
+            return {
+                kind: "part",
+                typeName: line.typeName,
+                listName: line.listName,
+            };
+        default:
+            throw new Error(`Unknown node kind: ${line.kind}`);
+    }
+}
+export function parseLine(input, index) {
+    if (!input.trim())
+        return;
+    const lineNumber = index + 1;
+    // Count indentation (2 spaces per level)
+    const indentSpaces = input.match(/^[ ]+/);
+    const bulletIndex = indentSpaces ? indentSpaces[0].length : 0;
+    const indent = bulletIndex / 2;
+    const bulletChar = input.charAt(bulletIndex);
+    if (bulletChar !== "*" && bulletChar !== "-") {
+        throw new Error(`Missing bullet at line ${lineNumber}`);
+    }
+    const cleanLine = input.substring(bulletIndex + 1).trim();
+    // list of `article` (regular documents)
+    const regularDocumentMatch = cleanLine.match(/^list of `([^`]+)` \(regular documents\)$/);
+    if (regularDocumentMatch) {
+        return {
+            kind: "regularDocument",
+            indent,
+            typeName: regularDocumentMatch[1],
+            lineNumber,
+        };
+    }
+    // list of `pageSection` (parts), list name: `partSections`
+    const partMatch = cleanLine.match(/^list of `([^`]+)` \(parts\),? list name: `([^`]+)`$/);
+    if (partMatch) {
+        return {
+            kind: "part",
+            indent,
+            typeName: partMatch[1],
+            listName: partMatch[2],
+            lineNumber,
+        };
+    }
+    // `pages` (routing document)
+    const routingDocumentMatch = cleanLine.match(/^`([^`]+)` \(routing document\)$/);
+    if (routingDocumentMatch) {
+        return {
+            kind: "routingDocument",
+            indent,
+            typeName: routingDocumentMatch[1],
+            lineNumber,
+        };
+    }
+    throw new Error(`Invalid line at line ${lineNumber}`);
+}
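
The parser accepts a two-space-indented bulleted list whose lines match the three patterns shown in the comments above. An illustrative input and call (the type names follow the examples in those comments; the list name is made up):

import { parseMarkdownBulletedList } from "./markdown-bulleted-list-parser.js";

const markdown = [
    "- `pages` (routing document)",
    "  - list of `article` (regular documents)",
    "    - list of `pageSection` (parts), list name: `sections`",
].join("\n");

const { home } = parseMarkdownBulletedList(markdown);
// home = { kind: "routingDocument", typeName: "pages", children: [
//   { kind: "regularDocument", typeName: "article", children: [
//     { kind: "part", typeName: "pageSection", listName: "sections" } ] } ] }
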
package/gen-backend/dist/generator/lib/parse-llm-response.js
@@ -0,0 +1,160 @@
+import { parse } from "yaml";
+export function parseLlmResponseAsProperties(llmResponse, outputTags) {
+    const rawTags = parseLlmRawTags(llmResponse, outputTags.map((tag) => tag.tagName));
+    const map = new Map(rawTags.map((tag) => [tag.tagName, tag]));
+    if (rawTags.length !== outputTags.length) {
+        const missingTags = outputTags.filter((tag) => !map.has(tag.tagName));
+        throw new Error(`Missing tags: ${missingTags.map((tag) => tag.tagName).join(", ")}`);
+    }
+    const resultObj = {};
+    for (const outputTag of outputTags) {
+        const raw = map.get(outputTag.tagName);
+        if (!raw) {
+            if (!outputTag.optional)
+                throw new Error(`Missing tag: ${outputTag.tagName}`);
+            continue;
+        }
+        resultObj[outputTag.key] = formatRawContent(raw.content, outputTag);
+    }
+    return resultObj;
+}
+export function parseLlmResponseAsList(llmResponse, outputTags, options = {}) {
+    const { tolerateErrors } = options;
+    const rawTags = parseLlmRawTags(llmResponse, outputTags.map((tag) => tag.tagName), options);
+    if (rawTags.length === 0)
+        return [];
+    const outputTagMap = new Map(outputTags.map((tag) => [tag.tagName, tag]));
+    const result = [];
+    let current = {};
+    for (const rawTag of rawTags) {
+        const outputTag = outputTagMap.get(rawTag.tagName);
+        if (!outputTag)
+            throw new Error(`Unexpected output tag "${rawTag.tagName}"`); // it's a real bug
+        if (rawTag.tagName in current) {
+            const rawTag = ensureProperties(current, outputTags, options);
+            if (rawTag) {
+                result.push(rawTag);
+            }
+            current = {};
+        }
+        if (rawTag.content === "") {
+            if (!outputTag.optional) {
+                const message = `Empty tag <${outputTag.tagName}>`;
+                if (!tolerateErrors)
+                    throw new Error(message);
+                tolerateErrors.errorMessages.push(message);
+                current = {};
+            }
+            continue;
+        }
+        current[outputTag.key] = formatRawContent(rawTag.content, outputTag);
+    }
+    if (Object.keys(current).length > 0) {
+        const rawTag = ensureProperties(current, outputTags, options);
+        if (rawTag) {
+            result.push(rawTag);
+        }
+    }
+    return result;
+}
+function ensureProperties(obj, outputTags, options) {
+    const { tolerateErrors } = options;
+    for (const tag of outputTags) {
+        if (!(tag.key in obj) && !tag.optional) {
+            const message = `Missing tag <${tag.tagName}>`;
+            if (!tolerateErrors)
+                throw new Error(message);
+            tolerateErrors.errorMessages.push(message);
+            return;
+        }
+    }
+    return obj;
+}
+function formatRawContent(rawContent, tag) {
+    const { format, optional } = tag;
+    if (!rawContent && optional)
+        return;
+    switch (format) {
+        case "yaml":
+            return parse(rawContent);
+        case "json":
+            return JSON.parse(rawContent);
+        case "markdown":
+        case "text":
+            return rawContent;
+        default:
+            throw new Error(`Unknown format "${format}"`);
+    }
+}
+export function parseLlmRawTags(llmResponse, tagNames, options = {}) {
+    const { tolerateErrors } = options;
+    const tagList = [];
+    // Create regex to match all opening and closing tags
+    const tagNamesPattern = tagNames.join("|");
+    const pattern = new RegExp(`<(${tagNamesPattern})>|</(${tagNamesPattern})>`, "g");
+    const matches = [];
+    let match;
+    // Find all tags (opening and closing) and store them with their positions
+    // biome-ignore lint/suspicious/noAssignInExpressions: regex exec requires assignment in loop
+    while ((match = pattern.exec(llmResponse)) !== null) {
+        const isOpening = match[1] !== undefined;
+        const tagName = isOpening ? match[1] : match[2];
+        matches.push({ isOpening, tagName, position: match.index });
+    }
+    // Process the tags to extract content
+    for (let i = 0; i < matches.length; ++i) {
+        const current = matches[i];
+        if (current.isOpening) {
+            // Find the next corresponding closing tag
+            let j = i + 1;
+            let foundClosing = false;
+            while (j < matches.length) {
+                const next = matches[j];
+                // If we encounter another opening tag of any type before finding our closing tag,
+                // it's an error if not tolerating errors
+                if (next.isOpening) {
+                    const message = `Missing closing tag for <${current.tagName}>`;
+                    if (!tolerateErrors)
+                        throw new Error(message);
+                    tolerateErrors.errorMessages.push(message);
+                    foundClosing = undefined;
+                    // If we are tolerating errors, we skip this opening tag entirely
+                    break;
+                }
+                if (!next.isOpening && next.tagName === current.tagName) {
+                    // Found a matching closing tag
+                    const contentStart = current.position + `<${current.tagName}>`.length;
+                    const contentEnd = next.position;
+                    const content = llmResponse.substring(contentStart, contentEnd).trim();
+                    tagList.push({
+                        tagName: current.tagName,
+                        content,
+                    });
+                    // Skip to after this closing tag
+                    i = j;
+                    foundClosing = true;
+                    break;
+                }
+                if (!next.isOpening && next.tagName !== current.tagName) {
+                    // Found a non-matching closing tag
+                    const message = `Mismatched tags: opening <${current.tagName}>, closing </${next.tagName}>`;
+                    if (!tolerateErrors)
+                        throw new Error(message);
+                    tolerateErrors.errorMessages.push(message);
+                    foundClosing = undefined;
+                    // If we are tolerating errors, we skip this current opening tag entirely
+                    break;
+                }
+                ++j;
+            }
+            // Handle case where no matching closing tag was found
+            if (foundClosing === false) {
+                const message = `Unclosed tag <${current.tagName}>`;
+                if (!tolerateErrors)
+                    throw new Error(message);
+                tolerateErrors.errorMessages.push(message);
+            }
+        }
+    }
+    return tagList;
+}
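
A sketch of how an LLM answer wrapped in XML-like tags is turned into an object. The tag names, keys, and formats here are illustrative, not the ones used by the plugin's actual queries:

import { parseLlmResponseAsProperties } from "./parse-llm-response.js";

const llmResponse = `
<site_name>
Sunrise Bakery
</site_name>
<theme>
primaryColor: "#f4a261"
font: serif
</theme>
`;
const result = parseLlmResponseAsProperties(llmResponse, [
    { tagName: "site_name", key: "siteName", format: "text" },
    { tagName: "theme", key: "theme", format: "yaml" },
]);
// result = { siteName: "Sunrise Bakery", theme: { primaryColor: "#f4a261", font: "serif" } }
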
package/gen-backend/dist/generator/lib/tasks.js
@@ -0,0 +1,112 @@
+import { messageOf } from "@paroi/data-formatters-lib";
+export function createTaskCollector(ctx) {
+    const tasks = [];
+    return {
+        tasks,
+        add(task) {
+            tasks.push(task);
+        },
+        runAll(options) {
+            return runTasks(ctx, tasks, options);
+        },
+    };
+}
+export function runTasks(ctx, tasks, options) {
+    const { logger } = ctx;
+    const { maxParallel, rateLimitPerSecond } = options;
+    let resolve;
+    let reject;
+    const promise = new Promise((resolveCb, rejectCb) => {
+        resolve = resolveCb;
+        reject = rejectCb;
+    });
+    let timeoutId;
+    let runningCount = 0;
+    let index = 0;
+    let doneCount = 0;
+    const errorMessages = [];
+    // Track task start timestamps for rate limiting
+    const startTimestamps = [];
+    // Flag to indicate whether the task runner has been stopped
+    let mustStop = false;
+    // Resolve function for stop promise
+    let stopResolve;
+    const stopPromise = new Promise((resolveStop) => {
+        stopResolve = resolveStop;
+    });
+    function runNext() {
+        // Don't start new tasks if stopped
+        if (mustStop) {
+            if (runningCount === 0) {
+                // All running tasks have completed, we can resolve the stop promise
+                if (index < tasks.length) {
+                    reject(new Error("Stopped before all tasks completed"));
+                }
+                else {
+                    resolve({ doneCount, errorMessages });
+                }
+                stopResolve();
+            }
+            return;
+        }
+        if (index >= tasks.length) {
+            if (runningCount === 0) {
+                resolve({ doneCount, errorMessages });
+            }
+            return;
+        }
+        if (runningCount >= maxParallel)
+            return;
+        // Check rate limit if specified
+        if (rateLimitPerSecond !== undefined && rateLimitPerSecond > 0) {
+            const now = Date.now();
+            // Remove timestamps older than 1 second
+            const oneSecondAgo = now - 1000;
+            while (startTimestamps.length > 0 && startTimestamps[0] <= oneSecondAgo) {
+                startTimestamps.shift();
+            }
+            // Check if we've hit the rate limit
+            if (startTimestamps.length >= rateLimitPerSecond) {
+                // We've reached our rate limit, schedule retry after delay
+                const oldestTimestamp = startTimestamps[0];
+                const delayMs = Math.max(10, oldestTimestamp + 1000 - now);
+                clearTimeout(timeoutId);
+                timeoutId = setTimeout(runNext, delayMs);
+                return;
+            }
+            // Record this task start time
+            startTimestamps.push(now);
+        }
+        const task = tasks[index];
+        ++index;
+        ++runningCount;
+        task().then(() => {
+            ++doneCount;
+            --runningCount;
+            runNext();
+        }, (err) => {
+            logger.error(err);
+            errorMessages.push(messageOf(err));
+            --runningCount;
+            runNext();
+        });
+    }
+    for (let i = 0; i < maxParallel; ++i) {
+        runNext();
+    }
+    function stop() {
+        // Mark as stopped to prevent new tasks from starting
+        mustStop = true;
+        clearTimeout(timeoutId);
+        // If no tasks are running, resolve immediately
+        if (runningCount === 0) {
+            stopResolve();
+        }
+        // Otherwise, stopResolve will be called when runningCount reaches 0
+        return stopPromise;
+    }
+    return {
+        promise,
+        stop,
+    };
+}
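
runTasks drives an array of zero-argument async functions with a concurrency cap and an optional per-second start rate; failures are logged and collected rather than aborting the run. A small sketch with placeholder tasks (the option values are arbitrary):

import { runTasks } from "./tasks.js";

const ctx = { logger: console };
const tasks = Array.from({ length: 10 }, (_, i) => async () => {
    await new Promise((r) => setTimeout(r, 100)); // stand-in for an LLM call
    return i;
});
const { promise, stop } = runTasks(ctx, tasks, { maxParallel: 3, rateLimitPerSecond: 5 });
const { doneCount, errorMessages } = await promise; // doneCount === 10, errorMessages === []
// Calling stop() early lets in-flight tasks finish, drops pending ones,
// and settles `promise` (rejecting with "Stopped before all tasks completed").
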
package/gen-backend/dist/generator/lib/utils.js
@@ -0,0 +1,13 @@
+export function camelToKebabCase(s) {
+    return s
+        .replace(/[A-Z]/g, (m) => `-${m.toLowerCase()}`)
+        .replace(/^-+/, "")
+        .replace(/-+$/, "")
+        .replace(/--+/g, "-");
+}
+export function camelToTitleCase(camelCase) {
+    return camelCase
+        .replace(/([A-Z])/g, " $1")
+        .replace(/^./, (str) => str.toUpperCase())
+        .trimStart();
+}
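
For reference, the two case helpers behave like this (the example identifier is chosen arbitrarily):

import { camelToKebabCase, camelToTitleCase } from "./utils.js";

camelToKebabCase("pageSectionTitle"); // "page-section-title"
camelToTitleCase("pageSectionTitle"); // "Page Section Title"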