@paroicms/site-generator-plugin 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. package/README.md +9 -0
  2. package/gen-backend/dist/context.js +2 -0
  3. package/gen-backend/dist/data-format.js +37 -0
  4. package/gen-backend/dist/generator/actions.js +35 -0
  5. package/gen-backend/dist/generator/fake-content-generator.ts/create-database-with-fake-content.js +227 -0
  6. package/gen-backend/dist/generator/fake-content-generator.ts/create-node-contents.js +156 -0
  7. package/gen-backend/dist/generator/fake-content-generator.ts/fake-content-types.js +1 -0
  8. package/gen-backend/dist/generator/fake-content-generator.ts/generate-fake-content.js +127 -0
  9. package/gen-backend/dist/generator/fake-content-generator.ts/invoke-generate-fake-content.js +49 -0
  10. package/gen-backend/dist/generator/generator-types.js +1 -0
  11. package/gen-backend/dist/generator/helpers/esm-module.helper.js +6 -0
  12. package/gen-backend/dist/generator/helpers/js-utils.js +14 -0
  13. package/gen-backend/dist/generator/lib/common-types.js +1 -0
  14. package/gen-backend/dist/generator/lib/create-prompt.js +44 -0
  15. package/gen-backend/dist/generator/lib/debug-utils.js +118 -0
  16. package/gen-backend/dist/generator/lib/images-lib.js +16 -0
  17. package/gen-backend/dist/generator/lib/llm-invoke-types.js +1 -0
  18. package/gen-backend/dist/generator/lib/llm-tokens.js +10 -0
  19. package/gen-backend/dist/generator/lib/markdown-bulleted-list-parser.js +147 -0
  20. package/gen-backend/dist/generator/lib/parse-llm-response.js +160 -0
  21. package/gen-backend/dist/generator/lib/tasks.js +112 -0
  22. package/gen-backend/dist/generator/lib/utils.js +13 -0
  23. package/gen-backend/dist/generator/llm-queries/invoke-message-guard.js +86 -0
  24. package/gen-backend/dist/generator/llm-queries/invoke-new-site-analysis.js +169 -0
  25. package/gen-backend/dist/generator/llm-queries/invoke-update-site-schema.js +94 -0
  26. package/gen-backend/dist/generator/site-generator/common-template-creator.js +108 -0
  27. package/gen-backend/dist/generator/site-generator/document-template-creator.js +329 -0
  28. package/gen-backend/dist/generator/site-generator/id-key-provider.js +14 -0
  29. package/gen-backend/dist/generator/site-generator/jt-site-schema-helpers.js +55 -0
  30. package/gen-backend/dist/generator/site-generator/site-generator.js +75 -0
  31. package/gen-backend/dist/generator/site-generator/template-creator-types.js +1 -0
  32. package/gen-backend/dist/generator/site-generator/template-helpers.js +26 -0
  33. package/gen-backend/dist/generator/site-generator/theme-creator.js +180 -0
  34. package/gen-backend/dist/generator/site-generator/theme-css.js +323 -0
  35. package/gen-backend/dist/generator/site-schema-generator/analysis-types.js +1 -0
  36. package/gen-backend/dist/generator/site-schema-generator/create-l10n.js +42 -0
  37. package/gen-backend/dist/generator/site-schema-generator/create-site-schema.js +240 -0
  38. package/gen-backend/dist/generator/site-schema-generator/default-pages.js +38 -0
  39. package/gen-backend/dist/plugin.js +86 -0
  40. package/gen-backend/prompts/0-context.md +9 -0
  41. package/gen-backend/prompts/generate-fake-content-multiple.md +22 -0
  42. package/gen-backend/prompts/generate-fake-content-single.md +16 -0
  43. package/gen-backend/prompts/message-guard.md +89 -0
  44. package/gen-backend/prompts/new-site-1-analysis.md +214 -0
  45. package/gen-backend/prompts/new-site-2-fields.md +50 -0
  46. package/gen-backend/prompts/predefined-fields.json +110 -0
  47. package/gen-backend/prompts/test-message1.txt +1 -0
  48. package/gen-backend/prompts/update-site-schema-1-write-details.md +57 -0
  49. package/gen-backend/prompts/update-site-schema-2-execute.md +77 -0
  50. package/gen-front/dist/gen-front.css +1 -0
  51. package/gen-front/dist/gen-front.eot +0 -0
  52. package/gen-front/dist/gen-front.mjs +998 -0
  53. package/gen-front/dist/gen-front.svg +345 -0
  54. package/gen-front/dist/gen-front.ttf +0 -0
  55. package/gen-front/dist/gen-front.woff +0 -0
  56. package/gen-front/dist/gen-front.woff2 +0 -0
  57. package/gen-front/dist/gen-front2.woff2 +0 -0
  58. package/gen-front/dist/gen-front3.woff2 +0 -0
  59. package/package.json +79 -0
@@ -0,0 +1,44 @@
1
+ import { PromptTemplate } from "@langchain/core/prompts";
2
+ import { connectorPackageDir } from "@paroicms/connector";
3
+ import { readFile } from "node:fs/promises";
4
+ import { join } from "node:path";
5
+ import { projectDir } from "../../context.js";
6
+ // Shared context text inserted under "# Context" in every template built by createPromptTemplate.
+ // Read once, at module load time, through top-level await.
+ const contextContent = await readPromptFile("0-context.md");
7
+ // TypeScript declarations of the site schema, read from the "typeonly" directory of the connector package.
+ const siteSchemaTsDefs = await readFile(join(connectorPackageDir, "typeonly", "site-schema-json-types.d.ts"), "utf-8");
8
+ // Parsed content of the "predefined-fields.json" prompt file; exposed through getPredefinedFields().
+ const predefinedFields = JSON.parse(await readPromptFile("predefined-fields.json"));
9
/**
 * Builds a prompt template made of the shared context followed by a task prompt.
 *
 * The task prompt is read from the prompts directory. When `withSiteSchemaTsDefs`
 * is truthy, a section containing the `{siteSchemaTsDefs}` placeholder is appended
 * to the context; the placeholder is left as-is for the template engine to fill.
 *
 * @param {{ fileName: string, withSiteSchemaTsDefs?: boolean }} options
 * @returns {Promise<object>} The value returned by `PromptTemplate.fromTemplate`.
 */
export async function createPromptTemplate(options) {
  const { fileName, withSiteSchemaTsDefs } = options;
  const taskContent = await readPromptFile(fileName);

  let schemaSection = "";
  if (withSiteSchemaTsDefs) {
    schemaSection = [
      "",
      "",
      "All the site structure is described in a site-schema in JSON format. Here is the TypeScript definition for the site schema:",
      "",
      "<site_schema_ts_defs>",
      "{siteSchemaTsDefs}",
      "</site_schema_ts_defs>",
    ].join("\n");
  }

  // Each entry is one line of the final template; empty entries are blank lines.
  const templateLines = [
    "",
    "# Context",
    "",
    `${contextContent}${schemaSection}`,
    "",
    "# Task to do",
    "",
    `${taskContent}`,
    "",
  ];
  return PromptTemplate.fromTemplate(templateLines.join("\n"));
}
32
/**
 * Reads a file from the "prompts" directory of the project as UTF-8 text.
 *
 * @param {string} fileName - File name relative to the "prompts" directory.
 * @returns {Promise<string>} The file content.
 */
export async function readPromptFile(fileName) {
  const promptPath = join(projectDir, "prompts", fileName);
  const content = await readFile(promptPath, "utf-8");
  return content;
}
35
/**
 * Returns the predefined fields parsed at module load time.
 *
 * @returns {*} The parsed predefined fields.
 * @throws {Error} When the module-level value is falsy.
 */
export function getPredefinedFields() {
  if (predefinedFields) {
    return predefinedFields;
  }
  throw new Error("Predefined fields not loaded");
}
40
/**
 * Returns the site-schema TypeScript definitions read at module load time.
 *
 * @returns {string} The definitions source text.
 * @throws {Error} When the module-level value is falsy (for instance an empty string).
 */
export function getSiteSchemaTsDefs() {
  if (siteSchemaTsDefs) {
    return siteSchemaTsDefs;
  }
  throw new Error("Site-schema defs not loaded");
}
@@ -0,0 +1,118 @@
1
+ import { messageOf } from "@paroi/data-formatters-lib";
2
+ import { readFile, writeFile } from "node:fs/promises";
3
+ import { join } from "node:path";
4
+ import { estimateTokenCount } from "./llm-tokens.js";
5
+ // Delimiter written between the header, each input and each output of a debug file,
+ // and used to split the file content when reading stored outputs back.
+ const debugSep = "\n\n========================\n\n";
6
/**
 * Prepares debugging support around a single LLM call.
 *
 * Looks for a stored output for `debugName`; when one exists it is exposed as
 * `storedContent`. Otherwise the estimated token count of the input values is logged.
 *
 * @param {object} ctx - Must provide `logger`; other properties are forwarded to the read/write helpers.
 * @param {string} debugName - Name used in logs and to locate debug files.
 * @param {string} llmModelName - Model name used in logs and in the written debug file.
 * @param {object} llmInput - Input values; they are joined with newlines for the token estimate.
 * @returns {Promise<{ storedContent: (string|undefined), getMessageContent: Function }>}
 *   `getMessageContent(llmMessage)` checks that the message content is a string, logs the
 *   duration, writes the input/output pair through the debug writer, and returns the content.
 */
export async function debugLlmOutput(ctx, debugName, llmModelName, llmInput) {
  const logPrefix = `[${debugName}][${llmModelName}]`;
  const [storedContent] = (await readDebugLlmOutputs(ctx, debugName)) ?? [];

  if (storedContent) {
    ctx.logger.info(`${logPrefix} Found debug output (skip calling LLM)`);
  } else {
    const joinedInput = Object.values(llmInput).join("\n");
    const userTokens = joinedInput ? await estimateTokenCount(joinedInput) : 0;
    ctx.logger.debug(`${logPrefix} Calling LLM… User tokens: ~${userTokens}`);
  }

  const startedAt = Date.now();

  const getMessageContent = async (llmMessage) => {
    const { content } = llmMessage;
    if (typeof content !== "string") {
      throw new Error("Expected a string");
    }
    const elapsedMs = Date.now() - startedAt;
    // The figure reported as "Tokens" is the character length of the content.
    ctx.logger.debug(`… done. Duration: ${elapsedMs} ms, Tokens: ~${content.length} - ${logPrefix}`);
    await writeDebugLlmInputOutputs(ctx, debugName, llmModelName, [
      { llmInput, llmMessageContent: content },
    ]);
    return content;
  };

  return { storedContent, getMessageContent };
}
36
/**
 * Prepares debugging support around a batch of LLM calls.
 *
 * Looks for stored outputs for `debugName`; when found they are exposed as
 * `storedContents`. Otherwise the estimated token count of all inputs is logged.
 *
 * @param {object} ctx - Must provide `logger`; other properties are forwarded to the read/write helpers.
 * @param {string} debugName - Name used in logs and to locate debug files.
 * @param {string} llmModelName - Model name used in logs and in the written debug file.
 * @param {object[]} llmInputs - One input object per LLM call.
 * @returns {Promise<{ storedContents: (string[]|undefined), getMessageContents: Function }>}
 *   `getMessageContents(llmMessages)` checks every content is a string, logs the duration,
 *   requires one output per input, writes the pairs through the debug writer, and returns the contents.
 */
export async function debugBatchLlmOutputs(ctx, debugName, llmModelName, llmInputs) {
  const logPrefix = `[${debugName}][${llmModelName}]`;
  const storedContents = await readDebugLlmOutputs(ctx, debugName);

  if (storedContents) {
    ctx.logger.info(`${logPrefix} Found debug output (skip calling LLM)`);
  } else {
    const inputBlocks = [];
    for (const llmInput of llmInputs) {
      inputBlocks.push(Object.values(llmInput).join("\n"));
    }
    const joinedInput = inputBlocks.join("\n\n");
    const userTokens = joinedInput ? await estimateTokenCount(joinedInput) : 0;
    ctx.logger.debug(`${logPrefix} Calling LLM… User tokens: ~${userTokens}`);
  }

  const startedAt = Date.now();

  const getMessageContents = async (llmMessages) => {
    const contents = [];
    let totalLength = 0;
    for (const llmMessage of llmMessages) {
      if (typeof llmMessage.content !== "string") {
        throw new Error("Expected a string");
      }
      contents.push(llmMessage.content);
      totalLength += llmMessage.content.length;
    }
    const elapsedMs = Date.now() - startedAt;
    // The figure reported as "Tokens" is the summed character length of the contents.
    ctx.logger.debug(`… done. Duration: ${elapsedMs} ms, Tokens: ~${totalLength} - ${logPrefix}`);
    if (contents.length !== llmInputs.length) {
      throw new Error(`Expected ${llmInputs.length} LLM outputs, but got ${contents.length}`);
    }
    const pairs = llmInputs.map((llmInput, position) => ({
      llmInput,
      llmMessageContent: contents[position],
    }));
    await writeDebugLlmInputOutputs(ctx, debugName, llmModelName, pairs);
    return contents;
  };

  return { storedContents, getMessageContents };
}
74
/**
 * Reads the stored LLM outputs from the debug file named `<debugName>.txt`.
 *
 * The file content is split on the debug separator: the first section is a header,
 * then sections alternate between an input and its output. Only outputs are returned.
 *
 * @param {object} ctx - Provides `logger` and an optional `debugDir`.
 * @param {string} debugName - Base name of the debug file.
 * @returns {Promise<string[]|undefined>} The stored outputs, or `undefined` when there is no
 *   `debugDir`, the file cannot be read, or it holds fewer than three sections.
 */
async function readDebugLlmOutputs(ctx, debugName) {
  const { logger, debugDir } = ctx;
  if (!debugDir) {
    return undefined;
  }
  const debugFile = join(debugDir, `${debugName}.txt`);
  try {
    const sections = (await readFile(debugFile, "utf8")).split(debugSep);
    if (sections.length < 3) {
      return undefined;
    }
    // Drop the header, then keep every second section starting with the first output.
    const outputs = sections.slice(1).filter((_section, position) => position % 2 === 1);
    logger.debug(`… found debug output for ${debugName} (skip calling LLM)`);
    return outputs;
  } catch (error) {
    // A missing file is the normal "nothing stored" case; anything else is reported.
    if (error.code !== "ENOENT") {
      logger.error(`Error reading debug output from "${debugFile}": ${messageOf(error)}`);
    }
    return undefined;
  }
}
98
/**
 * Writes a debug file holding a header followed by every input/output pair.
 *
 * The file is named `<ISO date-time, seconds precision>-<debugName>.txt` and sections
 * are delimited with the debug separator. Nothing is written when `ctx.debugDir` is falsy.
 *
 * NOTE(review): the file name contains ":" characters taken from the ISO timestamp —
 * confirm this is acceptable on every target file system.
 *
 * @param {object} ctx - Provides an optional `debugDir`.
 * @param {string} debugName - Name used in the header and the file name.
 * @param {string|undefined} llmModelName - Model name used in the header.
 * @param {{ llmInput: object, llmMessageContent: string }[]} list - Pairs to store.
 * @returns {Promise<void>}
 */
async function writeDebugLlmInputOutputs(ctx, debugName, llmModelName, list) {
  const { debugDir } = ctx;
  if (!debugDir) {
    return;
  }
  const timestamp = new Date().toISOString();
  const header = `${llmModelName ?? "Unamed model"} - ${debugName} - ${timestamp}`;
  const sections = list.flatMap(({ llmInput, llmMessageContent }) => [
    debugSep,
    llmInputToDebugMessage(llmInput),
    debugSep,
    llmMessageContent,
  ]);
  const fileName = `${timestamp.substring(0, 19)}-${debugName}.txt`;
  await writeFile(join(debugDir, fileName), [header, ...sections].join(""));
}
110
/**
 * Renders an LLM input object as text: each entry becomes a `<key>…</key>` block with
 * the value on its own line, and blocks are separated by a blank line.
 *
 * @param {object} input - Input values keyed by name.
 * @returns {string} The rendered text (empty for an object without entries).
 */
function llmInputToDebugMessage(input) {
  const blocks = [];
  for (const [tag, text] of Object.entries(input)) {
    blocks.push(`<${tag}>\n${text}\n</${tag}>`);
  }
  return blocks.join("\n\n");
}
@@ -0,0 +1,16 @@
1
+ import { readdir } from "node:fs/promises";
2
+ import { join } from "node:path";
3
+ import { projectDir } from "../../context.js";
4
+ // File extensions (with the leading dot) that initializeImageNames accepts as images.
+ // The comparison is done on the raw file name, so it is case-sensitive.
+ const availableExtensions = new Set([".jpeg", ".jpg", ".png", ".gif", ".svg", ".webp"]);
5
+ // Image file names collected by initializeImageNames and picked from by getRandomImagePath.
+ const imageFileNames = [];
6
/**
 * Scans the "images" directory of the project and appends every file whose extension
 * is in `availableExtensions` to the module-level list of image file names.
 *
 * The extension is everything from the last "." of the file name.
 *
 * @returns {Promise<void>}
 */
export async function initializeImageNames() {
  const entries = await readdir(join(projectDir, "images"));
  const hasImageExtension = (name) => availableExtensions.has(name.slice(name.lastIndexOf(".")));
  for (const name of entries.filter(hasImageExtension)) {
    imageFileNames.push(name);
  }
}
14
/**
 * Returns the path of a randomly chosen image from the "images" directory of the project.
 *
 * The candidates are the file names previously collected by `initializeImageNames()`.
 *
 * @returns {string} Path of the chosen image file.
 * @throws {Error} When no image file name has been collected. Without this guard an
 *   `undefined` segment would be passed to `join`, which fails with an unrelated TypeError.
 */
export function getRandomImagePath() {
  if (imageFileNames.length === 0) {
    throw new Error('No image available: call initializeImageNames() first and check the "images" directory');
  }
  const randomIndex = Math.floor(Math.random() * imageFileNames.length);
  return join(projectDir, "images", imageFileNames[randomIndex]);
}
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,10 @@
1
+ import { TokenTextSplitter } from "langchain/text_splitter";
2
/**
 * Estimates the number of tokens in a text by splitting it with a `TokenTextSplitter`
 * configured for the "cl100k_base" encoding, one token per chunk and no overlap,
 * then counting the chunks.
 *
 * @param {string} text - Text to measure.
 * @returns {Promise<number>} Number of chunks produced by the splitter.
 */
export async function estimateTokenCount(text) {
  const oneTokenPerChunk = {
    encodingName: "cl100k_base",
    chunkSize: 1,
    chunkOverlap: 0,
  };
  const { length } = await new TokenTextSplitter(oneTokenPerChunk).splitText(text);
  return length;
}
@@ -0,0 +1,147 @@
1
/**
 * Parses a Markdown bulleted list into a tree.
 *
 * The text is split into lines (LF or CRLF), each line goes through `parseLine`,
 * lines for which it returns a falsy value are dropped, and the remaining parsed
 * lines are converted to a tree.
 *
 * @param {string} markdown - The bulleted list.
 * @returns {{ home: object }} The tree root under the `home` property.
 */
export function parseMarkdownBulletedList(markdown) {
  const parsedLines = [];
  markdown.split(/\r?\n/).forEach((rawLine, index, allLines) => {
    const parsed = parseLine(rawLine, index, allLines);
    if (parsed) {
      parsedLines.push(parsed);
    }
  });
  return { home: convertParsedLinesToTree(parsedLines) };
}
6
/**
 * Converts a flat list of parsed lines into a tree, using each line's `indent` as its depth.
 *
 * The first line is the root: it must be a routing document with zero indentation.
 * Every following line becomes a child of the closest preceding line whose indent is
 * exactly one less.
 *
 * @param {Array<{ kind: string, indent: number, typeName: string, listName?: string, lineNumber?: number }>} lines
 * @returns {object} The root node.
 * @throws {Error} When `lines` is empty, the first line is not a zero-indent routing document,
 *   an indent is not an integer or skips a level, or a later line has zero indentation
 *   (it would have no parent). Errors from `createNode` and `appendChildToParentNode` propagate.
 */
export function convertParsedLinesToTree(lines) {
  if (lines.length === 0) {
    throw new Error("Input array cannot be empty");
  }
  const [firstLine, ...followingLines] = lines;
  if (firstLine.indent !== 0 || firstLine.kind !== "routingDocument") {
    throw new Error("First line must be a routing document with zero indentation");
  }
  const root = {
    kind: "routingDocument",
    typeName: firstLine.typeName,
  };
  // ancestors[d] is the most recent node at depth d; its length is "last depth + 1".
  const ancestors = [root];
  for (const line of followingLines) {
    const node = createNode(line);
    const { indent, lineNumber } = line;
    // A line can go at most one level deeper than the previous one, and the level
    // must be a whole number (a fractional level would otherwise be mis-nested).
    if (!Number.isInteger(indent) || indent < 0 || indent > ancestors.length) {
      throw new Error(`Invalid indentation level at line ${lineNumber}`);
    }
    if (indent === 0) {
      // Only the root may be at depth 0: there is no parent to attach this node to.
      throw new Error(`Unexpected zero indentation at line ${lineNumber}: only the first line can be at the root level`);
    }
    // Forget every node at this depth or deeper; the parent is then the last ancestor.
    ancestors.length = indent;
    appendChildToParentNode(node, ancestors[indent - 1]);
    ancestors.push(node);
  }
  return root;
}
57
/**
 * Appends `child` to `parent.children` (created on demand) after checking nesting rules:
 * a regular document cannot contain a routing document, and a part can only contain parts.
 * A parent of any other kind is left untouched.
 *
 * @param {{ kind: string, typeName: string }} child
 * @param {{ kind: string, typeName: string, children?: object[] }} parent
 * @throws {Error} When the child kind is not allowed under the parent kind.
 */
function appendChildToParentNode(child, parent) {
  const parentKind = parent.kind;
  const isKnownParentKind =
    parentKind === "routingDocument" || parentKind === "regularDocument" || parentKind === "part";
  if (!isKnownParentKind) {
    return;
  }
  if (parentKind === "regularDocument" && child.kind === "routingDocument") {
    throw new Error(`Regular document type "${parent.typeName}" cannot have ${child.kind} child "${child.typeName}"`);
  }
  if (parentKind === "part" && child.kind !== "part") {
    throw new Error(`Part type "${parent.typeName}" cannot have ${child.kind} child "${child.typeName}"`);
  }
  parent.children ??= [];
  parent.children.push(child);
}
77
/**
 * Creates a tree node from a parsed line, keeping only `kind`, `typeName` and,
 * for parts, `listName`.
 *
 * @param {{ kind: string, typeName: string, listName?: string }} line
 * @returns {{ kind: string, typeName: string, listName?: string }} The new node.
 * @throws {Error} When a part has no `listName`, or when the kind is unknown.
 */
function createNode(line) {
  const { kind, typeName } = line;
  if (kind === "routingDocument" || kind === "regularDocument") {
    return { kind, typeName };
  }
  if (kind === "part") {
    const { listName } = line;
    if (!listName) {
      throw new Error("Part node must have a listName");
    }
    return { kind, typeName, listName };
  }
  throw new Error(`Unknown node kind: ${kind}`);
}
102
/**
 * Parses one line of the Markdown bulleted list.
 *
 * A line is made of leading spaces (2 per indentation level), a `*` or `-` bullet,
 * then one of these forms:
 *
 * - list of `typeName` (regular documents)
 * - list of `typeName` (parts), list name: `listName`
 * - `typeName` (routing document)
 *
 * @param {string} input - The raw line.
 * @param {number} index - Zero-based index of the line; reported as `index + 1` in the result and in errors.
 * @returns {{ kind: string, indent: number, typeName: string, listName?: string, lineNumber: number }|undefined}
 *   The parsed line, or `undefined` for a blank line.
 * @throws {Error} When the bullet is missing, the indentation is not a multiple of 2 spaces,
 *   or the text after the bullet matches none of the supported forms.
 */
export function parseLine(input, index) {
  if (!input.trim()) {
    return undefined;
  }
  const lineNumber = index + 1;
  const bulletIndex = input.match(/^[ ]+/)?.[0].length ?? 0;
  const bulletChar = input.charAt(bulletIndex);
  if (bulletChar !== "*" && bulletChar !== "-") {
    throw new Error(`Missing bullet at line ${lineNumber}`);
  }
  // An odd number of spaces would give a fractional level, which cannot be placed in the tree.
  if (bulletIndex % 2 !== 0) {
    throw new Error(`Invalid indentation at line ${lineNumber}: expected a multiple of 2 spaces`);
  }
  const indent = bulletIndex / 2;
  const cleanLine = input.substring(bulletIndex + 1).trim();

  // list of `article` (regular documents)
  const regularDocumentMatch = cleanLine.match(/^list of `([^`]+)` \(regular documents\)$/);
  if (regularDocumentMatch) {
    return {
      kind: "regularDocument",
      indent,
      typeName: regularDocumentMatch[1],
      lineNumber,
    };
  }
  // list of `pageSection` (parts), list name: `partSections`
  const partMatch = cleanLine.match(/^list of `([^`]+)` \(parts\),? list name: `([^`]+)`$/);
  if (partMatch) {
    return {
      kind: "part",
      indent,
      typeName: partMatch[1],
      listName: partMatch[2],
      lineNumber,
    };
  }
  // `pages` (routing document)
  const routingDocumentMatch = cleanLine.match(/^`([^`]+)` \(routing document\)$/);
  if (routingDocumentMatch) {
    return {
      kind: "routingDocument",
      indent,
      typeName: routingDocumentMatch[1],
      lineNumber,
    };
  }
  throw new Error(`Invalid line at line ${lineNumber}`);
}
@@ -0,0 +1,160 @@
1
+ import { parse } from "yaml";
2
/**
 * Parses an LLM response in which each expected tag appears at most once, and returns
 * an object holding the formatted content of each tag under the tag's `key`.
 *
 * Tags flagged `optional` may be absent; their key is then left out of the result.
 * (The previous length comparison between found and expected tags rejected such
 * responses and reported duplicates as an empty "Missing tags" list.)
 *
 * @param {string} llmResponse - Raw text returned by the LLM.
 * @param {Array<{ tagName: string, key: string, format: string, optional?: boolean }>} outputTags
 * @returns {object} Formatted contents keyed by `outputTag.key`.
 * @throws {Error} When a tag appears more than once or a non-optional tag is missing.
 *   Errors from the raw tag parser and from content formatting propagate.
 */
export function parseLlmResponseAsProperties(llmResponse, outputTags) {
  const rawTags = parseLlmRawTags(llmResponse, outputTags.map((tag) => tag.tagName));

  const rawTagByName = new Map();
  const duplicatedNames = new Set();
  for (const rawTag of rawTags) {
    if (rawTagByName.has(rawTag.tagName)) {
      duplicatedNames.add(rawTag.tagName);
    }
    rawTagByName.set(rawTag.tagName, rawTag);
  }
  if (duplicatedNames.size > 0) {
    throw new Error(`Duplicate tags: ${[...duplicatedNames].join(", ")}`);
  }

  const missingTags = outputTags.filter((tag) => !tag.optional && !rawTagByName.has(tag.tagName));
  if (missingTags.length > 0) {
    throw new Error(`Missing tags: ${missingTags.map((tag) => tag.tagName).join(", ")}`);
  }

  const resultObj = {};
  for (const outputTag of outputTags) {
    const raw = rawTagByName.get(outputTag.tagName);
    if (!raw) {
      // Only optional tags can be absent at this point.
      continue;
    }
    resultObj[outputTag.key] = formatRawContent(raw.content, outputTag);
  }
  return resultObj;
}
21
/**
 * Parses an LLM response holding a repeated group of tags and returns one object per group.
 *
 * Tags are read in order and stored under their `key` in the current item. A new item
 * starts when a tag's key is already present in the current item. The check is made on
 * the key, because that is what the item is indexed by; testing the tag name would never
 * split items whose `key` differs from `tagName`.
 *
 * An empty tag is skipped when optional. When it is required, it is an error: thrown, or
 * recorded in `options.tolerateErrors.errorMessages` with the current item discarded.
 *
 * @param {string} llmResponse - Raw text returned by the LLM.
 * @param {Array<{ tagName: string, key: string, format: string, optional?: boolean }>} outputTags
 * @param {{ tolerateErrors?: { errorMessages: string[] } }} [options]
 * @returns {object[]} The parsed items; items failing validation are dropped when errors are tolerated.
 * @throws {Error} On an unexpected tag name, and on empty or missing required tags when errors
 *   are not tolerated. Errors from the raw tag parser and content formatting propagate.
 */
export function parseLlmResponseAsList(llmResponse, outputTags, options = {}) {
  const { tolerateErrors } = options;
  const rawTags = parseLlmRawTags(llmResponse, outputTags.map((tag) => tag.tagName), options);
  if (rawTags.length === 0) {
    return [];
  }
  const outputTagMap = new Map(outputTags.map((tag) => [tag.tagName, tag]));
  const result = [];
  let current = {};

  // Validates the item being built, keeps it when valid, and starts a new one.
  const flushCurrent = () => {
    const item = ensureProperties(current, outputTags, options);
    if (item) {
      result.push(item);
    }
    current = {};
  };

  for (const rawTag of rawTags) {
    const outputTag = outputTagMap.get(rawTag.tagName);
    if (!outputTag) {
      throw new Error(`Unexpected output tag "${rawTag.tagName}"`); // it's a real bug
    }
    if (outputTag.key in current) {
      flushCurrent();
    }
    if (rawTag.content === "") {
      if (!outputTag.optional) {
        const message = `Empty tag <${outputTag.tagName}>`;
        if (!tolerateErrors) {
          throw new Error(message);
        }
        tolerateErrors.errorMessages.push(message);
        current = {};
      }
      continue;
    }
    current[outputTag.key] = formatRawContent(rawTag.content, outputTag);
  }
  if (Object.keys(current).length > 0) {
    flushCurrent();
  }
  return result;
}
60
/**
 * Checks that `obj` has a property for the `key` of every non-optional output tag.
 *
 * @param {object} obj - The item to validate.
 * @param {Array<{ tagName: string, key: string, optional?: boolean }>} outputTags
 * @param {{ tolerateErrors?: { errorMessages: string[] } }} options
 * @returns {object|undefined} `obj` when valid; `undefined` when a required key is missing
 *   and errors are tolerated (the message for the first missing tag is then recorded).
 * @throws {Error} For the first missing required tag when errors are not tolerated.
 */
function ensureProperties(obj, outputTags, options) {
  const { tolerateErrors } = options;
  const firstMissing = outputTags.find((tag) => !(tag.key in obj) && !tag.optional);
  if (!firstMissing) {
    return obj;
  }
  const message = `Missing tag <${firstMissing.tagName}>`;
  if (!tolerateErrors) {
    throw new Error(message);
  }
  tolerateErrors.errorMessages.push(message);
  return undefined;
}
73
/**
 * Converts the raw text content of a tag according to the tag's `format`:
 * "yaml" and "json" are parsed, "markdown" and "text" are returned unchanged.
 *
 * @param {string} rawContent - Text found between the opening and closing tag.
 * @param {{ format: string, optional?: boolean }} tag
 * @returns {*} The converted content, or `undefined` when the content is empty and the tag is optional.
 * @throws {Error} When the format is unknown; parser errors propagate.
 */
function formatRawContent(rawContent, tag) {
  if (!rawContent && tag.optional) {
    return undefined;
  }
  const { format } = tag;
  if (format === "yaml") {
    return parse(rawContent);
  }
  if (format === "json") {
    return JSON.parse(rawContent);
  }
  if (format === "markdown" || format === "text") {
    return rawContent;
  }
  throw new Error(`Unknown format "${format}"`);
}
89
/**
 * Extracts the content of `<tagName>…</tagName>` pairs from an LLM response.
 *
 * Only the given tag names are recognized. An opening tag is accepted when the very next
 * recognized tag is its own closing tag; the trimmed text in between is the content.
 * Closing tags that do not follow an accepted opening tag are ignored.
 *
 * @param {string} llmResponse - Raw text returned by the LLM.
 * @param {string[]} tagNames - Tag names to look for; they are inserted in a regular expression as-is.
 * @param {{ tolerateErrors?: { errorMessages: string[] } }} [options] - When `tolerateErrors`
 *   is provided, malformed tags are skipped and a message is recorded instead of throwing.
 * @returns {Array<{ tagName: string, content: string }>} The tags, in order of appearance.
 * @throws {Error} On an unclosed tag, a missing closing tag, or mismatched tags when errors are not tolerated.
 */
export function parseLlmRawTags(llmResponse, tagNames, options = {}) {
  const { tolerateErrors } = options;

  // Either throws or records the message, depending on the tolerance option.
  const reportError = (message) => {
    if (!tolerateErrors) {
      throw new Error(message);
    }
    tolerateErrors.errorMessages.push(message);
  };

  // Collect every opening and closing tag with its position.
  const alternatives = tagNames.join("|");
  const tagPattern = new RegExp(`<(${alternatives})>|</(${alternatives})>`, "g");
  const markers = [];
  for (let found = tagPattern.exec(llmResponse); found !== null; found = tagPattern.exec(llmResponse)) {
    const isOpening = found[1] !== undefined;
    markers.push({ isOpening, tagName: isOpening ? found[1] : found[2], position: found.index });
  }

  const tagList = [];
  let cursor = 0;
  while (cursor < markers.length) {
    const current = markers[cursor];
    const next = markers[cursor + 1];
    cursor += 1;
    if (!current.isOpening) {
      continue;
    }
    if (next === undefined) {
      reportError(`Unclosed tag <${current.tagName}>`);
      continue;
    }
    if (next.isOpening) {
      // When tolerating errors, this opening tag is skipped entirely.
      reportError(`Missing closing tag for <${current.tagName}>`);
      continue;
    }
    if (next.tagName !== current.tagName) {
      // When tolerating errors, this opening tag is skipped entirely.
      reportError(`Mismatched tags: opening <${current.tagName}>, closing </${next.tagName}>`);
      continue;
    }
    const contentStart = current.position + `<${current.tagName}>`.length;
    tagList.push({
      tagName: current.tagName,
      content: llmResponse.substring(contentStart, next.position).trim(),
    });
    // The closing tag has been consumed.
    cursor += 1;
  }
  return tagList;
}
@@ -0,0 +1,112 @@
1
+ import { messageOf } from "@paroi/data-formatters-lib";
2
/**
 * Creates a collector that accumulates tasks and later runs them with `runTasks`.
 *
 * @param {object} ctx - Context forwarded to `runTasks`.
 * @returns {{ tasks: Function[], add: Function, runAll: Function }} `tasks` is the live list,
 *   `add(task)` appends to it, and `runAll(options)` returns the result of `runTasks`.
 */
export function createTaskCollector(ctx) {
  const tasks = [];
  const add = (task) => {
    tasks.push(task);
  };
  const runAll = (options) => runTasks(ctx, tasks, options);
  return { tasks, add, runAll };
}
14
/**
 * Runs tasks with a bounded level of parallelism and an optional start-rate limit.
 *
 * A failing task does not stop the run: its error is logged with `ctx.logger.error` and
 * its message is collected. A task that throws synchronously or returns a non-promise
 * value is handled like any other task.
 *
 * @param {{ logger: { error: Function } }} ctx
 * @param {Array<() => Promise<unknown>>} tasks - Tasks, started in array order.
 * @param {{ maxParallel: number, rateLimitPerSecond?: number }} options - `maxParallel` is the
 *   maximum number of tasks running at once; `rateLimitPerSecond`, when greater than zero,
 *   caps the number of task starts within any one-second window.
 * @returns {{ promise: Promise<{ doneCount: number, errorMessages: string[] }>, stop: () => Promise<void> }}
 *   `promise` resolves once every task has settled. `stop()` prevents new tasks from starting
 *   and resolves when the running ones have settled; if tasks were left unstarted, `promise`
 *   rejects with "Stopped before all tasks completed".
 */
export function runTasks(ctx, tasks, options) {
  const { logger } = ctx;
  const { maxParallel, rateLimitPerSecond } = options;
  let resolve;
  let reject;
  const promise = new Promise((resolveCb, rejectCb) => {
    resolve = resolveCb;
    reject = rejectCb;
  });
  let timeoutId;
  let runningCount = 0;
  let index = 0;
  let doneCount = 0;
  const errorMessages = [];
  // Start timestamps of recently started tasks, used for rate limiting
  const startTimestamps = [];
  // Set once stop() has been called
  let mustStop = false;
  let stopResolve;
  const stopPromise = new Promise((resolveStop) => {
    stopResolve = resolveStop;
  });

  // Settles both promises once the runner is stopped and nothing is running anymore.
  // Settling an already settled promise is a no-op, so this is safe to call after completion.
  function settleAfterStop() {
    if (index < tasks.length) {
      reject(new Error("Stopped before all tasks completed"));
    } else {
      resolve({ doneCount, errorMessages });
    }
    stopResolve();
  }

  // Starts a task, turning a synchronous throw or a non-promise result into a promise
  // so that the running count is always released.
  function startTask(task) {
    try {
      return Promise.resolve(task());
    } catch (err) {
      return Promise.reject(err);
    }
  }

  function runNext() {
    // Don't start new tasks once stopped
    if (mustStop) {
      if (runningCount === 0) {
        settleAfterStop();
      }
      return;
    }
    if (index >= tasks.length) {
      if (runningCount === 0) {
        resolve({ doneCount, errorMessages });
      }
      return;
    }
    if (runningCount >= maxParallel) {
      return;
    }
    // Check rate limit if specified
    if (rateLimitPerSecond !== undefined && rateLimitPerSecond > 0) {
      const now = Date.now();
      // Remove timestamps older than 1 second
      const oneSecondAgo = now - 1000;
      while (startTimestamps.length > 0 && startTimestamps[0] <= oneSecondAgo) {
        startTimestamps.shift();
      }
      if (startTimestamps.length >= rateLimitPerSecond) {
        // Rate limit reached: retry when the oldest start leaves the one-second window
        const oldestTimestamp = startTimestamps[0];
        const delayMs = Math.max(10, oldestTimestamp + 1000 - now);
        clearTimeout(timeoutId);
        timeoutId = setTimeout(runNext, delayMs);
        return;
      }
      // Record this task start time
      startTimestamps.push(now);
    }
    const task = tasks[index];
    ++index;
    ++runningCount;
    startTask(task).then(() => {
      ++doneCount;
      --runningCount;
      runNext();
    }, (err) => {
      logger.error(err);
      errorMessages.push(messageOf(err));
      --runningCount;
      runNext();
    });
  }

  for (let i = 0; i < maxParallel; ++i) {
    runNext();
  }

  function stop() {
    // Mark as stopped to prevent new tasks from starting
    mustStop = true;
    clearTimeout(timeoutId);
    // With nothing running, no task completion will trigger runNext() again (for instance
    // while waiting on the rate-limit timer), so both promises must be settled right here.
    if (runningCount === 0) {
      settleAfterStop();
    }
    // Otherwise, settling happens in runNext() when runningCount reaches 0
    return stopPromise;
  }

  return {
    promise,
    stop,
  };
}
@@ -0,0 +1,13 @@
1
/**
 * Converts a camelCase identifier to kebab-case: every ASCII uppercase letter becomes
 * a dash followed by its lowercase form, then leading and trailing dashes are removed
 * and runs of dashes are collapsed.
 *
 * @param {string} s - The identifier to convert.
 * @returns {string} The kebab-case form.
 */
export function camelToKebabCase(s) {
  const dashed = s.replace(/[A-Z]/g, (upper) => `-${upper.toLowerCase()}`);
  const withoutEdgeDashes = dashed.replace(/^-+/, "").replace(/-+$/, "");
  return withoutEdgeDashes.replace(/--+/g, "-");
}
8
/**
 * Converts a camelCase identifier to a title: a space is inserted before every ASCII
 * uppercase letter, the first character is uppercased, and leading whitespace is removed.
 *
 * @param {string} camelCase - The identifier to convert.
 * @returns {string} The title form.
 */
export function camelToTitleCase(camelCase) {
  const spaced = camelCase.replace(/([A-Z])/g, " $1");
  const capitalized = spaced.replace(/^./, (first) => first.toUpperCase());
  return capitalized.trimStart();
}