@paroicms/site-generator-plugin 0.8.0 → 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,8 +1,8 @@
1
- import { strVal } from "@paroi/data-formatters-lib";
1
+ import { strVal, strValOrUndef } from "@paroi/data-formatters-lib";
2
2
  export async function readSession(ctx, sessionId) {
3
3
  const { cn } = ctx;
4
4
  const row = await cn("PaGenSession")
5
- .select("id", "status", "guardCount", "promptCount", "nodeTypeCount", "contentCount", "errorMessage")
5
+ .select("id", "createdAt", "status", "guardCount", "promptCount", "nodeTypeCount", "contentCount", "errorMessage")
6
6
  .where({ id: sessionId })
7
7
  .first();
8
8
  if (!row)
@@ -15,7 +15,7 @@ export async function readSession(ctx, sessionId) {
15
15
  promptCount: Number(row.promptCount),
16
16
  nodeTypeCount: Number(row.nodeTypeCount),
17
17
  contentCount: Number(row.contentCount),
18
- errorMessage: strVal(row.errorMessage),
18
+ errorMessage: strValOrUndef(row.errorMessage),
19
19
  };
20
20
  }
21
21
  export async function insertSession(ctx, { sessionId }) {
@@ -0,0 +1,17 @@
1
/**
 * Collapses repeated messages into a single entry each.
 *
 * The returned array keeps the order in which each distinct message first
 * appeared. A message seen more than once is suffixed with ` (×N)`, where N is
 * its number of occurrences; a message seen once is returned unchanged.
 *
 * @param {Iterable<string>} messages - The messages to deduplicate.
 * @returns {string[]} One entry per distinct message.
 */
export function dedupMessages(messages) {
  // A Map iterates in insertion order, and re-setting an existing key does not
  // move it, so the first-occurrence order is preserved.
  const occurrences = new Map();
  for (const message of messages) {
    occurrences.set(message, (occurrences.get(message) ?? 0) + 1);
  }
  return Array.from(occurrences, ([message, count]) => {
    if (count > 1) {
      return `${message} (×${count})`;
    }
    return message;
  });
}
@@ -2,6 +2,7 @@ import { getPartTypeByName, getRegularDocumentTypeByName, getRoutingDocumentType
2
2
  import { createSimpleTranslator, } from "@paroicms/public-server-lib";
3
3
  import { getRandomImagePath } from "../lib/images-lib.js";
4
4
  import { createTaskCollector } from "../lib/tasks.js";
5
+ import { dedupMessages } from "./content-helpers.js";
5
6
  import { createGeneratedContentReport } from "./content-report.js";
6
7
  import { generateLocalizedFooterMention } from "./create-node-contents.js";
7
8
  import { generateFieldSetContent, generateMultipleFieldSetContents, } from "./generate-fake-content.js";
@@ -161,8 +162,9 @@ async function addRegularDocuments(ctx, report, siteOptions, nodeOptions) {
161
162
  tolerateErrors,
162
163
  debugName: nodeType.kebabName,
163
164
  });
164
- if (tolerateErrors.errorMessages.length > 0) {
165
- ctx.logger.warn(`Error generating content for ${nodeType.typeName}:\n - ${tolerateErrors.errorMessages.join("\n - ")}`);
165
+ const errorMessages = dedupMessages(tolerateErrors.errorMessages);
166
+ if (errorMessages.length > 0) {
167
+ ctx.logger.warn(`Error generating content for ${nodeType.typeName}:\n - ${errorMessages.join("\n - ")}`);
166
168
  }
167
169
  await ctx.service.connector.addMultipleDocumentContents(fqdn, {
168
170
  parentNodeId,
@@ -187,8 +189,9 @@ async function addParts(ctx, report, siteOptions, nodeOptions) {
187
189
  tolerateErrors,
188
190
  debugName: nodeType.kebabName,
189
191
  });
190
- if (tolerateErrors.errorMessages.length > 0) {
191
- ctx.logger.warn(`Error generating content for ${nodeType.typeName}:\n - ${tolerateErrors.errorMessages.join("\n - ")}`);
192
+ const errorMessages = dedupMessages(tolerateErrors.errorMessages);
193
+ if (errorMessages.length > 0) {
194
+ ctx.logger.warn(`Error generating content for ${nodeType.typeName}:\n - ${errorMessages.join("\n - ")}`);
192
195
  }
193
196
  await ctx.service.connector.addMultiplePartContents(fqdn, {
194
197
  parentNodeId,
@@ -106,9 +106,9 @@ export function parseLlmRawTags(llmResponse, tagNames, options = {}) {
106
106
  const current = matches[i];
107
107
  if (current.isOpening) {
108
108
  // Find the next corresponding closing tag
109
- let j = i + 1;
109
+ const j = i + 1;
110
110
  let foundClosing = false;
111
- while (j < matches.length) {
111
+ if (j < matches.length) {
112
112
  const next = matches[j];
113
113
  // If we encounter another opening tag of any type before finding our closing tag,
114
114
  // it's an error if not tolerating errors
@@ -119,9 +119,15 @@ export function parseLlmRawTags(llmResponse, tagNames, options = {}) {
119
119
  tolerateErrors.errorMessages.push(message);
120
120
  foundClosing = undefined;
121
121
  // If we are tolerating errors, we skip this opening tag entirely
122
- break;
123
122
  }
124
- if (!next.isOpening && next.tagName === current.tagName) {
123
+ else {
124
+ if (next.tagName !== current.tagName) {
125
+ // Found a non-matching closing tag
126
+ const message = `Mismatched tags: opening <${current.tagName}>, closing </${next.tagName}>`;
127
+ if (!tolerateErrors)
128
+ throw new Error(message);
129
+ tolerateErrors.errorMessages.push(message);
130
+ }
125
131
  // Found a matching closing tag
126
132
  const contentStart = current.position + `<${current.tagName}>`.length;
127
133
  const contentEnd = next.position;
@@ -133,19 +139,7 @@ export function parseLlmRawTags(llmResponse, tagNames, options = {}) {
133
139
  // Skip to after this closing tag
134
140
  i = j;
135
141
  foundClosing = true;
136
- break;
137
142
  }
138
- if (!next.isOpening && next.tagName !== current.tagName) {
139
- // Found a non-matching closing tag
140
- const message = `Mismatched tags: opening <${current.tagName}>, closing </${next.tagName}>`;
141
- if (!tolerateErrors)
142
- throw new Error(message);
143
- tolerateErrors.errorMessages.push(message);
144
- foundClosing = undefined;
145
- // If we are tolerating errors, we skip this current opening tag entirely
146
- break;
147
- }
148
- ++j;
149
143
  }
150
144
  // Handle case where no matching closing tag was found
151
145
  if (foundClosing === false) {
@@ -18,13 +18,13 @@ const fieldsPrompt = await createPromptTemplate({
18
18
  export async function invokeNewSiteAnalysis(ctx, input) {
19
19
  const { analysis, explanation, unusedInformation } = await invokeAnalysisStep1(ctx, input);
20
20
  const siteSchema = createSiteSchemaFromAnalysis(analysis);
21
- await invokeAnalysisStep2(ctx, { prompt: unusedInformation ?? "" }, siteSchema);
21
+ const { unusedInformation: unusedInformation2 } = await invokeAnalysisStep2(ctx, { prompt: createUnusedInformationPrompt(unusedInformation, analysis) ?? "" }, siteSchema);
22
22
  reorderSiteSchemaNodeTypes(siteSchema);
23
23
  const l10n = createL10n(analysis, siteSchema);
24
24
  const siteTitle = {
25
25
  [analysis.siteProperties.language]: analysis.siteProperties.title,
26
26
  };
27
- if (!unusedInformation) {
27
+ if (!unusedInformation2) {
28
28
  await updateSession(ctx, { status: "analyzed", promptCountInc: 1 });
29
29
  return {
30
30
  siteTitle,
@@ -34,9 +34,9 @@ export async function invokeNewSiteAnalysis(ctx, input) {
34
34
  explanation,
35
35
  };
36
36
  }
37
- ctx.logger.debug("Unused information:", unusedInformation);
37
+ ctx.logger.debug("Unused information:", unusedInformation2);
38
38
  const updated = await invokeUpdateSiteSchema(ctx, {
39
- prompt: unusedInformation,
39
+ prompt: unusedInformation2,
40
40
  generatedSchema: {
41
41
  siteTitle,
42
42
  siteSchema,
@@ -125,16 +125,23 @@ siteSchema) {
125
125
  const llmMessage = await fieldsPrompt.pipe(ctx.goodModel).invoke(llmInput);
126
126
  llmMessageContent = await debug.getMessageContent(llmMessage);
127
127
  }
128
- const { assignedFields } = parseLlmResponseAsProperties(llmMessageContent, [
128
+ const { assignedFields, unusedInformation } = parseLlmResponseAsProperties(llmMessageContent, [
129
129
  {
130
130
  tagName: "yaml_result",
131
131
  key: "assignedFields",
132
132
  format: "yaml",
133
133
  },
134
+ {
135
+ tagName: "unused_information_md",
136
+ key: "unusedInformation",
137
+ format: "markdown",
138
+ optional: true,
139
+ },
134
140
  ]);
135
141
  if (siteSchema.nodeTypes) {
136
142
  assignFieldsToNodeTypes(ctx, assignedFields, siteSchema.nodeTypes);
137
143
  }
144
+ return { unusedInformation };
138
145
  }
139
146
  function assignFieldsToNodeTypes(ctx, assignedFields, nodeTypes) {
140
147
  const remainingTypeNames = new Set(Object.keys(assignedFields));
@@ -172,3 +179,15 @@ function reorderSiteSchemaNodeTypes(siteSchema) {
172
179
  "orderChildrenBy",
173
180
  ]));
174
181
  }
182
/**
 * Builds a prompt that combines the per-node-type `prompt` notes found in the
 * analysis dictionary with the remaining unused information.
 *
 * Each dictionary entry that has a truthy `prompt` becomes one bullet of the
 * form `- <typeName>: <prompt>` under a "To do:" heading. When
 * `unusedInformation` is truthy it is appended after a blank line.
 *
 * @param {string | undefined} unusedInformation - Text to append after the bullet list.
 * @param {{ dictionary: Record<string, { prompt?: string }> }} analysis - Analysis
 *   object whose `dictionary` maps type names to entries.
 * @returns {string | undefined} The combined prompt, or `unusedInformation`
 *   unchanged when no entry carries a prompt.
 */
function createUnusedInformationPrompt(unusedInformation, analysis) {
  const prompts = Object.entries(analysis.dictionary)
    .filter(([, entry]) => entry.prompt)
    .map(([typeName, entry]) => `${typeName}: ${entry.prompt}`);
  if (prompts.length === 0) {
    return unusedInformation;
  }
  // The separator must start a new line and then open the next bullet; the
  // previous "- \n" separator glued a stray "- " to the end of each item and
  // left every item after the first without a bullet.
  const nodeTypePrompts = `To do:\n\n- ${prompts.join("\n- ")}`;
  return unusedInformation ? `${nodeTypePrompts}\n\n${unusedInformation}` : nodeTypePrompts;
}
@@ -9,6 +9,7 @@ export function createSiteSchemaFromAnalysis(analysis) {
9
9
  "@paroicms/content-loading-plugin",
10
10
  "@paroicms/public-menu-plugin",
11
11
  "@paroicms/contact-form-plugin",
12
+ "@paroicms/video-plugin",
12
13
  ],
13
14
  nodeTypes: [
14
15
  {
@@ -31,7 +31,7 @@ const plugin = {
31
31
  modelName: "claude-3-7-sonnet-20250219",
32
32
  anthropicApiKey: pluginConf.anthropicApiKey,
33
33
  temperature: 0.1,
34
- maxTokens: 4096,
34
+ maxTokens: 6500,
35
35
  clientOptions: {
36
36
  timeout: 60_000,
37
37
  },
@@ -40,7 +40,7 @@ const plugin = {
40
40
  modelName: "claude-3-7-sonnet-20250219",
41
41
  anthropicApiKey: pluginConf.anthropicApiKey,
42
42
  temperature: 0.1,
43
- maxTokens: 4096,
43
+ maxTokens: 3000,
44
44
  clientOptions: {
45
45
  timeout: 20_000,
46
46
  },
@@ -1,4 +1,4 @@
1
- We use **ParoiCMS** technology. With this technology, a web page is called a **document**. A website is a tree of documents. The home page is a document, the site section with news posts is a document, each post is a document. Each document has its own path in the URL.
1
+ We use **ParoiCMS** for creating a website. With this technology, a web page is called a **document**. A website is a tree of documents. The home page is a document, the site section with news posts is a document, each post is a document. Each document has its own path in the URL.
2
2
 
3
3
  There is a special kind of document that we want to detect: **routing documents** are the site sections. They can't be duplicated. They are never items of a list. For example, the homepage document, the search-page document, the "about us" document, the parent page of blog posts are _routing documents_. Other documents are **regular documents**, and they are always items of a list.
4
4
 
@@ -6,4 +6,4 @@ A document always has the following base attributes: a localized _title_, a _pub
6
6
 
7
7
  A document can contain lists of **parts**. A _part_ is a sub-section of a document, or of another _part_. A part always has a _publish date_ and a _draft_ flag. It may contain a sequence of fields and/or a sequence of child parts. A part is always an item of a list.
8
8
 
9
- Important: In the current version, we don't support any taxonomy. No categories, no tags etc.
9
+ Any routing document that is the parent of regular documents can be used as a **taxonomy**. Its terms are then the regular child documents. A taxonomy can be used in any document or part by declaring a **labeling field**.
@@ -100,7 +100,9 @@ Here's an example of correct output using parts, and with the default contact an
100
100
  * `home` (routing document)
101
101
  * list of `homeSection` (parts), list name: `homeSections`
102
102
  * `news` (routing document)
103
- * list of `article` (regular documents)
103
+ * list of `post` (regular documents)
104
+ * `tags` (routing document)
105
+ * list of `tag` (regular documents)
104
106
  * `pages` (routing document)
105
107
  * list of `page` (regular documents)
106
108
  * list of `pageSection` (parts), list name: `pageSections`
@@ -125,6 +127,7 @@ Guidelines for creating the dictionnary YAML:
125
127
  - ogType: (optional, and document only) If you think of a particular Open-Graph type for this document, give it here.
126
128
  - label: A label of the node type, in the _website language_.
127
129
  - description: A description (5-40 words) for describing the purpose and theme of the node type. Write it in the _website language_.
130
+ - prompt: This is an optional property. If there is information to process later about this node type (a description of fields), then write it here. Keep it short.
128
131
  - For a list type (only for part list, never for document list), provide the following properties:
129
132
  - confidence: Your confidence level for the accuracy of this node type (0.0-1.0).
130
133
  - kind: Must be `partList`.
@@ -155,14 +158,26 @@ news:
155
158
  kind: routingDocument
156
159
  entryPage: true
157
160
  label: News
158
- description: This is the blog section of the website. The news document contains all the topical articles.
159
- article:
161
+ description: This is the blog section of the website. The news document contains all the topical posts.
162
+ post:
160
163
  confidence: 0.8
161
164
  kind: regularDocument
162
165
  temporal: true
163
166
  ogType: article
164
- label: Article
165
- description: A topical article about the subject of the website whatever it is.
167
+ label: Post
168
+ description: A topical post about the subject of the website whatever it is.
169
+ prompt: Add a labeling field using the tags taxonomy
170
+ tags:
171
+ confidence: 0.9
172
+ kind: routingDocument
173
+ label: Tags
174
+ description: Tags taxonomy for post documents.
175
+ tag:
176
+ confidence: 0.9
177
+ kind: regularDocument
178
+ temporal: true
179
+ label: Tag
180
+ description: A tag is a term in the tags taxonomy.
166
181
  pages:
167
182
  confidence: 0.9
168
183
  kind: routingDocument
@@ -33,6 +33,7 @@ Guidelines for creating the dictionnary YAML:
33
33
  - By default, for most of node types, if you are not sure about what could be the best fields, then remember that a document is a webpage and just use a `[htmlContent]`.
34
34
  - Except if there are specific instructions in the website description, here is the default value for the `_site` node type: `["logo", "footerMention"]`.
35
35
  - Gallery of medias: there is a predefined field named `"gallery"`. It contains a list of medias. The theme can render it as a carousel, a slider, an image gallery, a slideshow, etc.
36
+ - This task is about predefined fields only. Custom fields will be added in a further step.
36
37
 
37
38
  Here is an example of expected output:
38
39
 
@@ -73,7 +73,23 @@ Important:
73
73
  - Never add an unknown predefined field.
74
74
  - The type name of the "site" node type is omitted from the JSON but its value is always `_site`.
75
75
 
76
- # 3. Examine the current JSON data, which conforms to the `JtSiteSchema` type:
76
+ # 3. Labeling fields (using a taxonomy)
77
+
78
+ A labeling field lets the user assign taxonomy terms to a document (or part).
79
+
80
+ <field_type_example>
81
+ {{
82
+ "name": "tags",
83
+ "localized": false,
84
+ "storedAs": "labeling",
85
+ "taxonomy": "tags",
86
+ "multiple": true
87
+ }},
88
+ </field_type_example>
89
+
90
+ Most of the time, the field name will be the same as the taxonomy type name.
91
+
92
+ # 4. Examine the current JSON data, which conforms to the `JtSiteSchema` type:
77
93
 
78
94
  <site_schema_json>
79
95
  {siteSchemaJson}
@@ -85,19 +101,19 @@ Also, the attached locales:
85
101
  {l10nJson}
86
102
  </l10n_json>
87
103
 
88
- # 4. Now, here is what to do:
104
+ # 5. Now, here is what to do:
89
105
 
90
106
  <user_request>
91
107
  {taskDetailsMd}
92
108
  </user_request>
93
109
 
94
- # 5. Guidelines
110
+ # 6. Guidelines
95
111
 
96
112
  - Don't assume how the CMS works. If you are not sure how to do something, don't do it.
97
113
  - You are allowed to be proactive, but only when the user asks you to do something.
98
114
  - Remember to adhere strictly to the TypeScript typing when making changes. If the update message requests changes that would violate the typing, then prioritize maintaining the correct structure over making those specific changes.
99
115
 
100
- # 6. Output
116
+ # 7. Output
101
117
 
102
118
  If there is a change in the site schema, then provide the updated site schema in JSON within <updated_site_schema_json> tags. Otherwise, leave this tag empty.
103
119