@paroicms/site-generator-plugin 0.7.0 → 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28) hide show
  1. package/gen-backend/ddl/site-generator.ddl.sql +22 -0
  2. package/gen-backend/dist/commands/execute-command.js +60 -0
  3. package/gen-backend/dist/{generator/session → commands}/generator-session.js +16 -8
  4. package/gen-backend/dist/context.js +6 -0
  5. package/gen-backend/dist/db/db-init.js +35 -0
  6. package/gen-backend/dist/db/db.queries.js +60 -0
  7. package/gen-backend/dist/db/ddl-migration.js +15 -0
  8. package/gen-backend/dist/generator/fake-content-generator.ts/content-helpers.js +17 -0
  9. package/gen-backend/dist/generator/fake-content-generator.ts/content-report.js +11 -0
  10. package/gen-backend/dist/generator/fake-content-generator.ts/create-database-with-fake-content.js +27 -18
  11. package/gen-backend/dist/generator/lib/parse-llm-response.js +10 -16
  12. package/gen-backend/dist/generator/llm-queries/invoke-message-guard.js +2 -0
  13. package/gen-backend/dist/generator/llm-queries/invoke-new-site-analysis.js +28 -6
  14. package/gen-backend/dist/generator/llm-queries/invoke-update-site-schema.js +5 -1
  15. package/gen-backend/dist/generator/site-generator/site-generator.js +14 -11
  16. package/gen-backend/dist/generator/site-schema-generator/create-site-schema.js +1 -0
  17. package/gen-backend/dist/lib/site-remover.js +39 -0
  18. package/gen-backend/dist/plugin.js +43 -25
  19. package/gen-backend/prompts/0-context.md +2 -2
  20. package/gen-backend/prompts/new-site-1-analysis.md +20 -5
  21. package/gen-backend/prompts/new-site-2-fields.md +1 -0
  22. package/gen-backend/prompts/update-site-schema-2-execute.md +20 -4
  23. package/gen-front/dist/gen-front.css +1 -1
  24. package/gen-front/dist/gen-front.woff2 +0 -0
  25. package/gen-front/dist/gen-front2.woff2 +0 -0
  26. package/package.json +7 -5
  27. package/gen-backend/dist/generator/actions.js +0 -45
  28. /package/gen-backend/dist/{generator/generator-types.js → lib/internal-types.js} +0 -0
@@ -0,0 +1,39 @@
1
+ import { readdir, stat } from "node:fs/promises";
2
+ import { join } from "node:path";
3
// Handle of the currently scheduled sweep timer, shared by every call to
// `startSiteRemover` so that a new start always replaces the previous timer.
let runningId;

/**
 * Starts an hourly background sweep that removes expired generated sites.
 *
 * Any sweep previously started through this module is cancelled first. The timer is
 * unref'ed, so it does not keep the Node.js process alive on its own.
 *
 * @param ctx Context forwarded to `removeExpiredSites`. `ctx.sitesDir` must be set for a
 *   sweep to run, and `ctx.logger.error` receives any failure raised by a sweep.
 * @returns {{ stop(): void }} A handle whose `stop()` cancels the scheduled sweep.
 */
export function startSiteRemover(ctx) {
  const sweepIntervalMs = 1000 * 60 * 60; // Check every hour
  clearInterval(runningId);
  runningId = setInterval(async () => {
    // The guard must read the directory from the context: testing a local variable that
    // is never assigned would make every tick return early and the sweep would never run.
    if (!ctx.sitesDir) {
      return;
    }
    try {
      await removeExpiredSites(ctx);
    } catch (error) {
      // A failed sweep must not surface as an unhandled rejection from the timer callback.
      ctx.logger.error("[site-remover]", error);
    }
  }, sweepIntervalMs).unref();
  return {
    stop() {
      clearInterval(runningId);
    },
  };
}
23
/**
 * Removes every generated site whose directory is older than the retention period.
 *
 * Only direct sub-directories of `ctx.sitesDir` whose name looks like a UUID are
 * considered. For each one whose inode change time (`ctime`) is older than the cutoff,
 * `ctx.service.connector.removeSite` is called with `<dirName>.<packConf.parentDomain>`.
 *
 * A failure on one site does not stop the sweep: remaining sites are still processed and
 * the failure(s) are rethrown once the sweep is over.
 *
 * @param ctx Object providing `sitesDir`, `packConf.parentDomain` and
 *   `service.connector.removeSite`.
 * @param {{ maxAgeMs?: number }} [options] `maxAgeMs` is the retention period in
 *   milliseconds; it defaults to 2 days.
 * @returns {Promise<void>}
 * @throws The original error when exactly one site failed, or an `AggregateError`
 *   wrapping all errors when several sites failed. Errors from listing `sitesDir`
 *   propagate unchanged.
 */
export async function removeExpiredSites(ctx, { maxAgeMs = 1000 * 60 * 60 * 24 * 2 } = {}) {
  const { sitesDir, packConf, service } = ctx;
  const cutoffMs = Date.now() - maxAgeMs;
  // Generated sites live in directories named after a UUID; anything else is left alone.
  const uuidv4Regex = /^[0-9a-f]{8}-[0-9a-f]{4}-[1-5][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/i;
  const entries = await readdir(sitesDir, { withFileTypes: true });
  const failures = [];
  for (const entry of entries) {
    if (!entry.isDirectory() || !uuidv4Regex.test(entry.name)) {
      continue;
    }
    try {
      const st = await stat(join(sitesDir, entry.name));
      if (st.ctimeMs < cutoffMs) {
        await service.connector.removeSite(`${entry.name}.${packConf.parentDomain}`);
      }
    } catch (error) {
      // Keep going: one broken or concurrently deleted site must not block the others.
      failures.push(error);
    }
  }
  if (failures.length === 1) {
    throw failures[0];
  }
  if (failures.length > 1) {
    throw new AggregateError(failures, `[site-remover] ${failures.length} sites could not be removed`);
  }
}
@@ -1,22 +1,24 @@
1
1
  import { ChatAnthropic } from "@langchain/anthropic";
2
2
  import { ChatMistralAI } from "@langchain/mistralai";
3
- import { strVal } from "@paroi/data-formatters-lib";
4
- import { pathExists } from "@paroicms/internal-server-lib";
3
+ import { getJwtSecretSync, pathExists } from "@paroicms/internal-server-lib";
5
4
  import { ApiError, escapeHtml } from "@paroicms/public-server-lib";
6
- import { readFileSync } from "node:fs";
7
- import { dirname, join } from "node:path";
8
- import { projectDir } from "./context.js";
5
+ import { join } from "node:path";
6
+ import { executeCommand } from "./commands/execute-command.js";
7
+ import { SLUG, packageDir, pluginVersion } from "./context.js";
9
8
  import { formatGeneratorCommand, formatGeneratorPluginConfiguration } from "./data-format.js";
10
- import { executeCommand } from "./generator/actions.js";
9
+ import { createOrOpenSiteGeneratorConnection } from "./db/db-init.js";
11
10
  import { initializeImageNames } from "./generator/lib/images-lib.js";
12
- const packageDir = dirname(projectDir);
13
- const version = strVal(JSON.parse(readFileSync(join(packageDir, "package.json"), "utf-8")).version);
14
- const SLUG = "site-generator";
11
+ import { removeExpiredSites } from "./lib/site-remover.js";
15
12
  await initializeImageNames();
16
13
  const plugin = {
17
- version,
14
+ version: pluginVersion,
18
15
  slug: SLUG,
19
16
  async siteInit(service) {
17
+ const { cn, logNextQuery } = await createOrOpenSiteGeneratorConnection({
18
+ sqliteFile: join(service.registeredSite.dataDir, "site-generator.sqlite"),
19
+ canCreate: true,
20
+ logger: service.logger,
21
+ });
20
22
  const pluginConf = formatGeneratorPluginConfiguration(service.configuration);
21
23
  let debugDir = pluginConf.debugDir;
22
24
  if (debugDir) {
@@ -29,7 +31,7 @@ const plugin = {
29
31
  modelName: "claude-3-7-sonnet-20250219",
30
32
  anthropicApiKey: pluginConf.anthropicApiKey,
31
33
  temperature: 0.1,
32
- maxTokens: 4096,
34
+ maxTokens: 6500,
33
35
  clientOptions: {
34
36
  timeout: 60_000,
35
37
  },
@@ -38,7 +40,7 @@ const plugin = {
38
40
  modelName: "claude-3-7-sonnet-20250219",
39
41
  anthropicApiKey: pluginConf.anthropicApiKey,
40
42
  temperature: 0.1,
41
- maxTokens: 4096,
43
+ maxTokens: 3000,
42
44
  clientOptions: {
43
45
  timeout: 20_000,
44
46
  },
@@ -55,6 +57,32 @@ const plugin = {
55
57
  temperature: 0.2,
56
58
  maxTokens: 50_000,
57
59
  });
60
+ let rawContext;
61
+ service.registerHook("initialized", (service) => {
62
+ const packConf = service.connector.getSitePackConf(pluginConf.packName);
63
+ const { sitesDir, packName } = packConf;
64
+ if (!sitesDir || packConf.serveOn !== "subDomain") {
65
+ throw new Error(`Site-generator plugin can generate sites only for sub-domain pack with "sitesDir", but pack "${packName}" doesn't have it`);
66
+ }
67
+ rawContext = {
68
+ cn,
69
+ logNextQuery,
70
+ jwtSecret: getJwtSecretSync(join(service.registeredSite.dataDir, "site-generator-secret.txt")),
71
+ pluginConf,
72
+ bestModel,
73
+ bestModelName: bestModel.model,
74
+ goodModel,
75
+ goodModelName: goodModel.model,
76
+ cheapModel,
77
+ cheapModelName: cheapModel.model,
78
+ debugDir,
79
+ sitesDir,
80
+ packConf,
81
+ service,
82
+ logger: service.logger,
83
+ };
84
+ removeExpiredSites(rawContext).catch(console.error);
85
+ });
58
86
  service.setPublicAssetsDirectory(join(packageDir, "gen-front", "dist"));
59
87
  const scriptAttr = [
60
88
  ["type", "module"],
@@ -64,24 +92,14 @@ const plugin = {
64
92
  ];
65
93
  service.addHeadTag(`<link rel="stylesheet" href="${escapeHtml(`${service.pluginAssetsUrl}/gen-front.css`)}">`, `<script ${scriptAttr.map(([key, val]) => `${key}="${escapeHtml(val)}"`).join(" ")}></script>`);
66
94
  service.setPublicApiHandler(async (service, httpContext, relativePath) => {
95
+ if (!rawContext)
96
+ throw new Error("should be initialized");
97
+ const ctx = rawContext;
67
98
  const { req, res } = httpContext;
68
99
  if (relativePath !== "") {
69
100
  res.status(404).send({ status: 404 });
70
101
  return;
71
102
  }
72
- const ctx = {
73
- pluginConf,
74
- packConf: service.connector.getSitePackConf(pluginConf.packName),
75
- service,
76
- logger: service.logger,
77
- bestModel,
78
- bestModelName: bestModel.model,
79
- goodModel,
80
- goodModelName: goodModel.model,
81
- cheapModel,
82
- cheapModelName: cheapModel.model,
83
- debugDir,
84
- };
85
103
  let command;
86
104
  try {
87
105
  command = formatGeneratorCommand(req.body);
@@ -1,4 +1,4 @@
1
- We use **ParoiCMS** technology. With this technology, a web page is called a **document**. A website is a tree of documents. The home page is a document, the site section with news posts is a document, each post is a document. Each document has its own path in the URL.
1
+ We use **ParoiCMS** for creating a website. With this technology, a web page is called a **document**. A website is a tree of documents. The home page is a document, the site section with news posts is a document, each post is a document. Each document has its own path in the URL.
2
2
 
3
3
  There is a special kind of document that we want to detect: **routing documents** are the site sections. They can't be duplicated. They are never items of a list. For example, the homepage document, the search-page document, the "about us" document, the parent page of blog posts are _routing documents_. Other documents are **regular documents**, and they are always items of a list.
4
4
 
@@ -6,4 +6,4 @@ A document always has the following base attributes: a localized _title_, a _pub
6
6
 
7
7
  A document can contain lists of **parts**. A _part_ is a sub-section of a document, or of another _part_. A part always has a _publish date_ and a _draft_ flag. It may contain a sequence of fields and/or a sequence of child parts. A part is always an item of a list.
8
8
 
9
- Important: In the current version, we don't support any taxonomy. No categories, no tags etc.
9
+ Any routing document that is the parent of regular documents can be used as a **taxonomy**; its terms are then the regular child documents. A taxonomy can be used in any document or part by declaring a **labeling field**.
@@ -100,7 +100,9 @@ Here's an example of correct output using parts, and with the default contact an
100
100
  * `home` (routing document)
101
101
  * list of `homeSection` (parts), list name: `homeSections`
102
102
  * `news` (routing document)
103
- * list of `article` (regular documents)
103
+ * list of `post` (regular documents)
104
+ * `tags` (routing document)
105
+ * list of `tag` (regular documents)
104
106
  * `pages` (routing document)
105
107
  * list of `page` (regular documents)
106
108
  * list of `pageSection` (parts), list name: `pageSections`
@@ -125,6 +127,7 @@ Guidelines for creating the dictionnary YAML:
125
127
  - ogType: (optional, and document only) If you think of a particular Open-Graph type for this document, give it here.
126
128
  - label: A label of the node type, in the _website language_.
127
129
  - description: A description (5-40 words) for describing the purpose and theme of the node type. Write it in the _website language_.
130
+ - prompt: This is an optional property. If there is information about this node type to process later (a description of fields), then write it here. Keep it short.
128
131
  - For a list type (only for part list, never for document list), provide the following properties:
129
132
  - confidence: Your confidence level for the accuracy of this node type (0.0-1.0).
130
133
  - kind: Must be `partList`.
@@ -155,14 +158,26 @@ news:
155
158
  kind: routingDocument
156
159
  entryPage: true
157
160
  label: News
158
- description: This is the blog section of the website. The news document contains all the topical articles.
159
- article:
161
+ description: This is the blog section of the website. The news document contains all the topical posts.
162
+ post:
160
163
  confidence: 0.8
161
164
  kind: regularDocument
162
165
  temporal: true
163
166
  ogType: article
164
- label: Article
165
- description: A topical article about the subject of the website whatever it is.
167
+ label: Post
168
+ description: A topical post about the subject of the website whatever it is.
169
+ prompt: Add a labeling field using the tags taxonomy
170
+ tags:
171
+ confidence: 0.9
172
+ kind: routingDocument
173
+ label: Tags
174
+ description: Tags taxonomy for post documents.
175
+ tag:
176
+ confidence: 0.9
177
+ kind: regularDocument
178
+ temporal: true
179
+ label: Tag
180
+ description: A tag is a term in the tags taxonomy.
166
181
  pages:
167
182
  confidence: 0.9
168
183
  kind: routingDocument
@@ -33,6 +33,7 @@ Guidelines for creating the dictionnary YAML:
33
33
  - By default, for most of node types, if you are not sure about what could be the best fields, then remember that a document is a webpage and just use a `[htmlContent]`.
34
34
  - Except if there are specific instructions in the website description, here is the default value for the `_site` node type: `["logo", "footerMention"]`.
35
35
  - Gallery of medias: there is a predefined field named `"gallery"`. It contains a list of medias. The theme can render it as a carousel, a slider, an image gallery, a slideshow, etc.
36
+ - This task is about predefined fields only. Custom fields will be added in a later step.
36
37
 
37
38
  Here is an example of expected output:
38
39
 
@@ -73,7 +73,23 @@ Important:
73
73
  - Never add an unknown predefined field.
74
74
  - The type name of the "site" node type is omitted from the JSON but its value is always `_site`.
75
75
 
76
- # 3. Examine the current JSON data, which conforms to the `JtSiteSchema` type:
76
+ # 3. Labeling fields (using a taxonomy)
77
+
78
+ A labeling field lets the user assign taxonomy terms to a document (or part).
79
+
80
+ <field_type_example>
81
+ {{
82
+ "name": "tags",
83
+ "localized": false,
84
+ "storedAs": "labeling",
85
+ "taxonomy": "tags",
86
+ "multiple": true
87
+ }},
88
+ </field_type_example>
89
+
90
+ Most of the time, the field name will be the same as the taxonomy type name.
91
+
92
+ # 4. Examine the current JSON data, which conforms to the `JtSiteSchema` type:
77
93
 
78
94
  <site_schema_json>
79
95
  {siteSchemaJson}
@@ -85,19 +101,19 @@ Also, the attached locales:
85
101
  {l10nJson}
86
102
  </l10n_json>
87
103
 
88
- # 4. Now, here is what to do:
104
+ # 5. Now, here is what to do:
89
105
 
90
106
  <user_request>
91
107
  {taskDetailsMd}
92
108
  </user_request>
93
109
 
94
- # 5. Guidelines
110
+ # 6. Guidelines
95
111
 
96
112
  - Don't assume how the CMS works. If you are not sure how to do something, don't do it.
97
113
  - You are allowed to be proactive, but only when the user asks you to do something.
98
114
  - Remember to adhere strictly to the TypeScript typing when making changes. If the update message requests changes that would violate the typing, then prioritize maintaining the correct structure over making those specific changes.
99
115
 
100
- # 6. Output
116
+ # 7. Output
101
117
 
102
118
  If there is a change in the site schema, then provide the updated site schema in JSON within <updated_site_schema_json> tags. Otherwise, leave this tag empty.
103
119