@griddo/cx 11.11.8-rc.1 → 11.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/commands/end-render.js +5 -5
- package/build/commands/end-render.js.map +4 -4
- package/build/commands/prepare-assets-directory.js +3 -3
- package/build/commands/prepare-assets-directory.js.map +4 -4
- package/build/commands/prepare-domains-render.js +9 -9
- package/build/commands/prepare-domains-render.js.map +4 -4
- package/build/commands/reset-render.js +8 -8
- package/build/commands/reset-render.js.map +4 -4
- package/build/commands/start-render.js +35 -33
- package/build/commands/start-render.js.map +4 -4
- package/build/commands/upload-search-content.js +6 -6
- package/build/commands/upload-search-content.js.map +4 -4
- package/build/core/paths.d.ts +2 -0
- package/build/index.js +25 -23
- package/build/services/generate-md.d.ts +8 -0
- package/build/services/llms.d.ts +2 -2
- package/build/shared/brush.d.ts +6 -0
- package/build/shared/context.d.ts +1 -0
- package/build/shared/envs.d.ts +2 -1
- package/build/shared/load-config.d.ts +16 -0
- package/build/ssg-adapters/gatsby/actions/index.d.ts +2 -0
- package/build/ssg-adapters/gatsby/actions/llms.d.ts +2 -0
- package/build/ssg-adapters/gatsby/shared/extract-assets.d.ts +4 -1
- package/cli.mjs +8 -0
- package/exporter/build.sh +4 -5
- package/exporter/core/GriddoLog.ts +1 -1
- package/exporter/core/check-env-health.ts +1 -1
- package/exporter/core/errors.ts +1 -1
- package/exporter/core/life-cycle.ts +1 -1
- package/exporter/core/paths.ts +3 -0
- package/exporter/core/print-logos.ts +1 -1
- package/exporter/services/api.ts +1 -1
- package/exporter/services/generate-md.ts +151 -0
- package/exporter/services/llms.ts +16 -12
- package/exporter/services/render.ts +1 -1
- package/exporter/services/store.ts +1 -1
- package/exporter/shared/brush.ts +30 -0
- package/exporter/shared/context.ts +1 -0
- package/exporter/shared/envs.ts +2 -0
- package/exporter/shared/load-config.ts +164 -0
- package/exporter/shared/npm-modules/README.md +0 -1
- package/exporter/ssg-adapters/gatsby/actions/index.ts +2 -0
- package/exporter/ssg-adapters/gatsby/actions/llms.ts +11 -0
- package/exporter/ssg-adapters/gatsby/actions/meta.ts +0 -2
- package/exporter/ssg-adapters/gatsby/actions/relocation.ts +5 -3
- package/exporter/ssg-adapters/gatsby/actions/sync.ts +5 -3
- package/exporter/ssg-adapters/gatsby/index.ts +2 -3
- package/exporter/ssg-adapters/gatsby/shared/extract-assets.ts +16 -21
- package/exporter/ssg-adapters/gatsby/shared/sync-render.ts +37 -10
- package/package.json +6 -4
- package/tsconfig.exporter.json +2 -2
- package/build/shared/npm-modules/brush.d.ts +0 -18
- package/exporter/commands/generate-md.noop +0 -109
- package/exporter/shared/npm-modules/brush.ts +0 -39
package/build/shared/envs.d.ts
CHANGED
|
@@ -18,4 +18,5 @@ declare const GRIDDO_VERBOSE_LOGS: boolean;
|
|
|
18
18
|
declare const GRIDDO_RENDER_API_FETCH_RETRY_WAIT_SECONDS: number;
|
|
19
19
|
declare const GRIDDO_RENDER_API_FETCH_RETRY_ATTEMPTS: number;
|
|
20
20
|
declare const GRIDDO_RENDER_LIFECYCLE_RETRY_ATTEMPTS: number;
|
|
21
|
-
|
|
21
|
+
declare const GRIDDO_RENDER_ENABLED_LLM_MD: boolean;
|
|
22
|
+
export { GRIDDO_AI_EMBEDDINGS, GRIDDO_API_CONCURRENCY_COUNT, GRIDDO_API_URL, GRIDDO_ASSET_PREFIX, GRIDDO_BOT_PASSWORD, GRIDDO_BOT_USER, GRIDDO_BUILD_LOGS, GRIDDO_BUILD_LOGS_BUFFER_SIZE, GRIDDO_PUBLIC_API_URL, GRIDDO_REACT_APP_INSTANCE, GRIDDO_RENDER_API_FETCH_RETRY_ATTEMPTS, GRIDDO_RENDER_API_FETCH_RETRY_WAIT_SECONDS, GRIDDO_RENDER_DISABLE_LLMS_TXT, GRIDDO_RENDER_ENABLED_LLM_MD, GRIDDO_RENDER_LIFECYCLE_RETRY_ATTEMPTS, GRIDDO_SEARCH_FEATURE, GRIDDO_SKIP_BUILD_CHECKS, GRIDDO_SSG_BUNDLE_ANALYZER, GRIDDO_SSG_VERBOSE_LOGS, GRIDDO_USE_DIST_BACKUP, GRIDDO_VERBOSE_LOGS, };
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
export interface LoadConfigOptions {
|
|
2
|
+
dirname: string;
|
|
3
|
+
configName?: string;
|
|
4
|
+
tsconfigName?: string;
|
|
5
|
+
}
|
|
6
|
+
export interface LoadConfigResult<T = unknown> {
|
|
7
|
+
config: T;
|
|
8
|
+
path: string;
|
|
9
|
+
tsconfigPath: string | null;
|
|
10
|
+
}
|
|
11
|
+
export declare class ConfigLoadError extends Error {
|
|
12
|
+
readonly cause?: Error | undefined;
|
|
13
|
+
constructor(message: string, cause?: Error | undefined);
|
|
14
|
+
}
|
|
15
|
+
export declare function loadConfig<T = unknown>(options: LoadConfigOptions): Promise<LoadConfigResult<T>>;
|
|
16
|
+
export declare function loadGriddoComponentsConfig<T = unknown>(): Promise<LoadConfigResult<T>>;
|
|
@@ -5,6 +5,7 @@ import { dataAction } from "./data";
|
|
|
5
5
|
import { dryRenderSyncAction } from "./dry-render-sync";
|
|
6
6
|
import { healthCheckAction } from "./healthCheck";
|
|
7
7
|
import { initAction } from "./init";
|
|
8
|
+
import { llmsAction } from "./llms";
|
|
8
9
|
import { logsAction } from "./logs";
|
|
9
10
|
import { metaAction } from "./meta";
|
|
10
11
|
import { prepareAction } from "./prepare";
|
|
@@ -30,6 +31,7 @@ export declare const actions: {
|
|
|
30
31
|
dryRenderSync: typeof dryRenderSyncAction;
|
|
31
32
|
healthCheck: typeof healthCheckAction;
|
|
32
33
|
init: typeof initAction;
|
|
34
|
+
llms: typeof llmsAction;
|
|
33
35
|
logs: typeof logsAction;
|
|
34
36
|
meta: typeof metaAction;
|
|
35
37
|
prepare: typeof prepareAction;
|
|
@@ -3,5 +3,8 @@
|
|
|
3
3
|
* with files of type: js, json and css.
|
|
4
4
|
* TODO: Explicar que el static se copia a assets porque el js va en el subdominio de assets.
|
|
5
5
|
*/
|
|
6
|
-
declare function extractAssetsFromDist(domain: string
|
|
6
|
+
declare function extractAssetsFromDist(domain: string, dirs: {
|
|
7
|
+
root: string;
|
|
8
|
+
exports: string;
|
|
9
|
+
}): Promise<void>;
|
|
7
10
|
export { extractAssetsFromDist };
|
package/cli.mjs
CHANGED
|
@@ -22,6 +22,14 @@ function getDBPath(rootFlag) {
|
|
|
22
22
|
const COMMANDS_PATH = path.join(__dirname, "build/commands");
|
|
23
23
|
|
|
24
24
|
export const AVAILABLE_COMMANDS = [
|
|
25
|
+
{
|
|
26
|
+
name: "generate-md",
|
|
27
|
+
description: "Generate MD files for the instance",
|
|
28
|
+
domainArgument: true,
|
|
29
|
+
rootArgument: true,
|
|
30
|
+
script: "generate-md",
|
|
31
|
+
isCommandPack: false,
|
|
32
|
+
},
|
|
25
33
|
{
|
|
26
34
|
name: "render",
|
|
27
35
|
description: "Render the instance",
|
package/exporter/build.sh
CHANGED
|
@@ -3,9 +3,9 @@
|
|
|
3
3
|
|
|
4
4
|
# CLI options for esbuild
|
|
5
5
|
log="--log-level=error"
|
|
6
|
-
production_node_opts="--bundle --platform=node --minify --sourcemap --outfile=./build/commands/"
|
|
7
|
-
debugging_node_opts="--bundle --platform=node --sourcemap --outfile=./build/commands/"
|
|
8
|
-
react_opts="--platform=node --external:@griddo-instance --external:react --external:react-dom --bundle --minify"
|
|
6
|
+
production_node_opts="--bundle --platform=node --minify --sourcemap --external:@kreuzberg/html-to-markdown --external:jiti --outfile=./build/commands/"
|
|
7
|
+
debugging_node_opts="--bundle --platform=node --sourcemap --external:@kreuzberg/html-to-markdown --external:jiti --outfile=./build/commands/"
|
|
8
|
+
react_opts="--platform=node --external:@griddo-instance --external:react --external:react-dom --external:@kreuzberg/html-to-markdown --bundle --minify"
|
|
9
9
|
|
|
10
10
|
# Manage --minify for debugging
|
|
11
11
|
bundle_node_opts=$production_node_opts
|
|
@@ -14,7 +14,7 @@ if [ "$1" = "--debug" ]; then
|
|
|
14
14
|
fi
|
|
15
15
|
|
|
16
16
|
# library
|
|
17
|
-
esbuild ${log} ./exporter/index.ts --bundle --platform=node
|
|
17
|
+
esbuild ${log} ./exporter/index.ts --bundle --platform=node --external:@kreuzberg/html-to-markdown --outfile=./build/index.js
|
|
18
18
|
esbuild ${log} ./exporter/react/index.tsx ${react_opts} --outfile=./build/react/index.js
|
|
19
19
|
|
|
20
20
|
# This .sh needs to be run from a npm script, so esbuild dependency is available
|
|
@@ -24,7 +24,6 @@ esbuild ${log} ./exporter/commands/reset-render.ts ${bundle_node_opts}reset-rend
|
|
|
24
24
|
esbuild ${log} ./exporter/commands/start-render.ts ${bundle_node_opts}start-render.js
|
|
25
25
|
esbuild ${log} ./exporter/commands/prepare-domains-render.ts ${bundle_node_opts}prepare-domains-render.js
|
|
26
26
|
esbuild ${log} ./exporter/commands/prepare-assets-directory.ts ${bundle_node_opts}prepare-assets-directory.js
|
|
27
|
-
# esbuild ${log} ./exporter/commands/generate-md.ts ${bundle_node_opts}generate-md.js
|
|
28
27
|
|
|
29
28
|
# types
|
|
30
29
|
tsgo --emitDeclarationOnly --declaration --outDir build --project tsconfig.exporter.json
|
package/exporter/core/errors.ts
CHANGED
|
@@ -2,7 +2,7 @@ import type { ErrorsType } from "../shared/errors";
|
|
|
2
2
|
|
|
3
3
|
import path from "node:path";
|
|
4
4
|
|
|
5
|
-
import { brush } from "../shared/
|
|
5
|
+
import { brush } from "../shared/brush";
|
|
6
6
|
import { RENDER_MODE } from "../shared/types/render";
|
|
7
7
|
import { readDB, writeDB } from "./db";
|
|
8
8
|
import { distRollback } from "./dist-rollback";
|
|
@@ -2,8 +2,8 @@ import type { LifeCycleAction } from "../shared/types/global";
|
|
|
2
2
|
|
|
3
3
|
import { GRIDDO_RENDER_LIFECYCLE_RETRY_ATTEMPTS } from "@shared/envs";
|
|
4
4
|
|
|
5
|
+
import { brush } from "../shared/brush";
|
|
5
6
|
import { LifecycleExecutionError } from "../shared/errors";
|
|
6
|
-
import { brush } from "../shared/npm-modules/brush";
|
|
7
7
|
import { throwError } from "./errors";
|
|
8
8
|
import { GriddoLog } from "./GriddoLog";
|
|
9
9
|
|
package/exporter/services/api.ts
CHANGED
|
@@ -21,8 +21,8 @@ import { RenderError } from "../core/errors";
|
|
|
21
21
|
import { pathExists } from "../core/fs";
|
|
22
22
|
import { GriddoLog } from "../core/GriddoLog";
|
|
23
23
|
import { addLogToBuffer } from "../core/logger";
|
|
24
|
+
import { brush } from "../shared/brush";
|
|
24
25
|
import { DEFAULT_HEADERS } from "../shared/headers";
|
|
25
|
-
import { brush } from "../shared/npm-modules/brush";
|
|
26
26
|
import { AuthService } from "./auth";
|
|
27
27
|
import { getRenderPathsHydratedWithDomainFromDB } from "./render";
|
|
28
28
|
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
import type { Core } from "@griddo/core";
|
|
2
|
+
|
|
3
|
+
import fsp from "node:fs/promises";
|
|
4
|
+
import path from "node:path";
|
|
5
|
+
|
|
6
|
+
import { convert, JsHeadingStyle } from "@kreuzberg/html-to-markdown";
|
|
7
|
+
import { GRIDDO_RENDER_ENABLED_LLM_MD } from "@shared/envs";
|
|
8
|
+
|
|
9
|
+
import { throwError } from "../core/errors";
|
|
10
|
+
import { GriddoLog } from "../core/GriddoLog";
|
|
11
|
+
import { getRenderPathsHydratedWithDomainFromDB } from "../services/render";
|
|
12
|
+
import { ReadFromStoreError } from "../shared/errors";
|
|
13
|
+
import { loadGriddoComponentsConfig } from "../shared/load-config";
|
|
14
|
+
|
|
15
|
+
/**
 * Writes a Markdown sibling for every rendered page of a domain.
 *
 * Every `<dir>/index.html` found under `<__root>/current-dist` is converted to
 * Markdown and saved as `<dir>.md`. Nothing is done when
 * `GRIDDO_RENDER_ENABLED_LLM_MD` is falsy.
 *
 * Errors raised while iterating, converting or writing are logged and not
 * rethrown. Errors from resolving the render paths or loading the components
 * config happen before the `try` and do propagate to the caller.
 *
 * @param domain - Domain whose render paths are read from the DB.
 */
export async function generateMdsFromHtmls(domain: string) {
  if (!GRIDDO_RENDER_ENABLED_LLM_MD) {
    GriddoLog.verbose(`Skipping markdown generation: Disabled by environment variable.`);
    return;
  }

  const { __root } = await getRenderPathsHydratedWithDomainFromDB({ domain });
  const distDirectory = path.join(__root, "current-dist");
  const domainHomeHtml = path.join(distDirectory, "index.html");

  const { config: coreConfig } = await loadGriddoComponentsConfig<Core.Config>();
  const pages = getPageDataPagesFromExports(distDirectory, coreConfig);

  try {
    for await (const page of pages) {
      // Domain home: no markdown is generated for it because that location
      // is where llms.txt goes.
      if (page.path === domainHomeHtml) continue;

      // Only real `index.html` files are pages; this skips the stray html
      // files that leak from gatsby. Comparing the basename (instead of a
      // suffix check plus a "/"-based regex) is separator-agnostic and never
      // produces an empty target name.
      if (path.basename(page.path) !== "index.html") continue;

      // `<dir>/index.html` -> `<dir>.md`
      const markdownPath = `${path.dirname(page.path)}.md`;
      await fsp.writeFile(markdownPath, page.content);
      GriddoLog.info(`Generating MD for a ${page.path}`);
    }
  } catch (error) {
    // The config is loaded before this block, so whatever lands here is a
    // read/convert/write failure, not a config error.
    GriddoLog.error(`Failed to generate MDs:`, error);
  }
}
|
|
51
|
+
|
|
52
|
+
// -- HELPERS --
|
|
53
|
+
|
|
54
|
+
/**
 * Removes HTML elements that only add noise for LLMs (styles, scripts,
 * navigation, forms, inline SVGs, ...), together with their content.
 *
 * This is a regex-based pre-filter, not an HTML parser: nested elements of the
 * same tag are cut at the first closing tag.
 *
 * @param html - The HTML content to clean.
 * @returns The HTML with every listed element stripped out.
 */
function cleanHtml(html: string): string {
  // Tags that are noise for LLMs.
  const tagsToRemove = [
    "style",
    "script",
    "noscript",
    "iframe",
    "form",
    "input",
    "button",
    "nav",
    "footer",
    "object",
    "svg", // inline SVGs carry no value for LLMs
  ];

  // Void elements have no closing tag, so a single tag is the whole element.
  const voidTags = new Set(["input"]);

  let cleanedHtml = html;
  for (const tag of tagsToRemove) {
    // The lookahead forces a full tag-name match, so `<nav` does not match
    // `<navigation>` or `<nav-bar>`.
    const openingTag = `<${tag}(?=[\\s/>])`;

    // The self-closing alternative goes first. Otherwise `<svg/>` is taken as
    // an opening tag and everything up to the next `</svg>` is swallowed.
    const pattern = voidTags.has(tag)
      ? new RegExp(`${openingTag}[^>]*>`, "gi")
      : new RegExp(`${openingTag}[^>]*/>|${openingTag}[^>]*>.*?</${tag}\\s*>`, "gis");

    cleanedHtml = cleanedHtml.replace(pattern, "");
  }

  return cleanedHtml;
}
|
|
84
|
+
|
|
85
|
+
/**
 * Converts an HTML document to Markdown.
 *
 * The HTML is first passed through `cleanHtml`, then converted with
 * `@kreuzberg/html-to-markdown` (ATX headings, "-" bullets, no metadata, no
 * images). Leftover `&nbsp;` / `&amp;` entities are decoded, runs of three or
 * more newlines are collapsed to two, and the result is trimmed.
 *
 * @param html - The HTML content to convert.
 * @param config - Components config; `config.schemas?.config` is spread over
 *   the default converter options, so it can override them.
 * @returns The Markdown content.
 */
export function htmlToMarkdown(html: string, config: Core.Config): string {
  // Pre-process the HTML to drop unwanted elements.
  const cleanedHtml = cleanHtml(html);

  // NOTE(review): an earlier draft read `config.config.schemas.config?.llms`.
  // Confirm the whole `schemas.config` object is really meant to be spread
  // into the converter options rather than just an `llms` section.
  const llmsConfig = config.schemas?.config;

  const markdown = convert(cleanedHtml, {
    headingStyle: JsHeadingStyle.Atx,
    bullets: "-",
    extractMetadata: false,
    skipImages: true,
    ...llmsConfig,
  });

  // Post-process the markdown. Both the entity form and the raw U+00A0
  // character are normalised to a plain space; `&amp;` is decoded afterwards,
  // same order as before.
  return markdown
    .replace(/&nbsp;|\u00a0/g, " ")
    .replace(/&amp;/g, "&")
    .replace(/\n{3,}/g, "\n\n") // collapse blank-line runs
    .trim();
}
|
|
115
|
+
|
|
116
|
+
/**
 * Walks `basePath` recursively and yields the path of every regular file whose
 * extension is `.html`.
 *
 * Each yielded path is `basePath` joined with the relative location of the
 * file, so the results are absolute exactly when `basePath` is absolute.
 *
 * @param basePath The directory to walk recursively.
 * @returns An async generator of `.html` file paths, in directory-listing order.
 */
async function* walkRecursively(basePath: string): AsyncGenerator<string> {
  const directory = await fsp.opendir(basePath);

  for await (const entry of directory) {
    const entryPath = path.join(basePath, entry.name);

    if (entry.isDirectory()) {
      yield* walkRecursively(entryPath);
      continue;
    }

    if (entry.isFile() && path.extname(entry.name) === ".html") {
      yield entryPath;
    }
  }
}
|
|
135
|
+
|
|
136
|
+
/**
 * Lazily reads every `.html` file found under `basePath` and yields it
 * converted to Markdown.
 *
 * Any error raised while reading or converting a file is handed to
 * `throwError` together with `ReadFromStoreError`.
 *
 * @param basePath - Directory that is walked recursively.
 * @param config - Components config forwarded to `htmlToMarkdown`.
 * @returns An async generator of `{ path, content }`, where `path` is the HTML
 *   file path and `content` its Markdown conversion.
 */
async function* getPageDataPagesFromExports(
  basePath: string,
  config: Core.Config,
): AsyncGenerator<{ path: string; content: string }> {
  for await (const htmlPath of walkRecursively(basePath)) {
    try {
      const html = await fsp.readFile(htmlPath, "utf8");
      yield { path: htmlPath, content: htmlToMarkdown(html, config) };
    } catch (error) {
      throwError(ReadFromStoreError, error);
    }
  }
}
|
|
@@ -4,12 +4,12 @@ import fsp from "node:fs/promises";
|
|
|
4
4
|
import path from "node:path";
|
|
5
5
|
|
|
6
6
|
import { readDB } from "@core/db";
|
|
7
|
+
import { GriddoLog } from "@core/GriddoLog";
|
|
8
|
+
import { removeTrailingSlash } from "@core/paths";
|
|
9
|
+
import { get } from "@services/api";
|
|
7
10
|
import { getRenderPathsHydratedWithDomainFromDB } from "@services/render";
|
|
8
|
-
import {
|
|
9
|
-
|
|
10
|
-
import { GriddoLog } from "../core/GriddoLog";
|
|
11
|
-
import { LLMS } from "../shared/endpoints";
|
|
12
|
-
import { get } from "./api";
|
|
11
|
+
import { LLMS } from "@shared/endpoints";
|
|
12
|
+
import { GRIDDO_RENDER_DISABLE_LLMS_TXT, GRIDDO_RENDER_ENABLED_LLM_MD } from "@shared/envs";
|
|
13
13
|
|
|
14
14
|
export interface ClientLLMsCustomContent {
|
|
15
15
|
header: string;
|
|
@@ -34,12 +34,14 @@ async function getClientLLMsTxtTemplate(filePath: string) {
|
|
|
34
34
|
}
|
|
35
35
|
}
|
|
36
36
|
|
|
37
|
-
async function
|
|
37
|
+
async function generateLlmsTxt(domain: string): Promise<void> {
|
|
38
38
|
if (GRIDDO_RENDER_DISABLE_LLMS_TXT) {
|
|
39
39
|
GriddoLog.verbose(`Skipping llms.txt generation: Disabled by environment variable.`);
|
|
40
40
|
return;
|
|
41
41
|
}
|
|
42
42
|
|
|
43
|
+
const pageIndexName = GRIDDO_RENDER_ENABLED_LLM_MD ? ".md" : "";
|
|
44
|
+
|
|
43
45
|
try {
|
|
44
46
|
// db.json
|
|
45
47
|
const db = await readDB();
|
|
@@ -66,10 +68,12 @@ async function generateLLMs(domain: string): Promise<void> {
|
|
|
66
68
|
}
|
|
67
69
|
|
|
68
70
|
// llms.txt url entry
|
|
69
|
-
const pageLinks = llmsResponse
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
71
|
+
const pageLinks = llmsResponse
|
|
72
|
+
.map(({ title, url, socialDescription }) => {
|
|
73
|
+
const description = socialDescription ? `: ${socialDescription}` : "";
|
|
74
|
+
return `- [${title}](${removeTrailingSlash(url)}${pageIndexName})${description}`;
|
|
75
|
+
})
|
|
76
|
+
.join("\n");
|
|
73
77
|
|
|
74
78
|
// instance custom content
|
|
75
79
|
const { __root, __components } = await getRenderPathsHydratedWithDomainFromDB({ domain });
|
|
@@ -82,7 +86,7 @@ Generated: ${new Date().toISOString().replace("T", " ").slice(0, 19)}`;
|
|
|
82
86
|
|
|
83
87
|
const finalContent = template
|
|
84
88
|
.replace(/\{\{\s*LLMS_HEADER\s*\}\}/g, `${griddoLLMsHeader}`)
|
|
85
|
-
.replace(/\{\{\s*LLMS_PAGE_LINKS\s*\}\}/g, `${pageLinks
|
|
89
|
+
.replace(/\{\{\s*LLMS_PAGE_LINKS\s*\}\}/g, `${pageLinks}`);
|
|
86
90
|
|
|
87
91
|
// write file
|
|
88
92
|
const distDirectory = path.join(__root, "current-dist");
|
|
@@ -96,4 +100,4 @@ Generated: ${new Date().toISOString().replace("T", " ").slice(0, 19)}`;
|
|
|
96
100
|
}
|
|
97
101
|
}
|
|
98
102
|
|
|
99
|
-
export {
|
|
103
|
+
export { generateLlmsTxt };
|
|
@@ -8,8 +8,8 @@ import { readDB, writeDB } from "../core/db";
|
|
|
8
8
|
import { throwError } from "../core/errors";
|
|
9
9
|
import { pathExists } from "../core/fs";
|
|
10
10
|
import { GriddoLog } from "../core/GriddoLog";
|
|
11
|
+
import { brush } from "../shared/brush";
|
|
11
12
|
import { RenderUUIDError } from "../shared/errors";
|
|
12
|
-
import { brush } from "../shared/npm-modules/brush";
|
|
13
13
|
import { RENDER_MODE } from "../shared/types/render";
|
|
14
14
|
import { AuthService } from "./auth";
|
|
15
15
|
import { getBuildMetadata } from "./manage-store";
|
|
@@ -14,6 +14,7 @@ import path from "node:path";
|
|
|
14
14
|
import pLimit from "p-limit";
|
|
15
15
|
|
|
16
16
|
import { GriddoLog } from "../core/GriddoLog";
|
|
17
|
+
import { brush } from "../shared/brush";
|
|
17
18
|
import { SETTINGS } from "../shared/endpoints";
|
|
18
19
|
import {
|
|
19
20
|
GRIDDO_API_CONCURRENCY_COUNT,
|
|
@@ -21,7 +22,6 @@ import {
|
|
|
21
22
|
GRIDDO_PUBLIC_API_URL,
|
|
22
23
|
GRIDDO_REACT_APP_INSTANCE,
|
|
23
24
|
} from "../shared/envs";
|
|
24
|
-
import { brush } from "../shared/npm-modules/brush";
|
|
25
25
|
import { siteFetchProgressBar } from "../shared/npm-modules/progress";
|
|
26
26
|
import { RENDER_MODE, type RenderMode } from "../shared/types/render";
|
|
27
27
|
import { get } from "./api";
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import { styleText } from "node:util";
|
|
2
|
+
|
|
3
|
+
/**
 * Truthy when styling must be suppressed: the standard `NO_COLOR` variable,
 * the legacy `NO_COLORS` spelling, or `CI` is set to a non-empty value.
 * Read once, at module load.
 */
const NO_COLORS = process.env.NO_COLOR || process.env.NO_COLORS || process.env.CI;

/** Style names exposed by `brush`; each one is passed to `styleText` as is. */
const STYLES = [
  "black",
  "red",
  "green",
  "yellow",
  "blue",
  "magenta",
  "cyan",
  "white",
  "gray",
  "bold",
  "dim",
] as const;

type StyleName = (typeof STYLES)[number];
type ColorFunction = (text: string | number) => string;
type Brush = Record<StyleName, ColorFunction>;

/**
 * Tiny text-styling helper: `brush.red("x")`, `brush.bold(3)`, ...
 * Every function stringifies its input and returns it unstyled when colours
 * are disabled.
 */
const brush = {} as Brush;

for (const style of STYLES) {
  brush[style] = (text: string | number) =>
    NO_COLORS ? String(text) : styleText(style, String(text));
}

export { brush };
|
|
@@ -29,6 +29,7 @@ export class RenderContext<T = unknown> {
|
|
|
29
29
|
readonly dryRun: boolean;
|
|
30
30
|
readonly buildReportFileName: string;
|
|
31
31
|
readonly pathsHydratedWithDomain: Record<PlaceholderPath, string>;
|
|
32
|
+
workingDistDir: string = "";
|
|
32
33
|
pagesToCreate: number[] = [];
|
|
33
34
|
pagesToDelete: number[] = [];
|
|
34
35
|
ssg: T;
|
package/exporter/shared/envs.ts
CHANGED
|
@@ -42,6 +42,7 @@ const GRIDDO_VERBOSE_LOGS = envIsTruthy(env.GRIDDO_VERBOSE_LOGS || env.GRIDDO_RE
|
|
|
42
42
|
const GRIDDO_RENDER_API_FETCH_RETRY_WAIT_SECONDS = Number.parseInt(env.GRIDDO_RENDER_API_FETCH_RETRY_WAIT_SECONDS || "4");
|
|
43
43
|
const GRIDDO_RENDER_API_FETCH_RETRY_ATTEMPTS = Number.parseInt(env.GRIDDO_RENDER_API_FETCH_RETRY_ATTEMPTS || "4");
|
|
44
44
|
const GRIDDO_RENDER_LIFECYCLE_RETRY_ATTEMPTS = Number.parseInt(env.GRIDDO_RENDER_LIFECYCLE_RETRY_ATTEMPTS || "1");
|
|
45
|
+
const GRIDDO_RENDER_ENABLED_LLM_MD = envIsTruthy(env.GRIDDO_RENDER_ENABLED_LLM_MD);
|
|
45
46
|
|
|
46
47
|
export {
|
|
47
48
|
GRIDDO_AI_EMBEDDINGS,
|
|
@@ -57,6 +58,7 @@ export {
|
|
|
57
58
|
GRIDDO_RENDER_API_FETCH_RETRY_ATTEMPTS,
|
|
58
59
|
GRIDDO_RENDER_API_FETCH_RETRY_WAIT_SECONDS,
|
|
59
60
|
GRIDDO_RENDER_DISABLE_LLMS_TXT,
|
|
61
|
+
GRIDDO_RENDER_ENABLED_LLM_MD,
|
|
60
62
|
GRIDDO_RENDER_LIFECYCLE_RETRY_ATTEMPTS,
|
|
61
63
|
GRIDDO_SEARCH_FEATURE,
|
|
62
64
|
GRIDDO_SKIP_BUILD_CHECKS,
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
import { existsSync, readFileSync } from "node:fs";
|
|
2
|
+
import { join, resolve } from "node:path";
|
|
3
|
+
|
|
4
|
+
import { GriddoLog } from "@core/GriddoLog";
|
|
5
|
+
import { resolveComponentsPath } from "@core/instance";
|
|
6
|
+
import { createJiti } from "jiti";
|
|
7
|
+
|
|
8
|
+
/** Options accepted by `loadConfig`. */
export interface LoadConfigOptions {
  /** Directory searched for both the config file and the tsconfig file. */
  dirname: string;
  /** Config file name without extension. Defaults to `"griddo.config"`. */
  configName?: string;
  /** tsconfig file name. Defaults to `"tsconfig.json"`. */
  tsconfigName?: string;
}
|
|
13
|
+
|
|
14
|
+
/** Value resolved by `loadConfig`. */
export interface LoadConfigResult<T = unknown> {
  /** The loaded configuration: the module's default export when present, otherwise the module itself. */
  config: T;
  /** Path of the config file that was found and loaded. */
  path: string;
  /** Path of the tsconfig file next to the config, or `null` when it does not exist. */
  tsconfigPath: string | null;
}
|
|
19
|
+
|
|
20
|
+
/**
 * Error thrown when a configuration file cannot be located or loaded.
 */
export class ConfigLoadError extends Error {
  /** Underlying error that made the load fail, when there is one. */
  public readonly cause?: Error;

  /**
   * @param message - Human-readable description of the failure.
   * @param cause - Original error, if the failure wraps another one.
   */
  constructor(message: string, cause?: Error) {
    super(message);
    this.cause = cause;
    this.name = "ConfigLoadError";
  }
}
|
|
29
|
+
|
|
30
|
+
/**
 * Turns tsconfig-flavoured JSON (JSONC) into strict JSON by dropping `//` and
 * block comments, trailing commas and a leading BOM. String literals are left
 * untouched.
 */
function stripJsonComments(text: string): string {
  const source = text.charCodeAt(0) === 0xfeff ? text.slice(1) : text;
  let output = "";
  let insideString = false;
  let index = 0;

  while (index < source.length) {
    const char = source[index];
    const next = source[index + 1];

    if (insideString) {
      output += char;
      if (char === "\\") {
        // Copy the escaped character verbatim so `\"` does not end the string.
        output += next ?? "";
        index += 2;
        continue;
      }
      if (char === '"') insideString = false;
      index += 1;
      continue;
    }

    if (char === '"') {
      insideString = true;
      output += char;
      index += 1;
      continue;
    }

    if (char === "/" && next === "/") {
      // Line comment: skip up to (not including) the line break.
      while (index < source.length && source[index] !== "\n") index += 1;
      continue;
    }

    if (char === "/" && next === "*") {
      const end = source.indexOf("*/", index + 2);
      index = end === -1 ? source.length : end + 2;
      continue;
    }

    if (char === "}" || char === "]") {
      // Comments are already gone, so a trailing comma can only be followed
      // by whitespace at this point.
      output = output.replace(/,\s*$/, "");
    }

    output += char;
    index += 1;
  }

  return output;
}

/**
 * Parses a tsconfig file and extracts its path aliases.
 *
 * For every entry of `compilerOptions.paths` the trailing `/*` is removed from
 * both the alias and its first target, and the target is resolved against
 * `compilerOptions.baseUrl` (default `"."`), itself resolved from `baseDir`.
 * Only the first target of each alias is used; `extends` is not followed.
 *
 * Comments and trailing commas are tolerated, because real tsconfig files
 * commonly contain them and a strict `JSON.parse` would reject the file.
 *
 * @param tsconfigPath - Path of the tsconfig file to read.
 * @param baseDir - Directory `baseUrl` is resolved from.
 * @returns A map `alias -> absolute target path`; empty when the file cannot
 *   be read or parsed, or declares no paths.
 */
function parseTsConfigPaths(tsconfigPath: string, baseDir: string): Record<string, string> {
  try {
    const content = readFileSync(tsconfigPath, "utf-8");
    const tsconfig = JSON.parse(stripJsonComments(content)) as {
      compilerOptions?: {
        baseUrl?: string;
        paths?: Record<string, string[]>;
      };
    };

    const paths = tsconfig.compilerOptions?.paths;
    const baseUrl = tsconfig.compilerOptions?.baseUrl ?? ".";

    if (!paths) {
      return {};
    }

    const aliases: Record<string, string> = {};
    const resolvedBaseUrl = resolve(baseDir, baseUrl);

    for (const [alias, targets] of Object.entries(paths)) {
      if (!targets || targets.length === 0) continue;

      const [firstTarget] = targets;
      if (firstTarget === undefined) continue;

      // Drop the trailing /* from the alias (e.g. "@ui/*" -> "@ui").
      const cleanAlias = alias.replace(/\/\*$/, "");
      // Drop the trailing /* from the target and resolve it.
      const cleanTarget = firstTarget.replace(/\/\*$/, "");

      aliases[cleanAlias] = resolve(resolvedBaseUrl, cleanTarget);
    }

    return aliases;
  } catch {
    // Best effort on purpose: aliases are optional, so an unreadable or
    // malformed tsconfig must not abort the config load.
    return {};
  }
}
|
|
70
|
+
|
|
71
|
+
/**
 * Locates and loads a configuration module with jiti.
 *
 * Looks in `options.dirname` for `<configName>.ts`, `.js` and `.mjs`, in that
 * order, and loads the first one that exists. When a tsconfig file sits next
 * to it, its `compilerOptions.paths` are passed to jiti as import aliases.
 *
 * @param options - Where to look and which file names to use.
 * @returns The loaded config (default export when present, otherwise the
 *   module itself), the config path that was found and the tsconfig path
 *   (`null` when there is none).
 * @throws ConfigLoadError when no config file exists or when importing it fails
 *   (the original error is attached as `cause`).
 */
export async function loadConfig<T = unknown>(
  options: LoadConfigOptions,
): Promise<LoadConfigResult<T>> {
  const { dirname, configName = "griddo.config", tsconfigName = "tsconfig.json" } = options;

  // 1. Find the config file: first existing candidate among .ts, .js, .mjs.
  const configPath = ["ts", "js", "mjs"]
    .map((extension) => join(dirname, `${configName}.${extension}`))
    .find((candidate) => existsSync(candidate));

  if (!configPath) {
    throw new ConfigLoadError(
      `No se encontró archivo de configuración en ${dirname}. ` +
        `Buscando: ${configName}.{ts,js,mjs}`,
    );
  }

  GriddoLog.verbose(`[loadConfig] Configuración encontrada: ${configPath}`);

  // 2. Look for the tsconfig in the same directory as the config.
  const tsconfigPath = join(dirname, tsconfigName);
  const hasTsconfig = existsSync(tsconfigPath);

  if (hasTsconfig) {
    GriddoLog.verbose(`[loadConfig] tsconfig encontrado: ${tsconfigPath}`);
  }

  // 3. Extract the path aliases from the tsconfig, if there is one.
  const aliases = hasTsconfig ? parseTsConfigPaths(tsconfigPath, dirname) : {};

  if (Object.keys(aliases).length > 0) {
    GriddoLog.verbose(`[loadConfig] Path aliases resueltos:`, aliases);
  }

  // 4. Create the jiti instance. jiti needs a valid filename as its base, so
  //    the absolute config path is used.
  const absoluteConfigPath = resolve(configPath);

  GriddoLog.verbose(`[loadConfig] Using jiti with path: ${absoluteConfigPath}`);

  const jiti = createJiti(absoluteConfigPath, {
    // Aliases extracted from the tsconfig.
    alias: aliases,
    // Interop so both ESM and CJS configs are supported.
    interopDefault: true,
  });

  try {
    // 5. Import the module by its absolute path. A relative `dirname` would
    //    produce an id such as "dir/griddo.config.ts", which can be resolved
    //    as a bare package specifier instead of a file.
    const module = await jiti.import(absoluteConfigPath);

    // 6. Extract the configuration (default export, or the module itself).
    const config = (module as { default?: T }).default ?? (module as T);

    GriddoLog.verbose(`[loadConfig] Configuración cargada exitosamente`);

    return {
      config,
      path: configPath,
      tsconfigPath: hasTsconfig ? tsconfigPath : null,
    };
  } catch (error) {
    const err = error instanceof Error ? error : new Error(String(error));
    throw new ConfigLoadError(
      `Error al cargar la configuración desde ${configPath}: ${err.message}`,
      err,
    );
  }
}
|
|
150
|
+
|
|
151
|
+
/**
 * Helper that loads `griddo.config` from the directory returned by
 * `resolveComponentsPath()`, using `tsconfig.json` from that same directory
 * for path aliases.
 *
 * @returns Whatever `loadConfig` resolves to for that directory.
 * @throws ConfigLoadError when the resolved directory does not exist, plus
 *   anything `loadConfig` throws.
 */
export async function loadGriddoComponentsConfig<T = unknown>(): Promise<LoadConfigResult<T>> {
  const instanceDir = resolveComponentsPath();
  const instanceDirExists = existsSync(instanceDir);

  if (!instanceDirExists) {
    throw new ConfigLoadError(`instance dir not found: ${instanceDir}`);
  }

  const options: LoadConfigOptions = {
    dirname: instanceDir,
    configName: "griddo.config",
    tsconfigName: "tsconfig.json",
  };

  return loadConfig<T>(options);
}
|
|
@@ -14,7 +14,6 @@ Este software se instala frecuentemente y estas dependencias pequeñas se incluy
|
|
|
14
14
|
- **`find-up-simple.ts`** - Versión simplificada de `find-up` para buscar archivos hacia arriba en el árbol de directorios
|
|
15
15
|
- **`pkg-dir.ts`** - Encuentra el directorio raíz del paquete (que contiene `package.json`)
|
|
16
16
|
- **`xml-parser.ts`** - (GRIDDO dev) Parser simple de XML para sitemaps
|
|
17
|
-
- **`brush.ts`** - (GRIDDO dev) Version ultra-reducida de utilidad para añadir colores a texto parecido a chalk o kleur.
|
|
18
17
|
- **`progress.ts`** - (GRIDDO dev) Barra de progreso minimalista y sin dependencias externas para mostrar el avance del scraping o procesos por consola con estimación de tiempo y velocidad.
|
|
19
18
|
|
|
20
19
|
## Nota importante
|
|
@@ -6,6 +6,7 @@ import { dataAction } from "./data";
|
|
|
6
6
|
import { dryRenderSyncAction } from "./dry-render-sync";
|
|
7
7
|
import { healthCheckAction } from "./healthCheck";
|
|
8
8
|
import { initAction } from "./init";
|
|
9
|
+
import { llmsAction } from "./llms";
|
|
9
10
|
import { logsAction } from "./logs";
|
|
10
11
|
import { metaAction } from "./meta";
|
|
11
12
|
import { prepareAction } from "./prepare";
|
|
@@ -33,6 +34,7 @@ export const actions = {
|
|
|
33
34
|
dryRenderSync: dryRenderSyncAction,
|
|
34
35
|
healthCheck: healthCheckAction,
|
|
35
36
|
init: initAction,
|
|
37
|
+
llms: llmsAction,
|
|
36
38
|
logs: logsAction,
|
|
37
39
|
meta: metaAction,
|
|
38
40
|
prepare: prepareAction,
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import type { RenderContext } from "@shared/context";
|
|
2
|
+
|
|
3
|
+
import { generateMdsFromHtmls } from "@services/generate-md";
|
|
4
|
+
import { generateLlmsTxt } from "@services/llms";
|
|
5
|
+
|
|
6
|
+
/**
 * Render action for the LLM-oriented artifacts of a domain: runs
 * `generateLlmsTxt` first and `generateMdsFromHtmls` afterwards, both with the
 * domain taken from the render context.
 *
 * @param context - Render context; only `context.domain` is read.
 */
export async function llmsAction(context: RenderContext) {
  const targetDomain = context.domain;

  // Sequential on purpose: same order as the two awaited calls it replaces.
  await generateLlmsTxt(targetDomain);
  await generateMdsFromHtmls(targetDomain);
}
|