@module-federation/rspress-plugin 2.4.0 → 2.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -4,15 +4,194 @@ import { logAndReport } from "@module-federation/error-codes/node";
4
4
  import { createLogger, createModuleFederationConfig } from "@module-federation/sdk";
5
5
  import promises_default from "node:fs/promises";
6
6
  import external_node_path_default from "node:path";
7
- import "node:crypto";
8
7
  import { load } from "cheerio";
9
8
  import { htmlToText } from "html-to-text";
9
+ import "node:crypto";
10
10
  import { groupBy } from "lodash-es";
11
11
  import { SEARCH_INDEX_NAME } from "@rspress/shared";
12
12
  import external_path_default from "path";
13
13
  import external_fs_default from "fs";
14
14
  const logger = createLogger('[ Module Federation Rspress Plugin ]');
15
15
  const src_logger = logger;
16
/**
 * Swaps the file extension of `filepath` for `.html`.
 *
 * The extension reported by `path.extname` is located from the END of the
 * string. A plain `String.replace(ext, '.html')` would rewrite the first
 * occurrence instead, which corrupts paths such as `v1.mdx/a.mdx`, and would
 * prepend `.html` when the path has no extension at all.
 *
 * @param filepath Path whose extension should become `.html`.
 * @returns The path with its extension replaced, or with `.html` appended
 *          when there is no extension.
 */
const replaceHtmlExt = (filepath)=>{
    const ext = external_node_path_default.extname(filepath);
    if (!ext) return `${filepath}.html`;
    // extname() ignores trailing separators, so anchor on the last occurrence
    // rather than assuming the extension is the literal suffix.
    const extStart = filepath.lastIndexOf(ext);
    return `${filepath.slice(0, extStart)}.html${filepath.slice(extStart + ext.length)}`;
};
17
/**
 * Maps a route path to the relative `.md` filename used for its markdown copy.
 *
 * A route ending in `/` maps to `<route>index.md`; any other route gets `.md`
 * appended. All leading slashes are dropped so the result is relative.
 *
 * @param routePath Route path such as `/guide/` or `/guide/start`.
 * @returns Relative markdown filename, e.g. `guide/index.md`.
 */
const routePageToMdFilename = (routePath)=>{
    const suffix = routePath.endsWith('/') ? 'index.md' : '.md';
    const withExtension = routePath + suffix;
    // Skip every leading '/' so the filename can be joined onto a directory.
    let firstNonSlash = 0;
    while ('/' === withExtension[firstNonSlash]) firstNonSlash += 1;
    return withExtension.slice(firstNonSlash);
};
21
/**
 * Prefixes `url` with the site `base`.
 *
 * A missing base or the root base `/` leaves `url` untouched. Otherwise one
 * trailing slash of `base` and one leading slash of `url` are removed before
 * the two are joined with a single `/`.
 *
 * @param url  URL or path to prefix.
 * @param base Optional base path.
 * @returns The prefixed URL.
 */
const addBase = (url, base)=>{
    const baseIsRoot = !base || '/' === base;
    if (baseIsRoot) return url;
    const prefix = base.endsWith('/') ? base.slice(0, -1) : base;
    const rest = url.startsWith('/') ? url.slice(1) : url;
    return [
        prefix,
        rest
    ].join('/');
};
25
/**
 * Builds the markdown URL for a route, including the site base.
 *
 * A trailing `.html` is dropped from the route; a route ending in `/` then
 * maps to `index.md` inside it, and any other route gets `.md` appended. The
 * result is passed through `addBase`.
 *
 * @param routePath Route path, optionally ending in `.html`.
 * @param base      Optional site base path.
 * @returns The markdown URL for the route.
 */
const routePathToMdPath = (routePath, base)=>{
    const htmlExt = '.html';
    const stem = routePath.endsWith(htmlExt) ? routePath.slice(0, -htmlExt.length) : routePath;
    const suffix = stem.endsWith('/') ? 'index.md' : '.md';
    return addBase(stem + suffix, base);
};
30
/**
 * Resolves the path of the built HTML file for a route.
 *
 * For routes in the default language the language directory is removed from
 * `route.relativePath` before joining it onto `outputDir`. Only a whole path
 * segment equal to the language code is removed: a plain
 * `relativePath.replace(lang, '')` would strip the first matching substring,
 * so a path such as `current/en/guide.md` would lose the `en` inside
 * `current`.
 *
 * @param route   Route metadata; `lang` and `relativePath` are read.
 * @param options `outputDir` (build output root) and `defaultLang`.
 * @returns Path of the route's `.html` file inside `outputDir`.
 */
const getRouteHtmlPath = (route, { outputDir, defaultLang })=>{
    let relativePath = route.relativePath;
    if (route.lang && route.lang === defaultLang) {
        // Escape the language code so it is matched literally inside the RegExp.
        const escapedLang = route.lang.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
        // Match the language only as a directory segment (start of path or after
        // a separator, and followed by a separator); keep the leading separator.
        const langSegment = new RegExp(`(^|[\\\\/])${escapedLang}(?=[\\\\/])`);
        relativePath = relativePath.replace(langSegment, '$1');
    }
    return replaceHtmlExt(external_node_path_default.join(outputDir, relativePath));
};
31
/**
 * Normalizes text extracted from HTML into a single clean line.
 *
 * Runs of whitespace collapse to one space, surrounding whitespace is
 * trimmed, and a leading run of `#` characters (plus the whitespace after it)
 * is removed. Trimming happens BEFORE the `#` strip; otherwise text that
 * begins with whitespace (for example "\n  ## Title") would keep its marker
 * because the `^#+` anchor could not match.
 *
 * @param text Raw text content.
 * @returns The normalized text; an empty string when nothing remains.
 */
function normalizeText(text) {
    const collapsed = text.replace(/\s+/g, ' ').trim();
    return collapsed.replace(/^#+\s*/, '');
}
34
/**
 * Converts a rendered HTML page into plain text used as the page's markdown.
 *
 * Non-content elements are removed first, then the inner HTML of the first
 * `main`, `article` or `.rp-doc` element (falling back to `body`, then to the
 * raw input) is handed to `htmlToText`.
 *
 * @param html Full HTML document source.
 * @returns The trimmed text produced by `htmlToText`.
 */
function htmlToMarkdown(html) {
    const $ = load(html);
    // Drop non-rendered tags, generic page chrome, and the `.rp-*` elements
    // (presumably Rspress theme navigation/footer - confirm against the theme).
    const removableSelectors = [
        "script, style, noscript, template",
        'header, nav, aside, footer',
        ".rp-nav,.rp-sidebar,.rp-aside,.rp-footer,.rp-doc-footer,.rp-not-doc"
    ];
    for (const selector of removableSelectors)$(selector).remove();
    // Pick the first content container that yields non-empty inner HTML.
    let docHtml;
    for (const selector of [
        'main',
        'article',
        '.rp-doc'
    ]){
        docHtml = $(selector).first().html();
        if (docHtml) break;
    }
    if (!docHtml) docHtml = $('body').html() || html;
    // Headings keep their original casing instead of being upper-cased.
    const headingSelectors = [
        1,
        2,
        3,
        4,
        5,
        6
    ].map((level)=>({
            selector: `h${level}`,
            options: {
                uppercase: false
            }
        }));
    const conversionOptions = {
        wordwrap: 80,
        selectors: [
            {
                selector: 'a',
                options: {
                    ignoreHref: false
                }
            },
            {
                selector: 'img',
                format: 'skip'
            },
            ...headingSelectors
        ],
        tables: true,
        longWordSplit: {
            forceWrapOnLimit: true
        }
    };
    return htmlToText(docHtml, conversionOptions).trim();
}
73
/**
 * Extracts a page title and short description from rendered HTML.
 *
 * The content root is the first `main`, `article` or `.rp-doc` element, or
 * `body` when none of those exist. Header anchors and the copy/LLMS button
 * elements are removed from it before any text is read.
 *
 * @param html Full HTML document source.
 * @returns `title`: normalized text of the first `h1` in the content root,
 *          else the first `h1` in the document, else the `<title>` text.
 *          `description`: the first paragraph in the content root with
 *          non-empty normalized text, or `undefined` when there is none.
 */
function extractPageInfoFromHtml(html) {
    const $ = load(html);
    let $doc = $('body');
    for (const selector of [
        'main',
        'article',
        '.rp-doc'
    ]){
        const $candidate = $(selector).first();
        if ($candidate.length) {
            $doc = $candidate;
            break;
        }
    }
    // Remove helper elements so their text does not leak into the title/description.
    $doc.find('a.header-anchor, .header-anchor, .rp-copy, .rp-llms-button').remove();
    let title = normalizeText($doc.find('h1').first().text());
    if (!title) title = normalizeText($('h1').first().text());
    if (!title) title = normalizeText($('title').first().text());
    let description;
    for (const paragraph of $doc.find('p').toArray()){
        const paragraphText = normalizeText($(paragraph).text());
        if (paragraphText) {
            description = paragraphText;
            break;
        }
    }
    return {
        title,
        description
    };
}
84
/**
 * Reads the built HTML of every route and derives its markdown and page info.
 *
 * All routes are processed concurrently; a failure to read any HTML file
 * rejects the returned promise.
 *
 * @param routes  Route metadata list.
 * @param options Options forwarded to `getRouteHtmlPath`.
 * @returns Promise of `{ route, markdown, title, description }` entries in
 *          the same order as `routes`.
 */
async function extractRouteMarkdown(routes, options) {
    const convertRoute = async (route)=>{
        const html = await promises_default.readFile(getRouteHtmlPath(route, options), 'utf-8');
        const pageInfo = extractPageInfoFromHtml(html);
        return {
            route,
            markdown: htmlToMarkdown(html),
            title: pageInfo.title,
            description: pageInfo.description
        };
    };
    return Promise.all(routes.map((route)=>convertRoute(route)));
}
97
/**
 * Returns the version of the first entry whose route carries a truthy
 * `version`, used as a fallback default version.
 *
 * @param routeMarkdownList Entries with a `route` property.
 * @returns That route's version, or `undefined` when no route has one.
 */
function getDefaultVersion(routeMarkdownList) {
    for (const { route } of routeMarkdownList){
        if (route.version) return route.version;
    }
    return undefined;
}
100
/**
 * Computes the output sub-directory prefix for a route's llms files.
 *
 * The prefix is `<version>/` (only when a default version is known and the
 * route has a different, truthy version) followed by `<lang>/` (only when the
 * route language differs from the default language).
 *
 * @param route          Route metadata; `lang` and `version` are read.
 * @param defaultLang    Default language of the site.
 * @param defaultVersion Default version, if any.
 * @returns The prefix, e.g. `''`, `'zh/'` or `'v2/zh/'`.
 */
function getLlmsOutputPrefix(route, defaultLang, defaultVersion) {
    const segments = [];
    const hasOtherVersion = Boolean(defaultVersion && route.version && route.version !== defaultVersion);
    if (hasOtherVersion) segments.push(route.version);
    if (route.lang !== defaultLang) segments.push(route.lang);
    return segments.map((segment)=>`${segment}/`).join('');
}
105
/**
 * Renders the full `llms-full.txt` content for a list of pages.
 *
 * Each page becomes a section with a `---` / `url: …` / `---` header, a blank
 * line, the page markdown and a trailing newline; sections are joined with a
 * newline.
 *
 * @param routeMarkdownList Entries providing `route` and `markdown`.
 * @param options           Options; `base` is used to build each page URL.
 * @returns The file content.
 */
function generateLlmsFullTxt(routeMarkdownList, options) {
    const sections = [];
    for (const { route, markdown } of routeMarkdownList){
        const sectionLines = [
            '---',
            `url: ${routePathToMdPath(route.routePath, options.base)}`,
            '---',
            '',
            markdown,
            ''
        ];
        sections.push(sectionLines.join('\n'));
    }
    return sections.join('\n');
}
115
/**
 * Writes each page's markdown file unless a non-blank file already exists.
 *
 * The target is `<outputDir>/<routePageToMdFilename(routePath)>`; parent
 * directories are created as needed. A missing file counts as empty, while
 * any other read error is rethrown. All pages are handled concurrently.
 *
 * @param routeMarkdownList Entries providing `route` and `markdown`.
 * @param options           Options; `outputDir` is the build output root.
 */
async function writeRouteMarkdownIfEmpty(routeMarkdownList, options) {
    const writeOne = async ({ route, markdown })=>{
        const targetPath = external_node_path_default.join(options.outputDir, routePageToMdFilename(route.routePath));
        await promises_default.mkdir(external_node_path_default.dirname(targetPath), {
            recursive: true
        });
        let current = '';
        try {
            current = await promises_default.readFile(targetPath, 'utf-8');
        } catch (error) {
            // Only "file does not exist" is expected here.
            if ('ENOENT' !== error.code) throw error;
        }
        // Keep any file that already has real content.
        if (current.trim()) return;
        await promises_default.writeFile(targetPath, markdown);
    };
    await Promise.all(routeMarkdownList.map((entry)=>writeOne(entry)));
}
131
/**
 * Creates `llms-full.txt` or fills in its empty sections.
 *
 * When the file is missing or empty it is written from scratch with
 * `generateLlmsFullTxt`. Otherwise every `---\nurl: …\n---` section whose
 * body is blank is filled with the markdown recorded for that URL; sections
 * that already have content, or whose URL is unknown, are left untouched.
 *
 * @param routeMarkdownList Entries providing `route` and `markdown`.
 * @param options           Options; `outputDir` and `base` are read.
 * @param outputPrefix      Sub-directory (relative to `outputDir`) holding the file.
 */
async function patchLlmsFullTxtIfEmpty(routeMarkdownList, options, outputPrefix) {
    const targetPath = external_node_path_default.join(options.outputDir, outputPrefix, 'llms-full.txt');
    await promises_default.mkdir(external_node_path_default.dirname(targetPath), {
        recursive: true
    });
    // Markdown keyed by the same URL that appears in each section header.
    const markdownByUrl = new Map();
    for (const { route, markdown } of routeMarkdownList){
        markdownByUrl.set(routePathToMdPath(route.routePath, options.base), markdown);
    }
    let current = '';
    try {
        current = await promises_default.readFile(targetPath, 'utf-8');
    } catch (error) {
        if ('ENOENT' !== error.code) throw error;
    }
    if (!current) {
        await promises_default.writeFile(targetPath, generateLlmsFullTxt(routeMarkdownList, options));
        return;
    }
    // Groups: 1 = section header, 2 = url, 3 = body up to the next header or end of file.
    const sectionPattern = /(---\nurl:\s*([^\n]+)\n---\n)([\s\S]*?)(?=\n---\nurl:|$)/g;
    const fillSection = (section, header, url, body)=>{
        if (body.trim()) return section;
        const replacement = markdownByUrl.get(url.trim());
        return replacement ? `${header}${replacement}\n` : section;
    };
    await promises_default.writeFile(targetPath, current.replace(sectionPattern, fillSection));
}
155
/**
 * Fills in entries of an existing `llms.txt` that have an empty link title.
 *
 * Lines of the form `- [](url)` (optionally followed by `:` and trailing
 * spaces) are rewritten to `- [title](url): description` using the title and
 * description extracted for that URL. Lines whose URL is unknown or has no
 * title are left unchanged. Nothing happens when the file does not exist, and
 * the file is rewritten only when at least one line changed.
 *
 * @param routeMarkdownList Entries providing `route`, `title` and `description`.
 * @param options           Options; `outputDir` and `base` are read.
 * @param outputPrefix      Sub-directory (relative to `outputDir`) holding the file.
 */
async function patchLlmsTxtIfEmpty(routeMarkdownList, options, outputPrefix) {
    const llmsPath = external_node_path_default.join(options.outputDir, outputPrefix, 'llms.txt');
    let original = '';
    try {
        original = await promises_default.readFile(llmsPath, 'utf-8');
    } catch (error) {
        if ('ENOENT' !== error.code) throw error;
        // No llms.txt was generated for this prefix, so there is nothing to patch.
        return;
    }
    const pageInfoMap = new Map(routeMarkdownList.map(({ route, title, description })=>[
            routePathToMdPath(route.routePath, options.base),
            {
                title,
                description
            }
        ]));
    // After the optional ':' only horizontal whitespace may follow. A plain
    // `\s*` also matches line terminators, so in multiline mode the match
    // could swallow the newline before a following blank line and the
    // replacement would silently delete that blank line.
    const emptyTitleEntry = /^- \[\]\(([^)]+)\)(?::[^\S\r\n]*)?$/gm;
    const patched = original.replace(emptyTitleEntry, (line, url)=>{
        const pageInfo = pageInfoMap.get(url.trim());
        if (!pageInfo?.title) return line;
        const suffix = pageInfo.description ? `: ${pageInfo.description}` : '';
        return `- [${pageInfo.title}](${url})${suffix}`;
    });
    if (patched !== original) await promises_default.writeFile(llmsPath, patched);
}
179
/**
 * Rebuilds the llms outputs from the already-built HTML pages.
 *
 * Steps: extract markdown and page info for every route, write missing
 * per-page markdown files, then group the pages by output prefix
 * (version/language) and patch `llms.txt` and `llms-full.txt` for each group.
 * Groups are processed concurrently; within a group `llms.txt` is patched
 * before `llms-full.txt`.
 *
 * @param routes  Route metadata list.
 * @param options `outputDir`, `defaultLang`, optional `base` and optional
 *                `defaultVersion` (falls back to `getDefaultVersion`).
 */
async function rebuildLlmsByHtml(routes, options) {
    const pages = await extractRouteMarkdown(routes, options);
    const defaultVersion = options.defaultVersion || getDefaultVersion(pages);
    const pagesByPrefix = new Map();
    await writeRouteMarkdownIfEmpty(pages, options);
    for (const page of pages){
        const prefix = getLlmsOutputPrefix(page.route, options.defaultLang, defaultVersion);
        if (pagesByPrefix.has(prefix)) pagesByPrefix.get(prefix).push(page);
        else pagesByPrefix.set(prefix, [
            page
        ]);
    }
    const patchGroup = async ([prefix, group])=>{
        await patchLlmsTxtIfEmpty(group, options, prefix);
        await patchLlmsFullTxtIfEmpty(group, options, prefix);
    };
    await Promise.all([
        ...pagesByPrefix.entries()
    ].map((entry)=>patchGroup(entry)));
}
16
195
  function findSearchIndexPaths(outputDir) {
17
196
  const staticDir = external_path_default.join(outputDir, 'static');
18
197
  if (!external_fs_default.existsSync(staticDir)) return;
@@ -42,7 +221,7 @@ function generateTocFromHtml(html) {
42
221
  title
43
222
  };
44
223
  }
45
- const replaceHtmlExt = (filepath)=>filepath.replace(external_node_path_default.extname(filepath), '.html');
224
/**
 * Swaps the file extension of `filepath` for `.html`.
 *
 * The extension reported by `path.extname` is located from the END of the
 * string. A plain `String.replace(ext, '.html')` would rewrite the first
 * occurrence instead, which corrupts paths such as `v1.mdx/a.mdx`, and would
 * prepend `.html` when the path has no extension at all.
 *
 * @param filepath Path whose extension should become `.html`.
 * @returns The path with its extension replaced, or with `.html` appended
 *          when there is no extension.
 */
const rebuildSearchIndexByHtml_replaceHtmlExt = (filepath)=>{
    const ext = external_node_path_default.extname(filepath);
    if (!ext) return `${filepath}.html`;
    // extname() ignores trailing separators, so anchor on the last occurrence
    // rather than assuming the extension is the literal suffix.
    const extStart = filepath.lastIndexOf(ext);
    return `${filepath.slice(0, extStart)}.html${filepath.slice(extStart + ext.length)}`;
};
46
225
  async function extractPageDataFromHtml(routes, options) {
47
226
  return Promise.all(routes.map(async (route)=>{
48
227
  const { searchCodeBlocks, outputDir, defaultLang } = options;
@@ -58,7 +237,7 @@ async function extractPageDataFromHtml(routes, options) {
58
237
  _filepath: route.absolutePath,
59
238
  _relativePath: ''
60
239
  };
61
- const htmlPath = replaceHtmlExt(external_node_path_default.join(outputDir, route.lang === defaultLang ? route.relativePath.replace(route.lang, '') : route.relativePath));
240
+ const htmlPath = rebuildSearchIndexByHtml_replaceHtmlExt(external_node_path_default.join(outputDir, route.lang === defaultLang ? route.relativePath.replace(route.lang, '') : route.relativePath));
62
241
  const html = await promises_default.readFile(htmlPath, 'utf-8');
63
242
  let { toc: rawToc, title } = generateTocFromHtml(html);
64
243
  let content = html;
@@ -157,7 +336,7 @@ async function rebuildSearchIndexByHtml(routes, options) {
157
336
  }
158
337
  const isDev = ()=>'development' === process.env.NODE_ENV;
159
338
  function plugin_pluginModuleFederation(mfConfig, rspressOptions) {
160
- const { autoShared = true, rebuildSearchIndex = true } = rspressOptions || {};
339
+ const { autoShared = true, rebuildSearchIndex = true, rebuildLlms = true } = rspressOptions || {};
161
340
  if (autoShared) mfConfig.shared = {
162
341
  react: {
163
342
  singleton: true,
@@ -244,25 +423,38 @@ function plugin_pluginModuleFederation(mfConfig, rspressOptions) {
244
423
  routes = routeMetaArr;
245
424
  },
246
425
  async afterBuild (config) {
247
- if (!mfConfig.remotes || isDev() || !rebuildSearchIndex) return;
426
+ const shouldRebuildLlms = Boolean(config.llms && rebuildLlms);
427
+ if (!mfConfig.remotes || isDev() || !rebuildSearchIndex && !shouldRebuildLlms) return;
248
428
  if (!enableSSG) {
249
- src_logger.error('rebuildSearchIndex is only supported for ssg');
429
+ src_logger.error('rebuildSearchIndex and rebuildLlms are only supported for ssg');
250
430
  process.exit(1);
251
431
  }
252
- const searchConfig = config?.search || {};
253
- const replaceRules = config?.replaceRules || [];
254
- const domain = '';
255
- const versioned = searchConfig && searchConfig.versioned;
256
- const searchCodeBlocks = 'codeBlocks' in searchConfig ? Boolean(searchConfig.codeBlocks) : true;
257
- await rebuildSearchIndexByHtml(routes, {
258
- outputDir,
259
- versioned,
260
- replaceRules,
261
- domain,
262
- searchCodeBlocks,
263
- defaultLang: config.lang || 'en'
264
- });
265
- src_logger.info('rebuildSearchIndex success!');
432
+ const defaultLang = config.lang || 'en';
433
+ if (rebuildSearchIndex) {
434
+ const searchConfig = config?.search || {};
435
+ const replaceRules = config?.replaceRules || [];
436
+ const domain = '';
437
+ const versioned = searchConfig && searchConfig.versioned;
438
+ const searchCodeBlocks = 'codeBlocks' in searchConfig ? Boolean(searchConfig.codeBlocks) : true;
439
+ await rebuildSearchIndexByHtml(routes, {
440
+ outputDir,
441
+ versioned,
442
+ replaceRules,
443
+ domain,
444
+ searchCodeBlocks,
445
+ defaultLang
446
+ });
447
+ src_logger.info('rebuildSearchIndex success!');
448
+ }
449
+ if (shouldRebuildLlms) {
450
+ await rebuildLlmsByHtml(routes, {
451
+ outputDir,
452
+ defaultLang,
453
+ base: config.base,
454
+ defaultVersion: config.multiVersion?.default
455
+ });
456
+ src_logger.info('rebuildLlms success!');
457
+ }
266
458
  }
267
459
  };
268
460
  }
package/dist/plugin.d.ts CHANGED
@@ -3,6 +3,7 @@ import type { RspressPlugin } from '@rspress/core';
3
3
  type RspressPluginOptions = {
4
4
  autoShared?: boolean;
5
5
  rebuildSearchIndex?: boolean;
6
+ rebuildLlms?: boolean;
6
7
  };
7
8
  export declare function pluginModuleFederation(mfConfig: moduleFederationPlugin.ModuleFederationPluginOptions, rspressOptions?: RspressPluginOptions): RspressPlugin;
8
9
  export { createModuleFederationConfig } from '@module-federation/sdk';
@@ -0,0 +1,8 @@
1
+ import type { RouteMeta } from '@rspress/shared';
2
+ export type RebuildLlmsByHtmlOptions = {
3
+ outputDir: string;
4
+ defaultLang: string;
5
+ base?: string;
6
+ defaultVersion?: string;
7
+ };
8
+ export declare function rebuildLlmsByHtml(routes: RouteMeta[], options: RebuildLlmsByHtmlOptions): Promise<void>;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@module-federation/rspress-plugin",
3
- "version": "2.4.0",
3
+ "version": "2.5.1",
4
4
  "type": "module",
5
5
  "description": "Module Federation plugin for Rspress",
6
6
  "keywords": [
@@ -41,10 +41,10 @@
41
41
  "html-to-text": "9.0.5",
42
42
  "lodash-es": "4.17.21",
43
43
  "@rspress/shared": "2.0.3",
44
- "@module-federation/enhanced": "2.4.0",
45
- "@module-federation/rsbuild-plugin": "2.4.0",
46
- "@module-federation/sdk": "2.4.0",
47
- "@module-federation/error-codes": "2.4.0"
44
+ "@module-federation/sdk": "2.5.1",
45
+ "@module-federation/enhanced": "2.5.1",
46
+ "@module-federation/rsbuild-plugin": "2.5.1",
47
+ "@module-federation/error-codes": "2.5.1"
48
48
  },
49
49
  "peerDependencies": {
50
50
  "@rspress/core": "^2.0.3"