@kenjura/ursa 0.10.0 → 0.32.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,7 +8,51 @@ import {
   extractMetadata,
   extractRawMetadata,
 } from "../helper/metadataExtractor.js";
+import {
+  hashContent,
+  loadHashCache,
+  saveHashCache,
+  needsRegeneration,
+  updateHash,
+} from "../helper/contentHash.js";
+import {
+  buildValidPaths,
+  markInactiveLinks,
+} from "../helper/linkValidator.js";
+
+// Helper function to build search index from processed files
+function buildSearchIndex(jsonCache, source, output) {
+  const searchIndex = [];
+
+  for (const [filePath, jsonObject] of jsonCache.entries()) {
+    // Generate URL path relative to output
+    const relativePath = filePath.replace(source, '').replace(/\.(md|txt|yml)$/, '.html');
+    const url = relativePath.startsWith('/') ? relativePath : '/' + relativePath;
+
+    // Extract text content from body (strip HTML tags for search)
+    const textContent = jsonObject.bodyHtml.replace(/<[^>]*>/g, ' ').replace(/\s+/g, ' ').trim();
+    const excerpt = textContent.substring(0, 200); // First 200 chars for preview
+
+    searchIndex.push({
+      title: toTitleCase(jsonObject.name),
+      path: relativePath,
+      url: url,
+      content: excerpt
+    });
+  }
+
+  return searchIndex;
+}
+
+// Helper function to convert filename to title case
+function toTitleCase(filename) {
+  return filename
+    .split(/[-_\s]+/) // Split on hyphens, underscores, and spaces
+    .map(word => word.charAt(0).toUpperCase() + word.slice(1).toLowerCase())
+    .join(' ');
+}
 import { renderFile } from "../helper/fileRenderer.js";
+import { findStyleCss } from "../helper/findStyleCss.js";
 import { copy as copyDir, emptyDir, outputFile } from "fs-extra";
 import { basename, dirname, extname, join, parse, resolve } from "path";
 import { URL } from "url";
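
Note: ../helper/contentHash.js is imported above but its implementation is not included in this diff. A minimal sketch consistent with how generate() uses it below (a per-file content hash kept in a Map and persisted under the source's .ursa folder) might look like this; the sha-256 choice and the hashes.json filename are assumptions, not details confirmed by the package:

// Hypothetical sketch of ../helper/contentHash.js — not part of this diff.
import { createHash } from "crypto";
import { readFile } from "fs/promises";
import { outputFile } from "fs-extra";
import { join } from "path";

export function hashContent(content) {
  // Hash raw file contents so unchanged files can be skipped on later builds.
  return createHash("sha256").update(content).digest("hex");
}

export async function loadHashCache(source) {
  try {
    const raw = await readFile(join(source, ".ursa", "hashes.json"), "utf8");
    return new Map(Object.entries(JSON.parse(raw)));
  } catch {
    return new Map(); // no cache yet (or unreadable): everything regenerates
  }
}

export async function saveHashCache(source, cache) {
  const json = JSON.stringify(Object.fromEntries(cache), null, 2);
  await outputFile(join(source, ".ursa", "hashes.json"), json);
}

export function needsRegeneration(file, content, cache) {
  return cache.get(file) !== hashContent(content);
}

export function updateHash(file, content, cache) {
  cache.set(file, hashContent(content));
}

Under this reading, needsRegeneration() is a plain Map lookup plus a hash, which is why the article loop below can call it synchronously.
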
@@ -16,6 +60,8 @@ import o2x from "object-to-xml";
 import { existsSync } from "fs";
 import { fileExists } from "../helper/fileExists.js";
 
+import { createWhitelistFilter } from "../helper/whitelistFilter.js";
+
 const DEFAULT_TEMPLATE_NAME =
   process.env.DEFAULT_TEMPLATE_NAME ?? "default-template";
 
@@ -23,18 +69,30 @@ export async function generate({
   _source = join(process.cwd(), "."),
   _meta = join(process.cwd(), "meta"),
   _output = join(process.cwd(), "build"),
+  _whitelist = null,
+  _incremental = false, // Legacy flag, now ignored (always incremental)
+  _clean = false, // When true, ignore cache and regenerate all files
 } = {}) {
-  console.log({ _source, _meta, _output });
+  console.log({ _source, _meta, _output, _whitelist, _clean });
   const source = resolve(_source) + "/";
   const meta = resolve(_meta);
   const output = resolve(_output) + "/";
   console.log({ source, meta, output });
 
   const allSourceFilenamesUnfiltered = await recurse(source, [() => false]);
+
+  // Apply include filter (existing functionality)
   const includeFilter = process.env.INCLUDE_FILTER
     ? (fileName) => fileName.match(process.env.INCLUDE_FILTER)
     : Boolean;
-  const allSourceFilenames = allSourceFilenamesUnfiltered.filter(includeFilter);
+  let allSourceFilenames = allSourceFilenamesUnfiltered.filter(includeFilter);
+
+  // Apply whitelist filter if specified
+  if (_whitelist) {
+    const whitelistFilter = await createWhitelistFilter(_whitelist, source);
+    allSourceFilenames = allSourceFilenames.filter(whitelistFilter);
+    console.log(`Whitelist applied: ${allSourceFilenames.length} files after filtering`);
+  }
   // console.log(allSourceFilenames);
 
   // if (source.substr(-1) !== "/") source += "/"; // warning: might not work in windows
@@ -43,16 +101,6 @@ export async function generate({
   const templates = await getTemplates(meta); // todo: error if no default template
   // console.log({ templates });
 
-  const menu = await getMenu(allSourceFilenames, source);
-
-  // clean build directory
-  await emptyDir(output);
-
-  // create public folder
-  const pub = join(output, "public");
-  await mkdir(pub);
-  await copyDir(meta, pub);
-
   // read all articles, process them, copy them to build
   const articleExtensions = /\.(md|txt|yml)/;
   const allSourceFilenamesThatAreArticles = allSourceFilenames.filter(
@@ -63,118 +111,252 @@ export async function generate({
     (filename) => isDirectory(filename)
   );
 
-  // process individual articles
-  const jsonCache = new Map();
-  await Promise.all(
-    allSourceFilenamesThatAreArticles.map(async (file) => {
-      console.log(`processing article ${file}`);
+  // Build set of valid internal paths for link validation (must be before menu)
+  const validPaths = buildValidPaths(allSourceFilenamesThatAreArticles, source);
+  console.log(`Built ${validPaths.size} valid paths for link validation`);
+
+  const menu = await getMenu(allSourceFilenames, source, validPaths);
+
+  // Load content hash cache from .ursa folder in source directory
+  let hashCache = new Map();
+  if (!_clean) {
+    hashCache = await loadHashCache(source);
+    console.log(`Loaded ${hashCache.size} cached content hashes from .ursa folder`);
+  } else {
+    console.log(`Clean build: ignoring cached hashes`);
+  }
+
+  // create public folder
+  const pub = join(output, "public");
+  await mkdir(pub, { recursive: true });
+  await copyDir(meta, pub);
 
+  // Track errors for error report
+  const errors = [];
+
+  // First pass: collect search index data
+  const searchIndex = [];
+  const jsonCache = new Map();
+
+  // Collect basic data for search index
+  for (const file of allSourceFilenamesThatAreArticles) {
+    try {
       const rawBody = await readFile(file, "utf8");
       const type = parse(file).ext;
-      const meta = extractMetadata(rawBody);
-      const rawMeta = extractRawMetadata(rawBody);
-      const bodyLessMeta = rawBody.replace(rawMeta, "");
-      const transformedMetadata = await getTransformedMetadata(
-        dirname(file),
-        meta
-      );
       const ext = extname(file);
       const base = basename(file, ext);
       const dir = addTrailingSlash(dirname(file)).replace(source, "");
+
+      // Generate title from filename (in title case)
+      const title = toTitleCase(base);
+
+      // Generate URL path relative to output
+      const relativePath = file.replace(source, '').replace(/\.(md|txt|yml)$/, '.html');
+      const url = relativePath.startsWith('/') ? relativePath : '/' + relativePath;
+
+      // Basic content processing for search (without full rendering)
       const body = renderFile({
         fileContents: rawBody,
         type,
         dirname: dir,
         basename: base,
       });
+
+      // Extract text content from body (strip HTML tags for search)
+      const textContent = body && body.replace && body.replace(/<[^>]*>/g, ' ').replace(/\s+/g, ' ').trim() || 'body is undefined for some reason'
+      const excerpt = textContent.substring(0, 200); // First 200 chars for preview
+
+      searchIndex.push({
+        title: title,
+        path: relativePath,
+        url: url,
+        content: excerpt
+      });
+    } catch (e) {
+      console.error(`Error processing ${file} (first pass): ${e.message}`);
+      errors.push({ file, phase: 'search-index', error: e });
+    }
+  }
+
+  console.log(`Built search index with ${searchIndex.length} entries`);
+
+  // Track files that were regenerated (for incremental mode stats)
+  let regeneratedCount = 0;
+  let skippedCount = 0;
 
-      const requestedTemplateName = meta && meta.template;
-      const template =
-        templates[requestedTemplateName] || templates[DEFAULT_TEMPLATE_NAME];
-      // console.log({ requestedTemplateName, templates: templates.keys });
-
-      const finalHtml = template
-        .replace("${menu}", menu)
-        .replace("${meta}", JSON.stringify(meta))
-        .replace("${transformedMetadata}", transformedMetadata)
-        .replace("${body}", body);
-
-      const outputFilename = file
-        .replace(source, output)
-        .replace(parse(file).ext, ".html");
-
-      console.log(`writing article to ${outputFilename}`);
-
-      await outputFile(outputFilename, finalHtml);
-
-      // json
-
-      const jsonOutputFilename = outputFilename.replace(".html", ".json");
-      const jsonObject = {
-        name: base,
-        contents: rawBody,
-        // bodyLessMeta: bodyLessMeta,
-        bodyHtml: body,
-        metadata: meta,
-        transformedMetadata,
-        // html: finalHtml,
-      };
-      jsonCache.set(file, jsonObject);
-      const json = JSON.stringify(jsonObject);
-      console.log(`writing article to ${jsonOutputFilename}`);
-      await outputFile(jsonOutputFilename, json);
-
-      // xml
-
-      const xmlOutputFilename = outputFilename.replace(".html", ".xml");
-      const xml = `<article>${o2x(jsonObject)}</article>`;
-      await outputFile(xmlOutputFilename, xml);
+  // Second pass: process individual articles with search data available
+  await Promise.all(
+    allSourceFilenamesThatAreArticles.map(async (file) => {
+      try {
+        const rawBody = await readFile(file, "utf8");
+
+        // Skip files that haven't changed (unless --clean flag is set)
+        if (!_clean && !needsRegeneration(file, rawBody, hashCache)) {
+          skippedCount++;
+          return; // Skip this file
+        }
+
+        console.log(`processing article ${file}`);
+        regeneratedCount++;
+
+        const type = parse(file).ext;
+        const meta = extractMetadata(rawBody);
+        const rawMeta = extractRawMetadata(rawBody);
+        const bodyLessMeta = rawMeta ? rawBody.replace(rawMeta, "") : rawBody;
+        const transformedMetadata = await getTransformedMetadata(
+          dirname(file),
+          meta
+        );
+        const ext = extname(file);
+        const base = basename(file, ext);
+        const dir = addTrailingSlash(dirname(file)).replace(source, "");
+
+        // Calculate the document's URL path (e.g., "/character/index.html")
+        const docUrlPath = '/' + dir + base + '.html';
+
+        // Generate title from filename (in title case)
+        const title = toTitleCase(base);
+
+        const body = renderFile({
+          fileContents: rawBody,
+          type,
+          dirname: dir,
+          basename: base,
+        });
+
+        // Find nearest style.css or _style.css up the tree
+        let embeddedStyle = "";
+        try {
+          const css = await findStyleCss(resolve(_source, dir));
+          if (css) {
+            embeddedStyle = css;
+          }
+        } catch (e) {
+          // ignore
+          console.error(e);
+        }
+
+        const requestedTemplateName = meta && meta.template;
+        const template =
+          templates[requestedTemplateName] || templates[DEFAULT_TEMPLATE_NAME];
+
+        if (!template) {
+          throw new Error(`Template not found. Requested: "${requestedTemplateName || DEFAULT_TEMPLATE_NAME}". Available templates: ${Object.keys(templates).join(', ') || 'none'}`);
+        }
+
+        // Insert embeddedStyle just before </head> if present, else at top
+        let finalHtml = template
+          .replace("${title}", title)
+          .replace("${menu}", menu)
+          .replace("${meta}", JSON.stringify(meta))
+          .replace("${transformedMetadata}", transformedMetadata)
+          .replace("${body}", body)
+          .replace("${embeddedStyle}", embeddedStyle)
+          .replace("${searchIndex}", JSON.stringify(searchIndex));
+
+        // Resolve links and mark broken internal links as inactive (debug mode on)
+        // Pass docUrlPath so relative links can be resolved correctly
+        finalHtml = markInactiveLinks(finalHtml, validPaths, docUrlPath, false);
+
+        const outputFilename = file
+          .replace(source, output)
+          .replace(parse(file).ext, ".html");
+
+        console.log(`writing article to ${outputFilename}`);
+
+        await outputFile(outputFilename, finalHtml);
+
+        // json
+
+        const jsonOutputFilename = outputFilename.replace(".html", ".json");
+        const url = '/' + outputFilename.replace(output, '');
+        const jsonObject = {
+          name: base,
+          url,
+          contents: rawBody,
+          // bodyLessMeta: bodyLessMeta,
+          bodyHtml: body,
+          metadata: meta,
+          transformedMetadata,
+          // html: finalHtml,
+        };
+        jsonCache.set(file, jsonObject);
+        const json = JSON.stringify(jsonObject);
+        console.log(`writing article to ${jsonOutputFilename}`);
+        await outputFile(jsonOutputFilename, json);
+
+        // xml
+
+        const xmlOutputFilename = outputFilename.replace(".html", ".xml");
+        const xml = `<article>${o2x(jsonObject)}</article>`;
+        await outputFile(xmlOutputFilename, xml);
+
+        // Update the content hash for this file
+        updateHash(file, rawBody, hashCache);
+      } catch (e) {
+        console.error(`Error processing ${file} (second pass): ${e.message}`);
+        errors.push({ file, phase: 'article-generation', error: e });
+      }
     })
   );
 
+  // Log build stats
+  console.log(`Build: ${regeneratedCount} regenerated, ${skippedCount} unchanged`);
+
   console.log(jsonCache.keys());
+
   // process directory indices
   await Promise.all(
     allSourceFilenamesThatAreDirectories.map(async (dir) => {
-      console.log(`processing directory ${dir}`);
-
-      const pathsInThisDirectory = allSourceFilenames.filter((filename) =>
-        filename.match(new RegExp(`${dir}.+`))
-      );
-
-      const jsonObjects = pathsInThisDirectory
-        .map((path) => {
-          const object = jsonCache.get(path);
-          return typeof object === "object" ? object : null;
-        })
-        .filter((a) => a);
+      try {
+        console.log(`processing directory ${dir}`);
 
-      const json = JSON.stringify(jsonObjects);
+        const pathsInThisDirectory = allSourceFilenames.filter((filename) =>
+          filename.match(new RegExp(`${dir}.+`))
+        );
 
-      const outputFilename = dir.replace(source, output) + ".json";
-
-      console.log(`writing directory index to ${outputFilename}`);
-      await outputFile(outputFilename, json);
-
-      // html
-      const htmlOutputFilename = dir.replace(source, output) + ".html";
-      const indexAlreadyExists = fileExists(htmlOutputFilename);
-      if (!indexAlreadyExists) {
-        const template = templates["default-template"]; // TODO: figure out a way to specify template for a directory index
-        const indexHtml = `<ul>${pathsInThisDirectory
+        const jsonObjects = pathsInThisDirectory
           .map((path) => {
-            const partialPath = path
-              .replace(source, "")
-              .replace(parse(path).ext, ".html");
-            const name = basename(path, parse(path).ext);
-            return `<li><a href="${partialPath}">${name}</a></li>`;
+            const object = jsonCache.get(path);
+            return typeof object === "object" ? object : null;
           })
-          .join("")}</ul>`;
-        const finalHtml = template
-          .replace("${menu}", menu)
-          .replace("${body}", indexHtml);
-        console.log(`writing directory index to ${htmlOutputFilename}`);
-        await outputFile(htmlOutputFilename, finalHtml);
+          .filter((a) => a);
+
+        const json = JSON.stringify(jsonObjects);
+
+        const outputFilename = dir.replace(source, output) + ".json";
+
+        console.log(`writing directory index to ${outputFilename}`);
+        await outputFile(outputFilename, json);
+
+        // html
+        const htmlOutputFilename = dir.replace(source, output) + ".html";
+        const indexAlreadyExists = fileExists(htmlOutputFilename);
+        if (!indexAlreadyExists) {
+          const template = templates["default-template"]; // TODO: figure out a way to specify template for a directory index
+          const indexHtml = `<ul>${pathsInThisDirectory
+            .map((path) => {
+              const partialPath = path
+                .replace(source, "")
+                .replace(parse(path).ext, ".html");
+              const name = basename(path, parse(path).ext);
+              return `<li><a href="${partialPath}">${name}</a></li>`;
+            })
+            .join("")}</ul>`;
+          const finalHtml = template
+            .replace("${menu}", menu)
+            .replace("${body}", indexHtml)
+            .replace("${searchIndex}", JSON.stringify(searchIndex))
+            .replace("${title}", "Index")
+            .replace("${meta}", "{}")
+            .replace("${transformedMetadata}", "")
+            .replace("${embeddedStyle}", "");
+          console.log(`writing directory index to ${htmlOutputFilename}`);
+          await outputFile(htmlOutputFilename, finalHtml);
+        }
+      } catch (e) {
+        console.error(`Error processing directory ${dir}: ${e.message}`);
+        errors.push({ file: dir, phase: 'directory-index', error: e });
       }
     })
   );
@@ -186,15 +368,73 @@ export async function generate({
   );
   await Promise.all(
     allSourceFilenamesThatAreImages.map(async (file) => {
-      console.log(`processing static file ${file}`);
-
-      const outputFilename = file.replace(source, output);
-
-      console.log(`writing static file to ${outputFilename}`);
-
-      return await copyFile(file, outputFilename);
+      try {
+        // For incremental mode, check if file has changed using file stat as a quick check
+        if (_incremental) {
+          const fileStat = await stat(file);
+          const statKey = `${file}:stat`;
+          const newStatHash = `${fileStat.size}:${fileStat.mtimeMs}`;
+          if (hashCache.get(statKey) === newStatHash) {
+            return; // Skip unchanged static file
+          }
+          hashCache.set(statKey, newStatHash);
+        }
+
+        console.log(`processing static file ${file}`);
+
+        const outputFilename = file.replace(source, output);
+
+        console.log(`writing static file to ${outputFilename}`);
+
+        await mkdir(dirname(outputFilename), { recursive: true });
+        return await copyFile(file, outputFilename);
+      } catch (e) {
+        console.error(`Error processing static file ${file}: ${e.message}`);
+        errors.push({ file, phase: 'static-file', error: e });
+      }
     })
   );
+
+  // Save the hash cache to .ursa folder in source directory
+  if (hashCache.size > 0) {
+    await saveHashCache(source, hashCache);
+  }
+
+  // Write error report if there were any errors
+  if (errors.length > 0) {
+    const errorReportPath = join(output, '_errors.log');
+    const failedFiles = errors.map(e => e.file);
+
+    let report = `URSA GENERATION ERROR REPORT\n`;
+    report += `Generated: ${new Date().toISOString()}\n`;
+    report += `Total errors: ${errors.length}\n\n`;
+    report += `${'='.repeat(60)}\n`;
+    report += `FAILED FILES:\n`;
+    report += `${'='.repeat(60)}\n\n`;
+    failedFiles.forEach(f => {
+      report += `  - ${f}\n`;
+    });
+    report += `\n${'='.repeat(60)}\n`;
+    report += `ERROR DETAILS:\n`;
+    report += `${'='.repeat(60)}\n\n`;
+
+    errors.forEach(({ file, phase, error }) => {
+      report += `${'─'.repeat(60)}\n`;
+      report += `File: ${file}\n`;
+      report += `Phase: ${phase}\n`;
+      report += `Error: ${error.message}\n`;
+      if (error.stack) {
+        report += `Stack:\n${error.stack}\n`;
+      }
+      report += `\n`;
+    });
+
+    await outputFile(errorReportPath, report);
+    console.log(`\n⚠️  ${errors.length} error(s) occurred during generation.`);
+    console.log(`   Error report written to: ${errorReportPath}\n`);
+  } else {
+    console.log(`\n✅ Generation complete with no errors.\n`);
+  }
 }
 
 /**
@@ -222,10 +462,10 @@ async function getTemplates(meta) {
   return templates;
 }
 
-async function getMenu(allSourceFilenames, source) {
+async function getMenu(allSourceFilenames, source, validPaths) {
   // todo: handle various incarnations of menu filename
 
-  const rawMenu = await getAutomenu(source);
+  const rawMenu = await getAutomenu(source, validPaths);
   const menuBody = renderFile({ fileContents: rawMenu, type: ".md" });
   return menuBody;
 