@kenjura/ursa 0.9.0 → 0.32.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,71 @@
1
+ import { createHash } from 'crypto';
2
+ import { readFile, writeFile, mkdir } from 'fs/promises';
3
+ import { existsSync } from 'fs';
4
+ import { dirname, join } from 'path';
5
+
6
+ const URSA_DIR = '.ursa'; // Directory name joined onto the source directory by getUrsaDir()
7
+ const HASH_CACHE_FILE = 'content-hashes.json'; // JSON file inside URSA_DIR that loadHashCache()/saveHashCache() read and write
8
+
9
/**
 * Resolve the location of the `.ursa` directory belonging to a source tree.
 *
 * @param {string} sourceDir - Root directory of the source content
 * @returns {string} `sourceDir` joined with the `.ursa` directory name
 */
export function getUrsaDir(sourceDir) {
  const ursaPath = join(sourceDir, URSA_DIR);
  return ursaPath;
}
15
+
16
/**
 * Compute a short fingerprint of some content.
 *
 * The fingerprint is the first 12 hexadecimal characters of the content's
 * MD5 digest.
 *
 * @param {string|Buffer} content - Data to fingerprint (anything `Hash.update` accepts)
 * @returns {string} 12-character lowercase hex string
 */
export function hashContent(content) {
  const md5 = createHash('md5');
  md5.update(content);
  const fullDigest = md5.digest('hex');
  return fullDigest.slice(0, 12);
}
22
+
23
/**
 * Load the hash cache from disk (the cache file inside the `.ursa` folder of
 * the source directory).
 *
 * Loading is best-effort and never throws:
 * - a missing cache file yields an empty Map without any warning;
 * - an unreadable file, invalid JSON, or JSON that is not a plain object
 *   (e.g. an array or a string) logs a warning and yields an empty Map.
 *
 * @param {string} sourceDir - Root directory of the source content
 * @returns {Promise<Map<string, *>>} Map built from the entries of the stored JSON object
 */
export async function loadHashCache(sourceDir) {
  const cachePath = join(getUrsaDir(sourceDir), HASH_CACHE_FILE);
  try {
    // Read directly instead of checking existence first: this avoids a
    // check-then-use race, and a missing file is handled via ENOENT below.
    const data = await readFile(cachePath, 'utf8');
    const parsed = JSON.parse(data);

    // Object.entries() on an array or string would silently produce a Map
    // keyed by indices, so reject anything that is not a plain JSON object.
    if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
      console.warn('Could not load hash cache:', 'cache file does not contain a JSON object');
      return new Map();
    }

    return new Map(Object.entries(parsed));
  } catch (e) {
    // A cache that does not exist yet is the normal first-run case.
    if (e.code !== 'ENOENT') {
      console.warn('Could not load hash cache:', e.message);
    }
  }
  return new Map();
}
38
+
39
/**
 * Persist the hash cache to disk (the cache file inside the `.ursa` folder of
 * the source directory), creating that folder when necessary.
 *
 * Saving is best-effort: any failure is reported with a warning and is not
 * rethrown.
 *
 * @param {string} sourceDir - Root directory of the source content
 * @param {Map<string, *>} hashMap - Cache entries to write out as a JSON object
 * @returns {Promise<void>}
 */
export async function saveHashCache(sourceDir, hashMap) {
  const targetDir = getUrsaDir(sourceDir);
  const targetFile = join(targetDir, HASH_CACHE_FILE);

  try {
    await mkdir(targetDir, { recursive: true });
    const serialized = JSON.stringify(Object.fromEntries(hashMap), null, 2);
    await writeFile(targetFile, serialized);
    console.log(`Saved ${hashMap.size} hashes to ${targetFile}`);
  } catch (err) {
    console.warn('Could not save hash cache:', err.message);
  }
}
54
+
55
/**
 * Decide whether a file has to be regenerated by comparing the hash of its
 * current content with the hash recorded in the cache.
 *
 * @param {string} filePath - Cache key of the file
 * @param {string|Buffer} content - Current content of the file
 * @param {Map<string, string>} hashCache - Previously recorded hashes
 * @returns {boolean} true when the cache has no entry for the file or the hashes differ
 */
export function needsRegeneration(filePath, content, hashCache) {
  return hashContent(content) !== hashCache.get(filePath);
}
63
+
64
/**
 * Record the hash of a file's current content in the cache.
 *
 * @param {string} filePath - Cache key of the file
 * @param {string|Buffer} content - Current content of the file
 * @param {Map<string, string>} hashCache - Cache to update in place
 * @returns {string} The hash that was stored
 */
export function updateHash(filePath, content, hashCache) {
  const digest = hashContent(content);
  hashCache.set(filePath, digest);
  return digest;
}
@@ -0,0 +1,13 @@
1
+ import { open } from "node:fs/promises";
2
+
3
/**
 * Check whether `path` refers to an existing regular file.
 *
 * The file is opened read-only, so files the process cannot write to are
 * still reported as existing. Directories and paths that cannot be opened
 * report `false`.
 *
 * @param {string} path - Path to test
 * @returns {Promise<boolean>} true when `path` can be opened and is a regular file
 */
export async function fileExists(path) {
  let filehandle = null;
  try {
    // "r" instead of "r+": existence must not depend on write permission.
    filehandle = await open(path, "r");
    // A read-only open can succeed on a directory; only regular files count.
    const stats = await filehandle.stat();
    return stats.isFile();
  } catch (err) {
    return false;
  } finally {
    // Await the close so the descriptor is released before returning and a
    // failure cannot surface as an unhandled rejection.
    await filehandle?.close();
  }
}
@@ -0,0 +1,26 @@
1
+ import { join, dirname, resolve } from "path";
2
+ import { existsSync } from "fs";
3
+
4
/**
 * Walk up the directory tree looking for a stylesheet and return its contents.
 *
 * Starting at `startDir`, each directory is probed for the given filenames in
 * order; the first file that exists wins. The walk moves to the parent
 * directory until either `baseDir` or the filesystem root has been checked.
 *
 * @param {string} startDir - Directory to start searching from
 * @param {string[]} [names=["style-ursa.css", "style.css", "_style.css"]] - Filenames to look for, in priority order
 * @param {string} [baseDir] - Last directory to check; when omitted the walk continues to the filesystem root
 * @returns {Promise<string|null>} CSS contents of the first match, or null if nothing was found
 */
export async function findStyleCss(startDir, names = ["style-ursa.css", "style.css", "_style.css"], baseDir = null) {
  // Without an explicit base directory the walk ends at the filesystem root,
  // which is detected by a directory being its own parent.
  const stopDir = baseDir ? resolve(baseDir) : null;
  let current = resolve(startDir);

  for (;;) {
    const found = names
      .map((name) => join(current, name))
      .find((candidate) => existsSync(candidate));

    if (found) {
      const { readFile } = await import('fs/promises');
      return readFile(found, "utf8");
    }

    const parent = dirname(current);
    if (current === stopDir || current === parent) {
      return null;
    }
    current = parent;
  }
}
@@ -0,0 +1,246 @@
1
+ import { extname, dirname, join, normalize, posix } from "path";
2
+
3
/**
 * Build the set of valid internal URL paths from the list of source files.
 *
 * For every file the source directory prefix and the file extension are
 * removed, a leading slash is ensured, and the lowercased result is stored
 * both as-is and with ".html" appended. Files whose path ends in "/index"
 * additionally register their directory ("/dir", "/dir/", "/dir/index.html").
 * The site root ("/" and "/index.html") is always included.
 *
 * @param {string[]} sourceFiles - Array of source file paths
 * @param {string} source - Source directory path
 * @returns {Set<string>} Set of valid internal paths (lowercased)
 */
export function buildValidPaths(sourceFiles, source) {
  const validPaths = new Set();

  for (const file of sourceFiles) {
    // Strip the source directory only when it is a real prefix; a plain
    // String.replace would also remove a match in the middle of the path.
    const withoutSource = source && file.startsWith(source)
      ? file.slice(source.length)
      : file;

    // Strip the extension from the END of the path only. Replacing the first
    // occurrence would corrupt paths such as "/a.md/b.md".
    const ext = extname(file);
    let relativePath = ext && withoutSource.endsWith(ext)
      ? withoutSource.slice(0, -ext.length)
      : withoutSource;

    // Normalize: ensure leading slash
    if (!relativePath.startsWith("/")) {
      relativePath = "/" + relativePath;
    }

    // Register the extensionless path and its ".html" form
    validPaths.add(relativePath.toLowerCase());
    validPaths.add((relativePath + ".html").toLowerCase());

    // Directory indexes are also reachable through the directory itself
    if (relativePath.endsWith("/index")) {
      const dirPath = relativePath.replace(/\/index$/, "");
      validPaths.add(dirPath.toLowerCase());
      validPaths.add((dirPath + "/").toLowerCase());
      validPaths.add((dirPath + "/index.html").toLowerCase());
    }
  }

  // Add root
  validPaths.add("/");
  validPaths.add("/index.html");

  return validPaths;
}
41
+
42
/**
 * Tell whether an href should be treated as a link into this site.
 *
 * An href is NOT internal when it is empty, starts with `http://`, `https://`,
 * `//`, `mailto:`, `tel:`, `javascript:` or `#` (matched case-insensitively),
 * or starts with lowercase `data:`.
 *
 * @param {string} href - The href value
 * @returns {boolean} true for internal links, false otherwise
 */
function isInternalLink(href) {
  if (!href) return false;

  const hasExternalPrefix = /^(https?:)?\/\/|^mailto:|^tel:|^javascript:|^#/i.test(href);
  const isDataUrl = href.startsWith("data:");

  return !(hasExternalPrefix || isDataUrl);
}
62
+
63
/**
 * Tell whether an href is relative, i.e. non-empty and not starting with "/".
 * This covers "./x", "../x" and bare "x" alike.
 *
 * @param {string} href - The href value
 * @returns {boolean} true when the href is relative
 */
function isRelativeLink(href) {
  // "./" and "../" prefixes never start with "/", so one check covers all forms.
  return Boolean(href) && !href.startsWith('/');
}
72
+
73
/**
 * Turn a relative href into a path by resolving it against the directory of
 * the current document, using POSIX ("/") path rules.
 *
 * @param {string} href - The relative href
 * @param {string} currentDocPath - The current document's URL path (e.g., "/character/index.html")
 * @returns {string} Normalized path of the link target
 */
function resolveRelativePath(href, currentDocPath) {
  const baseDir = posix.dirname(currentDocPath);
  return posix.normalize(posix.join(baseDir, href));
}
88
+
89
/**
 * Reduce an href to the canonical form used for lookups in the valid-path set:
 * no fragment, no query string, resolved against the current document when
 * relative, leading slash, URI-decoded, lowercased.
 *
 * @param {string} href - The href to normalize
 * @param {string} currentDocPath - The current document's URL path (for relative link resolution)
 * @returns {string} Normalized path
 */
function normalizeHref(href, currentDocPath = null) {
  // Cut at the first "#" or "?", whichever comes first.
  const cutAt = href.search(/[#?]/);
  let path = cutAt === -1 ? href : href.slice(0, cutAt);

  if (currentDocPath && isRelativeLink(path)) {
    path = resolveRelativePath(path, currentDocPath);
  }

  if (!path.startsWith("/")) {
    path = "/" + path;
  }

  try {
    path = decodeURIComponent(path);
  } catch {
    // Malformed escape sequences: keep the undecoded path.
  }

  return path.toLowerCase();
}
121
+
122
/**
 * Resolve an href to a valid path, trying .html and /index.html extensions.
 * Returns { resolvedHref, inactive, debug } where:
 * - resolvedHref is the corrected href (with .html extension if needed); any
 *   query string and hash fragment of the original href are re-appended
 *   after the resolved path
 * - inactive is true if the link doesn't resolve to a valid path
 * - debug contains information about what was tried
 *
 * @param {string} href - The original href
 * @param {Set<string>} validPaths - Set of valid internal paths (lowercased)
 * @param {string} currentDocPath - The current document's URL path (for relative link resolution)
 * @returns {{ resolvedHref: string, inactive: boolean, debug: string }}
 */
function resolveHref(href, validPaths, currentDocPath = null) {
  const debugTries = [];
  const result = (resolvedHref, inactive) => ({
    resolvedHref,
    inactive,
    debug: debugTries.join(' | '),
  });

  // Split the href into path, query string and hash fragment. Query and hash
  // are kept aside so an added ".html" / "/index.html" lands on the path and
  // a "." inside the query is not mistaken for a file extension.
  const hashIndex = href.indexOf('#');
  const hash = hashIndex >= 0 ? href.substring(hashIndex) : '';
  const hrefWithoutHash = hashIndex >= 0 ? href.substring(0, hashIndex) : href;
  const queryIndex = hrefWithoutHash.indexOf('?');
  const query = queryIndex >= 0 ? hrefWithoutHash.substring(queryIndex) : '';
  const pathOnly = queryIndex >= 0 ? hrefWithoutHash.substring(0, queryIndex) : hrefWithoutHash;
  const suffix = query + hash;

  // Normalize for checking (resolve relative paths if currentDocPath provided)
  const normalized = normalizeHref(pathOnly, currentDocPath);

  // Calculate the resolved absolute href (for updating the link)
  const absoluteHref = currentDocPath && isRelativeLink(pathOnly)
    ? resolveRelativePath(pathOnly, currentDocPath)
    : pathOnly;

  // Exact match: keep the path as written (made absolute)
  if (validPaths.has(normalized)) {
    debugTries.push(`${normalized} → ✓ (exact)`);
    return result(absoluteHref + suffix, false);
  }

  // An explicit extension that did not match is a broken link
  if (extname(pathOnly)) {
    debugTries.push(`${normalized} → ✗`);
    return result(absoluteHref + suffix, true);
  }

  // No extension - try .html first
  const htmlPath = normalized + '.html';
  const htmlExists = validPaths.has(htmlPath);
  debugTries.push(`${htmlPath} → ${htmlExists ? '✓' : '✗'}`);
  if (htmlExists) {
    return result(absoluteHref + '.html' + suffix, false);
  }

  // Try /index.html
  const indexPath = normalized.endsWith('/')
    ? normalized + 'index.html'
    : normalized + '/index.html';
  const indexExists = validPaths.has(indexPath);
  debugTries.push(`${indexPath} → ${indexExists ? '✓' : '✗'}`);
  if (indexExists) {
    const withIndex = absoluteHref.endsWith('/')
      ? absoluteHref + 'index.html'
      : absoluteHref + '/index.html';
    return result(withIndex + suffix, false);
  }

  // Neither exists - mark as inactive, keep absolute href
  return result(absoluteHref + suffix, true);
}
190
+
191
/**
 * Rewrite the internal links of an HTML document.
 *
 * For every `<a ... href="...">text</a>` whose text contains no nested tags
 * and whose href is internal, the href is replaced by its resolved form and,
 * when the target does not exist, the class "inactive" is added (appended to
 * an existing class attribute, otherwise inserted as a new one). Other
 * anchors are left untouched.
 *
 * @param {string} html - The HTML content
 * @param {Set<string>} validPaths - Set of valid internal paths
 * @param {string} currentDocPath - The current document's URL path (e.g., "/character/index.html")
 * @param {boolean} includeDebug - Whether to append the resolution debug info to the link text
 * @returns {string} Processed HTML with resolved links and inactive class on broken links
 */
export function markInactiveLinks(html, validPaths, currentDocPath = '/', includeDebug = false) {
  // Groups: attributes before href, href value, attributes after href, link text
  const anchorPattern = /<a\s+([^>]*?)href=["']([^"']+)["']([^>]*)>([^<]*)<\/a>/gi;
  const classPattern = /class=["']([^"']*)["']/i;

  // Append "inactive" to an already-present class attribute (no-op if present).
  const withInactiveClass = (attrs, classMatch) => {
    const [attribute, classes] = classMatch;
    if (classes.includes('inactive')) return attrs;
    return attrs.replace(attribute, `class="${classes} inactive"`);
  };

  return html.replace(anchorPattern, (match, before, href, after, text) => {
    if (!isInternalLink(href)) {
      return match;
    }

    const { resolvedHref, inactive, debug } = resolveHref(href, validPaths, currentDocPath);

    let leadingAttrs = before;
    let trailingAttrs = after;

    if (inactive) {
      const classBeforeHref = before.match(classPattern);
      const classAfterHref = classBeforeHref ? null : after.match(classPattern);

      if (classBeforeHref) {
        leadingAttrs = withInactiveClass(before, classBeforeHref);
      } else if (classAfterHref) {
        trailingAttrs = withInactiveClass(after, classAfterHref);
      } else {
        leadingAttrs = `class="inactive" ${before}`;
      }
    }

    const debugText = includeDebug ? ` [DEBUG: ${debug}]` : '';

    return `<a ${leadingAttrs}href="${resolvedHref}"${trailingAttrs}>${text}${debugText}</a>`;
  });
}
@@ -3,6 +3,9 @@ import { parse } from "yaml";
3
3
  export function extractMetadata(rawBody) {
4
4
  const frontMatter = matchFrontMatter(rawBody);
5
5
  if (frontMatter === null) return null;
6
+
7
+ // Don't try to parse empty or whitespace-only content
8
+ if (frontMatter.trim().length === 0) return null;
6
9
 
7
10
  const parsedYml = parse(frontMatter);
8
11
  return parsedYml;
@@ -16,15 +19,23 @@ export function extractRawMetadata(rawBody) {
16
19
  }
17
20
 
18
21
  function matchFrontMatter(str) {
19
- const match = str.match(/---(.*?)---/s);
20
- if (Array.isArray(match) && match.length > 1) {
21
- return match[1];
22
- } else return null;
22
+ // Only match YAML front matter at the start of the file
23
+ // Must have --- at line start, content, then closing --- also at line start
24
+ // The (?=\n|$) ensures the closing --- is followed by newline or end of string
25
+ const match = str.match(/^---\n([\s\S]+?)\n---(?=\n|$)/);
26
+ if (!match || match.length < 2) return null;
27
+
28
+ // Return null if the captured content is empty or only whitespace
29
+ const content = match[1].trim();
30
+ return content.length > 0 ? match[1] : null;
23
31
  }
24
32
 
25
33
  function matchAllFrontMatter(str) {
26
- const match = str.match(/---(.*?)---\n+/s);
27
- if (Array.isArray(match) && match.length > 0) {
28
- return match[0];
29
- } else return null;
34
+ // Only match YAML front matter at the start of the file
35
+ const match = str.match(/^---\n([\s\S]+?)\n---(?=\n|$)/);
36
+ if (!match || match.length < 2) return null;
37
+
38
+ // Check if there's actual content between the delimiters
39
+ const content = match[1].trim();
40
+ return content.length > 0 ? match[0] + '\n' : null;
30
41
  }
@@ -0,0 +1,66 @@
1
+ import { readFile } from 'fs/promises';
2
+ import { resolve, relative } from 'path';
3
+ import { existsSync } from 'fs';
4
+
5
/**
 * Build a predicate that decides whether a file is covered by a whitelist file.
 *
 * The whitelist is a text file with one pattern per line; blank lines and
 * lines starting with "#" are ignored. When no whitelist path is given, the
 * file does not exist, cannot be read, or contains no patterns, the returned
 * predicate accepts every file.
 *
 * A file is accepted when at least one pattern:
 * - starts with "/" and equals the file's absolute path, or
 * - occurs anywhere in the file's path relative to `sourceRoot`, or
 * - ends with "/" and names the relative path itself or a directory prefix of it, or
 * - (patterns not ending with "/") equals the file name or occurs anywhere in
 *   the absolute path.
 *
 * @param {string} whitelistPath - Path to the whitelist file
 * @param {string} sourceRoot - Root source directory for relative path matching
 * @returns {Promise<Function>} Filter function that returns true if file should be included
 */
export async function createWhitelistFilter(whitelistPath, sourceRoot) {
  const includeAll = () => true;

  if (!whitelistPath || !existsSync(whitelistPath)) {
    return includeAll; // No whitelist = include all files
  }

  let patterns;
  try {
    const rawList = await readFile(whitelistPath, 'utf8');
    patterns = rawList
      .split('\n')
      .map((line) => line.trim())
      .filter((line) => line !== '' && !line.startsWith('#'));
  } catch (error) {
    console.warn(`Warning: Could not read whitelist file ${whitelistPath}:`, error.message);
    return includeAll; // Fallback to include all files
  }

  if (patterns.length === 0) {
    return includeAll; // Empty whitelist = include all files
  }

  const matchesPattern = (pattern, absolutePath, relativePath) => {
    // Full absolute path match
    if (pattern.startsWith('/') && absolutePath === pattern) return true;

    // Relative path match (exact or anywhere inside the relative path)
    if (relativePath.includes(pattern)) return true;

    // Directory pattern: decided solely by the relative path
    if (pattern.endsWith('/')) {
      const dirName = pattern.slice(0, -1);
      return relativePath === dirName || relativePath.startsWith(`${dirName}/`);
    }

    // File name match or partial match anywhere in the absolute path
    const fileName = absolutePath.split('/').pop();
    return fileName === pattern || absolutePath.includes(pattern);
  };

  return (filePath) => {
    const absolutePath = resolve(filePath);
    const relativePath = relative(sourceRoot, absolutePath);
    return patterns.some((pattern) => matchesPattern(pattern, absolutePath, relativePath));
  };
}
@@ -1,11 +1,14 @@
1
+ import { getImageTag } from './WikiImage.js';
2
+
1
3
  let instance = {};
2
4
 
3
5
  export function wikiToHtml({ wikitext, articleName, args } = {}) {
4
6
  if (!args) args = { db: "noDB", noSection: true, noTOC: true };
5
7
  if (!wikitext) return "nothing to render";
6
8
 
7
- const linkbase = ("/" + args.db + "/").replace(/\/\//g, "/");
8
- const imageroot = ("/" + args.db + "/img/").replace(/\/\//g, "/");
9
+ const db = args.db || "noDB";
10
+ const linkbase = ("/" + db + "/").replace(/\/\//g, "/");
11
+ const imageroot = ("/" + db + "/img/").replace(/\/\//g, "/");
9
12
 
10
13
  const allArticles = args.allArticles || [];
11
14
 
@@ -330,7 +333,7 @@ export function wikiToHtml({ wikitext, articleName, args } = {}) {
330
333
  case "IFRAME":
331
334
  return '<iframe src="' + articleName + '"' + getArg(0) + "></iframe>";
332
335
  case "IMAGE":
333
- return WikiImage.getImageTag({
336
+ return getImageTag({
334
337
  name: articleName,
335
338
  args: args,
336
339
  imgUrl: imageroot + articleName,
package/src/index.js CHANGED
@@ -2,7 +2,8 @@ import { generate } from "./jobs/generate.js";
2
2
 
3
3
  import { join, resolve } from "path";
4
4
 
5
- const source = process.env.SOURCE ?? join(process.cwd(), "source");
6
- const build = process.env.BUILD ?? join(process.cwd(), "build");
5
+ const _source = process.env.SOURCE ?? join(process.cwd(), "source");
6
+ const _meta = process.env.META ?? join(process.cwd(), "meta");
7
+ const _output = process.env.OUTPUT ?? join(process.cwd(), "output");
7
8
 
8
- generate({ source, build });
9
+ generate({ _source, _meta, _output });