wiki-search-index 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -0
- package/builder/lib/build-index.mjs +4 -2
- package/builder/lib/markdown.mjs +12 -2
- package/builder/wiki-index.mjs +0 -0
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -54,6 +54,8 @@ Run locally: `python3 -m http.server` from the repo root, then open
|
|
|
54
54
|
|
|
55
55
|
## Release notes
|
|
56
56
|
|
|
57
|
+
- 0.1.3 _Fix: HTML entities in headings now decode too, so anchors and section titles match GitHub (0.1.1 only handled body text)._
|
|
58
|
+
- 0.1.2 _Set the published CLI's execute bit (tidy hygiene; npm already chmods bins on install)._
|
|
57
59
|
- 0.1.1 _HTML entities (`&mdash;`, `&#1234;`, …) are decoded so they no longer pollute the index._
|
|
58
60
|
- 0.1.0 _Initial release of the `wiki-search-index` builder._
|
|
59
61
|
|
|
@@ -7,16 +7,18 @@
|
|
|
7
7
|
|
|
8
8
|
import {readdir, readFile} from 'node:fs/promises';
|
|
9
9
|
import {join} from 'node:path';
|
|
10
|
-
import {splitSections} from './markdown.mjs';
|
|
10
|
+
import {splitSections, decodeEntities} from './markdown.mjs';
|
|
11
11
|
import {createSlugger} from './slug.mjs';
|
|
12
12
|
|
|
13
13
|
// GitHub stores page "Foo Bar" as Foo-Bar.md and special pages (_Sidebar,
|
|
14
14
|
// _Footer, …) start with an underscore — those are chrome, not content.
|
|
15
15
|
const isContentPage = name => name.endsWith('.md') && !name.startsWith('_');
|
|
16
16
|
|
|
17
|
+
// Entity-decoded so a title carrying e.g. &amp; displays the glyph, matching
|
|
18
|
+
// splitSections' headings (see decodeEntities).
|
|
17
19
|
const firstH1 = md => {
|
|
18
20
|
const m = /^#\s+(.+?)\s*#*\s*$/m.exec(md);
|
|
19
|
-
return m ? m[1].trim() : null;
|
|
21
|
+
return m ? decodeEntities(m[1]).trim() : null;
|
|
20
22
|
};
|
|
21
23
|
|
|
22
24
|
export const buildIndex = async ({wikiDir, urlTemplate, siteName, fragments = true}) => {
|
package/builder/lib/markdown.mjs
CHANGED
|
@@ -7,14 +7,16 @@ const ATX = /^(#{1,6})\s+(.*?)\s*#*\s*$/;
|
|
|
7
7
|
|
|
8
8
|
// Split markdown into sections. Text before the first heading becomes a
|
|
9
9
|
// preamble section with heading=null, level=0. `#` inside fenced code is
|
|
10
|
-
// ignored so code comments don't masquerade as headings.
|
|
10
|
+
// ignored so code comments don't masquerade as headings. Headings are
|
|
11
|
+
// entity-decoded so the display text and the derived slug match GitHub, which
|
|
12
|
+
// renders entities before slugging (see decodeEntities).
|
|
11
13
|
export const splitSections = md => {
|
|
12
14
|
const sections = [{level: 0, heading: null, lines: []}];
|
|
13
15
|
let inFence = false;
|
|
14
16
|
for (const line of md.split(/\r?\n/)) {
|
|
15
17
|
if (FENCE.test(line)) inFence = !inFence;
|
|
16
18
|
const m = inFence ? null : ATX.exec(line);
|
|
17
|
-
if (m) sections.push({level: m[1].length, heading: m[2].trim(), lines: []});
|
|
19
|
+
if (m) sections.push({level: m[1].length, heading: decodeEntities(m[2]).trim(), lines: []});
|
|
18
20
|
else sections.at(-1).lines.push(line);
|
|
19
21
|
}
|
|
20
22
|
return sections
|
|
@@ -81,6 +83,14 @@ const decodeEntity = (_m, body) => {
|
|
|
81
83
|
return cp > 0 && cp <= 0x10ffff ? String.fromCodePoint(cp) : ' ';
|
|
82
84
|
};
|
|
83
85
|
|
|
86
|
+
// Resolve every HTML entity in a string to its character. Shared by toPlainText
|
|
87
|
+
// (the term index) and the heading path (display text + slug). GitHub renders a
|
|
88
|
+
// heading's entities to glyphs before slugging, so "4.2.2 &mdash; 2026-05-29"
|
|
89
|
+
// must decode to "4.2.2 — 2026-05-29" first — then the slugger drops the em dash
|
|
90
|
+
// and the two flanking spaces collapse to "--" (#422--2026-05-29), exactly as
|
|
91
|
+
// GitHub does. Slugging the raw "&mdash;" instead leaks the junk token "mdash".
|
|
92
|
+
export const decodeEntities = s => s.replace(ENTITY_RE, decodeEntity);
|
|
93
|
+
|
|
84
94
|
// Reduce Markdown to plain, collapsed text. Code *text* is kept (API names are
|
|
85
95
|
// worth searching) — only the fence delimiters are removed.
|
|
86
96
|
export const toPlainText = md =>
|
package/builder/wiki-index.mjs
CHANGED
|
File without changes
|