wiki-search-index 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -54,6 +54,8 @@ Run locally: `python3 -m http.server` from the repo root, then open
54
54
 
55
55
  ## Release notes
56
56
 
57
+ - 0.1.3 _Fix: HTML entities in headings now decode too, so anchors and section titles match GitHub (0.1.1 only handled body text)._
58
+ - 0.1.2 _Set the published CLI's execute bit (tidy hygiene; npm already chmods bins on install)._
57
59
  - 0.1.1 _HTML entities (`&mdash;`, `&#1234;`, …) are decoded so they no longer pollute the index._
58
60
  - 0.1.0 _Initial release of the `wiki-search-index` builder._
59
61
 
@@ -7,16 +7,18 @@
7
7
 
8
8
  import {readdir, readFile} from 'node:fs/promises';
9
9
  import {join} from 'node:path';
10
- import {splitSections} from './markdown.mjs';
10
+ import {splitSections, decodeEntities} from './markdown.mjs';
11
11
  import {createSlugger} from './slug.mjs';
12
12
 
13
13
  // GitHub stores page "Foo Bar" as Foo-Bar.md and special pages (_Sidebar,
14
14
  // _Footer, …) start with an underscore — those are chrome, not content.
15
15
  const isContentPage = name => name.endsWith('.md') && !name.startsWith('_');
16
16
 
17
+ // Entity-decoded so a title carrying e.g. &amp; displays the glyph, matching
18
+ // splitSections' headings (see decodeEntities).
17
19
  const firstH1 = md => {
18
20
  const m = /^#\s+(.+?)\s*#*\s*$/m.exec(md);
19
- return m ? m[1].trim() : null;
21
+ return m ? decodeEntities(m[1]).trim() : null;
20
22
  };
21
23
 
22
24
  export const buildIndex = async ({wikiDir, urlTemplate, siteName, fragments = true}) => {
@@ -7,14 +7,16 @@ const ATX = /^(#{1,6})\s+(.*?)\s*#*\s*$/;
7
7
 
8
8
  // Split markdown into sections. Text before the first heading becomes a
9
9
  // preamble section with heading=null, level=0. `#` inside fenced code is
10
- // ignored so code comments don't masquerade as headings.
10
+ // ignored so code comments don't masquerade as headings. Headings are
11
+ // entity-decoded so the display text and the derived slug match GitHub, which
12
+ // renders entities before slugging (see decodeEntities).
11
13
  export const splitSections = md => {
12
14
  const sections = [{level: 0, heading: null, lines: []}];
13
15
  let inFence = false;
14
16
  for (const line of md.split(/\r?\n/)) {
15
17
  if (FENCE.test(line)) inFence = !inFence;
16
18
  const m = inFence ? null : ATX.exec(line);
17
- if (m) sections.push({level: m[1].length, heading: m[2].trim(), lines: []});
19
+ if (m) sections.push({level: m[1].length, heading: decodeEntities(m[2]).trim(), lines: []});
18
20
  else sections.at(-1).lines.push(line);
19
21
  }
20
22
  return sections
@@ -81,6 +83,14 @@ const decodeEntity = (_m, body) => {
81
83
  return cp > 0 && cp <= 0x10ffff ? String.fromCodePoint(cp) : ' ';
82
84
  };
83
85
 
86
+ // Resolve every HTML entity in a string to its character. Shared by toPlainText
87
+ // (the term index) and the heading path (display text + slug). GitHub renders a
88
+ // heading's entities to glyphs before slugging, so "4.2.2 &mdash; 2026-05-29"
89
+ // must decode to "4.2.2 — 2026-05-29" first — then the slugger drops the em dash
90
+ // and the two flanking spaces collapse to "--" (#422--2026-05-29), exactly as
91
+ // GitHub does. Slugging the raw "&mdash;" instead leaks the junk token "mdash".
92
+ export const decodeEntities = s => s.replace(ENTITY_RE, decodeEntity);
93
+
84
94
  // Reduce Markdown to plain, collapsed text. Code *text* is kept (API names are
85
95
  // worth searching) — only the fence delimiters are removed.
86
96
  export const toPlainText = md =>
File without changes
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "wiki-search-index",
3
- "version": "0.1.1",
3
+ "version": "0.1.3",
4
4
  "description": "Build a self-describing search index from a GitHub wiki (or any Markdown docs) — the indexer for wiki-search.",
5
5
  "type": "module",
6
6
  "bin": {