@storyteller-platform/align 0.1.24 → 0.1.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. package/dist/align/align.cjs +21 -9
  2. package/dist/align/align.js +22 -11
  3. package/dist/align/getSentenceRanges.cjs +0 -58
  4. package/dist/align/getSentenceRanges.d.cts +1 -2
  5. package/dist/align/getSentenceRanges.d.ts +1 -2
  6. package/dist/align/getSentenceRanges.js +0 -57
  7. package/dist/align/interpolateSentenceRanges.cjs +124 -0
  8. package/dist/align/interpolateSentenceRanges.d.cts +23 -0
  9. package/dist/align/interpolateSentenceRanges.d.ts +23 -0
  10. package/dist/align/interpolateSentenceRanges.js +101 -0
  11. package/dist/align/search.cjs +18 -7
  12. package/dist/align/search.js +18 -7
  13. package/dist/align/slugify.cjs +31 -23
  14. package/dist/align/slugify.js +31 -23
  15. package/dist/index.d.cts +1 -2
  16. package/dist/index.d.ts +1 -2
  17. package/dist/markup/markup.cjs +21 -14
  18. package/dist/markup/markup.d.cts +2 -4
  19. package/dist/markup/markup.d.ts +2 -4
  20. package/dist/markup/markup.js +28 -16
  21. package/dist/markup/model.cjs +138 -5
  22. package/dist/markup/model.d.cts +2 -57
  23. package/dist/markup/model.d.ts +2 -57
  24. package/dist/markup/model.js +136 -5
  25. package/dist/markup/parseDom.cjs +80 -25
  26. package/dist/markup/parseDom.d.cts +4 -4
  27. package/dist/markup/parseDom.d.ts +4 -4
  28. package/dist/markup/parseDom.js +87 -24
  29. package/dist/markup/resolvedPos.cjs +85 -0
  30. package/dist/markup/resolvedPos.d.cts +2 -0
  31. package/dist/markup/resolvedPos.d.ts +2 -0
  32. package/dist/markup/resolvedPos.js +62 -0
  33. package/dist/markup/segmentation.cjs +4 -8
  34. package/dist/markup/segmentation.d.cts +3 -8
  35. package/dist/markup/segmentation.d.ts +3 -8
  36. package/dist/markup/segmentation.js +3 -7
  37. package/dist/markup/serializeDom.d.cts +1 -1
  38. package/dist/markup/serializeDom.d.ts +1 -1
  39. package/dist/markup/transform.cjs +59 -2
  40. package/dist/markup/transform.d.cts +8 -2
  41. package/dist/markup/transform.d.ts +8 -2
  42. package/dist/markup/transform.js +58 -1
  43. package/dist/model-Bv3yPEdd.d.cts +96 -0
  44. package/dist/model-Bv3yPEdd.d.ts +96 -0
  45. package/dist/snapshot/snapshot.cjs +8 -6
  46. package/dist/snapshot/snapshot.js +9 -7
  47. package/package.json +4 -4
@@ -12,16 +12,16 @@ function buildNgramIndex(text) {
12
12
  }
13
13
  return index;
14
14
  }
15
+ const NGRAM_SIZE = 5;
15
16
  function* ngrams(text) {
16
17
  const words = text.split("-");
17
- let pos = 0;
18
- for (const i of range(words.length - 4)) {
19
- const ngram = words.slice(i, i + 5).join("-");
20
- yield [ngram, pos];
21
- pos += words[i].length + 1;
18
+ for (const i of range(words.length - NGRAM_SIZE - 1)) {
19
+ const ngram = words.slice(i, i + NGRAM_SIZE).join("-");
20
+ yield [ngram, i];
22
21
  }
23
22
  }
24
23
  function collectBoundaryVotes(query, document) {
24
+ const queryWords = query.split("-");
25
25
  const documentIndex = buildNgramIndex(document);
26
26
  let skippedNgrams = 0;
27
27
  let totalNgrams = 0;
@@ -36,7 +36,7 @@ function collectBoundaryVotes(query, document) {
36
36
  }
37
37
  for (const documentStart of documentStarts) {
38
38
  startVotes.push(documentStart - start);
39
- endVotes.push(documentStart + (query.length - start));
39
+ endVotes.push(documentStart + (queryWords.length - start));
40
40
  }
41
41
  }
42
42
  if (skippedNgrams > totalNgrams / 2) {
@@ -72,6 +72,14 @@ function chooseBestFromBins(bins, dir) {
72
72
  }
73
73
  return dir > 0 ? max(best) ?? null : min(best) ?? null;
74
74
  }
75
+ function getOffsetFromWordIndex(wordIndex, document) {
76
+ const words = document.split("-");
77
+ let offset = 0;
78
+ for (const i of range(Math.min(words.length, Math.max(0, wordIndex)))) {
79
+ offset += words[i].length + 1;
80
+ }
81
+ return offset;
82
+ }
75
83
  function findBoundaries(query, document) {
76
84
  const boundaryVotes = collectBoundaryVotes(query, document);
77
85
  if (!boundaryVotes) return null;
@@ -86,7 +94,10 @@ function findBoundaries(query, document) {
86
94
  if (bestEnd === null) {
87
95
  return null;
88
96
  }
89
- return { start: bestStart, end: bestEnd };
97
+ return {
98
+ start: getOffsetFromWordIndex(bestStart, document),
99
+ end: getOffsetFromWordIndex(bestEnd, document)
100
+ };
90
101
  }
91
102
  export {
92
103
  buildNgramIndex,
@@ -25,16 +25,16 @@ var import_locale_currency = require("locale-currency");
25
25
  var import_to_words = require("to-words");
26
26
  var import_transliteration = require("@storyteller-platform/transliteration");
27
27
  const replacerMap = /* @__PURE__ */ new WeakMap();
28
- function createReplacers(locale) {
29
- const maximizedLocale = locale.maximize();
28
+ function getCurrencySymbols(locale) {
29
+ const region = locale.maximize().region;
30
30
  const demoNumber = 123456.789;
31
31
  const currencyFormat = new Intl.NumberFormat(locale, {
32
32
  style: "currency",
33
33
  // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
34
- currency: (0, import_locale_currency.getCurrency)(locale.maximize().region)
34
+ currency: region ? (0, import_locale_currency.getCurrency)(locale.maximize().region) : "USD"
35
35
  });
36
36
  const currencyParts = currencyFormat.formatToParts(demoNumber);
37
- const currencySymbols = currencyParts.reduce(
37
+ return currencyParts.reduce(
38
38
  (acc, part, index) => {
39
39
  if (part.type === "group") {
40
40
  return {
@@ -59,27 +59,12 @@ function createReplacers(locale) {
59
59
  },
60
60
  { group: "", decimal: "", currency: "", currencyLeading: true }
61
61
  );
62
- const numeralRegexPart = `(\\p{Number}[\\p{Number}${currencySymbols.group}]*(?:[${currencySymbols.decimal}]\\p{Number}*)?)`;
63
- const currencyRegex = currencySymbols.currencyLeading ? new RegExp(`[${currencySymbols.currency}]\\s?${numeralRegexPart}`, "gu") : new RegExp(`${numeralRegexPart}\\s?[${currencySymbols.currency}]`, "gu");
64
- function currencyReplacer(match) {
65
- const numeralMatch = match[1];
66
- if (!numeralMatch) return match[0];
67
- const normalizedNumeral = numeralMatch.replaceAll(new RegExp(`\\${currencySymbols.group}`, "g"), "").replace(new RegExp(`\\${currencySymbols.decimal}`), ".");
68
- const number = parseFloat(normalizedNumeral);
69
- if (Number.isNaN(number)) return match[0];
70
- try {
71
- return (0, import_to_words.toWords)(number, {
72
- localeCode: `${maximizedLocale.language}-${maximizedLocale.region}`,
73
- currency: true,
74
- doNotAddOnly: true
75
- });
76
- } catch {
77
- return match[0];
78
- }
79
- }
62
+ }
63
+ function getNumberSymbols(locale) {
64
+ const demoNumber = 123456.789;
80
65
  const numberFormat = new Intl.NumberFormat(locale);
81
66
  const numberParts = numberFormat.formatToParts(demoNumber);
82
- const numberSymbols = numberParts.reduce(
67
+ return numberParts.reduce(
83
68
  (acc, part) => {
84
69
  if (part.type === "group") {
85
70
  return {
@@ -97,6 +82,29 @@ function createReplacers(locale) {
97
82
  },
98
83
  { group: "", decimal: "" }
99
84
  );
85
+ }
86
+ function createReplacers(locale) {
87
+ const maximizedLocale = locale.maximize();
88
+ const currencySymbols = getCurrencySymbols(maximizedLocale);
89
+ const numeralRegexPart = `(\\p{Number}[\\p{Number}${currencySymbols.group}]*(?:[${currencySymbols.decimal}]\\p{Number}*)?)`;
90
+ const currencyRegex = currencySymbols.currencyLeading ? new RegExp(`[${currencySymbols.currency}]\\s?${numeralRegexPart}`, "gu") : new RegExp(`${numeralRegexPart}\\s?[${currencySymbols.currency}]`, "gu");
91
+ function currencyReplacer(match) {
92
+ const numeralMatch = match[1];
93
+ if (!numeralMatch) return match[0];
94
+ const normalizedNumeral = numeralMatch.replaceAll(new RegExp(`\\${currencySymbols.group}`, "g"), "").replace(new RegExp(`\\${currencySymbols.decimal}`), ".");
95
+ const number = parseFloat(normalizedNumeral);
96
+ if (Number.isNaN(number)) return match[0];
97
+ try {
98
+ return (0, import_to_words.toWords)(number, {
99
+ localeCode: `${maximizedLocale.language}-${maximizedLocale.region}`,
100
+ currency: true,
101
+ doNotAddOnly: true
102
+ });
103
+ } catch {
104
+ return match[0];
105
+ }
106
+ }
107
+ const numberSymbols = getNumberSymbols(maximizedLocale);
100
108
  const numberRegex = new RegExp(
101
109
  `(\\p{Number}[\\p{Number}${numberSymbols.group}]*(?:[${numberSymbols.decimal}]\\p{Number}*)?)`,
102
110
  "gu"
@@ -3,16 +3,16 @@ import { getCurrency } from "locale-currency";
3
3
  import { toWords } from "to-words";
4
4
  import { slugify as transliterateSlugify } from "@storyteller-platform/transliteration";
5
5
  const replacerMap = /* @__PURE__ */ new WeakMap();
6
- function createReplacers(locale) {
7
- const maximizedLocale = locale.maximize();
6
+ function getCurrencySymbols(locale) {
7
+ const region = locale.maximize().region;
8
8
  const demoNumber = 123456.789;
9
9
  const currencyFormat = new Intl.NumberFormat(locale, {
10
10
  style: "currency",
11
11
  // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
12
- currency: getCurrency(locale.maximize().region)
12
+ currency: region ? getCurrency(locale.maximize().region) : "USD"
13
13
  });
14
14
  const currencyParts = currencyFormat.formatToParts(demoNumber);
15
- const currencySymbols = currencyParts.reduce(
15
+ return currencyParts.reduce(
16
16
  (acc, part, index) => {
17
17
  if (part.type === "group") {
18
18
  return {
@@ -37,27 +37,12 @@ function createReplacers(locale) {
37
37
  },
38
38
  { group: "", decimal: "", currency: "", currencyLeading: true }
39
39
  );
40
- const numeralRegexPart = `(\\p{Number}[\\p{Number}${currencySymbols.group}]*(?:[${currencySymbols.decimal}]\\p{Number}*)?)`;
41
- const currencyRegex = currencySymbols.currencyLeading ? new RegExp(`[${currencySymbols.currency}]\\s?${numeralRegexPart}`, "gu") : new RegExp(`${numeralRegexPart}\\s?[${currencySymbols.currency}]`, "gu");
42
- function currencyReplacer(match) {
43
- const numeralMatch = match[1];
44
- if (!numeralMatch) return match[0];
45
- const normalizedNumeral = numeralMatch.replaceAll(new RegExp(`\\${currencySymbols.group}`, "g"), "").replace(new RegExp(`\\${currencySymbols.decimal}`), ".");
46
- const number = parseFloat(normalizedNumeral);
47
- if (Number.isNaN(number)) return match[0];
48
- try {
49
- return toWords(number, {
50
- localeCode: `${maximizedLocale.language}-${maximizedLocale.region}`,
51
- currency: true,
52
- doNotAddOnly: true
53
- });
54
- } catch {
55
- return match[0];
56
- }
57
- }
40
+ }
41
+ function getNumberSymbols(locale) {
42
+ const demoNumber = 123456.789;
58
43
  const numberFormat = new Intl.NumberFormat(locale);
59
44
  const numberParts = numberFormat.formatToParts(demoNumber);
60
- const numberSymbols = numberParts.reduce(
45
+ return numberParts.reduce(
61
46
  (acc, part) => {
62
47
  if (part.type === "group") {
63
48
  return {
@@ -75,6 +60,29 @@ function createReplacers(locale) {
75
60
  },
76
61
  { group: "", decimal: "" }
77
62
  );
63
+ }
64
+ function createReplacers(locale) {
65
+ const maximizedLocale = locale.maximize();
66
+ const currencySymbols = getCurrencySymbols(maximizedLocale);
67
+ const numeralRegexPart = `(\\p{Number}[\\p{Number}${currencySymbols.group}]*(?:[${currencySymbols.decimal}]\\p{Number}*)?)`;
68
+ const currencyRegex = currencySymbols.currencyLeading ? new RegExp(`[${currencySymbols.currency}]\\s?${numeralRegexPart}`, "gu") : new RegExp(`${numeralRegexPart}\\s?[${currencySymbols.currency}]`, "gu");
69
+ function currencyReplacer(match) {
70
+ const numeralMatch = match[1];
71
+ if (!numeralMatch) return match[0];
72
+ const normalizedNumeral = numeralMatch.replaceAll(new RegExp(`\\${currencySymbols.group}`, "g"), "").replace(new RegExp(`\\${currencySymbols.decimal}`), ".");
73
+ const number = parseFloat(normalizedNumeral);
74
+ if (Number.isNaN(number)) return match[0];
75
+ try {
76
+ return toWords(number, {
77
+ localeCode: `${maximizedLocale.language}-${maximizedLocale.region}`,
78
+ currency: true,
79
+ doNotAddOnly: true
80
+ });
81
+ } catch {
82
+ return match[0];
83
+ }
84
+ }
85
+ const numberSymbols = getNumberSymbols(maximizedLocale);
78
86
  const numberRegex = new RegExp(
79
87
  `(\\p{Number}[\\p{Number}${numberSymbols.group}]*(?:[${numberSymbols.decimal}]\\p{Number}*)?)`,
80
88
  "gu"
package/dist/index.d.cts CHANGED
@@ -7,9 +7,8 @@ import '@esfx/async-semaphore';
7
7
  import 'pino';
8
8
  import './process/AudioEncoding.cjs';
9
9
  import '@storyteller-platform/ghost-story/constants';
10
- import '@echogarden/text-segmentation';
11
10
  import '@storyteller-platform/epub';
12
- import './markup/map.cjs';
13
11
  import '@storyteller-platform/ghost-story/recognition';
14
12
  import './align/getSentenceRanges.cjs';
13
+ import '@echogarden/text-segmentation';
15
14
  import '@storyteller-platform/transliteration';
package/dist/index.d.ts CHANGED
@@ -7,9 +7,8 @@ import '@esfx/async-semaphore';
7
7
  import 'pino';
8
8
  import './process/AudioEncoding.js';
9
9
  import '@storyteller-platform/ghost-story/constants';
10
- import '@echogarden/text-segmentation';
11
10
  import '@storyteller-platform/epub';
12
- import './markup/map.js';
13
11
  import '@storyteller-platform/ghost-story/recognition';
14
12
  import './align/getSentenceRanges.js';
13
+ import '@echogarden/text-segmentation';
15
14
  import '@storyteller-platform/transliteration';
@@ -97,16 +97,11 @@ async function markup(input, output, options) {
97
97
  continue;
98
98
  }
99
99
  const chapterXml = await epub.readXhtmlItemContents(chapterId);
100
- const { result: segmentation, mapping } = await (0, import_segmentation.getXhtmlSegmentation)(
101
- import_epub.Epub.getXhtmlBody(chapterXml),
102
- { primaryLocale }
103
- );
104
- const { markedUp, timing: chapterTiming } = markupChapter(
100
+ const { markedUp, timing: chapterTiming } = await markupChapter(
105
101
  chapterId,
106
102
  chapterXml,
107
103
  options.granularity ?? "sentence",
108
- segmentation,
109
- mapping
104
+ primaryLocale
110
105
  );
111
106
  timing.add(chapterTiming.summary());
112
107
  await epub.writeXhtmlItemContents(chapterId, markedUp);
@@ -119,7 +114,7 @@ async function markup(input, output, options) {
119
114
  __callDispose(_stack, _error, _hasError);
120
115
  }
121
116
  }
122
- function markupChapter(chapterId, chapterXml, granularity, segmentation, mapping) {
117
+ async function markupChapter(chapterId, chapterXml, granularity, locale) {
123
118
  const timing = (0, import_ghost_story.createTiming)();
124
119
  const html = import_epub.Epub.findXmlChildByName("html", chapterXml);
125
120
  if (!html) throw new Error("Invalid XHTML document: no html element");
@@ -128,8 +123,14 @@ function markupChapter(chapterId, chapterXml, granularity, segmentation, mapping
128
123
  clearBodyElement(chapterXml);
129
124
  const taggedHtml = import_epub.Epub.findXmlChildByName("html", chapterXml);
130
125
  const taggedBody = import_epub.Epub.findXmlChildByName("body", taggedHtml["html"]);
126
+ const original = (0, import_parseDom.parseDom)(import_epub.Epub.getXmlChildren(body));
127
+ const inlined = (0, import_transform.inlineFootnotes)(original);
128
+ const lifted = (0, import_transform.liftText)(inlined.root);
129
+ const segmentation = await (0, import_segmentation.segmentChapter)(lifted.result, {
130
+ primaryLocale: locale
131
+ });
131
132
  timing.time("mark up", () => {
132
- let root = (0, import_parseDom.parseDom)(import_epub.Epub.getXmlChildren(body));
133
+ let root = inlined.root;
133
134
  let pos = 0;
134
135
  let i = 0;
135
136
  for (const sentence of segmentation) {
@@ -140,8 +141,8 @@ function markupChapter(chapterId, chapterXml, granularity, segmentation, mapping
140
141
  if (word.text.match(/\S/)) {
141
142
  root = (0, import_transform.addMark)(
142
143
  root,
143
- mapping.invert().map(wordPos),
144
- mapping.invert().map(wordPos + word.text.replace(/\n$/, "").length, -1),
144
+ lifted.mapping.invert().map(wordPos),
145
+ lifted.mapping.invert().map(wordPos + word.text.replace(/\n$/, "").length, -1),
145
146
  new import_model.Mark("span", { id: `${chapterId}-s${i}-w${j}` })
146
147
  );
147
148
  j++;
@@ -152,15 +153,21 @@ function markupChapter(chapterId, chapterXml, granularity, segmentation, mapping
152
153
  if (sentence.text.match(/\S/)) {
153
154
  root = (0, import_transform.addMark)(
154
155
  root,
155
- mapping.invert().map(pos),
156
- mapping.invert().map(pos + sentence.text.replace(/\n$/, "").length, -1),
156
+ lifted.mapping.invert().map(pos),
157
+ lifted.mapping.invert().map(pos + sentence.text.replace(/\n$/, "").length, -1),
157
158
  new import_model.Mark("span", { id: `${chapterId}-s${i}` })
158
159
  );
159
160
  i++;
160
161
  }
161
162
  pos += sentence.text.replace(/\n$/, "").length;
162
163
  }
163
- taggedBody["body"] = (0, import_serializeDom.serializeDom)(root);
164
+ const replaced = (0, import_transform.replaceFootnotes)(
165
+ original,
166
+ root,
167
+ inlined.footnotePairs,
168
+ inlined.mapping
169
+ );
170
+ taggedBody["body"] = (0, import_serializeDom.serializeDom)(replaced);
164
171
  });
165
172
  return { markedUp: chapterXml, timing };
166
173
  }
@@ -1,9 +1,7 @@
1
1
  import * as _storyteller_platform_ghost_story from '@storyteller-platform/ghost-story';
2
2
  import { TimingAggregator } from '@storyteller-platform/ghost-story';
3
- import { Sentence } from '@echogarden/text-segmentation';
4
3
  import { Logger } from 'pino';
5
4
  import { ParsedXml } from '@storyteller-platform/epub';
6
- import { Mapping } from './map.cjs';
7
5
 
8
6
  interface MarkupOptions {
9
7
  granularity?: "word" | "sentence";
@@ -12,9 +10,9 @@ interface MarkupOptions {
12
10
  logger?: Logger;
13
11
  }
14
12
  declare function markup(input: string, output: string, options: MarkupOptions): Promise<TimingAggregator>;
15
- declare function markupChapter(chapterId: string, chapterXml: ParsedXml, granularity: "word" | "sentence", segmentation: Sentence[], mapping: Mapping): {
13
+ declare function markupChapter(chapterId: string, chapterXml: ParsedXml, granularity: "word" | "sentence", locale: Intl.Locale | null): Promise<{
16
14
  markedUp: ParsedXml;
17
15
  timing: _storyteller_platform_ghost_story.Timing;
18
- };
16
+ }>;
19
17
 
20
18
  export { type MarkupOptions, markup, markupChapter };
@@ -1,9 +1,7 @@
1
1
  import * as _storyteller_platform_ghost_story from '@storyteller-platform/ghost-story';
2
2
  import { TimingAggregator } from '@storyteller-platform/ghost-story';
3
- import { Sentence } from '@echogarden/text-segmentation';
4
3
  import { Logger } from 'pino';
5
4
  import { ParsedXml } from '@storyteller-platform/epub';
6
- import { Mapping } from './map.js';
7
5
 
8
6
  interface MarkupOptions {
9
7
  granularity?: "word" | "sentence";
@@ -12,9 +10,9 @@ interface MarkupOptions {
12
10
  logger?: Logger;
13
11
  }
14
12
  declare function markup(input: string, output: string, options: MarkupOptions): Promise<TimingAggregator>;
15
- declare function markupChapter(chapterId: string, chapterXml: ParsedXml, granularity: "word" | "sentence", segmentation: Sentence[], mapping: Mapping): {
13
+ declare function markupChapter(chapterId: string, chapterXml: ParsedXml, granularity: "word" | "sentence", locale: Intl.Locale | null): Promise<{
16
14
  markedUp: ParsedXml;
17
15
  timing: _storyteller_platform_ghost_story.Timing;
18
- };
16
+ }>;
19
17
 
20
18
  export { type MarkupOptions, markup, markupChapter };
@@ -11,9 +11,14 @@ import {
11
11
  } from "@storyteller-platform/ghost-story";
12
12
  import { Mark } from "./model.js";
13
13
  import { parseDom } from "./parseDom.js";
14
- import { getXhtmlSegmentation } from "./segmentation.js";
14
+ import { segmentChapter } from "./segmentation.js";
15
15
  import { serializeDom } from "./serializeDom.js";
16
- import { addMark } from "./transform.js";
16
+ import {
17
+ addMark,
18
+ inlineFootnotes,
19
+ liftText,
20
+ replaceFootnotes
21
+ } from "./transform.js";
17
22
  async function markup(input, output, options) {
18
23
  var _stack = [];
19
24
  try {
@@ -35,16 +40,11 @@ async function markup(input, output, options) {
35
40
  continue;
36
41
  }
37
42
  const chapterXml = await epub.readXhtmlItemContents(chapterId);
38
- const { result: segmentation, mapping } = await getXhtmlSegmentation(
39
- Epub.getXhtmlBody(chapterXml),
40
- { primaryLocale }
41
- );
42
- const { markedUp, timing: chapterTiming } = markupChapter(
43
+ const { markedUp, timing: chapterTiming } = await markupChapter(
43
44
  chapterId,
44
45
  chapterXml,
45
46
  options.granularity ?? "sentence",
46
- segmentation,
47
- mapping
47
+ primaryLocale
48
48
  );
49
49
  timing.add(chapterTiming.summary());
50
50
  await epub.writeXhtmlItemContents(chapterId, markedUp);
@@ -57,7 +57,7 @@ async function markup(input, output, options) {
57
57
  __callDispose(_stack, _error, _hasError);
58
58
  }
59
59
  }
60
- function markupChapter(chapterId, chapterXml, granularity, segmentation, mapping) {
60
+ async function markupChapter(chapterId, chapterXml, granularity, locale) {
61
61
  const timing = createTiming();
62
62
  const html = Epub.findXmlChildByName("html", chapterXml);
63
63
  if (!html) throw new Error("Invalid XHTML document: no html element");
@@ -66,8 +66,14 @@ function markupChapter(chapterId, chapterXml, granularity, segmentation, mapping
66
66
  clearBodyElement(chapterXml);
67
67
  const taggedHtml = Epub.findXmlChildByName("html", chapterXml);
68
68
  const taggedBody = Epub.findXmlChildByName("body", taggedHtml["html"]);
69
+ const original = parseDom(Epub.getXmlChildren(body));
70
+ const inlined = inlineFootnotes(original);
71
+ const lifted = liftText(inlined.root);
72
+ const segmentation = await segmentChapter(lifted.result, {
73
+ primaryLocale: locale
74
+ });
69
75
  timing.time("mark up", () => {
70
- let root = parseDom(Epub.getXmlChildren(body));
76
+ let root = inlined.root;
71
77
  let pos = 0;
72
78
  let i = 0;
73
79
  for (const sentence of segmentation) {
@@ -78,8 +84,8 @@ function markupChapter(chapterId, chapterXml, granularity, segmentation, mapping
78
84
  if (word.text.match(/\S/)) {
79
85
  root = addMark(
80
86
  root,
81
- mapping.invert().map(wordPos),
82
- mapping.invert().map(wordPos + word.text.replace(/\n$/, "").length, -1),
87
+ lifted.mapping.invert().map(wordPos),
88
+ lifted.mapping.invert().map(wordPos + word.text.replace(/\n$/, "").length, -1),
83
89
  new Mark("span", { id: `${chapterId}-s${i}-w${j}` })
84
90
  );
85
91
  j++;
@@ -90,15 +96,21 @@ function markupChapter(chapterId, chapterXml, granularity, segmentation, mapping
90
96
  if (sentence.text.match(/\S/)) {
91
97
  root = addMark(
92
98
  root,
93
- mapping.invert().map(pos),
94
- mapping.invert().map(pos + sentence.text.replace(/\n$/, "").length, -1),
99
+ lifted.mapping.invert().map(pos),
100
+ lifted.mapping.invert().map(pos + sentence.text.replace(/\n$/, "").length, -1),
95
101
  new Mark("span", { id: `${chapterId}-s${i}` })
96
102
  );
97
103
  i++;
98
104
  }
99
105
  pos += sentence.text.replace(/\n$/, "").length;
100
106
  }
101
- taggedBody["body"] = serializeDom(root);
107
+ const replaced = replaceFootnotes(
108
+ original,
109
+ root,
110
+ inlined.footnotePairs,
111
+ inlined.mapping
112
+ );
113
+ taggedBody["body"] = serializeDom(replaced);
102
114
  });
103
115
  return { markedUp: chapterXml, timing };
104
116
  }
@@ -18,14 +18,17 @@ var __copyProps = (to, from, except, desc) => {
18
18
  var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
19
19
  var model_exports = {};
20
20
  __export(model_exports, {
21
+ FootnoteNode: () => FootnoteNode,
21
22
  Mark: () => Mark,
22
23
  Node: () => Node,
24
+ NoterefNode: () => NoterefNode,
23
25
  Root: () => Root,
24
26
  TextNode: () => TextNode,
25
27
  descendants: () => descendants
26
28
  });
27
29
  module.exports = __toCommonJS(model_exports);
28
30
  var import_itertools = require("itertools");
31
+ var import_resolvedPos = require("./resolvedPos.cjs");
29
32
  var import_semantics = require("./semantics.cjs");
30
33
  class Root {
31
34
  constructor(children) {
@@ -39,6 +42,12 @@ class Root {
39
42
  get textContent() {
40
43
  return this.children.reduce((acc, child) => acc + child.textContent, "");
41
44
  }
45
+ get nodeSize() {
46
+ return this.border + (this.children.reduce((acc, child) => acc + child.nodeSize, 0) || 1) + this.border;
47
+ }
48
+ get contentSize() {
49
+ return this.nodeSize - this.border * 2;
50
+ }
42
51
  split(at) {
43
52
  const children = [];
44
53
  let pos = this.border;
@@ -57,6 +66,58 @@ class Root {
57
66
  copy(opts = {}) {
58
67
  return new Root(opts.children ?? this.children);
59
68
  }
69
+ findIndex(pos) {
70
+ if (pos === 0) return { index: 0, offset: pos };
71
+ if (pos === this.contentSize) {
72
+ return { index: this.children.length, offset: pos };
73
+ }
74
+ if (pos > this.contentSize || pos < 0) {
75
+ throw new RangeError(`Position ${pos} outside of fragment`);
76
+ }
77
+ for (let i = 0, curPos = 0; ; i++) {
78
+ const cur = this.children[i];
79
+ const end = curPos + cur.nodeSize;
80
+ if (end >= pos) {
81
+ if (end === pos) return { index: i + 1, offset: end };
82
+ return { index: i, offset: curPos };
83
+ }
84
+ curPos = end;
85
+ }
86
+ }
87
+ replace(at, withNode) {
88
+ const children = [];
89
+ let pos = this.border;
90
+ for (const child of this.children) {
91
+ if (at === pos) {
92
+ children.push(withNode);
93
+ } else if (at > pos && at < pos + child.nodeSize) {
94
+ if (child instanceof TextNode) {
95
+ throw new Error("Tried to replace at a position within a text node");
96
+ }
97
+ children.push(child.replace(at - pos, withNode));
98
+ } else {
99
+ children.push(child);
100
+ }
101
+ pos += children.at(-1).nodeSize;
102
+ }
103
+ return this.copy({ children });
104
+ }
105
+ cut(pos) {
106
+ let currentPos = this.border;
107
+ for (const child of this.children) {
108
+ if (pos === currentPos) {
109
+ return child;
110
+ }
111
+ if (pos > currentPos && pos < currentPos + child.nodeSize) {
112
+ return child.cut(pos - currentPos);
113
+ }
114
+ currentPos += child.nodeSize;
115
+ }
116
+ return null;
117
+ }
118
+ resolve(pos) {
119
+ return import_resolvedPos.ResolvedPos.resolve(this, pos);
120
+ }
60
121
  }
61
122
  class Node {
62
123
  constructor(tagName, attrs = {}, children = [], marks = []) {
@@ -80,6 +141,9 @@ class Node {
80
141
  get nodeSize() {
81
142
  return this.border + (this.children.reduce((acc, child) => acc + child.nodeSize, 0) || 1) + this.border;
82
143
  }
144
+ get contentSize() {
145
+ return this.nodeSize - this.border * 2;
146
+ }
83
147
  get textContent() {
84
148
  return this.children.reduce((acc, child) => acc + child.textContent, "");
85
149
  }
@@ -102,14 +166,74 @@ class Node {
102
166
  }
103
167
  return this.copy({ children });
104
168
  }
169
+ static instance() {
170
+ return this;
171
+ }
172
+ static create(klass, ...args) {
173
+ return new klass(...args);
174
+ }
105
175
  copy(opts = {}) {
106
- return new Node(
176
+ return Node.create(
177
+ this.constructor,
107
178
  this.tagName,
108
179
  opts.attrs ?? this.attrs,
109
180
  opts.children ?? this.children,
110
181
  opts.marks ?? this.marks
111
182
  );
112
183
  }
184
+ replace(at, withNode) {
185
+ const children = [];
186
+ let pos = this.border;
187
+ for (const child of this.children) {
188
+ if (at === pos) {
189
+ children.push(withNode);
190
+ } else if (at > pos && at < pos + child.nodeSize) {
191
+ if (child instanceof TextNode) {
192
+ throw new Error("Tried to replace at a position within a text node");
193
+ }
194
+ children.push(child.replace(at - pos, withNode));
195
+ } else {
196
+ children.push(child);
197
+ }
198
+ pos += children.at(-1).nodeSize;
199
+ }
200
+ return this.copy({ children });
201
+ }
202
+ cut(pos) {
203
+ let currentPos = this.border;
204
+ for (const child of this.children) {
205
+ if (pos === currentPos) {
206
+ return child;
207
+ }
208
+ if (pos > currentPos && pos < currentPos + child.nodeSize) {
209
+ return child.cut(pos - currentPos);
210
+ }
211
+ currentPos += child.nodeSize;
212
+ }
213
+ return null;
214
+ }
215
+ findIndex(pos) {
216
+ if (pos === 0) return { index: 0, offset: pos };
217
+ if (pos === this.contentSize) {
218
+ return { index: this.children.length, offset: pos };
219
+ }
220
+ if (pos > this.contentSize || pos < 0) {
221
+ throw new RangeError(`Position ${pos} outside of fragment`);
222
+ }
223
+ for (let i = 0, curPos = 0; ; i++) {
224
+ const cur = this.children[i];
225
+ const end = curPos + cur.nodeSize;
226
+ if (end >= pos) {
227
+ if (end === pos) return { index: i + 1, offset: end };
228
+ return { index: i, offset: curPos };
229
+ }
230
+ curPos = end;
231
+ }
232
+ }
233
+ }
234
+ class NoterefNode extends Node {
235
+ }
236
+ class FootnoteNode extends Node {
113
237
  }
114
238
  class Mark {
115
239
  constructor(tagName, attrs = {}) {
@@ -138,6 +262,9 @@ class TextNode {
138
262
  get nodeSize() {
139
263
  return this.text.length;
140
264
  }
265
+ get contentSize() {
266
+ return this.nodeSize;
267
+ }
141
268
  get textContent() {
142
269
  return this.text;
143
270
  }
@@ -152,20 +279,26 @@ class TextNode {
152
279
  copy(opts = {}) {
153
280
  return new TextNode(this.text, opts.marks ?? this.marks);
154
281
  }
282
+ cut(pos) {
283
+ return new TextNode(this.text.slice(pos));
284
+ }
155
285
  }
156
- function descendants(root, cb, pos = 0) {
157
- for (const [i, child] of (0, import_itertools.enumerate)(root.children)) {
158
- const descend = cb(child, pos, root, i);
286
+ function descendants(node, cb, pos = 0) {
287
+ pos += node.border;
288
+ for (const [i, child] of (0, import_itertools.enumerate)(node.children)) {
289
+ const descend = cb(child, pos, node, i);
159
290
  if (descend && !child.isLeaf) {
160
- descendants(child, cb, pos + child.border);
291
+ descendants(child, cb, pos);
161
292
  }
162
293
  pos += child.nodeSize;
163
294
  }
164
295
  }
165
296
  // Annotate the CommonJS export names for ESM import in node:
166
297
  0 && (module.exports = {
298
+ FootnoteNode,
167
299
  Mark,
168
300
  Node,
301
+ NoterefNode,
169
302
  Root,
170
303
  TextNode,
171
304
  descendants