@storyteller-platform/align 0.1.27 → 0.1.29

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -18,130 +18,91 @@ var __copyProps = (to, from, except, desc) => {
18
18
  var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
19
19
  var textFragments_exports = {};
20
20
  __export(textFragments_exports, {
21
- TextFragmentTrie: () => TextFragmentTrie
21
+ TextFragmentFactory: () => TextFragmentFactory
22
22
  });
23
23
  module.exports = __toCommonJS(textFragments_exports);
24
24
  var import_itertools = require("itertools");
25
25
  var import_runes2 = require("runes2");
26
- class TextFragmentTrie {
27
- root = new Node(null, "");
26
+ class TextFragmentFactory {
27
+ runes;
28
28
  spans;
29
+ spanStarts;
30
+ charPositions;
29
31
  constructor(casedSpans, locale = new Intl.Locale("en-Latn-US")) {
30
32
  this.spans = casedSpans.map((span) => span.toLocaleLowerCase(locale));
31
- for (const [i, span] of (0, import_itertools.enumerate)(this.spans)) {
32
- const parents = [this.root];
33
- for (const [j, char] of (0, import_itertools.enumerate)((0, import_runes2.runes)(span))) {
34
- for (const [k, parent] of (0, import_itertools.enumerate)(parents)) {
35
- const newNode = new Node(parent, char, { span: i, pos: j });
36
- let node = parent.children.find((child) => child.eq(newNode));
37
- if (!node) {
38
- node = newNode;
39
- parent.children.push(node);
40
- } else {
41
- node.indices.push({ span: i, pos: j });
42
- }
43
- parents[k] = node;
44
- }
45
- parents.push(this.root);
46
- }
33
+ this.runes = (0, import_runes2.runes)(this.spans.join(""));
34
+ this.spanStarts = [];
35
+ let start = 0;
36
+ for (const span of this.spans) {
37
+ this.spanStarts.push(start);
38
+ start += span.length;
47
39
  }
48
- }
49
- findMinimalFragment(spanIndex) {
50
- let node = this.root;
51
- while (node.children.length) {
52
- const candidates = node.children.filter(
53
- (child2) => child2.indices.some(
54
- ({ span: childSpanIndex }) => childSpanIndex === spanIndex
55
- )
56
- );
57
- const child = (0, import_itertools.min)(
58
- candidates,
59
- // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
60
- (c) => c.indices.find((i) => i.span === spanIndex).pos
61
- );
62
- if (!child) {
63
- return this.nodeToFragment(node, spanIndex, true);
64
- }
65
- if (child.indices.length === 1) {
66
- return this.nodeToFragment(child, spanIndex);
67
- }
68
- node = child;
40
+ this.charPositions = /* @__PURE__ */ new Map();
41
+ for (const [i, char] of (0, import_itertools.enumerate)(this.runes)) {
42
+ const positions = this.charPositions.get(char) ?? [];
43
+ positions.push(i);
44
+ this.charPositions.set(char, positions);
69
45
  }
70
- return this.nodeToFragment(node, spanIndex, true);
71
46
  }
72
- nodeToFragment(node, spanIndex, findPrefix) {
47
+ findMinimalFragment(spanIndex) {
73
48
  const span = this.spans[spanIndex];
74
- let fragment = ":~:text=";
75
- let prefix = "";
76
- if (findPrefix) {
77
- const prev = this.spans[spanIndex - 1];
78
- if (prev) {
79
- const prefixes = node.indices.filter(({ span: s }) => s !== spanIndex).map(({ span: spanIndex2, pos }) => {
80
- let startNode2 = node;
81
- let startPos = pos;
82
- while (startNode2.parent && startNode2.parent !== this.root) {
83
- startPos -= startNode2.value.length;
84
- startNode2 = startNode2.parent;
85
- }
86
- const prev2 = this.spans[spanIndex2 - 1];
87
- const span2 = this.spans[spanIndex2];
88
- return (prev2 ?? "") + span2.slice(0, startPos);
89
- });
90
- const reversedPrefixes = prefixes.map((p) => (0, import_runes2.runes)(p).toReversed());
91
- for (const [i2, char] of (0, import_itertools.enumerate)((0, import_runes2.runes)(prev).toReversed())) {
92
- prefix = char + prefix;
93
- for (const [j, p] of (0, import_itertools.enumerate)([...reversedPrefixes.toReversed()])) {
94
- if (p[i2] !== char) {
95
- reversedPrefixes.splice(reversedPrefixes.length - 1 - j, 1);
96
- }
97
- }
98
- if (reversedPrefixes.length === 0) {
99
- break;
100
- }
49
+ if (!span) throw new RangeError(`Span index ${spanIndex} out of bounds`);
50
+ const startPos = this.spanStarts[spanIndex];
51
+ const chars = (0, import_runes2.runes)(span);
52
+ const firstChar = chars[0];
53
+ const candidates = this.charPositions.get(firstChar)?.filter((pos) => pos < startPos) ?? [];
54
+ let i = 1;
55
+ while (i < chars.length && candidates.length) {
56
+ const toRemove = [];
57
+ const char = chars[i];
58
+ for (let j = 0; j < candidates.length; j++) {
59
+ const candidate = candidates[j];
60
+ if (this.runes[candidate + i] !== char) {
61
+ toRemove.push(j);
101
62
  }
102
63
  }
64
+ toRemove.toReversed().map((r) => candidates.splice(r, 1));
65
+ i++;
103
66
  }
104
- if (prefix) {
105
- fragment += `${encodeTextFragmentPart(prefix)}-,`;
106
- }
107
- let startNode = node;
108
- let start = "";
109
- while (startNode) {
110
- start = startNode.value + start;
111
- startNode = startNode.parent;
112
- }
67
+ let fragment = "";
68
+ const start = chars.slice(0, i).join("");
113
69
  fragment += encodeTextFragmentPart(start);
114
- const remainingSentence = span.slice(start.length + node.value.length);
70
+ const remainingSpan = span.slice(i);
115
71
  let end = "";
116
- let i = remainingSentence.length - 1;
117
- while (remainingSentence.indexOf(end) !== i + 1 && i >= node.value.length) {
118
- end = remainingSentence.slice(i);
119
- i--;
72
+ let e = remainingSpan.length - 1;
73
+ if (remainingSpan.at(-1) === "\n") e--;
74
+ while (remainingSpan.indexOf(end) !== e + 1 && e >= 0) {
75
+ end = remainingSpan.slice(e);
76
+ e--;
120
77
  }
121
78
  if (end) {
122
79
  fragment += `,${encodeTextFragmentPart(end)}`;
123
80
  }
124
- return fragment;
81
+ if (candidates.length) {
82
+ let p = 1;
83
+ while (p < startPos) {
84
+ const toRemove = [];
85
+ const char = this.runes[startPos - p];
86
+ for (let j = 0; j < candidates.length; j++) {
87
+ const candidate = candidates[j];
88
+ if (this.runes[candidate - p] !== char) {
89
+ toRemove.push(j);
90
+ }
91
+ }
92
+ toRemove.toReversed().map((r) => candidates.splice(r, 1));
93
+ p++;
94
+ if (!candidates.length) break;
95
+ }
96
+ const prefix = this.runes.slice(startPos - p + 1, startPos).join("");
97
+ fragment = `${encodeTextFragmentPart(prefix)}-,${fragment}`;
98
+ }
99
+ return `:~:text=${fragment}`;
125
100
  }
126
101
  }
127
102
  function encodeTextFragmentPart(part) {
128
103
  return encodeURIComponent(part).replaceAll(/-/g, "%2d").replaceAll(/,/g, "%2c");
129
104
  }
130
- class Node {
131
- constructor(parent, value, firstIndex) {
132
- this.parent = parent;
133
- this.value = value;
134
- if (firstIndex !== void 0) {
135
- this.indices.push(firstIndex);
136
- }
137
- }
138
- children = [];
139
- indices = [];
140
- eq(other) {
141
- return this.value === other.value;
142
- }
143
- }
144
105
  // Annotate the CommonJS export names for ESM import in node:
145
106
  0 && (module.exports = {
146
- TextFragmentTrie
107
+ TextFragmentFactory
147
108
  });
@@ -1,23 +1,10 @@
1
- declare class TextFragmentTrie {
2
- private root;
1
+ declare class TextFragmentFactory {
2
+ private runes;
3
3
  private spans;
4
+ private spanStarts;
5
+ private charPositions;
4
6
  constructor(casedSpans: string[], locale?: Intl.Locale);
5
7
  findMinimalFragment(spanIndex: number): string;
6
- nodeToFragment(node: Node, spanIndex: number, findPrefix?: boolean): string;
7
- }
8
- declare class Node {
9
- parent: Node | null;
10
- value: string;
11
- children: Node[];
12
- indices: {
13
- span: number;
14
- pos: number;
15
- }[];
16
- constructor(parent: Node | null, value: string, firstIndex?: {
17
- span: number;
18
- pos: number;
19
- });
20
- eq(other: Node): boolean;
21
8
  }
22
9
 
23
- export { TextFragmentTrie };
10
+ export { TextFragmentFactory };
@@ -1,23 +1,10 @@
1
- declare class TextFragmentTrie {
2
- private root;
1
+ declare class TextFragmentFactory {
2
+ private runes;
3
3
  private spans;
4
+ private spanStarts;
5
+ private charPositions;
4
6
  constructor(casedSpans: string[], locale?: Intl.Locale);
5
7
  findMinimalFragment(spanIndex: number): string;
6
- nodeToFragment(node: Node, spanIndex: number, findPrefix?: boolean): string;
7
- }
8
- declare class Node {
9
- parent: Node | null;
10
- value: string;
11
- children: Node[];
12
- indices: {
13
- span: number;
14
- pos: number;
15
- }[];
16
- constructor(parent: Node | null, value: string, firstIndex?: {
17
- span: number;
18
- pos: number;
19
- });
20
- eq(other: Node): boolean;
21
8
  }
22
9
 
23
- export { TextFragmentTrie };
10
+ export { TextFragmentFactory };
@@ -1,124 +1,85 @@
1
1
  import "../chunk-BIEQXUOY.js";
2
- import { enumerate, min } from "itertools";
2
+ import { enumerate } from "itertools";
3
3
  import { runes } from "runes2";
4
- class TextFragmentTrie {
5
- root = new Node(null, "");
4
+ class TextFragmentFactory {
5
+ runes;
6
6
  spans;
7
+ spanStarts;
8
+ charPositions;
7
9
  constructor(casedSpans, locale = new Intl.Locale("en-Latn-US")) {
8
10
  this.spans = casedSpans.map((span) => span.toLocaleLowerCase(locale));
9
- for (const [i, span] of enumerate(this.spans)) {
10
- const parents = [this.root];
11
- for (const [j, char] of enumerate(runes(span))) {
12
- for (const [k, parent] of enumerate(parents)) {
13
- const newNode = new Node(parent, char, { span: i, pos: j });
14
- let node = parent.children.find((child) => child.eq(newNode));
15
- if (!node) {
16
- node = newNode;
17
- parent.children.push(node);
18
- } else {
19
- node.indices.push({ span: i, pos: j });
20
- }
21
- parents[k] = node;
22
- }
23
- parents.push(this.root);
24
- }
11
+ this.runes = runes(this.spans.join(""));
12
+ this.spanStarts = [];
13
+ let start = 0;
14
+ for (const span of this.spans) {
15
+ this.spanStarts.push(start);
16
+ start += span.length;
25
17
  }
26
- }
27
- findMinimalFragment(spanIndex) {
28
- let node = this.root;
29
- while (node.children.length) {
30
- const candidates = node.children.filter(
31
- (child2) => child2.indices.some(
32
- ({ span: childSpanIndex }) => childSpanIndex === spanIndex
33
- )
34
- );
35
- const child = min(
36
- candidates,
37
- // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
38
- (c) => c.indices.find((i) => i.span === spanIndex).pos
39
- );
40
- if (!child) {
41
- return this.nodeToFragment(node, spanIndex, true);
42
- }
43
- if (child.indices.length === 1) {
44
- return this.nodeToFragment(child, spanIndex);
45
- }
46
- node = child;
18
+ this.charPositions = /* @__PURE__ */ new Map();
19
+ for (const [i, char] of enumerate(this.runes)) {
20
+ const positions = this.charPositions.get(char) ?? [];
21
+ positions.push(i);
22
+ this.charPositions.set(char, positions);
47
23
  }
48
- return this.nodeToFragment(node, spanIndex, true);
49
24
  }
50
- nodeToFragment(node, spanIndex, findPrefix) {
25
+ findMinimalFragment(spanIndex) {
51
26
  const span = this.spans[spanIndex];
52
- let fragment = ":~:text=";
53
- let prefix = "";
54
- if (findPrefix) {
55
- const prev = this.spans[spanIndex - 1];
56
- if (prev) {
57
- const prefixes = node.indices.filter(({ span: s }) => s !== spanIndex).map(({ span: spanIndex2, pos }) => {
58
- let startNode2 = node;
59
- let startPos = pos;
60
- while (startNode2.parent && startNode2.parent !== this.root) {
61
- startPos -= startNode2.value.length;
62
- startNode2 = startNode2.parent;
63
- }
64
- const prev2 = this.spans[spanIndex2 - 1];
65
- const span2 = this.spans[spanIndex2];
66
- return (prev2 ?? "") + span2.slice(0, startPos);
67
- });
68
- const reversedPrefixes = prefixes.map((p) => runes(p).toReversed());
69
- for (const [i2, char] of enumerate(runes(prev).toReversed())) {
70
- prefix = char + prefix;
71
- for (const [j, p] of enumerate([...reversedPrefixes.toReversed()])) {
72
- if (p[i2] !== char) {
73
- reversedPrefixes.splice(reversedPrefixes.length - 1 - j, 1);
74
- }
75
- }
76
- if (reversedPrefixes.length === 0) {
77
- break;
78
- }
27
+ if (!span) throw new RangeError(`Span index ${spanIndex} out of bounds`);
28
+ const startPos = this.spanStarts[spanIndex];
29
+ const chars = runes(span);
30
+ const firstChar = chars[0];
31
+ const candidates = this.charPositions.get(firstChar)?.filter((pos) => pos < startPos) ?? [];
32
+ let i = 1;
33
+ while (i < chars.length && candidates.length) {
34
+ const toRemove = [];
35
+ const char = chars[i];
36
+ for (let j = 0; j < candidates.length; j++) {
37
+ const candidate = candidates[j];
38
+ if (this.runes[candidate + i] !== char) {
39
+ toRemove.push(j);
79
40
  }
80
41
  }
42
+ toRemove.toReversed().map((r) => candidates.splice(r, 1));
43
+ i++;
81
44
  }
82
- if (prefix) {
83
- fragment += `${encodeTextFragmentPart(prefix)}-,`;
84
- }
85
- let startNode = node;
86
- let start = "";
87
- while (startNode) {
88
- start = startNode.value + start;
89
- startNode = startNode.parent;
90
- }
45
+ let fragment = "";
46
+ const start = chars.slice(0, i).join("");
91
47
  fragment += encodeTextFragmentPart(start);
92
- const remainingSentence = span.slice(start.length + node.value.length);
48
+ const remainingSpan = span.slice(i);
93
49
  let end = "";
94
- let i = remainingSentence.length - 1;
95
- while (remainingSentence.indexOf(end) !== i + 1 && i >= node.value.length) {
96
- end = remainingSentence.slice(i);
97
- i--;
50
+ let e = remainingSpan.length - 1;
51
+ if (remainingSpan.at(-1) === "\n") e--;
52
+ while (remainingSpan.indexOf(end) !== e + 1 && e >= 0) {
53
+ end = remainingSpan.slice(e);
54
+ e--;
98
55
  }
99
56
  if (end) {
100
57
  fragment += `,${encodeTextFragmentPart(end)}`;
101
58
  }
102
- return fragment;
59
+ if (candidates.length) {
60
+ let p = 1;
61
+ while (p < startPos) {
62
+ const toRemove = [];
63
+ const char = this.runes[startPos - p];
64
+ for (let j = 0; j < candidates.length; j++) {
65
+ const candidate = candidates[j];
66
+ if (this.runes[candidate - p] !== char) {
67
+ toRemove.push(j);
68
+ }
69
+ }
70
+ toRemove.toReversed().map((r) => candidates.splice(r, 1));
71
+ p++;
72
+ if (!candidates.length) break;
73
+ }
74
+ const prefix = this.runes.slice(startPos - p + 1, startPos).join("");
75
+ fragment = `${encodeTextFragmentPart(prefix)}-,${fragment}`;
76
+ }
77
+ return `:~:text=${fragment}`;
103
78
  }
104
79
  }
105
80
  function encodeTextFragmentPart(part) {
106
81
  return encodeURIComponent(part).replaceAll(/-/g, "%2d").replaceAll(/,/g, "%2c");
107
82
  }
108
- class Node {
109
- constructor(parent, value, firstIndex) {
110
- this.parent = parent;
111
- this.value = value;
112
- if (firstIndex !== void 0) {
113
- this.indices.push(firstIndex);
114
- }
115
- }
116
- children = [];
117
- indices = [];
118
- eq(other) {
119
- return this.value === other.value;
120
- }
121
- }
122
83
  export {
123
- TextFragmentTrie
84
+ TextFragmentFactory
124
85
  };
package/dist/cli/bin.cjs CHANGED
@@ -227,6 +227,7 @@ async function main() {
227
227
  {
228
228
  granularity: parsed.granularity,
229
229
  textRef: parsed.textRef,
230
+ outFormat: parsed.outFormat,
230
231
  primaryLocale: parsed.language,
231
232
  logger,
232
233
  ...!parsed.noProgress && parsed.logLevel === "silent" && {
@@ -346,6 +347,7 @@ async function main() {
346
347
  {
347
348
  granularity: parsed.granularity,
348
349
  textRef: parsed.textRef,
350
+ outFormat: parsed.outFormat,
349
351
  primaryLocale,
350
352
  logger,
351
353
  ...!parsed.noProgress && parsed.logLevel === "silent" && {
package/dist/cli/bin.js CHANGED
@@ -178,6 +178,7 @@ async function main() {
178
178
  {
179
179
  granularity: parsed.granularity,
180
180
  textRef: parsed.textRef,
181
+ outFormat: parsed.outFormat,
181
182
  primaryLocale: parsed.language,
182
183
  logger,
183
184
  ...!parsed.noProgress && parsed.logLevel === "silent" && {
@@ -297,6 +298,7 @@ async function main() {
297
298
  {
298
299
  granularity: parsed.granularity,
299
300
  textRef: parsed.textRef,
301
+ outFormat: parsed.outFormat,
300
302
  primaryLocale,
301
303
  logger,
302
304
  ...!parsed.noProgress && parsed.logLevel === "silent" && {
@@ -34,7 +34,7 @@ module.exports = __toCommonJS(graphMetadata_exports);
34
34
  var import_itertools = require("itertools");
35
35
  var import_memoize = __toESM(require("memoize"), 1);
36
36
  var import_backtraceGraph = require("./backtraceGraph.cjs");
37
- var import_editDistance = require("./editDistance.cjs");
37
+ var import_native = require("./native.cjs");
38
38
  var import_utils = require("./utils.cjs");
39
39
  class SubgraphMetadata {
40
40
  constructor(refRaw, hypRaw, refTokenMatches, hypTokenMatches, refNorm, hypNorm) {
@@ -52,7 +52,7 @@ class SubgraphMetadata {
52
52
  this.hypCharTypes = getCharTypes(this.hyp);
53
53
  this.refIndexMap = createIndexMap(refTokenMatches);
54
54
  this.hypIndexMap = createIndexMap(hypTokenMatches);
55
- const { backtraceMatrix } = (0, import_editDistance.computeErrorAlignDistanceMatrix)(
55
+ const { backtraceMatrix } = (0, import_native.computeErrorAlignDistanceMatrix)(
56
56
  this.ref,
57
57
  this.hyp,
58
58
  true
@@ -2,7 +2,7 @@ import "../chunk-BIEQXUOY.js";
2
2
  import { range } from "itertools";
3
3
  import memoize from "memoize";
4
4
  import { BacktraceGraph } from "./backtraceGraph.js";
5
- import { computeErrorAlignDistanceMatrix } from "./editDistance.js";
5
+ import { computeErrorAlignDistanceMatrix } from "./native.js";
6
6
  import { END_DELIMITER, START_DELIMITER, categorizeChar } from "./utils.js";
7
7
  class SubgraphMetadata {
8
8
  constructor(refRaw, hypRaw, refTokenMatches, hypTokenMatches, refNorm, hypNorm) {
@@ -87,7 +87,7 @@ function findFootnotePairs(root) {
87
87
  const pairs = /* @__PURE__ */ new Map();
88
88
  (0, import_model.descendants)(root, (node, pos) => {
89
89
  if (node instanceof import_model.NoterefNode) {
90
- const id = node.attrs["href"]?.slice(1);
90
+ const id = node.attrs["href"]?.split("#")[1];
91
91
  if (id) {
92
92
  noterefs.set(id, pos);
93
93
  }
@@ -72,7 +72,7 @@ function findFootnotePairs(root) {
72
72
  const pairs = /* @__PURE__ */ new Map();
73
73
  descendants(root, (node, pos) => {
74
74
  if (node instanceof NoterefNode) {
75
- const id = node.attrs["href"]?.slice(1);
75
+ const id = node.attrs["href"]?.split("#")[1];
76
76
  if (id) {
77
77
  noterefs.set(id, pos);
78
78
  }