npm - @storyteller-platform/align - Versions diffs - 0.1.9 → 0.1.11 - Mend

@storyteller-platform/align 0.1.9 → 0.1.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (113) hide show

package/dist/align/__tests__/align.test.cjs +6 -5
package/dist/align/__tests__/align.test.js +6 -5
package/dist/align/align.cjs +133 -81
package/dist/align/align.d.cts +1 -0
package/dist/align/align.d.ts +1 -0
package/dist/align/align.js +133 -81
package/dist/align/getSentenceRanges.cjs +78 -149
package/dist/align/getSentenceRanges.d.cts +1 -1
package/dist/align/getSentenceRanges.d.ts +1 -1
package/dist/align/getSentenceRanges.js +78 -149
package/dist/align/slugify.cjs +16 -8
package/dist/align/slugify.js +16 -8
package/dist/errorAlign/__tests__/errorAlign.test.cjs +100 -0
package/dist/errorAlign/__tests__/errorAlign.test.d.cts +2 -0
package/dist/errorAlign/__tests__/errorAlign.test.d.ts +2 -0
package/dist/errorAlign/__tests__/errorAlign.test.js +77 -0
package/dist/errorAlign/__tests__/native.test.cjs +118 -0
package/dist/errorAlign/__tests__/native.test.d.cts +2 -0
package/dist/errorAlign/__tests__/native.test.d.ts +2 -0
package/dist/errorAlign/__tests__/native.test.js +107 -0
package/dist/errorAlign/backtraceGraph.cjs +298 -0
package/dist/errorAlign/backtraceGraph.d.cts +103 -0
package/dist/errorAlign/backtraceGraph.d.ts +103 -0
package/dist/errorAlign/backtraceGraph.js +270 -0
package/dist/errorAlign/beamSearch.cjs +302 -0
package/dist/errorAlign/beamSearch.d.cts +53 -0
package/dist/errorAlign/beamSearch.d.ts +53 -0
package/dist/errorAlign/beamSearch.js +268 -0
package/dist/errorAlign/core.cjs +33 -0
package/dist/errorAlign/core.d.cts +5 -0
package/dist/errorAlign/core.d.ts +5 -0
package/dist/errorAlign/core.js +11 -0
package/dist/errorAlign/editDistance.cjs +115 -0
package/dist/errorAlign/editDistance.d.cts +46 -0
package/dist/errorAlign/editDistance.d.ts +46 -0
package/dist/errorAlign/editDistance.js +90 -0
package/dist/errorAlign/errorAlign.cjs +159 -0
package/dist/errorAlign/errorAlign.d.cts +15 -0
package/dist/errorAlign/errorAlign.d.ts +15 -0
package/dist/errorAlign/errorAlign.js +145 -0
package/dist/errorAlign/graphMetadata.cjs +97 -0
package/dist/errorAlign/graphMetadata.d.cts +44 -0
package/dist/errorAlign/graphMetadata.d.ts +44 -0
package/dist/errorAlign/graphMetadata.js +64 -0
package/dist/errorAlign/hash.cjs +173 -0
package/dist/errorAlign/hash.d.cts +28 -0
package/dist/errorAlign/hash.d.ts +28 -0
package/dist/errorAlign/hash.js +150 -0
package/dist/errorAlign/native.cjs +60 -0
package/dist/errorAlign/native.d.cts +18 -0
package/dist/errorAlign/native.d.ts +18 -0
package/dist/errorAlign/native.js +24 -0
package/dist/errorAlign/node-gyp-build.d.cjs +1 -0
package/dist/errorAlign/node-gyp-build.d.d.cts +3 -0
package/dist/errorAlign/node-gyp-build.d.d.ts +3 -0
package/dist/errorAlign/node-gyp-build.d.js +0 -0
package/dist/errorAlign/pathToAlignment.cjs +122 -0
package/dist/errorAlign/pathToAlignment.d.cts +11 -0
package/dist/errorAlign/pathToAlignment.d.ts +11 -0
package/dist/errorAlign/pathToAlignment.js +89 -0
package/dist/errorAlign/utils.cjs +301 -0
package/dist/errorAlign/utils.d.cts +107 -0
package/dist/errorAlign/utils.d.ts +107 -0
package/dist/errorAlign/utils.js +248 -0
package/dist/index.d.cts +1 -0
package/dist/index.d.ts +1 -0
package/dist/markup/__tests__/markup.test.cjs +108 -81
package/dist/markup/__tests__/markup.test.js +109 -82
package/dist/markup/__tests__/parseDom.test.cjs +112 -0
package/dist/markup/__tests__/parseDom.test.d.cts +2 -0
package/dist/markup/__tests__/parseDom.test.d.ts +2 -0
package/dist/markup/__tests__/parseDom.test.js +89 -0
package/dist/markup/__tests__/serializeDom.test.cjs +120 -0
package/dist/markup/__tests__/serializeDom.test.d.cts +2 -0
package/dist/markup/__tests__/serializeDom.test.d.ts +2 -0
package/dist/markup/__tests__/serializeDom.test.js +97 -0
package/dist/markup/__tests__/transform.test.cjs +122 -0
package/dist/markup/__tests__/transform.test.d.cts +2 -0
package/dist/markup/__tests__/transform.test.d.ts +2 -0
package/dist/markup/__tests__/transform.test.js +99 -0
package/dist/markup/map.cjs +261 -0
package/dist/markup/map.d.cts +50 -0
package/dist/markup/map.d.ts +50 -0
package/dist/markup/map.js +236 -0
package/dist/markup/markup.cjs +23 -201
package/dist/markup/markup.d.cts +5 -9
package/dist/markup/markup.d.ts +5 -9
package/dist/markup/markup.js +24 -203
package/dist/markup/model.cjs +172 -0
package/dist/markup/model.d.cts +57 -0
package/dist/markup/model.d.ts +57 -0
package/dist/markup/model.js +145 -0
package/dist/markup/parseDom.cjs +59 -0
package/dist/markup/parseDom.d.cts +7 -0
package/dist/markup/parseDom.d.ts +7 -0
package/dist/markup/parseDom.js +35 -0
package/dist/markup/segmentation.cjs +11 -57
package/dist/markup/segmentation.d.cts +6 -2
package/dist/markup/segmentation.d.ts +6 -2
package/dist/markup/segmentation.js +11 -58
package/dist/markup/serializeDom.cjs +87 -0
package/dist/markup/serializeDom.d.cts +7 -0
package/dist/markup/serializeDom.d.ts +7 -0
package/dist/markup/serializeDom.js +63 -0
package/dist/markup/transform.cjs +92 -0
package/dist/markup/transform.d.cts +11 -0
package/dist/markup/transform.d.ts +11 -0
package/dist/markup/transform.js +71 -0
package/dist/types/node-gyp-build.d.cjs +1 -0
package/dist/types/node-gyp-build.d.d.cts +3 -0
package/dist/types/node-gyp-build.d.d.ts +3 -0
package/dist/types/node-gyp-build.d.js +0 -0
package/package.json +11 -4

package/dist/errorAlign/pathToAlignment.js ADDED Viewed

@@ -0,0 +1,89 @@
+import "../chunk-BIEQXUOY.js";
+import assert from "node:assert";
+import { Alignment, translateSlice } from "./utils.js";
+function getDeleteAlignment(startRefIndex, endRefIndex, subgraphMetadata) {
+  const refSlice = translateSlice(
+    [startRefIndex, endRefIndex],
+    subgraphMetadata.refIndexMap
+  );
+  assert(!!refSlice);
+  return new Alignment(
+    "DELETE",
+    refSlice,
+    null,
+    subgraphMetadata.refRaw.slice(...refSlice)
+  );
+}
+function getInsertAlignment(startHypIndex, endHypIndex, subgraphMetadata) {
+  const hypSlice = translateSlice(
+    [startHypIndex, endHypIndex],
+    subgraphMetadata.hypIndexMap
+  );
+  assert(!!hypSlice);
+  return new Alignment(
+    "INSERT",
+    null,
+    hypSlice,
+    null,
+    subgraphMetadata.hypRaw.slice(...hypSlice),
+    subgraphMetadata.hypIndexMap[startHypIndex] >= 0,
+    subgraphMetadata.hypIndexMap[endHypIndex - 1] >= 0
+  );
+}
+function getMatchOrSubstitutionAlignment(startHypIndex, endHypIndex, startRefIndex, endRefIndex, score, subgraphMetadata) {
+  const hypSlice = translateSlice(
+    [startHypIndex, endHypIndex],
+    subgraphMetadata.hypIndexMap
+  );
+  const refSlice = translateSlice(
+    [startRefIndex, endRefIndex],
+    subgraphMetadata.refIndexMap
+  );
+  assert(!!hypSlice);
+  assert(!!refSlice);
+  const isMatchSegment = score === 0;
+  const opType = isMatchSegment ? "MATCH" : "SUBSTITUTE";
+  return new Alignment(
+    opType,
+    refSlice,
+    hypSlice,
+    subgraphMetadata.refRaw.slice(...refSlice),
+    subgraphMetadata.hypRaw.slice(...hypSlice),
+    subgraphMetadata.hypIndexMap[startHypIndex] >= 0,
+    subgraphMetadata.hypIndexMap[endHypIndex - 1] >= 0
+  );
+}
+function getAlignments(path) {
+  const subgraphMetadata = path.src;
+  const segmentationIndices = path.endIndices;
+  const alignments = [];
+  let startHyp = 0;
+  let startRef = 0;
+  for (let [endHyp, endRef, score] of segmentationIndices) {
+    endHyp += 1;
+    endRef += 1;
+    if (startHyp === endHyp) {
+      const alignment = getDeleteAlignment(startRef, endRef, subgraphMetadata);
+      alignments.push(alignment);
+    } else if (startRef === endRef) {
+      const alignment = getInsertAlignment(startHyp, endHyp, subgraphMetadata);
+      alignments.push(alignment);
+    } else {
+      const alignment = getMatchOrSubstitutionAlignment(
+        startHyp,
+        endHyp,
+        startRef,
+        endRef,
+        score,
+        subgraphMetadata
+      );
+      alignments.push(alignment);
+    }
+    startHyp = endHyp;
+    startRef = endRef;
+  }
+  return alignments;
+}
+export {
+  getAlignments
+};

package/dist/errorAlign/utils.cjs ADDED Viewed

@@ -0,0 +1,301 @@
+"use strict";
+var __create = Object.create; // short aliases for the Object built-ins used by the helpers below
+var __defProp = Object.defineProperty;
+var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
+var __getOwnPropNames = Object.getOwnPropertyNames;
+var __getProtoOf = Object.getPrototypeOf;
+var __hasOwnProp = Object.prototype.hasOwnProperty;
+var __export = (target, all) => { // defines every key of `all` on `target` as an enumerable accessor whose getter is `all[name]`
+  for (var name in all)
+    __defProp(target, name, { get: all[name], enumerable: true });
+};
+var __copyProps = (to, from, except, desc) => { // mirrors own properties of `from` onto `to` as getters, then returns `to`
+  if (from && typeof from === "object" || typeof from === "function") {
+    for (let key of __getOwnPropNames(from))
+      if (!__hasOwnProp.call(to, key) && key !== except) // skip keys `to` already owns and the excluded key
+        __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
+  }
+  return to;
+};
+var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps( // builds a new object sharing `mod`'s prototype (or `{}` when `mod` is nullish) and copies `mod`'s properties onto it
+  // If the importer is in node compatibility mode or this is not an ESM
+  // file that has been converted to a CommonJS file using a Babel-
+  // compatible transform (i.e. "__esModule" has not been set), then set
+  // "default" to the CommonJS "module.exports" for node compatibility.
+  isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
+  mod
+));
+var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod); // copies `mod`'s properties onto a fresh object flagged with `__esModule: true`
+var utils_exports = {}; // receives one accessor per public name via __export below
+__export(utils_exports, { // each entry is a thunk, so the binding is read when the export is accessed
+  Alignment: () => Alignment,
+  Counter: () => Counter,
+  DELIMITERS: () => DELIMITERS,
+  END_DELIMITER: () => END_DELIMITER,
+  NUMERIC_TOKEN: () => NUMERIC_TOKEN,
+  OP_TYPES: () => OP_TYPES,
+  OP_TYPE_COMBO_MAP: () => OP_TYPE_COMBO_MAP,
+  OP_TYPE_MAP: () => OP_TYPE_MAP,
+  STANDARD_TOKEN: () => STANDARD_TOKEN,
+  START_DELIMITER: () => START_DELIMITER,
+  basicNormalizer: () => basicNormalizer,
+  basicTokenizer: () => basicTokenizer,
+  categorizeChar: () => categorizeChar,
+  ensureLengthPreservation: () => ensureLengthPreservation,
+  getOpTypeComboIndex: () => getOpTypeComboIndex,
+  isConsonant: () => isConsonant,
+  isVowel: () => isVowel,
+  opTypePowerset: () => opTypePowerset,
+  reversed: () => reversed,
+  translateSlice: () => translateSlice,
+  unpackRegexMatch: () => unpackRegexMatch
+});
+module.exports = __toCommonJS(utils_exports); // publish the accessor table as the CommonJS module, marked `__esModule`
+var import_node_assert = __toESM(require("node:assert"), 1);
+var import_itertools = require("itertools");
+const OP_TYPES = [ // the four operation type names; this order drives opTypePowerset()
+  "MATCH",
+  "INSERT",
+  "DELETE",
+  "SUBSTITUTE"
+];
+class Alignment {
+  constructor(opType, refSlice = null, hypSlice = null, ref = null, hyp = null, leftCompound = false, rightCompound = false) {
+    this.opType = opType;
+    this.refSlice = refSlice;
+    this.hypSlice = hypSlice;
+    this.ref = ref;
+    this.hyp = hyp;
+    this.leftCompound = leftCompound;
+    this.rightCompound = rightCompound;
+    switch (opType) {
+      case "MATCH": {
+        if (ref === null || hyp === null) {
+          throw new TypeError("MATCH operation must have non-empty ref or hyp.");
+        }
+        if (leftCompound || rightCompound) {
+          throw new TypeError("MATCH operation cannot have compound markers.");
+        }
+        break;
+      }
+      case "INSERT": {
+        if (hyp === null || ref !== null) {
+          throw new TypeError(
+            "INSERT operation must have non-empty hyp and empty ref."
+          );
+        }
+        break;
+      }
+      case "DELETE": {
+        if (hyp !== null || ref === null) {
+          throw new TypeError(
+            "DELETE operation must have non-empty ref and empty hyp."
+          );
+        }
+        break;
+      }
+      case "SUBSTITUTE": {
+        if (ref === null || hyp === null) {
+          throw new TypeError(
+            "SUBSTITUTE operation must have both ref and hyp."
+          );
+        }
+      }
+    }
+  }
+  /** Return the hypothesis with compound markers if applicable. */
+  get hypWithCompoundMarkers() {
+    if (this.hyp === null) {
+      return null;
+    }
+    return `${this.leftCompound ? "-" : ""}"${this.hyp}"${this.rightCompound ? "-" : ""}`;
+  }
+  toString() {
+    switch (this.opType) {
+      case "DELETE": {
+        return `Alignment(${this.opType}: "${this.ref}")`;
+      }
+      case "INSERT": {
+        return `Alignment(${this.opType}: ${this.hypWithCompoundMarkers})`;
+      }
+      case "SUBSTITUTE": {
+        return `Alignment(${this.opType}: ${this.hypWithCompoundMarkers} -> "${this.ref}")`;
+      }
+      case "MATCH": {
+        return `Alignment(${this.opType}: "${this.hyp}" == "${this.ref}")`;
+      }
+    }
+  }
+}
+function opTypePowerset() {
+  const opCombinations = (0, import_itertools.map)(
+    (0, import_itertools.range)(1, OP_TYPES.length + 1),
+    (r) => combinations(OP_TYPES, r)
+  );
+  return (0, import_itertools.chain)(...opCombinations);
+}
+function* combinations(iterable, r) { // yields arrays of `r` items drawn from `iterable`, each built from an ascending list of pool positions
+  const pool = Array.from(iterable);
+  const n = pool.length;
+  if (r > n) { // nothing to yield when more items are requested than the pool holds
+    return;
+  }
+  const indices = Array.from((0, import_itertools.range)(r)); // presumably [0, 1, ..., r - 1] — confirm against the itertools `range` contract
+  yield indices.map((i) => pool[i]);
+  while (true) {
+    let i;
+    find: { // search from the right for a position whose index is not yet at its maximum (i + n - r)
+      for (i of reversed((0, import_itertools.range)(r))) {
+        if (indices[i] !== i + n - r) {
+          break find;
+        }
+      }
+      return; // every position is at its maximum, so the generator is finished
+    }
+    indices[i] += 1;
+    for (const j of (0, import_itertools.range)(i + 1, r)) { // positions to the right restart immediately after their left neighbour
+      indices[j] = indices[j - 1] + 1;
+    }
+    yield indices.map((i2) => pool[i2]);
+  }
+}
+function reversed(iterable) {
+  return Array.from(iterable).toReversed();
+}
+const START_DELIMITER = "<"; // marker characters; categorizeChar() gives both of them category 0
+const END_DELIMITER = ">";
+const DELIMITERS = /* @__PURE__ */ new Set([START_DELIMITER, END_DELIMITER]);
+const OP_TYPE_MAP = OP_TYPES.reduce( // object mapping every operation type name to itself
+  (acc, opType) => ({ ...acc, [opType]: opType }),
+  {}
+);
+const OP_TYPE_COMBO_MAP = Array.from((0, import_itertools.enumerate)(opTypePowerset())).reduce((acc, [i, opTypes]) => ({ ...acc, [i]: opTypes }), {}); // position in opTypePowerset() -> that combination
+function getOpTypeComboIndex(ops) {
+  return (0, import_itertools.find)(
+    (0, import_itertools.enumerate)(opTypePowerset()),
+    ([_i, set]) => set.length === ops.length && (0, import_itertools.every)((0, import_itertools.range)(set.length), (i) => set[i] === ops[i])
+  )[0];
+}
+const NUMERIC_TOKEN = "\\p{N}+([,.]\\p{N}+)*(?=\\s|$)"; // regex source: digit runs optionally joined by "," or ".", followed by whitespace or end of input
+const STANDARD_TOKEN = "[\\p{L}\\p{N}]+(['][\\p{L}\\p{N}]+)*'?"; // regex source: letter/digit runs optionally joined by "'", with an optional trailing "'"
+function isVowel(c) {
+  (0, import_node_assert.default)(c.length === 1, "Input must be a single character");
+  return "aeiouy".includes(c);
+}
+function isConsonant(c) {
+  (0, import_node_assert.default)(c.length === 1, "Input must be a single character");
+  return "bcdfghjklmnpqrstvwxyz".includes(c);
+}
+function categorizeChar(c) {
+  if (DELIMITERS.has(c)) return 0;
+  if (isConsonant(c)) return 1;
+  if (isVowel(c)) return 2;
+  return 3;
+}
+function basicTokenizer(text) {
+  return Array.from(
+    text.matchAll(new RegExp(`(${NUMERIC_TOKEN}|${STANDARD_TOKEN})`, "udg"))
+  );
+}
+function basicNormalizer(text) {
+  return text.toLowerCase();
+}
+function ensureLengthPreservation(normalizer) {
+  return function wrapper(text, ...args) {
+    const normalized = normalizer(text, ...args);
+    if (normalized.length !== text.length) {
+      throw new RangeError("Normalizer must preserve length.");
+    }
+    return normalized;
+  };
+}
+function unpackRegexMatch(tokenizer) {
+  return function wrapper(text, ...args) {
+    const matches = tokenizer(text, ...args);
+    return matches.map((match) => [match[1], match.indices[1]]);
+  };
+}
+function translateSlice(segmentSlice, indexMap) {
+  const sliceIndices = indexMap.slice(...segmentSlice).filter((x) => x >= 0);
+  if (sliceIndices.length === 0) {
+    return null;
+  }
+  return [sliceIndices[0], sliceIndices.at(-1) + 1];
+}
+class Counter {
+  counts = /* @__PURE__ */ new Map();
+  constructor(init = []) {
+    if (init instanceof Map) {
+      this.counts = init;
+      return;
+    }
+    for (const element of init) {
+      this.counts.set(element, (this.counts.get(element) ?? 0) + 1);
+    }
+  }
+  elements() {
+    return this.counts.entries().flatMap(([e, c]) => Array.from((0, import_itertools.range)(c)).map(() => e));
+  }
+  mostCommon(n) {
+    const ordered = Array.from(this.counts.entries()).toSorted(
+      ([_a, a], [_b, b]) => a - b
+    );
+    if (n === void 0) return ordered;
+    return ordered.slice(0, n);
+  }
+  total() {
+    return this.counts.values().reduce((acc, v) => acc + v);
+  }
+  subtract(update) {
+    if (update instanceof Map) {
+      for (const [element, count] of update.entries()) {
+        this.counts.set(element, (this.counts.get(element) ?? 0) - count);
+      }
+      return;
+    }
+    for (const element of update) {
+      this.counts.set(element, (this.counts.get(element) ?? 0) - 1);
+    }
+  }
+  update(update) {
+    if (update instanceof Map) {
+      for (const [element, count] of update.entries()) {
+        this.counts.set(element, (this.counts.get(element) ?? 0) + count);
+      }
+      return;
+    }
+    for (const element of update) {
+      this.counts.set(element, (this.counts.get(element) ?? 0) + 1);
+    }
+  }
+  get(element) {
+    return this.counts.get(element) ?? 0;
+  }
+  set(element, count) {
+    this.counts.set(element, count);
+  }
+}
+// Annotate the CommonJS export names for ESM import in node:
+0 && (module.exports = {
+  Alignment,
+  Counter,
+  DELIMITERS,
+  END_DELIMITER,
+  NUMERIC_TOKEN,
+  OP_TYPES,
+  OP_TYPE_COMBO_MAP,
+  OP_TYPE_MAP,
+  STANDARD_TOKEN,
+  START_DELIMITER,
+  basicNormalizer,
+  basicTokenizer,
+  categorizeChar,
+  ensureLengthPreservation,
+  getOpTypeComboIndex,
+  isConsonant,
+  isVowel,
+  opTypePowerset,
+  reversed,
+  translateSlice,
+  unpackRegexMatch
+});

package/dist/errorAlign/utils.d.cts ADDED Viewed

@@ -0,0 +1,107 @@
+type OpType = "MATCH" | "INSERT" | "DELETE" | "SUBSTITUTE";
+declare const OP_TYPES: ["MATCH", "INSERT", "DELETE", "SUBSTITUTE"];
+type Slice = [number, number];
+/** Class representing a single alignment operation: its type, the reference and hypothesis slices and texts, and the compound markers. */
+declare class Alignment {
+    opType: OpType;
+    refSlice: Slice | null;
+    hypSlice: Slice | null;
+    ref: string | null;
+    hyp: string | null;
+    leftCompound: boolean;
+    rightCompound: boolean;
+    constructor(opType: OpType, refSlice?: Slice | null, hypSlice?: Slice | null, ref?: string | null, hyp?: string | null, leftCompound?: boolean, rightCompound?: boolean);
+    /** Return the hypothesis with compound markers if applicable. */
+    get hypWithCompoundMarkers(): string | null;
+    toString(): string;
+}
+/**
+ * Generate all possible combinations of operation types, except the empty set.
+ *
+ * @returns All possible combinations of operation types.
+ */
+declare function opTypePowerset(): IterableIterator<NonNullable<"DELETE" | "MATCH" | "INSERT" | "SUBSTITUTE">[]>;
+declare function reversed<T>(iterable: IterableIterator<T>): T[];
+declare const START_DELIMITER = "<";
+declare const END_DELIMITER = ">";
+declare const DELIMITERS: Set<string>;
+declare const OP_TYPE_MAP: {
+    DELETE: "DELETE";
+    MATCH: "MATCH";
+    INSERT: "INSERT";
+    SUBSTITUTE: "SUBSTITUTE";
+};
+declare const OP_TYPE_COMBO_MAP: Record<number, OpType[]>;
+declare function getOpTypeComboIndex(ops: OpType[]): number;
+declare const NUMERIC_TOKEN = "\\p{N}+([,.]\\p{N}+)*(?=\\s|$)";
+declare const STANDARD_TOKEN = "[\\p{L}\\p{N}]+(['][\\p{L}\\p{N}]+)*'?";
+/**
+ * Check if the normalized character is a vowel (one of "aeiouy").
+ *
+ * @param c The character to check; must be a single character.
+ * @returns True if the character is a vowel, false otherwise.
+ */
+declare function isVowel(c: string): boolean;
+/**
+ * Check if the normalized character is a consonant (one of "bcdfghjklmnpqrstvwxyz").
+ *
+ * @param c The character to check; must be a single character.
+ * @returns True if the character is a consonant, false otherwise.
+ */
+declare function isConsonant(c: string): boolean;
+/**
+ * Categorize a character as a delimiter (0), a consonant (1), a vowel (2), or anything else (3).
+ *
+ * @param c The character to check.
+ * @returns The numeric category of the character.
+ */
+declare function categorizeChar(c: string): number;
+/**
+ * Default tokenizer that extracts numeric and word tokens with a Unicode-aware regular expression.
+ *
+ * @param text The input text to tokenize.
+ * @returns A list of regex matches, one per token.
+ */
+declare function basicTokenizer(text: string): RegExpMatchArray[];
+/**
+ * Default normalizer that only converts text to lowercase.
+ *
+ * @param text The input text to normalize.
+ * @returns The normalized text.
+ */
+declare function basicNormalizer(text: string): string;
+/**
+ * Decorator to ensure that the normalizer preserves the length of the input text.
+ *
+ * @param normalizer The normalizer function to wrap.
+ * @returns The wrapped normalizer, which throws a RangeError when the normalized text differs in length from the input.
+ */
+declare function ensureLengthPreservation<Args extends unknown[]>(normalizer: (text: string, ...args: Args) => string): (text: string, ...args: Args) => string;
+/**
+ * Unpack regex match arrays into the text and span of their first capture group.
+ *
+ * @param tokenizer A function to tokenize the sequences. Must be regex-based and return match arrays that carry `indices`.
+ * @returns A function that unpacks a list of match arrays into tuples (match string, span).
+ */
+declare function unpackRegexMatch<Args extends unknown[]>(tokenizer: (text: string, ...args: Args) => RegExpMatchArray[]): (text: string, ...args: Args) => [string, [number, number]][];
+/**
+ * Translate a slice from the alignment sequence back to the original sequence.
+ *
+ * @param segmentSlice The slice in the alignment sequence.
+ * @param indexMap The mapping from alignment indices to original sequence indices.
+ * @returns The translated slice in the original sequence, or null if there are no valid indices.
+ */
+declare function translateSlice(segmentSlice: Slice, indexMap: number[]): Slice | null;
+declare class Counter<T> { // tally of how many times each element has been counted
+    private counts;
+    constructor(init?: Iterable<T> | Map<T, number>); // counts each element of an iterable once; a Map is used directly as the backing store
+    elements(): IteratorObject<T, undefined, unknown>; // each element repeated as many times as its count
+    mostCommon(n?: number): [T, number][]; // [element, count] pairs sorted by count; only the first n when n is given
+    total(): number; // sum of all counts
+    subtract(update: Iterable<T> | Map<T, number>): void; // lowers counts by one per element, or by the mapped amount for a Map
+    update(update: Iterable<T> | Map<T, number>): void; // raises counts by one per element, or by the mapped amount for a Map
+    get(element: T): number; // count for the element, 0 when it was never counted
+    set(element: T, count: number): void; // overwrites the count for the element
+}
+export { Alignment, Counter, DELIMITERS, END_DELIMITER, NUMERIC_TOKEN, OP_TYPES, OP_TYPE_COMBO_MAP, OP_TYPE_MAP, type OpType, STANDARD_TOKEN, START_DELIMITER, type Slice, basicNormalizer, basicTokenizer, categorizeChar, ensureLengthPreservation, getOpTypeComboIndex, isConsonant, isVowel, opTypePowerset, reversed, translateSlice, unpackRegexMatch };

package/dist/errorAlign/utils.d.ts ADDED Viewed

@@ -0,0 +1,107 @@
+type OpType = "MATCH" | "INSERT" | "DELETE" | "SUBSTITUTE";
+declare const OP_TYPES: ["MATCH", "INSERT", "DELETE", "SUBSTITUTE"];
+type Slice = [number, number];
+/** Class representing a single alignment operation: its type, the reference and hypothesis slices and texts, and the compound markers. */
+declare class Alignment {
+    opType: OpType;
+    refSlice: Slice | null;
+    hypSlice: Slice | null;
+    ref: string | null;
+    hyp: string | null;
+    leftCompound: boolean;
+    rightCompound: boolean;
+    constructor(opType: OpType, refSlice?: Slice | null, hypSlice?: Slice | null, ref?: string | null, hyp?: string | null, leftCompound?: boolean, rightCompound?: boolean);
+    /** Return the hypothesis with compound markers if applicable. */
+    get hypWithCompoundMarkers(): string | null;
+    toString(): string;
+}
+/**
+ * Generate all possible combinations of operation types, except the empty set.
+ *
+ * @returns All possible combinations of operation types.
+ */
+declare function opTypePowerset(): IterableIterator<NonNullable<"DELETE" | "MATCH" | "INSERT" | "SUBSTITUTE">[]>;
+declare function reversed<T>(iterable: IterableIterator<T>): T[];
+declare const START_DELIMITER = "<";
+declare const END_DELIMITER = ">";
+declare const DELIMITERS: Set<string>;
+declare const OP_TYPE_MAP: {
+    DELETE: "DELETE";
+    MATCH: "MATCH";
+    INSERT: "INSERT";
+    SUBSTITUTE: "SUBSTITUTE";
+};
+declare const OP_TYPE_COMBO_MAP: Record<number, OpType[]>;
+declare function getOpTypeComboIndex(ops: OpType[]): number;
+declare const NUMERIC_TOKEN = "\\p{N}+([,.]\\p{N}+)*(?=\\s|$)";
+declare const STANDARD_TOKEN = "[\\p{L}\\p{N}]+(['][\\p{L}\\p{N}]+)*'?";
+/**
+ * Check if the normalized character is a vowel (one of "aeiouy").
+ *
+ * @param c The character to check; must be a single character.
+ * @returns True if the character is a vowel, false otherwise.
+ */
+declare function isVowel(c: string): boolean;
+/**
+ * Check if the normalized character is a consonant (one of "bcdfghjklmnpqrstvwxyz").
+ *
+ * @param c The character to check; must be a single character.
+ * @returns True if the character is a consonant, false otherwise.
+ */
+declare function isConsonant(c: string): boolean;
+/**
+ * Categorize a character as a delimiter (0), a consonant (1), a vowel (2), or anything else (3).
+ *
+ * @param c The character to check.
+ * @returns The numeric category of the character.
+ */
+declare function categorizeChar(c: string): number;
+/**
+ * Default tokenizer that extracts numeric and word tokens with a Unicode-aware regular expression.
+ *
+ * @param text The input text to tokenize.
+ * @returns A list of regex matches, one per token.
+ */
+declare function basicTokenizer(text: string): RegExpMatchArray[];
+/**
+ * Default normalizer that only converts text to lowercase.
+ *
+ * @param text The input text to normalize.
+ * @returns The normalized text.
+ */
+declare function basicNormalizer(text: string): string;
+/**
+ * Decorator to ensure that the normalizer preserves the length of the input text.
+ *
+ * @param normalizer The normalizer function to wrap.
+ * @returns The wrapped normalizer, which throws a RangeError when the normalized text differs in length from the input.
+ */
+declare function ensureLengthPreservation<Args extends unknown[]>(normalizer: (text: string, ...args: Args) => string): (text: string, ...args: Args) => string;
+/**
+ * Unpack regex match arrays into the text and span of their first capture group.
+ *
+ * @param tokenizer A function to tokenize the sequences. Must be regex-based and return match arrays that carry `indices`.
+ * @returns A function that unpacks a list of match arrays into tuples (match string, span).
+ */
+declare function unpackRegexMatch<Args extends unknown[]>(tokenizer: (text: string, ...args: Args) => RegExpMatchArray[]): (text: string, ...args: Args) => [string, [number, number]][];
+/**
+ * Translate a slice from the alignment sequence back to the original sequence.
+ *
+ * @param segmentSlice The slice in the alignment sequence.
+ * @param indexMap The mapping from alignment indices to original sequence indices.
+ * @returns The translated slice in the original sequence, or null if there are no valid indices.
+ */
+declare function translateSlice(segmentSlice: Slice, indexMap: number[]): Slice | null;
+declare class Counter<T> { // tally of how many times each element has been counted
+    private counts;
+    constructor(init?: Iterable<T> | Map<T, number>); // counts each element of an iterable once; a Map is used directly as the backing store
+    elements(): IteratorObject<T, undefined, unknown>; // each element repeated as many times as its count
+    mostCommon(n?: number): [T, number][]; // [element, count] pairs sorted by count; only the first n when n is given
+    total(): number; // sum of all counts
+    subtract(update: Iterable<T> | Map<T, number>): void; // lowers counts by one per element, or by the mapped amount for a Map
+    update(update: Iterable<T> | Map<T, number>): void; // raises counts by one per element, or by the mapped amount for a Map
+    get(element: T): number; // count for the element, 0 when it was never counted
+    set(element: T, count: number): void; // overwrites the count for the element
+}
+export { Alignment, Counter, DELIMITERS, END_DELIMITER, NUMERIC_TOKEN, OP_TYPES, OP_TYPE_COMBO_MAP, OP_TYPE_MAP, type OpType, STANDARD_TOKEN, START_DELIMITER, type Slice, basicNormalizer, basicTokenizer, categorizeChar, ensureLengthPreservation, getOpTypeComboIndex, isConsonant, isVowel, opTypePowerset, reversed, translateSlice, unpackRegexMatch };