npm - @rubytech/create-realagent - Versions diffs - 1.0.826 → 1.0.829 - Mend

@rubytech/create-realagent 1.0.826 → 1.0.829

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (103) hide show

package/payload/platform/plugins/whatsapp-import/lib/src/derive-keys.ts DELETED Viewed

@@ -1,59 +0,0 @@
-import { createHash } from "node:crypto";
-// ---------------------------------------------------------------------------
-// derive-keys — natural-key derivation for whatsapp-import (Task 870).
-//
-// Pure functions. No I/O. The whole point is that re-imports of the same
-// archive collapse to the same Message identity regardless of release-level
-// drift in array indices, hash widths, or arbitrary tiebreakers.
-//
-// Key shape (Task 870 brief):
-//
-//   messageId = whatsapp-export:msg:<conversationSha256>:<dateSentISO>
-//                                  :<NFKC-trim-lower(senderName)>
-//                                  :<sha256-hex(body)>
-//
-// Operator constraint: the same archive must be re-imported with the same
-// `--timezone` flag. Different timezones reinterpret wall-clock instants and
-// will produce drifted messageIds — that is correct semantics, not a bug.
-// Documented in .docs/whatsapp.md natural-key contract section.
-// ---------------------------------------------------------------------------
-export function normaliseSenderName(name: string): string {
-  return name.normalize("NFKC").trim().toLowerCase();
-}
-export function sha256Hex(input: string): string {
-  return createHash("sha256").update(input).digest("hex");
-}
-export interface DeriveMessageIdInput {
-  /** SHA-256 of the source `_chat.txt` bytes — stable across re-imports. */
-  conversationSha256: string;
-  /** ISO 8601 with timezone offset, as emitted by parseExport. */
-  dateSent: string;
-  /** Raw senderName from the export line. Normalised internally. */
-  senderName: string;
-  /** Raw message body. Hashed internally. */
-  body: string;
-}
-export function deriveMessageId(input: DeriveMessageIdInput): string {
-  const norm = normaliseSenderName(input.senderName);
-  const bodyHash = sha256Hex(input.body);
-  return `whatsapp-export:msg:${input.conversationSha256}:${input.dateSent}:${norm}:${bodyHash}`;
-}
-export interface ObservationContentFields {
-  summary?: string | null;
-  from?: string | null;
-  to?: string | null;
-  subject?: string | null;
-}
-export function observationContentHash(fields: ObservationContentFields): string {
-  const parts = [fields.summary, fields.from, fields.to, fields.subject].map(
-    (p) => (p ?? "").normalize("NFKC").trim().toLowerCase(),
-  );
-  return sha256Hex(parts.join("|"));
-}

package/payload/platform/plugins/whatsapp-import/lib/src/filter.ts DELETED Viewed

@@ -1,136 +0,0 @@
-// ---------------------------------------------------------------------------
-// filter — operator-supplied gate over ParsedLine[] (Task 871).
-//
-// Phase 1 ingest is now mandatory-filter: the deterministic Bash entry refuses
-// to write a bulk archive without `--filter`. Three forms cover the operator
-// patterns named in the brief:
-//
-//   --filter all                                     → no row drop
-//   --filter senders=Alice,Bob Carter                → keep rows whose
-//                                                       senderName matches
-//                                                       any csv entry exactly
-//   --filter date-range=2024-01-01..2024-06-30       → keep rows whose
-//                                                       dateSent ISO falls
-//                                                       inside the inclusive
-//                                                       range (date or full
-//                                                       ISO 8601)
-//
-// Doctrine alignment:
-//   - feedback_compress_at_ingest_for_bulk_archives.md — the gate is
-//     mandatory at write-time, not after.
-//   - feedback_deterministic_means_remove_llm.md — the filter parser is a
-//     pure function, no LLM in the per-row decision path.
-//   - feedback_loud_failures.md — malformed `--filter` raises a structured
-//     error with a named reason rather than silently coercing to `all`.
-// ---------------------------------------------------------------------------
-import type { ParsedLine } from "./parse-export.js";
-export type Filter =
-  | { kind: "all" }
-  | { kind: "senders"; senders: string[] }
-  | { kind: "date-range"; fromIso: string; toIso: string };
-/**
- * Parse a CLI `--filter` argument into a structured Filter.
- *
- * Throws Error with message starting "filter: …" on malformed input. The
- * caller (ingest.mjs / vitest) surfaces the reason verbatim — the brief
- * mandates `[whatsapp-ingest] FAIL filter-required reason="…"` so the
- * operator can grep one line.
- */
-export function parseFilterArg(raw: string | undefined | null): Filter {
-  if (raw == null || raw.trim() === "") {
-    throw new Error(
-      'filter: --filter is required (one of "all", "senders=<csv>", "date-range=<isoFrom>..<isoTo>")',
-    );
-  }
-  const value = raw.trim();
-  if (value === "all") return { kind: "all" };
-  if (value.startsWith("senders=")) {
-    const csv = value.slice("senders=".length);
-    const senders = csv
-      .split(",")
-      .map((s) => s.trim())
-      .filter((s) => s.length > 0);
-    if (senders.length === 0) {
-      throw new Error('filter: senders= requires at least one comma-separated name');
-    }
-    return { kind: "senders", senders };
-  }
-  if (value.startsWith("date-range=")) {
-    const range = value.slice("date-range=".length);
-    const parts = range.split("..");
-    if (parts.length !== 2) {
-      throw new Error(
-        `filter: date-range must be "<isoFrom>..<isoTo>" — got "${range}"`,
-      );
-    }
-    const [fromIso, toIso] = parts.map((p) => p.trim());
-    if (!fromIso || !toIso) {
-      throw new Error(
-        `filter: date-range requires both endpoints — got "${range}"`,
-      );
-    }
-    if (Number.isNaN(Date.parse(fromIso))) {
-      throw new Error(`filter: date-range fromIso="${fromIso}" is not parseable as ISO 8601`);
-    }
-    if (Number.isNaN(Date.parse(toIso))) {
-      throw new Error(`filter: date-range toIso="${toIso}" is not parseable as ISO 8601`);
-    }
-    if (Date.parse(fromIso) > Date.parse(toIso)) {
-      throw new Error(`filter: date-range fromIso="${fromIso}" is later than toIso="${toIso}"`);
-    }
-    return { kind: "date-range", fromIso, toIso };
-  }
-  throw new Error(
-    `filter: unrecognised form "${value}" — must be "all", "senders=<csv>", or "date-range=<isoFrom>..<isoTo>"`,
-  );
-}
-/**
- * Apply a parsed Filter to ParsedLine[]. Returns a new array of kept lines
- * with the parser's original `sequenceIndex` preserved (the filter never
- * reorders). ingest.mjs re-stamps `sequenceIndex` to its post-filter position
- * during row construction for archive-write — re-stamping here too would be
- * redundant.
- */
-export function applyFilter(
-  parsedLines: readonly ParsedLine[],
-  filter: Filter,
-): ParsedLine[] {
-  const predicate = makePredicate(filter);
-  const kept: ParsedLine[] = [];
-  for (const line of parsedLines) {
-    if (!predicate(line)) continue;
-    kept.push(line);
-  }
-  return kept;
-}
-function makePredicate(filter: Filter): (line: ParsedLine) => boolean {
-  if (filter.kind === "all") return () => true;
-  if (filter.kind === "senders") {
-    const allow = new Set(filter.senders);
-    return (line) => allow.has(line.senderName);
-  }
-  // date-range: inclusive on both ends. Date-only endpoints widen to whole-
-  // day semantics: `from=YYYY-MM-DD` → `T00:00:00Z`, `to=YYYY-MM-DD` →
-  // `T23:59:59.999Z`. Full ISO 8601 endpoints with `T` are passed through.
-  // Without this widening, `--filter date-range=2024-01-01..2024-06-30`
-  // would silently drop every message later than 2024-06-30T00:00:00Z on the
-  // last day — a UX trap that contradicts the operator's reading.
-  const fromMs = parseRangeEndpoint(filter.fromIso, "start");
-  const toMs = parseRangeEndpoint(filter.toIso, "end");
-  return (line) => {
-    const ms = Date.parse(line.dateSent);
-    return ms >= fromMs && ms <= toMs;
-  };
-}
-function parseRangeEndpoint(iso: string, edge: "start" | "end"): number {
-  if (/T/.test(iso)) return Date.parse(iso);
-  // Date-only — widen to whole-day inclusive on the requested edge.
-  const suffix = edge === "start" ? "T00:00:00.000Z" : "T23:59:59.999Z";
-  return Date.parse(iso + suffix);
-}

package/payload/platform/plugins/whatsapp-import/lib/src/index.ts DELETED Viewed

@@ -1,19 +0,0 @@
-export { parseExport } from "./parse-export.js";
-export type {
-  ParseExportInput,
-  ParseExportResult,
-  ParseExportCounters,
-  ParsedLine,
-} from "./parse-export.js";
-export { parseFilterArg, applyFilter } from "./filter.js";
-export type { Filter } from "./filter.js";
-export {
-  normaliseSenderName,
-  sha256Hex,
-  deriveMessageId,
-  observationContentHash,
-} from "./derive-keys.js";
-export type {
-  DeriveMessageIdInput,
-  ObservationContentFields,
-} from "./derive-keys.js";

package/payload/platform/plugins/whatsapp-import/lib/src/parse-export.ts DELETED Viewed

@@ -1,471 +0,0 @@
-import { createHash } from "node:crypto";
-import { readFileSync } from "node:fs";
-// ---------------------------------------------------------------------------
-// parse-export — deterministic WhatsApp `_chat.txt` parser (Task 805).
-//
-// Pure function. No LLM in the per-line decision path. Replaces the prose
-// grammar that lived in references/export-parse.md when the database-operator
-// specialist's Sonnet was the line tokeniser. Every grammar branch here is
-// exercised by the vitest grid in `__tests__/parse-export.test.ts`; that
-// grid IS the contract — extending the grammar means a new test first.
-//
-// Doctrine alignment:
-//   - feedback_deterministic_means_remove_llm.md — the LLM is no longer in
-//     the per-line decision path.
-//   - feedback_deterministic_is_a_shell_script.md — TypeScript is the right
-//     deliverable shape here (UTF-8 decode + multi-line body assembly + sha256
-//     would be cumbersome in shell); the LITERAL-MAPPING rule yields to
-//     "Node module" because the per-line decision path is the deliverable, not
-//     a one-shot orchestrator.
-//   - feedback_loud_failures.md — encoding errors, empty files, and lines
-//     that match a timestamp prefix but cannot be tokenised throw with named
-//     reasons rather than degrading silently.
-// ---------------------------------------------------------------------------
-export interface ParseExportInput {
-  /** Absolute path to the `_chat.txt` file. */
-  filePath: string;
-  /** Account scope used to compose `conversationId`. */
-  accountId: string;
-  /** IANA timezone the operator confirmed (e.g. `Europe/London`). */
-  timezone: string;
-  /**
-   * Date ordering and year shape. Omit for auto-detect (Task 845): the parser
-   * probes the first matched line as DD/MM and locks that ordering if range-valid;
-   * otherwise locks MM/DD. Year shape is independent — `\d{4}|\d{2}` accepts 2-digit
-   * (mapped `2000+yy`) and 4-digit (passed through) years per-line, including
-   * mixed-year files.
-   */
-  dateFormat?: "DD/MM/YY" | "MM/DD/YY" | "DD/MM/YYYY" | "MM/DD/YYYY";
-}
-export interface ParsedLine {
-  senderName: string;
-  /** ISO 8601 with timezone offset for the supplied IANA zone. */
-  dateSent: string;
-  body: string;
-  /** Position within emitted (post-skip) messages, 0-based. */
-  sequenceIndex: number;
-}
-export interface ParseExportCounters {
-  parsed: number;
-  systemSkipped: number;
-  mediaSkipped: number;
-  parseErrors: number;
-}
-export interface ParseExportResult {
-  conversationId: string;
-  /** `whatsapp-export:<sha256-hex>` of the raw file bytes. */
-  archiveSourceFile: string;
-  parsedLines: ParsedLine[];
-  counters: ParseExportCounters;
-}
-// Year capture is `\d{2}|\d{4}` so a single regex covers both 2-digit (legacy)
-// and 4-digit (modern WhatsApp default) prefixes — Task 845. Exactly 2 or 4
-// chars; 3-digit years (truncation typos, hand-edited files) are rejected as
-// not-a-prefix and surface via parse-grammar-miss, not silently coerced into
-// year-202-AD timestamps. Year semantics are resolved per-match in
-// `matchTimestampPrefix` from the captured length, not from the regex shape,
-// so mixed-year files parse natively.
-const TIMESTAMP_PREFIX_DDMMYY =
-  /^\[(\d{2})\/(\d{2})\/(\d{4}|\d{2}),\s+(\d{1,2}):(\d{2})(?::(\d{2}))?\]\s*(.*)$/;
-const TIMESTAMP_PREFIX_MMDDYY = TIMESTAMP_PREFIX_DDMMYY; // shape is identical; ordering differs in interpretation only
-// System-message patterns that appear WITHOUT a `: ` sender/body separator.
-// WhatsApp emits group-event and security-code lines as `<Sender> <verb> ...`
-// (no colon). Lines that match the timestamp prefix but lack `: ` and do not
-// match one of these patterns are LOUD-FAIL parse errors — never silently
-// dropped.
-const LINE_LEVEL_SYSTEM_PATTERNS: RegExp[] = [
-  /^Messages and calls are end-to-end encrypted/i,
-  /'s security code changed\.?$/i,
-  / created group ["“”]/,
-  / added /,
-  / removed /,
-  / left$/,
-  / changed the subject from /,
-  / changed this group's icon/,
-  / joined using this group's invite link/,
-  /^You're now an admin$/i,
-  /^You created group/i,
-];
-// Body-level patterns evaluated after `Sender: body` split. These are real
-// messages syntactically but carry no graph value (deletions, media-only).
-const BODY_LEVEL_SYSTEM_PATTERNS: RegExp[] = [
-  /^You deleted this message\.?$/,
-  /^This message was deleted\.?$/,
-];
-const MEDIA_ONLY_PATTERNS: RegExp[] = [
-  /^<Media omitted>$/,
-  /^IMG-\d+-\w+\.(jpg|jpeg|png|heic|gif)\s*\(file attached\)$/i,
-  /^VID-\d+-\w+\.mp4\s*\(file attached\)$/i,
-  /^PTT-\d+-\w+\.opus\s*\(file attached\)$/i,
-  /^AUD-\d+-\w+\.opus\s*\(file attached\)$/i,
-  /^STK-\d+-\w+\.webp\s*\(file attached\)$/i,
-  /^.+\.(pdf|docx|doc|xlsx|xls|pptx|ppt|zip|csv|txt)\s*\(file attached\)$/i,
-  /^‎.+attached:\s*.+$/, // alternative LRM-prefixed format on some platforms
-];
-export function parseExport(input: ParseExportInput): ParseExportResult {
-  const { filePath, accountId, timezone, dateFormat: explicitDateFormat } = input;
-  if (!accountId || !accountId.trim()) {
-    throw new Error("parse-export: accountId is required.");
-  }
-  if (!timezone || !timezone.trim()) {
-    throw new Error("parse-export: timezone is required (e.g. 'Europe/London').");
-  }
-  const rawBytes = readFileSync(filePath);
-  const sha256Hex = createHash("sha256").update(rawBytes).digest("hex");
-  const archiveSourceFile = `whatsapp-export:${sha256Hex}`;
-  const conversationId = `whatsapp-export:${sha256Hex}:${accountId}`;
-  const text = decodeAndNormalise(rawBytes);
-  if (text.length === 0) {
-    throw new Error(
-      `parse-export: file is empty — not a _chat.txt. file=${filePath}`,
-    );
-  }
-  const lines = text.split("\n");
-  // Auto-detect when `dateFormat` is omitted (Task 845): probe the first line
-  // that contains a timestamp prefix as DD/MM; lock DD/MM if range-valid,
-  // otherwise lock MM/DD. WhatsApp's locale is set per device, so a single
-  // file never mixes DD/MM and MM/DD — locking once from line 1 is correct.
-  // Concatenated multi-locale exports require an explicit `dateFormat`.
-  const ordering = resolveOrdering(explicitDateFormat, lines);
-  const counters: ParseExportCounters = {
-    parsed: 0,
-    systemSkipped: 0,
-    mediaSkipped: 0,
-    parseErrors: 0,
-  };
-  // Stage 1 — tokenise into raw messages (timestamp + remainder), accumulating
-  // continuation lines into the previous remainder. Stage 2 then categorises
-  // each tokenised message (system / media / real) so the counter increments
-  // happen exactly once per message (continuation lines are never counted separately).
-  interface RawMessage {
-    rawLineIndex: number; // 1-based file line number for LOUD-FAIL diagnostics
-    year: number;
-    month: number;
-    day: number;
-    hour: number;
-    minute: number;
-    second: number;
-    remainder: string; // everything after `]` on the prefix line, plus continuation lines
-  }
-  const raw: RawMessage[] = [];
-  for (let i = 0; i < lines.length; i++) {
-    const line = lines[i];
-    if (line.length === 0 && i === lines.length - 1) continue; // trailing newline
-    const prefixMatch = matchTimestampPrefix(line, ordering);
-    if (prefixMatch) {
-      raw.push({
-        rawLineIndex: i + 1,
-        ...prefixMatch.dateParts,
-        remainder: prefixMatch.remainder,
-      });
-    } else {
-      // Continuation of the previous message body. If there is no previous
-      // message, this line is leading garbage — ignore it (matches the
-      // export-parse.md edge case where a leading BOM or blank line precedes
-      // the first timestamp).
-      const last = raw[raw.length - 1];
-      if (last) {
-        last.remainder += "\n" + line;
-      }
-    }
-  }
-  // Stage 2 — categorise each raw message. Do NOT trim trailing whitespace
-  // from the remainder before splitting — `Joel: ` (sender + colon + trailing
-  // space + newline) collapses to `Joel:` after a `\s+$` trim and the `: `
-  // separator disappears, turning an empty-body system skip into a LOUD-FAIL.
-  const parsedLines: ParsedLine[] = [];
-  for (const r of raw) {
-    const remainder = r.remainder;
-    const colonIdx = findFirstColonSeparator(remainder);
-    if (colonIdx === -1) {
-      // No `: ` separator. Must match a known system pattern or LOUD-FAIL.
-      const trimmed = remainder.replace(/\s+$/, "");
-      if (matchesAny(trimmed, LINE_LEVEL_SYSTEM_PATTERNS)) {
-        counters.systemSkipped++;
-        continue;
-      }
-      counters.parseErrors++;
-      throw new Error(
-        `parse-export: parse-error file=${filePath} line=${r.rawLineIndex} reason=no-sender-body-separator content="${trimmed.slice(0, 80)}"`,
-      );
-    }
-    const senderName = remainder.slice(0, colonIdx).trim();
-    const body = remainder.slice(colonIdx + 2).replace(/\s+$/, "");
-    if (body.length === 0) {
-      counters.systemSkipped++;
-      continue;
-    }
-    if (matchesAny(body, BODY_LEVEL_SYSTEM_PATTERNS)) {
-      counters.systemSkipped++;
-      continue;
-    }
-    if (matchesAny(body, MEDIA_ONLY_PATTERNS)) {
-      counters.mediaSkipped++;
-      continue;
-    }
-    const dateSent = isoWithOffset(
-      r.year,
-      r.month,
-      r.day,
-      r.hour,
-      r.minute,
-      r.second,
-      timezone,
-    );
-    parsedLines.push({
-      senderName,
-      dateSent,
-      body,
-      sequenceIndex: parsedLines.length,
-    });
-    counters.parsed++;
-  }
-  if (parsedLines.length === 0 && counters.systemSkipped === 0 && counters.mediaSkipped === 0) {
-    // Task 845: include a sanitised first-line sample so the operator knows
-    // WHY the file rejected — closes the diagnostic gap that left conversation
-    // 47c6a590-0c2c-4006-9aca-6ee9ec93c95f guessing. Echoed to stderr too so
-    // server.log has a grep-able adjunct to the existing parse-failed line.
-    const sample = sampleFirstNonBlankLine(lines, 100);
-    process.stderr.write(
-      `[whatsapp-import] parse-grammar-miss first-line="${sample}"\n`,
-    );
-    throw new Error(
-      `parse-export: zero parsed lines after walking ${filePath} — not a _chat.txt or all lines failed grammar. parse-grammar-miss first-line="${sample}"`,
-    );
-  }
-  return {
-    conversationId,
-    archiveSourceFile,
-    parsedLines,
-    counters,
-  };
-}
-// ---------------------------------------------------------------------------
-// Internals
-// ---------------------------------------------------------------------------
-function decodeAndNormalise(bytes: Buffer): string {
-  // Strict UTF-8 decode. Node's TextDecoder with `fatal: true` throws on
-  // invalid bytes — that's the LOUD-FAIL the brief mandates for encoding
-  // errors. The default `Buffer.toString('utf8')` silently substitutes
-  // U+FFFD, which would let bad bytes propagate into the graph.
-  let text: string;
-  try {
-    text = new TextDecoder("utf-8", { fatal: true }).decode(bytes);
-  } catch (err) {
-    throw new Error(
-      `parse-export: UTF-8 decode failed — ${err instanceof Error ? err.message : String(err)}. The file is not valid UTF-8; re-export from WhatsApp.`,
-    );
-  }
-  // Strip leading BOM (U+FEFF).
-  if (text.charCodeAt(0) === 0xfeff) {
-    text = text.slice(1);
-  }
-  // Normalise mixed line endings to LF.
-  text = text.replace(/\r\n/g, "\n").replace(/\r/g, "\n");
-  // Task 887 — strip Unicode bidi marks (U+200E LRM, U+200F RLM) only at
-  // line-start, where some WhatsApp builds prefix the timestamp header.
-  // Without stripping, `^\[(\d{2})\/...` fails on the prefixed line, the
-  // line is appended as a continuation of the previous body, and the next
-  // clean header parses its senderName off the polluted body — leaking 23
-  // bogus :Person nodes per import in the Adam Mackay archive. Body-internal
-  // bidi marks (e.g. the LRM in `: ‎Forwarded`) are preserved — they carry
-  // semantic information about message origin and are exercised by
-  // parse-export.test.ts. Counts emitted to stderr for the operator's tail.
-  const leadingBidiMatches = text.match(/(?:^|\n)[‎‏]+/g) || [];
-  let lrmStripped = 0;
-  let rlmStripped = 0;
-  for (const m of leadingBidiMatches) {
-    for (const ch of m) {
-      if (ch === "‎") lrmStripped++;
-      else if (ch === "‏") rlmStripped++;
-    }
-  }
-  if (leadingBidiMatches.length > 0) {
-    text = text.replace(/(^|\n)[‎‏]+/g, "$1");
-    process.stderr.write(
-      `[whatsapp-ingest] decoded normalised lrm-stripped=${lrmStripped} rlm-stripped=${rlmStripped}\n`,
-    );
-  }
-  return text;
-}
-interface TimestampMatch {
-  dateParts: {
-    year: number;
-    month: number;
-    day: number;
-    hour: number;
-    minute: number;
-    second: number;
-  };
-  remainder: string;
-}
-type Ordering = "DDMM" | "MMDD";
-function matchTimestampPrefix(
-  line: string,
-  ordering: Ordering,
-): TimestampMatch | null {
-  const re = ordering === "MMDD" ? TIMESTAMP_PREFIX_MMDDYY : TIMESTAMP_PREFIX_DDMMYY;
-  const m = line.match(re);
-  if (!m) return null;
-  const a = parseInt(m[1], 10); // dd or mm depending on ordering
-  const b = parseInt(m[2], 10); // mm or dd
-  const yearRaw = m[3];
-  const hour = parseInt(m[4], 10);
-  const minute = parseInt(m[5], 10);
-  const second = m[6] !== undefined ? parseInt(m[6], 10) : 0;
-  const remainder = m[7] ?? "";
-  const day = ordering === "MMDD" ? b : a;
-  const month = ordering === "MMDD" ? a : b;
-  // Range-check before passing to Date.UTC — that function silently rolls
-  // over invalid components (Date.UTC(2026, 13, 1) → 2027-02-01), which
-  // would corrupt timestamps when the operator passes the wrong ordering
-  // for a US-locale export. Reject as not-a-prefix; the caller retries the
-  // file with the correct ordering or LOUD-FAILs when the file isn't a chat.
-  if (month < 1 || month > 12 || day < 1 || day > 31) return null;
-  if (hour > 23 || minute > 59 || second > 59) return null;
-  // Task 845: branch year semantics on captured length. WhatsApp's modern
-  // exports emit 4-digit years; legacy exports emit 2-digit. Both are
-  // accepted by the same regex and disambiguated here so a single file may
-  // hold both shapes (mixed-year imports parse natively).
-  const year = yearRaw.length === 2 ? 2000 + parseInt(yearRaw, 10) : parseInt(yearRaw, 10);
-  return {
-    dateParts: { year, month, day, hour, minute, second },
-    remainder,
-  };
-}
-function resolveOrdering(
-  explicit: ParseExportInput["dateFormat"],
-  lines: readonly string[],
-): Ordering {
-  if (explicit === "MM/DD/YY" || explicit === "MM/DD/YYYY") return "MMDD";
-  if (explicit === "DD/MM/YY" || explicit === "DD/MM/YYYY") return "DDMM";
-  // Auto-detect: probe the first prefix-matching line as DD/MM. If range-valid,
-  // lock DD/MM (WhatsApp's global default). Otherwise lock MM/DD (US-locale
-  // exports, which are the only meaningful exception). Locked once.
-  for (const line of lines) {
-    if (matchTimestampPrefix(line, "DDMM")) return "DDMM";
-    if (matchTimestampPrefix(line, "MMDD")) return "MMDD";
-  }
-  return "DDMM"; // No matching line — caller will throw zero-parsed-lines anyway.
-}
-function sampleFirstNonBlankLine(lines: readonly string[], maxScan: number): string {
-  const scanLimit = Math.min(maxScan, lines.length);
-  for (let i = 0; i < scanLimit; i++) {
-    const trimmed = lines[i].trim();
-    if (trimmed.length === 0) continue;
-    // Strip control characters (including tab, BEL, etc.) so the diagnostic
-    // line stays single-line and grep-friendly. Truncate to 80 chars per
-    // Task 845 brief — enough to recognise the offending header shape.
-    const sanitised = trimmed.replace(/[\x00-\x1F\x7F]/g, "");
-    return sanitised.slice(0, 80);
-  }
-  return "";
-}
-function findFirstColonSeparator(remainder: string): number {
-  // Split on the FIRST `: ` (colon-space). A sender display name may itself
-  // contain a bare `:` (e.g. "Joel:Work"), so we anchor on the first colon
-  // followed by a space — that's the WhatsApp export's stable separator.
-  const idx = remainder.indexOf(": ");
-  return idx;
-}
-function matchesAny(text: string, patterns: RegExp[]): boolean {
-  for (const p of patterns) {
-    if (p.test(text)) return true;
-  }
-  return false;
-}
-function isoWithOffset(
-  year: number,
-  month: number,
-  day: number,
-  hour: number,
-  minute: number,
-  second: number,
-  timezone: string,
-): string {
-  // Produce ISO 8601 with the offset that the supplied IANA zone holds for
-  // this wall-clock instant. Two-step refinement is needed to handle DST:
-  // the wall-clock components describe a local time, and we need the offset
-  // for the corresponding UTC instant in `timezone`.
-  const guessUtcMs = Date.UTC(year, month - 1, day, hour, minute, second);
-  let offMin = offsetMinutesAt(new Date(guessUtcMs), timezone);
-  const refinedUtcMs = guessUtcMs - offMin * 60_000;
-  offMin = offsetMinutesAt(new Date(refinedUtcMs), timezone);
-  const sign = offMin >= 0 ? "+" : "-";
-  const absOff = Math.abs(offMin);
-  const offHH = String(Math.floor(absOff / 60)).padStart(2, "0");
-  const offMM = String(absOff % 60).padStart(2, "0");
-  const Y = String(year).padStart(4, "0");
-  const M = String(month).padStart(2, "0");
-  const D = String(day).padStart(2, "0");
-  const H = String(hour).padStart(2, "0");
-  const Mi = String(minute).padStart(2, "0");
-  const S = String(second).padStart(2, "0");
-  return `${Y}-${M}-${D}T${H}:${Mi}:${S}${sign}${offHH}:${offMM}`;
-}
-function offsetMinutesAt(date: Date, timezone: string): number {
-  // Use Intl.DateTimeFormat with longOffset to read the IANA-zone offset for
-  // the given UTC instant. Output format: "GMT+01:00", "GMT-05:00", or "GMT".
-  const formatter = new Intl.DateTimeFormat("en-US", {
-    timeZone: timezone,
-    timeZoneName: "longOffset",
-  });
-  const parts = formatter.formatToParts(date);
-  const tzPart = parts.find((p) => p.type === "timeZoneName");
-  if (!tzPart) {
-    throw new Error(`parse-export: unable to read offset for timezone "${timezone}".`);
-  }
-  const value = tzPart.value;
-  if (value === "GMT" || value === "UTC") return 0;
-  const m = value.match(/^(?:GMT|UTC)([+-])(\d{1,2}):?(\d{2})?$/);
-  if (!m) {
-    throw new Error(
-      `parse-export: cannot parse timezone offset "${value}" for IANA zone "${timezone}".`,
-    );
-  }
-  const sign = m[1] === "+" ? 1 : -1;
-  const hh = parseInt(m[2], 10);
-  const mm = m[3] ? parseInt(m[3], 10) : 0;
-  return sign * (hh * 60 + mm);
-}