@astilba/core 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cldr.d.ts +1 -1
- package/dist/harness.d.ts +1 -1
- package/dist/index.d.ts +25 -2
- package/dist/index.js +53 -1
- package/dist/index.js.map +1 -1
- package/dist/{model-5mrSQGoC.d.ts → model-DY8mEqBM.d.ts} +4 -4
- package/package.json +1 -1
package/dist/cldr.d.ts
CHANGED
package/dist/harness.d.ts
CHANGED
package/dist/index.d.ts
CHANGED
|
@@ -1,6 +1,29 @@
|
|
|
1
|
-
import { a as PluralKind, c as ValueToken, i as Key, l as keyId, n as CLDRCategory, o as PluralSet, r as CanonicalModel, s as Value, t as ALL_CLDR_CATEGORIES } from "./model-5mrSQGoC.js";
|
|
1
|
+
import { a as PluralKind, c as ValueToken, i as Key, l as keyId, n as CLDRCategory, o as PluralSet, r as CanonicalModel, s as Value, t as ALL_CLDR_CATEGORIES } from "./model-DY8mEqBM.js";
|
|
2
2
|
import { LanguagePlurals, PluralRule, SUPPORTED_LANGUAGES, allCategoriesFor, categoriesFor, getPlurals, isSupportedLanguage, operands, primarySubtag, representativeCount, selectCategory } from "./cldr.js";
|
|
3
3
|
|
|
4
|
+
//#region src/dto.d.ts
|
|
5
|
+
/** A persisted value — only the byte-exact `raw`; tokens are re-derived on load. */
|
|
6
|
+
interface StoredValue {
|
|
7
|
+
raw: string;
|
|
8
|
+
}
|
|
9
|
+
/** A persisted `PluralSet`: the category map flattened to a plain object. */
|
|
10
|
+
interface StoredPluralSet {
|
|
11
|
+
kind: PluralKind;
|
|
12
|
+
/** CLDR category -> value. `kind:"none"` carries a single `"other"` entry. */
|
|
13
|
+
values: Record<string, StoredValue>;
|
|
14
|
+
/** present only for the rare mixed bare+plural context (model.ts); else omitted */
|
|
15
|
+
bare?: StoredValue;
|
|
16
|
+
}
|
|
17
|
+
/** The stored message for one (key, language): each context's variants. */
|
|
18
|
+
interface StoredMessage {
|
|
19
|
+
/** context value -> its variants. `""` === the no-context case. */
|
|
20
|
+
contexts: Record<string, StoredPluralSet>;
|
|
21
|
+
}
|
|
22
|
+
/** Serialize one key's `contexts` (for a single language) to its stored JSON. */
|
|
23
|
+
declare const toStored: (contexts: Map<string, PluralSet>) => StoredMessage;
|
|
24
|
+
/** Rebuild one key's `contexts` from its stored JSON (inverse of `toStored`). */
|
|
25
|
+
declare const fromStored: (message: StoredMessage) => Map<string, PluralSet>;
|
|
26
|
+
//#endregion
|
|
4
27
|
//#region src/errors.d.ts
|
|
5
28
|
/**
|
|
6
29
|
* The base error type. `AstilbaError` is the one class consumers catch
|
|
@@ -94,5 +117,5 @@ interface PlaceholderDiff {
|
|
|
94
117
|
*/
|
|
95
118
|
declare const placeholderDiff: (source: ValueToken[], translated: ValueToken[]) => PlaceholderDiff;
|
|
96
119
|
//#endregion
|
|
97
|
-
export { ALL_CLDR_CATEGORIES, AstilbaError, type AstilbaErrorCode, type CLDRCategory, type CanonicalModel, type Key, type LanguagePlurals, type MaskResult, type PlaceholderCheck, type PlaceholderDiff, type PluralKind, type PluralRule, type PluralSet, SUPPORTED_LANGUAGES, type SentinelCheck, type Tokenizer, type Value, type ValueToken, allCategoriesFor, categoriesFor, getPlurals, isSupportedLanguage, keyId, maskTokens, operands, placeholderDiff, primarySubtag, representativeCount, selectCategory, sentinel, unmask, validatePlaceholderTokens, validatePlaceholders, validateSentinels };
|
|
120
|
+
export { ALL_CLDR_CATEGORIES, AstilbaError, type AstilbaErrorCode, type CLDRCategory, type CanonicalModel, type Key, type LanguagePlurals, type MaskResult, type PlaceholderCheck, type PlaceholderDiff, type PluralKind, type PluralRule, type PluralSet, SUPPORTED_LANGUAGES, type SentinelCheck, type StoredMessage, type StoredPluralSet, type StoredValue, type Tokenizer, type Value, type ValueToken, allCategoriesFor, categoriesFor, fromStored, getPlurals, isSupportedLanguage, keyId, maskTokens, operands, placeholderDiff, primarySubtag, representativeCount, selectCategory, sentinel, toStored, unmask, validatePlaceholderTokens, validatePlaceholders, validateSentinels };
|
|
98
121
|
//# sourceMappingURL=index.d.ts.map
|
package/dist/index.js
CHANGED
|
@@ -10,6 +10,58 @@ const ALL_CLDR_CATEGORIES = [
|
|
|
10
10
|
];
|
|
11
11
|
const keyId = (namespace, base) => `${namespace}:${base}`;
|
|
12
12
|
//#endregion
|
|
13
|
+
//#region src/dto.ts
|
|
14
|
+
/**
|
|
15
|
+
* Plain-JSON persistence mirror of a key's per-language data (`Key.contexts`).
|
|
16
|
+
*
|
|
17
|
+
* The in-memory model is `Map`-based (model.ts), so `JSON.stringify` silently
|
|
18
|
+
* loses it; this is the JSON a backend stores per (key, language). Only
|
|
19
|
+
* `Value.raw` is persisted — `tokens` are a derived view rebuilt on load, never
|
|
20
|
+
* the source of truth, and export reads `raw` alone.
|
|
21
|
+
*/
|
|
22
|
+
const emptyMap = () => Object.create(null);
|
|
23
|
+
/** Serialize one key's `contexts` (for a single language) to its stored JSON. */
|
|
24
|
+
const toStored = (contexts) => {
|
|
25
|
+
const out = emptyMap();
|
|
26
|
+
for (const [context, set] of contexts) {
|
|
27
|
+
const values = emptyMap();
|
|
28
|
+
for (const [category, value] of set.values) values[category] = { raw: value.raw };
|
|
29
|
+
out[context] = set.bare ? {
|
|
30
|
+
bare: { raw: set.bare.raw },
|
|
31
|
+
kind: set.kind,
|
|
32
|
+
values
|
|
33
|
+
} : {
|
|
34
|
+
kind: set.kind,
|
|
35
|
+
values
|
|
36
|
+
};
|
|
37
|
+
}
|
|
38
|
+
return { contexts: out };
|
|
39
|
+
};
|
|
40
|
+
const rebuild = (stored) => ({
|
|
41
|
+
raw: stored.raw,
|
|
42
|
+
tokens: [{
|
|
43
|
+
raw: stored.raw,
|
|
44
|
+
type: "text"
|
|
45
|
+
}]
|
|
46
|
+
});
|
|
47
|
+
/** Rebuild one key's `contexts` from its stored JSON (inverse of `toStored`). */
|
|
48
|
+
const fromStored = (message) => {
|
|
49
|
+
const contexts = /* @__PURE__ */ new Map();
|
|
50
|
+
for (const [context, stored] of Object.entries(message.contexts)) {
|
|
51
|
+
const values = /* @__PURE__ */ new Map();
|
|
52
|
+
for (const category of ALL_CLDR_CATEGORIES) if (category in stored.values) values.set(category, rebuild(stored.values[category]));
|
|
53
|
+
contexts.set(context, stored.bare ? {
|
|
54
|
+
bare: rebuild(stored.bare),
|
|
55
|
+
kind: stored.kind,
|
|
56
|
+
values
|
|
57
|
+
} : {
|
|
58
|
+
kind: stored.kind,
|
|
59
|
+
values
|
|
60
|
+
});
|
|
61
|
+
}
|
|
62
|
+
return contexts;
|
|
63
|
+
};
|
|
64
|
+
//#endregion
|
|
13
65
|
//#region src/errors.ts
|
|
14
66
|
var AstilbaError = class extends Error {
|
|
15
67
|
code;
|
|
@@ -197,6 +249,6 @@ const placeholderDiff = (source, translated) => {
|
|
|
197
249
|
};
|
|
198
250
|
};
|
|
199
251
|
//#endregion
|
|
200
|
-
export { ALL_CLDR_CATEGORIES, AstilbaError, SUPPORTED_LANGUAGES, allCategoriesFor, categoriesFor, getPlurals, isSupportedLanguage, keyId, maskTokens, operands, placeholderDiff, primarySubtag, representativeCount, selectCategory, sentinel, unmask, validatePlaceholderTokens, validatePlaceholders, validateSentinels };
|
|
252
|
+
export { ALL_CLDR_CATEGORIES, AstilbaError, SUPPORTED_LANGUAGES, allCategoriesFor, categoriesFor, fromStored, getPlurals, isSupportedLanguage, keyId, maskTokens, operands, placeholderDiff, primarySubtag, representativeCount, selectCategory, sentinel, toStored, unmask, validatePlaceholderTokens, validatePlaceholders, validateSentinels };
|
|
201
253
|
|
|
202
254
|
//# sourceMappingURL=index.js.map
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","names":[],"sources":["../src/model.ts","../src/errors.ts","../src/mask.ts"],"sourcesContent":["/**\n * The canonical message model.\n *\n * This is the format-agnostic centre of Astilba: every file/syntax adapter maps\n * files <-> this model. Phase 0 ships one adapter (native i18next v4) but the\n * model is deliberately syntax-neutral so ICU and other dialects can map onto it\n * later without changing it.\n *\n * Invariants worth stating out loud:\n * - Value text is preserved EXACTLY (never mutate value bytes). `Value.raw` is\n * the source of truth; `Value.tokens` is a derived view\n * used only for masking/analysis and is never used to reconstruct output.\n * - Plurals are represented STRUCTURALLY as a CLDR-category -> value map, not as\n * suffixed flat keys. The suffix set is re-derived per target language on\n * export, never carried through.\n */\n\n/** The six CLDR plural categories. The set that actually applies is language-dependent. */\nexport type CLDRCategory = \"zero\" | \"one\" | \"two\" | \"few\" | \"many\" | \"other\";\n\nexport const ALL_CLDR_CATEGORIES: readonly CLDRCategory[] = [\n \"zero\",\n \"one\",\n \"two\",\n \"few\",\n \"many\",\n \"other\",\n];\n\n/**\n * Whether a key is pluralised, and if so how.\n * - \"none\" -> a plain string; no plural suffix on export.\n * - \"cardinal\" -> `_one`, `_other`, ... suffixes.\n * - \"ordinal\" -> `_ordinal_one`, ... suffixes (i18next v4 ordinal form).\n *\n * Note \"none\" is distinct from a single-category cardinal (e.g. Japanese, whose\n * only category is `other`): `foo` (none) and `foo_other` (cardinal) are\n * different keys and must round-trip differently.\n */\nexport type PluralKind = \"none\" | \"cardinal\" | \"ordinal\";\n\n/** A token within a value. Derived from `Value.raw`; used for masking + validation. 
*/\nexport type ValueToken =\n | { type: \"text\"; raw: string }\n /** `{{var}}`, `{{var, format}}`, `{{var, format(options)}}` */\n | {\n type: \"interpolation\";\n raw: string;\n variable: string;\n format?: string;\n }\n /** `$t(ref)`, `$t(ref, {\"opt\": ...})` */\n | {\n type: \"nesting\";\n raw: string;\n ref: string;\n options?: string;\n }\n /** an HTML/XML tag `<...>` or an entity `&...;` — opaque markup */\n | { type: \"markup\"; raw: string };\n\n/**\n * A translatable value. `raw` is byte-exact source; `tokens` is the parsed view.\n * `tokens.map(t => t.raw).join(\"\")` always reconstructs `raw` exactly.\n */\nexport interface Value {\n raw: string;\n tokens: ValueToken[];\n}\n\n/**\n * The set of values for one (base, context) cell.\n *\n * - kind \"none\": `values` holds a single \"other\" entry = the plain string.\n * - kind \"cardinal\"/\"ordinal\": `values` holds the per-category plural forms.\n * - `bare`: present only in the rare i18next case where a context key has BOTH\n * a suffix-less form (used when `t()` is called without `count`) AND plural\n * forms (used when `count` is given). Kept so both render paths are lossless.\n */\nexport interface PluralSet {\n kind: PluralKind;\n values: Map<CLDRCategory, Value>;\n bare?: Value;\n}\n\n/**\n * A logical message: a base key with one entry per context value.\n * The no-context case is a single entry keyed \"\" (empty string).\n *\n * `base` is the full key path WITHOUT namespace and WITHOUT plural/context\n * suffixes, using the project key separator (default \".\"), e.g. `account.friend`.\n */\nexport interface Key {\n namespace: string;\n base: string;\n /** contextValue -> its PluralSet. \"\" === no context. */\n contexts: Map<string, PluralSet>;\n}\n\n/**\n * A single language's worth of canonical data. Round-trip is per-language.\n *\n * IN-MEMORY ONLY: this is `Map`-based for fast lookup, so it is NOT directly\n * JSON-serializable (`JSON.stringify` yields `{}` for the Maps). 
A persistence/\n * transport DTO (plain objects or a `toJSON`/`fromJSON` pair) is a v1.0 item,\n * needed once the backend stores or ships the model.\n */\nexport interface CanonicalModel {\n /** BCP-47, e.g. `en`, `en-US`, `pt-BR`. */\n language: string;\n /** `${namespace}:${base}` -> Key */\n keys: Map<string, Key>;\n}\n\nexport const keyId = (namespace: string, base: string): string =>\n `${namespace}:${base}`;\n","/**\n * The base error type. `AstilbaError` is the one class consumers catch\n * (`if (e instanceof AstilbaError) e.code`); `code` is an OPEN string so each\n * format adapter owns its own code constants without a closed core enum coupling\n * core to any one syntax. The i18next-v4 codes + their loud, fix-it-yourself\n * factory functions live with the adapter (errors-i18next.ts).\n */\n\n/** An error code is an open string; adapters define their own (e.g. `ICU_NOT_SUPPORTED`). */\nexport type AstilbaErrorCode = string;\n\nexport class AstilbaError extends Error {\n readonly code: string;\n /** The fully-qualified key (`namespace:flatKey`) the problem was found at, if any. */\n readonly key?: string;\n readonly details?: Record<string, unknown>;\n\n constructor(\n code: string,\n message: string,\n opts: { key?: string; details?: Record<string, unknown> } = {}\n ) {\n super(message);\n this.name = \"AstilbaError\";\n this.code = code;\n this.key = opts.key;\n this.details = opts.details;\n }\n}\n","/**\n * MT masking & placeholder validation — the FORMAT-NEUTRAL core. It operates on\n * canonical `ValueToken[]` (which the model\n * already carries) instead of parsing strings itself: a syntax adapter tokenizes,\n * core masks and validates. The one place a raw string must be re-tokenized is a\n * translation returned from MT — it isn't in the model — so the string-entry\n * `validatePlaceholders` takes the adapter's `Tokenizer` by injection.\n *\n * Two jobs:\n * 1. 
maskTokens()/unmask(): replace every non-text token (the WHOLE token, incl.\n * formatter and `$t()` ref name) with an opaque sentinel before an MT/LLM call,\n * and restore it after. Because the formatter keyword and ref live INSIDE the\n * masked span, MT never sees them — that alone defeats the \"translated\n * `one`/`other` -> `uno`/`otros`\" class of bug.\n * 2. validatePlaceholderTokens(): a fail-closed, CI-failable check comparing a\n * source value's tokens against its translation's tokens (the caller restores\n * any masked sentinels first) — every interpolation variable, formatter\n * keyword, nesting ref, and markup tag must survive unmodified. This is the\n * validator shipped in the free utility.\n *\n * Sentinels use private-use-area delimiters so they carry no linguistic content\n * for an MT engine to \"helpfully\" translate, while still being detectable if the\n * engine mangles them.\n */\n\nimport { AstilbaError } from \"./errors.ts\";\nimport type { ValueToken } from \"./model.ts\";\n\nconst OPEN = \"\\uE000\";\nconst CLOSE = \"\\uE001\";\n\nexport const sentinel = (index: number): string => `${OPEN}${index}${CLOSE}`;\n\nconst SENTINEL_RE = new RegExp(`${OPEN}(\\\\d+)${CLOSE}`, \"gu\");\n\n/**\n * Turns a raw value string into canonical tokens. Supplied by whichever syntax\n * adapter is in use — the one syntax-specific dependency the string-entry validator\n * needs, to re-tokenize an MT-returned translation that was never in the model.\n */\nexport type Tokenizer = (raw: string) => ValueToken[];\n\nexport interface MaskResult {\n masked: string;\n /** original token raws, indexed by sentinel number */\n parts: string[];\n}\n\n/**\n * Replace interpolation / nesting / markup tokens with sentinels. Rejects loudly\n * if the literal text already contains a reserved sentinel delimiter — rare but\n * legal in real values (e.g. 
private-use-area glyphs from icon fonts like Material\n * Icons / Nerd Fonts) — rather than letting unmask() silently corrupt it.\n */\nexport const maskTokens = (tokens: ValueToken[]): MaskResult => {\n const parts: string[] = [];\n let masked = \"\";\n for (const tok of tokens) {\n if (tok.type === \"text\") {\n if (tok.raw.includes(OPEN) || tok.raw.includes(CLOSE)) {\n throw new AstilbaError(\n \"MASK_VALIDATION\",\n \"Value text contains a reserved masking sentinel delimiter \" +\n \"(U+E000/U+E001); it cannot be masked without ambiguity. Strip or \" +\n \"escape these private-use-area characters before masking.\"\n );\n }\n masked += tok.raw;\n } else {\n masked += sentinel(parts.length);\n parts.push(tok.raw);\n }\n }\n return { masked, parts };\n};\n\nexport const unmask = (masked: string, parts: string[]): string =>\n masked.replace(SENTINEL_RE, (_, n: string) => {\n const part = parts[Number(n)];\n if (part === undefined) {\n throw new AstilbaError(\n \"MASK_VALIDATION\",\n `Unknown sentinel index ${n} during unmask.`\n );\n }\n return part;\n });\n\nexport interface SentinelCheck {\n ok: boolean;\n errors: string[];\n}\n\n/**\n * Validate that an MT engine returned every sentinel exactly once, unmodified,\n * and invented none. Reordering is allowed (target languages reorder freely);\n * pass `requireOrder` to also assert original order.\n */\nexport const validateSentinels = (\n translated: string,\n parts: string[],\n opts: { requireOrder?: boolean } = {}\n): SentinelCheck => {\n const errors: string[] = [];\n const seen = new Map<number, number>();\n const order: number[] = [];\n let m: RegExpExecArray | null;\n SENTINEL_RE.lastIndex = 0;\n while ((m = SENTINEL_RE.exec(translated)) !== null) {\n const idx = Number(m[1]);\n seen.set(idx, (seen.get(idx) ?? 0) + 1);\n order.push(idx);\n }\n\n for (let i = 0; i < parts.length; i += 1) {\n const count = seen.get(i) ?? 
0;\n if (count === 0) {\n errors.push(`placeholder #${i} (${parts[i]}) was dropped by MT`);\n } else if (count > 1) {\n errors.push(`placeholder #${i} (${parts[i]}) was duplicated by MT`);\n }\n }\n for (const idx of seen.keys()) {\n if (idx >= parts.length) {\n errors.push(`MT invented an unknown placeholder #${idx}`);\n }\n }\n // Detect a corrupted sentinel: stray delimiter chars not part of a valid token.\n const stripped = translated.replace(SENTINEL_RE, \"\");\n if (stripped.includes(OPEN) || stripped.includes(CLOSE)) {\n errors.push(\"MT corrupted a placeholder sentinel (stray delimiter found)\");\n }\n if (opts.requireOrder === true) {\n const expected = [...order].toSorted((a, b) => a - b);\n if (order.join(\",\") !== expected.join(\",\")) {\n errors.push(\"placeholder order was changed\");\n }\n }\n\n return { errors, ok: errors.length === 0 };\n};\n\n// --- post-hoc placeholder validation (the free-utility CI check) -------------\n\n// Escape `\\` then `|` so the `|` field separator below is unambiguous even if a\n// variable / format / ref / options string itself contains one.\nconst esc = (s: string): string =>\n s.replaceAll(\"\\\\\", \"\\\\\\\\\").replaceAll(\"|\", \"\\\\|\");\n\n/**\n * Canonical identity for placeholder equality, computed from the `ValueToken`\n * fields directly — variable + format for interpolation, ref for nesting, raw for\n * markup. No syntax-specific normalisation: a value and its own translation carry\n * byte-identical placeholders, so the raw canonical fields ARE the faithful\n * identity (an adapter wanting looser matching can pre-normalise its tokens before\n * calling `validatePlaceholderTokens`). Returns `null` for text (not a placeholder).\n */\nconst signature = (tok: ValueToken): string | null => {\n switch (tok.type) {\n case \"interpolation\": {\n return `interp:${esc(tok.variable)}|${esc(tok.format ?? 
\"\")}`;\n }\n case \"nesting\": {\n // options are part of the placeholder's identity: $t(a, {\"count\": 3}) and\n // $t(a, {\"count\": 0}) render differently, so a mutated option must not pass.\n return `nest:${esc(tok.ref)}|${esc(tok.options ?? \"\")}`;\n }\n case \"markup\": {\n // one field (the whole raw tag), so no separator to disambiguate — and the\n // `markup:` prefix keeps it distinct from interp/nesting signatures.\n return `markup:${tok.raw}`;\n }\n case \"text\": {\n return null;\n }\n default: {\n // Exhaustive over ValueToken: a future variant becomes a compile error here\n // rather than silently slipping through this fail-closed validator as text.\n return tok satisfies never;\n }\n }\n};\n\n// Group tokens by signature; both the validator and the diff count from this.\nconst bySignature = (tokens: ValueToken[]): Map<string, ValueToken[]> => {\n const groups = new Map<string, ValueToken[]>();\n for (const tok of tokens) {\n const sig = signature(tok);\n if (sig === null) {\n continue;\n }\n const arr = groups.get(sig);\n if (arr) {\n arr.push(tok);\n } else {\n groups.set(sig, [tok]);\n }\n }\n return groups;\n};\n\nexport interface PlaceholderCheck {\n ok: boolean;\n errors: string[];\n}\n\n/**\n * Compare a source value's tokens against its translation's tokens. Fails closed:\n * any added, dropped, or modified placeholder/markup is an error. Catches a\n * translated variable name, a translated formatter keyword, a mangled `$t()` ref,\n * or a dropped tag — the exact placeholder-corruption failure mode this guards.\n */\nexport const validatePlaceholderTokens = (\n source: ValueToken[],\n translated: ValueToken[]\n): PlaceholderCheck => {\n const src = bySignature(source);\n const dst = bySignature(translated);\n const errors: string[] = [];\n\n for (const [sig, toks] of src) {\n if ((dst.get(sig)?.length ?? 
0) < toks.length) {\n errors.push(\n `source placeholder \"${sig}\" is missing or altered in the translation`\n );\n }\n }\n for (const [sig, toks] of dst) {\n if (toks.length > (src.get(sig)?.length ?? 0)) {\n errors.push(`translation introduced an unexpected placeholder \"${sig}\"`);\n }\n }\n\n return { errors, ok: errors.length === 0 };\n};\n\n/**\n * String-entry placeholder validator: tokenizes both sides with the supplied\n * (syntax-specific) `tokenize` and defers to {@link validatePlaceholderTokens}.\n * The free-utility CI check — the adapter pre-binds its tokenizer so callers get\n * the ergonomic two-argument form.\n */\nexport const validatePlaceholders = (\n source: string,\n translated: string,\n tokenize: Tokenizer\n): PlaceholderCheck =>\n validatePlaceholderTokens(tokenize(source), tokenize(translated));\n\n/** The actual tokens that differ between source and translation. */\nexport interface PlaceholderDiff {\n /** placeholders in `source` that the translation dropped (or has fewer of) */\n dropped: ValueToken[];\n /** placeholders the translation introduced that `source` did not have */\n added: ValueToken[];\n}\n\n/**\n * The structured placeholder diff: the tokens dropped from / added to a\n * translation relative to its source. 
Shares {@link validatePlaceholderTokens}'s\n * identity rule — both go through the same {@link signature}, so the diff is\n * empty exactly when that validator reports `ok` — but returns the tokens so a\n * caller can render a precise report and decide whether a dropped+added pair is\n * an unambiguous \"changed\" or must be listed separately.\n *\n * @param source canonical tokens of the source value\n * @param translated canonical tokens of the translation\n * @returns dropped and added tokens; both empty ⇔ placeholders are preserved\n */\nexport const placeholderDiff = (\n source: ValueToken[],\n translated: ValueToken[]\n): PlaceholderDiff => {\n const src = bySignature(source);\n const dst = bySignature(translated);\n const dropped: ValueToken[] = [];\n const added: ValueToken[] = [];\n for (const [sig, toks] of src) {\n const have = dst.get(sig)?.length ?? 0;\n for (let i = have; i < toks.length; i += 1) {\n dropped.push(toks[i]);\n }\n }\n for (const [sig, toks] of dst) {\n const have = src.get(sig)?.length ?? 
0;\n for (let i = have; i < toks.length; i += 1) {\n added.push(toks[i]);\n }\n }\n return { added, dropped };\n};\n"],"mappings":";;AAoBA,MAAa,sBAA+C;CAC1D;CACA;CACA;CACA;CACA;CACA;AACF;AAuFA,MAAa,SAAS,WAAmB,SACvC,GAAG,UAAU,GAAG;;;ACxGlB,IAAa,eAAb,cAAkC,MAAM;CACtC;;CAEA;CACA;CAEA,YACE,MACA,SACA,OAA4D,CAAC,GAC7D;EACA,MAAM,OAAO;EACb,KAAK,OAAO;EACZ,KAAK,OAAO;EACZ,KAAK,MAAM,KAAK;EAChB,KAAK,UAAU,KAAK;CACtB;AACF;;;;;;;;;;;;;;;;;;;;;;;;;;;ACAA,MAAM,OAAO;AACb,MAAM,QAAQ;AAEd,MAAa,YAAY,UAA0B,GAAG,OAAO,QAAQ;AAErE,MAAM,cAAc,IAAI,OAAO,GAAG,KAAK,QAAQ,SAAS,IAAI;;;;;;;AAqB5D,MAAa,cAAc,WAAqC;CAC9D,MAAM,QAAkB,CAAC;CACzB,IAAI,SAAS;CACb,KAAK,MAAM,OAAO,QAChB,IAAI,IAAI,SAAS,QAAQ;EACvB,IAAI,IAAI,IAAI,SAAS,IAAI,KAAK,IAAI,IAAI,SAAS,KAAK,GAClD,MAAM,IAAI,aACR,mBACA,qLAGF;EAEF,UAAU,IAAI;CAChB,OAAO;EACL,UAAU,SAAS,MAAM,MAAM;EAC/B,MAAM,KAAK,IAAI,GAAG;CACpB;CAEF,OAAO;EAAE;EAAQ;CAAM;AACzB;AAEA,MAAa,UAAU,QAAgB,UACrC,OAAO,QAAQ,cAAc,GAAG,MAAc;CAC5C,MAAM,OAAO,MAAM,OAAO,CAAC;CAC3B,IAAI,SAAS,KAAA,GACX,MAAM,IAAI,aACR,mBACA,0BAA0B,EAAE,gBAC9B;CAEF,OAAO;AACT,CAAC;;;;;;AAYH,MAAa,qBACX,YACA,OACA,OAAmC,CAAC,MAClB;CAClB,MAAM,SAAmB,CAAC;CAC1B,MAAM,uBAAO,IAAI,IAAoB;CACrC,MAAM,QAAkB,CAAC;CACzB,IAAI;CACJ,YAAY,YAAY;CACxB,QAAQ,IAAI,YAAY,KAAK,UAAU,OAAO,MAAM;EAClD,MAAM,MAAM,OAAO,EAAE,EAAE;EACvB,KAAK,IAAI,MAAM,KAAK,IAAI,GAAG,KAAK,KAAK,CAAC;EACtC,MAAM,KAAK,GAAG;CAChB;CAEA,KAAK,IAAI,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK,GAAG;EACxC,MAAM,QAAQ,KAAK,IAAI,CAAC,KAAK;EAC7B,IAAI,UAAU,GACZ,OAAO,KAAK,gBAAgB,EAAE,IAAI,MAAM,GAAG,oBAAoB;OAC1D,IAAI,QAAQ,GACjB,OAAO,KAAK,gBAAgB,EAAE,IAAI,MAAM,GAAG,uBAAuB;CAEtE;CACA,KAAK,MAAM,OAAO,KAAK,KAAK,GAC1B,IAAI,OAAO,MAAM,QACf,OAAO,KAAK,uCAAuC,KAAK;CAI5D,MAAM,WAAW,WAAW,QAAQ,aAAa,EAAE;CACnD,IAAI,SAAS,SAAS,IAAI,KAAK,SAAS,SAAS,KAAK,GACpD,OAAO,KAAK,6DAA6D;CAE3E,IAAI,KAAK,iBAAiB,MAAM;EAC9B,MAAM,WAAW,CAAC,GAAG,KAAK,CAAC,CAAC,UAAU,GAAG,MAAM,IAAI,CAAC;EACpD,IAAI,MAAM,KAAK,GAAG,MAAM,SAAS,KAAK,GAAG,GACvC,OAAO,KAAK,+BAA+B;CAE/C;CAEA,OAAO;EAAE;EAAQ,IAAI,OAAO,WAAW;CAAE;AAC3C;AAMA,MAAM,OAAO,MACX,EAAE,WAAW,MAAM,MAAM,CAAC
,CAAC,WAAW,KAAK,KAAK;;;;;;;;;AAUlD,MAAM,aAAa,QAAmC;CACpD,QAAQ,IAAI,MAAZ;EACE,KAAK,iBACH,OAAO,UAAU,IAAI,IAAI,QAAQ,EAAE,GAAG,IAAI,IAAI,UAAU,EAAE;EAE5D,KAAK,WAGH,OAAO,QAAQ,IAAI,IAAI,GAAG,EAAE,GAAG,IAAI,IAAI,WAAW,EAAE;EAEtD,KAAK,UAGH,OAAO,UAAU,IAAI;EAEvB,KAAK,QACH,OAAO;EAET,SAGE,OAAO;CAEX;AACF;AAGA,MAAM,eAAe,WAAoD;CACvE,MAAM,yBAAS,IAAI,IAA0B;CAC7C,KAAK,MAAM,OAAO,QAAQ;EACxB,MAAM,MAAM,UAAU,GAAG;EACzB,IAAI,QAAQ,MACV;EAEF,MAAM,MAAM,OAAO,IAAI,GAAG;EAC1B,IAAI,KACF,IAAI,KAAK,GAAG;OAEZ,OAAO,IAAI,KAAK,CAAC,GAAG,CAAC;CAEzB;CACA,OAAO;AACT;;;;;;;AAaA,MAAa,6BACX,QACA,eACqB;CACrB,MAAM,MAAM,YAAY,MAAM;CAC9B,MAAM,MAAM,YAAY,UAAU;CAClC,MAAM,SAAmB,CAAC;CAE1B,KAAK,MAAM,CAAC,KAAK,SAAS,KACxB,KAAK,IAAI,IAAI,GAAG,CAAC,EAAE,UAAU,KAAK,KAAK,QACrC,OAAO,KACL,uBAAuB,IAAI,2CAC7B;CAGJ,KAAK,MAAM,CAAC,KAAK,SAAS,KACxB,IAAI,KAAK,UAAU,IAAI,IAAI,GAAG,CAAC,EAAE,UAAU,IACzC,OAAO,KAAK,qDAAqD,IAAI,EAAE;CAI3E,OAAO;EAAE;EAAQ,IAAI,OAAO,WAAW;CAAE;AAC3C;;;;;;;AAQA,MAAa,wBACX,QACA,YACA,aAEA,0BAA0B,SAAS,MAAM,GAAG,SAAS,UAAU,CAAC;;;;;;;;;;;;;AAsBlE,MAAa,mBACX,QACA,eACoB;CACpB,MAAM,MAAM,YAAY,MAAM;CAC9B,MAAM,MAAM,YAAY,UAAU;CAClC,MAAM,UAAwB,CAAC;CAC/B,MAAM,QAAsB,CAAC;CAC7B,KAAK,MAAM,CAAC,KAAK,SAAS,KAAK;EAC7B,MAAM,OAAO,IAAI,IAAI,GAAG,CAAC,EAAE,UAAU;EACrC,KAAK,IAAI,IAAI,MAAM,IAAI,KAAK,QAAQ,KAAK,GACvC,QAAQ,KAAK,KAAK,EAAE;CAExB;CACA,KAAK,MAAM,CAAC,KAAK,SAAS,KAAK;EAC7B,MAAM,OAAO,IAAI,IAAI,GAAG,CAAC,EAAE,UAAU;EACrC,KAAK,IAAI,IAAI,MAAM,IAAI,KAAK,QAAQ,KAAK,GACvC,MAAM,KAAK,KAAK,EAAE;CAEtB;CACA,OAAO;EAAE;EAAO;CAAQ;AAC1B"}
|
|
1
|
+
{"version":3,"file":"index.js","names":[],"sources":["../src/model.ts","../src/dto.ts","../src/errors.ts","../src/mask.ts"],"sourcesContent":["/**\n * The canonical message model.\n *\n * This is the format-agnostic centre of Astilba: every file/syntax adapter maps\n * files <-> this model. Phase 0 ships one adapter (native i18next v4) but the\n * model is deliberately syntax-neutral so ICU and other dialects can map onto it\n * later without changing it.\n *\n * Invariants worth stating out loud:\n * - Value text is preserved EXACTLY (never mutate value bytes). `Value.raw` is\n * the source of truth; `Value.tokens` is a derived view\n * used only for masking/analysis and is never used to reconstruct output.\n * - Plurals are represented STRUCTURALLY as a CLDR-category -> value map, not as\n * suffixed flat keys. The suffix set is re-derived per target language on\n * export, never carried through.\n */\n\n/** The six CLDR plural categories. The set that actually applies is language-dependent. */\nexport type CLDRCategory = \"zero\" | \"one\" | \"two\" | \"few\" | \"many\" | \"other\";\n\nexport const ALL_CLDR_CATEGORIES: readonly CLDRCategory[] = [\n \"zero\",\n \"one\",\n \"two\",\n \"few\",\n \"many\",\n \"other\",\n];\n\n/**\n * Whether a key is pluralised, and if so how.\n * - \"none\" -> a plain string; no plural suffix on export.\n * - \"cardinal\" -> `_one`, `_other`, ... suffixes.\n * - \"ordinal\" -> `_ordinal_one`, ... suffixes (i18next v4 ordinal form).\n *\n * Note \"none\" is distinct from a single-category cardinal (e.g. Japanese, whose\n * only category is `other`): `foo` (none) and `foo_other` (cardinal) are\n * different keys and must round-trip differently.\n */\nexport type PluralKind = \"none\" | \"cardinal\" | \"ordinal\";\n\n/** A token within a value. Derived from `Value.raw`; used for masking + validation. 
*/\nexport type ValueToken =\n | { type: \"text\"; raw: string }\n /** `{{var}}`, `{{var, format}}`, `{{var, format(options)}}` */\n | {\n type: \"interpolation\";\n raw: string;\n variable: string;\n format?: string;\n }\n /** `$t(ref)`, `$t(ref, {\"opt\": ...})` */\n | {\n type: \"nesting\";\n raw: string;\n ref: string;\n options?: string;\n }\n /** an HTML/XML tag `<...>` or an entity `&...;` — opaque markup */\n | { type: \"markup\"; raw: string };\n\n/**\n * A translatable value. `raw` is byte-exact source; `tokens` is the parsed view.\n * `tokens.map(t => t.raw).join(\"\")` always reconstructs `raw` exactly.\n */\nexport interface Value {\n raw: string;\n tokens: ValueToken[];\n}\n\n/**\n * The set of values for one (base, context) cell.\n *\n * - kind \"none\": `values` holds a single \"other\" entry = the plain string.\n * - kind \"cardinal\"/\"ordinal\": `values` holds the per-category plural forms.\n * - `bare`: present only in the rare i18next case where a context key has BOTH\n * a suffix-less form (used when `t()` is called without `count`) AND plural\n * forms (used when `count` is given). Kept so both render paths are lossless.\n */\nexport interface PluralSet {\n kind: PluralKind;\n values: Map<CLDRCategory, Value>;\n bare?: Value;\n}\n\n/**\n * A logical message: a base key with one entry per context value.\n * The no-context case is a single entry keyed \"\" (empty string).\n *\n * `base` is the full key path WITHOUT namespace and WITHOUT plural/context\n * suffixes, using the project key separator (default \".\"), e.g. `account.friend`.\n */\nexport interface Key {\n namespace: string;\n base: string;\n /** contextValue -> its PluralSet. \"\" === no context. */\n contexts: Map<string, PluralSet>;\n}\n\n/**\n * A single language's worth of canonical data. Round-trip is per-language.\n *\n * IN-MEMORY ONLY: this is `Map`-based for fast lookup, so it is NOT directly\n * JSON-serializable (`JSON.stringify` yields `{}` for the Maps). 
`toStored`/\n * `fromStored` (dto.ts) are the plain-JSON persistence mirror for one key's\n * per-language data.\n */\nexport interface CanonicalModel {\n /** BCP-47, e.g. `en`, `en-US`, `pt-BR`. */\n language: string;\n /** `${namespace}:${base}` -> Key */\n keys: Map<string, Key>;\n}\n\nexport const keyId = (namespace: string, base: string): string =>\n `${namespace}:${base}`;\n","/**\n * Plain-JSON persistence mirror of a key's per-language data (`Key.contexts`).\n *\n * The in-memory model is `Map`-based (model.ts), so `JSON.stringify` silently\n * loses it; this is the JSON a backend stores per (key, language). Only\n * `Value.raw` is persisted — `tokens` are a derived view rebuilt on load, never\n * the source of truth, and export reads `raw` alone.\n */\n\nimport { ALL_CLDR_CATEGORIES } from \"./model.ts\";\nimport type { CLDRCategory, PluralKind, PluralSet, Value } from \"./model.ts\";\n\n/** A persisted value — only the byte-exact `raw`; tokens are re-derived on load. */\nexport interface StoredValue {\n raw: string;\n}\n\n/** A persisted `PluralSet`: the category map flattened to a plain object. */\nexport interface StoredPluralSet {\n kind: PluralKind;\n /** CLDR category -> value. `kind:\"none\"` carries a single `\"other\"` entry. */\n values: Record<string, StoredValue>;\n /** present only for the rare mixed bare+plural context (model.ts); else omitted */\n bare?: StoredValue;\n}\n\n/** The stored message for one (key, language): each context's variants. */\nexport interface StoredMessage {\n /** context value -> its variants. `\"\"` === the no-context case. */\n contexts: Record<string, StoredPluralSet>;\n}\n\n// pollution-safe empty map: a null-prototype object stores arbitrary string keys\n// (incl. 
\"__proto__\") as own data properties, never hitting an inherited setter.\nconst emptyMap = <V>(): Record<string, V> =>\n // oxlint-disable-next-line typescript/no-unsafe-type-assertion -- Object.create(null) is untyped by design\n Object.create(null) as Record<string, V>;\n\n/** Serialize one key's `contexts` (for a single language) to its stored JSON. */\nexport const toStored = (contexts: Map<string, PluralSet>): StoredMessage => {\n const out = emptyMap<StoredPluralSet>();\n for (const [context, set] of contexts) {\n const values = emptyMap<StoredValue>();\n for (const [category, value] of set.values) {\n values[category] = { raw: value.raw };\n }\n out[context] = set.bare\n ? { bare: { raw: set.bare.raw }, kind: set.kind, values }\n : { kind: set.kind, values };\n }\n return { contexts: out };\n};\n\n// raw is authoritative; a single text token is a valid `Value` (its tokens\n// reconstruct raw) and export reads only raw. Re-tokenizing for masking/analysis\n// is the adapter's job, not core's — it owns the syntax.\nconst rebuild = (stored: StoredValue): Value => ({\n raw: stored.raw,\n tokens: [{ raw: stored.raw, type: \"text\" }],\n});\n\n/** Rebuild one key's `contexts` from its stored JSON (inverse of `toStored`). */\nexport const fromStored = (message: StoredMessage): Map<string, PluralSet> => {\n const contexts = new Map<string, PluralSet>();\n for (const [context, stored] of Object.entries(message.contexts)) {\n const values = new Map<CLDRCategory, Value>();\n for (const category of ALL_CLDR_CATEGORIES) {\n if (category in stored.values) {\n values.set(category, rebuild(stored.values[category]));\n }\n }\n contexts.set(\n context,\n stored.bare\n ? { bare: rebuild(stored.bare), kind: stored.kind, values }\n : { kind: stored.kind, values }\n );\n }\n return contexts;\n};\n","/**\n * The base error type. 
`AstilbaError` is the one class consumers catch\n * (`if (e instanceof AstilbaError) e.code`); `code` is an OPEN string so each\n * format adapter owns its own code constants without a closed core enum coupling\n * core to any one syntax. The i18next-v4 codes + their loud, fix-it-yourself\n * factory functions live with the adapter (errors-i18next.ts).\n */\n\n/** An error code is an open string; adapters define their own (e.g. `ICU_NOT_SUPPORTED`). */\nexport type AstilbaErrorCode = string;\n\nexport class AstilbaError extends Error {\n readonly code: string;\n /** The fully-qualified key (`namespace:flatKey`) the problem was found at, if any. */\n readonly key?: string;\n readonly details?: Record<string, unknown>;\n\n constructor(\n code: string,\n message: string,\n opts: { key?: string; details?: Record<string, unknown> } = {}\n ) {\n super(message);\n this.name = \"AstilbaError\";\n this.code = code;\n this.key = opts.key;\n this.details = opts.details;\n }\n}\n","/**\n * MT masking & placeholder validation — the FORMAT-NEUTRAL core. It operates on\n * canonical `ValueToken[]` (which the model\n * already carries) instead of parsing strings itself: a syntax adapter tokenizes,\n * core masks and validates. The one place a raw string must be re-tokenized is a\n * translation returned from MT — it isn't in the model — so the string-entry\n * `validatePlaceholders` takes the adapter's `Tokenizer` by injection.\n *\n * Two jobs:\n * 1. maskTokens()/unmask(): replace every non-text token (the WHOLE token, incl.\n * formatter and `$t()` ref name) with an opaque sentinel before an MT/LLM call,\n * and restore it after. Because the formatter keyword and ref live INSIDE the\n * masked span, MT never sees them — that alone defeats the \"translated\n * `one`/`other` -> `uno`/`otros`\" class of bug.\n * 2. 
validatePlaceholderTokens(): a fail-closed, CI-failable check comparing a\n * source value's tokens against its translation's tokens (the caller restores\n * any masked sentinels first) — every interpolation variable, formatter\n * keyword, nesting ref, and markup tag must survive unmodified. This is the\n * validator shipped in the free utility.\n *\n * Sentinels use private-use-area delimiters so they carry no linguistic content\n * for an MT engine to \"helpfully\" translate, while still being detectable if the\n * engine mangles them.\n */\n\nimport { AstilbaError } from \"./errors.ts\";\nimport type { ValueToken } from \"./model.ts\";\n\nconst OPEN = \"\\uE000\";\nconst CLOSE = \"\\uE001\";\n\nexport const sentinel = (index: number): string => `${OPEN}${index}${CLOSE}`;\n\nconst SENTINEL_RE = new RegExp(`${OPEN}(\\\\d+)${CLOSE}`, \"gu\");\n\n/**\n * Turns a raw value string into canonical tokens. Supplied by whichever syntax\n * adapter is in use — the one syntax-specific dependency the string-entry validator\n * needs, to re-tokenize an MT-returned translation that was never in the model.\n */\nexport type Tokenizer = (raw: string) => ValueToken[];\n\nexport interface MaskResult {\n masked: string;\n /** original token raws, indexed by sentinel number */\n parts: string[];\n}\n\n/**\n * Replace interpolation / nesting / markup tokens with sentinels. Rejects loudly\n * if the literal text already contains a reserved sentinel delimiter — rare but\n * legal in real values (e.g. 
private-use-area glyphs from icon fonts like Material\n * Icons / Nerd Fonts) — rather than letting unmask() silently corrupt it.\n */\nexport const maskTokens = (tokens: ValueToken[]): MaskResult => {\n const parts: string[] = [];\n let masked = \"\";\n for (const tok of tokens) {\n if (tok.type === \"text\") {\n if (tok.raw.includes(OPEN) || tok.raw.includes(CLOSE)) {\n throw new AstilbaError(\n \"MASK_VALIDATION\",\n \"Value text contains a reserved masking sentinel delimiter \" +\n \"(U+E000/U+E001); it cannot be masked without ambiguity. Strip or \" +\n \"escape these private-use-area characters before masking.\"\n );\n }\n masked += tok.raw;\n } else {\n masked += sentinel(parts.length);\n parts.push(tok.raw);\n }\n }\n return { masked, parts };\n};\n\nexport const unmask = (masked: string, parts: string[]): string =>\n masked.replace(SENTINEL_RE, (_, n: string) => {\n const part = parts[Number(n)];\n if (part === undefined) {\n throw new AstilbaError(\n \"MASK_VALIDATION\",\n `Unknown sentinel index ${n} during unmask.`\n );\n }\n return part;\n });\n\nexport interface SentinelCheck {\n ok: boolean;\n errors: string[];\n}\n\n/**\n * Validate that an MT engine returned every sentinel exactly once, unmodified,\n * and invented none. Reordering is allowed (target languages reorder freely);\n * pass `requireOrder` to also assert original order.\n */\nexport const validateSentinels = (\n translated: string,\n parts: string[],\n opts: { requireOrder?: boolean } = {}\n): SentinelCheck => {\n const errors: string[] = [];\n const seen = new Map<number, number>();\n const order: number[] = [];\n let m: RegExpExecArray | null;\n SENTINEL_RE.lastIndex = 0;\n while ((m = SENTINEL_RE.exec(translated)) !== null) {\n const idx = Number(m[1]);\n seen.set(idx, (seen.get(idx) ?? 0) + 1);\n order.push(idx);\n }\n\n for (let i = 0; i < parts.length; i += 1) {\n const count = seen.get(i) ?? 
0;\n if (count === 0) {\n errors.push(`placeholder #${i} (${parts[i]}) was dropped by MT`);\n } else if (count > 1) {\n errors.push(`placeholder #${i} (${parts[i]}) was duplicated by MT`);\n }\n }\n for (const idx of seen.keys()) {\n if (idx >= parts.length) {\n errors.push(`MT invented an unknown placeholder #${idx}`);\n }\n }\n // Detect a corrupted sentinel: stray delimiter chars not part of a valid token.\n const stripped = translated.replace(SENTINEL_RE, \"\");\n if (stripped.includes(OPEN) || stripped.includes(CLOSE)) {\n errors.push(\"MT corrupted a placeholder sentinel (stray delimiter found)\");\n }\n if (opts.requireOrder === true) {\n const expected = [...order].toSorted((a, b) => a - b);\n if (order.join(\",\") !== expected.join(\",\")) {\n errors.push(\"placeholder order was changed\");\n }\n }\n\n return { errors, ok: errors.length === 0 };\n};\n\n// --- post-hoc placeholder validation (the free-utility CI check) -------------\n\n// Escape `\\` then `|` so the `|` field separator below is unambiguous even if a\n// variable / format / ref / options string itself contains one.\nconst esc = (s: string): string =>\n s.replaceAll(\"\\\\\", \"\\\\\\\\\").replaceAll(\"|\", \"\\\\|\");\n\n/**\n * Canonical identity for placeholder equality, computed from the `ValueToken`\n * fields directly — variable + format for interpolation, ref for nesting, raw for\n * markup. No syntax-specific normalisation: a value and its own translation carry\n * byte-identical placeholders, so the raw canonical fields ARE the faithful\n * identity (an adapter wanting looser matching can pre-normalise its tokens before\n * calling `validatePlaceholderTokens`). Returns `null` for text (not a placeholder).\n */\nconst signature = (tok: ValueToken): string | null => {\n switch (tok.type) {\n case \"interpolation\": {\n return `interp:${esc(tok.variable)}|${esc(tok.format ?? 
\"\")}`;\n }\n case \"nesting\": {\n // options are part of the placeholder's identity: $t(a, {\"count\": 3}) and\n // $t(a, {\"count\": 0}) render differently, so a mutated option must not pass.\n return `nest:${esc(tok.ref)}|${esc(tok.options ?? \"\")}`;\n }\n case \"markup\": {\n // one field (the whole raw tag), so no separator to disambiguate — and the\n // `markup:` prefix keeps it distinct from interp/nesting signatures.\n return `markup:${tok.raw}`;\n }\n case \"text\": {\n return null;\n }\n default: {\n // Exhaustive over ValueToken: a future variant becomes a compile error here\n // rather than silently slipping through this fail-closed validator as text.\n return tok satisfies never;\n }\n }\n};\n\n// Group tokens by signature; both the validator and the diff count from this.\nconst bySignature = (tokens: ValueToken[]): Map<string, ValueToken[]> => {\n const groups = new Map<string, ValueToken[]>();\n for (const tok of tokens) {\n const sig = signature(tok);\n if (sig === null) {\n continue;\n }\n const arr = groups.get(sig);\n if (arr) {\n arr.push(tok);\n } else {\n groups.set(sig, [tok]);\n }\n }\n return groups;\n};\n\nexport interface PlaceholderCheck {\n ok: boolean;\n errors: string[];\n}\n\n/**\n * Compare a source value's tokens against its translation's tokens. Fails closed:\n * any added, dropped, or modified placeholder/markup is an error. Catches a\n * translated variable name, a translated formatter keyword, a mangled `$t()` ref,\n * or a dropped tag — the exact placeholder-corruption failure mode this guards.\n */\nexport const validatePlaceholderTokens = (\n source: ValueToken[],\n translated: ValueToken[]\n): PlaceholderCheck => {\n const src = bySignature(source);\n const dst = bySignature(translated);\n const errors: string[] = [];\n\n for (const [sig, toks] of src) {\n if ((dst.get(sig)?.length ?? 
0) < toks.length) {\n errors.push(\n `source placeholder \"${sig}\" is missing or altered in the translation`\n );\n }\n }\n for (const [sig, toks] of dst) {\n if (toks.length > (src.get(sig)?.length ?? 0)) {\n errors.push(`translation introduced an unexpected placeholder \"${sig}\"`);\n }\n }\n\n return { errors, ok: errors.length === 0 };\n};\n\n/**\n * String-entry placeholder validator: tokenizes both sides with the supplied\n * (syntax-specific) `tokenize` and defers to {@link validatePlaceholderTokens}.\n * The free-utility CI check — the adapter pre-binds its tokenizer so callers get\n * the ergonomic two-argument form.\n */\nexport const validatePlaceholders = (\n source: string,\n translated: string,\n tokenize: Tokenizer\n): PlaceholderCheck =>\n validatePlaceholderTokens(tokenize(source), tokenize(translated));\n\n/** The actual tokens that differ between source and translation. */\nexport interface PlaceholderDiff {\n /** placeholders in `source` that the translation dropped (or has fewer of) */\n dropped: ValueToken[];\n /** placeholders the translation introduced that `source` did not have */\n added: ValueToken[];\n}\n\n/**\n * The structured placeholder diff: the tokens dropped from / added to a\n * translation relative to its source. 
Shares {@link validatePlaceholderTokens}'s\n * identity rule — both go through the same {@link signature}, so the diff is\n * empty exactly when that validator reports `ok` — but returns the tokens so a\n * caller can render a precise report and decide whether a dropped+added pair is\n * an unambiguous \"changed\" or must be listed separately.\n *\n * @param source canonical tokens of the source value\n * @param translated canonical tokens of the translation\n * @returns dropped and added tokens; both empty ⇔ placeholders are preserved\n */\nexport const placeholderDiff = (\n source: ValueToken[],\n translated: ValueToken[]\n): PlaceholderDiff => {\n const src = bySignature(source);\n const dst = bySignature(translated);\n const dropped: ValueToken[] = [];\n const added: ValueToken[] = [];\n for (const [sig, toks] of src) {\n const have = dst.get(sig)?.length ?? 0;\n for (let i = have; i < toks.length; i += 1) {\n dropped.push(toks[i]);\n }\n }\n for (const [sig, toks] of dst) {\n const have = src.get(sig)?.length ?? 
0;\n for (let i = have; i < toks.length; i += 1) {\n added.push(toks[i]);\n }\n }\n return { added, dropped };\n};\n"],"mappings":";;AAoBA,MAAa,sBAA+C;CAC1D;CACA;CACA;CACA;CACA;CACA;AACF;AAuFA,MAAa,SAAS,WAAmB,SACvC,GAAG,UAAU,GAAG;;;;;;;;;;;ACjFlB,MAAM,iBAEJ,OAAO,OAAO,IAAI;;AAGpB,MAAa,YAAY,aAAoD;CAC3E,MAAM,MAAM,SAA0B;CACtC,KAAK,MAAM,CAAC,SAAS,QAAQ,UAAU;EACrC,MAAM,SAAS,SAAsB;EACrC,KAAK,MAAM,CAAC,UAAU,UAAU,IAAI,QAClC,OAAO,YAAY,EAAE,KAAK,MAAM,IAAI;EAEtC,IAAI,WAAW,IAAI,OACf;GAAE,MAAM,EAAE,KAAK,IAAI,KAAK,IAAI;GAAG,MAAM,IAAI;GAAM;EAAO,IACtD;GAAE,MAAM,IAAI;GAAM;EAAO;CAC/B;CACA,OAAO,EAAE,UAAU,IAAI;AACzB;AAKA,MAAM,WAAW,YAAgC;CAC/C,KAAK,OAAO;CACZ,QAAQ,CAAC;EAAE,KAAK,OAAO;EAAK,MAAM;CAAO,CAAC;AAC5C;;AAGA,MAAa,cAAc,YAAmD;CAC5E,MAAM,2BAAW,IAAI,IAAuB;CAC5C,KAAK,MAAM,CAAC,SAAS,WAAW,OAAO,QAAQ,QAAQ,QAAQ,GAAG;EAChE,MAAM,yBAAS,IAAI,IAAyB;EAC5C,KAAK,MAAM,YAAY,qBACrB,IAAI,YAAY,OAAO,QACrB,OAAO,IAAI,UAAU,QAAQ,OAAO,OAAO,SAAS,CAAC;EAGzD,SAAS,IACP,SACA,OAAO,OACH;GAAE,MAAM,QAAQ,OAAO,IAAI;GAAG,MAAM,OAAO;GAAM;EAAO,IACxD;GAAE,MAAM,OAAO;GAAM;EAAO,CAClC;CACF;CACA,OAAO;AACT;;;ACpEA,IAAa,eAAb,cAAkC,MAAM;CACtC;;CAEA;CACA;CAEA,YACE,MACA,SACA,OAA4D,CAAC,GAC7D;EACA,MAAM,OAAO;EACb,KAAK,OAAO;EACZ,KAAK,OAAO;EACZ,KAAK,MAAM,KAAK;EAChB,KAAK,UAAU,KAAK;CACtB;AACF;;;;;;;;;;;;;;;;;;;;;;;;;;;ACAA,MAAM,OAAO;AACb,MAAM,QAAQ;AAEd,MAAa,YAAY,UAA0B,GAAG,OAAO,QAAQ;AAErE,MAAM,cAAc,IAAI,OAAO,GAAG,KAAK,QAAQ,SAAS,IAAI;;;;;;;AAqB5D,MAAa,cAAc,WAAqC;CAC9D,MAAM,QAAkB,CAAC;CACzB,IAAI,SAAS;CACb,KAAK,MAAM,OAAO,QAChB,IAAI,IAAI,SAAS,QAAQ;EACvB,IAAI,IAAI,IAAI,SAAS,IAAI,KAAK,IAAI,IAAI,SAAS,KAAK,GAClD,MAAM,IAAI,aACR,mBACA,qLAGF;EAEF,UAAU,IAAI;CAChB,OAAO;EACL,UAAU,SAAS,MAAM,MAAM;EAC/B,MAAM,KAAK,IAAI,GAAG;CACpB;CAEF,OAAO;EAAE;EAAQ;CAAM;AACzB;AAEA,MAAa,UAAU,QAAgB,UACrC,OAAO,QAAQ,cAAc,GAAG,MAAc;CAC5C,MAAM,OAAO,MAAM,OAAO,CAAC;CAC3B,IAAI,SAAS,KAAA,GACX,MAAM,IAAI,aACR,mBACA,0BAA0B,EAAE,gBAC9B;CAEF,OAAO;AACT,CAAC;;;;;;AAYH,MAAa,qBACX,YACA,OACA,OAAmC,CAAC,MAClB;CAClB,MAAM,SAAmB,CAAC;CAC1B,MAAM,uBAAO,IAAI,IAAoB;CACrC,MAAM,QAAkB,CAAC;CACzB,IAAI;CACJ,
YAAY,YAAY;CACxB,QAAQ,IAAI,YAAY,KAAK,UAAU,OAAO,MAAM;EAClD,MAAM,MAAM,OAAO,EAAE,EAAE;EACvB,KAAK,IAAI,MAAM,KAAK,IAAI,GAAG,KAAK,KAAK,CAAC;EACtC,MAAM,KAAK,GAAG;CAChB;CAEA,KAAK,IAAI,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK,GAAG;EACxC,MAAM,QAAQ,KAAK,IAAI,CAAC,KAAK;EAC7B,IAAI,UAAU,GACZ,OAAO,KAAK,gBAAgB,EAAE,IAAI,MAAM,GAAG,oBAAoB;OAC1D,IAAI,QAAQ,GACjB,OAAO,KAAK,gBAAgB,EAAE,IAAI,MAAM,GAAG,uBAAuB;CAEtE;CACA,KAAK,MAAM,OAAO,KAAK,KAAK,GAC1B,IAAI,OAAO,MAAM,QACf,OAAO,KAAK,uCAAuC,KAAK;CAI5D,MAAM,WAAW,WAAW,QAAQ,aAAa,EAAE;CACnD,IAAI,SAAS,SAAS,IAAI,KAAK,SAAS,SAAS,KAAK,GACpD,OAAO,KAAK,6DAA6D;CAE3E,IAAI,KAAK,iBAAiB,MAAM;EAC9B,MAAM,WAAW,CAAC,GAAG,KAAK,CAAC,CAAC,UAAU,GAAG,MAAM,IAAI,CAAC;EACpD,IAAI,MAAM,KAAK,GAAG,MAAM,SAAS,KAAK,GAAG,GACvC,OAAO,KAAK,+BAA+B;CAE/C;CAEA,OAAO;EAAE;EAAQ,IAAI,OAAO,WAAW;CAAE;AAC3C;AAMA,MAAM,OAAO,MACX,EAAE,WAAW,MAAM,MAAM,CAAC,CAAC,WAAW,KAAK,KAAK;;;;;;;;;AAUlD,MAAM,aAAa,QAAmC;CACpD,QAAQ,IAAI,MAAZ;EACE,KAAK,iBACH,OAAO,UAAU,IAAI,IAAI,QAAQ,EAAE,GAAG,IAAI,IAAI,UAAU,EAAE;EAE5D,KAAK,WAGH,OAAO,QAAQ,IAAI,IAAI,GAAG,EAAE,GAAG,IAAI,IAAI,WAAW,EAAE;EAEtD,KAAK,UAGH,OAAO,UAAU,IAAI;EAEvB,KAAK,QACH,OAAO;EAET,SAGE,OAAO;CAEX;AACF;AAGA,MAAM,eAAe,WAAoD;CACvE,MAAM,yBAAS,IAAI,IAA0B;CAC7C,KAAK,MAAM,OAAO,QAAQ;EACxB,MAAM,MAAM,UAAU,GAAG;EACzB,IAAI,QAAQ,MACV;EAEF,MAAM,MAAM,OAAO,IAAI,GAAG;EAC1B,IAAI,KACF,IAAI,KAAK,GAAG;OAEZ,OAAO,IAAI,KAAK,CAAC,GAAG,CAAC;CAEzB;CACA,OAAO;AACT;;;;;;;AAaA,MAAa,6BACX,QACA,eACqB;CACrB,MAAM,MAAM,YAAY,MAAM;CAC9B,MAAM,MAAM,YAAY,UAAU;CAClC,MAAM,SAAmB,CAAC;CAE1B,KAAK,MAAM,CAAC,KAAK,SAAS,KACxB,KAAK,IAAI,IAAI,GAAG,CAAC,EAAE,UAAU,KAAK,KAAK,QACrC,OAAO,KACL,uBAAuB,IAAI,2CAC7B;CAGJ,KAAK,MAAM,CAAC,KAAK,SAAS,KACxB,IAAI,KAAK,UAAU,IAAI,IAAI,GAAG,CAAC,EAAE,UAAU,IACzC,OAAO,KAAK,qDAAqD,IAAI,EAAE;CAI3E,OAAO;EAAE;EAAQ,IAAI,OAAO,WAAW;CAAE;AAC3C;;;;;;;AAQA,MAAa,wBACX,QACA,YACA,aAEA,0BAA0B,SAAS,MAAM,GAAG,SAAS,UAAU,CAAC;;;;;;;;;;;;;AAsBlE,MAAa,mBACX,QACA,eACoB;CACpB,MAAM,MAAM,YAAY,MAAM;CAC9B,MAAM,MAAM,YAAY,UAAU;CAClC,MAAM,UAAwB,CAAC;CAC/B,MAAM,QAAsB,CAAC;CAC7B,KAAK,MAAM,CAAC,KAAK,SAAS,KAAK;EAC7B,MAAM,OA
AO,IAAI,IAAI,GAAG,CAAC,EAAE,UAAU;EACrC,KAAK,IAAI,IAAI,MAAM,IAAI,KAAK,QAAQ,KAAK,GACvC,QAAQ,KAAK,KAAK,EAAE;CAExB;CACA,KAAK,MAAM,CAAC,KAAK,SAAS,KAAK;EAC7B,MAAM,OAAO,IAAI,IAAI,GAAG,CAAC,EAAE,UAAU;EACrC,KAAK,IAAI,IAAI,MAAM,IAAI,KAAK,QAAQ,KAAK,GACvC,MAAM,KAAK,KAAK,EAAE;CAEtB;CACA,OAAO;EAAE;EAAO;CAAQ;AAC1B"}
|
|
@@ -86,9 +86,9 @@ interface Key {
|
|
|
86
86
|
* A single language's worth of canonical data. Round-trip is per-language.
|
|
87
87
|
*
|
|
88
88
|
* IN-MEMORY ONLY: this is `Map`-based for fast lookup, so it is NOT directly
|
|
89
|
-
* JSON-serializable (`JSON.stringify` yields `{}` for the Maps).
|
|
90
|
-
*
|
|
91
|
-
*
|
|
89
|
+
* JSON-serializable (`JSON.stringify` yields `{}` for the Maps). `toStored`/
|
|
90
|
+
* `fromStored` (dto.ts) are the plain-JSON persistence mirror for one key's
|
|
91
|
+
* per-language data.
|
|
92
92
|
*/
|
|
93
93
|
interface CanonicalModel {
|
|
94
94
|
/** BCP-47, e.g. `en`, `en-US`, `pt-BR`. */
|
|
@@ -99,4 +99,4 @@ interface CanonicalModel {
|
|
|
99
99
|
declare const keyId: (namespace: string, base: string) => string;
|
|
100
100
|
//#endregion
|
|
101
101
|
export { PluralKind as a, ValueToken as c, Key as i, keyId as l, CLDRCategory as n, PluralSet as o, CanonicalModel as r, Value as s, ALL_CLDR_CATEGORIES as t };
|
|
102
|
-
//# sourceMappingURL=model-
|
|
102
|
+
//# sourceMappingURL=model-DY8mEqBM.d.ts.map
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@astilba/core",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.2.0",
|
|
4
4
|
"description": "Format-neutral canonical i18n message model, vendored CLDR plural rules, and the round-trip message-fidelity harness (driver + FormatAdapter/RenderOracle contracts). Syntax adapters (e.g. @astilba/adapter-i18next-v4) plug in on top.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"cldr",
|