@verbatra/sdk 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -8,6 +8,7 @@ import * as deepl from 'deepl-node';
8
8
  import log from 'loglevel';
9
9
  import { GoogleGenAI } from '@google/genai';
10
10
  import OpenAI from 'openai';
11
+ import { randomUUID } from 'crypto';
11
12
  import { access, writeFile, rename, rm, open } from 'fs/promises';
12
13
  import { parse, TYPE } from '@formatjs/icu-messageformat-parser';
13
14
  import ExcelJS from 'exceljs';
@@ -44,13 +45,16 @@ function fnv1a64(input) {
44
45
  }
45
46
  return hash.toString(16).padStart(16, "0");
46
47
  }
48
+ function normalizeText(text) {
49
+ return text.normalize("NFC").replace(/\r\n?/g, "\n");
50
+ }
47
51
  function canonicalize(entry) {
48
52
  return JSON.stringify([
49
- entry.value,
50
- entry.description ?? null,
51
- entry.meaning ?? null,
53
+ normalizeText(entry.value),
54
+ entry.description == null ? null : normalizeText(entry.description),
55
+ entry.meaning == null ? null : normalizeText(entry.meaning),
52
56
  entry.isPlural,
53
- [...entry.placeholders].sort()
57
+ [...entry.placeholders].map(normalizeText).sort()
54
58
  ]);
55
59
  }
56
60
  function contentHash(entry) {
@@ -114,17 +118,31 @@ z.object({
114
118
  format: supportedFormatSchema,
115
119
  entries: z.map(z.string(), translationEntrySchema)
116
120
  });
117
- function difference(a, b) {
118
- return [...new Set(a.filter((item) => !b.has(item)))].sort();
121
+ function counts(items) {
122
+ const map = /* @__PURE__ */ new Map();
123
+ for (const item of items) {
124
+ map.set(item, (map.get(item) ?? 0) + 1);
125
+ }
126
+ return map;
127
+ }
128
+ function multisetExcess(a, b) {
129
+ const excess = [];
130
+ for (const [token, count] of a) {
131
+ const surplus = count - (b.get(token) ?? 0);
132
+ for (let i = 0; i < surplus; i += 1) {
133
+ excess.push(token);
134
+ }
135
+ }
136
+ return excess.sort();
119
137
  }
120
138
  function sameOrder(a, b) {
121
139
  return a.length === b.length && a.every((item, index) => item === b[index]);
122
140
  }
123
141
  function checkPlaceholders(source, translated) {
124
- const sourceSet = new Set(source);
125
- const translatedSet = new Set(translated);
126
- const missing = difference(source, translatedSet);
127
- const extra = difference(translated, sourceSet);
142
+ const sourceCounts = counts(source);
143
+ const translatedCounts = counts(translated);
144
+ const missing = multisetExcess(sourceCounts, translatedCounts);
145
+ const extra = multisetExcess(translatedCounts, sourceCounts);
128
146
  const reordered = missing.length === 0 && extra.length === 0 && !sameOrder(source, translated);
129
147
  return {
130
148
  matches: missing.length === 0 && extra.length === 0 && !reordered,
@@ -266,6 +284,12 @@ async function runLlmTranslation(request, mechanism) {
266
284
  );
267
285
  return completion.usage === void 0 ? { values, integrity } : { values, integrity, usage: completion.usage };
268
286
  }
287
+ var OUTPUT_TRUNCATED_MESSAGE = "The provider stopped because the output-token limit was reached. Reduce the batch size or raise the configured max output tokens.";
288
+ function assertNotTruncated(truncated) {
289
+ if (truncated) {
290
+ throw new ProviderError("OUTPUT_TRUNCATED", OUTPUT_TRUNCATED_MESSAGE);
291
+ }
292
+ }
269
293
  function readRequiredEnv(name) {
270
294
  const value = process.env[name];
271
295
  if (value === void 0 || value.length === 0) {
@@ -361,6 +385,7 @@ function createMechanism(client, config) {
361
385
  translate: async ({ payloadJson }) => {
362
386
  const body = buildRequest(config, payloadJson);
363
387
  const message = await callClient(client, body);
388
+ assertNotTruncated(message.stop_reason === "max_tokens");
364
389
  const raw = requireToolInput(message.content);
365
390
  const usage = toUsage(message.usage);
366
391
  return usage === void 0 ? { raw } : { raw, usage };
@@ -608,6 +633,7 @@ function extractGeminiResult(response) {
608
633
  if (candidate.finishReason !== void 0 && BLOCKED_FINISH_REASONS.has(candidate.finishReason)) {
609
634
  throw new ProviderError("PROVIDER_BLOCKED", "The provider filtered the translation response.");
610
635
  }
636
+ assertNotTruncated(candidate.finishReason === "MAX_TOKENS");
611
637
  const text = response.text;
612
638
  if (text === void 0 || text === "") {
613
639
  throw new ProviderError("INVALID_RESPONSE", "The provider returned no translation content.");
@@ -704,10 +730,12 @@ function toUsage3(usage) {
704
730
  return { inputTokens: prompt_tokens, outputTokens: completion_tokens };
705
731
  }
706
732
  function extractOpenAiResult(completion) {
707
- const message = completion.choices[0]?.message;
708
- if (message === void 0) {
733
+ const choice = completion.choices[0];
734
+ if (choice === void 0) {
709
735
  throw new ProviderError("INVALID_RESPONSE", "The provider returned no message.");
710
736
  }
737
+ assertNotTruncated(choice.finish_reason === "length");
738
+ const message = choice.message;
711
739
  if (message.refusal !== void 0 && message.refusal !== null && message.refusal !== "") {
712
740
  throw new ProviderError("PROVIDER_REFUSED", "The provider refused the translation request.");
713
741
  }
@@ -760,6 +788,7 @@ function buildProvider(config) {
760
788
  }
761
789
 
762
790
  // src/config/schema.ts
791
+ var DEFAULT_MAX_BATCH_SIZE = 50;
763
792
  var verbatraConfigSchema = z.strictObject({
764
793
  sourceLocale: z.string().min(1),
765
794
  targetLocales: z.array(z.string().min(1)).min(1),
@@ -769,7 +798,31 @@ var verbatraConfigSchema = z.strictObject({
769
798
  }),
770
799
  provider: providerConfigSchema,
771
800
  glossary: z.record(z.string(), z.string()).optional(),
772
- tone: z.enum(["formal", "informal", "neutral"]).optional()
801
+ tone: z.enum(["formal", "informal", "neutral"]).optional(),
802
+ /**
803
+ * Opt-in orphan pruning, off by default (absent is treated as false). When true, keys present in a
804
+ * target file but absent from the source (the diff's orphaned keys) are removed from the written file
805
+ * and the lock. A per-run `prune` option on `translate` (the CLI `--prune` flag) overrides this. This
806
+ * is non-secret, consistent with the config's no-secret invariant.
807
+ */
808
+ prune: z.boolean().optional(),
809
+ /**
810
+ * Opt-in plural-category generation, off by default (absent is treated as false). When true, and only
811
+ * for an i18next-JSON project translated by an LLM provider, verbatra synthesizes the CLDR plural forms
812
+ * a target language requires but the source does not supply (for example Polish few/many). A per-run
813
+ * `generatePlurals` option on `translate` overrides this. Unsupported cases (DeepL, non-i18next, an
814
+ * unknown language) fall back to the per-locale plural warning.
815
+ */
816
+ generatePlurals: z.boolean().optional(),
817
+ /**
818
+ * Optional maximum number of entries sent in a single provider request. A locale's missing-plus-changed
819
+ * entries are split into sequential sub-batches no larger than this so one oversized request cannot sink
820
+ * the whole locale; a failed sub-batch is withheld and retried while the others still make progress.
821
+ * Must be a positive integer (non-integer, zero, or negative is rejected at this boundary, never
822
+ * coerced). When absent, {@link DEFAULT_MAX_BATCH_SIZE} applies: 50, a conservative count that stays
823
+ * well inside provider context windows for typical short i18n strings while keeping request counts low.
824
+ */
825
+ maxBatchSize: z.number().int().positive().optional()
773
826
  }).refine((config) => !config.targetLocales.includes(config.sourceLocale), {
774
827
  message: "targetLocales must not include the source locale",
775
828
  path: ["targetLocales"]
@@ -914,8 +967,11 @@ async function readBoundedBytes(path, maxBytes) {
914
967
  await handle.close();
915
968
  }
916
969
  }
970
+ function tempFileName(path) {
971
+ return join(dirname(path), `.${basename(path)}.tmp-${process.pid}-${Date.now()}-${randomUUID()}`);
972
+ }
917
973
  async function atomicWrite(path, data) {
918
- const tmp = join(dirname(path), `.${basename(path)}.tmp-${process.pid}-${Date.now()}`);
974
+ const tmp = tempFileName(path);
919
975
  await (typeof data === "string" ? writeFile(tmp, data, "utf8") : writeFile(tmp, data));
920
976
  try {
921
977
  await rename(tmp, path);
@@ -1015,8 +1071,11 @@ async function cleanup(ops, tmp) {
1015
1071
  } catch {
1016
1072
  }
1017
1073
  }
1074
+ function tempFileName2(path) {
1075
+ return join(dirname(path), `.${basename(path)}.tmp-${process.pid}-${Date.now()}-${randomUUID()}`);
1076
+ }
1018
1077
  async function atomicWriteFile(path, data, ops = nodeOps) {
1019
- const tmp = join(dirname(path), `.${basename(path)}.tmp-${process.pid}-${Date.now()}`);
1078
+ const tmp = tempFileName2(path);
1020
1079
  try {
1021
1080
  await ops.writeFile(tmp, data);
1022
1081
  await ops.rename(tmp, path);
@@ -1054,20 +1113,117 @@ async function readBounded2(filePath) {
1054
1113
  await handle.close();
1055
1114
  }
1056
1115
  }
1057
- function addEntries(node, prefix, namespace, derive, out) {
1116
+ var BACKSLASH = "\\";
1117
+ var DOT = ".";
1118
+ var ESCAPED_BACKSLASH = "\\\\";
1119
+ var ESCAPED_DOT = "\\.";
1120
+ function needsEncoding(segment) {
1121
+ return segment.includes(BACKSLASH) || segment.includes(DOT);
1122
+ }
1123
+ function encodeSegment(segment) {
1124
+ if (!needsEncoding(segment)) {
1125
+ return segment;
1126
+ }
1127
+ let out = "";
1128
+ for (const char of segment) {
1129
+ if (char === BACKSLASH) {
1130
+ out += ESCAPED_BACKSLASH;
1131
+ } else if (char === DOT) {
1132
+ out += ESCAPED_DOT;
1133
+ } else {
1134
+ out += char;
1135
+ }
1136
+ }
1137
+ return out;
1138
+ }
1139
+ function decodeSegment(segment) {
1140
+ if (!segment.includes(BACKSLASH)) {
1141
+ return segment;
1142
+ }
1143
+ let out = "";
1144
+ let escaping = false;
1145
+ for (const char of segment) {
1146
+ if (escaping) {
1147
+ out += char;
1148
+ escaping = false;
1149
+ } else if (char === BACKSLASH) {
1150
+ escaping = true;
1151
+ } else {
1152
+ out += char;
1153
+ }
1154
+ }
1155
+ return out;
1156
+ }
1157
+ function joinEncodedSegments(segments) {
1158
+ return segments.join(DOT);
1159
+ }
1160
+ function decodeKeyToSegments(key) {
1161
+ if (!key.includes(BACKSLASH)) {
1162
+ return key.split(DOT);
1163
+ }
1164
+ const segments = [];
1165
+ let current = "";
1166
+ let escaping = false;
1167
+ for (const char of key) {
1168
+ if (escaping) {
1169
+ current += BACKSLASH + char;
1170
+ escaping = false;
1171
+ } else if (char === BACKSLASH) {
1172
+ escaping = true;
1173
+ } else if (char === DOT) {
1174
+ segments.push(current);
1175
+ current = "";
1176
+ } else {
1177
+ current += char;
1178
+ }
1179
+ }
1180
+ if (escaping) {
1181
+ current += BACKSLASH;
1182
+ }
1183
+ segments.push(current);
1184
+ return segments.map(decodeSegment);
1185
+ }
1186
+ function addLeaf(ctx, segments, key, value) {
1187
+ const effectivePath = segments.join(".");
1188
+ const mapKey = joinEncodedSegments(segments.map(encodeSegment));
1189
+ if (ctx.claimed.has(effectivePath) && ctx.claimed.get(effectivePath) !== mapKey) {
1190
+ throw new AdapterError(
1191
+ "INVALID_STRUCTURE",
1192
+ "A literal dotted leaf key and a nested key path resolve to the same path."
1193
+ );
1194
+ }
1195
+ ctx.claimed.set(effectivePath, mapKey);
1196
+ const { placeholders, isPlural } = ctx.derive(key, value);
1197
+ ctx.out.set(mapKey, { key: mapKey, namespace: ctx.namespace, value, placeholders, isPlural });
1198
+ }
1199
+ function addEntries(ctx, prefix, node) {
1200
+ for (const [key, value] of Object.entries(node)) {
1201
+ const segments = [...prefix, key];
1202
+ if (typeof value === "string") {
1203
+ addLeaf(ctx, segments, key, value);
1204
+ } else {
1205
+ addEntries(ctx, segments, value);
1206
+ }
1207
+ }
1208
+ }
1209
+ function addPathEntries(node, prefix, namespace, derive, out) {
1058
1210
  for (const [key, value] of Object.entries(node)) {
1059
1211
  const path = prefix === "" ? key : `${prefix}.${key}`;
1060
1212
  if (typeof value === "string") {
1061
1213
  const { placeholders, isPlural } = derive(key, value);
1062
1214
  out.set(path, { key: path, namespace, value, placeholders, isPlural });
1063
1215
  } else {
1064
- addEntries(value, path, namespace, derive, out);
1216
+ addPathEntries(value, path, namespace, derive, out);
1065
1217
  }
1066
1218
  }
1067
1219
  }
1068
- function flattenTree(tree, namespace, derive) {
1220
+ function flattenTree(tree, namespace, derive, keyMode = "literal-leaf") {
1069
1221
  const out = /* @__PURE__ */ new Map();
1070
- addEntries(tree, "", namespace, derive, out);
1222
+ if (keyMode === "path-notation") {
1223
+ addPathEntries(tree, "", namespace, derive, out);
1224
+ return out;
1225
+ }
1226
+ addEntries({ namespace, derive, out, claimed: /* @__PURE__ */ new Map() }, [], tree);
1071
1227
  return out;
1072
1228
  }
1073
1229
  var jsonTreeSchema = z.lazy(
@@ -1134,12 +1290,15 @@ function setPath(root, segments, value) {
1134
1290
  for (const segment of segments.slice(0, -1)) {
1135
1291
  node = descend(node, segment);
1136
1292
  }
1293
+ if (typeof node[leaf] === "object") {
1294
+ throw new AdapterError("INVALID_STRUCTURE", "A leaf key collides with a nested key path.");
1295
+ }
1137
1296
  node[leaf] = value;
1138
1297
  }
1139
1298
  function unflattenEntries(entries) {
1140
1299
  const root = emptyNode();
1141
1300
  for (const [key, entry] of entries) {
1142
- setPath(root, key.split("."), entry.value);
1301
+ setPath(root, decodeKeyToSegments(key), entry.value);
1143
1302
  }
1144
1303
  return root;
1145
1304
  }
@@ -1158,11 +1317,11 @@ function rethrowStructured(error, message) {
1158
1317
  }
1159
1318
  throw new AdapterError("INVALID_STRUCTURE", message);
1160
1319
  }
1161
- function toEntries(content, namespace, deriveEntry, validateTree) {
1320
+ function toEntries(content, namespace, deriveEntry, keyMode, validateTree) {
1162
1321
  try {
1163
1322
  const tree = parseJsonObject(content);
1164
1323
  validateTree?.(tree);
1165
- return flattenTree(tree, namespace, deriveEntry);
1324
+ return flattenTree(tree, namespace, deriveEntry, keyMode);
1166
1325
  } catch (error) {
1167
1326
  rethrowStructured(error, "The file could not be read as JSON.");
1168
1327
  }
@@ -1185,7 +1344,8 @@ function createJsonFileAdapter(options) {
1185
1344
  computeInvalidIcuKeys: computeInvalidIcuKeys2,
1186
1345
  validateMessage: validateMessage2,
1187
1346
  validateTree,
1188
- buildWriteTree
1347
+ buildWriteTree,
1348
+ keyMode = "literal-leaf"
1189
1349
  } = options;
1190
1350
  return {
1191
1351
  format,
@@ -1202,7 +1362,7 @@ function createJsonFileAdapter(options) {
1202
1362
  throw new AdapterError("INPUT_TOO_LARGE", "The file exceeds the maximum allowed size.");
1203
1363
  }
1204
1364
  const namespace = namespaceOf(filePath);
1205
- const entries = toEntries(outcome.content, namespace, deriveEntry, validateTree);
1365
+ const entries = toEntries(outcome.content, namespace, deriveEntry, keyMode, validateTree);
1206
1366
  const resource = { locale, namespace, format, entries };
1207
1367
  const invalidIcuKeys = computeIcu(entries, computeInvalidIcuKeys2);
1208
1368
  return { resource, invalidIcuKeys };
@@ -1214,23 +1374,41 @@ function createJsonFileAdapter(options) {
1214
1374
  }
1215
1375
  };
1216
1376
  }
1217
- var PLACEHOLDER_PATTERN = /\{\{[^{}]*\}\}/g;
1218
- function extractI18nextPlaceholders(value) {
1219
- const seen = /* @__PURE__ */ new Set();
1377
+ var DOUBLE_BRACE_PATTERN = /\{\{[^{}]*\}\}/g;
1378
+ var I18NEXT_PATTERN = /\{\{[^{}]*\}\}|\$t\([^()]*\)/g;
1379
+ function scanTokens(value, pattern) {
1220
1380
  const result = [];
1221
- for (const match of value.matchAll(PLACEHOLDER_PATTERN)) {
1381
+ for (const match of value.matchAll(pattern)) {
1222
1382
  const token = match[0];
1223
- if (token !== void 0 && !seen.has(token)) {
1224
- seen.add(token);
1383
+ if (token !== void 0) {
1225
1384
  result.push(token);
1226
1385
  }
1227
1386
  }
1228
1387
  return result;
1229
1388
  }
1389
+ function extractDoubleBracePlaceholders(value) {
1390
+ return scanTokens(value, DOUBLE_BRACE_PATTERN);
1391
+ }
1392
+ function extractI18nextPlaceholders(value) {
1393
+ return scanTokens(value, I18NEXT_PATTERN);
1394
+ }
1230
1395
  var PLURAL_SUFFIX = /_(zero|one|two|few|many|other)$/;
1231
1396
  function isPluralKey(key) {
1232
1397
  return PLURAL_SUFFIX.test(key);
1233
1398
  }
1399
+ function pluralCategoryOf(key) {
1400
+ const match = PLURAL_SUFFIX.exec(key);
1401
+ return match?.[1];
1402
+ }
1403
+ function pluralBaseKey(key) {
1404
+ if (!isPluralKey(key)) {
1405
+ return void 0;
1406
+ }
1407
+ return key.replace(PLURAL_SUFFIX, "");
1408
+ }
1409
+ function makePluralKey(baseKey, category) {
1410
+ return `${baseKey}_${category}`;
1411
+ }
1234
1412
  function createI18nextJsonAdapter() {
1235
1413
  return createJsonFileAdapter({
1236
1414
  format: "i18next-json",
@@ -1287,16 +1465,12 @@ function analyzeIcuValue(value) {
1287
1465
  }
1288
1466
  try {
1289
1467
  const ast = parse(value);
1290
- const seen = /* @__PURE__ */ new Set();
1291
1468
  const placeholders = [];
1292
1469
  const state = { isPlural: false };
1293
1470
  collect(
1294
1471
  ast,
1295
1472
  (token) => {
1296
- if (!seen.has(token)) {
1297
- seen.add(token);
1298
- placeholders.push(token);
1299
- }
1473
+ placeholders.push(token);
1300
1474
  },
1301
1475
  state
1302
1476
  );
@@ -1384,13 +1558,16 @@ async function buildNgxWriteTree(entries, filePath) {
1384
1558
  function createNgxTranslateJsonAdapter() {
1385
1559
  return createJsonFileAdapter({
1386
1560
  format: "ngx-translate-json",
1387
- extractPlaceholders: extractI18nextPlaceholders,
1561
+ extractPlaceholders: extractDoubleBracePlaceholders,
1388
1562
  deriveEntry: (_key, value) => ({
1389
- placeholders: extractI18nextPlaceholders(value),
1563
+ placeholders: extractDoubleBracePlaceholders(value),
1390
1564
  isPlural: false
1391
1565
  }),
1392
1566
  validateTree: assertNotMixed,
1393
- buildWriteTree: buildNgxWriteTree
1567
+ buildWriteTree: buildNgxWriteTree,
1568
+ // ngx-translate flat style uses dotted keys as path notation, not literal leaves;
1569
+ // keep the legacy non-encoding flatten so its flat/nested round-trip is unchanged.
1570
+ keyMode: "path-notation"
1394
1571
  });
1395
1572
  }
1396
1573
  var AdapterRegistry = class {
@@ -1441,15 +1618,13 @@ var AdapterRegistry = class {
1441
1618
  return this.resolveByDetection(filePath, options.sample);
1442
1619
  }
1443
1620
  };
1444
- var PLACEHOLDER_PATTERN2 = /\{[^{}]*\}/g;
1621
+ var PLACEHOLDER_PATTERN = /(?<!\{)\{\s*([A-Za-z_][\w$-]*|\d+)\s*\}(?!\})/g;
1445
1622
  function extractVueI18nPlaceholders(value) {
1446
- const seen = /* @__PURE__ */ new Set();
1447
1623
  const result = [];
1448
- for (const match of value.matchAll(PLACEHOLDER_PATTERN2)) {
1449
- const token = match[0];
1450
- if (token !== void 0 && !seen.has(token)) {
1451
- seen.add(token);
1452
- result.push(token);
1624
+ for (const match of value.matchAll(PLACEHOLDER_PATTERN)) {
1625
+ const key = match[1];
1626
+ if (key !== void 0) {
1627
+ result.push(`{${key}}`);
1453
1628
  }
1454
1629
  }
1455
1630
  return result;
@@ -1511,8 +1686,10 @@ function failureSummary(locale, error) {
1511
1686
  translated: [],
1512
1687
  unchanged: [],
1513
1688
  orphaned: [],
1689
+ pruned: [],
1514
1690
  invalidIcuSource: [],
1515
1691
  integrityMismatches: [],
1692
+ generated: [],
1516
1693
  notices: [],
1517
1694
  error: describeError(error)
1518
1695
  };
@@ -1535,6 +1712,198 @@ function readNotices(result) {
1535
1712
  return candidate.filter(isNotice);
1536
1713
  }
1537
1714
 
1715
+ // src/flow/plural-categories.ts
1716
+ var LANGUAGE_CATEGORIES = {
1717
+ ar: ["zero", "one", "two", "few", "many", "other"],
1718
+ cy: ["zero", "one", "two", "few", "many", "other"],
1719
+ ga: ["one", "two", "few", "many", "other"],
1720
+ pl: ["one", "few", "many", "other"],
1721
+ ru: ["one", "few", "many", "other"],
1722
+ uk: ["one", "few", "many", "other"],
1723
+ be: ["one", "few", "many", "other"],
1724
+ lt: ["one", "few", "many", "other"],
1725
+ sl: ["one", "two", "few", "other"]
1726
+ };
1727
+ function isKnownRicherLanguage(locale) {
1728
+ const subtag = locale.toLowerCase().split(/[-_]/)[0] ?? "";
1729
+ return LANGUAGE_CATEGORIES[subtag] !== void 0;
1730
+ }
1731
+ function requiredCategories(locale) {
1732
+ const subtag = locale.toLowerCase().split(/[-_]/)[0] ?? "";
1733
+ return LANGUAGE_CATEGORIES[subtag] ?? ["one", "other"];
1734
+ }
1735
+ function groupPluralSources(source) {
1736
+ const groups = /* @__PURE__ */ new Map();
1737
+ for (const [key, entry] of source.entries) {
1738
+ const baseKey = pluralBaseKey(key);
1739
+ const category = pluralCategoryOf(key);
1740
+ if (baseKey === void 0 || category === void 0) {
1741
+ continue;
1742
+ }
1743
+ const group = groups.get(baseKey) ?? /* @__PURE__ */ new Map();
1744
+ group.set(category, entry);
1745
+ groups.set(baseKey, group);
1746
+ }
1747
+ return groups;
1748
+ }
1749
+ function suppliedCategories(groups) {
1750
+ const supplied = /* @__PURE__ */ new Set();
1751
+ for (const group of groups.values()) {
1752
+ for (const category of group.keys()) {
1753
+ supplied.add(category);
1754
+ }
1755
+ }
1756
+ return supplied;
1757
+ }
1758
+ function detectMissingPluralCategories(source, targetLocale, format) {
1759
+ if (format !== "i18next-json") {
1760
+ return void 0;
1761
+ }
1762
+ const groups = groupPluralSources(source);
1763
+ const supplied = suppliedCategories(groups);
1764
+ if (supplied.size === 0) {
1765
+ return void 0;
1766
+ }
1767
+ const missing = requiredCategories(targetLocale).filter((category) => !supplied.has(category));
1768
+ if (missing.length === 0) {
1769
+ return void 0;
1770
+ }
1771
+ return {
1772
+ code: "PLURAL_CATEGORIES_INCOMPLETE",
1773
+ message: `The source does not supply all CLDR plural categories the target language "${targetLocale}" requires (missing: ${missing.join(", ")}); verbatra translates only the source's plural forms and does not synthesize the others. Add the missing forms manually.`
1774
+ };
1775
+ }
1776
+ function targetPluralSetIncomplete(targetKeys, targetLocale) {
1777
+ const required = requiredCategories(targetLocale);
1778
+ const present = /* @__PURE__ */ new Map();
1779
+ for (const key of targetKeys) {
1780
+ const baseKey = pluralBaseKey(key);
1781
+ const category = pluralCategoryOf(key);
1782
+ if (baseKey === void 0 || category === void 0) {
1783
+ continue;
1784
+ }
1785
+ const set = present.get(baseKey) ?? /* @__PURE__ */ new Set();
1786
+ set.add(category);
1787
+ present.set(baseKey, set);
1788
+ }
1789
+ for (const categories of present.values()) {
1790
+ if (required.some((category) => !categories.has(category))) {
1791
+ return true;
1792
+ }
1793
+ }
1794
+ return false;
1795
+ }
1796
+ function sourcePluralBaseKeys(source) {
1797
+ const bases = /* @__PURE__ */ new Set();
1798
+ for (const key of source.entries.keys()) {
1799
+ const baseKey = pluralBaseKey(key);
1800
+ if (baseKey !== void 0) {
1801
+ bases.add(baseKey);
1802
+ }
1803
+ }
1804
+ return bases;
1805
+ }
1806
+ function isGeneratedPluralKey(key, sourceBaseKeys) {
1807
+ const baseKey = pluralBaseKey(key);
1808
+ return baseKey !== void 0 && sourceBaseKeys.has(baseKey);
1809
+ }
1810
+ function pluralIncompleteNotice(targetLocale) {
1811
+ return {
1812
+ code: "PLURAL_CATEGORIES_INCOMPLETE",
1813
+ message: `The plural set for the target language "${targetLocale}" is still incomplete: verbatra could not generate every required CLDR plural form (an unsupported case, or a generated form was withheld for a placeholder mismatch). Add the remaining forms manually.`
1814
+ };
1815
+ }
1816
+ function representativeEntry(group) {
1817
+ return group.get("other") ?? group.get("one") ?? [...group.values()][0];
1818
+ }
1819
+ function planPluralGeneration(source, targetLocale, format) {
1820
+ if (format !== "i18next-json" || !isKnownRicherLanguage(targetLocale)) {
1821
+ return { items: [] };
1822
+ }
1823
+ const required = requiredCategories(targetLocale);
1824
+ const groups = groupPluralSources(source);
1825
+ const items = [];
1826
+ for (const [baseKey, group] of groups) {
1827
+ const representative = representativeEntry(group);
1828
+ if (representative === void 0) {
1829
+ continue;
1830
+ }
1831
+ const governingEntries = [...group.values()];
1832
+ for (const category of required) {
1833
+ if (group.has(category)) {
1834
+ continue;
1835
+ }
1836
+ items.push({
1837
+ targetKey: makePluralKey(baseKey, category),
1838
+ category,
1839
+ sourceEntry: representative,
1840
+ governingEntries
1841
+ });
1842
+ }
1843
+ }
1844
+ return { items };
1845
+ }
1846
+
1847
+ // src/flow/plural-generation.ts
1848
+ function generatedLockHash(governingEntries, category) {
1849
+ const governingHashes = governingEntries.map(contentHash).sort();
1850
+ return contentHash({
1851
+ value: `${category}:${governingHashes.join("|")}`,
1852
+ placeholders: [],
1853
+ isPlural: true
1854
+ });
1855
+ }
1856
+ function syntheticEntry(item) {
1857
+ return {
1858
+ ...item.sourceEntry,
1859
+ key: item.targetKey,
1860
+ isPlural: true,
1861
+ // The CLDR category travels as data context (the meaning field), never the instruction channel.
1862
+ meaning: `CLDR plural category "${item.category}"`
1863
+ };
1864
+ }
1865
+ function staleItems(items, baseline) {
1866
+ return items.filter((item) => {
1867
+ const hash = generatedLockHash(item.governingEntries, item.category);
1868
+ return baseline.get(item.targetKey) !== hash;
1869
+ });
1870
+ }
1871
+ function buildRequest2(context, entries) {
1872
+ return {
1873
+ sourceLocale: context.sourceLocale,
1874
+ targetLocale: context.targetLocale,
1875
+ entries,
1876
+ extractPlaceholders: context.adapter.extractPlaceholders,
1877
+ ...context.glossary !== void 0 ? { glossary: context.glossary } : {},
1878
+ ...context.tone !== void 0 ? { tone: context.tone } : {}
1879
+ };
1880
+ }
1881
+ async function generatePluralForms(context) {
1882
+ const plan = planPluralGeneration(context.source, context.targetLocale, context.format);
1883
+ const stale = staleItems(plan.items, context.baseline);
1884
+ if (stale.length === 0) {
1885
+ return { accepted: [], withheld: [] };
1886
+ }
1887
+ const entries = stale.map(syntheticEntry);
1888
+ const result = await context.provider.translateBatch(buildRequest2(context, entries));
1889
+ const accepted = [];
1890
+ const withheld = [];
1891
+ for (const item of stale) {
1892
+ const value = result.values.get(item.targetKey);
1893
+ const integrity = result.integrity.get(item.targetKey);
1894
+ if (value !== void 0 && integrity?.matches === true) {
1895
+ accepted.push({
1896
+ targetKey: item.targetKey,
1897
+ entry: { ...syntheticEntry(item), value },
1898
+ lockHash: generatedLockHash(item.governingEntries, item.category)
1899
+ });
1900
+ } else {
1901
+ withheld.push(item.targetKey);
1902
+ }
1903
+ }
1904
+ return { accepted, withheld };
1905
+ }
1906
+
1538
1907
  // src/flow/locale-run.ts
1539
1908
  function emptyResource(locale, format) {
1540
1909
  return { locale, namespace: "", format, entries: /* @__PURE__ */ new Map() };
@@ -1546,7 +1915,7 @@ async function readTarget(params) {
1546
1915
  }
1547
1916
  return (await params.adapter.read(path, params.targetLocale)).resource;
1548
1917
  }
1549
- function buildRequest2(params, entries) {
1918
+ function buildRequest3(params, entries) {
1550
1919
  return {
1551
1920
  sourceLocale: params.sourceLocale,
1552
1921
  targetLocale: params.targetLocale,
@@ -1559,25 +1928,56 @@ function buildRequest2(params, entries) {
1559
1928
  async function runLocale(params) {
1560
1929
  const target = await readTarget(params);
1561
1930
  const diff = diffResources(params.source, target, { baseline: params.baseline });
1931
+ const orphaned = params.generatePlurals ? diff.orphaned.filter((key) => !isGeneratedPluralKey(key, sourcePluralBaseKeys(params.source))) : diff.orphaned;
1932
+ const pruned = params.prune ? orphaned : [];
1562
1933
  const invalidIcu = new Set(params.sourceInvalidIcuKeys);
1563
1934
  const candidates = [...diff.missing, ...diff.changed];
1564
1935
  const toTranslate = candidates.filter((key) => !invalidIcu.has(key));
1565
1936
  const invalidIcuSource = candidates.filter((key) => invalidIcu.has(key));
1937
+ const pluralNotice = detectMissingPluralCategories(
1938
+ params.source,
1939
+ params.targetLocale,
1940
+ params.format
1941
+ );
1942
+ const sdkNotices = pluralNotice ? [pluralNotice] : [];
1566
1943
  const provider = params.provider;
1567
1944
  if (provider === void 0) {
1568
1945
  return {
1569
- summary: baseSummary(params.targetLocale, diff, invalidIcuSource, toTranslate, [], []),
1946
+ summary: baseSummary({
1947
+ locale: params.targetLocale,
1948
+ unchanged: diff.unchanged,
1949
+ orphaned,
1950
+ invalidIcuSource,
1951
+ translated: toTranslate,
1952
+ generated: [],
1953
+ integrityMismatches: [],
1954
+ pruned,
1955
+ notices: sdkNotices
1956
+ }),
1570
1957
  lockEntries: {}
1571
1958
  };
1572
1959
  }
1573
1960
  const entries = toTranslate.map((key) => params.source.entries.get(key)).filter((entry) => entry !== void 0);
1574
1961
  const accepted = /* @__PURE__ */ new Map();
1575
1962
  const integrityMismatches = [];
1576
- const notices = await translateAndCheck(provider, params, entries, accepted, integrityMismatches);
1963
+ const subBatchNotices = await translateAndCheck(
1964
+ provider,
1965
+ params,
1966
+ entries,
1967
+ accepted,
1968
+ integrityMismatches
1969
+ );
1577
1970
  const merged = new Map(target.entries);
1971
+ for (const key of pruned) {
1972
+ merged.delete(key);
1973
+ }
1578
1974
  for (const [key, { value, source }] of accepted) {
1579
1975
  merged.set(key, { ...source, value, namespace: target.namespace });
1580
1976
  }
1977
+ const generation = await runGeneration(params, provider);
1978
+ for (const form of generation.accepted) {
1979
+ merged.set(form.targetKey, { ...form.entry, namespace: target.namespace });
1980
+ }
1581
1981
  const path = localeFilePath(params.cwd, params.filesPattern, params.targetLocale);
1582
1982
  await params.adapter.write(
1583
1983
  {
@@ -1588,37 +1988,83 @@ async function runLocale(params) {
1588
1988
  },
1589
1989
  path
1590
1990
  );
1591
- const withheld = /* @__PURE__ */ new Set([...integrityMismatches, ...invalidIcuSource]);
1991
+ const pluralNotices = params.generatePlurals ? pluralNoticeFor(params, merged) : sdkNotices;
1992
+ const notices = [...pluralNotices, ...subBatchNotices];
1993
+ const withheld = /* @__PURE__ */ new Set([...integrityMismatches, ...invalidIcuSource, ...generation.withheld]);
1592
1994
  return {
1593
- summary: baseSummary(
1594
- params.targetLocale,
1595
- diff,
1995
+ summary: baseSummary({
1996
+ locale: params.targetLocale,
1997
+ unchanged: diff.unchanged,
1998
+ orphaned,
1596
1999
  invalidIcuSource,
1597
- [...accepted.keys()],
1598
- integrityMismatches,
2000
+ translated: [...accepted.keys()],
2001
+ generated: generation.accepted.map((form) => form.targetKey).sort(),
2002
+ // Withheld generated forms surface alongside withheld translations (spec D4): both failed integrity.
2003
+ integrityMismatches: [...integrityMismatches, ...generation.withheld].sort(),
2004
+ pruned,
1599
2005
  notices
1600
- ),
1601
- lockEntries: computeLockEntries(params, merged, withheld)
2006
+ }),
2007
+ lockEntries: computeLockEntries(params, merged, withheld, generation.accepted)
1602
2008
  };
1603
2009
  }
1604
- function baseSummary(locale, diff, invalidIcuSource, translated, integrityMismatches, notices) {
2010
/**
 * Runs plural-form generation for one target locale when it is enabled.
 *
 * Generation is attempted only when `params.generatePlurals` is truthy and
 * the provider reports `kind === "llm"`; in every other case an empty result
 * is returned without calling `generatePluralForms`.
 *
 * @param {object} params - Locale run parameters; the fields forwarded below are read from it.
 * @param {object} provider - Translation provider; only its `kind` is inspected here.
 * @returns {Promise<{accepted: Array, withheld: Array}>} The generation result, or empty lists when skipped.
 */
async function runGeneration(params, provider) {
  const generationEnabled = params.generatePlurals && provider.kind === "llm";
  if (!generationEnabled) {
    return { accepted: [], withheld: [] };
  }
  const { source, sourceLocale, targetLocale, format, adapter, glossary, tone, baseline } = params;
  return generatePluralForms({
    source,
    sourceLocale,
    targetLocale,
    format,
    adapter,
    provider,
    glossary,
    tone,
    baseline
  });
}
2026
/**
 * Builds the notice list describing an incomplete plural set in the merged target.
 *
 * A notice is produced only for the "i18next-json" format, and only when
 * `targetPluralSetIncomplete` reports a truthy result for the merged keys and
 * the target locale.
 *
 * @param {object} params - Locale run parameters; `format` and `targetLocale` are read.
 * @param {Map} merged - Merged target entries; only its keys are inspected.
 * @returns {Array} A single-element list with the notice, or an empty list.
 */
function pluralNoticeFor(params, merged) {
  const { format, targetLocale } = params;
  const incomplete = format === "i18next-json" && targetPluralSetIncomplete(merged.keys(), targetLocale);
  return incomplete ? [pluralIncompleteNotice(targetLocale)] : [];
}
2035
/**
 * Assembles the per-locale summary object for a successful run.
 *
 * Each field is copied by reference from `parts`; `status` is always the
 * literal "succeeded". The key order of the returned object is fixed.
 *
 * @param {object} parts - Named summary fields (locale, translated, unchanged,
 *   orphaned, pruned, invalidIcuSource, integrityMismatches, generated, notices).
 * @returns {object} The summary record.
 */
function baseSummary(parts) {
  const {
    locale,
    translated,
    unchanged,
    orphaned,
    pruned,
    invalidIcuSource,
    integrityMismatches,
    generated,
    notices
  } = parts;
  return {
    locale,
    status: "succeeded",
    translated,
    unchanged,
    orphaned,
    pruned,
    invalidIcuSource,
    integrityMismatches,
    generated,
    notices
  };
}
1616
2049
  async function translateAndCheck(provider, params, entries, accepted, integrityMismatches) {
1617
- if (entries.length === 0) {
1618
- return [];
2050
+ const notices = [];
2051
+ for (const batch of chunk(entries, params.maxBatchSize)) {
2052
+ const subNotices = await runSubBatch(provider, params, batch, accepted, integrityMismatches);
2053
+ notices.push(...subNotices);
1619
2054
  }
1620
- const result = await provider.translateBatch(buildRequest2(params, entries));
1621
- for (const entry of entries) {
2055
+ return notices;
2056
+ }
2057
+ async function runSubBatch(provider, params, batch, accepted, integrityMismatches) {
2058
+ let result;
2059
+ try {
2060
+ result = await provider.translateBatch(buildRequest3(params, batch));
2061
+ } catch {
2062
+ for (const entry of batch) {
2063
+ integrityMismatches.push(entry.key);
2064
+ }
2065
+ return [subBatchFailedNotice(batch.length)];
2066
+ }
2067
+ for (const entry of batch) {
1622
2068
  const value = result.values.get(entry.key);
1623
2069
  const integrity = result.integrity.get(entry.key);
1624
2070
  if (value !== void 0 && integrity?.matches === true) {
@@ -1629,11 +2075,28 @@ async function translateAndCheck(provider, params, entries, accepted, integrityM
1629
2075
  }
1630
2076
  return readNotices(result);
1631
2077
  }
1632
- function computeLockEntries(params, merged, withheld) {
2078
/**
 * Creates the notice reported when a whole sub-batch fails.
 *
 * @param {number} count - Number of entries in the failed sub-batch; interpolated into the message.
 * @returns {{code: string, message: string}} Notice with code "SUB_BATCH_FAILED".
 */
function subBatchFailedNotice(count) {
  const message = `A sub-batch of ${count} entries failed and was withheld; it will be retried next run.`;
  return { code: "SUB_BATCH_FAILED", message };
}
2084
/**
 * Splits an array into consecutive slices of at most `size` items.
 *
 * An empty input always yields an empty list, whatever `size` is. For a
 * non-empty input, `size` must compare greater than zero: a zero or negative
 * step would never advance past the end of the array, and a NaN/undefined
 * step would stop after one empty slice and silently drop every item.
 *
 * @param {Array} items - Items to partition; not modified.
 * @param {number} size - Maximum number of items per slice.
 * @returns {Array<Array>} The slices, in original order.
 * @throws {RangeError} When `items` is non-empty and `size` is not greater than zero.
 */
function chunk(items, size) {
  if (items.length === 0) {
    return [];
  }
  // Written as a negated comparison so NaN and undefined are rejected too.
  if (!(size > 0)) {
    throw new RangeError(`chunk size must be greater than zero, received ${String(size)}`);
  }
  const chunks = [];
  for (let index = 0; index < items.length; index += size) {
    chunks.push(items.slice(index, index + size));
  }
  return chunks;
}
2091
+ function computeLockEntries(params, merged, withheld, generated) {
1633
2092
  const lockEntries = {};
2093
+ const sourceBaseKeys = sourcePluralBaseKeys(params.source);
1634
2094
  for (const key of merged.keys()) {
1635
2095
  const sourceEntry = params.source.entries.get(key);
1636
2096
  if (sourceEntry === void 0) {
2097
+ if (params.generatePlurals) {
2098
+ carryGeneratedLock(lockEntries, params.baseline, key, sourceBaseKeys);
2099
+ }
1637
2100
  continue;
1638
2101
  }
1639
2102
  if (withheld.has(key)) {
@@ -1645,8 +2108,20 @@ function computeLockEntries(params, merged, withheld) {
1645
2108
  }
1646
2109
  lockEntries[key] = contentHash(sourceEntry);
1647
2110
  }
2111
+ for (const form of generated) {
2112
+ lockEntries[form.targetKey] = form.lockHash;
2113
+ }
1648
2114
  return lockEntries;
1649
2115
  }
2116
/**
 * Copies a previously recorded lock value for a generated plural key.
 *
 * Nothing is written unless `isGeneratedPluralKey` accepts the key and the
 * baseline holds a value other than `undefined` for it.
 *
 * @param {object} lockEntries - Lock map being built; mutated in place.
 * @param {Map} baseline - Prior lock values, looked up by key.
 * @param {string} key - Target key under consideration.
 * @param {*} sourceBaseKeys - Passed through to `isGeneratedPluralKey`.
 * @returns {void}
 */
function carryGeneratedLock(lockEntries, baseline, key, sourceBaseKeys) {
  if (isGeneratedPluralKey(key, sourceBaseKeys)) {
    const priorLock = baseline.get(key);
    if (priorLock !== undefined) {
      lockEntries[key] = priorLock;
    }
  }
}
1650
2125
 
1651
2126
  // src/flow/source.ts
1652
2127
  async function readSource(config, cwd, fs, adapter) {
@@ -1673,6 +2148,9 @@ async function translate2(input, deps = {}) {
1673
2148
  const config = input.config;
1674
2149
  const cwd = input.cwd ?? process.cwd();
1675
2150
  const dryRun = input.dryRun ?? false;
2151
+ const prune = input.prune ?? config.prune ?? false;
2152
+ const generatePlurals = input.generatePlurals ?? config.generatePlurals ?? false;
2153
+ const maxBatchSize = config.maxBatchSize ?? DEFAULT_MAX_BATCH_SIZE;
1676
2154
  const fs = deps.fs ?? defaultFs;
1677
2155
  const adapter = selectAdapter(config.format, deps.adapterRegistry);
1678
2156
  const provider = dryRun ? void 0 : selectProvider(config.provider, deps.createProvider);
@@ -1695,6 +2173,9 @@ async function translate2(input, deps = {}) {
1695
2173
  format: config.format,
1696
2174
  glossary: config.glossary,
1697
2175
  tone: config.tone,
2176
+ prune,
2177
+ generatePlurals,
2178
+ maxBatchSize,
1698
2179
  fs
1699
2180
  };
1700
2181
  const { summary, lockEntries } = await runLocale(params);
@@ -2165,8 +2646,12 @@ function importLocale(params) {
2165
2646
  translated: [...buckets.accepted.keys()].sort(),
2166
2647
  unchanged: diff.unchanged,
2167
2648
  orphaned: diff.orphaned,
2649
+ // Import never prunes: orphans are reported but never removed here (pruning is a translate-flow concern).
2650
+ pruned: [],
2168
2651
  invalidIcuSource,
2169
2652
  integrityMismatches: [...buckets.mismatches].sort(),
2653
+ // Plural generation is a translate-flow concern; the manual workbook import never generates forms.
2654
+ generated: [],
2170
2655
  notices: []
2171
2656
  };
2172
2657
  return { summary, accepted: buckets.accepted, withheld: buckets.withheld };