@verbatra/sdk 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -10,6 +10,7 @@ var deepl = require('deepl-node');
10
10
  var log = require('loglevel');
11
11
  var genai = require('@google/genai');
12
12
  var OpenAI = require('openai');
13
+ var crypto = require('crypto');
13
14
  var promises = require('fs/promises');
14
15
  var icuMessageformatParser = require('@formatjs/icu-messageformat-parser');
15
16
  var ExcelJS = require('exceljs');
@@ -73,13 +74,16 @@ function fnv1a64(input) {
73
74
  }
74
75
  return hash.toString(16).padStart(16, "0");
75
76
  }
77
+ function normalizeText(text) {
78
+ return text.normalize("NFC").replace(/\r\n?/g, "\n");
79
+ }
76
80
  function canonicalize(entry) {
77
81
  return JSON.stringify([
78
- entry.value,
79
- entry.description ?? null,
80
- entry.meaning ?? null,
82
+ normalizeText(entry.value),
83
+ entry.description == null ? null : normalizeText(entry.description),
84
+ entry.meaning == null ? null : normalizeText(entry.meaning),
81
85
  entry.isPlural,
82
- [...entry.placeholders].sort()
86
+ [...entry.placeholders].map(normalizeText).sort()
83
87
  ]);
84
88
  }
85
89
  function contentHash(entry) {
@@ -143,17 +147,31 @@ zod.z.object({
143
147
  format: supportedFormatSchema,
144
148
  entries: zod.z.map(zod.z.string(), translationEntrySchema)
145
149
  });
146
- function difference(a, b) {
147
- return [...new Set(a.filter((item) => !b.has(item)))].sort();
150
+ function counts(items) {
151
+ const map = /* @__PURE__ */ new Map();
152
+ for (const item of items) {
153
+ map.set(item, (map.get(item) ?? 0) + 1);
154
+ }
155
+ return map;
156
+ }
157
+ function multisetExcess(a, b) {
158
+ const excess = [];
159
+ for (const [token, count] of a) {
160
+ const surplus = count - (b.get(token) ?? 0);
161
+ for (let i = 0; i < surplus; i += 1) {
162
+ excess.push(token);
163
+ }
164
+ }
165
+ return excess.sort();
148
166
  }
149
167
  function sameOrder(a, b) {
150
168
  return a.length === b.length && a.every((item, index) => item === b[index]);
151
169
  }
152
170
  function checkPlaceholders(source, translated) {
153
- const sourceSet = new Set(source);
154
- const translatedSet = new Set(translated);
155
- const missing = difference(source, translatedSet);
156
- const extra = difference(translated, sourceSet);
171
+ const sourceCounts = counts(source);
172
+ const translatedCounts = counts(translated);
173
+ const missing = multisetExcess(sourceCounts, translatedCounts);
174
+ const extra = multisetExcess(translatedCounts, sourceCounts);
157
175
  const reordered = missing.length === 0 && extra.length === 0 && !sameOrder(source, translated);
158
176
  return {
159
177
  matches: missing.length === 0 && extra.length === 0 && !reordered,
@@ -295,6 +313,12 @@ async function runLlmTranslation(request, mechanism) {
295
313
  );
296
314
  return completion.usage === void 0 ? { values, integrity } : { values, integrity, usage: completion.usage };
297
315
  }
316
+ var OUTPUT_TRUNCATED_MESSAGE = "The provider stopped because the output-token limit was reached. Reduce the batch size or raise the configured max output tokens.";
317
+ function assertNotTruncated(truncated) {
318
+ if (truncated) {
319
+ throw new ProviderError("OUTPUT_TRUNCATED", OUTPUT_TRUNCATED_MESSAGE);
320
+ }
321
+ }
298
322
  function readRequiredEnv(name) {
299
323
  const value = process.env[name];
300
324
  if (value === void 0 || value.length === 0) {
@@ -390,6 +414,7 @@ function createMechanism(client, config) {
390
414
  translate: async ({ payloadJson }) => {
391
415
  const body = buildRequest(config, payloadJson);
392
416
  const message = await callClient(client, body);
417
+ assertNotTruncated(message.stop_reason === "max_tokens");
393
418
  const raw = requireToolInput(message.content);
394
419
  const usage = toUsage(message.usage);
395
420
  return usage === void 0 ? { raw } : { raw, usage };
@@ -637,6 +662,7 @@ function extractGeminiResult(response) {
637
662
  if (candidate.finishReason !== void 0 && BLOCKED_FINISH_REASONS.has(candidate.finishReason)) {
638
663
  throw new ProviderError("PROVIDER_BLOCKED", "The provider filtered the translation response.");
639
664
  }
665
+ assertNotTruncated(candidate.finishReason === "MAX_TOKENS");
640
666
  const text = response.text;
641
667
  if (text === void 0 || text === "") {
642
668
  throw new ProviderError("INVALID_RESPONSE", "The provider returned no translation content.");
@@ -733,10 +759,12 @@ function toUsage3(usage) {
733
759
  return { inputTokens: prompt_tokens, outputTokens: completion_tokens };
734
760
  }
735
761
  function extractOpenAiResult(completion) {
736
- const message = completion.choices[0]?.message;
737
- if (message === void 0) {
762
+ const choice = completion.choices[0];
763
+ if (choice === void 0) {
738
764
  throw new ProviderError("INVALID_RESPONSE", "The provider returned no message.");
739
765
  }
766
+ assertNotTruncated(choice.finish_reason === "length");
767
+ const message = choice.message;
740
768
  if (message.refusal !== void 0 && message.refusal !== null && message.refusal !== "") {
741
769
  throw new ProviderError("PROVIDER_REFUSED", "The provider refused the translation request.");
742
770
  }
@@ -789,6 +817,7 @@ function buildProvider(config) {
789
817
  }
790
818
 
791
819
  // src/config/schema.ts
820
+ var DEFAULT_MAX_BATCH_SIZE = 50;
792
821
  var verbatraConfigSchema = zod.z.strictObject({
793
822
  sourceLocale: zod.z.string().min(1),
794
823
  targetLocales: zod.z.array(zod.z.string().min(1)).min(1),
@@ -798,7 +827,31 @@ var verbatraConfigSchema = zod.z.strictObject({
798
827
  }),
799
828
  provider: providerConfigSchema,
800
829
  glossary: zod.z.record(zod.z.string(), zod.z.string()).optional(),
801
- tone: zod.z.enum(["formal", "informal", "neutral"]).optional()
830
+ tone: zod.z.enum(["formal", "informal", "neutral"]).optional(),
831
+ /**
832
+ * Opt-in orphan pruning, off by default (absent is treated as false). When true, keys present in a
833
+ * target file but absent from the source (the diff's orphaned keys) are removed from the written file
834
+ * and the lock. A per-run `prune` option on `translate` (the CLI `--prune` flag) overrides this. This
835
+ * is non-secret, consistent with the config's no-secret invariant.
836
+ */
837
+ prune: zod.z.boolean().optional(),
838
+ /**
839
+ * Opt-in plural-category generation, off by default (absent is treated as false). When true, and only
840
+ * for an i18next-JSON project translated by an LLM provider, verbatra synthesizes the CLDR plural forms
841
+ * a target language requires but the source does not supply (for example Polish few/many). A per-run
842
+ * `generatePlurals` option on `translate` overrides this. Unsupported cases (DeepL, non-i18next, an
843
+ * unknown language) fall back to the per-locale plural warning.
844
+ */
845
+ generatePlurals: zod.z.boolean().optional(),
846
+ /**
847
+ * Optional maximum number of entries sent in a single provider request. A locale's missing-plus-changed
848
+ * entries are split into sequential sub-batches no larger than this so one oversized request cannot sink
849
+ * the whole locale; a failed sub-batch is withheld and retried while the others still make progress.
850
+ * Must be a positive integer (non-integer, zero, or negative is rejected at this boundary, never
851
+ * coerced). When absent, {@link DEFAULT_MAX_BATCH_SIZE} applies: 50, a conservative count that stays
852
+ * well inside provider context windows for typical short i18n strings while keeping request counts low.
853
+ */
854
+ maxBatchSize: zod.z.number().int().positive().optional()
802
855
  }).refine((config) => !config.targetLocales.includes(config.sourceLocale), {
803
856
  message: "targetLocales must not include the source locale",
804
857
  path: ["targetLocales"]
@@ -943,11 +996,14 @@ async function readBoundedBytes(path, maxBytes) {
943
996
  await handle.close();
944
997
  }
945
998
  }
946
- async function atomicWrite(path$1, data) {
947
- const tmp = path.join(path.dirname(path$1), `.${path.basename(path$1)}.tmp-${process.pid}-${Date.now()}`);
999
+ function tempFileName(path$1) {
1000
+ return path.join(path.dirname(path$1), `.${path.basename(path$1)}.tmp-${process.pid}-${Date.now()}-${crypto.randomUUID()}`);
1001
+ }
1002
+ async function atomicWrite(path, data) {
1003
+ const tmp = tempFileName(path);
948
1004
  await (typeof data === "string" ? promises.writeFile(tmp, data, "utf8") : promises.writeFile(tmp, data));
949
1005
  try {
950
- await promises.rename(tmp, path$1);
1006
+ await promises.rename(tmp, path);
951
1007
  } catch (error) {
952
1008
  await promises.rm(tmp, { force: true });
953
1009
  throw error;
@@ -1044,11 +1100,14 @@ async function cleanup(ops, tmp) {
1044
1100
  } catch {
1045
1101
  }
1046
1102
  }
1047
- async function atomicWriteFile(path$1, data, ops = nodeOps) {
1048
- const tmp = path.join(path.dirname(path$1), `.${path.basename(path$1)}.tmp-${process.pid}-${Date.now()}`);
1103
+ function tempFileName2(path$1) {
1104
+ return path.join(path.dirname(path$1), `.${path.basename(path$1)}.tmp-${process.pid}-${Date.now()}-${crypto.randomUUID()}`);
1105
+ }
1106
+ async function atomicWriteFile(path, data, ops = nodeOps) {
1107
+ const tmp = tempFileName2(path);
1049
1108
  try {
1050
1109
  await ops.writeFile(tmp, data);
1051
- await ops.rename(tmp, path$1);
1110
+ await ops.rename(tmp, path);
1052
1111
  } catch (error) {
1053
1112
  await cleanup(ops, tmp);
1054
1113
  throw error;
@@ -1083,20 +1142,117 @@ async function readBounded2(filePath) {
1083
1142
  await handle.close();
1084
1143
  }
1085
1144
  }
1086
- function addEntries(node, prefix, namespace, derive, out) {
1145
+ var BACKSLASH = "\\";
1146
+ var DOT = ".";
1147
+ var ESCAPED_BACKSLASH = "\\\\";
1148
+ var ESCAPED_DOT = "\\.";
1149
+ function needsEncoding(segment) {
1150
+ return segment.includes(BACKSLASH) || segment.includes(DOT);
1151
+ }
1152
+ function encodeSegment(segment) {
1153
+ if (!needsEncoding(segment)) {
1154
+ return segment;
1155
+ }
1156
+ let out = "";
1157
+ for (const char of segment) {
1158
+ if (char === BACKSLASH) {
1159
+ out += ESCAPED_BACKSLASH;
1160
+ } else if (char === DOT) {
1161
+ out += ESCAPED_DOT;
1162
+ } else {
1163
+ out += char;
1164
+ }
1165
+ }
1166
+ return out;
1167
+ }
1168
+ function decodeSegment(segment) {
1169
+ if (!segment.includes(BACKSLASH)) {
1170
+ return segment;
1171
+ }
1172
+ let out = "";
1173
+ let escaping = false;
1174
+ for (const char of segment) {
1175
+ if (escaping) {
1176
+ out += char;
1177
+ escaping = false;
1178
+ } else if (char === BACKSLASH) {
1179
+ escaping = true;
1180
+ } else {
1181
+ out += char;
1182
+ }
1183
+ }
1184
+ return out;
1185
+ }
1186
+ function joinEncodedSegments(segments) {
1187
+ return segments.join(DOT);
1188
+ }
1189
+ function decodeKeyToSegments(key) {
1190
+ if (!key.includes(BACKSLASH)) {
1191
+ return key.split(DOT);
1192
+ }
1193
+ const segments = [];
1194
+ let current = "";
1195
+ let escaping = false;
1196
+ for (const char of key) {
1197
+ if (escaping) {
1198
+ current += BACKSLASH + char;
1199
+ escaping = false;
1200
+ } else if (char === BACKSLASH) {
1201
+ escaping = true;
1202
+ } else if (char === DOT) {
1203
+ segments.push(current);
1204
+ current = "";
1205
+ } else {
1206
+ current += char;
1207
+ }
1208
+ }
1209
+ if (escaping) {
1210
+ current += BACKSLASH;
1211
+ }
1212
+ segments.push(current);
1213
+ return segments.map(decodeSegment);
1214
+ }
1215
+ function addLeaf(ctx, segments, key, value) {
1216
+ const effectivePath = segments.join(".");
1217
+ const mapKey = joinEncodedSegments(segments.map(encodeSegment));
1218
+ if (ctx.claimed.has(effectivePath) && ctx.claimed.get(effectivePath) !== mapKey) {
1219
+ throw new AdapterError(
1220
+ "INVALID_STRUCTURE",
1221
+ "A literal dotted leaf key and a nested key path resolve to the same path."
1222
+ );
1223
+ }
1224
+ ctx.claimed.set(effectivePath, mapKey);
1225
+ const { placeholders, isPlural } = ctx.derive(key, value);
1226
+ ctx.out.set(mapKey, { key: mapKey, namespace: ctx.namespace, value, placeholders, isPlural });
1227
+ }
1228
+ function addEntries(ctx, prefix, node) {
1229
+ for (const [key, value] of Object.entries(node)) {
1230
+ const segments = [...prefix, key];
1231
+ if (typeof value === "string") {
1232
+ addLeaf(ctx, segments, key, value);
1233
+ } else {
1234
+ addEntries(ctx, segments, value);
1235
+ }
1236
+ }
1237
+ }
1238
+ function addPathEntries(node, prefix, namespace, derive, out) {
1087
1239
  for (const [key, value] of Object.entries(node)) {
1088
1240
  const path = prefix === "" ? key : `${prefix}.${key}`;
1089
1241
  if (typeof value === "string") {
1090
1242
  const { placeholders, isPlural } = derive(key, value);
1091
1243
  out.set(path, { key: path, namespace, value, placeholders, isPlural });
1092
1244
  } else {
1093
- addEntries(value, path, namespace, derive, out);
1245
+ addPathEntries(value, path, namespace, derive, out);
1094
1246
  }
1095
1247
  }
1096
1248
  }
1097
- function flattenTree(tree, namespace, derive) {
1249
+ function flattenTree(tree, namespace, derive, keyMode = "literal-leaf") {
1098
1250
  const out = /* @__PURE__ */ new Map();
1099
- addEntries(tree, "", namespace, derive, out);
1251
+ if (keyMode === "path-notation") {
1252
+ addPathEntries(tree, "", namespace, derive, out);
1253
+ return out;
1254
+ }
1255
+ addEntries({ namespace, derive, out, claimed: /* @__PURE__ */ new Map() }, [], tree);
1100
1256
  return out;
1101
1257
  }
1102
1258
  var jsonTreeSchema = zod.z.lazy(
@@ -1163,12 +1319,15 @@ function setPath(root, segments, value) {
1163
1319
  for (const segment of segments.slice(0, -1)) {
1164
1320
  node = descend(node, segment);
1165
1321
  }
1322
+ if (typeof node[leaf] === "object") {
1323
+ throw new AdapterError("INVALID_STRUCTURE", "A leaf key collides with a nested key path.");
1324
+ }
1166
1325
  node[leaf] = value;
1167
1326
  }
1168
1327
  function unflattenEntries(entries) {
1169
1328
  const root = emptyNode();
1170
1329
  for (const [key, entry] of entries) {
1171
- setPath(root, key.split("."), entry.value);
1330
+ setPath(root, decodeKeyToSegments(key), entry.value);
1172
1331
  }
1173
1332
  return root;
1174
1333
  }
@@ -1187,11 +1346,11 @@ function rethrowStructured(error, message) {
1187
1346
  }
1188
1347
  throw new AdapterError("INVALID_STRUCTURE", message);
1189
1348
  }
1190
- function toEntries(content, namespace, deriveEntry, validateTree) {
1349
+ function toEntries(content, namespace, deriveEntry, keyMode, validateTree) {
1191
1350
  try {
1192
1351
  const tree = parseJsonObject(content);
1193
1352
  validateTree?.(tree);
1194
- return flattenTree(tree, namespace, deriveEntry);
1353
+ return flattenTree(tree, namespace, deriveEntry, keyMode);
1195
1354
  } catch (error) {
1196
1355
  rethrowStructured(error, "The file could not be read as JSON.");
1197
1356
  }
@@ -1214,7 +1373,8 @@ function createJsonFileAdapter(options) {
1214
1373
  computeInvalidIcuKeys: computeInvalidIcuKeys2,
1215
1374
  validateMessage: validateMessage2,
1216
1375
  validateTree,
1217
- buildWriteTree
1376
+ buildWriteTree,
1377
+ keyMode = "literal-leaf"
1218
1378
  } = options;
1219
1379
  return {
1220
1380
  format,
@@ -1231,7 +1391,7 @@ function createJsonFileAdapter(options) {
1231
1391
  throw new AdapterError("INPUT_TOO_LARGE", "The file exceeds the maximum allowed size.");
1232
1392
  }
1233
1393
  const namespace = namespaceOf(filePath);
1234
- const entries = toEntries(outcome.content, namespace, deriveEntry, validateTree);
1394
+ const entries = toEntries(outcome.content, namespace, deriveEntry, keyMode, validateTree);
1235
1395
  const resource = { locale, namespace, format, entries };
1236
1396
  const invalidIcuKeys = computeIcu(entries, computeInvalidIcuKeys2);
1237
1397
  return { resource, invalidIcuKeys };
@@ -1243,23 +1403,41 @@ function createJsonFileAdapter(options) {
1243
1403
  }
1244
1404
  };
1245
1405
  }
1246
- var PLACEHOLDER_PATTERN = /\{\{[^{}]*\}\}/g;
1247
- function extractI18nextPlaceholders(value) {
1248
- const seen = /* @__PURE__ */ new Set();
1406
+ var DOUBLE_BRACE_PATTERN = /\{\{[^{}]*\}\}/g;
1407
+ var I18NEXT_PATTERN = /\{\{[^{}]*\}\}|\$t\([^()]*\)/g;
1408
+ function scanTokens(value, pattern) {
1249
1409
  const result = [];
1250
- for (const match of value.matchAll(PLACEHOLDER_PATTERN)) {
1410
+ for (const match of value.matchAll(pattern)) {
1251
1411
  const token = match[0];
1252
- if (token !== void 0 && !seen.has(token)) {
1253
- seen.add(token);
1412
+ if (token !== void 0) {
1254
1413
  result.push(token);
1255
1414
  }
1256
1415
  }
1257
1416
  return result;
1258
1417
  }
1418
+ function extractDoubleBracePlaceholders(value) {
1419
+ return scanTokens(value, DOUBLE_BRACE_PATTERN);
1420
+ }
1421
+ function extractI18nextPlaceholders(value) {
1422
+ return scanTokens(value, I18NEXT_PATTERN);
1423
+ }
1259
1424
  var PLURAL_SUFFIX = /_(zero|one|two|few|many|other)$/;
1260
1425
  function isPluralKey(key) {
1261
1426
  return PLURAL_SUFFIX.test(key);
1262
1427
  }
1428
+ function pluralCategoryOf(key) {
1429
+ const match = PLURAL_SUFFIX.exec(key);
1430
+ return match?.[1];
1431
+ }
1432
+ function pluralBaseKey(key) {
1433
+ if (!isPluralKey(key)) {
1434
+ return void 0;
1435
+ }
1436
+ return key.replace(PLURAL_SUFFIX, "");
1437
+ }
1438
+ function makePluralKey(baseKey, category) {
1439
+ return `${baseKey}_${category}`;
1440
+ }
1263
1441
  function createI18nextJsonAdapter() {
1264
1442
  return createJsonFileAdapter({
1265
1443
  format: "i18next-json",
@@ -1316,16 +1494,12 @@ function analyzeIcuValue(value) {
1316
1494
  }
1317
1495
  try {
1318
1496
  const ast = icuMessageformatParser.parse(value);
1319
- const seen = /* @__PURE__ */ new Set();
1320
1497
  const placeholders = [];
1321
1498
  const state = { isPlural: false };
1322
1499
  collect(
1323
1500
  ast,
1324
1501
  (token) => {
1325
- if (!seen.has(token)) {
1326
- seen.add(token);
1327
- placeholders.push(token);
1328
- }
1502
+ placeholders.push(token);
1329
1503
  },
1330
1504
  state
1331
1505
  );
@@ -1413,13 +1587,16 @@ async function buildNgxWriteTree(entries, filePath) {
1413
1587
  function createNgxTranslateJsonAdapter() {
1414
1588
  return createJsonFileAdapter({
1415
1589
  format: "ngx-translate-json",
1416
- extractPlaceholders: extractI18nextPlaceholders,
1590
+ extractPlaceholders: extractDoubleBracePlaceholders,
1417
1591
  deriveEntry: (_key, value) => ({
1418
- placeholders: extractI18nextPlaceholders(value),
1592
+ placeholders: extractDoubleBracePlaceholders(value),
1419
1593
  isPlural: false
1420
1594
  }),
1421
1595
  validateTree: assertNotMixed,
1422
- buildWriteTree: buildNgxWriteTree
1596
+ buildWriteTree: buildNgxWriteTree,
1597
+ // ngx-translate flat style uses dotted keys as path notation, not literal leaves;
1598
+ // keep the legacy non-encoding flatten so its flat/nested round-trip is unchanged.
1599
+ keyMode: "path-notation"
1423
1600
  });
1424
1601
  }
1425
1602
  var AdapterRegistry = class {
@@ -1470,15 +1647,13 @@ var AdapterRegistry = class {
1470
1647
  return this.resolveByDetection(filePath, options.sample);
1471
1648
  }
1472
1649
  };
1473
- var PLACEHOLDER_PATTERN2 = /\{[^{}]*\}/g;
1650
+ var PLACEHOLDER_PATTERN = /(?<!\{)\{\s*([A-Za-z_][\w$-]*|\d+)\s*\}(?!\})/g;
1474
1651
  function extractVueI18nPlaceholders(value) {
1475
- const seen = /* @__PURE__ */ new Set();
1476
1652
  const result = [];
1477
- for (const match of value.matchAll(PLACEHOLDER_PATTERN2)) {
1478
- const token = match[0];
1479
- if (token !== void 0 && !seen.has(token)) {
1480
- seen.add(token);
1481
- result.push(token);
1653
+ for (const match of value.matchAll(PLACEHOLDER_PATTERN)) {
1654
+ const key = match[1];
1655
+ if (key !== void 0) {
1656
+ result.push(`{${key}}`);
1482
1657
  }
1483
1658
  }
1484
1659
  return result;
@@ -1540,8 +1715,10 @@ function failureSummary(locale, error) {
1540
1715
  translated: [],
1541
1716
  unchanged: [],
1542
1717
  orphaned: [],
1718
+ pruned: [],
1543
1719
  invalidIcuSource: [],
1544
1720
  integrityMismatches: [],
1721
+ generated: [],
1545
1722
  notices: [],
1546
1723
  error: describeError(error)
1547
1724
  };
@@ -1564,6 +1741,198 @@ function readNotices(result) {
1564
1741
  return candidate.filter(isNotice);
1565
1742
  }
1566
1743
 
1744
+ // src/flow/plural-categories.ts
1745
+ var LANGUAGE_CATEGORIES = {
1746
+ ar: ["zero", "one", "two", "few", "many", "other"],
1747
+ cy: ["zero", "one", "two", "few", "many", "other"],
1748
+ ga: ["one", "two", "few", "many", "other"],
1749
+ pl: ["one", "few", "many", "other"],
1750
+ ru: ["one", "few", "many", "other"],
1751
+ uk: ["one", "few", "many", "other"],
1752
+ be: ["one", "few", "many", "other"],
1753
+ lt: ["one", "few", "many", "other"],
1754
+ sl: ["one", "two", "few", "other"]
1755
+ };
1756
+ function isKnownRicherLanguage(locale) {
1757
+ const subtag = locale.toLowerCase().split(/[-_]/)[0] ?? "";
1758
+ return LANGUAGE_CATEGORIES[subtag] !== void 0;
1759
+ }
1760
+ function requiredCategories(locale) {
1761
+ const subtag = locale.toLowerCase().split(/[-_]/)[0] ?? "";
1762
+ return LANGUAGE_CATEGORIES[subtag] ?? ["one", "other"];
1763
+ }
1764
+ function groupPluralSources(source) {
1765
+ const groups = /* @__PURE__ */ new Map();
1766
+ for (const [key, entry] of source.entries) {
1767
+ const baseKey = pluralBaseKey(key);
1768
+ const category = pluralCategoryOf(key);
1769
+ if (baseKey === void 0 || category === void 0) {
1770
+ continue;
1771
+ }
1772
+ const group = groups.get(baseKey) ?? /* @__PURE__ */ new Map();
1773
+ group.set(category, entry);
1774
+ groups.set(baseKey, group);
1775
+ }
1776
+ return groups;
1777
+ }
1778
+ function suppliedCategories(groups) {
1779
+ const supplied = /* @__PURE__ */ new Set();
1780
+ for (const group of groups.values()) {
1781
+ for (const category of group.keys()) {
1782
+ supplied.add(category);
1783
+ }
1784
+ }
1785
+ return supplied;
1786
+ }
1787
+ function detectMissingPluralCategories(source, targetLocale, format) {
1788
+ if (format !== "i18next-json") {
1789
+ return void 0;
1790
+ }
1791
+ const groups = groupPluralSources(source);
1792
+ const supplied = suppliedCategories(groups);
1793
+ if (supplied.size === 0) {
1794
+ return void 0;
1795
+ }
1796
+ const missing = requiredCategories(targetLocale).filter((category) => !supplied.has(category));
1797
+ if (missing.length === 0) {
1798
+ return void 0;
1799
+ }
1800
+ return {
1801
+ code: "PLURAL_CATEGORIES_INCOMPLETE",
1802
+ message: `The source does not supply all CLDR plural categories the target language "${targetLocale}" requires (missing: ${missing.join(", ")}); verbatra translates only the source's plural forms and does not synthesize the others. Add the missing forms manually.`
1803
+ };
1804
+ }
1805
+ function targetPluralSetIncomplete(targetKeys, targetLocale) {
1806
+ const required = requiredCategories(targetLocale);
1807
+ const present = /* @__PURE__ */ new Map();
1808
+ for (const key of targetKeys) {
1809
+ const baseKey = pluralBaseKey(key);
1810
+ const category = pluralCategoryOf(key);
1811
+ if (baseKey === void 0 || category === void 0) {
1812
+ continue;
1813
+ }
1814
+ const set = present.get(baseKey) ?? /* @__PURE__ */ new Set();
1815
+ set.add(category);
1816
+ present.set(baseKey, set);
1817
+ }
1818
+ for (const categories of present.values()) {
1819
+ if (required.some((category) => !categories.has(category))) {
1820
+ return true;
1821
+ }
1822
+ }
1823
+ return false;
1824
+ }
1825
+ function sourcePluralBaseKeys(source) {
1826
+ const bases = /* @__PURE__ */ new Set();
1827
+ for (const key of source.entries.keys()) {
1828
+ const baseKey = pluralBaseKey(key);
1829
+ if (baseKey !== void 0) {
1830
+ bases.add(baseKey);
1831
+ }
1832
+ }
1833
+ return bases;
1834
+ }
1835
+ function isGeneratedPluralKey(key, sourceBaseKeys) {
1836
+ const baseKey = pluralBaseKey(key);
1837
+ return baseKey !== void 0 && sourceBaseKeys.has(baseKey);
1838
+ }
1839
+ function pluralIncompleteNotice(targetLocale) {
1840
+ return {
1841
+ code: "PLURAL_CATEGORIES_INCOMPLETE",
1842
+ message: `The plural set for the target language "${targetLocale}" is still incomplete: verbatra could not generate every required CLDR plural form (an unsupported case, or a generated form was withheld for a placeholder mismatch). Add the remaining forms manually.`
1843
+ };
1844
+ }
1845
+ function representativeEntry(group) {
1846
+ return group.get("other") ?? group.get("one") ?? [...group.values()][0];
1847
+ }
1848
+ function planPluralGeneration(source, targetLocale, format) {
1849
+ if (format !== "i18next-json" || !isKnownRicherLanguage(targetLocale)) {
1850
+ return { items: [] };
1851
+ }
1852
+ const required = requiredCategories(targetLocale);
1853
+ const groups = groupPluralSources(source);
1854
+ const items = [];
1855
+ for (const [baseKey, group] of groups) {
1856
+ const representative = representativeEntry(group);
1857
+ if (representative === void 0) {
1858
+ continue;
1859
+ }
1860
+ const governingEntries = [...group.values()];
1861
+ for (const category of required) {
1862
+ if (group.has(category)) {
1863
+ continue;
1864
+ }
1865
+ items.push({
1866
+ targetKey: makePluralKey(baseKey, category),
1867
+ category,
1868
+ sourceEntry: representative,
1869
+ governingEntries
1870
+ });
1871
+ }
1872
+ }
1873
+ return { items };
1874
+ }
1875
+
1876
+ // src/flow/plural-generation.ts
1877
+ function generatedLockHash(governingEntries, category) {
1878
+ const governingHashes = governingEntries.map(contentHash).sort();
1879
+ return contentHash({
1880
+ value: `${category}:${governingHashes.join("|")}`,
1881
+ placeholders: [],
1882
+ isPlural: true
1883
+ });
1884
+ }
1885
+ function syntheticEntry(item) {
1886
+ return {
1887
+ ...item.sourceEntry,
1888
+ key: item.targetKey,
1889
+ isPlural: true,
1890
+ // The CLDR category travels as data context (the meaning field), never the instruction channel.
1891
+ meaning: `CLDR plural category "${item.category}"`
1892
+ };
1893
+ }
1894
+ function staleItems(items, baseline) {
1895
+ return items.filter((item) => {
1896
+ const hash = generatedLockHash(item.governingEntries, item.category);
1897
+ return baseline.get(item.targetKey) !== hash;
1898
+ });
1899
+ }
1900
+ function buildRequest2(context, entries) {
1901
+ return {
1902
+ sourceLocale: context.sourceLocale,
1903
+ targetLocale: context.targetLocale,
1904
+ entries,
1905
+ extractPlaceholders: context.adapter.extractPlaceholders,
1906
+ ...context.glossary !== void 0 ? { glossary: context.glossary } : {},
1907
+ ...context.tone !== void 0 ? { tone: context.tone } : {}
1908
+ };
1909
+ }
1910
+ async function generatePluralForms(context) {
1911
+ const plan = planPluralGeneration(context.source, context.targetLocale, context.format);
1912
+ const stale = staleItems(plan.items, context.baseline);
1913
+ if (stale.length === 0) {
1914
+ return { accepted: [], withheld: [] };
1915
+ }
1916
+ const entries = stale.map(syntheticEntry);
1917
+ const result = await context.provider.translateBatch(buildRequest2(context, entries));
1918
+ const accepted = [];
1919
+ const withheld = [];
1920
+ for (const item of stale) {
1921
+ const value = result.values.get(item.targetKey);
1922
+ const integrity = result.integrity.get(item.targetKey);
1923
+ if (value !== void 0 && integrity?.matches === true) {
1924
+ accepted.push({
1925
+ targetKey: item.targetKey,
1926
+ entry: { ...syntheticEntry(item), value },
1927
+ lockHash: generatedLockHash(item.governingEntries, item.category)
1928
+ });
1929
+ } else {
1930
+ withheld.push(item.targetKey);
1931
+ }
1932
+ }
1933
+ return { accepted, withheld };
1934
+ }
1935
+
1567
1936
  // src/flow/locale-run.ts
1568
1937
  function emptyResource(locale, format) {
1569
1938
  return { locale, namespace: "", format, entries: /* @__PURE__ */ new Map() };
@@ -1575,7 +1944,7 @@ async function readTarget(params) {
1575
1944
  }
1576
1945
  return (await params.adapter.read(path, params.targetLocale)).resource;
1577
1946
  }
1578
- function buildRequest2(params, entries) {
1947
+ function buildRequest3(params, entries) {
1579
1948
  return {
1580
1949
  sourceLocale: params.sourceLocale,
1581
1950
  targetLocale: params.targetLocale,
@@ -1588,25 +1957,56 @@ function buildRequest2(params, entries) {
1588
1957
  async function runLocale(params) {
1589
1958
  const target = await readTarget(params);
1590
1959
  const diff = diffResources(params.source, target, { baseline: params.baseline });
1960
+ const orphaned = params.generatePlurals ? diff.orphaned.filter((key) => !isGeneratedPluralKey(key, sourcePluralBaseKeys(params.source))) : diff.orphaned;
1961
+ const pruned = params.prune ? orphaned : [];
1591
1962
  const invalidIcu = new Set(params.sourceInvalidIcuKeys);
1592
1963
  const candidates = [...diff.missing, ...diff.changed];
1593
1964
  const toTranslate = candidates.filter((key) => !invalidIcu.has(key));
1594
1965
  const invalidIcuSource = candidates.filter((key) => invalidIcu.has(key));
1966
+ const pluralNotice = detectMissingPluralCategories(
1967
+ params.source,
1968
+ params.targetLocale,
1969
+ params.format
1970
+ );
1971
+ const sdkNotices = pluralNotice ? [pluralNotice] : [];
1595
1972
  const provider = params.provider;
1596
1973
  if (provider === void 0) {
1597
1974
  return {
1598
- summary: baseSummary(params.targetLocale, diff, invalidIcuSource, toTranslate, [], []),
1975
+ summary: baseSummary({
1976
+ locale: params.targetLocale,
1977
+ unchanged: diff.unchanged,
1978
+ orphaned,
1979
+ invalidIcuSource,
1980
+ translated: toTranslate,
1981
+ generated: [],
1982
+ integrityMismatches: [],
1983
+ pruned,
1984
+ notices: sdkNotices
1985
+ }),
1599
1986
  lockEntries: {}
1600
1987
  };
1601
1988
  }
1602
1989
  const entries = toTranslate.map((key) => params.source.entries.get(key)).filter((entry) => entry !== void 0);
1603
1990
  const accepted = /* @__PURE__ */ new Map();
1604
1991
  const integrityMismatches = [];
1605
- const notices = await translateAndCheck(provider, params, entries, accepted, integrityMismatches);
1992
+ const subBatchNotices = await translateAndCheck(
1993
+ provider,
1994
+ params,
1995
+ entries,
1996
+ accepted,
1997
+ integrityMismatches
1998
+ );
1606
1999
  const merged = new Map(target.entries);
2000
+ for (const key of pruned) {
2001
+ merged.delete(key);
2002
+ }
1607
2003
  for (const [key, { value, source }] of accepted) {
1608
2004
  merged.set(key, { ...source, value, namespace: target.namespace });
1609
2005
  }
2006
+ const generation = await runGeneration(params, provider);
2007
+ for (const form of generation.accepted) {
2008
+ merged.set(form.targetKey, { ...form.entry, namespace: target.namespace });
2009
+ }
1610
2010
  const path = localeFilePath(params.cwd, params.filesPattern, params.targetLocale);
1611
2011
  await params.adapter.write(
1612
2012
  {
@@ -1617,37 +2017,83 @@ async function runLocale(params) {
1617
2017
  },
1618
2018
  path
1619
2019
  );
1620
- const withheld = /* @__PURE__ */ new Set([...integrityMismatches, ...invalidIcuSource]);
2020
+ const pluralNotices = params.generatePlurals ? pluralNoticeFor(params, merged) : sdkNotices;
2021
+ const notices = [...pluralNotices, ...subBatchNotices];
2022
+ const withheld = /* @__PURE__ */ new Set([...integrityMismatches, ...invalidIcuSource, ...generation.withheld]);
1621
2023
  return {
1622
- summary: baseSummary(
1623
- params.targetLocale,
1624
- diff,
2024
+ summary: baseSummary({
2025
+ locale: params.targetLocale,
2026
+ unchanged: diff.unchanged,
2027
+ orphaned,
1625
2028
  invalidIcuSource,
1626
- [...accepted.keys()],
1627
- integrityMismatches,
2029
+ translated: [...accepted.keys()],
2030
+ generated: generation.accepted.map((form) => form.targetKey).sort(),
2031
+ // Withheld generated forms surface alongside withheld translations (spec D4): both failed integrity.
2032
+ integrityMismatches: [...integrityMismatches, ...generation.withheld].sort(),
2033
+ pruned,
1628
2034
  notices
1629
- ),
1630
- lockEntries: computeLockEntries(params, merged, withheld)
2035
+ }),
2036
+ lockEntries: computeLockEntries(params, merged, withheld, generation.accepted)
1631
2037
  };
1632
2038
  }
1633
- function baseSummary(locale, diff, invalidIcuSource, translated, integrityMismatches, notices) {
2039
/**
 * Runs plural-form generation for one locale when it is enabled and supported.
 *
 * Generation is skipped (an empty result is returned) when
 * `params.generatePlurals` is falsy or when the provider's `kind` is anything
 * other than "llm". Otherwise the call is delegated to `generatePluralForms`
 * with the subset of `params` it needs.
 *
 * @param {object} params - Locale run parameters; the fields read here are
 *   `generatePlurals`, `source`, `sourceLocale`, `targetLocale`, `format`,
 *   `adapter`, `glossary`, `tone` and `baseline`.
 * @param {object} provider - Translation provider; only `kind` is inspected here.
 * @returns {Promise<{accepted: Array, withheld: Array}>} Both arrays are empty
 *   on the skip path. NOTE(review): the delegated result is presumably the same
 *   shape, since callers read `.accepted` and `.withheld` — confirm against
 *   `generatePluralForms`.
 */
async function runGeneration(params, provider) {
  const generationEnabled = Boolean(params.generatePlurals);
  const providerCanGenerate = provider.kind === "llm";
  if (!generationEnabled || !providerCanGenerate) {
    return { accepted: [], withheld: [] };
  }
  const { source, sourceLocale, targetLocale, format, adapter, glossary, tone, baseline } = params;
  return generatePluralForms({
    source,
    sourceLocale,
    targetLocale,
    format,
    adapter,
    provider,
    glossary,
    tone,
    baseline
  });
}
2055
/**
 * Produces the "plural set incomplete" notice list for the merged target.
 *
 * Only the "i18next-json" format is checked; every other format yields no
 * notice. For that format a single notice is returned when
 * `targetPluralSetIncomplete` reports the merged key set as incomplete for
 * the target locale.
 *
 * @param {object} params - Reads `format` and `targetLocale`.
 * @param {Map} merged - Merged target entries; only its keys are inspected.
 * @returns {Array} Either an empty array or a one-element array holding the
 *   value of `pluralIncompleteNotice(params.targetLocale)`.
 */
function pluralNoticeFor(params, merged) {
  const { format, targetLocale } = params;
  if (format !== "i18next-json") {
    return [];
  }
  const incomplete = targetPluralSetIncomplete(merged.keys(), targetLocale);
  return incomplete ? [pluralIncompleteNotice(targetLocale)] : [];
}
2064
/**
 * Builds the per-locale summary object from named parts.
 *
 * `status` is always "succeeded"; every other field is copied from `parts`
 * unchanged (no cloning, sorting or defaulting happens here).
 *
 * @param {object} parts - Fields read: `locale`, `translated`, `unchanged`,
 *   `orphaned`, `pruned`, `invalidIcuSource`, `integrityMismatches`,
 *   `generated`, `notices`.
 * @returns {object} Summary with those fields plus `status: "succeeded"`.
 */
function baseSummary(parts) {
  const {
    locale,
    translated,
    unchanged,
    orphaned,
    pruned,
    invalidIcuSource,
    integrityMismatches,
    generated,
    notices
  } = parts;
  return {
    locale,
    status: "succeeded",
    translated,
    unchanged,
    orphaned,
    pruned,
    invalidIcuSource,
    integrityMismatches,
    generated,
    notices
  };
}
1645
2078
  async function translateAndCheck(provider, params, entries, accepted, integrityMismatches) {
1646
- if (entries.length === 0) {
1647
- return [];
2079
+ const notices = [];
2080
+ for (const batch of chunk(entries, params.maxBatchSize)) {
2081
+ const subNotices = await runSubBatch(provider, params, batch, accepted, integrityMismatches);
2082
+ notices.push(...subNotices);
1648
2083
  }
1649
- const result = await provider.translateBatch(buildRequest2(params, entries));
1650
- for (const entry of entries) {
2084
+ return notices;
2085
+ }
2086
+ async function runSubBatch(provider, params, batch, accepted, integrityMismatches) {
2087
+ let result;
2088
+ try {
2089
+ result = await provider.translateBatch(buildRequest3(params, batch));
2090
+ } catch {
2091
+ for (const entry of batch) {
2092
+ integrityMismatches.push(entry.key);
2093
+ }
2094
+ return [subBatchFailedNotice(batch.length)];
2095
+ }
2096
+ for (const entry of batch) {
1651
2097
  const value = result.values.get(entry.key);
1652
2098
  const integrity = result.integrity.get(entry.key);
1653
2099
  if (value !== void 0 && integrity?.matches === true) {
@@ -1658,11 +2104,28 @@ async function translateAndCheck(provider, params, entries, accepted, integrityM
1658
2104
  }
1659
2105
  return readNotices(result);
1660
2106
  }
1661
- function computeLockEntries(params, merged, withheld) {
2107
/**
 * Creates the notice reported when a whole sub-batch fails.
 *
 * @param {number} count - Number of entries in the failed sub-batch; it is
 *   interpolated into the message as-is.
 * @returns {{code: string, message: string}} Notice with code "SUB_BATCH_FAILED".
 */
function subBatchFailedNotice(count) {
  const message = `A sub-batch of ${count} entries failed and was withheld; it will be retried next run.`;
  return { code: "SUB_BATCH_FAILED", message };
}
2113
/**
 * Splits `items` into consecutive slices of at most `size` elements.
 *
 * @param {Array} items - Items to split; the array is not modified.
 * @param {number} size - Maximum slice length; must be at least 1.
 * @returns {Array<Array>} Slices in original order; empty when `items` is empty.
 * @throws {RangeError} When `size` is not a number >= 1. Without this guard a
 *   size of 0 or a negative size never advances the loop (it hangs), and
 *   NaN/undefined yields a single empty slice that silently drops every item.
 */
function chunk(items, size) {
  if (!(size >= 1)) {
    throw new RangeError(`chunk size must be a number >= 1, received ${String(size)}`);
  }
  const chunks = [];
  for (let index = 0; index < items.length; index += size) {
    chunks.push(items.slice(index, index + size));
  }
  return chunks;
}
2120
+ function computeLockEntries(params, merged, withheld, generated) {
1662
2121
  const lockEntries = {};
2122
+ const sourceBaseKeys = sourcePluralBaseKeys(params.source);
1663
2123
  for (const key of merged.keys()) {
1664
2124
  const sourceEntry = params.source.entries.get(key);
1665
2125
  if (sourceEntry === void 0) {
2126
+ if (params.generatePlurals) {
2127
+ carryGeneratedLock(lockEntries, params.baseline, key, sourceBaseKeys);
2128
+ }
1666
2129
  continue;
1667
2130
  }
1668
2131
  if (withheld.has(key)) {
@@ -1674,8 +2137,20 @@ function computeLockEntries(params, merged, withheld) {
1674
2137
  }
1675
2138
  lockEntries[key] = contentHash(sourceEntry);
1676
2139
  }
2140
+ for (const form of generated) {
2141
+ lockEntries[form.targetKey] = form.lockHash;
2142
+ }
1677
2143
  return lockEntries;
1678
2144
  }
2145
/**
 * Carries a previously recorded lock value forward for a generated plural key.
 *
 * Nothing happens unless `isGeneratedPluralKey(key, sourceBaseKeys)` is truthy.
 * For such a key, the value stored in `baseline` under `key` (if any) is
 * copied to `lockEntries[key]`; a key missing from `baseline` is left unset.
 *
 * @param {object} lockEntries - Lock map being built; mutated in place.
 * @param {Map} baseline - Prior lock values, read with `.get(key)`.
 * @param {string} key - Target key that has no matching source entry.
 * @param {*} sourceBaseKeys - Passed through to `isGeneratedPluralKey`.
 * @returns {void}
 */
function carryGeneratedLock(lockEntries, baseline, key, sourceBaseKeys) {
  if (isGeneratedPluralKey(key, sourceBaseKeys)) {
    const previousHash = baseline.get(key);
    if (previousHash !== undefined) {
      lockEntries[key] = previousHash;
    }
  }
}
1679
2154
 
1680
2155
  // src/flow/source.ts
1681
2156
  async function readSource(config, cwd, fs, adapter) {
@@ -1702,6 +2177,9 @@ async function translate2(input, deps = {}) {
1702
2177
  const config = input.config;
1703
2178
  const cwd = input.cwd ?? process.cwd();
1704
2179
  const dryRun = input.dryRun ?? false;
2180
+ const prune = input.prune ?? config.prune ?? false;
2181
+ const generatePlurals = input.generatePlurals ?? config.generatePlurals ?? false;
2182
+ const maxBatchSize = config.maxBatchSize ?? DEFAULT_MAX_BATCH_SIZE;
1705
2183
  const fs = deps.fs ?? defaultFs;
1706
2184
  const adapter = selectAdapter(config.format, deps.adapterRegistry);
1707
2185
  const provider = dryRun ? void 0 : selectProvider(config.provider, deps.createProvider);
@@ -1724,6 +2202,9 @@ async function translate2(input, deps = {}) {
1724
2202
  format: config.format,
1725
2203
  glossary: config.glossary,
1726
2204
  tone: config.tone,
2205
+ prune,
2206
+ generatePlurals,
2207
+ maxBatchSize,
1727
2208
  fs
1728
2209
  };
1729
2210
  const { summary, lockEntries } = await runLocale(params);
@@ -2194,8 +2675,12 @@ function importLocale(params) {
2194
2675
  translated: [...buckets.accepted.keys()].sort(),
2195
2676
  unchanged: diff.unchanged,
2196
2677
  orphaned: diff.orphaned,
2678
+ // Import never prunes: orphans are reported but never removed here (pruning is a translate-flow concern).
2679
+ pruned: [],
2197
2680
  invalidIcuSource,
2198
2681
  integrityMismatches: [...buckets.mismatches].sort(),
2682
+ // Plural generation is a translate-flow concern; the manual workbook import never generates forms.
2683
+ generated: [],
2199
2684
  notices: []
2200
2685
  };
2201
2686
  return { summary, accepted: buckets.accepted, withheld: buckets.withheld };