@purposeinplay/payload-ai-translate 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +714 -0
- package/dist/alerts-collection.d.ts +21 -0
- package/dist/alerts-collection.js +159 -0
- package/dist/api.d.ts +4 -0
- package/dist/api.js +918 -0
- package/dist/bulk-translate-batches-collection.d.ts +29 -0
- package/dist/bulk-translate-batches-collection.js +404 -0
- package/dist/bulk-translate-units-collection.d.ts +35 -0
- package/dist/bulk-translate-units-collection.js +310 -0
- package/dist/client/estimated-cost-cell.d.ts +6 -0
- package/dist/client/estimated-cost-cell.js +12 -0
- package/dist/client/excluded-fields-field.d.ts +45 -0
- package/dist/client/excluded-fields-field.js +553 -0
- package/dist/client/field-translate-button.d.ts +6 -0
- package/dist/client/field-translate-button.js +199 -0
- package/dist/client/index.d.ts +6 -0
- package/dist/client/index.js +6 -0
- package/dist/client/lib/use-global-kill-switches.d.ts +20 -0
- package/dist/client/lib/use-global-kill-switches.js +58 -0
- package/dist/client/translate-button.d.ts +2 -0
- package/dist/client/translate-button.js +228 -0
- package/dist/client/translate-modal.d.ts +16 -0
- package/dist/client/translate-modal.js +549 -0
- package/dist/client/translation-progress.d.ts +10 -0
- package/dist/client/translation-progress.js +297 -0
- package/dist/components/TranslationNavGroup.d.ts +45 -0
- package/dist/components/TranslationNavGroup.js +104 -0
- package/dist/defaults.d.ts +11 -0
- package/dist/defaults.js +16 -0
- package/dist/endpoints/client-config.d.ts +44 -0
- package/dist/endpoints/client-config.js +145 -0
- package/dist/endpoints/estimate.d.ts +5 -0
- package/dist/endpoints/estimate.js +237 -0
- package/dist/endpoints/progress.d.ts +2 -0
- package/dist/endpoints/progress.js +314 -0
- package/dist/endpoints/translate.d.ts +11 -0
- package/dist/endpoints/translate.js +376 -0
- package/dist/endpoints/translation-hub/_helpers.d.ts +140 -0
- package/dist/endpoints/translation-hub/_helpers.js +297 -0
- package/dist/endpoints/translation-hub/active.d.ts +21 -0
- package/dist/endpoints/translation-hub/active.js +220 -0
- package/dist/endpoints/translation-hub/cancel.d.ts +22 -0
- package/dist/endpoints/translation-hub/cancel.js +233 -0
- package/dist/endpoints/translation-hub/enqueue.d.ts +70 -0
- package/dist/endpoints/translation-hub/enqueue.js +529 -0
- package/dist/endpoints/translation-hub/failures.d.ts +12 -0
- package/dist/endpoints/translation-hub/failures.js +67 -0
- package/dist/endpoints/translation-hub/force-reset.d.ts +20 -0
- package/dist/endpoints/translation-hub/force-reset.js +144 -0
- package/dist/endpoints/translation-hub/index.d.ts +21 -0
- package/dist/endpoints/translation-hub/index.js +20 -0
- package/dist/endpoints/translation-hub/list.d.ts +40 -0
- package/dist/endpoints/translation-hub/list.js +182 -0
- package/dist/endpoints/translation-hub/preflight.d.ts +19 -0
- package/dist/endpoints/translation-hub/preflight.js +141 -0
- package/dist/endpoints/translation-hub/retry-failed.d.ts +38 -0
- package/dist/endpoints/translation-hub/retry-failed.js +235 -0
- package/dist/endpoints/translation-hub/revert.d.ts +88 -0
- package/dist/endpoints/translation-hub/revert.js +405 -0
- package/dist/endpoints/translation-hub/status.d.ts +45 -0
- package/dist/endpoints/translation-hub/status.js +391 -0
- package/dist/endpoints/translation-hub/usage-summary.d.ts +114 -0
- package/dist/endpoints/translation-hub/usage-summary.js +481 -0
- package/dist/exports/client.d.ts +6 -0
- package/dist/exports/client.js +6 -0
- package/dist/exports/components.d.ts +6 -0
- package/dist/exports/components.js +5 -0
- package/dist/exports/index.d.ts +8 -0
- package/dist/exports/index.js +7 -0
- package/dist/exports/providers.d.ts +9 -0
- package/dist/exports/providers.js +5 -0
- package/dist/exports/views-client.d.ts +23 -0
- package/dist/exports/views-client.js +22 -0
- package/dist/exports/views.d.ts +30 -0
- package/dist/exports/views.js +29 -0
- package/dist/hooks/after-change-global.d.ts +4 -0
- package/dist/hooks/after-change-global.js +109 -0
- package/dist/hooks/after-change.d.ts +16 -0
- package/dist/hooks/after-change.js +205 -0
- package/dist/hooks/after-delete.d.ts +30 -0
- package/dist/hooks/after-delete.js +95 -0
- package/dist/index.d.ts +5 -0
- package/dist/index.js +5 -0
- package/dist/jobs-collection.d.ts +17 -0
- package/dist/jobs-collection.js +139 -0
- package/dist/lexical/classifier.d.ts +3 -0
- package/dist/lexical/classifier.js +108 -0
- package/dist/lexical/deserializer.d.ts +4 -0
- package/dist/lexical/deserializer.js +263 -0
- package/dist/lexical/placeholder-integrity.d.ts +6 -0
- package/dist/lexical/placeholder-integrity.js +21 -0
- package/dist/lexical/placeholders.d.ts +21 -0
- package/dist/lexical/placeholders.js +117 -0
- package/dist/lexical/serializer.d.ts +21 -0
- package/dist/lexical/serializer.js +233 -0
- package/dist/lexical/types.d.ts +32 -0
- package/dist/lexical/types.js +1 -0
- package/dist/lib/auth-diagnostics.d.ts +14 -0
- package/dist/lib/auth-diagnostics.js +19 -0
- package/dist/lib/batch-counts.d.ts +58 -0
- package/dist/lib/batch-counts.js +105 -0
- package/dist/lib/bulk-translate-migrations.d.ts +92 -0
- package/dist/lib/bulk-translate-migrations.js +153 -0
- package/dist/lib/coalescing-queue.d.ts +38 -0
- package/dist/lib/coalescing-queue.js +69 -0
- package/dist/lib/content-extractor.d.ts +16 -0
- package/dist/lib/content-extractor.js +410 -0
- package/dist/lib/content-hash.d.ts +1 -0
- package/dist/lib/content-hash.js +19 -0
- package/dist/lib/content-patcher.d.ts +15 -0
- package/dist/lib/content-patcher.js +293 -0
- package/dist/lib/cost-guards.d.ts +2 -0
- package/dist/lib/cost-guards.js +18 -0
- package/dist/lib/daily-spend-cap.d.ts +58 -0
- package/dist/lib/daily-spend-cap.js +233 -0
- package/dist/lib/effective-locales.d.ts +181 -0
- package/dist/lib/effective-locales.js +302 -0
- package/dist/lib/error-messages.d.ts +245 -0
- package/dist/lib/error-messages.js +626 -0
- package/dist/lib/events.d.ts +39 -0
- package/dist/lib/events.js +146 -0
- package/dist/lib/exclude-fields.d.ts +3 -0
- package/dist/lib/exclude-fields.js +64 -0
- package/dist/lib/field-breadcrumb.d.ts +31 -0
- package/dist/lib/field-breadcrumb.js +227 -0
- package/dist/lib/field-diff.d.ts +1 -0
- package/dist/lib/field-diff.js +25 -0
- package/dist/lib/field-empty.d.ts +2 -0
- package/dist/lib/field-empty.js +68 -0
- package/dist/lib/field-resolver.d.ts +3 -0
- package/dist/lib/field-resolver.js +164 -0
- package/dist/lib/group-soft-skips.d.ts +39 -0
- package/dist/lib/group-soft-skips.js +45 -0
- package/dist/lib/locale-merge.d.ts +44 -0
- package/dist/lib/locale-merge.js +357 -0
- package/dist/lib/locale-row-check.d.ts +30 -0
- package/dist/lib/locale-row-check.js +64 -0
- package/dist/lib/logger.d.ts +74 -0
- package/dist/lib/logger.js +97 -0
- package/dist/lib/manual-edit-guard.d.ts +128 -0
- package/dist/lib/manual-edit-guard.js +393 -0
- package/dist/lib/output-validation.d.ts +48 -0
- package/dist/lib/output-validation.js +148 -0
- package/dist/lib/payload-read.d.ts +16 -0
- package/dist/lib/payload-read.js +51 -0
- package/dist/lib/per-doc-claim.d.ts +90 -0
- package/dist/lib/per-doc-claim.js +140 -0
- package/dist/lib/per-doc-lock.d.ts +94 -0
- package/dist/lib/per-doc-lock.js +119 -0
- package/dist/lib/persist-usage.d.ts +91 -0
- package/dist/lib/persist-usage.js +116 -0
- package/dist/lib/progress-store.d.ts +103 -0
- package/dist/lib/progress-store.js +314 -0
- package/dist/lib/rate-limiter.d.ts +3 -0
- package/dist/lib/rate-limiter.js +53 -0
- package/dist/lib/snapshot-select.d.ts +43 -0
- package/dist/lib/snapshot-select.js +108 -0
- package/dist/lib/translate-prompt.d.ts +31 -0
- package/dist/lib/translate-prompt.js +66 -0
- package/dist/lib/translation-token-bucket.d.ts +57 -0
- package/dist/lib/translation-token-bucket.js +365 -0
- package/dist/lib/truncate-source-value.d.ts +1 -0
- package/dist/lib/truncate-source-value.js +27 -0
- package/dist/manual-edit-collection.d.ts +22 -0
- package/dist/manual-edit-collection.js +124 -0
- package/dist/plugin.d.ts +3 -0
- package/dist/plugin.js +934 -0
- package/dist/providers/ai-sdk-adapter.d.ts +35 -0
- package/dist/providers/ai-sdk-adapter.js +100 -0
- package/dist/providers/anthropic.d.ts +31 -0
- package/dist/providers/anthropic.js +66 -0
- package/dist/providers/custom.d.ts +36 -0
- package/dist/providers/custom.js +24 -0
- package/dist/providers/gemini.d.ts +20 -0
- package/dist/providers/gemini.js +48 -0
- package/dist/providers/mock.d.ts +2 -0
- package/dist/providers/mock.js +29 -0
- package/dist/providers/openai.d.ts +28 -0
- package/dist/providers/openai.js +69 -0
- package/dist/settings-global.d.ts +74 -0
- package/dist/settings-global.js +216 -0
- package/dist/tasks/bulk-translate-coordinator.d.ts +115 -0
- package/dist/tasks/bulk-translate-coordinator.js +708 -0
- package/dist/tasks/bulk-translate-doc-task.d.ts +142 -0
- package/dist/tasks/bulk-translate-doc-task.js +1000 -0
- package/dist/tasks/bulk-translate-janitor.d.ts +87 -0
- package/dist/tasks/bulk-translate-janitor.js +311 -0
- package/dist/tasks/translate-job-task.d.ts +51 -0
- package/dist/tasks/translate-job-task.js +154 -0
- package/dist/translate.d.ts +113 -0
- package/dist/translate.js +911 -0
- package/dist/translation-daily-spend-collection.d.ts +24 -0
- package/dist/translation-daily-spend-collection.js +133 -0
- package/dist/translation-rate-limits-collection.d.ts +30 -0
- package/dist/translation-rate-limits-collection.js +144 -0
- package/dist/types.d.ts +672 -0
- package/dist/types.js +1 -0
- package/dist/usage-collection.d.ts +14 -0
- package/dist/usage-collection.js +377 -0
- package/dist/views/BulkRunsHub/BatchRow.d.ts +32 -0
- package/dist/views/BulkRunsHub/BatchRow.js +1222 -0
- package/dist/views/BulkRunsHub/BucketRow.d.ts +62 -0
- package/dist/views/BulkRunsHub/BucketRow.js +982 -0
- package/dist/views/BulkRunsHub/BulkRunsHub.client.d.ts +18 -0
- package/dist/views/BulkRunsHub/BulkRunsHub.client.js +331 -0
- package/dist/views/BulkRunsHub/EmptyState.d.ts +6 -0
- package/dist/views/BulkRunsHub/EmptyState.js +64 -0
- package/dist/views/BulkRunsHub/FilterBar.d.ts +16 -0
- package/dist/views/BulkRunsHub/FilterBar.js +284 -0
- package/dist/views/BulkRunsHub/InFlightBanner.d.ts +14 -0
- package/dist/views/BulkRunsHub/InFlightBanner.js +59 -0
- package/dist/views/BulkRunsHub/StatusBadge.d.ts +64 -0
- package/dist/views/BulkRunsHub/StatusBadge.js +248 -0
- package/dist/views/BulkRunsHub/SummaryStrip.d.ts +22 -0
- package/dist/views/BulkRunsHub/SummaryStrip.js +249 -0
- package/dist/views/BulkRunsHub/bucket-grouping.d.ts +200 -0
- package/dist/views/BulkRunsHub/bucket-grouping.js +344 -0
- package/dist/views/BulkRunsHub/bucketFailureSummary.d.ts +9 -0
- package/dist/views/BulkRunsHub/bucketFailureSummary.js +36 -0
- package/dist/views/BulkRunsHub/dedupedStatusFetch.d.ts +5 -0
- package/dist/views/BulkRunsHub/dedupedStatusFetch.js +45 -0
- package/dist/views/BulkRunsHub/index.d.ts +17 -0
- package/dist/views/BulkRunsHub/index.js +80 -0
- package/dist/views/BulkRunsHub/urlFilters.d.ts +14 -0
- package/dist/views/BulkRunsHub/urlFilters.js +50 -0
- package/dist/views/BulkRunsHub/useBulkRunsList.d.ts +26 -0
- package/dist/views/BulkRunsHub/useBulkRunsList.js +204 -0
- package/dist/views/BulkRunsHub/useUrlFilters.d.ts +10 -0
- package/dist/views/BulkRunsHub/useUrlFilters.js +88 -0
- package/dist/views/TranslationHub/ActiveJobs.d.ts +6 -0
- package/dist/views/TranslationHub/ActiveJobs.js +320 -0
- package/dist/views/TranslationHub/AdvancedPanel.d.ts +17 -0
- package/dist/views/TranslationHub/AdvancedPanel.js +996 -0
- package/dist/views/TranslationHub/AlertBanner.d.ts +6 -0
- package/dist/views/TranslationHub/AlertBanner.js +568 -0
- package/dist/views/TranslationHub/AuditPanel.d.ts +6 -0
- package/dist/views/TranslationHub/AuditPanel.helpers.d.ts +44 -0
- package/dist/views/TranslationHub/AuditPanel.helpers.js +71 -0
- package/dist/views/TranslationHub/AuditPanel.js +1367 -0
- package/dist/views/TranslationHub/BulkTranslate.types.d.ts +242 -0
- package/dist/views/TranslationHub/BulkTranslate.types.js +36 -0
- package/dist/views/TranslationHub/BulkTranslateFailureDrawer.d.ts +19 -0
- package/dist/views/TranslationHub/BulkTranslateFailureDrawer.js +332 -0
- package/dist/views/TranslationHub/BulkTranslateMonitor.d.ts +28 -0
- package/dist/views/TranslationHub/BulkTranslateMonitor.js +305 -0
- package/dist/views/TranslationHub/BulkTranslateNarrowViewportBanner.d.ts +3 -0
- package/dist/views/TranslationHub/BulkTranslateNarrowViewportBanner.js +42 -0
- package/dist/views/TranslationHub/BulkTranslatePostEnqueueTransition.d.ts +26 -0
- package/dist/views/TranslationHub/BulkTranslatePostEnqueueTransition.js +95 -0
- package/dist/views/TranslationHub/BulkTranslatePreflightModal.d.ts +22 -0
- package/dist/views/TranslationHub/BulkTranslatePreflightModal.js +879 -0
- package/dist/views/TranslationHub/BulkTranslateTerminalCard.d.ts +29 -0
- package/dist/views/TranslationHub/BulkTranslateTerminalCard.js +445 -0
- package/dist/views/TranslationHub/BulkTranslateTrigger.d.ts +66 -0
- package/dist/views/TranslationHub/BulkTranslateTrigger.js +161 -0
- package/dist/views/TranslationHub/EditorRecentRunsPanel.d.ts +33 -0
- package/dist/views/TranslationHub/EditorRecentRunsPanel.js +290 -0
- package/dist/views/TranslationHub/Hub.client.d.ts +74 -0
- package/dist/views/TranslationHub/Hub.client.js +357 -0
- package/dist/views/TranslationHub/ModelCombobox.d.ts +14 -0
- package/dist/views/TranslationHub/ModelCombobox.js +415 -0
- package/dist/views/TranslationHub/PerCollectionConfig.d.ts +10 -0
- package/dist/views/TranslationHub/PerCollectionConfig.helpers.d.ts +16 -0
- package/dist/views/TranslationHub/PerCollectionConfig.helpers.js +19 -0
- package/dist/views/TranslationHub/PerCollectionConfig.js +759 -0
- package/dist/views/TranslationHub/SettingsRail.d.ts +11 -0
- package/dist/views/TranslationHub/SettingsRail.js +382 -0
- package/dist/views/TranslationHub/StatusStrip.d.ts +6 -0
- package/dist/views/TranslationHub/StatusStrip.js +451 -0
- package/dist/views/TranslationHub/UsageTable.d.ts +6 -0
- package/dist/views/TranslationHub/UsageTable.helpers.d.ts +69 -0
- package/dist/views/TranslationHub/UsageTable.helpers.js +49 -0
- package/dist/views/TranslationHub/UsageTable.js +1240 -0
- package/dist/views/TranslationHub/alertGrouping.d.ts +70 -0
- package/dist/views/TranslationHub/alertGrouping.js +99 -0
- package/dist/views/TranslationHub/index.d.ts +20 -0
- package/dist/views/TranslationHub/index.js +109 -0
- package/dist/views/TranslationHub/tabNavigation.d.ts +53 -0
- package/dist/views/TranslationHub/tabNavigation.js +74 -0
- package/dist/views/TranslationHub/terminalBannerVisibility.d.ts +33 -0
- package/dist/views/TranslationHub/terminalBannerVisibility.js +124 -0
- package/dist/views/TranslationHub/useBulkTranslateActive.d.ts +49 -0
- package/dist/views/TranslationHub/useBulkTranslateActive.js +251 -0
- package/dist/views/TranslationHub/useFocusTrap.d.ts +6 -0
- package/dist/views/TranslationHub/useFocusTrap.js +81 -0
- package/dist/views/TranslationHub/useTranslationHubUsageSummary.d.ts +77 -0
- package/dist/views/TranslationHub/useTranslationHubUsageSummary.js +267 -0
- package/dist/views/shared/EditorError.d.ts +97 -0
- package/dist/views/shared/EditorError.js +205 -0
- package/dist/views/shared/ModelCell.d.ts +18 -0
- package/dist/views/shared/ModelCell.js +31 -0
- package/dist/views/shared/docHref.d.ts +16 -0
- package/dist/views/shared/docHref.js +26 -0
- package/dist/views/shared/fetch-error-body.d.ts +25 -0
- package/dist/views/shared/fetch-error-body.js +42 -0
- package/dist/views/shared/filterPillStyle.d.ts +35 -0
- package/dist/views/shared/filterPillStyle.js +40 -0
- package/dist/views/shared/format.d.ts +75 -0
- package/dist/views/shared/format.js +131 -0
- package/package.json +141 -0
|
@@ -0,0 +1,708 @@
|
|
|
1
|
+
import { DEFAULT_BULK_TRANSLATE_BATCHES_COLLECTION_SLUG } from '../bulk-translate-batches-collection.js';
|
|
2
|
+
import { DEFAULT_BULK_TRANSLATE_UNITS_COLLECTION_SLUG } from '../bulk-translate-units-collection.js';
|
|
3
|
+
import { resolveTranslatableFields } from '../lib/field-resolver.js';
|
|
4
|
+
import { createScopedLogger } from '../lib/logger.js';
|
|
5
|
+
import { hashLocalizedSchema } from '../lib/snapshot-select.js';
|
|
6
|
+
/** Slug the coordinator task config is published under (see `slug` on the task). */
export const BULK_TRANSLATE_COORDINATOR_SLUG = 'bulk-translate-coordinator';

/** Worker task slug the coordinator falls back to when no `workerTaskSlug` option is given. */
export const BULK_TRANSLATE_DOC_TASK_SLUG = 'bulk-translate-doc';

// ---------------------------------------------------------------------------
// Task config
// ---------------------------------------------------------------------------

/**
 * Input schema of the coordinator task: one required text field, `batchId`,
 * identifying the batch row the tick operates on.
 */
const taskInputSchema = [
    { name: 'batchId', type: 'text', required: true }
];
|
|
18
|
+
/**
 * Builds the task config for the bulk-translate coordinator.
 *
 * The coordinator walks every (doc, locale) pair covered by `batch.scope`,
 * creating one `bulk-translate-units` row and queueing one
 * `bulk-translate-doc` task per pair. Row insert and queue insert are
 * wrapped in a single transaction per pair (closing F-DA-TOCTOU
 * scenario A) so a worker can never dequeue a unit whose row has not
 * committed yet.
 *
 * Enumerations that exceed `tickBudgetMs` trampoline: the task
 * re-enqueues itself with the same `batchId` and a persisted
 * `lastEnumerationCursor`, and resumes exactly at that collection and
 * pagination page.
 *
 * In `mode: 'canary'` the full candidate pool is enumerated (the same
 * pool `mode: 'changed'` would scope) and N units are then chosen by
 * random-stratified sampling keyed on `batchId`, so the pick is
 * reproducible.
 *
 * @param {object} [options]
 * @param {string} [options.batchesCollectionSlug] Overrides the default batches collection slug.
 * @param {string} [options.unitsCollectionSlug] Overrides the default units collection slug.
 * @param {number} [options.tickBudgetMs] Per-tick time budget in ms (default 25000).
 * @param {number} [options.pageSize] Page size used for enumeration reads (default 100).
 * @param {string} [options.workerTaskSlug] Slug of the worker task to queue (default `bulk-translate-doc`).
 * @returns {object} Task config with `slug`, `label`, `inputSchema`, `retries` and `handler`.
 */
export function buildBulkTranslateCoordinator(options = {}) {
    // Resolve every tunable once; the handler closes over the result.
    // Key order matches the argument object `runCoordinatorTick` receives.
    const tickSettings = {
        batchesSlug: options.batchesCollectionSlug ?? DEFAULT_BULK_TRANSLATE_BATCHES_COLLECTION_SLUG,
        unitsSlug: options.unitsCollectionSlug ?? DEFAULT_BULK_TRANSLATE_UNITS_COLLECTION_SLUG,
        tickBudgetMs: options.tickBudgetMs ?? 25_000,
        pageSize: options.pageSize ?? 100,
        workerSlug: options.workerTaskSlug ?? BULK_TRANSLATE_DOC_TASK_SLUG
    };

    // Runs a single tick and reports the cursor it stopped at.
    async function handler({ input, req }) {
        const { cursor } = await runCoordinatorTick({
            payload: req.payload,
            batchId: input.batchId,
            ...tickSettings
        });
        return {
            output: { ok: true, cursor }
        };
    }

    return {
        slug: BULK_TRANSLATE_COORDINATOR_SLUG,
        label: 'Bulk translate — coordinator',
        inputSchema: taskInputSchema,
        // No queue-level retries: units are deduplicated by the partial
        // unique index, so a resumed tick or a from-scratch rerun is safe
        // without the queue replaying the task automatically.
        retries: 0,
        handler
    };
}
|
|
69
|
+
/**
 * Runs one time-budgeted coordinator tick for a bulk-translate batch.
 *
 * The tick loads the batch, bails out on terminal states or a still-held
 * coordinator lease, refreshes the lease, then resumes enumeration from
 * `batch.lastEnumerationCursor` through the phases
 * `collections` -> `globals` -> (`canary`) -> `done`. Whenever the budget
 * is exhausted the tick hands the current cursor to `finishTick` with
 * `finished = false`; once every phase has run it calls `finishTick` with
 * `finished = true`.
 *
 * All time handling inside the tick (budget, lease check and the lease /
 * `startedAt` stamps written to the batch) uses the same `now()` clock, so
 * an injected clock stays consistent with what is persisted.
 *
 * @param {object} params
 * @param {object} params.payload Payload instance (`findByID`, `find`, `findGlobal`, `update`, `config`, optional `logger`).
 * @param {string} params.batchId Id of the batch row to process.
 * @param {string} params.batchesSlug Slug of the batches collection.
 * @param {string} params.unitsSlug Slug of the units collection.
 * @param {number} params.tickBudgetMs Budget in ms after which the tick stops enumerating.
 * @param {number} params.pageSize Page size for standard pagination reads.
 * @param {string} params.workerSlug Slug of the worker task queued per unit.
 * @param {() => number} [params.now] Clock returning epoch milliseconds; defaults to `Date.now`.
 * @returns {Promise<object>} For the early exits, `{ cursor, unitsCreated, finished }`;
 *   otherwise whatever `finishTick` resolves to.
 */
export async function runCoordinatorTick(params) {
    const { payload, batchId, batchesSlug, unitsSlug, tickBudgetMs, pageSize, workerSlug } = params;
    const now = params.now ?? (()=>Date.now());
    const startedAt = now();
    const batch = await payload.findByID({
        collection: batchesSlug,
        id: batchId,
        overrideAccess: true,
        depth: 0
    });
    if (!batch) {
        return {
            cursor: emptyCursor(),
            unitsCreated: 0,
            finished: true
        };
    }
    // Terminal states short-circuit. Anything other than `queued` /
    // `running` means a cancel / completion already happened and the
    // coordinator must not resurrect it.
    if (batch.status !== 'queued' && batch.status !== 'running') {
        return {
            cursor: batch.lastEnumerationCursor ?? emptyCursor(),
            unitsCreated: 0,
            finished: true
        };
    }
    // Soft lease lock (round-5 pr-reviewer blocker #1). Without this
    // guard, a duplicate coordinator delivery (admin retry, queue
    // at-least-once semantics, two parallel cron ticks all enqueueing
    // a trampoline) caused exponential coordinator fan-out: each tick
    // re-enqueues another, and 2^K coordinators eventually pile up.
    //
    // Mechanism: each tick refreshes `coordinatorActiveAt`. A second
    // coordinator finding the timestamp fresh within the lease window
    // exits — it assumes the live one will handle the work. The lease
    // is best-effort, not absolute: worst case two coordinators run
    // for the lease window, but the partial unique index on units +
    // the trampoline-skip prevent duplicate row creation.
    const COORDINATOR_LEASE_MS = 60_000;
    const coordinatorActiveAt = batch.coordinatorActiveAt ? new Date(batch.coordinatorActiveAt).getTime() : Number.NEGATIVE_INFINITY;
    // Single clock reading shared by the lease check and the stamp written
    // below, so the persisted lease is always comparable to later checks.
    const leaseCheckedAt = now();
    // Lease is "fresh" only when a real timestamp exists AND it's
    // within the lease window. Unset (`NEGATIVE_INFINITY`) always
    // fails the check — first tick proceeds normally.
    if (coordinatorActiveAt > leaseCheckedAt - COORDINATOR_LEASE_MS) {
        payload.logger?.debug?.(`[ai-translate] coordinator: lease still held for batch ${batchId} (last tick ${Math.round((leaseCheckedAt - coordinatorActiveAt) / 1000)}s ago) — exiting to avoid fan-out`);
        return {
            cursor: batch.lastEnumerationCursor ?? emptyCursor(),
            unitsCreated: 0,
            finished: false
        };
    }
    // Stamp derived from the tick clock rather than the wall clock. The
    // first tick (queued -> running) also records `startedAt`; later ticks
    // only refresh the lease.
    const tickStamp = new Date(leaseCheckedAt).toISOString();
    const leaseRefresh = batch.status === 'queued' ? {
        status: 'running',
        startedAt: tickStamp,
        coordinatorActiveAt: tickStamp
    } : {
        coordinatorActiveAt: tickStamp
    };
    await payload.update({
        collection: batchesSlug,
        id: batchId,
        data: leaseRefresh,
        overrideAccess: true
    });
    const scope = batch.scope;
    const cursor = batch.lastEnumerationCursor ?? emptyCursor();
    let unitsCreated = 0;
    const overBudget = ()=>now() - startedAt >= tickBudgetMs;
    // Budget exhausted: hand the cursor over without marking the run finished.
    const pauseTick = ()=>finishTick(payload, batchesSlug, unitsSlug, batchId, cursor, false, unitsCreated);
    // Creates the units for one document / global and accumulates the count.
    const enqueueDoc = async (collection, doc, docId, localizedFields)=>{
        const created = await enqueueUnitsForDoc({
            payload,
            batchId,
            unitsSlug,
            workerSlug,
            collection,
            doc,
            docId,
            localizedFields,
            scope
        });
        unitsCreated += created;
    };
    // -------------------------------------------------------------------
    // Phase 1 — collections
    // -------------------------------------------------------------------
    if (cursor.phase === 'collections') {
        const collectionSlugs = filterCollectionSlugs(scope);
        while(cursor.collectionIndex < collectionSlugs.length){
            if (overBudget()) {
                return pauseTick();
            }
            const slug = collectionSlugs[cursor.collectionIndex];
            const fields = getCollectionFields(payload, slug);
            if (!fields) {
                // Slug not present in the config — move on to the next collection.
                cursor.collectionIndex += 1;
                cursor.page = 1;
                continue;
            }
            const localizedFields = resolveTranslatableFields(fields);
            if (scope.documentIds && scope.documentIds.length > 0) {
                // Documents-explicit mode: one targeted read per id instead of
                // walking pages. `cursor.page` doubles as a 1-based index into
                // the id list.
                while(cursor.page <= scope.documentIds.length){
                    if (overBudget()) {
                        return pauseTick();
                    }
                    const docId = scope.documentIds[cursor.page - 1];
                    try {
                        const doc = await payload.findByID({
                            collection: slug,
                            id: docId,
                            locale: scope.sourceLocale,
                            fallbackLocale: null,
                            depth: 0,
                            overrideAccess: true,
                            draft: true
                        });
                        if (doc) {
                            await enqueueDoc(slug, doc, String(doc.id ?? docId), localizedFields);
                        }
                    } catch (err) {
                        // A failing id is logged and skipped: the run ends up with
                        // no units for that id rather than aborting entirely on
                        // one bad id.
                        const log = createScopedLogger(payload, {
                            component: 'bulk.coord',
                            batchId,
                            collection: slug,
                            documentId: docId
                        });
                        log.event('warn', 'bulk.coord.docId.fetch.failed', {
                            err,
                            collection: slug,
                            docId
                        });
                    }
                    cursor.page += 1;
                }
                cursor.collectionIndex += 1;
                cursor.page = 1;
                continue;
            }
            // Standard pagination mode.
            // eslint-disable-next-line no-constant-condition
            while(true){
                if (overBudget()) {
                    return pauseTick();
                }
                const page = await payload.find({
                    collection: slug,
                    page: cursor.page,
                    limit: pageSize,
                    depth: 0,
                    locale: scope.sourceLocale,
                    overrideAccess: true,
                    draft: true
                });
                const docs = page.docs ?? [];
                for (const doc of docs){
                    // Skip docs without a usable id. Payload normally guarantees
                    // `id` for every persisted row, but findMany has been observed
                    // returning rows with `null` / `undefined` id under specific
                    // adapter quirks (e.g. soft-deleted drafts). Without this
                    // guard, `String(null)` becomes the literal string `"null"`
                    // and the worker later fails the unit with "Not Found".
                    if (doc.id === null || doc.id === undefined || doc.id === '') {
                        continue;
                    }
                    await enqueueDoc(slug, doc, String(doc.id), localizedFields);
                }
                if (!page.hasNextPage) break;
                cursor.page += 1;
            }
            cursor.collectionIndex += 1;
            cursor.page = 1;
        }
        cursor.phase = 'globals';
        cursor.globalIndex = 0;
    }
    // -------------------------------------------------------------------
    // Phase 2 — globals
    // -------------------------------------------------------------------
    if (cursor.phase === 'globals') {
        const globalSlugs = scope.globals ?? [];
        while(cursor.globalIndex < globalSlugs.length){
            if (overBudget()) {
                return pauseTick();
            }
            const slug = globalSlugs[cursor.globalIndex];
            const fields = getGlobalFields(payload, slug);
            if (!fields) {
                cursor.globalIndex += 1;
                continue;
            }
            const localizedFields = resolveTranslatableFields(fields);
            let doc = null;
            try {
                doc = await payload.findGlobal({
                    slug: slug,
                    locale: scope.sourceLocale,
                    fallbackLocale: null,
                    depth: 0,
                    overrideAccess: true
                });
            } catch (err) {
                // A global that cannot be read is logged and skipped.
                const log = createScopedLogger(payload, {
                    component: 'bulk.coord',
                    batchId,
                    collection: slug
                });
                log.event('warn', 'bulk.coord.global.fetch.failed', {
                    err,
                    globalSlug: slug
                });
                doc = null;
            }
            if (doc) {
                // Globals have a fixed identity — use the slug as the
                // documentId for the partial unique index. Workers branch
                // on collection-vs-global by checking whether the slug
                // resolves to a registered global at execution time.
                await enqueueDoc(slug, doc, slug, localizedFields);
            }
            cursor.globalIndex += 1;
        }
        cursor.phase = scope.mode === 'canary' ? 'canary' : 'done';
    }
    // -------------------------------------------------------------------
    // Phase 3 — canary sampling
    // -------------------------------------------------------------------
    if (cursor.phase === 'canary') {
        const limit = scope.canaryLimit ?? 10;
        await applyCanarySampling(payload, batchId, unitsSlug, limit);
        cursor.phase = 'done';
    }
    // -------------------------------------------------------------------
    // Phase 4 — done: stamp totalUnits.
    // -------------------------------------------------------------------
    return finishTick(payload, batchesSlug, unitsSlug, batchId, cursor, true, unitsCreated);
}
|
|
342
|
+
/**
 * Creates the starting enumeration cursor: the `collections` phase, first
 * collection, first page, first global. A fresh object is returned on every
 * call, so callers are free to mutate it.
 *
 * @returns {{ phase: string, collectionIndex: number, page: number, globalIndex: number }}
 */
function emptyCursor() {
    const initial = {};
    initial.phase = 'collections';
    initial.collectionIndex = 0;
    initial.page = 1;
    initial.globalIndex = 0;
    return initial;
}
|
|
350
|
+
/**
 * Lists the collection slugs a scope targets: `scope.collections` minus
 * anything named in `scope.excludeCollections`. Either list may be
 * missing, in which case it is treated as empty. Input order is kept.
 *
 * @param {{ collections?: string[], excludeCollections?: string[] }} scope
 * @returns {string[]} A new array of the remaining slugs.
 */
function filterCollectionSlugs(scope) {
    const requested = scope.collections ?? [];
    const denied = new Set(scope.excludeCollections ?? []);
    const isAllowed = (candidate)=>!denied.has(candidate);
    return requested.filter(isAllowed);
}
|
|
355
|
+
/**
 * Looks up the `fields` of the collection registered under `slug` in
 * `payload.config.collections`.
 *
 * @param {object} payload Object whose optional `config.collections` array is searched.
 * @param {string} slug Collection slug to match exactly.
 * @returns {Array|undefined} The first matching collection's `fields`, or
 *   `undefined` when the config, the list or the slug is missing.
 */
function getCollectionFields(payload, slug) {
    const registered = payload.config?.collections ?? [];
    for (const entry of registered){
        if (entry.slug === slug) {
            return entry.fields;
        }
    }
    return undefined;
}
|
|
360
|
+
/**
 * Looks up the `fields` of the global registered under `slug` in
 * `payload.config.globals`.
 *
 * @param {object} payload Object whose optional `config.globals` array is searched.
 * @param {string} slug Global slug to match exactly.
 * @returns {Array|undefined} The first matching global's `fields`, or
 *   `undefined` when the config, the list or the slug is missing.
 */
function getGlobalFields(payload, slug) {
    const registered = payload.config?.globals ?? [];
    for (const entry of registered){
        if (entry.slug === slug) {
            return entry.fields;
        }
    }
    return undefined;
}
|
|
365
|
+
/**
 * Convert a batch id to the primitive the units collection should store.
 *
 * Only strings that are *entirely* a base-10 integer become numbers.
 * `Number.parseInt` must not be used here: it accepts numeric prefixes, so a
 * UUID or ObjectId such as "507f1f77bcf8…" would silently turn into 507 and
 * point the unit at the wrong (or a non-existent) batch.
 *
 * @param {string|number} batchId - Batch id as received by the coordinator.
 * @returns {string|number} A number for integer-like input, otherwise the
 *   original value untouched.
 */
function coerceUnitBatchId(batchId) {
  if (typeof batchId === 'number') {
    return batchId;
  }
  const text = String(batchId).trim();
  if (!/^-?\d+$/.test(text)) {
    return batchId;
  }
  const numeric = Number(text);
  return Number.isSafeInteger(numeric) ? numeric : batchId;
}

/**
 * Insert a unit row and queue its worker task for every target locale.
 *
 * For each locale in `scope.locales` other than `scope.sourceLocale`, the
 * unit insert and the job enqueue run inside one transaction when the DB
 * adapter provides one (F-DA-TOCTOU scenario A), and non-transactionally
 * otherwise. Any failure for a locale rolls that locale's transaction back,
 * is logged, and the loop moves on; a unique-violation (another batch already
 * owns this collection/doc/locale) is logged at debug level only.
 *
 * @param {object} params
 * @param {object} params.payload - Payload instance (`create`, `find`,
 *   `jobs.queue`, optional `logger`).
 * @param {string|number} params.batchId - Owning batch id.
 * @param {string} params.unitsSlug - Slug of the units collection.
 * @param {string} params.workerSlug - Task slug of the per-unit worker.
 * @param {string} params.collection - Collection (or global) slug of the doc.
 * @param {string|number} params.docId - Id stored as the unit's `documentId`.
 * @param {Array} params.localizedFields - Fields passed to the schema hash.
 * @param {{locales?: string[], sourceLocale?: string}} params.scope
 * @returns {Promise<number>} Number of units whose worker task was queued.
 */
async function enqueueUnitsForDoc(params) {
  const { payload, batchId, unitsSlug, workerSlug, collection, docId, localizedFields, scope } = params;

  // Schema hash is computed once per doc and reused for every locale. The
  // coordinator deliberately stores no pre-run snapshot: the doc it sees is
  // the SOURCE-locale read, and the worker captures the real per-locale
  // snapshot just before its first write.
  let schemaHash;
  try {
    schemaHash = hashLocalizedSchema(localizedFields);
  } catch (err) {
    payload.logger?.warn?.(`[ai-translate] coordinator: schema hash failed for ${collection}/${docId}: ${err instanceof Error ? err.message : String(err)}`);
  }

  // The relationship validator wants the FK in the same primitive type as the
  // batches `id` column, while task input delivers it as a string.
  const storedBatchId = coerceUnitBatchId(batchId);

  let created = 0;
  for (const locale of scope.locales ?? []) {
    // Translating the source locale onto itself is a no-op; skipping it here
    // keeps the worker free of that special case.
    if (locale === scope.sourceLocale) continue;

    // `transactionID` is falsy when the adapter has no transaction support;
    // every op below then simply runs without a `req`.
    const transactionID = await tryBeginTransaction(payload);
    const txReq = transactionID ? { transactionID } : undefined;
    const reqOption = txReq ? { req: txReq } : {};

    try {
      const createdRow = await payload.create({
        collection: unitsSlug,
        data: {
          batchId: storedBatchId,
          collection,
          documentId: docId,
          locale,
          status: 'pending',
          attempts: 0,
          schemaHash: schemaHash ?? null,
        },
        overrideAccess: true,
        ...reqOption,
      });

      // The unit row's id is the worker's input. Prefer the id returned by
      // `create`; only fall back to re-reading the row when it is missing.
      let unitId = createdRow?.id ? String(createdRow.id) : undefined;
      if (!unitId) {
        const refind = await payload.find({
          collection: unitsSlug,
          where: {
            and: [
              { batchId: { equals: batchId } },
              { collection: { equals: collection } },
              { documentId: { equals: docId } },
              { locale: { equals: locale } },
            ],
          },
          limit: 1,
          sort: '-createdAt',
          overrideAccess: true,
          ...reqOption,
        });
        const newRow = refind.docs[0];
        unitId = newRow?.id ? String(newRow.id) : undefined;
      }

      if (unitId) {
        await payload.jobs.queue({
          task: workerSlug,
          input: { unitId },
          ...reqOption,
        });
      }
      if (transactionID) {
        await commitTransaction(payload, transactionID);
      }

      // Only count units whose worker task was actually queued. A row with
      // no resolvable id is an orphan and must not inflate the local counter.
      if (unitId) {
        created += 1;
      } else {
        payload.logger?.error?.(`[ai-translate] coordinator: orphan unit row created for ${collection}/${docId}/${locale} (refind returned empty after create) — janitor will rescue at lease expiry`);
      }
    } catch (err) {
      if (transactionID) {
        // A failed rollback is reported but never escalated: the partial
        // unique index keeps the data correct even if the rollback was a
        // no-op against a closed connection.
        await rollbackTransaction(payload, transactionID).catch((rollbackErr) => {
          payload.logger?.warn?.(`[ai-translate] coordinator: rollback failed for ${collection}/${docId}/${locale}: ${rollbackErr instanceof Error ? rollbackErr.message : String(rollbackErr)}`);
        });
      }
      const msg = err instanceof Error ? err.message : String(err);
      if (/duplicate key|unique constraint|UNIQUE/i.test(msg)) {
        payload.logger?.debug?.(`[ai-translate] coordinator: skipped duplicate unit ${collection}/${docId}/${locale} (already pending or running)`);
      } else {
        payload.logger?.error?.(`[ai-translate] coordinator: failed to enqueue unit ${collection}/${docId}/${locale}: ${msg}`);
      }
    }
  }
  return created;
}
|
|
520
|
+
/**
 * Start a DB transaction if the adapter supports it.
 *
 * @param {object} payload - Payload instance; `payload.db` is read.
 * @returns {Promise<*>} Whatever `db.beginTransaction()` resolves to, or
 *   `undefined` when the adapter exposes no `beginTransaction` function or
 *   the call throws (the failure is reported as a
 *   `bulk.coord.txn-begin.failed` event on a scoped logger).
 */
async function tryBeginTransaction(payload) {
  const { db } = payload;
  if (typeof db?.beginTransaction !== 'function') {
    return undefined;
  }
  try {
    return await db.beginTransaction();
  } catch (err) {
    createScopedLogger(payload, { component: 'bulk.coord' }).event('error', 'bulk.coord.txn-begin.failed', { err });
    return undefined;
  }
}
|
|
537
|
+
/**
 * Commit a transaction through the DB adapter.
 *
 * Does nothing when the adapter exposes no `commitTransaction` function;
 * errors raised by the adapter propagate to the caller.
 *
 * @param {object} payload - Payload instance; `payload.db` is read.
 * @param {*} transactionID - Id passed through to `db.commitTransaction`.
 * @returns {Promise<void>}
 */
async function commitTransaction(payload, transactionID) {
  const adapter = payload.db;
  if (typeof adapter?.commitTransaction !== 'function') {
    return;
  }
  await adapter.commitTransaction(transactionID);
}
|
|
543
|
+
/**
 * Roll a transaction back through the DB adapter.
 *
 * Does nothing when the adapter exposes no `rollbackTransaction` function;
 * errors raised by the adapter propagate to the caller.
 *
 * @param {object} payload - Payload instance; `payload.db` is read.
 * @param {*} transactionID - Id passed through to `db.rollbackTransaction`.
 * @returns {Promise<void>}
 */
async function rollbackTransaction(payload, transactionID) {
  const adapter = payload.db;
  if (typeof adapter?.rollbackTransaction !== 'function') {
    return;
  }
  await adapter.rollbackTransaction(transactionID);
}
|
|
549
|
+
/**
 * Stratified sampling for canary mode (Decision #26 + F-DA-CANARY).
 *
 * After enumeration, reads the batch's pending units (at most 10_000), keeps
 * a `limit`-sized sample and marks every other unit `skipped` with
 * `failureMessage: 'canary_not_sampled'` so it never reaches the LLM.
 *
 * Selection:
 *   1. Units are bucketed by `collection`.
 *   2. Each bucket is ordered by `seededHash(`${batchId}:${unit.id}`)`, with
 *      the unit id as tie-breaker so the order never depends on the order in
 *      which the database returned the rows.
 *   3. Buckets are visited round-robin in collection-name order, taking one
 *      unit per bucket per round until `limit` units are selected or every
 *      bucket is exhausted.
 *
 * The sample is therefore a pure function of `batchId` and the set of pending
 * units, which keeps coordinator retries reproducible.
 *
 * @param {object} payload - Payload instance (`find`, `update`, optional `logger`).
 * @param {string|number} batchId - Batch whose pending units are sampled.
 * @param {string} unitsSlug - Slug of the units collection.
 * @param {number} limit - Maximum number of units left pending.
 * @returns {Promise<void>}
 */
async function applyCanarySampling(payload, batchId, unitsSlug, limit) {
  // Beyond 10_000 pending units canary semantics stop making sense, so the
  // read is capped there.
  const { docs: units } = await payload.find({
    collection: unitsSlug,
    where: {
      and: [
        { batchId: { equals: batchId } },
        { status: { equals: 'pending' } },
      ],
    },
    limit: 10_000,
    depth: 0,
    overrideAccess: true,
  });
  if (units.length === 0) return;

  // Bucket by collection, hashing each unit exactly once up front rather than
  // on every comparison inside the sort.
  const byCollection = new Map();
  for (const unit of units) {
    const entry = { id: unit.id, rank: seededHash(`${batchId}:${unit.id}`) };
    const bucket = byCollection.get(unit.collection);
    if (bucket) {
      bucket.push(entry);
    } else {
      byCollection.set(unit.collection, [entry]);
    }
  }

  const compareEntries = (a, b) => {
    if (a.rank !== b.rank) return a.rank - b.rank;
    // Hash collision: fall back to the id so the order stays deterministic.
    const idA = String(a.id);
    const idB = String(b.id);
    if (idA < idB) return -1;
    return idA > idB ? 1 : 0;
  };
  const buckets = [...byCollection.entries()]
    .sort(([nameA], [nameB]) => String(nameA).localeCompare(String(nameB)))
    .map(([, entries]) => entries.sort(compareEntries));

  // Round-robin: depth 0 of every bucket, then depth 1, and so on.
  const deepest = buckets.reduce((max, bucket) => Math.max(max, bucket.length), 0);
  const selected = new Set();
  for (let depth = 0; depth < deepest && selected.size < limit; depth += 1) {
    for (const bucket of buckets) {
      if (selected.size >= limit) break;
      if (depth < bucket.length) {
        selected.add(bucket[depth].id);
      }
    }
  }

  // Everything outside the sample is skipped. A failed update is logged and
  // the remaining units are still processed.
  for (const unit of units) {
    if (selected.has(unit.id)) continue;
    try {
      await payload.update({
        collection: unitsSlug,
        id: unit.id,
        data: {
          status: 'skipped',
          failureCode: null,
          failureMessage: 'canary_not_sampled',
          completedAt: new Date().toISOString(),
        },
        overrideAccess: true,
      });
    } catch (err) {
      payload.logger?.warn?.(`[ai-translate] coordinator: canary skip failed for unit ${unit.id}: ${err instanceof Error ? err.message : String(err)}`);
    }
  }
}
|
|
646
|
+
/**
 * 32-bit FNV-1a hash over the UTF-16 code units of `s`.
 *
 * Deterministic per input string; not cryptographic. The multiply uses
 * `Math.imul` because a plain `h * 16777619` can exceed 2^53, where double
 * arithmetic silently drops low bits before the value is truncated to 32 bits.
 *
 * @param {string} s - Text to hash.
 * @returns {number} Unsigned 32-bit integer in [0, 2^32 - 1].
 */
function seededHash(s) {
  const FNV_OFFSET_BASIS = 2166136261;
  const FNV_PRIME = 16777619;
  let h = FNV_OFFSET_BASIS;
  for (let i = 0; i < s.length; i++) {
    h ^= s.charCodeAt(i);
    h = Math.imul(h, FNV_PRIME);
  }
  // `^=` and Math.imul yield signed 32-bit values; expose it as unsigned.
  return h >>> 0;
}
|
|
655
|
+
/**
 * Closing step of every coordinator tick.
 *
 * Persists the enumeration cursor on the batch row. When `finished`, it also
 * stamps `totalUnits` and clears `coordinatorActiveAt`; otherwise it queues
 * another coordinator run for the same batch.
 *
 * @param {object} payload - Payload instance (`count`, `update`, `jobs.queue`,
 *   optional `logger`).
 * @param {string} batchesSlug - Slug of the batches collection.
 * @param {string} unitsSlug - Slug of the units collection.
 * @param {string|number} batchId - Batch being enumerated.
 * @param {object} cursor - Cursor stored as `lastEnumerationCursor`.
 * @param {boolean} finished - Whether enumeration is complete.
 * @param {number} unitsCreated - Units created during this tick; echoed back.
 * @returns {Promise<{cursor: object, unitsCreated: number, finished: boolean}>}
 *   The tick state, returned for tests.
 */
async function finishTick(payload, batchesSlug, unitsSlug, batchId, cursor, finished, unitsCreated) {
  const patch = { lastEnumerationCursor: cursor };

  if (finished) {
    // totalUnits comes from the units collection itself, not the local
    // counter, which cannot see duplicates the partial unique index rejected.
    const { totalDocs } = await payload.count({
      collection: unitsSlug,
      where: { batchId: { equals: batchId } },
      overrideAccess: true,
    });
    patch.totalUnits = totalDocs;
    // No further ticks should run for this batch, so the lease is released.
    patch.coordinatorActiveAt = null;
  }

  await payload.update({
    collection: batchesSlug,
    id: batchId,
    data: patch,
    overrideAccess: true,
  });

  if (!finished) {
    // Trampoline: hand the batch to a new coordinator run. Per the original
    // author's note, duplicate trampolines are harmless because the lease
    // check at the start of the next tick makes the later one exit early.
    try {
      await payload.jobs.queue({
        task: BULK_TRANSLATE_COORDINATOR_SLUG,
        input: { batchId },
      });
    } catch (err) {
      payload.logger?.error?.(`[ai-translate] coordinator: failed to trampoline batch ${batchId}: ${err instanceof Error ? err.message : String(err)}`);
    }
  }

  return { cursor, unitsCreated, finished };
}
|