transduck 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/index.ts ADDED
@@ -0,0 +1,251 @@
1
+ import { createHash } from 'crypto';
2
+ import { loadConfig, type TransduckConfig } from './config.js';
3
+ import { TranslationStore } from './storage.js';
4
+ import { translate as backendTranslate, translatePlural as backendTranslatePlural } from './backend.js';
5
+ import { validateTranslation, extractPlaceholders } from './validation.js';
6
+ import { getPluralCategory, getPluralCategories, interpolateVars } from './plural.js';
7
+
8
/**
 * Module-level runtime state shared by the exported functions in this file.
 */
interface State {
  /** Active configuration; null until initialize() assigns it. */
  config: TransduckConfig | null;
  /** Translation store; null until initialize() assigns it. */
  store: TranslationStore | null;
  /** Target language code; null until setLanguage() assigns it. */
  targetLang: string | null;
  /** In-flight ait() translations keyed by lock key, used to de-duplicate concurrent requests. */
  pendingTranslations: Map<string, Promise<string>>;
}

// Declared with `let` because _resetState() replaces the whole object.
let state: State = {
  config: null,
  store: null,
  targetLang: null,
  pendingTranslations: new Map(),
};
21
+
22
/**
 * Return the lowercase hex SHA-256 digest of `text`.
 * Used to derive fixed-length cache-key components from context strings.
 */
function hash(text: string): string {
  const hasher = createHash('sha256');
  hasher.update(text);
  return hasher.digest('hex');
}
25
+
26
/**
 * Prepare the module for use: resolve the configuration, open the translation
 * store and publish both on the module state.
 *
 * @param config Explicit configuration; when omitted, loadConfig() supplies it.
 *
 * State is only updated after the store has initialised successfully, so a
 * failure leaves any previous state untouched.
 */
export async function initialize(config?: TransduckConfig): Promise<void> {
  const resolvedConfig = config != null ? config : loadConfig();
  const translationStore = new TranslationStore(resolvedConfig.storagePath);
  await translationStore.initialize();
  Object.assign(state, { config: resolvedConfig, store: translationStore });
}
33
+
34
/**
 * Select the target language for subsequent translations.
 * The code is stored upper-cased.
 */
export function setLanguage(lang: string): void {
  const normalized = lang.toUpperCase();
  state.targetLang = normalized;
}
37
+
38
/**
 * Replace the module state with a fresh, uninitialised one
 * (no config, no store, no target language, no pending translations).
 */
export function _resetState(): void {
  const pristine: State = {
    config: null,
    store: null,
    targetLang: null,
    pendingTranslations: new Map(),
  };
  state = pristine;
}
41
+
42
/** Expose the current translation store, or null when initialize() has not run. */
export function _getStore(): TranslationStore | null {
  const { store } = state;
  return store;
}
45
+
46
/**
 * Translate `sourceText` into the current target language.
 *
 * Order of resolution: same-language short-circuit, store lookup, an already
 * in-flight translation of the same string, and finally the backend. On a
 * backend error or a translation that fails validation the untranslated
 * `sourceText` is returned.
 *
 * @param sourceText Text in the configured source language.
 * @param context    Optional per-string context; hashed into the cache key and
 *                   forwarded to the backend.
 * @param vars       Values interpolated into the result via interpolateVars().
 * @returns The translated (or fallback) text with `vars` interpolated.
 * @throws Error when initialize() or setLanguage() has not been called.
 */
export async function ait(
  sourceText: string,
  context?: string,
  vars?: Record<string, string | number>,
): Promise<string> {
  if (!state.config || !state.store) {
    throw new Error('transduck not initialized. Call initialize() first.');
  }
  if (!state.targetLang) {
    throw new Error('Target language not set. Call setLanguage() first.');
  }

  // Snapshot the mutable module state once. setLanguage()/_resetState() may run
  // while this call is suspended on an await; re-reading `state` afterwards
  // could store the result under a different language or dereference null.
  const cfg = state.config;
  const store = state.store;
  const targetLang = state.targetLang;
  const pendingTranslations = state.pendingTranslations;

  // setLanguage() upper-cases the target, so compare case-insensitively to
  // still short-circuit when the configured source language is lower-case.
  if (targetLang === cfg.sourceLang.toUpperCase()) {
    return interpolateVars(sourceText, vars);
  }

  const projectContextHash = hash(cfg.projectContext);
  const stringContextHash = hash(context ?? '');
  const cacheKey = {
    sourceText,
    sourceLang: cfg.sourceLang,
    targetLang,
    projectContextHash,
    stringContextHash,
  };

  // Cache lookup
  const cached = await store.lookup(cacheKey);
  if (cached !== null) return interpolateVars(cached, vars);

  // In-process dedup: join a translation of the same string that is already running.
  const lockKey = `${sourceText}|${cfg.sourceLang}|${targetLang}|${projectContextHash}|${stringContextHash}`;
  const inFlight = pendingTranslations.get(lockKey);
  if (inFlight !== undefined) {
    return interpolateVars(await inFlight, vars);
  }

  const translationPromise = (async (): Promise<string> => {
    // The outer try/finally guarantees the pending entry is removed on every
    // exit path, including the early return below and a failing lookup.
    try {
      // Re-check the store: another caller may have finished between our first
      // lookup and this point.
      const rechecked = await store.lookup(cacheKey);
      if (rechecked !== null) return rechecked;

      const apiKey = process.env[cfg.apiKeyEnv];
      try {
        const translated = await backendTranslate({
          sourceText, sourceLang: cfg.sourceLang, targetLang,
          projectContext: cfg.projectContext, stringContext: context ?? null,
          apiKey: apiKey!, model: cfg.backendModel,
          timeout: cfg.backendTimeout, maxRetries: cfg.backendMaxRetries,
        });

        if (!validateTranslation(sourceText, translated)) {
          console.warn(`[transduck] Validation failed for: ${sourceText} -> ${translated}`);
          // Record the rejected output for inspection, but serve the source text.
          await store.insert({
            ...cacheKey,
            translatedText: translated, model: cfg.backendModel, status: 'failed',
          });
          return sourceText;
        }

        await store.insert({
          ...cacheKey,
          translatedText: translated, model: cfg.backendModel, status: 'translated',
        });
        return translated;
      } catch (err) {
        console.warn(`[transduck] Backend failed for: ${sourceText}`, err);
        return sourceText;
      }
    } finally {
      pendingTranslations.delete(lockKey);
    }
  })();

  pendingTranslations.set(lockKey, translationPromise);
  const result = await translationPromise;
  return interpolateVars(result, vars);
}
127
+
128
/**
 * Translate a singular/plural pair and return the form matching `count` in the
 * current target language.
 *
 * @param one   Source-language singular form.
 * @param other Source-language plural form.
 * @param count Quantity used to select the plural category; also exposed to
 *              interpolation as `{count}` unless `opts.vars` already has `count`.
 * @param opts  Optional per-string `context` and interpolation `vars`.
 * @returns The selected translated form, or the matching source form when the
 *          backend fails or returns an unusable form, with vars interpolated.
 * @throws Error when initialize() or setLanguage() has not been called.
 */
export async function aitPlural(
  one: string,
  other: string,
  count: number,
  opts?: { context?: string; vars?: Record<string, string | number> },
): Promise<string> {
  if (!state.config || !state.store) {
    throw new Error('transduck not initialized. Call initialize() first.');
  }
  if (!state.targetLang) {
    throw new Error('Target language not set. Call setLanguage() first.');
  }

  // Snapshot mutable module state: setLanguage()/_resetState() may run while
  // this call is suspended, and the rows written below must use the same
  // language as the lookup key.
  const cfg = state.config;
  const store = state.store;
  const targetLang = state.targetLang;
  const context = opts?.context;

  // Build vars with count; a caller-supplied `count` wins over the argument.
  const vars: Record<string, string | number> =
    opts?.vars && 'count' in opts.vars ? { ...opts.vars } : { ...opts?.vars, count };

  // Untranslated fallback: choose between the two source forms by source-language rules.
  const sourceFallback = (): string =>
    interpolateVars(getPluralCategory(cfg.sourceLang, count) === 'one' ? one : other, vars);

  // Same language, 2-form language: select directly from provided forms.
  // setLanguage() upper-cases the target, hence the case-insensitive comparison.
  // Languages with more than two categories fall through so the backend can
  // supply the extra forms.
  if (
    targetLang === cfg.sourceLang.toUpperCase() &&
    getPluralCategories(cfg.sourceLang).size <= 2
  ) {
    return sourceFallback();
  }

  // Build cache key; NUL separates the two forms inside one source_text value.
  const sourceKey = one + '\x00' + other;
  const cacheKey = {
    sourceText: sourceKey,
    sourceLang: cfg.sourceLang,
    targetLang,
    projectContextHash: hash(cfg.projectContext),
    stringContextHash: hash(context ?? ''),
  };

  // Cache lookup
  const cached = await store.lookupPlural(cacheKey);
  const category = getPluralCategory(targetLang, count);
  if (category in cached) {
    return interpolateVars(cached[category], vars);
  }

  // Cache miss — call backend
  const apiKey = process.env[cfg.apiKeyEnv];
  try {
    const forms = await backendTranslatePlural({
      one, other,
      sourceLang: cfg.sourceLang, targetLang,
      projectContext: cfg.projectContext, stringContext: context ?? null,
      apiKey: apiKey!, model: cfg.backendModel,
      timeout: cfg.backendTimeout, maxRetries: cfg.backendMaxRetries,
    });

    if (typeof forms !== 'object' || forms === null || !('other' in forms)) {
      console.warn(`[transduck] Invalid plural response for: ${one} / ${other}`);
      return sourceFallback();
    }

    const validCategories = new Set(['zero', 'one', 'two', 'few', 'many', 'other']);
    // Every translated form must keep every placeholder used by either source form.
    const sourcePlaceholders = new Set([
      ...extractPlaceholders(one),
      ...extractPlaceholders(other),
    ]);
    // A form is usable when it is a non-blank string with all source placeholders.
    const isUsable = (text: unknown): text is string => {
      if (typeof text !== 'string' || text.trim() === '') return false;
      const present = extractPlaceholders(text);
      for (const p of sourcePlaceholders) {
        if (!present.has(p)) return false;
      }
      return true;
    };

    // Validate and store each form; unknown categories and empty/non-string values are skipped.
    for (const [cat, text] of Object.entries(forms)) {
      if (!validCategories.has(cat) || typeof text !== 'string' || !text) continue;
      await store.insertPlural({
        ...cacheKey,
        pluralCategory: cat, translatedText: text,
        model: cfg.backendModel, status: isUsable(text) ? 'translated' : 'failed',
      });
    }

    // Select the right form
    if (category in forms) {
      const selected: unknown = forms[category];
      if (isUsable(selected)) {
        return interpolateVars(selected, vars);
      }
    }

    // Fallback
    return sourceFallback();
  } catch (err) {
    console.warn(`[transduck] Backend failed for plural: ${one} / ${other}`, err);
    return sourceFallback();
  }
}
package/src/plural.ts ADDED
@@ -0,0 +1,47 @@
1
+ /**
2
+ * CLDR plural rule helpers and variable interpolation.
3
+ */
4
+
5
/**
 * Get the CLDR plural category for a count in the given language.
 * Uses the built-in Intl.PluralRules API.
 *
 * @param lang  Language code in any letter case; underscore-separated codes
 *              such as `pt_BR` are accepted and treated as `pt-BR`.
 * @param count Quantity to classify.
 * @returns The category name reported by Intl.PluralRules (e.g. `one`, `other`).
 * @throws RangeError when `lang` is still not a valid locale tag after normalisation.
 */
export function getPluralCategory(lang: string, count: number): string {
  // Intl rejects '_' as a subtag separator, so normalise it to '-'.
  const locale = lang.replace(/_/g, '-').toLowerCase();
  return new Intl.PluralRules(locale).select(count);
}
13
+
14
/**
 * Get all CLDR plural categories used by a language.
 * Intl.PluralRules is probed with a fixed list of sample values and the
 * distinct categories it reports are collected.
 *
 * @param lang Language code in any letter case; underscore-separated codes
 *             such as `pt_BR` are accepted and treated as `pt-BR`.
 * @returns The set of category names seen for the samples, always including `other`.
 * @throws RangeError when `lang` is still not a valid locale tag after normalisation.
 */
export function getPluralCategories(lang: string): Set<string> {
  // Intl rejects '_' as a subtag separator, so normalise it to '-'.
  const rules = new Intl.PluralRules(lang.replace(/_/g, '-').toLowerCase());
  // Test values that trigger different categories across languages.
  // Floats are included for languages where "other" is only reached by non-integers.
  const testValues = [0, 0.5, 1, 1.5, 2, 2.5, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 19, 20, 21, 22, 100, 101, 102];
  const categories = new Set<string>(testValues.map((n) => rules.select(n)));
  // 'other' is always a valid CLDR category
  categories.add('other');
  return categories;
}
32
+
33
/**
 * Safely interpolate variables into a translated string.
 * Replaces every `{key}` with the corresponding value from `vars`.
 *
 * Substitution happens in a single pass with a replacer function, so:
 *  - `$`-sequences inside a value (`$&`, `$'`, `$$`, ...) are inserted verbatim
 *    instead of being interpreted as replacement patterns;
 *  - a value that itself contains `{otherKey}` is not expanded again.
 *
 * @param text Template text.
 * @param vars Values keyed by placeholder name; null/undefined/empty returns `text` unchanged.
 * @returns `text` with all known placeholders substituted; unknown placeholders are left as-is.
 */
export function interpolateVars(
  text: string,
  vars?: Record<string, string | number> | null,
): string {
  if (!vars) return text;
  const keys = Object.keys(vars);
  if (keys.length === 0) return text;

  // Escape regex metacharacters so keys are matched literally.
  const alternation = keys
    .map((key) => key.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'))
    .join('|');
  const placeholder = new RegExp(`\\{(${alternation})\\}`, 'g');
  return text.replace(placeholder, (_match, key: string) => String(vars[key]));
}
package/src/storage.ts ADDED
@@ -0,0 +1,229 @@
1
+ import { DuckDBInstance, DuckDBResultReader } from '@duckdb/node-api';
2
+ import type { DuckDBConnection } from '@duckdb/node-api';
3
+
4
/** v2 schema: one row per (source string, language pair, context hashes, plural category). */
const SCHEMA_V2 = `
  CREATE TABLE IF NOT EXISTS translations (
    source_text TEXT NOT NULL,
    source_lang TEXT NOT NULL,
    target_lang TEXT NOT NULL,
    project_context_hash TEXT NOT NULL,
    string_context_hash TEXT NOT NULL,
    plural_category TEXT NOT NULL DEFAULT '',
    translated_text TEXT NOT NULL,
    model TEXT NOT NULL,
    status TEXT NOT NULL DEFAULT 'translated',
    created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
    PRIMARY KEY(source_text, source_lang, target_lang, project_context_hash, string_context_hash, plural_category)
  );
`;

/**
 * Rebuilds a v1 table (no plural_category column) as v2, copying every row
 * with plural_category = ''.
 * NOTE(review): the statements are not wrapped in an explicit transaction here;
 * confirm a failure part-way cannot leave a stray translations_v2 behind.
 */
const MIGRATION_V1_TO_V2 = `
  CREATE TABLE translations_v2 (
    source_text TEXT NOT NULL,
    source_lang TEXT NOT NULL,
    target_lang TEXT NOT NULL,
    project_context_hash TEXT NOT NULL,
    string_context_hash TEXT NOT NULL,
    plural_category TEXT NOT NULL DEFAULT '',
    translated_text TEXT NOT NULL,
    model TEXT NOT NULL,
    status TEXT NOT NULL DEFAULT 'translated',
    created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
    PRIMARY KEY(source_text, source_lang, target_lang, project_context_hash, string_context_hash, plural_category)
  );
  INSERT INTO translations_v2
  SELECT source_text, source_lang, target_lang, project_context_hash,
         string_context_hash, '' as plural_category, translated_text,
         model, status, created_at
  FROM translations;
  DROP TABLE translations;
  ALTER TABLE translations_v2 RENAME TO translations;
`;

/** Returns a row only when the translations table already has the v2 plural_category column. */
const CHECK_PLURAL_COLUMN = `
  SELECT column_name FROM information_schema.columns
  WHERE table_name = 'translations' AND column_name = 'plural_category'
`;

/** Non-plural lookup; only rows with status 'translated' are served. */
const LOOKUP_SQL = `
  SELECT translated_text FROM translations
  WHERE source_text = $1 AND source_lang = $2 AND target_lang = $3
    AND project_context_hash = $4 AND string_context_hash = $5
    AND plural_category = '' AND status = 'translated'
`;

/**
 * Non-plural insert (plural_category fixed to '').
 *
 * Lookups ignore rows whose status is not 'translated', so such a row must not
 * block a later result for the same key: on a key conflict the existing row is
 * overwritten unless it already holds a good translation.
 */
const INSERT_SQL = `
  INSERT INTO translations
    (source_text, source_lang, target_lang, project_context_hash,
     string_context_hash, plural_category, translated_text, model, status)
  VALUES ($1, $2, $3, $4, $5, '', $6, $7, $8)
  ON CONFLICT (source_text, source_lang, target_lang, project_context_hash,
               string_context_hash, plural_category)
  DO UPDATE SET
    translated_text = EXCLUDED.translated_text,
    model = EXCLUDED.model,
    status = EXCLUDED.status
  WHERE translations.status <> 'translated'
`;

/** Plural lookup: every stored category for the key with status 'translated'. */
const LOOKUP_PLURAL_SQL = `
  SELECT plural_category, translated_text FROM translations
  WHERE source_text = $1 AND source_lang = $2 AND target_lang = $3
    AND project_context_hash = $4 AND string_context_hash = $5
    AND plural_category != '' AND status = 'translated'
`;

/**
 * Plural insert for one category. Same conflict policy as INSERT_SQL: a row
 * that is not 'translated' is overwritten, a good translation is kept.
 */
const INSERT_PLURAL_SQL = `
  INSERT INTO translations
    (source_text, source_lang, target_lang, project_context_hash,
     string_context_hash, plural_category, translated_text, model, status)
  VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
  ON CONFLICT (source_text, source_lang, target_lang, project_context_hash,
               string_context_hash, plural_category)
  DO UPDATE SET
    translated_text = EXCLUDED.translated_text,
    model = EXCLUDED.model,
    status = EXCLUDED.status
  WHERE translations.status <> 'translated'
`;
77
+
78
/** Fields that identify a cached translation (the key columns other than plural_category). */
export interface LookupParams {
  sourceText: string;
  sourceLang: string;
  targetLang: string;
  projectContextHash: string;
  stringContextHash: string;
}

/** Key plus the payload columns written for a non-plural translation. */
export interface InsertParams extends LookupParams {
  translatedText: string;
  model: string;
  status: string;
}

/** Same payload as InsertParams, for one plural category. */
export interface InsertPluralParams extends InsertParams {
  pluralCategory: string;
}

/** Aggregate counts returned by TranslationStore.stats(). */
interface Stats {
  totalTranslations: number;
  totalFailed: number;
  byLanguage: Record<string, number>;
}
104
+
105
/**
 * DuckDB-backed store for translations.
 * Call initialize() before any other method; until then every query throws
 * 'Store not initialized'.
 */
export class TranslationStore {
  private dbPath: string;
  private instance: DuckDBInstance | null = null;
  private conn: DuckDBConnection | null = null;

  /** @param dbPath Path handed to DuckDBInstance.create() by initialize(). */
  constructor(dbPath: string) {
    this.dbPath = dbPath;
  }

  /** Return the open connection or throw when initialize() has not run. */
  private async getConn(): Promise<DuckDBConnection> {
    const active = this.conn;
    if (!active) throw new Error('Store not initialized');
    return active;
  }

  /** Copy a result row, turning bigint cells into plain numbers. */
  private convertRow(row: Record<string, unknown>): Record<string, unknown> {
    return Object.fromEntries(
      Object.entries(row).map(([column, cell]) => [
        column,
        typeof cell === 'bigint' ? Number(cell) : cell,
      ]),
    );
  }

  /**
   * Run `sql` and return all rows as plain objects.
   * Without params the SQL is run directly; with params it is prepared and
   * each value is bound as VARCHAR at its 1-based position.
   */
  async query(sql: string, params: string[] = []): Promise<Record<string, unknown>[]> {
    const conn = await this.getConn();
    const execute = async () => {
      if (params.length === 0) return conn.run(sql);
      const stmt = await conn.prepare(sql);
      params.forEach((value, position) => {
        stmt.bindVarchar(position + 1, value);
      });
      return stmt.run();
    };
    const reader = new DuckDBResultReader(await execute());
    await reader.readAll();
    return Array.from(reader.getRowObjects(), (row) => this.convertRow(row));
  }

  /**
   * Open the database and make sure the translations table has the v2 shape:
   * create it when missing, migrate it when the plural_category column is absent.
   */
  async initialize(): Promise<void> {
    this.instance = await DuckDBInstance.create(this.dbPath);
    this.conn = await this.instance.connect();

    const existingTables = await this.query(
      "SELECT table_name FROM information_schema.tables WHERE table_name = 'translations'"
    );
    const tableMissing = existingTables.length === 0;
    if (tableMissing) {
      // Fresh database — create v2 schema
      await this.conn.run(SCHEMA_V2);
      return;
    }

    // Table exists — a missing plural_category column means it is still v1.
    const pluralColumn = await this.query(CHECK_PLURAL_COLUMN);
    if (pluralColumn.length > 0) return;
    await this.conn.run(MIGRATION_V1_TO_V2);
  }

  /** Fetch the stored non-plural translation for the key, or null when none is found. */
  async lookup(params: LookupParams): Promise<string | null> {
    const { sourceText, sourceLang, targetLang, projectContextHash, stringContextHash } = params;
    const [hit] = await this.query(LOOKUP_SQL, [
      sourceText, sourceLang, targetLang, projectContextHash, stringContextHash,
    ]);
    return hit === undefined ? null : (hit.translated_text as string);
  }

  /** Store a non-plural translation row. */
  async insert(params: InsertParams): Promise<void> {
    const bindings = [
      params.sourceText, params.sourceLang, params.targetLang,
      params.projectContextHash, params.stringContextHash,
      params.translatedText, params.model, params.status,
    ];
    await this.query(INSERT_SQL, bindings);
  }

  /** Fetch all stored plural forms for the key as a category -> text map (empty when none). */
  async lookupPlural(params: LookupParams): Promise<Record<string, string>> {
    const { sourceText, sourceLang, targetLang, projectContextHash, stringContextHash } = params;
    const rows = await this.query(LOOKUP_PLURAL_SQL, [
      sourceText, sourceLang, targetLang, projectContextHash, stringContextHash,
    ]);
    const forms: Record<string, string> = {};
    rows.forEach((row) => {
      forms[row.plural_category as string] = row.translated_text as string;
    });
    return forms;
  }

  /** Store one plural-category translation row. */
  async insertPlural(params: InsertPluralParams): Promise<void> {
    const bindings = [
      params.sourceText, params.sourceLang, params.targetLang,
      params.projectContextHash, params.stringContextHash,
      params.pluralCategory, params.translatedText, params.model, params.status,
    ];
    await this.query(INSERT_PLURAL_SQL, bindings);
  }

  /** Count translated and failed rows, plus translated rows per target language. */
  async stats(): Promise<Stats> {
    const totalRows = await this.query("SELECT count(*) as c FROM translations WHERE status = 'translated'");
    const failedRows = await this.query("SELECT count(*) as c FROM translations WHERE status = 'failed'");
    const langRows = await this.query(
      "SELECT target_lang, count(*) as c FROM translations WHERE status = 'translated' GROUP BY target_lang"
    );
    const byLanguage: Record<string, number> = Object.fromEntries(
      langRows.map((row): [string, number] => [row.target_lang as string, Number(row.c)]),
    );
    const totalTranslations = Number(totalRows[0].c);
    const totalFailed = Number(failedRows[0].c);
    return { totalTranslations, totalFailed, byLanguage };
  }

  /** Drop the connection and instance references; later queries throw until initialize() runs again. */
  close(): void {
    // @duckdb/node-api handles cleanup via GC
    this.conn = null;
    this.instance = null;
  }
}
@@ -0,0 +1,30 @@
1
/** Placeholder syntaxes recognised in source and translated strings. */
const PLACEHOLDER_PATTERNS = [
  /\{\{[^}]*\}\}/g, // {{ count }}
  /\$\{[^}]+\}/g, // ${value}
  /(?<!\{)(?<!\$)\{[^{}]+\}(?!\})/g, // {name} but not {{x}} or ${x}
  /%[sd]/g, // %s, %d
];

/**
 * Collect the distinct placeholders found in `text`.
 * Each placeholder is returned exactly as written, delimiters included;
 * repeated occurrences collapse into one entry.
 */
export function extractPlaceholders(text: string): Set<string> {
  const occurrences = PLACEHOLDER_PATTERNS.flatMap((pattern) => text.match(pattern) ?? []);
  return new Set<string>(occurrences);
}
18
+
19
/**
 * Check that a translation is usable: it must not be blank and must contain
 * every placeholder that appears in the source text.
 *
 * @returns true when the translation passes both checks.
 */
export function validateTranslation(sourceText: string, translatedText: string): boolean {
  const isBlank = !translatedText || translatedText.trim().length === 0;
  if (isBlank) return false;

  const required = [...extractPlaceholders(sourceText)];
  if (required.length === 0) return true;

  const present = extractPlaceholders(translatedText);
  return required.every((placeholder) => present.has(placeholder));
}