anylang-dev 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/extract.js ADDED
@@ -0,0 +1,348 @@
1
+ import { readdir, readFile } from "node:fs/promises";
2
+ import path from "node:path";
3
+
4
+ const DEFAULT_EXTENSIONS = new Set([".js", ".jsx", ".ts", ".tsx", ".vue", ".html", ".svelte", ".astro"]);
5
+
6
/**
 * Collects translation calls from every candidate file below the current
 * working directory.
 *
 * @param {object} config - Extraction settings. The whole object is handed to
 *   `listCandidateFiles`; `config.functionName` is handed to `extractFromSource`.
 * @returns {Promise<{files: string[], items: object[]}>} The scanned file list
 *   and one item per match, each carrying the match fields plus `file`, `line`
 *   and `column`.
 */
export async function extractProjectStrings(config) {
  const files = await listCandidateFiles(process.cwd(), config);
  const recorded = new Set();
  const items = [];

  for (const file of files) {
    const contents = await readFile(file, "utf8");
    const found = extractFromSource(contents, config.functionName);

    for (const hit of found) {
      // Identity is value + file + offset, so the same occurrence is kept once.
      const identity = hit.value + "\0" + file + "\0" + hit.index;
      if (recorded.has(identity)) continue;
      recorded.add(identity);

      const position = lineColumnForIndex(contents, hit.index);
      items.push({ ...hit, file, ...position });
    }
  }

  return { files, items };
}
27
+
28
/**
 * Scans source text for calls to the translation function and returns one
 * record per call that `parseTranslationCall` accepts.
 *
 * The scanner tracks line comments, block comments, quoted strings and
 * template literals; text inside any of them is skipped, so calls written
 * there are not reported.
 *
 * @param {string} source - Text to scan.
 * @param {string} [functionName="$tr"] - Name of the translation function.
 * @returns {Array<{key: string, value: string, variables: string[], index: number, raw: string}>}
 *   Matches in source order; `index` is the offset of the function name and
 *   `raw` is the source slice from there to the end of the call.
 */
export function extractFromSource(source, functionName = "$tr") {
  const found = [];
  let mode = "code";
  let closingQuote = "";
  let pos = 0;

  while (pos < source.length) {
    const current = source[pos];
    const following = source[pos + 1];

    switch (mode) {
      case "lineComment":
        if (current === "\n") mode = "code";
        pos += 1;
        continue;

      case "blockComment":
        if (current === "*" && following === "/") {
          pos += 2;
          mode = "code";
        } else {
          pos += 1;
        }
        continue;

      case "string":
      case "template": {
        const terminator = mode === "template" ? "`" : closingQuote;
        if (current === "\\") {
          // Skip the escaped character so an escaped quote does not close the literal.
          pos += 2;
        } else {
          if (current === terminator) mode = "code";
          pos += 1;
        }
        continue;
      }

      default:
        break;
    }

    // From here on the scanner is in plain code.
    if (current === "/" && (following === "/" || following === "*")) {
      mode = following === "/" ? "lineComment" : "blockComment";
      pos += 2;
      continue;
    }

    if (current === "'" || current === "\"") {
      closingQuote = current;
      mode = "string";
      pos += 1;
      continue;
    }

    if (current === "`") {
      mode = "template";
      pos += 1;
      continue;
    }

    if (source.startsWith(functionName, pos) && hasIdentifierBoundary(source, pos, functionName)) {
      const call = parseTranslationCall(source, pos + functionName.length);
      if (call) {
        const { key, value, variables, endIndex } = call;
        found.push({ key, value, variables, index: pos, raw: source.slice(pos, endIndex) });
        pos = endIndex;
        continue;
      }
    }

    pos += 1;
  }

  return found;
}
111
+
112
/**
 * Walks the include roots derived from `config.include` and returns the files
 * whose extension is allowed and whose path is not excluded.
 *
 * Exclusion is evaluated relative to `root`: entry names are checked while
 * descending, and the segments between `root` and each include root are
 * checked once up front. Directory names above `root` never take part, so a
 * project that itself lives under a directory named like an excluded entry
 * is still scanned.
 *
 * @param {string} root - Project root directory.
 * @param {object} config - Reads `config.exclude` (names to skip) and
 *   `config.include` (patterns passed to `extensionsFromInclude` and
 *   `includeRoots`).
 * @returns {Promise<string[]>} Sorted file paths without duplicates, even when
 *   include roots overlap.
 */
async function listCandidateFiles(root, config) {
  const excluded = new Set(config.exclude || []);
  const allowedExtensions = extensionsFromInclude(config.include);
  const roots = includeRoots(root, config.include);
  // A Set keeps each file once when one include root is nested inside another.
  const collected = new Set();

  async function walk(dir) {
    let entries;
    try {
      entries = await readdir(dir, { withFileTypes: true });
    } catch (error) {
      // A missing include root is not an error; it simply contributes no files.
      if (error && error.code === "ENOENT") return;
      throw error;
    }
    for (const entry of entries) {
      if (excluded.has(entry.name)) continue;
      const fullPath = path.join(dir, entry.name);
      if (entry.isDirectory()) {
        await walk(fullPath);
      } else if (entry.isFile() && allowedExtensions.has(path.extname(entry.name))) {
        collected.add(fullPath);
      }
    }
  }

  for (const includeRoot of roots) {
    // Only the part of the path below the project root is subject to exclusion.
    if (hasExcludedSegment(path.relative(root, includeRoot), excluded)) continue;
    await walk(includeRoot);
  }
  return Array.from(collected).sort();
}
142
+
143
/**
 * Derives the directories to walk from the include patterns.
 *
 * For a pattern containing a glob character (`*`, `{` or `?`) the root is the
 * directory part that precedes the segment holding the first glob character;
 * a glob inside the first segment (for example `app*\/**`) therefore maps to
 * `root` itself. For a pattern without glob characters the last path segment
 * is dropped when the pattern contains a separator.
 *
 * @param {string} root - Directory the patterns are resolved against.
 * @param {string[]} [include] - Include patterns; a missing list is treated
 *   as empty.
 * @returns {string[]} Unique absolute directories, or `[root]` when no pattern
 *   is given.
 */
function includeRoots(root, include) {
  const roots = new Set();
  for (const pattern of include || []) {
    const firstGlobIndex = pattern.search(/[*{?]/);
    let base;
    if (firstGlobIndex === -1) {
      base = pattern.replace(/[/\\][^/\\]*$/, "");
    } else {
      // Keep only complete directory segments: the segment containing the
      // glob is not a real directory name and must not be resolved as one.
      const staticPart = pattern.slice(0, firstGlobIndex);
      const lastSeparator = Math.max(staticPart.lastIndexOf("/"), staticPart.lastIndexOf("\\"));
      base = lastSeparator === -1 ? "" : staticPart.slice(0, lastSeparator);
    }
    roots.add(path.resolve(root, base || "."));
  }
  return roots.size > 0 ? Array.from(roots) : [root];
}
153
+
154
/**
 * Tells whether any segment of a path is an excluded name.
 *
 * @param {string} filePath - Path split on the platform separator.
 * @param {Set<string>} excluded - Names to look for.
 * @returns {boolean} True when at least one segment is in `excluded`.
 */
function hasExcludedSegment(filePath, excluded) {
  for (const part of filePath.split(path.sep)) {
    if (excluded.has(part)) return true;
  }
  return false;
}
157
+
158
/**
 * Derives the set of allowed file extensions from the include patterns.
 *
 * A brace group such as `*.{ts,tsx}` contributes each listed extension;
 * otherwise the pattern's own extension is used. Extensions that contain glob
 * characters (for example the `.*` of `*.*`) name no concrete extension and
 * are ignored instead of being turned into a literal.
 *
 * @param {string[]} [include] - Include patterns; a missing list is treated
 *   as empty.
 * @returns {Set<string>} Extensions with leading dot, or `DEFAULT_EXTENSIONS`
 *   when the patterns name none.
 */
function extensionsFromInclude(include) {
  const extensions = new Set();
  for (const pattern of include || []) {
    const braceMatch = pattern.match(/\.\{([^}]+)\}/);
    if (braceMatch) {
      for (const ext of braceMatch[1].split(",")) extensions.add(`.${ext.trim()}`);
      continue;
    }
    const ext = path.extname(pattern);
    // Skip wildcard extensions: they do not restrict the file type.
    if (ext && !/[*?[\]{}]/.test(ext)) extensions.add(ext);
  }
  return extensions.size > 0 ? extensions : DEFAULT_EXTENSIONS;
}
171
+
172
/**
 * Checks that the function name found at `start` is not part of a longer
 * identifier, i.e. neither the character before it nor the one after it is an
 * identifier character.
 *
 * @param {string} source - Text being scanned.
 * @param {number} start - Offset where `functionName` begins.
 * @param {string} functionName - The name that was matched.
 * @returns {boolean} True when both neighbours are non-identifier characters
 *   or lie outside the text.
 */
function hasIdentifierBoundary(source, start, functionName) {
  const end = start + functionName.length;
  if (isIdentifierChar(source[start - 1])) return false;
  return !isIdentifierChar(source[end]);
}
177
+
178
/**
 * Tells whether a character is an ASCII letter, digit, `_` or `$`.
 *
 * @param {string|undefined} char - Character to test; may be undefined when
 *   the caller indexed past either end of the text.
 * @returns {boolean} False for a missing or empty value.
 */
function isIdentifierChar(char) {
  if (!char) return false;
  return /[A-Za-z0-9_$]/.test(char);
}
181
+
182
/**
 * Parses the argument list of a translation call.
 *
 * The first argument must be a string literal standing on its own, i.e.
 * directly followed (ignoring whitespace) by `,` or `)`. A literal that is
 * only the start of a larger expression, such as `"Hi " + name`, is rejected
 * in the same way `parseLiteral` rejects interpolated template literals. An
 * optional second literal argument supplies the source text under the same
 * rule; when it is absent or not a standalone literal, the key doubles as the
 * text.
 *
 * @param {string} source - Text being scanned.
 * @param {number} offset - Offset just after the function name.
 * @returns {{key: string, value: string, variables: string[], endIndex: number} | null}
 *   The parsed call, with `endIndex` just past the closing parenthesis, or
 *   null when the text at `offset` is not an extractable call.
 */
function parseTranslationCall(source, offset) {
  let index = skipWhitespace(source, offset);
  if (source[index] !== "(") return null;

  const keyLiteral = parseLiteral(source, skipWhitespace(source, index + 1));
  if (!keyLiteral) return null;
  index = skipWhitespace(source, keyLiteral.endIndex);
  // Anything other than "," or ")" means the literal is part of an expression.
  if (!endsArgument(source[index])) return null;

  let text = keyLiteral.value;
  if (source[index] === ",") {
    const textLiteral = parseLiteral(source, skipWhitespace(source, index + 1));
    if (textLiteral) {
      const afterText = skipWhitespace(source, textLiteral.endIndex);
      if (endsArgument(source[afterText])) {
        text = textLiteral.value;
        index = afterText;
      }
    }
  }

  const endIndex = findCallEnd(source, index);
  if (endIndex === -1) return null;
  return {
    key: keyLiteral.value,
    value: text,
    variables: extractVariables(text),
    endIndex: endIndex + 1
  };
}

/** Tells whether a character terminates a call argument. */
function endsArgument(char) {
  return char === "," || char === ")";
}
209
+
210
/**
 * Finds the closing parenthesis of a call whose opening parenthesis has
 * already been consumed.
 *
 * Brackets, braces and parentheses are tracked by nesting depth; comments,
 * quoted strings and template literals are skipped. The scan stops with -1 as
 * soon as a `]` or `}` appears at depth 0, because such a closer belongs to an
 * enclosing construct and the call can no longer be balanced.
 *
 * @param {string} source - Text being scanned.
 * @param {number} index - Offset inside the argument list to start from.
 * @returns {number} Offset of the matching `)`, or -1 when there is none.
 */
function findCallEnd(source, index) {
  let cursor = index;
  let depth = 0;
  let state = "code";
  let quote = "";

  while (cursor < source.length) {
    const char = source[cursor];
    const next = source[cursor + 1];

    if (state === "lineComment") {
      if (char === "\n") state = "code";
      cursor += 1;
      continue;
    }
    if (state === "blockComment") {
      if (char === "*" && next === "/") {
        cursor += 2;
        state = "code";
      } else {
        cursor += 1;
      }
      continue;
    }
    if (state === "string" || state === "template") {
      if (char === "\\") {
        // Skip the escaped character so an escaped quote does not end the literal.
        cursor += 2;
      } else if (char === quote) {
        cursor += 1;
        state = "code";
      } else {
        cursor += 1;
      }
      continue;
    }

    if (char === "/" && next === "/") {
      state = "lineComment";
      cursor += 2;
      continue;
    }
    if (char === "/" && next === "*") {
      state = "blockComment";
      cursor += 2;
      continue;
    }
    if (char === "'" || char === "\"" || char === "`") {
      quote = char;
      state = char === "`" ? "template" : "string";
      cursor += 1;
      continue;
    }
    if (char === "(" || char === "[" || char === "{") {
      depth += 1;
      cursor += 1;
      continue;
    }
    if (char === ")" || char === "]" || char === "}") {
      // At depth 0 only ")" can end the call; any other closer is unbalanced.
      if (depth === 0) return char === ")" ? cursor : -1;
      depth -= 1;
      cursor += 1;
      continue;
    }

    cursor += 1;
  }

  return -1;
}
279
+
280
/**
 * Parses a string literal (single-quoted, double-quoted or template) that
 * starts at `index` and returns its decoded value.
 *
 * Escape sequences are decoded: `\uXXXX`, `\u{...}`, `\xHH` and line
 * continuations are handled here; every other escape is delegated to
 * `decodeEscape`. Template literals containing `${` are rejected because
 * their value is not static.
 *
 * @param {string} source - Text being scanned.
 * @param {number} index - Offset of the opening quote.
 * @returns {{value: string, endIndex: number} | null} The decoded value and
 *   the offset just past the closing quote, or null when no complete static
 *   literal starts at `index`.
 */
function parseLiteral(source, index) {
  const quote = source[index];
  if (quote !== "'" && quote !== "\"" && quote !== "`") return null;

  let cursor = index + 1;
  let value = "";
  while (cursor < source.length) {
    const char = source[cursor];
    if (char === "\\") {
      const escape = readEscapeSequence(source, cursor);
      value += escape.text;
      cursor = escape.endIndex;
      continue;
    }
    if (quote === "`" && char === "$" && source[cursor + 1] === "{") {
      return null;
    }
    if (char === quote) {
      return { value, endIndex: cursor + 1 };
    }
    value += char;
    cursor += 1;
  }
  return null;
}

/**
 * Decodes the escape sequence whose backslash sits at `start`.
 * Returns the decoded text and the offset just past the sequence.
 */
function readEscapeSequence(source, start) {
  const marker = source[start + 1];

  if (marker === "u" && source[start + 2] === "{") {
    // Code point escape: \u{1F600}
    const close = source.indexOf("}", start + 3);
    const digits = close === -1 ? "" : source.slice(start + 3, close);
    if (/^[0-9A-Fa-f]+$/.test(digits)) {
      const codePoint = parseInt(digits, 16);
      if (codePoint <= 0x10ffff) {
        return { text: String.fromCodePoint(codePoint), endIndex: close + 1 };
      }
    }
  } else if (marker === "u" || marker === "x") {
    // Fixed-width escapes: \uXXXX (4 hex digits) and \xHH (2 hex digits).
    const width = marker === "u" ? 4 : 2;
    const digits = source.slice(start + 2, start + 2 + width);
    if (digits.length === width && /^[0-9A-Fa-f]+$/.test(digits)) {
      return { text: String.fromCharCode(parseInt(digits, 16)), endIndex: start + 2 + width };
    }
  } else if (marker === "\r" && source[start + 2] === "\n") {
    // Line continuation across a CRLF line break contributes nothing.
    return { text: "", endIndex: start + 3 };
  } else if (marker === "\n" || marker === "\r" || marker === "\u2028" || marker === "\u2029") {
    // Line continuation contributes nothing to the value.
    return { text: "", endIndex: start + 2 };
  }

  // Single-character escapes, and malformed \u / \x sequences, fall back to
  // the shared decoder.
  return { text: decodeEscape(marker), endIndex: start + 2 };
}
305
+
306
/**
 * Decodes the character that follows a backslash in a string literal.
 *
 * Covers the single-character escapes `\n`, `\r`, `\t`, `\b`, `\f`, `\v` and
 * `\0` (always decoded as NUL, whatever follows it) as well as the escaped
 * backslash and quote characters. Any other character stands for itself.
 *
 * @param {string|undefined} char - Character after the backslash; undefined
 *   when the backslash was the last character of the text.
 * @returns {string} The decoded character, or "" for a missing character.
 */
function decodeEscape(char) {
  switch (char) {
    case "n":
      return "\n";
    case "r":
      return "\r";
    case "t":
      return "\t";
    case "b":
      return "\b";
    case "f":
      return "\f";
    case "v":
      return "\v";
    case "0":
      return "\0";
    case "\\":
      return "\\";
    case "\"":
      return "\"";
    case "'":
      return "'";
    case "`":
      return "`";
    default:
      return char || "";
  }
}
326
+
327
/**
 * Advances past whitespace.
 *
 * @param {string} source - Text being scanned.
 * @param {number} index - Offset to start from.
 * @returns {number} Offset of the first non-whitespace character at or after
 *   `index`, or the first offset past the end of the text.
 */
function skipWhitespace(source, index) {
  const whitespace = /\s/;
  let cursor = index;
  for (;;) {
    const char = source[cursor];
    if (!char || !whitespace.test(char)) return cursor;
    cursor += 1;
  }
}
332
+
333
/**
 * Converts a character offset into a 1-based line and column.
 *
 * @param {string} source - Full text.
 * @param {number} index - Character offset into `source`.
 * @returns {{line: number, column: number}} Position of the offset, counting
 *   lines by "\n".
 */
function lineColumnForIndex(source, index) {
  const prefix = source.slice(0, index);
  let line = 1;
  let lineStart = 0;
  // Each newline before the offset starts a new line right after it.
  for (let at = prefix.indexOf("\n"); at !== -1; at = prefix.indexOf("\n", at + 1)) {
    line += 1;
    lineStart = at + 1;
  }
  return { line, column: prefix.length - lineStart + 1 };
}
341
+
342
/**
 * Lists the placeholder names used in a text, where a placeholder is an
 * identifier wrapped in braces such as `{name}`.
 *
 * @param {string} text - Source text of a translation entry.
 * @returns {string[]} Unique placeholder names in sorted order.
 */
function extractVariables(text) {
  const placeholder = /\{([A-Za-z_$][A-Za-z0-9_$]*)\}/g;
  const names = Array.from(text.matchAll(placeholder), (hit) => hit[1]);
  return Array.from(new Set(names)).sort();
}
@@ -0,0 +1,214 @@
1
+ import { createHash } from "node:crypto";
2
+ import { mkdir, readFile, writeFile } from "node:fs/promises";
3
+ import path from "node:path";
4
+ import { extractProjectStrings } from "./extract.js";
5
+ import { createTranslator } from "./providers.js";
6
+
7
/**
 * Runs one extraction and translation pass.
 *
 * Steps: extract the source strings, write the source-locale catalog, then
 * for every target locale fill in missing or stale entries, drop entries
 * whose key no longer exists in the source, and write the catalog. Finally
 * the lock file and the generated runtime module are written. All files are
 * written in dry-run mode as well; a dry run only skips the translator.
 *
 * Catalog lookups and pruning use own properties only, so keys that collide
 * with `Object.prototype` members (for example "constructor") behave like any
 * other key. A lock file that lacks an `entries` object is repaired instead
 * of crashing the run.
 *
 * @param {object} config - Reads `outDir`, `sourceLocale`, `targetLocales`
 *   and `provider`; the whole object is also passed to
 *   `extractProjectStrings` and `writeGeneratedRuntime`.
 * @param {object} [options] - `translator` overrides the provider-based
 *   translator; `dryRun` disables translation.
 * @returns {Promise<{sourceCount: number, localeCount: number, translatedCount: number, skippedTranslationCount: number, outDir: string}>}
 *   Counters for the run and the output directory relative to the current
 *   working directory.
 */
export async function runPipeline(config, options = {}) {
  const hasOwn = (object, key) => Object.prototype.hasOwnProperty.call(object, key);

  const outDir = path.resolve(config.outDir);
  await mkdir(outDir, { recursive: true });

  const extraction = await extractProjectStrings(config);
  const sourceEntries = sourceEntriesFromItems(extraction.items);
  const sourceKeys = new Set(sourceEntries.map((entry) => entry.key));
  const sourceCatalog = sortObject(Object.fromEntries(sourceEntries.map((entry) => [
    entry.key,
    {
      text: entry.text,
      variables: entry.variables
    }
  ])));
  await writeJson(path.join(outDir, `${config.sourceLocale}.json`), sourceCatalog);

  const lockPath = path.join(outDir, "anylang.lock.json");
  const loadedLock = await readJson(lockPath, { version: 1, entries: {} });
  const lock = loadedLock && typeof loadedLock === "object" ? loadedLock : { version: 1, entries: {} };
  if (!lock.entries || typeof lock.entries !== "object") lock.entries = {};

  const translator = options.translator || (options.dryRun ? null : createTranslator(config.provider));
  let translatedCount = 0;
  let skippedTranslationCount = 0;

  for (const locale of config.targetLocales) {
    if (locale === config.sourceLocale) continue;
    const localePath = path.join(outDir, `${locale}.json`);
    const catalog = await readJson(localePath, {});

    for (const entry of sourceEntries) {
      const stored = hasOwn(catalog, entry.key) ? catalog[entry.key] : undefined;
      const existing = normalizeTargetEntry(stored);
      const fingerprint = hashString(entry.text);
      const lockKey = `${locale}:${entry.key}`;
      // An entry is fresh when it was translated from the current source text.
      const isFresh = existing && existing.source === entry.text && existing.text;
      if (isFresh) continue;

      if (options.dryRun || !translator) {
        // Without a translator only a placeholder is recorded for new keys.
        if (!existing) {
          catalog[entry.key] = {
            source: entry.text,
            text: "",
            variables: entry.variables
          };
        }
        skippedTranslationCount += 1;
      } else {
        catalog[entry.key] = {
          source: entry.text,
          text: await translator.translate({ text: entry.text, sourceLocale: config.sourceLocale, targetLocale: locale }),
          variables: entry.variables
        };
        translatedCount += 1;
      }

      lock.entries[lockKey] = {
        fingerprint,
        updatedAt: new Date().toISOString()
      };
    }

    // Remove translations whose key is no longer present in the source.
    for (const key of Object.keys(catalog)) {
      if (!sourceKeys.has(key)) delete catalog[key];
    }
    await writeJson(localePath, sortObject(catalog));
  }

  await writeJson(lockPath, lock);
  await writeGeneratedRuntime(config);

  return {
    sourceCount: sourceEntries.length,
    localeCount: 1 + config.targetLocales.filter((locale) => locale !== config.sourceLocale).length,
    translatedCount,
    skippedTranslationCount,
    outDir: path.relative(process.cwd(), outDir) || "."
  };
}
81
+
82
/**
 * Reads and parses a JSON file.
 *
 * @param {string} file - Path of the file to read.
 * @param {*} fallback - Value returned when the file does not exist.
 * @returns {Promise<*>} The parsed content, or `fallback` on ENOENT. Other
 *   read errors and JSON syntax errors are propagated.
 */
async function readJson(file, fallback) {
  let raw;
  try {
    raw = await readFile(file, "utf8");
  } catch (error) {
    const missing = error && error.code === "ENOENT";
    if (missing) return fallback;
    throw error;
  }
  return JSON.parse(raw);
}
90
+
91
/**
 * Writes a value as JSON indented by two spaces, followed by a newline.
 *
 * @param {string} file - Destination path.
 * @param {*} value - Value to serialize.
 * @returns {Promise<void>}
 */
async function writeJson(file, value) {
  const serialized = JSON.stringify(value, null, 2);
  await writeFile(file, serialized + "\n");
}
94
+
95
/**
 * Computes the SHA-256 digest of a string.
 *
 * @param {string} value - Text to hash.
 * @returns {string} The digest as a hexadecimal string.
 */
function hashString(value) {
  const hasher = createHash("sha256");
  hasher.update(value);
  return hasher.digest("hex");
}
98
+
99
/**
 * Returns a copy of an object whose entries are ordered by key using
 * `localeCompare`.
 *
 * @param {object} object - Object to copy.
 * @returns {object} A new object built from the sorted entries.
 */
function sortObject(object) {
  const pairs = Object.entries(object);
  pairs.sort((a, b) => a[0].localeCompare(b[0]));
  return Object.fromEntries(pairs);
}
102
+
103
/**
 * Collapses extracted items into one source entry per translation key.
 *
 * @param {Array<{key: string, value: string, variables?: string[]}>} items -
 *   Extracted matches.
 * @returns {Array<{key: string, text: string, variables: string[]}>} Entries
 *   ordered by key using `localeCompare`.
 * @throws {Error} When the same key is used with two different source texts.
 */
function sourceEntriesFromItems(items) {
  const byKey = new Map();

  for (const { key, value, variables } of items) {
    const previous = byKey.get(key);
    if (previous && previous.text !== value) {
      throw new Error(`Translation key "${key}" has multiple source texts: "${previous.text}" and "${value}".`);
    }
    // A later occurrence with the same text replaces the earlier record.
    byKey.set(key, { key, text: value, variables: variables || [] });
  }

  const result = [...byKey.values()];
  result.sort((a, b) => a.key.localeCompare(b.key));
  return result;
}
118
+
119
/**
 * Brings a stored catalog entry into the `{ source, text, variables }` shape.
 *
 * @param {*} entry - Stored value: a falsy value, a plain string, or an
 *   object with optional `source`, `text` and `variables` fields.
 * @returns {{source: *, text: string, variables: Array} | null} Null for a
 *   falsy entry. A plain string becomes the text with an undefined source.
 *   For an object, a non-string `text` becomes "" and a non-array `variables`
 *   becomes [].
 */
function normalizeTargetEntry(entry) {
  if (!entry) return null;

  const isPlainText = typeof entry === "string";
  if (isPlainText) {
    return { source: undefined, text: entry, variables: [] };
  }

  const { source, text, variables } = entry;
  return {
    source,
    text: typeof text === "string" ? text : "",
    variables: Array.isArray(variables) ? variables : []
  };
}
130
+
131
/**
 * Writes the generated runtime module that imports every locale catalog.
 *
 * Nothing is written when `config.runtime` is `false`. The output path
 * defaults to "src/anylang.generated.ts" and the runtime import specifier to
 * "anylang-dev/runtime"; catalog import paths are made relative to the
 * directory of the output file.
 *
 * @param {object} config - Reads `runtime`, `outDir`, `sourceLocale` and
 *   `targetLocales`.
 * @returns {Promise<void>}
 */
async function writeGeneratedRuntime(config) {
  if (config.runtime === false) return;

  const { sourceLocale } = config;
  const output = path.resolve(config.runtime?.output || "src/anylang.generated.ts");
  const catalogDir = path.resolve(config.outDir);
  const otherLocales = config.targetLocales.filter((locale) => locale !== sourceLocale);
  const importFrom = config.runtime?.importFrom || "anylang-dev/runtime";
  const runtimeDir = path.dirname(output);

  // The source locale always comes first, followed by the remaining targets.
  const localeImports = [];
  for (const locale of [sourceLocale, ...otherLocales]) {
    const catalogFile = path.join(catalogDir, `${locale}.json`);
    localeImports.push({
      locale,
      identifier: localeIdentifier(locale),
      importPath: toImportPath(path.relative(runtimeDir, catalogFile))
    });
  }

  await mkdir(runtimeDir, { recursive: true });
  const contents = generatedRuntimeSource({ importFrom, localeImports, sourceLocale });
  await writeFile(output, `${contents}\n`);
}
148
+
149
/**
 * Builds the text of the generated runtime module.
 *
 * Every value placed inside a single-quoted literal of the generated code
 * (import specifiers, locale codes, language labels) is escaped, so a quote
 * or backslash in the configuration cannot break the generated file.
 *
 * @param {object} params
 * @param {string} params.importFrom - Module specifier of the runtime.
 * @param {Array<{locale: string, identifier: string, importPath: string}>} params.localeImports -
 *   One record per locale catalog to import.
 * @param {string} params.sourceLocale - Locale passed to `configureAnyLang`.
 * @returns {string} Source text of the module, ending with a newline.
 */
function generatedRuntimeSource({ importFrom, localeImports, sourceLocale }) {
  const imports = localeImports
    .map((item) => `import ${item.identifier} from ${singleQuoted(item.importPath)}`)
    .join("\n");
  const languageUnion = localeImports.map((item) => singleQuoted(item.locale)).join(" | ");
  const languageItems = localeImports
    .map((item) => `  { code: ${singleQuoted(item.locale)} as const, label: ${singleQuoted(languageLabel(item.locale))}, nativeLabel: ${singleQuoted(nativeLanguageLabel(item.locale))} }`)
    .join(",\n");
  const catalogItems = localeImports
    .map((item) => `    ${singleQuoted(item.locale)}: ${item.identifier}`)
    .join(",\n");

  return `/* This file is generated by anylang. Do not edit by hand. */
import { useCallback } from 'react'
import { $tr as translate, configureAnyLang, setAnyLangLocale } from ${singleQuoted(importFrom)}
${imports}

export type LanguageCode = ${languageUnion}

export const languages = [
${languageItems}
]

configureAnyLang({
  locale: ${singleQuoted(sourceLocale)},
  catalogs: {
${catalogItems}
  },
})

export function useAnyLang(locale: LanguageCode) {
  return useCallback((key: string, source?: string) => {
    return translate(key, source, locale)
  }, [locale])
}

export function setLanguage(locale: LanguageCode) {
  setAnyLangLocale(locale)
}
`;
}

/** Renders a value as a single-quoted string literal for generated code. */
function singleQuoted(value) {
  const escaped = String(value)
    .replace(/\\/g, "\\\\")
    .replace(/'/g, "\\'")
    .replace(/\r/g, "\\r")
    .replace(/\n/g, "\\n");
  return `'${escaped}'`;
}
190
+
191
/**
 * Builds the import identifier used for a locale catalog in generated code.
 *
 * @param {string} locale - Locale code such as "pt-BR".
 * @returns {string} "catalog_" followed by the locale, with every character
 *   other than ASCII letters, digits, `_` and `$` replaced by `_`.
 */
function localeIdentifier(locale) {
  const safe = locale.replace(/[^A-Za-z0-9_$]/g, "_");
  return "catalog_" + safe;
}
194
+
195
/**
 * Converts a relative file path into a relative module specifier.
 *
 * Platform separators become "/", and "./" is prepended unless the path
 * already is an explicit relative reference (".", "..", or starting with
 * "./" or "../"). A path that merely begins with a dot-directory, such as
 * ".i18n/en.json", still receives the "./" prefix; without it the specifier
 * would be treated as a bare package name.
 *
 * @param {string} relativePath - Path relative to the importing file.
 * @returns {string} Specifier suitable for an `import ... from` statement.
 */
function toImportPath(relativePath) {
  const normalized = relativePath.split(path.sep).join("/");
  const isRelative =
    normalized === "." ||
    normalized === ".." ||
    normalized.startsWith("./") ||
    normalized.startsWith("../");
  return isRelative ? normalized : `./${normalized}`;
}
199
+
200
/**
 * Returns the English display name of a locale.
 *
 * @param {string} locale - Locale code.
 * @returns {string} The name reported by `Intl.DisplayNames`, or the locale
 *   code itself when no name is available or the lookup throws.
 */
function languageLabel(locale) {
  let label;
  try {
    const englishNames = new Intl.DisplayNames(["en"], { type: "language" });
    label = englishNames.of(locale);
  } catch {
    return locale;
  }
  return label || locale;
}
207
+
208
/**
 * Returns the display name of a locale written in that locale's own
 * language.
 *
 * @param {string} locale - Locale code.
 * @returns {string} The name reported by `Intl.DisplayNames` for the locale
 *   itself, or the result of `languageLabel` when no name is available or
 *   the lookup throws.
 */
function nativeLanguageLabel(locale) {
  let label;
  try {
    label = new Intl.DisplayNames([locale], { type: "language" }).of(locale);
  } catch {
    label = undefined;
  }
  return label || languageLabel(locale);
}