@generaltranslation/python-extractor 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE.md ADDED
@@ -0,0 +1,105 @@
1
+ # Functional Source License, Version 1.1, ALv2 Future License
2
+
3
+ ## Abbreviation
4
+
5
+ FSL-1.1-ALv2
6
+
7
+ ## Notice
8
+
9
+ Copyright 2025 General Translation, Inc.
10
+
11
+ ## Terms and Conditions
12
+
13
+ ### Licensor ("We")
14
+
15
+ The party offering the Software under these Terms and Conditions.
16
+
17
+ ### The Software
18
+
19
+ The "Software" is each version of the software that we make available under
20
+ these Terms and Conditions, as indicated by our inclusion of these Terms and
21
+ Conditions with the Software.
22
+
23
+ ### License Grant
24
+
25
+ Subject to your compliance with this License Grant and the Patents,
26
+ Redistribution and Trademark clauses below, we hereby grant you the right to
27
+ use, copy, modify, create derivative works, publicly perform, publicly display
28
+ and redistribute the Software for any Permitted Purpose identified below.
29
+
30
+ ### Permitted Purpose
31
+
32
+ A Permitted Purpose is any purpose other than a Competing Use. A Competing Use
33
+ means making the Software available to others in a commercial product or
34
+ service that:
35
+
36
+ 1. substitutes for the Software;
37
+
38
+ 2. substitutes for any other product or service we offer using the Software
39
+ that exists as of the date we make the Software available; or
40
+
41
+ 3. offers the same or substantially similar functionality as the Software.
42
+
43
+ Permitted Purposes specifically include using the Software:
44
+
45
+ 1. for your internal use and access;
46
+
47
+ 2. for non-commercial education;
48
+
49
+ 3. for non-commercial research; and
50
+
51
+ 4. in connection with professional services that you provide to a licensee
52
+ using the Software in accordance with these Terms and Conditions.
53
+
54
+ ### Patents
55
+
56
+ To the extent your use for a Permitted Purpose would necessarily infringe our
57
+ patents, the license grant above includes a license under our patents. If you
58
+ make a claim against any party that the Software infringes or contributes to
59
+ the infringement of any patent, then your patent license to the Software ends
60
+ immediately.
61
+
62
+ ### Redistribution
63
+
64
+ The Terms and Conditions apply to all copies, modifications and derivatives of
65
+ the Software.
66
+
67
+ If you redistribute any copies, modifications or derivatives of the Software,
68
+ you must include a copy of or a link to these Terms and Conditions and not
69
+ remove any copyright notices provided in or with the Software.
70
+
71
+ ### Disclaimer
72
+
73
+ THE SOFTWARE IS PROVIDED "AS IS" AND WITHOUT WARRANTIES OF ANY KIND, EXPRESS OR
74
+ IMPLIED, INCLUDING WITHOUT LIMITATION WARRANTIES OF FITNESS FOR A PARTICULAR
75
+ PURPOSE, MERCHANTABILITY, TITLE OR NON-INFRINGEMENT.
76
+
77
+ IN NO EVENT WILL WE HAVE ANY LIABILITY TO YOU ARISING OUT OF OR RELATED TO THE
78
+ SOFTWARE, INCLUDING INDIRECT, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES,
79
+ EVEN IF WE HAVE BEEN INFORMED OF THEIR POSSIBILITY IN ADVANCE.
80
+
81
+ ### Trademarks
82
+
83
+ Except for displaying the License Details and identifying us as the origin of
84
+ the Software, you have no right under these Terms and Conditions to use our
85
+ trademarks, trade names, service marks or product names.
86
+
87
+ ## Grant of Future License
88
+
89
+ We hereby irrevocably grant you an additional license to use the Software under
90
+ the Apache License, Version 2.0 that is effective on the second anniversary of
91
+ the date we make the Software available. On or after that date, you may use the
92
+ Software under the Apache License, Version 2.0, in which case the following
93
+ will apply:
94
+
95
+ Licensed under the Apache License, Version 2.0 (the "License"); you may not use
96
+ this file except in compliance with the License.
97
+
98
+ You may obtain a copy of the License at
99
+
100
+ http://www.apache.org/licenses/LICENSE-2.0
101
+
102
+ Unless required by applicable law or agreed to in writing, software distributed
103
+ under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
104
+ CONDITIONS OF ANY KIND, either express or implied. See the License for the
105
+ specific language governing permissions and limitations under the License.
@@ -0,0 +1,13 @@
1
/** Python module names whose `from <module> import ...` statements are recognised as GT imports. */
export declare const PYTHON_GT_PACKAGES: readonly ["gt_flask", "gt_fastapi"];
/** Hyphenated counterparts of the module names above (presumably the installable distribution names; confirm against the consumer). */
export declare const PYTHON_GT_DEPENDENCIES: readonly ["gt-flask", "gt-fastapi"];
/** Imported name of the `t` function. */
export declare const PYTHON_T_FUNCTION = "t";
/** Imported name of the `msg` function. */
export declare const PYTHON_MSG_FUNCTION = "msg";
/** Imported name of the `declare_static` helper. */
export declare const PYTHON_DECLARE_STATIC = "declare_static";
/** Imported name of the `declare_var` helper. */
export declare const PYTHON_DECLARE_VAR = "declare_var";
/** These imported names are tracked (translation functions + static helpers) */
export declare const PYTHON_TRANSLATION_FUNCTIONS: readonly ["t", "msg", "declare_static", "declare_var"];
/** Maps each recognised Python keyword-argument name to the metadata key its value is stored under. */
export declare const PYTHON_METADATA_KWARGS: {
    readonly _id: "id";
    readonly _context: "context";
    readonly _max_chars: "maxChars";
};
@@ -0,0 +1,18 @@
1
/** Python module names whose `from <module> import ...` statements are recognised as GT imports. */
export const PYTHON_GT_PACKAGES = ['gt_flask', 'gt_fastapi'];
/** Hyphenated counterparts of the module names above. */
export const PYTHON_GT_DEPENDENCIES = ['gt-flask', 'gt-fastapi'];

// Individual function names, exported so callers never hard-code the strings.
export const PYTHON_T_FUNCTION = 't';
export const PYTHON_MSG_FUNCTION = 'msg';
export const PYTHON_DECLARE_STATIC = 'declare_static';
export const PYTHON_DECLARE_VAR = 'declare_var';

/** These imported names are tracked (translation functions + static helpers) */
export const PYTHON_TRANSLATION_FUNCTIONS = [
    PYTHON_T_FUNCTION,
    PYTHON_MSG_FUNCTION,
    PYTHON_DECLARE_STATIC,
    PYTHON_DECLARE_VAR,
];

/** Python keyword-argument name -> metadata key its value is stored under. */
export const PYTHON_METADATA_KWARGS = {
    _id: 'id',
    _context: 'context',
    _max_chars: 'maxChars',
};
@@ -0,0 +1,20 @@
1
+ import type { SyntaxNode } from './parser.js';
2
+ import type { ImportAlias } from './extractImports.js';
3
/** One string extracted from a translation call, before it is mapped to an ExtractionResult. */
export type RawTranslationCall = {
    /** The extracted source string. */
    source: string;
    /** Value of the `_id` keyword argument, when given as a plain string literal. */
    id?: string;
    /** Value of the `_context` keyword argument, when given as a plain string literal. */
    context?: string;
    /** Value of the `_max_chars` keyword argument, when given as an integer literal. */
    maxChars?: number;
    /** Temporary id shared by all variants produced from one call that uses declare_static()/declare_var(). */
    staticId?: string;
    /** Line of the call: the parser's start row plus one. */
    line: number;
    /** Column of the call, exactly as reported by the parser's start position. */
    column: number;
};
12
/**
 * Extracts translation function calls from a Python AST.
 * Walks all `call` nodes and checks if they reference a tracked import.
 *
 * @param rootNode - Root of the syntax tree to walk.
 * @param imports - GT imports found in the same file; entries for
 *   declare_static/declare_var are not treated as translation functions.
 * @param filePath - Path of the file being processed; forwarded to the
 *   string-expression parser.
 * @returns The extracted calls plus any error and warning messages. All three
 *   arrays are empty when `imports` contains no translation function.
 */
export declare function extractCalls(rootNode: SyntaxNode, imports: ImportAlias[], filePath: string): Promise<{
    calls: RawTranslationCall[];
    errors: string[];
    warnings: string[];
}>;
@@ -0,0 +1,211 @@
1
+ import { PYTHON_METADATA_KWARGS } from './constants.js';
2
+ import { containsStaticCalls, parseStringExpression, } from './parseStringExpression.js';
3
+ import { nodeToStrings } from './stringNode.js';
4
+ import { indexVars } from 'generaltranslation/internal';
5
+ import { randomUUID } from 'node:crypto';
6
/**
 * Collects every tracked translation call found beneath `rootNode`.
 *
 * A call is tracked when its callee is an identifier matching the local name
 * of an imported translation function. declare_static / declare_var imports
 * are helpers used inside such calls and are never tracked themselves.
 *
 * @param rootNode Root of the Python syntax tree.
 * @param imports GT imports discovered in the same file.
 * @param filePath Path of the file being processed.
 * @returns `{ calls, errors, warnings }`; all empty when nothing is tracked.
 */
export async function extractCalls(rootNode, imports, filePath) {
    const outcome = { calls: [], errors: [], warnings: [] };
    const trackedNames = new Set();
    for (const { originalName, localName } of imports) {
        const isStaticHelper = originalName === 'declare_static' || originalName === 'declare_var';
        if (!isStaticHelper) {
            trackedNames.add(localName);
        }
    }
    // Without a translation function in scope there is nothing to look for.
    if (trackedNames.size > 0) {
        await walkCalls(rootNode, trackedNames, imports, filePath, outcome.calls, outcome.errors, outcome.warnings);
    }
    return outcome;
}
24
/**
 * Visits `node` and all of its descendants in document (pre-)order and hands
 * every tracked translation call to processCall.
 *
 * The walk uses an explicit stack instead of recursion so that very deeply
 * nested trees (for example a long chain of `+` operators) cannot overflow
 * the JavaScript call stack.
 *
 * @param node Subtree root to start from.
 * @param trackedNames Local names of imported translation functions.
 * @param imports GT imports of the file, forwarded to processCall.
 * @param filePath Path of the file being processed.
 * @param calls Output array for extracted calls.
 * @param errors Output array for error messages.
 * @param warnings Output array for warning messages.
 */
async function walkCalls(node, trackedNames, imports, filePath, calls, errors, warnings) {
    const pending = [node];
    while (pending.length > 0) {
        const current = pending.pop();
        if (current.type === 'call') {
            const funcNode = current.childForFieldName('function');
            if (funcNode &&
                funcNode.type === 'identifier' &&
                trackedNames.has(funcNode.text)) {
                await processCall(current, imports, filePath, calls, errors, warnings);
            }
        }
        // Push children right-to-left so they are popped left-to-right,
        // preserving the order in which calls are reported.
        for (let i = current.childCount - 1; i >= 0; i--) {
            const child = current.child(i);
            if (child)
                pending.push(child);
        }
    }
}
39
/**
 * Validates one tracked translation call and records what it extracts.
 *
 * Two paths exist:
 *  - Compound: the first positional argument is an f-string, a binary
 *    operator expression or a call that contains declare_static() /
 *    declare_var(). It is parsed into a StringNode tree and every string
 *    variant is pushed, all sharing one temporary `staticId`.
 *  - Simple: the first positional argument must be a plain (non-f) string
 *    literal; its content is pushed once.
 *
 * Validation problems are reported by appending to `errors`.
 *
 * @param callNode The `call` syntax node.
 * @param imports GT imports of the file (used to recognise static helpers).
 * @param filePath Path of the file being processed.
 * @param calls Output array for extracted calls.
 * @param errors Output array for error messages.
 * @param _warnings Unused; kept for signature parity with the walker.
 */
async function processCall(callNode, imports, filePath, calls, errors, _warnings) {
    const argsNode = callNode.childForFieldName('arguments');
    if (!argsNode) {
        errors.push(`${locationStr(callNode)}: translation call has no arguments`);
        return;
    }
    // Children of the argument list that are not positional arguments.
    // Besides punctuation and keyword arguments this includes the grammar's
    // "extras" (comments, line continuations), which the parser attaches as
    // children wherever they occur, e.g. `t(  # note\n "Hello")`.
    const nonPositionalTypes = new Set([
        '(',
        ')',
        ',',
        'keyword_argument',
        'comment',
        'line_continuation',
    ]);
    let firstArg = null;
    for (let i = 0; i < argsNode.childCount; i++) {
        const child = argsNode.child(i);
        if (child && !nonPositionalTypes.has(child.type)) {
            firstArg = child;
            break;
        }
    }
    if (!firstArg) {
        errors.push(`${locationStr(callNode)}: translation call has no positional argument`);
        return;
    }
    // Check if this expression contains declare_static/declare_var
    const hasStaticHelpers = (firstArg.type === 'string' &&
        isFString(firstArg) &&
        containsStaticCalls(firstArg, imports)) ||
        (firstArg.type === 'binary_operator' &&
            containsStaticCalls(firstArg, imports)) ||
        (firstArg.type === 'call' && containsStaticCalls(firstArg, imports));
    if (hasStaticHelpers) {
        // Compound expression path: parse into StringNode tree
        const rootNode = callNode.tree?.rootNode;
        if (!rootNode) {
            errors.push(`${locationStr(callNode)}: could not access AST root`);
            return;
        }
        const stringNode = await parseStringExpression(firstArg, {
            rootNode,
            imports,
            filePath,
            errors,
        });
        // No tree was produced and no message is added here; presumably the
        // parser reported the reason through the shared `errors` array.
        if (!stringNode)
            return;
        const strings = nodeToStrings(stringNode).map(indexVars);
        if (strings.length === 0) {
            errors.push(`${locationStr(callNode)}: no string variants produced`);
            return;
        }
        const metadata = extractKwargs(argsNode, errors, callNode);
        // One id per call site so all variants can be grouped later.
        const staticId = `static-temp-id-${randomUUID()}`;
        for (const source of strings) {
            calls.push({
                source,
                ...metadata,
                staticId,
                line: callNode.startPosition.row + 1,
                column: callNode.startPosition.column,
            });
        }
        return;
    }
    // Simple path: validate first argument is a plain string literal
    if (firstArg.type !== 'string') {
        if (firstArg.type === 'identifier') {
            errors.push(`${locationStr(callNode)}: translation call uses a variable "${firstArg.text}" instead of a string literal`);
        }
        else if (firstArg.type === 'concatenated_string') {
            errors.push(`${locationStr(callNode)}: translation call uses concatenated strings — use a single string literal`);
        }
        else {
            errors.push(`${locationStr(callNode)}: translation call first argument must be a string literal, got "${firstArg.type}"`);
        }
        return;
    }
    // Check for f-strings (without declare_static/declare_var)
    if (isFString(firstArg)) {
        errors.push(`${locationStr(callNode)}: translation call uses an f-string — use a plain string literal or declare_static()/declare_var()`);
        return;
    }
    const source = extractStringContent(firstArg);
    if (source === undefined) {
        errors.push(`${locationStr(callNode)}: could not extract string content`);
        return;
    }
    // Extract keyword arguments
    const metadata = extractKwargs(argsNode, errors, callNode);
    calls.push({
        source,
        ...metadata,
        line: callNode.startPosition.row + 1,
        column: callNode.startPosition.column,
    });
}
134
/**
 * Reads the recognised metadata keyword arguments (`_id`, `_context`,
 * `_max_chars`) from a call's argument list.
 *
 * Keyword arguments that are not metadata keys are ignored. A metadata
 * keyword whose value is not a literal of the expected kind adds a message
 * to `errors` and is left out of the result.
 *
 * @param argsNode The call's argument-list node.
 * @param errors Output array for error messages.
 * @param callNode The call node, used only for the location in messages.
 * @returns Object with any of `id`, `context`, `maxChars` that were found.
 */
function extractKwargs(argsNode, errors, callNode) {
    const result = {};
    for (let i = 0; i < argsNode.childCount; i++) {
        const child = argsNode.child(i);
        if (!child || child.type !== 'keyword_argument')
            continue;
        const nameNode = child.childForFieldName('name');
        const valueNode = child.childForFieldName('value');
        if (!nameNode || !valueNode)
            continue;
        const kwargName = nameNode.text;
        // Own-property check: a bare lookup would also resolve names inherited
        // from Object.prototype (`constructor`, `toString`, ...), which are
        // perfectly valid Python keyword names and must simply be ignored.
        if (!Object.prototype.hasOwnProperty.call(PYTHON_METADATA_KWARGS, kwargName))
            continue;
        const metadataKey = PYTHON_METADATA_KWARGS[kwargName];
        if (metadataKey === 'maxChars') {
            // Python integer literals may contain `_` separators and use
            // 0x / 0o / 0b prefixes; Number() understands the prefixes once the
            // separators (and a legacy trailing `L`) are removed.
            const parsed = valueNode.type === 'integer'
                ? Number(valueNode.text.replace(/_/g, '').replace(/[lL]$/, ''))
                : NaN;
            if (Number.isInteger(parsed)) {
                result.maxChars = parsed;
            }
            else {
                errors.push(`${locationStr(callNode)}: _max_chars must be an integer literal`);
            }
        }
        else {
            if (valueNode.type === 'string' && !isFString(valueNode)) {
                const value = extractStringContent(valueNode);
                if (value !== undefined) {
                    if (metadataKey === 'id')
                        result.id = value;
                    else if (metadataKey === 'context')
                        result.context = value;
                }
            }
            else {
                errors.push(`${locationStr(callNode)}: _${metadataKey} must be a string literal`);
            }
        }
    }
    return result;
}
173
/**
 * Reports whether a `string` node is an f-string.
 *
 * A string counts as an f-string when the prefix in its `string_start` token
 * contains `f`/`F` anywhere (so combined prefixes such as `rf"..."` and
 * `Fr"..."` are recognised), or when it has an `interpolation` child.
 *
 * @param stringNode A `string` syntax node.
 * @returns `true` for f-strings, `false` otherwise.
 */
function isFString(stringNode) {
    for (let i = 0; i < stringNode.childCount; i++) {
        const child = stringNode.child(i);
        if (!child)
            continue;
        if (child.type === 'string_start') {
            // string_start is "<prefix letters><opening quote(s)>"; look only
            // at the letters so the quotes can never match.
            const prefix = child.text.match(/^[A-Za-z]*/)?.[0] ?? '';
            if (/f/i.test(prefix))
                return true;
            // Not decisive on its own: keep scanning for interpolations.
            continue;
        }
        // Interpolation children are the hallmark of f-strings
        if (child.type === 'interpolation') {
            return true;
        }
    }
    return false;
}
187
/**
 * Returns the text of a string literal node.
 *
 * @param stringNode A `string` syntax node.
 * @returns The text of the first `string_content` child; `''` when there is
 *   no content child but both `string_start` and `string_end` are present
 *   (an empty literal); `undefined` otherwise.
 */
function extractStringContent(stringNode) {
    let sawOpening = false;
    let sawClosing = false;
    for (let idx = 0; idx < stringNode.childCount; idx++) {
        const part = stringNode.child(idx);
        if (!part)
            continue;
        switch (part.type) {
            case 'string_content':
                // The first content child wins, wherever the delimiters are.
                return part.text;
            case 'string_start':
                sawOpening = true;
                break;
            case 'string_end':
                sawClosing = true;
                break;
            default:
                break;
        }
    }
    return sawOpening && sawClosing ? '' : undefined;
}
209
/**
 * Formats a node's start position for use in messages.
 * The row is shifted by one; the column is used exactly as reported.
 *
 * @param node Any syntax node.
 * @returns A string of the form `line <row + 1>, col <column>`.
 */
function locationStr(node) {
    const { row, column } = node.startPosition;
    const lineNumber = row + 1;
    return `line ${lineNumber}, col ${column}`;
}
@@ -0,0 +1,18 @@
1
+ import type { SyntaxNode } from './parser.js';
2
/** One name imported from a GT package, as seen in a `from ... import ...` statement. */
export type ImportAlias = {
    /** The local name used in the source file (e.g. "translate" for `import t as translate`) */
    localName: string;
    /** The original imported name (e.g. "t") */
    originalName: string;
    /** The package it was imported from (e.g. "gt_flask") */
    packageName: string;
};
10
/**
 * Extracts GT-related imports from a Python AST.
 *
 * Handles:
 * - `from gt_flask import t`
 * - `from gt_flask import t as translate`
 * - `from gt_flask import t, msg`
 *
 * @param rootNode - Root of the syntax tree; only its direct children are
 *   inspected for import statements.
 * @returns One entry per imported translation function or static helper.
 */
export declare function extractImports(rootNode: SyntaxNode): ImportAlias[];
@@ -0,0 +1,86 @@
1
+ import { PYTHON_GT_PACKAGES, PYTHON_TRANSLATION_FUNCTIONS, } from './constants.js';
2
/**
 * Collects the GT names a Python module imports.
 *
 * Recognised forms:
 * - `from gt_flask import t`
 * - `from gt_flask import t as translate`
 * - `from gt_flask import t, msg`
 *
 * Only direct children of `rootNode` are examined, and only names listed as
 * translation functions are kept.
 *
 * @param rootNode Root of the Python syntax tree.
 * @returns One `{ localName, originalName, packageName }` entry per import.
 */
export function extractImports(rootNode) {
    const found = [];
    for (let s = 0; s < rootNode.childCount; s++) {
        const statement = rootNode.child(s);
        if (statement?.type !== 'import_from_statement')
            continue;
        const packageName = getModuleName(statement);
        if (!packageName || !isGtPackage(packageName))
            continue;
        // Walk the statement's children and pick out the imported names.
        for (let e = 0; e < statement.childCount; e++) {
            const entry = statement.child(e);
            switch (entry?.type) {
                case 'aliased_import': {
                    // `from gt_flask import t as translate`
                    const nameNode = entry.childForFieldName('name');
                    const aliasNode = entry.childForFieldName('alias');
                    const originalName = nameNode ? getIdentifierText(nameNode) : undefined;
                    const localName = aliasNode ? aliasNode.text : originalName;
                    if (originalName && localName && isTranslationFunction(originalName)) {
                        found.push({ localName, originalName, packageName });
                    }
                    break;
                }
                case 'dotted_name': {
                    // `from gt_flask import t`: the module itself also shows up
                    // as a dotted_name, so it is skipped by comparing the text.
                    const importedName = entry.text;
                    if (importedName !== packageName && isTranslationFunction(importedName)) {
                        found.push({
                            localName: importedName,
                            originalName: importedName,
                            packageName,
                        });
                    }
                    break;
                }
                default:
                    break;
            }
        }
    }
    return found;
}
52
/**
 * Returns the module a `from ... import ...` statement imports from.
 *
 * Prefers the `module_name` field; otherwise falls back to the first
 * `dotted_name` child that appears before the `import` keyword.
 *
 * @param importNode An import-from statement node.
 * @returns The module text, or `undefined` when none is found.
 */
function getModuleName(importNode) {
    const viaField = importNode.childForFieldName('module_name');
    if (viaField) {
        return viaField.text;
    }
    let position = 0;
    while (position < importNode.childCount) {
        const candidate = importNode.child(position);
        position += 1;
        if (!candidate) {
            continue;
        }
        if (candidate.type === 'import') {
            // Everything after the keyword is an imported name, not the module.
            return undefined;
        }
        if (candidate.type === 'dotted_name') {
            return candidate.text;
        }
    }
    return undefined;
}
68
/**
 * Returns the plain identifier a name node refers to.
 *
 * For a `dotted_name` this is the text of its last `identifier` child; for
 * every other node (including a bare `identifier`), and for a dotted name
 * without identifier children, it is the node's own text.
 *
 * @param node An `identifier` or `dotted_name` node.
 * @returns The identifier text.
 */
function getIdentifierText(node) {
    if (node.type !== 'dotted_name') {
        return node.text;
    }
    // Scan from the right: the final segment is the imported name.
    let position = node.childCount;
    while (position-- > 0) {
        const segment = node.child(position);
        if (segment && segment.type === 'identifier') {
            return segment.text;
        }
    }
    return node.text;
}
81
/**
 * Tells whether `name` is one of the module names in PYTHON_GT_PACKAGES.
 *
 * @param name Module name taken from an import statement.
 * @returns `true` when the name is listed.
 */
function isGtPackage(name) {
    return PYTHON_GT_PACKAGES.some((knownPackage) => knownPackage === name);
}
84
/**
 * Tells whether `name` is one of the names in PYTHON_TRANSLATION_FUNCTIONS
 * (translation functions and static helpers).
 *
 * @param name Imported name as written in the source.
 * @returns `true` when the name is listed.
 */
function isTranslationFunction(name) {
    return PYTHON_TRANSLATION_FUNCTIONS.some((trackedName) => trackedName === name);
}
@@ -0,0 +1,9 @@
1
+ import type { ExtractionResult } from './types.js';
2
+ export type { ExtractionResult, ExtractionMetadata } from './types.js';
3
+ export type { ImportAlias } from './extractImports.js';
4
+ export { PYTHON_GT_PACKAGES, PYTHON_GT_DEPENDENCIES, PYTHON_T_FUNCTION, PYTHON_MSG_FUNCTION, PYTHON_DECLARE_STATIC, PYTHON_DECLARE_VAR, PYTHON_TRANSLATION_FUNCTIONS, PYTHON_METADATA_KWARGS, } from './constants.js';
5
/**
 * Parses Python source code and extracts the strings passed to GT
 * translation functions.
 *
 * @param sourceCode - The Python source text.
 * @param filePath - Path of the file; recorded in each result's metadata and
 *   used as a prefix for every error and warning message.
 * @returns The extraction results plus error and warning messages. All three
 *   arrays are empty when the source has no GT imports.
 */
export declare function extractFromPythonSource(sourceCode: string, filePath: string): Promise<{
    results: ExtractionResult[];
    errors: string[];
    warnings: string[];
}>;
package/dist/index.js ADDED
@@ -0,0 +1,42 @@
1
+ import { getParser } from './parser.js';
2
+ import { extractImports } from './extractImports.js';
3
+ import { extractCalls } from './extractCalls.js';
4
+ export { PYTHON_GT_PACKAGES, PYTHON_GT_DEPENDENCIES, PYTHON_T_FUNCTION, PYTHON_MSG_FUNCTION, PYTHON_DECLARE_STATIC, PYTHON_DECLARE_VAR, PYTHON_TRANSLATION_FUNCTIONS, PYTHON_METADATA_KWARGS, } from './constants.js';
5
/**
 * Runs the whole extraction pipeline for one Python file:
 * parse, find GT imports, find translation calls, map to ExtractionResult.
 *
 * @param sourceCode The Python source text.
 * @param filePath Path of the file; stored in each result's `filePaths` and
 *   prefixed to every error and warning.
 * @returns `{ results, errors, warnings }`. A failed parse yields a single
 *   error; a file without GT imports yields three empty arrays.
 */
export async function extractFromPythonSource(sourceCode, filePath) {
    const parser = await getParser();
    const tree = parser.parse(sourceCode);
    if (!tree) {
        return { results: [], errors: [`Failed to parse ${filePath}`], warnings: [] };
    }
    const root = tree.rootNode;
    // Step 1: GT imports. Without any, the file cannot contain tracked calls.
    const imports = extractImports(root);
    if (imports.length === 0) {
        return { results: [], errors: [], warnings: [] };
    }
    // Step 2: translation calls
    const extracted = await extractCalls(root, imports, filePath);
    // Step 3: map each raw call to an ExtractionResult, leaving out
    // metadata keys that were not provided.
    const toResult = (call) => ({
        dataFormat: 'ICU',
        source: call.source,
        metadata: {
            ...(call.id ? { id: call.id } : {}),
            ...(call.context ? { context: call.context } : {}),
            ...(call.maxChars != null ? { maxChars: call.maxChars } : {}),
            ...(call.staticId ? { staticId: call.staticId } : {}),
            filePaths: [filePath],
        },
    });
    return {
        results: extracted.calls.map((call) => toResult(call)),
        errors: prefixErrors(extracted.errors, filePath),
        warnings: prefixErrors(extracted.warnings, filePath),
    };
}
40
/**
 * Prepends the file path to each message.
 *
 * @param messages Messages to label.
 * @param filePath Path to use as the prefix.
 * @returns A new array of `<filePath>: <message>` strings, in the same order.
 */
function prefixErrors(messages, filePath) {
    const labelled = [];
    for (const message of messages) {
        labelled.push(`${filePath}: ${message}`);
    }
    return labelled;
}
@@ -0,0 +1,20 @@
1
+ import type { SyntaxNode } from './parser.js';
2
+ import type { StringNode } from './stringNode.js';
3
+ import type { ImportAlias } from './extractImports.js';
4
/** Shared state handed to the string-expression parser. */
type ParseContext = {
    /** Root node of the syntax tree that contains the expression. */
    rootNode: SyntaxNode;
    /** GT imports found in the file. */
    imports: ImportAlias[];
    /** Path of the file being processed. */
    filePath: string;
    /** Caller-owned list of error messages (presumably appended to when parsing fails; confirm in the implementation). */
    errors: string[];
};
10
/**
 * Checks if an expression contains declare_static or declare_var calls.
 *
 * @param node - The expression node to inspect.
 * @param imports - GT imports found in the file.
 * @returns `true` when such a call is present.
 */
export declare function containsStaticCalls(node: SyntaxNode, imports: ImportAlias[]): boolean;
14
/**
 * Parses the first argument of t() into a StringNode tree.
 * Handles: plain strings, f-strings with declare_static/declare_var,
 * binary + concatenation, and standalone declare_static calls.
 *
 * @param node - The expression node to parse.
 * @param ctx - Parsing context (tree root, imports, file path, error list).
 * @returns The parsed tree, or `null` when no tree could be produced.
 */
export declare function parseStringExpression(node: SyntaxNode, ctx: ParseContext): Promise<StringNode | null>;
20
+ export {};