@generaltranslation/python-extractor 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE.md +105 -0
- package/dist/constants.d.ts +13 -0
- package/dist/constants.js +18 -0
- package/dist/extractCalls.d.ts +20 -0
- package/dist/extractCalls.js +211 -0
- package/dist/extractImports.d.ts +18 -0
- package/dist/extractImports.js +86 -0
- package/dist/index.d.ts +9 -0
- package/dist/index.js +42 -0
- package/dist/parseStringExpression.d.ts +20 -0
- package/dist/parseStringExpression.js +607 -0
- package/dist/parser.d.ts +8 -0
- package/dist/parser.js +22 -0
- package/dist/resolveFunctionVariants.d.ts +20 -0
- package/dist/resolveFunctionVariants.js +122 -0
- package/dist/resolveImport.d.ts +11 -0
- package/dist/resolveImport.js +63 -0
- package/dist/stringNode.d.ts +21 -0
- package/dist/stringNode.js +51 -0
- package/dist/types.d.ts +16 -0
- package/dist/types.js +1 -0
- package/package.json +68 -0
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
import fs from 'node:fs';
|
|
2
|
+
import { getParser } from './parser.js';
|
|
3
|
+
const crossFileCache = new Map();
|
|
4
|
+
/**
 * Looks up `functionName` among the definitions of the current file's AST and
 * resolves every value that function can return.
 *
 * @param functionName - Name of the function to resolve.
 * @param rootNode - Root node of the already-parsed file; it is also handed to
 *   `parseExpr` for each return expression.
 * @param parseExpr - Async expression parser used for the return expressions
 *   (concat, declare_var, etc.).
 * @returns The node describing the return values, or `null` when the function
 *   is not found or none of its returns could be resolved.
 */
export async function resolveFunctionInCurrentFile(functionName, rootNode, parseExpr) {
    const definition = findFunctionDefinition(rootNode, functionName);
    return definition
        ? extractReturnVariants(definition, rootNode, parseExpr)
        : null;
}
|
|
15
|
+
/**
 * Resolves every return value of a function that lives in another file.
 *
 * The file is read and parsed, then handled exactly like a function of the
 * current file. Outcomes are memoized under `filePath::functionName`; an
 * unreadable file or a parser that yields no tree is memoized as `null`.
 *
 * @param functionName - Name of the function to resolve.
 * @param filePath - Path of the file that defines the function.
 * @param parseExpr - Async expression parser forwarded to the resolver.
 * @returns The resolved node, or `null` when nothing could be resolved.
 */
export async function resolveFunctionInFile(functionName, filePath, parseExpr) {
    const key = `${filePath}::${functionName}`;
    if (crossFileCache.has(key)) {
        return crossFileCache.get(key);
    }
    // Records the outcome (failures included) and hands it straight back.
    const remember = (value) => {
        crossFileCache.set(key, value);
        return value;
    };
    let text;
    try {
        text = fs.readFileSync(filePath, 'utf8');
    } catch {
        // Best effort: an unreadable file simply resolves to nothing.
        return remember(null);
    }
    const parser = await getParser();
    const tree = parser.parse(text);
    if (!tree) {
        return remember(null);
    }
    return remember(await resolveFunctionInCurrentFile(functionName, tree.rootNode, parseExpr));
}
|
|
42
|
+
/**
 * Finds the top-level `function_definition` called `name` in the AST.
 *
 * Only direct children of `rootNode` are inspected. A `decorated_definition`
 * is unwrapped through its `definition` field, and the inner function node is
 * what gets returned.
 *
 * When the module defines the same name more than once, the LAST definition is
 * returned. Python rebinds the name on every `def`, so the last one is what
 * callers actually see. This matters for `@overload` stubs, which precede the
 * real implementation and contain no `return` statements.
 *
 * @param rootNode - Module root node (needs `childCount` and `child(i)`).
 * @param name - Function name to look for.
 * @returns The matching function_definition node, or `null` if there is none.
 */
function findFunctionDefinition(rootNode, name) {
    let lastMatch = null;
    for (let i = 0; i < rootNode.childCount; i++) {
        const child = rootNode.child(i);
        if (!child) {
            continue;
        }
        // Decorated functions wrap the real definition one level down.
        const candidate = child.type === 'decorated_definition'
            ? child.childForFieldName('definition')
            : child;
        if (!candidate || candidate.type !== 'function_definition') {
            continue;
        }
        const nameNode = candidate.childForFieldName('name');
        if (nameNode && nameNode.text === name) {
            // Keep scanning: a later definition shadows this one.
            lastMatch = candidate;
        }
    }
    return lastMatch;
}
|
|
69
|
+
/**
 * Turns the return statements of a function into a single StringNode.
 *
 * Return expressions are gathered from the function's `body` field (nested
 * function definitions are skipped by the collector) and each one is run
 * through `parseExpr`. Expressions that parse to a falsy value are dropped.
 *
 * @param funcDef - The function_definition node to inspect.
 * @param rootNode - Root node of the file, forwarded to `parseExpr`.
 * @param parseExpr - Async parser called as `parseExpr(expression, rootNode)`.
 * @returns `null` when there is no body or nothing parsed, the single parsed
 *   node when there is exactly one, otherwise `{ type: 'choice', nodes }`.
 */
async function extractReturnVariants(funcDef, rootNode, parseExpr) {
    const bodyNode = funcDef.childForFieldName('body');
    if (!bodyNode) {
        return null;
    }
    const expressions = [];
    collectReturnExpressions(bodyNode, expressions);
    // Parsed one at a time, in source order.
    const parsed = [];
    for (let i = 0; i < expressions.length; i++) {
        const variant = await parseExpr(expressions[i], rootNode);
        if (variant) {
            parsed.push(variant);
        }
    }
    switch (parsed.length) {
        case 0:
            return null;
        case 1:
            return parsed[0];
        default:
            return { type: 'choice', nodes: parsed };
    }
}
|
|
94
|
+
/**
 * Appends to `results` the expression node of every `return_statement` found
 * under `node`, in document order.
 *
 * - `function_definition` subtrees are not entered, so returns of nested
 *   functions are ignored.
 * - For a return statement, the first non-null child that is not the `return`
 *   keyword is taken; a bare `return` contributes nothing.
 *
 * @param node - Subtree to scan (needs `type`, `childCount` and `child(i)`).
 * @param results - Output array, mutated in place.
 */
function collectReturnExpressions(node, results) {
    // Explicit stack instead of recursion; children are pushed in reverse so
    // they are visited left to right.
    const pending = [node];
    while (pending.length > 0) {
        const current = pending.pop();
        if (current.type === 'function_definition') {
            continue;
        }
        if (current.type === 'return_statement') {
            let index = 0;
            while (index < current.childCount) {
                const part = current.child(index);
                index += 1;
                if (part && part.type !== 'return') {
                    results.push(part);
                    break; // Only one expression per return
                }
            }
            continue;
        }
        for (let i = current.childCount - 1; i >= 0; i--) {
            const child = current.child(i);
            if (child) {
                pending.push(child);
            }
        }
    }
}
|
|
120
|
+
/**
 * Empties the module-level `crossFileCache`, discarding every result stored by
 * `resolveFunctionInFile` (including cached `null` failures).
 */
export function clearFunctionCache() {
|
|
121
|
+
crossFileCache.clear();
|
|
122
|
+
}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Resolves a Python import module path to a file path on disk.
|
|
3
|
+
*
|
|
4
|
+
* Handles:
|
|
5
|
+
* - Relative imports: `.helpers` → sibling `helpers.py`
|
|
6
|
+
* - Parent relative imports: `..utils` → parent dir `utils.py`
|
|
7
|
+
* - Absolute imports: `utils` → `utils.py` in same/parent dirs
|
|
8
|
+
* - Dotted paths: `myapp.utils` → `myapp/utils.py`
|
|
9
|
+
*/
|
|
10
|
+
export declare function resolveImportPath(moduleName: string, currentFilePath: string): string | null;
|
|
11
|
+
/**
 * Clears the cache of import resolutions kept by `resolveImportPath`.
 */
export declare function clearResolveCache(): void;
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
import fs from 'node:fs';
|
|
2
|
+
import path from 'node:path';
|
|
3
|
+
const resolveCache = new Map();
|
|
4
|
+
/**
 * Maps a Python import module path to a file on disk, memoizing the answer.
 *
 * Accepted module path shapes:
 * - Relative imports such as `.helpers`
 * - Parent relative imports such as `..utils`
 * - Absolute imports, simple (`utils`) or dotted (`myapp.utils`)
 *
 * Every outcome, `null` included, is cached under
 * `currentFilePath::moduleName` until `clearResolveCache()` is called.
 *
 * @param moduleName - Module path exactly as written in the import.
 * @param currentFilePath - Path of the file that contains the import.
 * @returns The resolved file path, or `null` when nothing matches.
 */
export function resolveImportPath(moduleName, currentFilePath) {
    const key = `${currentFilePath}::${moduleName}`;
    if (!resolveCache.has(key)) {
        resolveCache.set(key, doResolve(moduleName, currentFilePath));
    }
    return resolveCache.get(key);
}
|
|
22
|
+
/**
 * Performs the uncached resolution of a Python module path.
 *
 * - Relative imports (leading dots): the first dot stands for the importing
 *   file's directory and every further dot climbs one directory; the rest of
 *   the name is resolved from there. A dots-only name (e.g. `.`) gives `null`.
 * - Absolute imports: tried in the importing file's directory first, then in
 *   each ancestor directory in turn, so `myapp.utils` imported from a file
 *   inside `myapp/` is found. The nearest directory wins.
 *
 * @param moduleName - Module path as written in the import statement.
 * @param currentFilePath - Path of the importing file.
 * @returns The resolved file path, or `null` when no candidate exists.
 */
function doResolve(moduleName, currentFilePath) {
    const currentDir = path.dirname(currentFilePath);
    // Relative import: starts with dots
    if (moduleName.startsWith('.')) {
        let dotCount = 0;
        while (dotCount < moduleName.length && moduleName[dotCount] === '.') {
            dotCount++;
        }
        // Go up (dotCount - 1) directories from the current dir
        let baseDir = currentDir;
        for (let i = 1; i < dotCount; i++) {
            baseDir = path.dirname(baseDir);
        }
        const remainder = moduleName.slice(dotCount);
        if (!remainder) {
            return null;
        }
        return resolveModulePath(baseDir, remainder);
    }
    // Absolute import: probe the current directory, then walk up the tree.
    let searchDir = currentDir;
    for (;;) {
        const found = resolveModulePath(searchDir, moduleName);
        if (found) {
            return found;
        }
        const parentDir = path.dirname(searchDir);
        if (parentDir === searchDir) {
            // dirname() is a fixed point at the filesystem root (and at '.').
            return null;
        }
        searchDir = parentDir;
    }
}
/**
 * Resolves a dotted module name (e.g. "myapp.utils") relative to a base dir.
 * Tries baseDir/myapp/utils.py first, then the package form
 * baseDir/myapp/utils/__init__.py.
 *
 * @param baseDir - Directory the module name is resolved against.
 * @param moduleName - Dotted module name without leading dots.
 * @returns The first existing candidate path, or `null`.
 */
function resolveModulePath(baseDir, moduleName) {
    const parts = moduleName.split('.');
    const filePath = path.join(baseDir, ...parts) + '.py';
    if (fs.existsSync(filePath)) {
        return filePath;
    }
    // Try as package: moduleName/__init__.py
    const initPath = path.join(baseDir, ...parts, '__init__.py');
    if (fs.existsSync(initPath)) {
        return initPath;
    }
    return null;
}
|
|
61
|
+
/**
 * Empties the module-level `resolveCache`, so later `resolveImportPath` calls
 * resolve their imports again instead of reusing a stored result.
 */
export function clearResolveCache() {
|
|
62
|
+
resolveCache.clear();
|
|
63
|
+
}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
/**
 * Tree describing the possible values of a string: a text leaf, a sequence of
 * parts, or a choice between alternatives.
 */
type StringNode = StringTextNode | StringSequenceNode | StringChoiceNode;
|
|
2
|
+
/** Leaf node holding one literal string; `nodeToStrings` turns it into a single variant. */
type StringTextNode = {
|
|
3
|
+
type: 'text';
|
|
4
|
+
text: string;
|
|
5
|
+
};
|
|
6
|
+
/** Ordered parts; `nodeToStrings` expands them as the cartesian product of all parts. */
type StringSequenceNode = {
|
|
7
|
+
type: 'sequence';
|
|
8
|
+
nodes: StringNode[];
|
|
9
|
+
};
|
|
10
|
+
/** Alternative branches; `nodeToStrings` flattens them and removes duplicates. */
type StringChoiceNode = {
|
|
11
|
+
type: 'choice';
|
|
12
|
+
nodes: StringNode[];
|
|
13
|
+
};
|
|
14
|
+
export type { StringNode, StringTextNode, StringSequenceNode, StringChoiceNode, };
|
|
15
|
+
/**
|
|
16
|
+
* Converts a StringNode tree into all possible string variants.
|
|
17
|
+
* - TextNode → single string
|
|
18
|
+
* - SequenceNode → cartesian product of all parts
|
|
19
|
+
* - ChoiceNode → flattened branches (deduplicated)
|
|
20
|
+
*/
|
|
21
|
+
export declare function nodeToStrings(node: StringNode | null): string[];
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
// ===== Tree Construction ===== //
|
|
2
|
+
// Used for declare_static / declare_var parsing
|
|
3
|
+
/**
 * Converts a StringNode tree into all possible string variants.
 * - TextNode → single string
 * - SequenceNode → cartesian product of all parts
 * - ChoiceNode → flattened branches (deduplicated, first occurrence order)
 * - `null`, `undefined` or an unrecognized node type → no variants
 *
 * @param node - Tree to expand; may be `null` or `undefined`.
 * @returns Every string the tree can produce.
 */
export function nodeToStrings(node) {
    // `== null` also covers `undefined`, which would otherwise throw on `.type`.
    if (node == null) {
        return [];
    }
    switch (node.type) {
        case 'text':
            return [node.text];
        case 'sequence':
            return cartesianProduct(node.nodes.map((part) => nodeToStrings(part)));
        case 'choice': {
            // Accumulate straight into a Set: it keeps insertion order and
            // avoids spreading a large array into push() arguments.
            const unique = new Set();
            for (const branch of node.nodes) {
                for (const variant of nodeToStrings(branch)) {
                    unique.add(variant);
                }
            }
            return [...unique];
        }
        default:
            return [];
    }
}
|
|
29
|
+
/**
 * Concatenates one string from each array, for every possible combination.
 * The leftmost array varies slowest. An empty input gives an empty result and
 * a single array is handed back as-is.
 * @example cartesianProduct([["Hello "], ["day", "night"]]) → ["Hello day", "Hello night"]
 */
function cartesianProduct(arrays) {
    if (arrays.length === 0) {
        return [];
    }
    const [head, ...tail] = arrays;
    // Fold the remaining arrays onto the prefixes built so far.
    return tail.reduce((prefixes, suffixes) => {
        const combined = [];
        for (const prefix of prefixes) {
            for (const suffix of suffixes) {
                combined.push(prefix + suffix);
            }
        }
        return combined;
    }, head);
}
|
package/dist/types.d.ts
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/** One extraction result: the extracted content, its data format, and its metadata. */
export type ExtractionResult = {
|
|
2
|
+
/** The data format of the extracted content */
|
|
3
|
+
dataFormat: 'ICU' | 'JSX';
|
|
4
|
+
/** The extracted translatable content */
|
|
5
|
+
source: string;
|
|
6
|
+
/** Metadata about the extraction */
|
|
7
|
+
metadata: ExtractionMetadata;
|
|
8
|
+
};
|
|
9
|
+
/**
 * Metadata carried by an `ExtractionResult` (its `metadata` field).
 * Every field is optional.
 */
export type ExtractionMetadata = {
|
|
10
|
+
id?: string;
|
|
11
|
+
context?: string;
|
|
12
|
+
maxChars?: number;
|
|
13
|
+
filePaths?: string[];
|
|
14
|
+
/** Groups related static content variants together (for declareStatic/declare_static) */
|
|
15
|
+
staticId?: string;
|
|
16
|
+
};
|
package/dist/types.js
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
package/package.json
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@generaltranslation/python-extractor",
|
|
3
|
+
"version": "0.0.0",
|
|
4
|
+
"description": "Python source code extraction for General Translation",
|
|
5
|
+
"main": "dist/index.js",
|
|
6
|
+
"types": "dist/index.d.ts",
|
|
7
|
+
"type": "module",
|
|
8
|
+
"author": "General Translation, Inc.",
|
|
9
|
+
"license": "FSL-1.1-ALv2",
|
|
10
|
+
"exports": {
|
|
11
|
+
".": {
|
|
12
|
+
"import": "./dist/index.js",
|
|
13
|
+
"types": "./dist/index.d.ts"
|
|
14
|
+
},
|
|
15
|
+
"./types": {
|
|
16
|
+
"import": "./dist/types.js",
|
|
17
|
+
"types": "./dist/types.d.ts"
|
|
18
|
+
},
|
|
19
|
+
"./constants": {
|
|
20
|
+
"import": "./dist/constants.js",
|
|
21
|
+
"types": "./dist/constants.d.ts"
|
|
22
|
+
}
|
|
23
|
+
},
|
|
24
|
+
"files": [
|
|
25
|
+
"dist"
|
|
26
|
+
],
|
|
27
|
+
"repository": {
|
|
28
|
+
"type": "git",
|
|
29
|
+
"url": "git+https://github.com/generaltranslation/gt.git"
|
|
30
|
+
},
|
|
31
|
+
"keywords": [
|
|
32
|
+
"python",
|
|
33
|
+
"extraction",
|
|
34
|
+
"translation",
|
|
35
|
+
"i18n",
|
|
36
|
+
"flask",
|
|
37
|
+
"fastapi"
|
|
38
|
+
],
|
|
39
|
+
"devDependencies": {
|
|
40
|
+
"@types/node": "^22.5.1",
|
|
41
|
+
"typescript": "^5.5.4",
|
|
42
|
+
"vitest": "^2.0.0"
|
|
43
|
+
},
|
|
44
|
+
"engines": {
|
|
45
|
+
"node": ">=16.0.0"
|
|
46
|
+
},
|
|
47
|
+
"dependencies": {
|
|
48
|
+
"tree-sitter-python": "^0.25.0",
|
|
49
|
+
"web-tree-sitter": "^0.26.6",
|
|
50
|
+
"generaltranslation": "8.1.14"
|
|
51
|
+
},
|
|
52
|
+
"scripts": {
|
|
53
|
+
"build": "tsc",
|
|
54
|
+
"build:watch": "tsc --watch",
|
|
55
|
+
"build:clean": "sh ../../scripts/clean.sh && pnpm run build",
|
|
56
|
+
"build:release": "pnpm run build:clean",
|
|
57
|
+
"format": "prettier --write src",
|
|
58
|
+
"patch": "pnpm version patch",
|
|
59
|
+
"release": "pnpm run build:clean && pnpm publish",
|
|
60
|
+
"release:alpha": "pnpm run build:clean && pnpm publish --tag alpha",
|
|
61
|
+
"release:beta": "pnpm run build:clean && pnpm publish --tag beta",
|
|
62
|
+
"release:latest": "pnpm run build:clean && pnpm publish --tag latest",
|
|
63
|
+
"lint": "eslint \"src/**/*.{js,ts}\"",
|
|
64
|
+
"lint:fix": "eslint \"src/**/*.{js,ts}\" --fix",
|
|
65
|
+
"test": "vitest run",
|
|
66
|
+
"test:watch": "vitest"
|
|
67
|
+
}
|
|
68
|
+
}
|