motia 0.5.11-beta.120-742949 → 0.5.11-beta.120-433270

This diff shows the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
@@ -6,7 +6,6 @@ Object.defineProperty(exports, "__esModule", { value: true });
  exports.addPackageToArchive = void 0;
  const fs_1 = __importDefault(require("fs"));
  const path_1 = __importDefault(require("path"));
- const colors_1 = __importDefault(require("colors"));
  const shouldIgnore = (filePath) => {
      const ignorePatterns = [/\.pyc$/, /\.egg$/, /__pycache__/, /\.dist-info$/];
      return ignorePatterns.some((pattern) => pattern.test(filePath));
@@ -31,26 +30,24 @@ const addDirectoryToArchive = async (archive, baseDir, dirPath) => {
      .filter(Boolean));
  };
  const addPackageToArchive = async (archive, sitePackagesDir, packageName) => {
-     // First try the package name as is
-     let fullPath = path_1.default.join(sitePackagesDir, packageName);
-     // If not found, try with .py extension
-     if (!fs_1.default.existsSync(fullPath)) {
-         const pyPath = path_1.default.join(sitePackagesDir, `${packageName}.py`);
-         if (fs_1.default.existsSync(pyPath)) {
-             fullPath = pyPath;
+     const packageNameVariations = [packageName, `${packageName}.py`];
+     // Iterate over all possible package name variations
+     for (const pkg of packageNameVariations) {
+         let fullPath = path_1.default.join(sitePackagesDir, pkg);
+         if (!fs_1.default.existsSync(fullPath)) {
+             // If not found, try next package name variation
+             continue;
          }
-     }
-     if (!fs_1.default.existsSync(fullPath)) {
-         console.log(colors_1.default.yellow(`Warning: Package not found in site-packages: ${packageName}`));
+         const stat = fs_1.default.statSync(fullPath);
+         if (stat.isDirectory()) {
+             await addDirectoryToArchive(archive, sitePackagesDir, fullPath);
+         }
+         else {
+             const relativePath = path_1.default.relative(sitePackagesDir, fullPath);
+             archive.append(fs_1.default.createReadStream(fullPath), relativePath);
+         }
+         // package added successfully
          return;
      }
-     const stat = fs_1.default.statSync(fullPath);
-     if (stat.isDirectory()) {
-         await addDirectoryToArchive(archive, sitePackagesDir, fullPath);
-     }
-     else {
-         const relativePath = path_1.default.relative(sitePackagesDir, fullPath);
-         archive.append(fs_1.default.createReadStream(fullPath), relativePath);
-     }
  };
  exports.addPackageToArchive = addPackageToArchive;
@@ -23,60 +23,33 @@ class PythonBuilder {
          const normalizedEntrypointPath = entrypointPath.replace(/[.]step.py$/, '_step.py');
          const sitePackagesDir = `${process.env.PYTHON_SITE_PACKAGES}-lambda`;
          // Get Python builder response
-         const { packages, local_files } = await this.getPythonBuilderData(step);
+         const { packages, files } = await this.getPythonBuilderData(step);
          // Add main file to archive
          if (!fs_1.default.existsSync(step.filePath)) {
              throw new Error(`Source file not found: ${step.filePath}`);
          }
          archive.append(fs_1.default.createReadStream(step.filePath), path_1.default.relative(this.builder.projectDir, normalizedEntrypointPath));
-         // Add local Python files to archive
-         if (local_files && local_files.length > 0) {
-             local_files.forEach((localFile) => {
-                 const fullPath = path_1.default.join(this.builder.projectDir, localFile);
-                 if (fs_1.default.existsSync(fullPath)) {
-                     archive.append(fs_1.default.createReadStream(fullPath), localFile);
-                 }
-             });
+         files.forEach((file) => archive.append(fs_1.default.createReadStream(file), path_1.default.relative(this.builder.projectDir, file)));
+         if (packages.length > 0) {
+             await Promise.all(packages.map((pkg) => (0, add_package_to_archive_1.addPackageToArchive)(archive, sitePackagesDir, pkg.name)));
+             this.listener.onBuildProgress(step, `Added ${packages.length} packages to archive`);
          }
-         await Promise.all(packages.map(async (packageName) => (0, add_package_to_archive_1.addPackageToArchive)(archive, sitePackagesDir, packageName)));
          return normalizedEntrypointPath;
      }
      async build(step) {
          const entrypointPath = step.filePath.replace(this.builder.projectDir, '');
          const bundlePath = path_1.default.join('python', entrypointPath.replace(/(.*)\.py$/, '$1.zip'));
-         const normalizedEntrypointPath = entrypointPath.replace(/[.]step.py$/, '_step.py');
          const outfile = path_1.default.join(constants_1.distDir, bundlePath);
          try {
              // Create output directory
              fs_1.default.mkdirSync(path_1.default.dirname(outfile), { recursive: true });
              this.listener.onBuildStart(step);
-             // Get Python builder response
-             const { packages, local_files } = await this.getPythonBuilderData(step);
+             // Create the step zip archive
              const stepArchiver = new archiver_1.Archiver(outfile);
+             // Build the step
              const stepPath = await this.buildStep(step, stepArchiver);
-             // Add main file to archive
-             if (!fs_1.default.existsSync(step.filePath)) {
-                 throw new Error(`Source file not found: ${step.filePath}`);
-             }
-             stepArchiver.append(fs_1.default.createReadStream(step.filePath), path_1.default.relative(this.builder.projectDir, normalizedEntrypointPath));
-             // Add all imported files to archive
-             this.listener.onBuildProgress(step, 'Adding imported files to archive...');
-             const sitePackagesDir = `${process.env.PYTHON_SITE_PACKAGES}-lambda`;
-             // Add local Python files to archive
-             if (local_files && local_files.length > 0) {
-                 local_files.forEach((localFile) => {
-                     const fullPath = path_1.default.join(this.builder.projectDir, localFile);
-                     if (fs_1.default.existsSync(fullPath)) {
-                         stepArchiver.append(fs_1.default.createReadStream(fullPath), localFile);
-                     }
-                 });
-                 this.listener.onBuildProgress(step, `Added ${local_files.length} local Python files to archive`);
-             }
+             // Add static files to the archive
              (0, include_static_files_1.includeStaticFiles)([step], this.builder, stepArchiver);
-             if (packages.length > 0) {
-                 await Promise.all(packages.map(async (packageName) => (0, add_package_to_archive_1.addPackageToArchive)(stepArchiver, sitePackagesDir, packageName)));
-                 this.listener.onBuildProgress(step, `Added ${packages.length} packages to archive`);
-             }
              // Finalize the archive and wait for completion
              const size = await stepArchiver.finalize();
              this.builder.registerStep({ entrypointPath: stepPath, bundlePath, step, type: 'python' });
@@ -119,7 +92,7 @@ class PythonBuilder {
      }
      async getPythonBuilderData(step) {
          return new Promise((resolve, reject) => {
-             const child = (0, child_process_1.spawn)('python', [path_1.default.join(__dirname, 'python-builder.py'), step.filePath], {
+             const child = (0, child_process_1.spawn)('python', [path_1.default.join(__dirname, 'python-builder.py'), this.builder.projectDir, step.filePath], {
                  cwd: this.builder.projectDir,
                  stdio: [undefined, undefined, 'pipe', 'ipc'],
              });
@@ -1,313 +1,39 @@
  import os
  import sys
  import json
- import importlib.util
  import traceback
- import site
- import builtins
- import ast
- import importlib.metadata
- import subprocess
- import re
- from typing import Set, List, Tuple, Optional, Dict, Any
- from pathlib import Path
- from functools import lru_cache

- NODEIPCFD = int(os.environ["NODE_CHANNEL_FD"])
+ from trace_packages import trace_packages
+ from trace_project_files import trace_project_files

- # Cache for built-in modules to avoid repeated checks
- _builtin_modules_cache: Set[str] = set()
-
- @lru_cache(maxsize=1024)
- def is_valid_package_name(name: str) -> bool:
-     """Check if a name is a valid package name."""
-     if not name or name.startswith('_'):
-         return False
-
-     # Skip common special cases
-     invalid_names = {'__main__', 'module', 'cython_runtime', 'builtins'}
-     return name not in invalid_names
-
- @lru_cache(maxsize=1024)
- def get_package_name(module_name: str) -> str:
-     """Get the top-level package name from a module name."""
-     return module_name.split('.')[0]
-
- @lru_cache(maxsize=1024)
- def clean_package_name(package_name: str) -> str:
-     """Clean package name by removing version specifiers and other metadata."""
-     # Remove version specifiers and conditions using regex
-     package_name = re.sub(r'[<>=~!;].*$', '', package_name)
-     # Remove any remaining whitespace and convert underscores to hyphens
-     return package_name.strip().replace('_', '-')
-
- @lru_cache(maxsize=1024)
- def extract_base_package_name(dependency_spec: str) -> str:
-     """
-     Extract the base package name from a complex dependency specification.
-     Handles cases like:
-     - 'package (>=1.2.1,<2.0.0)'
-     - 'package[extra] (>=1.2.1)'
-     - 'package ; extra == "vlm"'
-     - 'package (>=1.2.1) ; sys_platform == "darwin"'
-     """
-     # First, remove any conditions after semicolon
-     base_spec = dependency_spec.split(';')[0].strip()
-
-     # Extract the package name before any version specifiers or extras
-     match = re.match(r'^([a-zA-Z0-9_.-]+)(?:\[[^\]]+\])?(?:\s*\([^)]*\))?$', base_spec)
-
-     return clean_package_name(match.group(1) if match else base_spec)
-
- @lru_cache(maxsize=1024)
- def is_package_installed(package_name: str) -> bool:
-     """Check if a package is installed in the current environment."""
-     try:
-         # Try both hyphenated and non-hyphenated versions
-         try:
-             importlib.metadata.distribution(package_name)
-             return True
-         except importlib.metadata.PackageNotFoundError:
-             # Try with hyphens replaced by underscores
-             alt_name = package_name.replace('-', '_')
-             if alt_name != package_name:
-                 importlib.metadata.distribution(alt_name)
-                 return True
-             return False
-     except importlib.metadata.PackageNotFoundError:
-         return False
-
- @lru_cache(maxsize=1024)
- def is_builtin_module(module_name: str) -> bool:
-     """Check if a module is a Python built-in module."""
-     if module_name in _builtin_modules_cache:
-         return True
-
-     # First check if it's a known built-in module name
-     builtin_modules = {
-         'os', 'sys', 'json', 'math', 'random', 'datetime', 'time', 'urllib', 'http',
-         'pathlib', 're', 'collections', 'itertools', 'functools', 'operator', 'typing',
-         'io', 'csv', 'xml', 'html', 'email', 'base64', 'hashlib', 'hmac', 'uuid',
-         'pickle', 'sqlite3', 'logging', 'unittest', 'argparse', 'configparser',
-         'tempfile', 'shutil', 'glob', 'fnmatch', 'subprocess', 'threading', 'queue',
-         'multiprocessing', 'concurrent', 'asyncio', 'socket', 'ssl', 'gzip', 'zipfile',
-         'tarfile', 'zlib', 'bz2', 'lzma', 'struct', 'array', 'ctypes', 'mmap',
-         'weakref', 'gc', 'inspect', 'dis', 'ast', 'token', 'tokenize', 'keyword',
-         'builtins', '__main__', 'site', 'sysconfig', 'platform', 'warnings'
-     }
-
-     if module_name in builtin_modules:
-         _builtin_modules_cache.add(module_name)
-         return True
-
-     try:
-         module = importlib.import_module(module_name)
-
-         # Built-in modules either have no __file__ attribute or their file is in the standard library
-         if not hasattr(module, '__file__'):
-             _builtin_modules_cache.add(module_name)
-             return True
-
-         # Get the standard library path
-         stdlib_path = os.path.dirname(os.__file__)
-
-         # Check if the module's file is in the standard library
-         is_builtin = module.__file__ and module.__file__.startswith(stdlib_path)
-         if is_builtin:
-             _builtin_modules_cache.add(module_name)
-         return is_builtin
-     except ImportError:
-         # If we can't import it, assume it's not a built-in module
-         # This handles local modules that aren't in the current Python path
-         return False
-
- def get_direct_imports(file_path: str) -> Set[str]:
-     """Extract direct imports from a Python file using AST parsing."""
-     direct_imports = set()
-
-     try:
-         with open(file_path, 'r') as f:
-             content = f.read()
-
-         tree = ast.parse(content)
-         for node in ast.walk(tree):
-             if isinstance(node, ast.Import):
-                 for name in node.names:
-                     base_pkg = name.name.split('.')[0]
-                     if is_valid_package_name(base_pkg) and not is_builtin_module(base_pkg):
-                         direct_imports.add(base_pkg)
-             elif isinstance(node, ast.ImportFrom):
-                 if node.module:
-                     base_pkg = node.module.split('.')[0]
-                     if is_valid_package_name(base_pkg) and not is_builtin_module(base_pkg):
-                         direct_imports.add(base_pkg)
-     except Exception as e:
-         print(f"Warning: Could not parse imports from {file_path}: {str(e)}")
-
-     return direct_imports
-
- def get_all_python_files(project_root: str) -> List[str]:
-     """Get all Python files in the project."""
-     python_files = []
-     for root, dirs, files in os.walk(project_root):
-         # Skip common directories
-         dirs[:] = [d for d in dirs if not d.startswith('.') and d not in
-                    {'__pycache__', 'node_modules', 'dist', 'build', 'venv'}]
-
-         for file in files:
-             if file.endswith('.py') and not file.startswith('.'):
-                 full_path = os.path.join(root, file)
-                 relative_path = os.path.relpath(full_path, project_root)
-                 python_files.append(relative_path)
-
-     return python_files
-
- def get_imports_from_file(file_path: str) -> Set[str]:
-     """Get all import module names from a Python file."""
-     imports = set()
-
-     try:
-         with open(file_path, 'r') as f:
-             content = f.read()
-
-         tree = ast.parse(content)
-         for node in ast.walk(tree):
-             if isinstance(node, ast.Import):
-                 for name in node.names:
-                     imports.add(name.name)
-             elif isinstance(node, ast.ImportFrom):
-                 if node.module:
-                     imports.add(node.module)
-     except Exception as e:
-         print(f"Warning: Could not parse imports from {file_path}: {str(e)}")
-
-     return imports
-
- def get_local_files_for_entry(entry_file: str) -> List[str]:
-     """Get local Python files that are imported by the entry file."""
-     # Find project root
-     project_root = os.path.dirname(entry_file)
-     while project_root != os.path.dirname(project_root):
-         if any(os.path.exists(os.path.join(project_root, f))
-                for f in ['package.json', 'requirements.txt']):
-             break
-         project_root = os.path.dirname(project_root)
-
-     # Get all Python files in the project
-     all_python_files = get_all_python_files(project_root)
-
-     # Get imports from the entry file
-     imports = get_imports_from_file(entry_file)
-
-     # Check which imports match local Python files
-     local_files = []
-     for import_name in imports:
-         for py_file in all_python_files:
-             # Convert file path to module name (e.g., 'utils/example.py' -> 'utils.example')
-             module_name = py_file.replace(os.sep, '.').replace('.py', '')
-             if import_name == module_name:
-                 local_files.append(py_file)
-
-     return sorted(local_files)
-
- def trace_imports(entry_file: str) -> List[str]:
-     """Find all imported Python packages from entry file and its local imports."""
-     entry_file = os.path.abspath(entry_file)
-
-     # Get local files that are imported
-     local_files = get_local_files_for_entry(entry_file)
-
-     # Get project root
-     project_root = os.path.dirname(entry_file)
-     while project_root != os.path.dirname(project_root):
-         if any(os.path.exists(os.path.join(project_root, f))
-                for f in ['package.json', 'requirements.txt']):
-             break
-         project_root = os.path.dirname(project_root)
-
-     # Get imports from entry file and local files
-     all_packages = set()
-     processed_packages = set()
-     files_to_process = [entry_file] + [os.path.join(project_root, f) for f in local_files]
-
-     for python_file in files_to_process:
-         if os.path.exists(python_file):
-             direct_imports = get_direct_imports(python_file)
-             for package_name in direct_imports:
-                 if is_valid_package_name(package_name) and not is_builtin_module(package_name):
-                     all_packages.add(package_name)
-                     # Get all dependencies including sub-dependencies
-                     all_packages.update(get_package_dependencies(package_name, processed_packages))
-
-     return sorted(list(all_packages))
-
- @lru_cache(maxsize=1024)
- def is_optional_dependency(req: str) -> bool:
-     """Check if a dependency is an optional dependency."""
-     return '[' in req or 'extra ==' in req
-
- def get_package_dependencies(package_name: str, processed: Set[str] = None) -> Set[str]:
-     """Get all dependencies (including sub-dependencies) for a given package."""
-     if processed is None:
-         processed = set()
-
-     if package_name in processed or is_builtin_module(package_name):
-         return set()
-
-     processed.add(package_name)
-     all_dependencies = set()
-
-     try:
-         # Try to get the distribution
-         try:
-             dist = importlib.metadata.distribution(package_name)
-         except importlib.metadata.PackageNotFoundError:
-             print(f'Warning: Package {package_name} not found')
-             return all_dependencies
-
-         # Filter out optional dependencies
-         sub_dependencies = list(filter(lambda dep: not is_optional_dependency(dep), dist.requires or []))
-
-         # Get direct dependencies
-         for req in sub_dependencies:
-             base_pkg = extract_base_package_name(req)
-
-             if base_pkg and base_pkg not in processed:
-                 # Try both hyphenated and non-hyphenated versions
-                 for dep_name in [base_pkg, base_pkg.replace('-', '_'), base_pkg.replace('_', '-')]:
-                     try:
-                         importlib.import_module(dep_name)
-                         all_dependencies.add(dep_name)
-                         # Recursively get sub-dependencies
-                         all_dependencies.update(get_package_dependencies(dep_name, processed))
-                         break
-                     except ImportError:
-                         continue
-
-     except Exception as e:
-         print(f"Warning: Error processing {package_name}: {str(e)}")
-
-     return all_dependencies
+ NODEIPCFD = int(os.environ.get("NODE_CHANNEL_FD", 0))

  def main() -> None:
      """Main entry point for the script."""
-     if len(sys.argv) != 2:
-         print("Usage: python python-builder.py <entry_file>", file=sys.stderr)
-         sys.exit(1)
+     if len(sys.argv) != 3:
+         print("Usage: python python-builder.py <project_dir> <entry_file>")
+         sys.exit(2)

-     entry_file = sys.argv[1]
-     try:
-         packages = trace_imports(entry_file)
-         local_files = get_local_files_for_entry(entry_file)
-
+     project_dir = os.path.abspath(sys.argv[1])
+     entry_file = os.path.abspath(sys.argv[2])

+     try:
+         # Find project dependencies
+         packages = trace_packages(project_dir, entry_file)
+         files = trace_project_files(project_dir, entry_file)

+         # Prepare output
          output = {
              'packages': packages,
-             'local_files': local_files
+             'files': files,
          }
+
+         # Output as JSON
          bytes_message = (json.dumps(output) + '\n').encode('utf-8')
-         os.write(NODEIPCFD, bytes_message)
+         if NODEIPCFD > 0:
+             os.write(NODEIPCFD, bytes_message)
+         else:
+             print(bytes_message)
          sys.exit(0)
      except Exception as e:
          print(f"Error: {str(e)}", file=sys.stderr)
@@ -0,0 +1,212 @@
+ import os
+ import sys
+ import importlib.util
+ import ast
+ import importlib.metadata
+ import re
+ from typing import Set, List
+ from functools import lru_cache
+
+ # Cache for built-in modules to avoid repeated checks
+ _builtin_modules_cache: Set[str] = set()
+
+ @lru_cache(maxsize=1024)
+ def is_valid_package_name(name: str) -> bool:
+     """Check if a name is a valid package name."""
+     if not name or name.startswith('_'):
+         return False
+
+     # Skip common special cases
+     invalid_names = {'__main__', 'module', 'cython_runtime', 'builtins'}
+     return name not in invalid_names
+
+ @lru_cache(maxsize=1024)
+ def get_package_name(module_name: str) -> str:
+     """Get the top-level package name from a module name."""
+     return module_name.split('.')[0]
+
+ @lru_cache(maxsize=1024)
+ def clean_package_name(package_name: str) -> str:
+     """Clean package name by removing version specifiers and other metadata."""
+     # Remove version specifiers and conditions using regex
+     package_name = re.sub(r'[<>=~!;].*$', '', package_name)
+     # Remove any remaining whitespace and convert underscores to hyphens
+     return package_name.strip().replace('_', '-')
+
+ @lru_cache(maxsize=1024)
+ def extract_base_package_name(dependency_spec: str) -> str:
+     """
+     Extract the base package name from a complex dependency specification.
+     Handles cases like:
+     - 'package (>=1.2.1,<2.0.0)'
+     - 'package[extra] (>=1.2.1)'
+     - 'package ; extra == "vlm"'
+     - 'package (>=1.2.1) ; sys_platform == "darwin"'
+     """
+     # First, remove any conditions after semicolon
+     base_spec = dependency_spec.split(';')[0].strip()
+
+     # Extract the package name before any version specifiers or extras
+     match = re.match(r'^([a-zA-Z0-9_.-]+)(?:\[[^\]]+\])?(?:\s*\([^)]*\))?$', base_spec)
+
+     return clean_package_name(match.group(1) if match else base_spec)
+
+ @lru_cache(maxsize=1024)
+ def is_package_installed(package_name: str) -> bool:
+     """Check if a package is installed in the current environment."""
+     try:
+         # Try both hyphenated and non-hyphenated versions
+         try:
+             importlib.metadata.distribution(package_name)
+             return True
+         except importlib.metadata.PackageNotFoundError:
+             # Try with hyphens replaced by underscores
+             alt_name = package_name.replace('-', '_')
+             if alt_name != package_name:
+                 importlib.metadata.distribution(alt_name)
+                 return True
+             return False
+     except importlib.metadata.PackageNotFoundError:
+         return False
+
+ @lru_cache(maxsize=1024)
+ def is_builtin_module(module_name: str) -> bool:
+     """Check if a module is a Python built-in module."""
+     if module_name in _builtin_modules_cache:
+         return True
+
+     try:
+         module = importlib.import_module(module_name)
+
+         # Built-in modules either have no __file__ attribute or their file is in the standard library
+         if not hasattr(module, '__file__'):
+             _builtin_modules_cache.add(module_name)
+             return True
+
+         # Get the standard library path
+         stdlib_path = os.path.dirname(os.__file__)
+
+         # Check if the module's file is in the standard library
+         is_builtin = module.__file__ and module.__file__.startswith(stdlib_path)
+         if is_builtin:
+             _builtin_modules_cache.add(module_name)
+         return is_builtin
+     except ImportError:
+         return False
+
+ def is_local_import(package_name: str, project_dir: str) -> bool:
+     """Check if a package is a local import within the project directory."""
+     try:
+         # Try to import the module
+         module = importlib.import_module(package_name)
+
+         # If the module has no __file__ attribute, it's not a local file
+         if not hasattr(module, '__file__'):
+             return False
+
+         # Check if the module's file is within the project directory
+         module_path = os.path.abspath(module.__file__)
+
+         # Check if the module path is not in a site-packages or dist-packages directory
+         return 'site-packages' not in module_path and 'dist-packages' not in module_path
+
+     except ImportError:
+         # If we can't import it, it means it's a local import
+         return True
+
+ def get_direct_imports(project_dir: str, file_path: str) -> Set[str]:
+     """Extract direct imports from a Python file using AST parsing."""
+     direct_imports = set()
+
+     try:
+         with open(file_path, 'r') as f:
+             content = f.read()
+
+         tree = ast.parse(content)
+         for node in ast.walk(tree):
+             if isinstance(node, ast.Import):
+                 for name in node.names:
+                     base_pkg = name.name.split('.')[0]
+                     if is_valid_package_name(base_pkg) and not is_builtin_module(base_pkg):
+                         # Check if this is a local import
+                         if not is_local_import(base_pkg, project_dir):
+                             direct_imports.add(base_pkg)
+             elif isinstance(node, ast.ImportFrom):
+                 if node.module:
+                     base_pkg = node.module.split('.')[0]
+                     if is_valid_package_name(base_pkg) and not is_builtin_module(base_pkg):
+                         # Check if this is a local import
+                         if not is_local_import(base_pkg, project_dir):
+                             direct_imports.add(base_pkg)
+     except Exception as e:
+         print(f"Warning: Could not parse imports from {file_path}: {str(e)}")
+
+     return direct_imports
+
+ @lru_cache(maxsize=1024)
+ def is_optional_dependency(req: str) -> bool:
+     """Check if a dependency is an optional dependency."""
+     return '[' in req or 'extra ==' in req
+
+ def get_package_dependencies(package_name: str, processed: Set[str] = None) -> Set[str]:
+     """Get all dependencies (including sub-dependencies) for a given package."""
+     if processed is None:
+         processed = set()
+
+     if package_name in processed:
+         return set()
+
+     processed.add(package_name)
+     all_dependencies = set()
+
+     try:
+         # Try to get the distribution
+         try:
+             dist = importlib.metadata.distribution(package_name)
+         except importlib.metadata.PackageNotFoundError:
+             print(f'Warning: Package {package_name} not found')
+             return all_dependencies
+
+         # Filter out optional dependencies
+         sub_dependencies = list(filter(lambda dep: not is_optional_dependency(dep), dist.requires or []))
+
+         # Get direct dependencies
+         for req in sub_dependencies:
+             base_pkg = extract_base_package_name(req)
+
+             if base_pkg and base_pkg not in processed:
+                 # Try both hyphenated and non-hyphenated versions
+                 for dep_name in [base_pkg, base_pkg.replace('-', '_'), base_pkg.replace('_', '-')]:
+                     try:
+                         importlib.import_module(dep_name)
+                         all_dependencies.add(dep_name)
+                         # Recursively get sub-dependencies
+                         all_dependencies.update(get_package_dependencies(dep_name, processed))
+                         break
+                     except ImportError:
+                         continue
+
+     except Exception as e:
+         print(f"Warning: Error processing {package_name}: {str(e)}")
+
+     return all_dependencies
+
+ def trace_packages(project_dir: str, entry_file: str) -> List[str]:
+     """Find all imported Python packages and files starting from an entry file."""
+
+     # Get direct imports from the entry file
+     direct_imports = get_direct_imports(project_dir, entry_file)
+
+     # Initialize lists to track packages
+     all_packages = []
+     processed_packages = set()
+
+     # Process each direct import and its dependencies
+     for package_name in direct_imports:
+         all_packages.append({ 'name': package_name, 'is_direct_import': True })
+         # Get all dependencies including sub-dependencies
+         dependencies = get_package_dependencies(package_name, processed_packages)
+         all_packages.extend([{ 'name': dep, 'is_direct_import': False } for dep in dependencies])
+
+     # Filter out built-in packages
+     return all_packages
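
Worth noting when reading the builder changes above: trace_packages returns a list of dicts rather than the flat, sorted list of names the old trace_imports produced, which is why the Node side now maps over packages and passes pkg.name to addPackageToArchive. A small usage sketch with hypothetical paths:

    from trace_packages import trace_packages

    # Hypothetical project layout; the entry file is a step inside the project.
    packages = trace_packages("/path/to/project", "/path/to/project/steps/api_step.py")
    for pkg in packages:
        kind = "direct" if pkg["is_direct_import"] else "transitive"
        print(f"{pkg['name']} ({kind})")
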