modaic 0.10.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
modaic/module_utils.py ADDED
@@ -0,0 +1,560 @@
1
+ import importlib.util
2
+ import re
3
+ import shutil
4
+ import sys
5
+ import sysconfig
6
+ import warnings
7
+ from pathlib import Path
8
+ from types import ModuleType
9
+ from typing import Dict
10
+
11
+ import tomlkit as tomlk
12
+
13
+ from .constants import EDITABLE_MODE, SYNC_DIR
14
+ from .utils import smart_rmtree
15
+
16
+
17
def is_builtin(module_name: str) -> bool:
    """Tell whether ``module_name`` is compiled into the interpreter.

    Args:
        module_name: The fully qualified module name.

    Returns:
        bool: True when the name appears in ``sys.builtin_module_names``.
    """
    builtin_names = sys.builtin_module_names
    return module_name in builtin_names
28
+
29
+
30
def is_stdlib(module_name: str) -> bool:
    """Check whether a module belongs to the Python standard library.

    Args:
        module_name: The fully qualified module name.

    Returns:
        bool: True if the module is part of the stdlib (including built-in
        and frozen modules), False if it cannot be located or lives elsewhere.
    """
    try:
        spec = importlib.util.find_spec(module_name)
    except Exception:
        # find_spec may import parent packages, so arbitrary errors can
        # surface here; an unresolvable name is simply "not stdlib".
        return False
    if not spec:
        return False

    origin = spec.origin
    # Frozen stdlib modules (e.g. ``os`` on recent CPython versions) report
    # the literal origin "frozen" rather than a file path.
    if origin in ("built-in", "frozen"):
        return True
    if not origin:
        # Namespace packages have no origin; resolving an empty path would
        # silently yield the current working directory.
        return False

    stdlib_dir = Path(sysconfig.get_paths()["stdlib"]).resolve()
    try:
        origin_path = Path(origin).resolve()
    except OSError:
        return False

    # site-packages/dist-packages commonly live *inside* the stdlib
    # directory; modules installed there are third-party, not stdlib.
    lowered_parts = {part.lower() for part in origin_path.parts}
    if "site-packages" in lowered_parts or "dist-packages" in lowered_parts:
        return False
    return stdlib_dir in origin_path.parents or origin_path == stdlib_dir
57
+
58
+
59
def is_builtin_or_frozen(mod: ModuleType) -> bool:
    """Tell whether a loaded module object is built-in or frozen.

    Args:
        mod: The module object.

    Returns:
        bool: True when the module's ``__name__`` is listed in
        ``sys.builtin_module_names`` or its spec origin is "built-in" or
        "frozen".
    """
    module_name = getattr(mod, "__name__", None)
    if module_name in sys.builtin_module_names:
        return True
    module_spec = getattr(mod, "__spec__", None)
    return getattr(module_spec, "origin", None) in ("built-in", "frozen")
73
+
74
+
75
+ # FIXME: make faster. Currently takes ~.70 seconds
76
def get_internal_imports() -> Dict[str, ModuleType]:
    """Return only internal modules currently loaded in sys.modules.

    A module is kept when it has a ``__file__``, is not built-in, frozen or
    part of the standard library, is not the ``modaic`` package itself, and
    is not located under a "site-packages" or "dist-packages" directory.

    When the ``EDITABLE_MODE`` constant is truthy, modules whose path
    contains ``src/modaic`` are excluded as well.

    Returns:
        Dict[str, ModuleType]: Mapping of module names to module objects.
        When one module object is registered under several names, only the
        first name encountered is reported.
    """
    internal: Dict[str, ModuleType] = {}

    seen: set[int] = set()
    for name, module in list(sys.modules.items()):
        if module is None:
            continue
        module_id = id(module)
        if module_id in seen:
            continue
        seen.add(module_id)

        if is_builtin_or_frozen(module):
            continue

        # Edge case: the modaic package itself. A prefix test is used so that
        # unrelated packages whose name merely contains "modaic." (for
        # example "my_modaic.tools") are not dropped by accident.
        if name == "modaic" or name.startswith("modaic."):
            continue

        module_file = getattr(module, "__file__", None)
        if not module_file:
            continue
        try:
            module_path = Path(module_file).resolve()
        except OSError:
            continue

        # Cheap path inspection first: it rejects most third-party modules
        # without paying for the spec lookup done by is_stdlib.
        if is_external_package(module_path):
            continue
        if is_builtin(name) or is_stdlib(name):
            continue
        if EDITABLE_MODE and "src/modaic" in module_path.as_posix().lower():
            continue

        internal[name] = module

    return internal
132
+
133
+
134
def resolve_project_root() -> Path:
    """
    Locate the project root, i.e. the current directory when it holds a pyproject.toml.

    Returns:
        Path: The resolved directory that contains pyproject.toml.

    Raises:
        FileNotFoundError: If pyproject.toml is not found in the current directory.
    """
    manifest = Path("pyproject.toml")
    if manifest.exists():
        return manifest.resolve().parent
    raise FileNotFoundError("pyproject.toml not found in current directory")
145
+
146
+
147
def is_path_ignored(target_path: Path, ignored_paths: list[Path]) -> bool:
    """Return True if target_path matches or is contained within any ignored path.

    Both the target and every ignored entry are resolved before comparison so
    that entries containing ``..`` segments or symlinks still match.

    Args:
        target_path: The path to test.
        ignored_paths: Files or directories that are excluded.

    Returns:
        bool: True when the target equals an ignored path or lies beneath
        one; False otherwise, including when the target cannot be resolved.
    """
    try:
        absolute_target = target_path.resolve()
    except OSError:
        return False
    for ignored in ignored_paths:
        try:
            absolute_ignored = Path(ignored).resolve()
        except OSError:
            # Fall back to the entry as given rather than dropping it.
            absolute_ignored = Path(ignored)
        if absolute_target == absolute_ignored:
            return True
        try:
            absolute_target.relative_to(absolute_ignored)
        except ValueError:
            # Not located under this ignored path; try the next one.
            continue
        return True
    return False
162
+
163
+
164
def copy_module_layout(base_dir: Path, name_parts: list[str]) -> None:
    """
    Build a nested package directory chain, giving every level an __init__.py file.

    Each entry of name_parts becomes a directory nested inside the previous
    one. Existing __init__.py files are left untouched.

    Example:
        Given a base_dir of "/tmp/modaic" and name_parts of ["program","indexer"],
        creates the following layout:
        | /tmp/modaic/
        |   program/
        |     __init__.py
        |     indexer/
        |       __init__.py
    """
    package_dir = base_dir
    for segment in name_parts:
        package_dir = package_dir.joinpath(segment)
        package_dir.mkdir(parents=True, exist_ok=True)
        marker = package_dir.joinpath("__init__.py")
        if marker.exists():
            continue
        marker.touch()
183
+
184
+
185
def is_external_package(path: Path) -> bool:
    """Report whether any component of path is a site-packages or dist-packages directory (case-insensitive)."""
    external_markers = ("site-packages", "dist-packages")
    lowered_components = [component.lower() for component in path.parts]
    return any(marker in lowered_components for marker in external_markers)
189
+
190
+
191
def get_ignored_files() -> list[Path]:
    """Collect the existing paths listed under [tool.modaic.exclude] files in pyproject.toml.

    Relative entries are anchored at the project root; entries that do not
    exist on disk are dropped.
    """
    project_root = resolve_project_root()
    document = tomlk.parse(Path("pyproject.toml").read_text(encoding="utf-8"))

    # Walk [tool] -> [tool.modaic] -> [tool.modaic.exclude] -> files,
    # defaulting at every level so a missing table yields an empty list.
    tool_table = document.get("tool", {})
    modaic_table = tool_table.get("modaic", {})
    exclude_table = modaic_table.get("exclude", {})
    configured = exclude_table.get("files", [])

    candidates = (Path(raw) for raw in configured)
    anchored = (item if item.is_absolute() else project_root / item for item in candidates)
    return [item for item in anchored if item.exists()]
213
+
214
+
215
def get_extra_paths() -> list[Path]:
    """Return a list of extra files and folders that should be included in staging.

    Entries come from ``[tool.modaic.include] files`` in pyproject.toml.
    Relative entries are anchored at the project root. Absolute entries must
    lie inside the project root; otherwise a warning is emitted and the entry
    is skipped. Entries that do not exist on disk are dropped.

    Returns:
        list[Path]: Existing paths to bundle in addition to imported modules.
    """
    project_root = resolve_project_root()
    pyproject_path = Path("pyproject.toml")
    doc = tomlk.parse(pyproject_path.read_text(encoding="utf-8"))
    files = (
        doc.get("tool", {})  # [tool]
        .get("modaic", {})  # [tool.modaic]
        .get("include", {})  # [tool.modaic.include]
        .get("files", [])  # [tool.modaic.include] files = ["file1", "file2"]
    )
    included: list[Path] = []
    for raw_entry in files:
        entry = Path(raw_entry)
        if entry.is_absolute():
            entry = entry.resolve()
            try:
                entry.relative_to(project_root.resolve())
            except ValueError:
                warnings.warn(
                    f"{entry} will not be bundled because it is not inside the current working directory",
                    stacklevel=4,
                )
                # Honour the warning: without this the rejected path was
                # still appended and later failed in relative_to().
                continue
        else:
            entry = project_root / entry
        if entry.resolve().exists():
            included.append(entry)

    return included
244
+
245
+
246
def create_pyproject_toml(repo_dir: Path, package_name: str):
    """
    Create a new pyproject.toml for the bundled program in the staging directory.

    The [project] table of the pyproject.toml in the current directory is
    carried over with its dependencies filtered through
    get_final_dependencies and its name replaced by package_name. If present,
    [tool.uv.sources] is carried over too, and local path sources trigger a
    warning.

    Args:
        repo_dir: Directory in which the new pyproject.toml is written.
        package_name: Value written to project.name.

    Raises:
        KeyError: If the current pyproject.toml has no [project] table.
    """
    old = Path("pyproject.toml").read_text(encoding="utf-8")
    new = repo_dir / "pyproject.toml"

    doc_old = tomlk.parse(old)
    doc_new = tomlk.document()

    if "project" not in doc_old:
        raise KeyError("No [project] table in old TOML")
    doc_new["project"] = doc_old["project"]
    # "dependencies" is optional in a [project] table; treat a missing key
    # as an empty list instead of failing with KeyError.
    declared = doc_old["project"].get("dependencies", [])
    doc_new["project"]["dependencies"] = get_final_dependencies(declared)
    if "tool" in doc_old and "uv" in doc_old["tool"] and "sources" in doc_old["tool"]["uv"]:
        doc_new["tool"] = {"uv": {"sources": doc_old["tool"]["uv"]["sources"]}}
        warn_if_local(doc_new["tool"]["uv"]["sources"])

    doc_new["project"]["name"] = package_name

    # Write with the same encoding used for reading so non-ASCII metadata
    # survives on platforms whose default encoding is not UTF-8.
    with open(new, "w", encoding="utf-8") as fp:
        tomlk.dump(doc_new, fp)
268
+
269
+
270
def get_final_dependencies(dependencies: list[str]) -> list[str]:
    """
    Get the dependencies that should be included in the bundled program.

    Filters out the packages named in ``[tool.modaic.exclude] dependencies``
    and appends ``[tool.modaic.include] dependencies``, both read from the
    pyproject.toml in the current directory.

    Exclusion compares normalized distribution names (case-insensitive, with
    runs of ``-``, ``_`` and ``.`` treated as equal), so excluding
    ``requests`` removes ``requests>=2`` and ``requests[security]`` but not
    ``requests-oauthlib``.

    Args:
        dependencies: Requirement strings from the project's dependency list.

    Returns:
        list[str]: The remaining requirements followed by the included ones.
    """

    def _canonical_name(requirement: str) -> str:
        # The distribution name is the leading identifier of a requirement
        # string; extras, version specifiers, markers and URLs follow it.
        match = re.match(r"\s*([A-Za-z0-9][A-Za-z0-9._-]*)", requirement)
        raw_name = match.group(1) if match else requirement.strip()
        return re.sub(r"[-_.]+", "-", raw_name).lower()

    pyproject_path = Path("pyproject.toml")
    doc = tomlk.parse(pyproject_path.read_text(encoding="utf-8"))

    # Safely get [tool.modaic]; missing tables fall back to empty mappings.
    modaic_table = doc.get("tool", {}).get("modaic", {})
    # [tool.modaic.exclude] dependencies = ["praw", "sagemaker"]
    exclude_deps = modaic_table.get("exclude", {}).get("dependencies", [])
    # [tool.modaic.include] dependencies = ["praw", "sagemaker"]
    include_deps = modaic_table.get("include", {}).get("dependencies", [])

    if exclude_deps:
        excluded_names = {_canonical_name(dep) for dep in exclude_deps}
        dependencies = [pkg for pkg in dependencies if _canonical_name(pkg) not in excluded_names]
    return dependencies + include_deps
296
+
297
+
298
def warn_if_local(sources: dict[str, dict]):
    """
    Emit one warning for every source entry that is installed from a local path.

    Args:
        sources: Mapping of package name to its source configuration; an
            entry counts as local when its configuration has a "path" key.
    """
    local_sources = [(source, config) for source, config in sources.items() if "path" in config]
    for source, config in local_sources:
        message = (
            f"Bundling program with local package {source} installed from {config['path']}. This is not recommended."
        )
        warnings.warn(message, stacklevel=5)
308
+
309
+
310
def _module_path(instance: object) -> str:
    """
    Return a deterministic module path for the given instance.

    The path is derived from the location of the defining module's file
    relative to the project root, not from ``cls.__module__`` itself, so a
    class defined in a script run as ``__main__`` resolves to that script's
    path.

    Args:
        instance: The object instance whose class path should be resolved.

    Returns:
        str: A dotted path in the form "<relative.file.path>.<ClassName>", or
        "modaic.PrecompiledConfig" when the instance's class is exactly
        PrecompiledConfig.

    Raises:
        FileNotFoundError: If no pyproject.toml exists in the current directory.
        ValueError: If the module file is not located inside the project root.
    """
    from .precompiled import PrecompiledConfig

    cls = type(instance)
    if cls is PrecompiledConfig:
        return "modaic.PrecompiledConfig"

    module = sys.modules[cls.__module__]
    file = Path(module.__file__)
    project_root = resolve_project_root()
    try:
        relative = file.relative_to(project_root)
    except ValueError:
        # The project root is a resolved path; retry with the resolved file
        # so modules reached through a symlinked directory still match.
        relative = file.resolve().relative_to(project_root)

    # Joining the path components is separator-agnostic, so no per-platform
    # string replacement is needed.
    module_path = ".".join(relative.with_suffix("").parts)

    return f"{module_path}.{cls.__name__}"
339
+
340
+
341
def _link_missing(link: Path, source: Path) -> None:
    """Link source at link unless something is already there, merging into existing real directories."""
    if link.is_symlink():
        return
    if link.is_dir():
        # A real directory already exists (created for module files): add
        # only the children that are not present yet.
        if source.is_dir():
            for child in source.iterdir():
                _link_missing(link / child.name, child)
        return
    if link.exists():
        return
    link.parent.mkdir(parents=True, exist_ok=True)
    smart_link(link, source)


def create_sync_dir(repo_path: str, with_code: bool = True) -> Path:
    """Creates the 'sync' directory for the given repository path.
    - Contains a symlink directory layout of all files that will be pushed to modaic hub
    - The resulting directory is used to sync with a git repo in STAGING_DIR which orchestrates git operations

    Any previous sync directory for repo_path is removed first. README.md,
    LICENSE and CONTRIBUTING.md are linked when present and not ignored.
    With with_code, every internal ``.py`` module located inside the project
    root is linked together with the ``__init__.py`` files of its ancestor
    directories, followed by the configured extra paths, and a
    pyproject.toml is generated.

    Args:
        repo_path: Repository path on modaic hub; its last "/"-separated
            segment becomes the generated package name.
        with_code: Whether code files, extra paths and pyproject.toml are added.

    Returns:
        Path: The freshly created sync directory.
    """
    sync_dir = SYNC_DIR / repo_path
    smart_rmtree(sync_dir, ignore_errors=True)
    sync_dir.mkdir(parents=True, exist_ok=False)

    project_root = resolve_project_root()

    internal_imports = get_internal_imports()
    ignored_paths = get_ignored_files()

    seen_files: set[Path] = set()

    # Common repository files to include
    common_files = ["README.md", "LICENSE", "CONTRIBUTING.md"]

    for file_name in common_files:
        file_src = project_root / file_name
        if file_src.exists() and not is_path_ignored(file_src, ignored_paths):
            smart_link(sync_dir / file_name, file_src)
        elif file_name == "README.md":
            # Only warn for README.md since it's essential
            warnings.warn(
                "README.md not found in current directory. Please add one when pushing to the hub.",
                stacklevel=4,
            )

    if not with_code:
        return sync_dir

    for module in internal_imports.values():
        # Check for a missing __file__ before building a Path: Path(None)
        # raises TypeError and a Path object is always truthy.
        raw_file = getattr(module, "__file__", None)
        if not raw_file:
            continue
        module_file = Path(raw_file)
        try:
            src_path = module_file.resolve()
        except OSError:
            continue
        if src_path.suffix != ".py":
            continue
        if is_path_ignored(src_path, ignored_paths):
            continue
        if src_path in seen_files:
            continue
        seen_files.add(src_path)

        try:
            rel_path = module_file.relative_to(project_root)
        except ValueError:
            try:
                rel_path = src_path.relative_to(project_root)
            except ValueError:
                # The module lives outside the project root, so it has no
                # place in the mirrored layout.
                continue

        _link_missing(sync_dir / rel_path, src_path)

        # Ensure __init__.py is carried over at every directory level between
        # the module and the project root.
        for rel_dir in rel_path.parents:
            src_init = project_root / rel_dir / "__init__.py"
            if not src_init.exists() or is_path_ignored(src_init, ignored_paths):
                continue
            _link_missing(sync_dir / rel_dir / "__init__.py", src_init)
            seen_files.add(src_init.resolve())

    for extra_file in get_extra_paths():
        try:
            extra_rel = extra_file.relative_to(project_root)
        except ValueError:
            # Paths outside the project root cannot be mirrored.
            continue
        _link_missing(sync_dir / extra_rel, extra_file)

    package_name = repo_path.split("/")[-1]
    create_pyproject_toml(sync_dir, package_name)

    return sync_dir
411
+
412
+
413
def sync_dir_from(source_dir: Path) -> Path:
    """Recreate the sync directory for a source directory and link its top-level entries into it.

    The source is expected to look like modaic_hub/user/repo/rev; the target
    is SYNC_DIR/user/repo. Entries named ".git", "program.json" and
    "config.json" are not linked.
    """
    repo_dir = source_dir.parent
    user_dir = repo_dir.parent
    sync_dir = SYNC_DIR / user_dir.name / repo_dir.name

    # Start from a clean target every time.
    smart_rmtree(sync_dir, ignore_errors=True)
    sync_dir.mkdir(parents=True, exist_ok=False)

    skipped_names = {".git", "program.json", "config.json"}
    for entry in source_dir.iterdir():
        if entry.name not in skipped_names:
            smart_link(sync_dir / entry.name, entry)

    return sync_dir
429
+
430
+
431
def smart_link(link: Path, source: Path) -> None:
    """
    Create ``link`` so that it refers to ``source``.

    If on mac/linux use symlink
    If on windows use hardlink for files and recursive hardlink for directories

    Missing parent directories of ``link`` are created on every platform.

    Args:
        link: The path to create.
        source: The existing file or directory to link to.
    """
    # Previously only the Windows directory branch created the parent, so
    # linking a nested path failed with FileNotFoundError elsewhere.
    link.parent.mkdir(parents=True, exist_ok=True)
    if sys.platform.startswith("win"):
        if source.is_dir():
            recursive_hard_link(link, source)
        else:
            link.hardlink_to(source)
    else:
        link.symlink_to(source, target_is_directory=source.is_dir())
444
+
445
+
446
+ def recursive_hard_link(link: Path, source: Path) -> None:
447
+ """
448
+ Create a hard link to the source directory.
449
+ """
450
+ if source.is_dir():
451
+ link.mkdir(parents=True, exist_ok=True)
452
+ for src_path in source.iterdir():
453
+ recursive_hard_link(link / src_path.name, src_path)
454
+
455
+ else:
456
+ link.hardlink_to(source)
457
+
458
+
459
def _clear_git_repo(repo_dir: Path) -> None:
    """
    Clear the git repository of all files and directories except .git.

    Symbolic links are removed as links; the files or directories they point
    to are left untouched.

    Args:
        repo_dir: The repository directory whose top-level entries are removed.
    """
    git_dir = repo_dir / ".git"
    for path in repo_dir.iterdir():
        if path == git_dir:
            continue
        # is_dir() follows symlinks, but shutil.rmtree refuses to operate on
        # a symbolic link, so links must be unlinked instead.
        if path.is_dir() and not path.is_symlink():
            shutil.rmtree(path)
        else:
            path.unlink()
469
+
470
+
471
+ # not in use currently
472
def copy_update_program_dir(target_dir: Path, repo_path: str, with_code: bool = True) -> None:
    """
    Copies files from workspace to the staging directory. (Used for Windows)

    The target is first cleared of everything except ``.git``. README.md,
    LICENSE and CONTRIBUTING.md are copied when present and not ignored. With
    with_code, every internal ``.py`` module located inside the project root
    is copied together with the ``__init__.py`` files of its ancestor
    directories, followed by the configured extra paths, and a
    pyproject.toml is generated.

    Args:
        target_dir: The directory to copy the files to.
        repo_path: The path to the repository on modaic hub
        with_code: Whether to copy the code files.
    """
    _clear_git_repo(target_dir)
    project_root = resolve_project_root()

    internal_imports = get_internal_imports()
    ignored_paths = get_ignored_files()

    seen_files: set[Path] = set()

    # Common repository files to include
    common_files = ["README.md", "LICENSE", "CONTRIBUTING.md"]
    for file_name in common_files:
        file_src = project_root / file_name
        if file_src.exists() and not is_path_ignored(file_src, ignored_paths):
            shutil.copy2(file_src, target_dir / file_name)
        elif file_name == "README.md":
            # Only warn for README.md since it's essential
            warnings.warn(
                "README.md not found in current directory. Please add one when pushing to the hub.",
                stacklevel=4,
            )

    if not with_code:
        return

    for module in internal_imports.values():
        # Check for a missing __file__ before building a Path: Path(None)
        # raises TypeError and a Path object is always truthy.
        raw_file = getattr(module, "__file__", None)
        if not raw_file:
            continue
        module_file = Path(raw_file)
        try:
            src_path = module_file.resolve()
        except OSError:
            continue
        if src_path.suffix != ".py":
            continue
        if is_path_ignored(src_path, ignored_paths):
            continue
        if src_path in seen_files:
            continue
        seen_files.add(src_path)

        try:
            rel_path = module_file.relative_to(project_root)
        except ValueError:
            try:
                rel_path = src_path.relative_to(project_root)
            except ValueError:
                # The module lives outside the project root, so it has no
                # place in the copied layout.
                continue

        target_path = target_dir / rel_path
        target_path.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(src_path, target_path)

        # Ensure __init__.py is copied over at every directory level between
        # the module and the project root.
        for rel_dir in rel_path.parents:
            src_init = project_root / rel_dir / "__init__.py"
            target_init = target_dir / rel_dir / "__init__.py"
            if not src_init.exists() or target_init.exists():
                continue
            if is_path_ignored(src_init, ignored_paths):
                continue
            shutil.copy2(src_init, target_init)
            seen_files.add(src_init.resolve())

    for extra_path in get_extra_paths():
        try:
            extra_rel = extra_path.relative_to(project_root)
        except ValueError:
            # Paths outside the project root cannot be mirrored.
            continue
        target_path = target_dir / extra_rel
        target_path.parent.mkdir(parents=True, exist_ok=True)
        if extra_path.is_dir():
            # The directory may already exist because module files were
            # copied into it above.
            shutil.copytree(extra_path, target_path, dirs_exist_ok=True)
        else:
            shutil.copy2(extra_path, target_path)

    package_name = repo_path.split("/")[-1]
    create_pyproject_toml(target_dir, package_name)
547
+
548
+
549
+ # Not in use currently
550
def copy_update_from(target_dir: Path, source_dir: Path) -> None:
    """
    Replace the contents of target_dir (except .git) with copies of source_dir's entries.

    The top-level ".git" entry of source_dir is not copied.
    """
    _clear_git_repo(target_dir)
    source_git = source_dir / ".git"
    for entry in source_dir.iterdir():
        if entry == source_git:
            continue
        destination = target_dir / entry.name
        # Directories are copied as whole trees, everything else as a file.
        copier = shutil.copytree if entry.is_dir() else shutil.copy2
        copier(entry, destination)