atdata 0.2.0a1__py3-none-any.whl → 0.2.3b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,534 @@
1
+ """Module manager for automatic Python module generation.
2
+
3
+ This module provides automatic generation and management of Python modules
4
+ for dynamically decoded schema types. When enabled, modules are generated
5
+ on schema access to provide IDE autocomplete and type checking support.
6
+
7
+ Unlike simple .pyi stubs, the generated modules are actual Python code that
8
+ can be imported at runtime. This allows ``decode_schema`` to return properly
9
+ typed classes that work with both static type checkers and runtime.
10
+
11
+ Examples:
12
+ >>> from atdata.local import Index
13
+ >>>
14
+ >>> # Enable auto-stub generation
15
+ >>> index = Index(auto_stubs=True)
16
+ >>>
17
+ >>> # Modules are generated automatically on decode_schema
18
+ >>> MyType = index.decode_schema("atdata://local/sampleSchema/MySample@1.0.0")
19
+ >>> # MyType is now properly typed for IDE autocomplete!
20
+ >>>
21
+ >>> # Get the stub directory path for IDE configuration
22
+ >>> print(f"Add to IDE: {index.stub_dir}")
23
+ """
24
+
25
+ from pathlib import Path
26
+ from typing import Optional, Union, Type
27
+ import os
28
+ import re
29
+ import sys
30
+ import tempfile
31
+ import fcntl
32
+ import importlib.util
33
+
34
+ from ._schema_codec import generate_module
35
+
36
+
37
+ # Default stub directory location
38
+ DEFAULT_STUB_DIR = Path.home() / ".atdata" / "stubs"
39
+
40
+ # Pattern to extract version from module docstring
41
+ _VERSION_PATTERN = re.compile(r"^Schema: .+@(\d+\.\d+\.\d+)", re.MULTILINE)
42
+
43
+ # Pattern to extract authority from atdata:// URI
44
+ _AUTHORITY_PATTERN = re.compile(r"^atdata://([^/]+)/")
45
+
46
+ # Default authority for schemas without a ref
47
+ DEFAULT_AUTHORITY = "local"
48
+
49
+
50
+ def _extract_authority(schema_ref: Optional[str]) -> str:
51
+ """Extract authority from a schema reference URI.
52
+
53
+ Args:
54
+ schema_ref: Schema ref like "atdata://local/sampleSchema/Name@1.0.0"
55
+ or "atdata://alice.bsky.social/sampleSchema/Name@1.0.0"
56
+
57
+ Returns:
58
+ Authority string (e.g., "local", "alice.bsky.social", "did_plc_xxx").
59
+ Special characters like ':' are replaced with '_' for filesystem safety.
60
+ """
61
+ if not schema_ref:
62
+ return DEFAULT_AUTHORITY
63
+
64
+ match = _AUTHORITY_PATTERN.match(schema_ref)
65
+ if match:
66
+ authority = match.group(1)
67
+ # Make filesystem-safe: replace : with _
68
+ return authority.replace(":", "_")
69
+
70
+ return DEFAULT_AUTHORITY
71
+
72
+
73
class StubManager:
    """Manages automatic generation of Python modules for decoded schemas.

    The StubManager handles:
    - Determining module file paths from schema metadata
    - Checking if modules exist and are current
    - Generating modules atomically (write to temp, then replace)
    - Creating __init__.py files for proper package structure
    - Importing classes from generated modules
    - Cleaning up old modules

    Modules are organized by authority (from the schema ref URI) to avoid
    collisions between schemas with the same name from different sources::

        ~/.atdata/stubs/
            __init__.py
            local/
                __init__.py
                MySample_1_0_0.py
            alice.bsky.social/
                __init__.py
                MySample_1_0_0.py
            did_plc_abc123/
                __init__.py
                OtherSample_2_0_0.py

    Args:
        stub_dir: Directory to write module files. Defaults to ``~/.atdata/stubs/``.

    Examples:
        >>> manager = StubManager()
        >>> schema_dict = {"name": "MySample", "version": "1.0.0", "fields": [...]}
        >>> SampleClass = manager.ensure_module(schema_dict)
        >>> print(manager.stub_dir)
        /Users/you/.atdata/stubs
    """

    def __init__(self, stub_dir: Optional[Union[str, Path]] = None):
        self._stub_dir = DEFAULT_STUB_DIR if stub_dir is None else Path(stub_dir)

        # Set once the root directory and its __init__.py have been created.
        self._initialized = False
        # Controls the one-time IDE configuration hint.
        self._first_generation = True
        # Cache of imported classes: (authority, name, version) -> class
        self._class_cache: dict[tuple[str, str, str], Type] = {}

    @property
    def stub_dir(self) -> Path:
        """The directory where module files are written."""
        return self._stub_dir

    def _ensure_dir_exists(self) -> None:
        """Create stub directory with __init__.py if it doesn't exist."""
        if self._initialized:
            return
        self._stub_dir.mkdir(parents=True, exist_ok=True)
        # Create root __init__.py
        init_path = self._stub_dir / "__init__.py"
        if not init_path.exists():
            init_path.write_text('"""Auto-generated atdata schema modules."""\n')
        self._initialized = True

    @staticmethod
    def _is_safe_filename_part(value: object) -> bool:
        """Return True if ``value`` is a string without path separators or NULs."""
        return isinstance(value, str) and not any(
            ch in value for ch in ("/", "\\", "\x00")
        )

    def _schema_identity(self, schema) -> Optional[tuple[str, str, str]]:
        """Extract ``(authority, name, version)`` from a schema.

        Args:
            schema: Dict-like schema (anything exposing ``.get``).

        Returns:
            The identity triple, or None when the schema has no ``.get``, has
            no name, or its name/version cannot be used safely as part of a
            file name (they contain a path separator or NUL, which would let
            the module be written outside its authority directory).
        """
        if not hasattr(schema, "get"):
            return None

        name = schema.get("name")
        if not name:
            return None
        version = schema.get("version", "1.0.0")

        if not (
            self._is_safe_filename_part(name)
            and self._is_safe_filename_part(version)
        ):
            return None

        return _extract_authority(schema.get("$ref")), name, version

    def _module_filename(self, name: str, version: str) -> str:
        """Generate module filename from schema name and version.

        Replaces dots in version with underscores to avoid confusion
        with file extensions.

        Args:
            name: Schema name (e.g., "MySample")
            version: Schema version (e.g., "1.0.0")

        Returns:
            Filename like "MySample_1_0_0.py"
        """
        safe_version = version.replace(".", "_")
        return f"{name}_{safe_version}.py"

    def _stub_filename(self, name: str, version: str) -> str:
        """Alias for _module_filename for backwards compatibility."""
        return self._module_filename(name, version)

    def _module_path(
        self, name: str, version: str, authority: str = DEFAULT_AUTHORITY
    ) -> Path:
        """Get full path to module file for a schema.

        Args:
            name: Schema name
            version: Schema version
            authority: Authority from schema ref (e.g., "local", "alice.bsky.social")

        Returns:
            Path like ~/.atdata/stubs/local/MySample_1_0_0.py
        """
        return self._stub_dir / authority / self._module_filename(name, version)

    def _stub_path(
        self, name: str, version: str, authority: str = DEFAULT_AUTHORITY
    ) -> Path:
        """Alias for _module_path for backwards compatibility."""
        return self._module_path(name, version, authority)

    def _module_is_current(self, path: Path, version: str) -> bool:
        """Check if an existing module file matches the expected version.

        Reads the start of the module (where the docstring lives), extracts
        the version with ``_VERSION_PATTERN`` and compares it to ``version``.

        Args:
            path: Path to the module file
            version: Expected schema version

        Returns:
            True if the module exists, is readable and its recorded version
            matches. A missing, unreadable or undecodable file is reported as
            not current so that it gets regenerated.
        """
        try:
            with open(path, "r", encoding="utf-8") as f:
                content = f.read(500)  # Read first 500 chars for docstring
        except (OSError, UnicodeDecodeError):
            # Missing file, permission problem, or corrupted content.
            return False

        match = _VERSION_PATTERN.search(content)
        return match is not None and match.group(1) == version

    def _stub_is_current(self, path: Path, version: str) -> bool:
        """Alias for _module_is_current for backwards compatibility."""
        return self._module_is_current(path, version)

    def _ensure_authority_package(self, authority: str) -> None:
        """Ensure authority subdirectory exists with __init__.py."""
        self._ensure_dir_exists()
        authority_dir = self._stub_dir / authority
        authority_dir.mkdir(parents=True, exist_ok=True)
        init_path = authority_dir / "__init__.py"
        if not init_path.exists():
            init_path.write_text(
                f'"""Auto-generated schema modules for {authority}."""\n'
            )

    def _write_module_atomic(self, path: Path, content: str, authority: str) -> None:
        """Write module file atomically using a temp file and replace.

        This ensures that concurrent processes won't see partial files.
        A best-effort file lock is attempted on the temp file as well.

        Args:
            path: Destination path for the module file
            content: Module file content to write
            authority: Authority namespace (for creating __init__.py)

        Raises:
            Exception: Any error from writing or replacing the file is
                re-raised after the temp file has been cleaned up.
        """
        self._ensure_authority_package(authority)

        # Create temp file in the destination directory so the final replace
        # never crosses a filesystem boundary.
        fd, temp_name = tempfile.mkstemp(suffix=".py.tmp", dir=path.parent)
        temp_path = Path(temp_name)

        try:
            with os.fdopen(fd, "w", encoding="utf-8") as f:
                # Try to get exclusive lock (non-blocking, ignore if unavailable).
                # File locking is best-effort - not all filesystems support it;
                # the atomic replace below provides the real protection.
                try:
                    fcntl.flock(f.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
                except (OSError, IOError):
                    pass

                f.write(content)
                f.flush()
                os.fsync(f.fileno())

            # os.replace overwrites an existing destination on every platform,
            # whereas rename fails on Windows when the target already exists
            # (which is always the case when regenerating a stale module).
            os.replace(temp_path, path)

        except Exception:
            # Clean up temp file on error - best effort, ignore failures
            try:
                temp_path.unlink()
            except OSError:
                pass  # Temp file cleanup failed, re-raising original error
            raise

    def _write_stub_atomic(self, path: Path, content: str) -> None:
        """Legacy method - extracts authority from path and calls _write_module_atomic."""
        # Extract authority from path (parent directory name)
        authority = path.parent.name
        self._write_module_atomic(path, content, authority)

    def ensure_stub(self, schema: dict) -> Optional[Path]:
        """Ensure a module file exists for the given schema.

        If a current module already exists, returns its path without
        regenerating. Otherwise, generates the module and writes it.

        Modules are namespaced by the authority from the schema's $ref URI
        to avoid collisions between schemas with the same name from
        different sources.

        Args:
            schema: Schema dict with 'name', 'version', and 'fields' keys.
                Can also be a LocalSchemaRecord (supports dict-style access).
                Should include '$ref' for proper namespacing.

        Returns:
            Path to the module file, or None if the schema is missing required
            fields or its name/version cannot be used in a file name.
        """
        identity = self._schema_identity(schema)
        if identity is None:
            return None
        authority, name, version = identity

        path = self._module_path(name, version, authority)

        # Skip if current module exists
        if self._module_is_current(path, version):
            return path

        # Convert to dict if needed for generate_module
        schema_dict = schema.to_dict() if hasattr(schema, "to_dict") else schema

        content = generate_module(schema_dict)
        self._write_module_atomic(path, content, authority)

        # Print helpful message on first generation
        if self._first_generation:
            self._first_generation = False
            self._print_ide_hint()

        return path

    def ensure_module(self, schema: dict) -> Optional[Type]:
        """Ensure a module exists and return the class from it.

        This is the primary method for getting a properly-typed class from
        a schema. It generates the module if needed, imports the class,
        and caches it per (authority, name, version).

        Args:
            schema: Schema dict with 'name', 'version', and 'fields' keys.
                Can also be a LocalSchemaRecord (supports dict-style access).
                Should include '$ref' for proper namespacing.

        Returns:
            The class loaded from the generated module, or None if the schema
            is missing required fields or the module could not be imported.
        """
        identity = self._schema_identity(schema)
        if identity is None:
            return None
        _authority, name, _version = identity

        # Check cache first
        cached = self._class_cache.get(identity)
        if cached is not None:
            return cached

        # Ensure module exists
        path = self.ensure_stub(schema)
        if path is None:
            return None

        # Import and cache the class
        cls = self._import_class_from_module(path, name)
        if cls is not None:
            self._class_cache[identity] = cls

        return cls

    def _import_class_from_module(
        self, module_path: Path, class_name: str
    ) -> Optional[Type]:
        """Import a class from a generated module file.

        Uses importlib to dynamically load the module and extract the class.

        Args:
            module_path: Path to the .py module file
            class_name: Name of the class to import

        Returns:
            The imported class, or None if import fails
        """
        if not module_path.exists():
            return None

        # The file stem alone is not unique: the same schema name/version can
        # exist under several authorities. Include the (sanitised) authority
        # directory so the modules do not overwrite each other in sys.modules.
        namespace = re.sub(r"\W", "_", module_path.parent.name)
        module_name = f"_atdata_generated_{namespace}_{module_path.stem}"

        try:
            # Load the module spec
            spec = importlib.util.spec_from_file_location(module_name, module_path)
            if spec is None or spec.loader is None:
                return None

            # Create and execute the module. It is registered before execution
            # so code that looks itself up via sys.modules keeps working.
            module = importlib.util.module_from_spec(spec)
            sys.modules[module_name] = module
            try:
                spec.loader.exec_module(module)
            except BaseException:
                # Never leave a half-initialised module registered.
                sys.modules.pop(module_name, None)
                raise

            # Get the class from the module
            return getattr(module, class_name, None)

        except (ModuleNotFoundError, AttributeError, ImportError, OSError):
            # Import failed - return None and let caller fall back to dynamic generation
            return None

    def _print_ide_hint(self) -> None:
        """Print a one-time hint about IDE configuration to stderr."""
        print(
            f"\n[atdata] Generated schema module in: {self._stub_dir}\n"
            "[atdata] For IDE support, add this path to your type checker:\n"
            "[atdata]   VS Code/Pylance: Add to python.analysis.extraPaths\n"
            "[atdata]   PyCharm: Mark as Sources Root\n"
            "[atdata]   mypy: Add to mypy_path in mypy.ini\n",
            file=sys.stderr,
        )

    def get_stub_path(
        self, name: str, version: str, authority: str = DEFAULT_AUTHORITY
    ) -> Optional[Path]:
        """Get the path to an existing stub file.

        Args:
            name: Schema name
            version: Schema version
            authority: Authority namespace (default: "local")

        Returns:
            Path if stub exists, None otherwise
        """
        path = self._stub_path(name, version, authority)
        return path if path.exists() else None

    def list_stubs(self, authority: Optional[str] = None) -> list[Path]:
        """List all module files in the stub directory.

        Args:
            authority: If provided, only list modules for this authority.
                If None, lists all modules across all authorities.

        Returns:
            List of paths to existing module files (excludes __init__.py)
        """
        if not self._stub_dir.exists():
            return []

        if authority:
            # List modules for specific authority
            authority_dir = self._stub_dir / authority
            if not authority_dir.exists():
                return []
            return [p for p in authority_dir.glob("*.py") if p.name != "__init__.py"]

        # List all modules across all authorities (recursive, excluding __init__.py)
        return [p for p in self._stub_dir.glob("**/*.py") if p.name != "__init__.py"]

    def clear_stubs(self, authority: Optional[str] = None) -> int:
        """Remove module files from the stub directory.

        Args:
            authority: If provided, only clear modules for this authority.
                If None, clears all modules across all authorities.

        Returns:
            Number of files removed
        """
        removed = 0
        for path in self.list_stubs(authority):
            try:
                path.unlink()
                removed += 1
            except OSError:
                # File already removed or permission denied - skip and continue
                continue

        # Clear the class cache for removed modules
        if authority:
            stale_keys = [k for k in self._class_cache if k[0] == authority]
        else:
            stale_keys = list(self._class_cache)
        for key in stale_keys:
            del self._class_cache[key]

        # Clean up empty authority directories (including __init__.py).
        # When a single authority was requested, leave the others untouched.
        if self._stub_dir.exists():
            if authority:
                candidates = [self._stub_dir / authority]
            else:
                # Snapshot the listing: directories are removed while looping.
                candidates = list(self._stub_dir.iterdir())

            for subdir in candidates:
                if not subdir.is_dir():
                    continue
                try:
                    contents = list(subdir.iterdir())
                    if not contents:
                        subdir.rmdir()
                    elif len(contents) == 1 and contents[0].name == "__init__.py":
                        # Only the package marker remains
                        contents[0].unlink()
                        subdir.rmdir()
                except OSError:
                    continue

        return removed

    def clear_stub(
        self, name: str, version: str, authority: str = DEFAULT_AUTHORITY
    ) -> bool:
        """Remove a specific module file.

        Args:
            name: Schema name
            version: Schema version
            authority: Authority namespace (default: "local")

        Returns:
            True if file was removed, False if it didn't exist or could not
            be removed
        """
        path = self._stub_path(name, version, authority)
        if not path.exists():
            return False

        try:
            path.unlink()
        except OSError:
            return False

        # Clear from class cache
        self._class_cache.pop((authority, name, version), None)
        return True
528
+
529
+
530
# Public API of this module.
__all__ = ["StubManager", "DEFAULT_STUB_DIR", "DEFAULT_AUTHORITY"]
atdata/_type_utils.py ADDED
@@ -0,0 +1,104 @@
1
+ """Shared type conversion utilities for schema handling.
2
+
3
+ This module provides common type mapping functions used by both local.py
4
+ and atmosphere/schema.py to avoid code duplication.
5
+ """
6
+
7
+ import types
8
+ from typing import Any, get_origin, get_args, Union
9
+
10
# Mapping from numpy dtype strings to schema dtype names
NUMPY_DTYPE_MAP = {
    "float16": "float16",
    "float32": "float32",
    "float64": "float64",
    "int8": "int8",
    "int16": "int16",
    "int32": "int32",
    "int64": "int64",
    "uint8": "uint8",
    "uint16": "uint16",
    "uint32": "uint32",
    "uint64": "uint64",
    "bool": "bool",
    "complex64": "complex64",
    "complex128": "complex128",
}

# Mapping from Python primitive types to schema type names
PRIMITIVE_TYPE_MAP = {
    str: "str",
    int: "int",
    float: "float",
    bool: "bool",
    bytes: "bytes",
}


def numpy_dtype_to_string(dtype: Any) -> str:
    """Convert a numpy dtype annotation to a schema dtype string.

    The dtype is identified by searching ``str(dtype)`` for one of the names
    in ``NUMPY_DTYPE_MAP``.

    Args:
        dtype: A numpy dtype or type annotation containing dtype info.

    Returns:
        Schema dtype string (e.g., "float32", "int64"). Defaults to "float32"
        when no known dtype name is found.
    """
    dtype_str = str(dtype)

    # Try longer names first: "int8" is a substring of "uint8" (and likewise
    # for the other unsigned types), so checking in plain dict order would
    # report every unsigned integer dtype as its signed counterpart.
    for key in sorted(NUMPY_DTYPE_MAP, key=len, reverse=True):
        if key in dtype_str:
            return NUMPY_DTYPE_MAP[key]

    return "float32"
52
+
53
+
54
+ def unwrap_optional(python_type: Any) -> tuple[Any, bool]:
55
+ """Extract the inner type from Optional/Union types.
56
+
57
+ Handles both `Optional[T]` (Union[T, None]) and `T | None` syntax.
58
+
59
+ Args:
60
+ python_type: A Python type annotation.
61
+
62
+ Returns:
63
+ Tuple of (inner_type, is_optional). If type is not Optional,
64
+ returns (python_type, False).
65
+
66
+ Raises:
67
+ TypeError: If complex union types (Union[A, B] where both are non-None).
68
+ """
69
+ origin = get_origin(python_type)
70
+
71
+ if origin is Union or isinstance(python_type, types.UnionType):
72
+ args = get_args(python_type)
73
+ non_none_args = [a for a in args if a is not type(None)]
74
+ is_optional = type(None) in args or len(non_none_args) < len(args)
75
+
76
+ if len(non_none_args) == 1:
77
+ return non_none_args[0], is_optional
78
+ elif len(non_none_args) > 1:
79
+ raise TypeError(f"Complex union types not supported: {python_type}")
80
+
81
+ return python_type, False
82
+
83
+
84
def is_ndarray_type(python_type: Any) -> bool:
    """Report whether the textual form of an annotation mentions an NDArray.

    The check is purely string based: ``str(python_type)`` is searched for
    "NDArray" as written, or for "ndarray" ignoring case.
    """
    rendered = str(python_type)
    if "NDArray" in rendered:
        return True
    return "ndarray" in rendered.lower()
88
+
89
+
90
def extract_ndarray_dtype(python_type: Any) -> str:
    """Determine the schema dtype string for an NDArray annotation.

    The last type argument of the annotation is taken as the dtype and
    converted with ``numpy_dtype_to_string``.

    Args:
        python_type: NDArray type annotation (e.g., NDArray[np.float32]).

    Returns:
        Dtype string (e.g., "float32"). Falls back to "float32" when the
        annotation has no type arguments or its last argument is None.
    """
    type_args = get_args(python_type)
    if not type_args:
        return "float32"

    last_arg = type_args[-1]
    if last_arg is None:
        return "float32"

    return numpy_dtype_to_string(last_arg)