exonware-xwlazy 0.1.0.22__py3-none-any.whl → 1.0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. exonware/__init__.py +86 -16
  2. exonware/xwlazy/version.py +5 -5
  3. exonware/xwlazy.py +2546 -0
  4. exonware/xwlazy_external_libs.toml +716 -0
  5. {exonware_xwlazy-0.1.0.22.dist-info → exonware_xwlazy-1.0.1.2.dist-info}/METADATA +6 -6
  6. exonware_xwlazy-1.0.1.2.dist-info/RECORD +8 -0
  7. exonware/xwlazy/__init__.py +0 -367
  8. exonware/xwlazy/common/__init__.py +0 -47
  9. exonware/xwlazy/common/base.py +0 -56
  10. exonware/xwlazy/common/cache.py +0 -504
  11. exonware/xwlazy/common/logger.py +0 -257
  12. exonware/xwlazy/common/services/__init__.py +0 -72
  13. exonware/xwlazy/common/services/dependency_mapper.py +0 -232
  14. exonware/xwlazy/common/services/install_async_utils.py +0 -165
  15. exonware/xwlazy/common/services/install_cache_utils.py +0 -245
  16. exonware/xwlazy/common/services/keyword_detection.py +0 -283
  17. exonware/xwlazy/common/services/spec_cache.py +0 -165
  18. exonware/xwlazy/common/services/state_manager.py +0 -84
  19. exonware/xwlazy/common/strategies/__init__.py +0 -28
  20. exonware/xwlazy/common/strategies/caching_dict.py +0 -44
  21. exonware/xwlazy/common/strategies/caching_installation.py +0 -88
  22. exonware/xwlazy/common/strategies/caching_lfu.py +0 -66
  23. exonware/xwlazy/common/strategies/caching_lru.py +0 -63
  24. exonware/xwlazy/common/strategies/caching_multitier.py +0 -59
  25. exonware/xwlazy/common/strategies/caching_ttl.py +0 -59
  26. exonware/xwlazy/config.py +0 -193
  27. exonware/xwlazy/contracts.py +0 -1396
  28. exonware/xwlazy/defs.py +0 -378
  29. exonware/xwlazy/errors.py +0 -276
  30. exonware/xwlazy/facade.py +0 -991
  31. exonware/xwlazy/module/__init__.py +0 -18
  32. exonware/xwlazy/module/base.py +0 -565
  33. exonware/xwlazy/module/data.py +0 -17
  34. exonware/xwlazy/module/facade.py +0 -246
  35. exonware/xwlazy/module/importer_engine.py +0 -2117
  36. exonware/xwlazy/module/strategies/__init__.py +0 -22
  37. exonware/xwlazy/module/strategies/module_helper_lazy.py +0 -93
  38. exonware/xwlazy/module/strategies/module_helper_simple.py +0 -65
  39. exonware/xwlazy/module/strategies/module_manager_advanced.py +0 -111
  40. exonware/xwlazy/module/strategies/module_manager_simple.py +0 -95
  41. exonware/xwlazy/package/__init__.py +0 -18
  42. exonware/xwlazy/package/base.py +0 -798
  43. exonware/xwlazy/package/conf.py +0 -324
  44. exonware/xwlazy/package/data.py +0 -17
  45. exonware/xwlazy/package/facade.py +0 -480
  46. exonware/xwlazy/package/services/__init__.py +0 -84
  47. exonware/xwlazy/package/services/async_install_handle.py +0 -87
  48. exonware/xwlazy/package/services/config_manager.py +0 -245
  49. exonware/xwlazy/package/services/discovery.py +0 -370
  50. exonware/xwlazy/package/services/host_packages.py +0 -145
  51. exonware/xwlazy/package/services/install_async.py +0 -277
  52. exonware/xwlazy/package/services/install_cache.py +0 -145
  53. exonware/xwlazy/package/services/install_interactive.py +0 -59
  54. exonware/xwlazy/package/services/install_policy.py +0 -156
  55. exonware/xwlazy/package/services/install_registry.py +0 -54
  56. exonware/xwlazy/package/services/install_result.py +0 -17
  57. exonware/xwlazy/package/services/install_sbom.py +0 -153
  58. exonware/xwlazy/package/services/install_utils.py +0 -79
  59. exonware/xwlazy/package/services/installer_engine.py +0 -406
  60. exonware/xwlazy/package/services/lazy_installer.py +0 -718
  61. exonware/xwlazy/package/services/manifest.py +0 -496
  62. exonware/xwlazy/package/services/strategy_registry.py +0 -186
  63. exonware/xwlazy/package/strategies/__init__.py +0 -57
  64. exonware/xwlazy/package/strategies/package_discovery_file.py +0 -129
  65. exonware/xwlazy/package/strategies/package_discovery_hybrid.py +0 -84
  66. exonware/xwlazy/package/strategies/package_discovery_manifest.py +0 -101
  67. exonware/xwlazy/package/strategies/package_execution_async.py +0 -113
  68. exonware/xwlazy/package/strategies/package_execution_cached.py +0 -90
  69. exonware/xwlazy/package/strategies/package_execution_pip.py +0 -99
  70. exonware/xwlazy/package/strategies/package_execution_wheel.py +0 -106
  71. exonware/xwlazy/package/strategies/package_mapping_discovery_first.py +0 -100
  72. exonware/xwlazy/package/strategies/package_mapping_hybrid.py +0 -105
  73. exonware/xwlazy/package/strategies/package_mapping_manifest_first.py +0 -100
  74. exonware/xwlazy/package/strategies/package_policy_allow_list.py +0 -57
  75. exonware/xwlazy/package/strategies/package_policy_deny_list.py +0 -57
  76. exonware/xwlazy/package/strategies/package_policy_permissive.py +0 -46
  77. exonware/xwlazy/package/strategies/package_timing_clean.py +0 -67
  78. exonware/xwlazy/package/strategies/package_timing_full.py +0 -66
  79. exonware/xwlazy/package/strategies/package_timing_smart.py +0 -68
  80. exonware/xwlazy/package/strategies/package_timing_temporary.py +0 -66
  81. exonware/xwlazy/runtime/__init__.py +0 -18
  82. exonware/xwlazy/runtime/adaptive_learner.py +0 -129
  83. exonware/xwlazy/runtime/base.py +0 -274
  84. exonware/xwlazy/runtime/facade.py +0 -94
  85. exonware/xwlazy/runtime/intelligent_selector.py +0 -170
  86. exonware/xwlazy/runtime/metrics.py +0 -60
  87. exonware/xwlazy/runtime/performance.py +0 -37
  88. exonware_xwlazy-0.1.0.22.dist-info/RECORD +0 -87
  89. {exonware_xwlazy-0.1.0.22.dist-info → exonware_xwlazy-1.0.1.2.dist-info}/WHEEL +0 -0
  90. {exonware_xwlazy-0.1.0.22.dist-info → exonware_xwlazy-1.0.1.2.dist-info}/licenses/LICENSE +0 -0
exonware/xwlazy.py ADDED
@@ -0,0 +1,2546 @@
1
+ """
2
+ xwlazy v4.0 - Enterprise Features in Single File
3
+
4
+ A comprehensive, single-file auto-installation system with enterprise-grade features.
5
+ Covers all major xwlazy capabilities while maintaining single-file simplicity.
6
+
7
+ FIX v3.0.3: Code Consolidation & Critical Fixes:
8
+ - Unified TOML Loader: Single _load_toml_file() function reused across all TOML parsing (reduces ~50 lines)
9
+ - Fixed Cache Collisions: Uses hashlib.sha256 instead of hash() for collision-resistant cache filenames
10
+ - Fixed Race Conditions: Thread-safe access to _global_hook_manager in _intercepting_import()
11
+ - Fixed Silent Exceptions: Proper error handling and logging throughout (no more silent failures)
12
+ - Removed Duplicate Code: Eliminated duplicate SERIALIZATION_PREFIXES and consolidated TOML reading
13
+ - Better Error Messages: All exceptions logged with ASCII-safe encoding for Windows compatibility
14
+
15
+ FIX v4.0.2: Full Dependency Installation with Version Support:
16
+ - Ensures all dependencies are installed along with the package (explicit --no-deps prevention)
17
+ - Version constraints from requirements.txt/pyproject.toml are included in install commands
18
+ - _run_pip_install now explicitly installs full dependency tree with version constraints
19
+
20
+ Key Features:
21
+ - ✅ PER-PACKAGE ISOLATION: Each package configured independently
22
+ - ✅ KEYWORD-BASED AUTO-DETECTION: Zero-code integration via pyproject.toml keywords
23
+ - ✅ GLOBAL __import__ HOOK: Module-level import interception
24
+ - ✅ ONE-LINE ACTIVATION: auto_enable_lazy(__package__)
25
+ - ✅ EXTERNAL LIBRARY MAPPINGS: Loads from xwlazy_external_libs.toml with version support
26
+ - ✅ MULTI-TIER CACHING: L1 (memory LRU) + L2 (disk cache) for better performance (NEW v3.0.2!)
27
+ - ✅ WATCHED PREFIXES: Special handling for serialization modules (pickle, json, yaml, etc.) (NEW v3.0.2!)
28
+ - ✅ ENHANCED PERFORMANCE MONITORING: Detailed metrics tracking (load times, access counts, cache stats) (NEW v3.0.2!)
29
+ - ✅ SERIALIZATION MODULE DETECTION: Automatic detection and special handling (NEW v3.0.2!)
30
+ - ✅ LOCKFILE SUPPORT: Track installed packages for reproducibility
31
+ - ✅ ADAPTIVE LEARNING: Lightweight pattern-based optimization
32
+ - ✅ functools.lru_cache: High-performance resolution caching
33
+ - ✅ Multiple Installation Strategies: PIP, Wheel, Smart, Cached
34
+ - ✅ Thread-safe: RLock-based concurrency handling
35
+ - ✅ Zero dependencies: Uses only standard library (+ tomllib/tomli)
36
+
37
+ Fully TOML-only Implementation (v4.0):
38
+ - External Library Mappings: xwlazy_external_libs.toml (no JSON fallback)
39
+ - SBOM Output: xwlazy_sbom.toml (TOML format)
40
+ - Lockfile: xwlazy.lock.toml (TOML format)
41
+ - Manifest Parsing: requirements.txt + pyproject.toml only (no JSON manifests)
42
+ - Version Support: Uses versions from external_libs.toml if missing from requirements.txt/pyproject.toml
43
+ - Backwards Compatibility: Can read legacy JSON files during migration
44
+ - TOML Writer: Uses tomli-w if available, otherwise manual writer (zero dependencies goal)
45
+
46
+ NEW v4.0 - Enterprise Features:
47
+ - Multi-Tier Caching: L1 (memory LRU) + L2 (disk cache) for better performance
48
+ - Watched Prefixes: Special handling for serialization modules (pickle, json, yaml, etc.)
49
+ - Enhanced Performance Monitoring: Detailed metrics (load times, access counts, cache performance)
50
+ - Serialization Module Detection: Automatic detection of serialization modules for special handling
51
+ Version: 1.0.1.2
52
+ Company: eXonware.com
53
+ Author: Eng. Muhammad AlShehri
54
+ Email: connect@exonware.com
55
+ Date: 2025-01-27
56
+ """
57
+
58
+ # =============================================================================
59
+ # STANDARD LIBRARY IMPORTS (Built-in, No pip installation needed)
60
+ # =============================================================================
61
+ import sys
62
+ import os
63
+ import re
64
+ import time
65
+ import subprocess
66
+ import importlib
67
+ import importlib.util
68
+ import importlib.metadata # Built-in since Python 3.8+ (for Python 3.7: needs pip install importlib-metadata)
69
+ import threading
70
+ import types
71
+ import collections
72
+ import builtins
73
+ import inspect
74
+ import pickle
75
+ import hashlib
76
+ from functools import lru_cache
77
+ from pathlib import Path
78
+ from datetime import datetime
79
+ from collections import defaultdict, deque, OrderedDict
80
+ from importlib.abc import MetaPathFinder, Loader
81
+ from importlib.util import spec_from_loader
82
+
83
+ # =============================================================================
84
+ # EXTERNAL DEPENDENCIES (Conditionally imported, Need pip installation)
85
+ # =============================================================================
86
+ # These are imported conditionally inside functions:
87
+ # - tomllib: Built-in since Python 3.11+ (no pip needed)
88
+ # - tomli: Required for Python < 3.11 (pip install tomli) - imported conditionally in _read_toml_simple, _load_hard_mappings, _index_manifests, _extract_auto_config
89
+ # - tomli_w: Optional for TOML writing (pip install tomli-w) - imported conditionally in _write_toml_simple
90
+ # Note: Code gracefully handles missing tomli/tomli_w by falling back to manual writer/reader
91
+
92
+ # =============================================================================
93
+ # CONFIGURATION & CONSTANTS
94
+ # =============================================================================
95
+
96
# xwlazy keeps its files under a single per-user directory so that project
# directories are not polluted.
XWLAZY_DATA_DIR = Path.home().joinpath(".xwlazy")
XWLAZY_CACHE_DIR = XWLAZY_DATA_DIR.joinpath("cache")

# Bare filenames only (intended to be stored in XWLAZY_DATA_DIR).
AUDIT_LOG_FILE = "xwlazy_sbom.toml"
LOCKFILE_PATH = "xwlazy.lock.toml"
EXTERNAL_LIBS_TOML = "xwlazy_external_libs.toml"

# Top-level module names treated as serialization modules (watched for
# special handling).
SERIALIZATION_PREFIXES = {
    "pickle",
    "json",
    "yaml",
    "toml",
    "xml",
    "msgpack",
    "cbor",
    "bson",
    "protobuf",
    "avro",
    "csv",
    "parquet",
    "feather",
}

# Import name -> pip package name; used when the external TOML mapping file
# cannot be found or parsed.
_FALLBACK_HARD_MAPPINGS = {
    "google.protobuf": "protobuf",
    "cv2": "opencv-python",
    "PIL": "Pillow",
    "sklearn": "scikit-learn",
    "yaml": "PyYAML",
    "bs4": "beautifulsoup4",
    "mysqldb": "mysqlclient",
    "pandas": "pandas",
    "numpy": "numpy",
    "requests": "requests",
    "lz4.frame": "lz4",
    "fastavro": "fastavro",
    "pyarrow": "pyarrow",
    "h5py": "h5py",
    "scipy": "scipy",
    "psycopg2": "psycopg2-binary",
    "boto3": "boto3",
}
118
+
119
def _write_toml_simple(data, file_path):
    """
    Serialise ``data`` to ``file_path`` as TOML.

    Dicts are written with tomli-w when it is importable. Lists, and dicts
    that tomli-w rejects, go through the minimal built-in writer below.

    Layout produced by the built-in writer:
      - dict: key/value pairs first, then nested dicts as ``[table]`` sections
      - non-empty list whose first item is a dict: one ``[[entry]]``
        array-of-tables element per item
      - any other list: a single ``entries = [...]`` array
      - anything else: the bare formatted value (not a complete TOML document)

    TOML has no null value, so dict keys whose value is None are omitted and
    None items are dropped from arrays. Dicts inside arrays are written as
    inline tables. Values of unrecognised types are written as quoted strings.

    Args:
        data: dict, list, or scalar to serialise.
        file_path: Destination path (overwritten).

    Raises:
        IOError: If the built-in writer fails to format or write ``data``.
    """
    import datetime as _dt  # local: the module only imports the datetime class

    # tomli-w only serialises mappings, so lists always use the manual writer.
    if isinstance(data, dict):
        try:
            import tomli_w
        except ImportError:
            tomli_w = None
        if tomli_w is not None:
            try:
                with open(file_path, 'wb') as f:
                    tomli_w.dump(data, f)
                return
            except (TypeError, ValueError):
                # tomli-w refuses values TOML cannot express (e.g. None).
                # Fall through: the manual writer reopens the file in 'w'
                # mode and overwrites whatever was partially written.
                pass

    # Only A-Za-z0-9, '_' and '-' may appear in an unquoted TOML key.
    bare_key = re.compile(r'[A-Za-z0-9_-]+')
    short_escapes = {
        '\\': '\\\\', '"': '\\"', '\b': '\\b', '\t': '\\t',
        '\n': '\\n', '\f': '\\f', '\r': '\\r',
    }

    def _escape_string(s):
        """Return ``s`` as a quoted TOML basic string (non-str passes through)."""
        if not isinstance(s, str):
            return s
        out = []
        for ch in s:
            if ch in short_escapes:
                out.append(short_escapes[ch])
            elif ord(ch) < 0x20 or ord(ch) == 0x7F:
                # Remaining control characters are illegal in basic strings.
                out.append(f'\\u{ord(ch):04X}')
            else:
                out.append(ch)
        return '"' + ''.join(out) + '"'

    def _escape_key(key):
        """Return a bare key when legal, otherwise a quoted key."""
        key_str = str(key)
        if bare_key.fullmatch(key_str):
            return key_str
        # Includes keys containing '.', which would otherwise be read back
        # as a dotted (nested) key.
        return _escape_string(key_str)

    def _format_value(value, indent=0, inline=False):
        """Format one value; ``inline=True`` forces a single-line result."""
        if value is None:
            # Only reachable for a top-level None; containers filter None out.
            return ''
        if isinstance(value, bool):  # must precede int: bool is an int subclass
            return 'true' if value else 'false'
        if isinstance(value, (int, float)):
            return str(value)
        if isinstance(value, str):
            return _escape_string(value)
        if isinstance(value, (_dt.datetime, _dt.date, _dt.time)):
            return value.isoformat()
        if isinstance(value, dict):
            # Inline table: the only way to put a table inside an array value.
            pairs = ', '.join(
                f'{_escape_key(k)} = {_format_value(v, indent, True)}'
                for k, v in value.items()
                if v is not None
            )
            return '{' + pairs + '}'
        if isinstance(value, (list, tuple)):
            kept = [item for item in value if item is not None]
            rendered = [_format_value(item, indent + 1, True) for item in kept]
            has_nested = any(isinstance(item, (dict, list, tuple)) for item in kept)
            if inline or not has_nested:
                return '[' + ', '.join(rendered) + ']'
            # One nested item per line for readability.
            item_pad = ' ' * (indent * 2 + 2)
            lines = ['[']
            lines.extend(f'{item_pad}{text},' for text in rendered)
            lines.append(' ' * (indent * 2) + ']')
            return '\n'.join(lines)
        # Unknown type: a quoted string is always valid TOML.
        return _escape_string(str(value))

    def _format_dict(table, prefix="", indent=0):
        """Format a dict as TOML lines (nested dicts become tables)."""
        pad = ' ' * (indent * 2)
        lines = []
        subtables = []

        # Plain key/value pairs must precede any [table] header, otherwise
        # they would be read back as members of that table.
        for key, val in table.items():
            if val is None:
                continue
            if isinstance(val, dict):
                subtables.append((key, val))
            else:
                lines.append(f'{pad}{_escape_key(key)} = {_format_value(val, indent)}')

        for key, val in subtables:
            key_escaped = _escape_key(key)
            table_path = f'{prefix}.{key_escaped}' if prefix else key_escaped
            lines.append('')
            lines.append(f'{pad}[{table_path}]')
            lines.extend(_format_dict(val, table_path, indent + 1))

        return lines

    try:
        with open(file_path, 'w', encoding='utf-8') as f:
            if isinstance(data, dict):
                content = '\n'.join(_format_dict(data))
                f.write(content)
                if content and not content.endswith('\n'):
                    f.write('\n')
            elif isinstance(data, list):
                if data and isinstance(data[0], dict):
                    # Array-of-tables format (list of dicts, e.g. audit log entries)
                    for item in data:
                        f.write('[[entry]]\n')
                        f.write('\n'.join(_format_dict(item, "entry", 0)))
                        f.write('\n\n')
                else:
                    # Simple array of primitives
                    f.write('entries = ' + _format_value(data, 0) + '\n')
            else:
                content = _format_value(data, 0)
                f.write(content)
                if not content.endswith('\n'):
                    f.write('\n')
    except Exception as e:
        raise IOError(f"Failed to write TOML to {file_path}: {e}") from e
241
+
242
+ # =============================================================================
243
+ # UNIFIED TOML LOADER (Reused across all TOML operations)
244
+ # =============================================================================
245
+
246
def _load_toml_file(file_path, verbose_error=True):
    """
    Parse a TOML file and return its contents.

    The parser is ``tomllib`` on Python 3.11+ and ``tomli`` on older
    interpreters. Every failure mode (missing file, missing parser, parse or
    read error) yields ``None`` rather than an exception.

    Args:
        file_path: Path to the TOML file.
        verbose_error: When True, failures are reported on stderr, but only
            if the XWLAZY_VERBOSE environment variable is also set.

    Returns:
        dict: Parsed TOML data, or None if the file does not exist, no TOML
        parser is available, or parsing fails.
    """
    if not Path(file_path).exists():
        return None

    report = bool(verbose_error and os.environ.get('XWLAZY_VERBOSE'))

    try:
        if sys.version_info < (3, 11):
            # Older interpreters need the tomli backport.
            try:
                import tomli as toml_parser
            except ImportError:
                if report:
                    sys.stderr.write("[xwlazy] tomli not available for Python < 3.11\n")
                return None
        else:
            import tomllib as toml_parser

        # TOML parsers require a binary file handle.
        with open(file_path, "rb") as handle:
            return toml_parser.load(handle)
    except Exception as exc:
        if report:
            # ASCII-only message so the write cannot fail on limited consoles.
            safe_msg = str(exc).encode('ascii', 'replace').decode('ascii')
            sys.stderr.write(f"[xwlazy] Error parsing TOML file {file_path}: {safe_msg}\n")
        return None
283
+
284
def _read_toml_simple(file_path):
    """
    Read a data file as TOML, falling back to JSON for legacy files.

    Parsing is delegated to _load_toml_file(). When the parsed document is a
    dict whose ``entry`` key holds a list (the ``[[entry]]`` array-of-tables
    layout), that list is returned directly; otherwise the parsed document
    is returned as-is.

    Returns:
        The parsed data, or None if the file does not exist or can be read
        neither as TOML nor as JSON.
    """
    if not Path(file_path).exists():
        return None

    # TOML errors are not reported here (verbose_error=False); a failure
    # just triggers the JSON attempt below.
    parsed = _load_toml_file(file_path, verbose_error=False)

    if parsed is None:
        # Backwards compatibility: existing files may still be JSON.
        try:
            import json
            with open(file_path, 'r', encoding='utf-8') as handle:
                return json.load(handle)
        except Exception as e:
            if os.environ.get('XWLAZY_VERBOSE'):
                sys.stderr.write(f"[xwlazy] Failed to read {file_path} as TOML or JSON: {e}\n")
            return None

    # Unwrap the [[entry]] array-of-tables layout into a plain list.
    if isinstance(parsed, dict) and isinstance(parsed.get('entry'), list):
        return parsed['entry']
    return parsed
313
+
314
def _extract_package_name(value):
    """
    Return the package-name portion of a requirement-style string.

    Everything from the first version-constraint or separator character
    (one of ``< > = ! ~ , ;``) onward is discarded and the remainder is
    stripped of surrounding whitespace. Non-string values are returned as
    ``str(value)``.

    Examples:
        "protobuf" -> "protobuf"
        "protobuf>=4.0" -> "protobuf"
        "pandas>=2.0,<3.0" -> "pandas"
    """
    if isinstance(value, str):
        first_marker = re.search(r'[<>=!~,;]', value)
        name_part = value if first_marker is None else value[:first_marker.start()]
        return name_part.strip()
    return str(value)
330
+
331
def _generate_fallback_candidates(fullname):
    """
    Generate fallback package-name candidates from a dotted import name.

    For each prefix of the dotted name, longest first, two spellings are
    produced: segments joined with dashes, then segments joined with
    underscores (omitted when identical to the dashed form, i.e. for a
    single segment).

    Empty segments (from leading, trailing or doubled dots) are ignored so
    that no candidate is empty or begins/ends with a separator.

    Examples:
        "exonware.xwlazy.core_file" -> [
            "exonware-xwlazy-core_file",
            "exonware_xwlazy_core_file",
            "exonware-xwlazy",
            "exonware_xwlazy",
            "exonware"
        ]
        ".pkg" -> ["pkg"]

    Args:
        fullname: Dotted import name.

    Returns:
        list: Candidate names, most specific first. Empty if ``fullname`` is
        not a non-empty string or contains no non-empty segment.
    """
    if not fullname or not isinstance(fullname, str):
        return []

    # Drop empty segments so ".pkg" / "pkg." / "a..b" cannot yield "" or
    # candidates such as "-pkg".
    parts = [part for part in fullname.split('.') if part]

    candidates = []
    for length in range(len(parts), 0, -1):
        prefix_parts = parts[:length]

        dash_version = '-'.join(prefix_parts)
        candidates.append(dash_version)

        # Differs from the dashed form only when there are 2+ segments.
        underscore_version = '_'.join(prefix_parts)
        if underscore_version != dash_version:
            candidates.append(underscore_version)

    return candidates
381
+
382
def _load_hard_mappings():
    """
    Build the import-name -> package-spec table.

    Reads the ``[mappings]`` table of xwlazy_external_libs.toml, looked up
    in the directory containing this module, via _load_toml_file(). Values
    are kept verbatim, including any version constraint; non-string values
    are converted with ``str()``.

    TOML file format (supports versions):
        [mappings]
        "google.protobuf" = "protobuf"   # Package name only
        "pandas" = "pandas>=2.0"         # Package name with version
        "cv2" = "opencv-python"

    A copy of _FALLBACK_HARD_MAPPINGS (names only, no versions) is returned
    when the file is missing, unparsable or empty, or when ``[mappings]`` is
    not a table. Progress and failure messages are written only when
    XWLAZY_VERBOSE is set.

    Returns:
        dict: {"import_name": "package_name" or "package_name>=version"}
    """
    toml_path = Path(__file__).parent / EXTERNAL_LIBS_TOML
    data = _load_toml_file(toml_path, verbose_error=True)
    verbose = os.environ.get('XWLAZY_VERBOSE')

    if not data:
        if verbose:
            if toml_path.exists():
                sys.stderr.write(f"[xwlazy] Failed to parse {EXTERNAL_LIBS_TOML}, using hardcoded fallback\n")
            else:
                sys.stderr.write(f"[xwlazy] {EXTERNAL_LIBS_TOML} not found, using hardcoded fallback mappings\n")
    else:
        section = data.get("mappings", {})
        if isinstance(section, dict):
            # Keep the full spec; any version constraint stays attached.
            mappings = {
                import_name: spec if isinstance(spec, str) else str(spec)
                for import_name, spec in section.items()
            }
            if verbose:
                sys.stdout.write(f"[OK] [xwlazy] Loaded {len(mappings)} mappings from {EXTERNAL_LIBS_TOML}\n")
            return mappings
        if verbose:
            sys.stderr.write(f"[xwlazy] {EXTERNAL_LIBS_TOML} [mappings] section is not a dictionary, using hardcoded fallback\n")

    # Hardcoded fallback (package names only, no versions).
    if verbose:
        sys.stdout.write(f"[xwlazy] Using fallback hardcoded mappings ({len(_FALLBACK_HARD_MAPPINGS)} entries)\n")
    return _FALLBACK_HARD_MAPPINGS.copy()
436
+
437
def _load_deny_list():
    """
    Return the set of package names that must not be installed.

    Reads the ``[deny_list]`` table of xwlazy_external_libs.toml, looked up
    in the directory containing this module. Only the keys are used; the
    values are free-text reasons.

    TOML file format:
        [deny_list]
        "lxml" = "Blocked: Python 2 syntax incompatibility"
        "package_name" = "Reason for blocking"

    Returns:
        set: Keys of ``[deny_list]`` (empty if the file parses to a non-empty
        document without that table), or the hardcoded ``{"lxml"}`` when the
        file is missing, unparsable or empty, or the section is not a table.
    """
    toml_path = Path(__file__).parent / EXTERNAL_LIBS_TOML
    data = _load_toml_file(toml_path, verbose_error=False)

    section = data.get("deny_list", {}) if data else None
    if isinstance(section, dict):
        denied = set(section)
        if os.environ.get('XWLAZY_VERBOSE'):
            sys.stdout.write(f"[OK] [xwlazy] Loaded {len(denied)} packages from deny_list in {EXTERNAL_LIBS_TOML}\n")
        return denied

    # Hardcoded fallback (lxml blocked due to Python 2 syntax issues).
    return {"lxml"}
471
+
472
# Import-name -> package-spec table, resolved once when this module is
# imported (external TOML file, else the hardcoded fallback; see
# _load_hard_mappings()).
HARD_MAPPINGS = _load_hard_mappings()
474
+
475
+ # =============================================================================
476
+ # MULTI-TIER CACHE (L1 + L2) - Simplified Version
477
+ # =============================================================================
478
+
479
class SimpleMultiTierCache:
    """
    Simplified two-tier cache: L1 (in-memory LRU) + L2 (pickle files on disk).

    - L1: ``OrderedDict`` holding at most ``l1_size`` entries; the least
      recently used entry is evicted first. ``l1_size <= 0`` disables L1.
    - L2: one ``<hash>.cache`` pickle file per key inside ``l2_dir``.

    Every public method runs under a single ``threading.RLock``. All L2
    failures are non-fatal: they are reported on stderr only when
    XWLAZY_VERBOSE is set, and the cache keeps working from L1.

    Note that ``get()`` returns None on a miss, so a stored value of None
    cannot be told apart from a missing key.
    """

    def __init__(self, l1_size=1000, l2_dir=None):
        """
        Args:
            l1_size: Maximum number of entries kept in memory.
            l2_dir: Directory for the disk tier; defaults to
                ``XWLAZY_CACHE_DIR / "l2_cache"``.
        """
        self._l1_size = l1_size
        self._l1_cache = OrderedDict()  # insertion order doubles as LRU order
        self._l2_dir = Path(l2_dir) if l2_dir else XWLAZY_CACHE_DIR / "l2_cache"
        self._lock = threading.RLock()
        self._stats = {"l1_hits": 0, "l2_hits": 0, "misses": 0}
        try:
            self._l2_dir.mkdir(parents=True, exist_ok=True)
        except OSError as e:
            # Consistent with the other L2 operations: an unusable disk
            # tier must not prevent the cache from being constructed.
            self._log_error(f"Failed to create L2 cache directory {self._l2_dir}", e)

    @staticmethod
    def _log_error(message, exc):
        """Write ``message: exc`` to stderr when XWLAZY_VERBOSE is set."""
        if os.environ.get('XWLAZY_VERBOSE'):
            # ASCII-only so the write cannot fail on limited consoles.
            err_msg = str(exc).encode('ascii', 'replace').decode('ascii')
            sys.stderr.write(f"[xwlazy] {message}: {err_msg}\n")

    def _get_cache_filename(self, key):
        """
        Return the L2 filename for ``key``.

        The name is the first 16 hex characters (64 bits) of the SHA-256 of
        ``str(key)`` plus ``.cache``. Residual collisions are caught by the
        key stored alongside each value.
        """
        key_bytes = str(key).encode('utf-8')
        cache_hash = hashlib.sha256(key_bytes).hexdigest()[:16]
        return f"{cache_hash}.cache"

    def get(self, key):
        """Return the cached value for ``key`` (L1, then L2), or None on a miss."""
        with self._lock:
            # Check L1 first (fastest)
            if key in self._l1_cache:
                self._l1_cache.move_to_end(key)  # mark as most recently used
                self._stats["l1_hits"] += 1
                return self._l1_cache[key]

            l2_path = self._l2_dir / self._get_cache_filename(key)
            if l2_path.exists():
                try:
                    # NOTE(security): pickle.load executes arbitrary code from
                    # the file; the L2 directory must only be writable by
                    # trusted users.
                    with open(l2_path, 'rb') as f:
                        value = pickle.load(f)
                    if isinstance(value, dict) and "_cache_key" in value and "_cache_value" in value:
                        # Current format: the stored key must match.
                        if value.get("_cache_key") == key:
                            actual_value = value["_cache_value"]
                        else:
                            # Filename collision: the file belongs to another key.
                            if os.environ.get('XWLAZY_VERBOSE'):
                                sys.stderr.write(f"[xwlazy] Cache collision detected for {key}, skipping L2 cache\n")
                            self._stats["misses"] += 1
                            return None
                    else:
                        # Legacy format: the pickle is the value itself.
                        actual_value = value

                    # Promote to L1
                    self._set_l1(key, actual_value)
                    self._stats["l2_hits"] += 1
                    return actual_value
                except Exception as e:
                    # Unreadable/corrupt entry: report and count as a miss.
                    self._log_error(f"Failed to load L2 cache for {key}", e)

            self._stats["misses"] += 1
            return None

    def set(self, key, value):
        """Store ``value`` under ``key`` in L1 and, best-effort, in L2."""
        with self._lock:
            self._set_l1(key, value)

            try:
                l2_path = self._l2_dir / self._get_cache_filename(key)
                # The key is stored with the value so get()/invalidate() can
                # detect filename collisions.
                cache_data = {"_cache_key": key, "_cache_value": value}
                with open(l2_path, 'wb') as f:
                    pickle.dump(cache_data, f)
            except Exception as e:
                # L2 failures are non-fatal, continue without disk cache
                self._log_error(f"Failed to write L2 cache for {key}", e)

    def _set_l1(self, key, value):
        """Insert into L1 as most recently used (caller must hold the lock)."""
        if self._l1_size <= 0:
            # L1 disabled. Without this guard the eviction below would call
            # popitem() on an empty OrderedDict and raise KeyError.
            return
        if key in self._l1_cache:
            self._l1_cache.move_to_end(key)
        else:
            while len(self._l1_cache) >= self._l1_size:
                self._l1_cache.popitem(last=False)  # evict least recently used
        self._l1_cache[key] = value

    def invalidate(self, key):
        """Remove ``key`` from L1 and delete its L2 file if it belongs to ``key``."""
        with self._lock:
            self._l1_cache.pop(key, None)

            l2_path = self._l2_dir / self._get_cache_filename(key)
            try:
                if l2_path.exists():
                    try:
                        with open(l2_path, 'rb') as f:
                            cached = pickle.load(f)
                        # Only delete if key matches (prevent collision issues)
                        if isinstance(cached, dict) and cached.get("_cache_key") == key:
                            l2_path.unlink(missing_ok=True)
                        elif not isinstance(cached, dict) or "_cache_key" not in cached:
                            # Legacy format carries no key to verify; delete.
                            l2_path.unlink(missing_ok=True)
                    except Exception:
                        # Unreadable file (possibly corrupt): delete it anyway.
                        l2_path.unlink(missing_ok=True)
            except Exception as e:
                self._log_error(f"Failed to invalidate L2 cache for {key}", e)

    def clear(self):
        """Empty L1, reset the statistics and delete every ``*.cache`` file in L2."""
        with self._lock:
            self._l1_cache.clear()
            self._stats = {"l1_hits": 0, "l2_hits": 0, "misses": 0}

            try:
                for cache_file in self._l2_dir.glob("*.cache"):
                    cache_file.unlink(missing_ok=True)
            except Exception as e:
                self._log_error("Failed to clear L2 cache directory", e)

    def get_stats(self):
        """
        Return a snapshot of cache statistics.

        Returns:
            dict with ``l1_size``, ``l1_max_size``, ``l1_hits``, ``l2_hits``,
            ``misses`` and ``hit_rate`` (hits / lookups, 0.0 before any lookup).
        """
        with self._lock:
            total = sum(self._stats.values())
            hits = self._stats["l1_hits"] + self._stats["l2_hits"]
            return {
                "l1_size": len(self._l1_cache),
                "l1_max_size": self._l1_size,
                "l1_hits": self._stats["l1_hits"],
                "l2_hits": self._stats["l2_hits"],
                "misses": self._stats["misses"],
                "hit_rate": hits / total if total > 0 else 0.0
            }
638
+
639
+ # =============================================================================
640
+ # WATCHED PREFIXES REGISTRY (For Serialization Modules)
641
+ # =============================================================================
642
+
643
class WatchedPrefixRegistry:
    """Thread-safe registry of module-name prefixes that get special handling.

    Two sets are kept: the prefixes supplied at construction time and the
    prefixes added later through :meth:`add_prefix`.  Queries always look at
    the union of both.
    """

    def __init__(self, initial_prefixes=None):
        """Create the registry.

        Args:
            initial_prefixes: Iterable of prefixes to start with.  When it is
                ``None`` or empty, the module-level ``SERIALIZATION_PREFIXES``
                is used instead.
        """
        # Copy into a fresh set so later removals never touch the caller's
        # (or the module-level default) collection.
        self._prefixes = set(initial_prefixes or SERIALIZATION_PREFIXES)
        self._lock = threading.RLock()
        self._custom_prefixes = set()  # prefixes added after construction

    def add_prefix(self, prefix):
        """Start watching ``prefix``."""
        with self._lock:
            self._custom_prefixes.add(prefix)

    def remove_prefix(self, prefix):
        """Stop watching ``prefix``; unknown prefixes are ignored.

        Both the initial and the later-added set are updated, so a prefix
        listed by :meth:`get_watched_prefixes` can always be removed.
        """
        with self._lock:
            self._custom_prefixes.discard(prefix)
            self._prefixes.discard(prefix)

    def is_watched(self, module_name):
        """Return ``True`` when ``module_name`` falls under a watched prefix.

        A module matches when its top-level package is watched, when its full
        name equals a watched prefix, or when it is a submodule of one
        (``prefix + '.'`` is a leading substring of the name).
        """
        with self._lock:
            all_prefixes = self._prefixes | self._custom_prefixes
        top_module = module_name.split('.')[0]
        if top_module in all_prefixes or module_name in all_prefixes:
            return True
        return any(module_name.startswith(prefix + '.') for prefix in all_prefixes)

    def get_watched_prefixes(self):
        """Return every watched prefix as a sorted list."""
        with self._lock:
            return sorted(self._prefixes | self._custom_prefixes)
679
+
680
+ # =============================================================================
681
+ # ENHANCED PERFORMANCE MONITORING
682
+ # =============================================================================
683
+
684
class EnhancedPerformanceMonitor:
    """Collects load durations, access counts, module sizes and cache hit/miss totals.

    Every mutator and :meth:`get_stats` runs under one re-entrant lock.  Only
    the most recent 1000 load operations are kept in the operation history.
    """

    def __init__(self):
        self._load_times = defaultdict(list)      # module -> list of durations
        self._access_counts = defaultdict(int)    # module -> number of accesses
        self._module_sizes = {}                   # module -> size in bytes
        self._cache_performance = {"hits": 0, "misses": 0}
        self._operation_history = deque(maxlen=1000)
        self._lock = threading.RLock()

    def record_load_time(self, module, load_time):
        """Store one load duration for ``module`` and log it in the history."""
        entry = {
            "operation": "load",
            "module": module,
            "duration": load_time,
            "timestamp": None,
        }
        with self._lock:
            self._load_times[module].append(load_time)
            entry["timestamp"] = time.time()
            self._operation_history.append(entry)

    def record_access(self, module):
        """Count one access to ``module``."""
        with self._lock:
            self._access_counts[module] += 1

    def record_cache_hit(self):
        """Count one cache hit."""
        with self._lock:
            self._cache_performance["hits"] += 1

    def record_cache_miss(self):
        """Count one cache miss."""
        with self._lock:
            self._cache_performance["misses"] += 1

    def record_module_size(self, module, size_bytes):
        """Remember the size of ``module`` in bytes (latest value wins)."""
        with self._lock:
            self._module_sizes[module] = size_bytes

    def get_stats(self):
        """Return an aggregate view of everything recorded so far.

        ``average_load_time_ms`` multiplies the mean recorded duration by
        1000; ``top_accessed_modules`` lists up to ten ``(module, count)``
        pairs, most accessed first; ``recent_operations`` holds the last ten
        history entries.
        """
        with self._lock:
            durations_by_module = self._load_times.values()
            total_loads = sum(len(durations) for durations in durations_by_module)
            if total_loads > 0:
                avg_load_time = sum(sum(durations) for durations in durations_by_module) / total_loads
            else:
                avg_load_time = 0.0

            hits = self._cache_performance["hits"]
            misses = self._cache_performance["misses"]
            cache_total = hits + misses
            cache_hit_rate = hits / cache_total if cache_total > 0 else 0.0

            ranked = sorted(self._access_counts.items(), key=lambda pair: pair[1], reverse=True)

            return {
                "modules_loaded": len(self._load_times),
                "total_loads": total_loads,
                "average_load_time_ms": avg_load_time * 1000,
                "total_accesses": sum(self._access_counts.values()),
                "top_accessed_modules": ranked[:10],
                "cache_hit_rate": cache_hit_rate,
                "cache_hits": hits,
                "cache_misses": misses,
                "total_module_size_bytes": sum(self._module_sizes.values()),
                "recent_operations": list(self._operation_history)[-10:],
            }

    def clear(self):
        """Discard every recorded metric."""
        with self._lock:
            for container in (self._load_times, self._access_counts, self._module_sizes):
                container.clear()
            self._cache_performance = {"hits": 0, "misses": 0}
            self._operation_history.clear()
770
+
771
+ # =============================================================================
772
+ # ADAPTIVE LEARNING (Simplified Version)
773
+ # =============================================================================
774
+
775
class AdaptiveLearner:
    """Lightweight learner that scores modules from observed import order and timing.

    It remembers the last ``learning_window`` import events, every access
    timestamp per module, and how often one module was imported directly
    after another ("chains").  Scores are refreshed on every fifth recorded
    import of a module.
    """

    def __init__(self, learning_window=100):
        """Create an empty learner keeping at most ``learning_window`` import events."""
        self._learning_window = learning_window
        self._import_sequences = deque(maxlen=learning_window)
        self._access_times = defaultdict(list)
        # previous module -> {next module -> number of times observed}
        self._import_chains = defaultdict(lambda: defaultdict(int))
        self._module_scores = {}
        self._lock = threading.RLock()

    def record_import(self, module_name, import_time):
        """Record that ``module_name`` was imported, taking ``import_time`` to load."""
        current_time = time.time()
        with self._lock:
            self._import_sequences.append((module_name, current_time, import_time))
            self._access_times[module_name].append(current_time)

            # Link this import to the one recorded immediately before it.
            if len(self._import_sequences) > 1:
                prev_name = self._import_sequences[-2][0]
                self._import_chains[prev_name][module_name] += 1

            # Re-scoring on every call would be wasteful; do it every 5th access.
            if len(self._access_times[module_name]) % 5 == 0:
                self._update_module_score(module_name)

    def _update_module_score(self, module_name):
        """Recompute the priority score of ``module_name``.

        score = 0.4 * accesses in the last hour
              + 0.4 * 1000 / (seconds since last access + 1)
              + 0.2 * (times it followed another module / tracked sequence length)
        """
        with self._lock:
            accesses = self._access_times[module_name]
            if not accesses:
                return

            # Read the clock once so every term uses the same instant.
            now = time.time()
            frequency = sum(1 for stamp in accesses if now - stamp < 3600)

            # Clamp the age at zero: a wall clock that steps backwards must
            # not yield a negative score or a division by zero.
            recency = 1.0 / (max(now - accesses[-1], 0.0) + 1.0)

            followed = sum(
                followers.get(module_name, 0)
                for followers in self._import_chains.values()
            )
            chain_weight = followed / max(len(self._import_sequences), 1)

            self._module_scores[module_name] = (
                frequency * 0.4 + recency * 1000 * 0.4 + chain_weight * 0.2
            )

    def predict_next_imports(self, current_module=None, limit=5):
        """Return up to ``limit`` module names judged most likely to be imported next.

        Modules seen directly after ``current_module`` contribute twice their
        chain count; every scored module contributes half its score.  An
        empty list is returned before any import has been recorded.
        """
        with self._lock:
            if not self._import_sequences:
                return []

            candidates = {}
            if current_module:
                for module, count in self._import_chains.get(current_module, {}).items():
                    candidates[module] = candidates.get(module, 0.0) + count * 2.0

            for module, score in self._module_scores.items():
                candidates[module] = candidates.get(module, 0.0) + score * 0.5

            ranked = sorted(candidates.items(), key=lambda item: item[1], reverse=True)
            return [module for module, _ in ranked[:limit]]

    def get_stats(self):
        """Return counts of tracked sequences, modules and chains plus the five top-scored modules."""
        with self._lock:
            return {
                'sequences_tracked': len(self._import_sequences),
                'unique_modules': len(self._access_times),
                'chains_tracked': sum(len(chains) for chains in self._import_chains.values()),
                'top_modules': self._get_priority_modules(5),
            }

    def _get_priority_modules(self, limit=10):
        """Return up to ``limit`` module names ordered by descending score."""
        ranked = sorted(self._module_scores.items(), key=lambda item: item[1], reverse=True)
        return [module for module, _ in ranked[:limit]]
856
+
857
+ # =============================================================================
858
+ # GLOBAL __import__ HOOK (Module-Level Interception)
859
+ # =============================================================================
860
+
861
# Snapshot of builtins.__import__ taken once, when this module is executed.
# _uninstall_global_import_hook() restores exactly this object, so it is
# deliberately not re-captured when the hook is installed later.
_original_builtins_import = builtins.__import__
# True while builtins.__import__ has been replaced by _intercepting_import.
_global_import_hook_installed = False
# Serialises install/uninstall of the hook and access to the globals above/below.
_global_import_hook_lock = threading.RLock()
# Manager handed to _install_global_import_hook(); None while no hook is installed.
_global_hook_manager = None
866
+
867
def _intercepting_import(name, globals=None, locals=None, fromlist=(), level=0):
    """Replacement for ``builtins.__import__`` installed by the global hook.

    The call is forwarded to the import function captured at module load.
    For absolute imports, ``OSError``, ``TypeError`` and ``AttributeError``
    raised while the target module initialises are converted into
    ``ImportError`` (chained to the original) so callers that guard optional
    dependencies with ``except ImportError`` keep working.  ``ImportError``
    itself, and anything raised by a relative import, propagates unchanged.

    Installing missing packages is not done here; that is the job of the
    meta-path finder.  The parameter names mirror ``builtins.__import__``
    because the interpreter may pass them by keyword.

    Returns:
        Whatever the original import function returns.

    Raises:
        ImportError: the import failed, or module initialisation raised one
            of the converted exception types.
    """
    # Relative imports are passed through without any exception translation.
    if level > 0:
        return _original_builtins_import(name, globals, locals, fromlist, level)

    try:
        return _original_builtins_import(name, globals, locals, fromlist, level)
    except ImportError:
        # Never return None: callers expect a module or an exception.
        raise
    except (OSError, TypeError, AttributeError) as e:
        # Seen in practice during module initialisation:
        # - OSError: gssapi / Kerberos system libraries missing
        # - TypeError: protobuf version incompatibilities
        # - AttributeError: other version mismatches
        detail = str(e)
        error_msg = f"Module {name} cannot be initialized"
        if isinstance(e, OSError) and ('gssapi' in name.lower() or 'kerberos' in detail.lower() or 'KfW' in detail):
            error_msg += " (system dependency missing: Kerberos)"
        elif isinstance(e, TypeError) and ('protobuf' in detail.lower() or 'descriptor' in detail.lower()):
            error_msg += " (protobuf version incompatibility)"
        elif isinstance(e, AttributeError):
            error_msg += " (version incompatibility)"
        raise ImportError(f"{error_msg}: {e}", name=name) from e
    # The former trailing section (manual find_spec/exec_module followed by a
    # bare ``raise``) could never run, because every path above returns or
    # raises; it has been removed.
922
+
923
def _install_global_import_hook(manager):
    """Point ``builtins.__import__`` at ``_intercepting_import`` and remember ``manager``.

    Calling it again while the hook is installed does nothing.  The original
    import function is the one captured at module load; it is intentionally
    not re-read here.
    """
    global _global_import_hook_installed, _global_hook_manager

    with _global_import_hook_lock:
        if not _global_import_hook_installed:
            builtins.__import__ = _intercepting_import
            _global_hook_manager = manager
            _global_import_hook_installed = True
936
+
937
def _uninstall_global_import_hook():
    """Put the import function captured at module load back into ``builtins`` and forget the manager.

    Does nothing when the hook is not currently installed.
    """
    global _global_import_hook_installed, _global_hook_manager

    with _global_import_hook_lock:
        if _global_import_hook_installed:
            builtins.__import__ = _original_builtins_import
            _global_hook_manager = None
            _global_import_hook_installed = False
948
+
949
+ # =============================================================================
950
+ # LAZY LOADING PROXY (Thread-Safe & Recursion-Free)
951
+ # =============================================================================
952
+
953
class LazyModuleProxy(types.ModuleType):
    """Stand-in module that is handed out while a package installs in a background thread.

    The first attribute access (or ``dir()``) waits for the install thread,
    imports the real module, stores it in ``sys.modules`` and forwards the
    request to it.
    """

    # Names answered from the proxy itself.  When one of them is missing,
    # AttributeError is raised instead of waiting for the install, so that
    # introspection of the proxy never blocks.
    _PROXY_ATTRS = frozenset((
        "_ensure_installed", "_install_thread", "_manager", "_real_module",
        "__name__", "__path__", "__loader__", "__spec__",
    ))

    def __init__(self, fullname, install_thread, manager):
        """Create a proxy for ``fullname``.

        Args:
            fullname: Dotted name of the module being installed.
            install_thread: Thread performing the installation; it is joined
                before the real import is attempted.
            manager: Object providing ``_lock`` and ``failed_installs``; it is
                taken out of ``sys.meta_path`` during the real import if it
                is registered there.
        """
        super().__init__(fullname)
        self.__file__ = f"<lazy_installing_{fullname}>"
        self._install_thread = install_thread
        self._manager = manager
        self._real_module = None

    def _ensure_installed(self):
        """Block until the install finished, then return the real module.

        Raises:
            ImportError: the manager recorded the install as failed, or the
                real module cannot be imported.
        """
        if self._real_module is not None:
            return self._real_module

        if self._install_thread.is_alive():
            self._install_thread.join()

        name = self.__name__
        manager = self._manager
        with manager._lock:
            if name in manager.failed_installs:
                raise ImportError(f"xwlazy failed to install {name}")

        finder_removed = False
        proxy_removed = False
        try:
            # Keep the lazy finder out of the way so the real import cannot
            # be answered with another proxy.
            if manager in sys.meta_path:
                sys.meta_path.remove(manager)
                finder_removed = True

            # import_module() consults sys.modules first.  While this proxy
            # is still registered there it would simply be returned again,
            # making the proxy its own "real module" and sending attribute
            # access into endless recursion.
            if sys.modules.get(name) is self:
                del sys.modules[name]
                proxy_removed = True

            real_module = importlib.import_module(name)
            if real_module is self:
                raise ImportError(f"xwlazy could not resolve real module for {name}", name=name)

            self._real_module = real_module
            sys.modules[name] = real_module
        except BaseException:
            # Leave sys.modules as it was found so a later access can retry.
            if proxy_removed:
                sys.modules.setdefault(name, self)
            raise
        finally:
            if finder_removed and manager not in sys.meta_path:
                sys.meta_path.insert(0, manager)

        return self._real_module

    def __getattr__(self, name):
        # Only reached when normal lookup failed.
        if name in LazyModuleProxy._PROXY_ATTRS:
            return object.__getattribute__(self, name)
        return getattr(self._ensure_installed(), name)

    def __dir__(self):
        return dir(self._ensure_installed())

    def __repr__(self):
        if self._real_module is not None:
            return repr(self._real_module)
        return f"<LazyProxy for '{self.__name__}' (Installing...)>"
1002
+
1003
class LazyLoader(Loader):
    """Loader whose module object is a ``LazyModuleProxy`` backed by a background install."""

    def __init__(self, manager, fullname, install_target):
        self.manager, self.fullname, self.install_target = manager, fullname, install_target

    def create_module(self, spec):
        """Start the install in a daemon thread and return the proxy module at once."""
        worker = threading.Thread(
            target=self.manager._perform_install,
            args=(self.install_target, self.fullname),
            daemon=True,
        )
        worker.start()
        proxy = LazyModuleProxy(self.fullname, worker, self.manager)
        return proxy

    def exec_module(self, module):
        """Nothing to execute: the proxy resolves itself on first use."""
        return None
1020
+
1021
+ # =============================================================================
1022
+ # XWLAZY MANAGER (v4.0 - Enterprise Features)
1023
+ # =============================================================================
1024
+
1025
+ class XWLazy(MetaPathFinder):
1026
def __init__(self, root_dir=".", default_enabled=True, enable_global_hook=True, enable_learning=False):
    """Set up manager state, caches, policies, manifests and (optionally) the global import hook.

    Args:
        root_dir: Directory searched for ``requirements.txt`` / ``pyproject.toml``.
        default_enabled: Policy default for packages without their own
            configuration; a value found by ``_extract_auto_config`` wins.
        enable_global_hook: When true, ``builtins.__import__`` is replaced
            through ``_install_global_import_hook``.
        enable_learning: When true, an ``AdaptiveLearner`` is created.

    The statement order matters: the keyword-detection settings must exist
    before ``_extract_auto_config`` runs, and manifests are indexed before
    the lockfile is loaded.
    """
    self.root_dir = Path(root_dir)

    # --- locking and bookkeeping sets ---
    self._lock = threading.RLock()
    self.manifest_index = {}
    self.installed_cache = set()
    self.failed_installs = set()
    self.installing_now = set()

    # --- helper components ---
    l2_location = XWLAZY_CACHE_DIR / "l2_cache"
    self._multi_tier_cache = SimpleMultiTierCache(l1_size=1000, l2_dir=l2_location)
    self._watched_prefixes = WatchedPrefixRegistry(initial_prefixes=SERIALIZATION_PREFIXES)
    self._perf_monitor = EnhancedPerformanceMonitor()

    # --- per-package policy tables ---
    self.global_deny_list = _load_deny_list()
    self.package_policies = {}

    # --- optional adaptive learning ---
    self._enable_learning = enable_learning
    if enable_learning:
        self._learner = AdaptiveLearner()
    else:
        self._learner = None

    # --- lockfile and audit log live in the shared xwlazy data directory ---
    XWLAZY_DATA_DIR.mkdir(parents=True, exist_ok=True)
    self._lockfile_path = XWLAZY_DATA_DIR / LOCKFILE_PATH
    self._audit_log_path = XWLAZY_DATA_DIR / AUDIT_LOG_FILE

    # --- counters exposed through get_stats() ---
    self.stats = dict(
        installs=0,
        failures=0,
        total_time_ms=0,
        strategies_used=collections.defaultdict(int),
        cache_hits=0,
        cache_misses=0,
        adaptive_predictions=0,
        history=[],
    )

    # --- keyword detection (read by _extract_auto_config below) ---
    self._keyword_detection_enabled = True
    self._keyword_to_check = "xwlazy-enabled"

    # A project-level setting, when present, overrides the constructor argument.
    auto_config_enabled = self._extract_auto_config(root_dir)
    self.default_enabled = default_enabled if auto_config_enabled is None else auto_config_enabled

    self._index_manifests()
    self._load_lockfile()

    # PEP 668 marker file next to the interpreter prefix.
    self._is_externally_managed = Path(sys.prefix).joinpath("EXTERNALLY-MANAGED").exists()

    if enable_global_hook:
        _install_global_import_hook(self)
1097
+
1098
+ # --- PUBLIC API (Enhanced v3.0) ---
1099
+
1100
def configure(self, package_name, enabled=True, mode="blocking", install_strategy="pip", allow=True):
    """Store an independent policy for one package.

    Args:
        package_name: Non-empty string used as the policy key.
        enabled: Whether xwlazy handling is on for this package.
        mode: ``"blocking"`` or ``"lazy"``.
        install_strategy: One of ``"pip"``, ``"wheel"``, ``"cached"``, ``"smart"``.
        allow: ``False`` marks the package as denied.

    Raises:
        ValueError: for an invalid ``package_name``, ``mode`` or ``install_strategy``.
    """
    if not (isinstance(package_name, str) and package_name):
        raise ValueError("package_name must be a non-empty string")
    if mode not in ("blocking", "lazy"):
        raise ValueError(f"mode must be 'blocking' or 'lazy', got: {mode!r}")
    if install_strategy not in ("pip", "wheel", "cached", "smart"):
        raise ValueError(f"Invalid strategy: {install_strategy}")

    policy = dict(enabled=enabled, mode=mode, strategy=install_strategy, allow=allow)
    with self._lock:
        self.package_policies[package_name] = policy
1120
+
1121
def deny_package(self, package_name):
    """Mark ``package_name`` as denied: handling stays enabled but ``allow`` is set to ``False``."""
    denied = {"enabled": True, "allow": False}
    self.configure(package_name, **denied)
1124
+
1125
def enable_package(self, package_name):
    """Turn xwlazy handling on for ``package_name``; other policy fields take ``configure``'s defaults."""
    switch = {"enabled": True}
    self.configure(package_name, **switch)
1128
+
1129
def disable_package(self, package_name):
    """Turn xwlazy handling off for ``package_name``; other policy fields take ``configure``'s defaults."""
    switch = {"enabled": False}
    self.configure(package_name, **switch)
1132
+
1133
def get_stats(self):
    """Return a snapshot of manager statistics plus the stats of every helper component.

    The manager's own counters are copied, including their nested containers
    (``history``, ``strategies_used``), so the caller can iterate or modify
    the result without touching live state.  Added on top: adaptive-learning
    stats (``None`` when learning is off), resolution-cache info, multi-tier
    cache stats, performance stats, watched prefixes, lockfile info and a few
    size counters.
    """
    with self._lock:
        stats = {
            # .copy() keeps the container type (e.g. defaultdict) intact.
            name: value.copy() if isinstance(value, (dict, list, set)) else value
            for name, value in self.stats.items()
        }

        stats['adaptive_learning'] = self._learner.get_stats() if self._learner else None

        # functools.lru_cache bookkeeping of the target resolver
        cache_stats = self._resolve_target_cached.cache_info()
        stats['resolution_cache'] = {
            'hits': cache_stats.hits,
            'misses': cache_stats.misses,
            'size': cache_stats.currsize,
            'maxsize': cache_stats.maxsize
        }

        stats['multi_tier_cache'] = self._multi_tier_cache.get_stats()
        stats['performance'] = self._perf_monitor.get_stats()

        # Query once so 'count' and 'prefixes' always describe the same list.
        watched = self._watched_prefixes.get_watched_prefixes()
        stats['watched_prefixes'] = {
            'count': len(watched),
            'prefixes': watched
        }

        stats['lockfile_path'] = str(self._lockfile_path)
        stats['lockfile_exists'] = self._lockfile_path.exists()
        stats['global_hook_installed'] = _global_import_hook_installed
        stats['keyword_detection_enabled'] = self._keyword_detection_enabled
        stats['learning_enabled'] = self._enable_learning
        stats['installed_packages_count'] = len(self.installed_cache)
        stats['failed_packages_count'] = len(self.failed_installs)
        stats['configured_packages_count'] = len(self.package_policies)

        return stats
1174
+
1175
def generate_sbom(self, output_path=None):
    """Write a software bill of materials and return it.

    Args:
        output_path: Destination file; the manager's audit-log path is used
            when it is omitted (or falsy).

    Returns:
        The SBOM dictionary that was written, or ``None`` when anything went
        wrong.  Failures are reported on stderr only if ``XWLAZY_VERBOSE`` is
        set.
    """
    output = output_path or self._audit_log_path
    try:
        with self._lock:
            sbom_data = {
                "metadata": {
                    "format": "xwlazy-sbom",
                    "version": "1.0",
                    "generated": datetime.now().isoformat(),
                },
                "packages": list(self.installed_cache),
                # Copy nested containers too, so the returned document does
                # not alias live counters.
                "statistics": {
                    name: value.copy() if isinstance(value, (dict, list, set)) else value
                    for name, value in self.stats.items()
                },
                "lockfile": self._read_lockfile() if self._lockfile_path.exists() else None
            }
            _write_toml_simple(sbom_data, output)
        return sbom_data
    except Exception as e:
        if os.environ.get('XWLAZY_VERBOSE'):
            # ASCII-only, like every other diagnostic in this file, so the
            # handler itself cannot fail on a narrow console encoding.
            err_msg = str(e).encode('ascii', 'replace').decode('ascii')
            sys.stderr.write(f"[xwlazy] Failed to generate SBOM: {err_msg}\n")
        return None
1197
+
1198
def get_lockfile(self):
    """Return whatever ``_read_lockfile()`` yields for the current lockfile."""
    contents = self._read_lockfile()
    return contents
1201
+
1202
def save_lockfile(self):
    """Persist the current state by delegating to ``_save_lockfile()``; returns nothing."""
    writer = self._save_lockfile
    writer()
1205
+
1206
+ # --- NEW v3.0.2: Watched Prefixes API ---
1207
+
1208
def add_watched_prefix(self, prefix):
    """Register ``prefix`` with the watched-prefix registry."""
    registry = self._watched_prefixes
    registry.add_prefix(prefix)
1211
+
1212
def remove_watched_prefix(self, prefix):
    """Ask the watched-prefix registry to drop ``prefix``."""
    registry = self._watched_prefixes
    registry.remove_prefix(prefix)
1215
+
1216
def get_watched_prefixes(self):
    """Return the registry's list of watched prefixes."""
    registry = self._watched_prefixes
    return registry.get_watched_prefixes()
1219
+
1220
def is_watched(self, module_name):
    """Report whether the registry considers ``module_name`` watched."""
    registry = self._watched_prefixes
    return registry.is_watched(module_name)
1223
+
1224
+ # --- NEW v3.0.2: Cache Management API ---
1225
+
1226
def get_cache_stats(self):
    """Return the statistics reported by the multi-tier cache."""
    cache = self._multi_tier_cache
    return cache.get_stats()
1229
+
1230
def clear_cache(self):
    """Reset the multi-tier cache, the installed-package set and the resolver's ``lru_cache``.

    The resolver cache is cleared only if the attribute exists and offers
    ``cache_clear``.
    """
    self._multi_tier_cache.clear()
    with self._lock:
        self.installed_cache.clear()
        resolver = getattr(self, '_resolve_target_cached', None)
        if hasattr(resolver, 'cache_clear'):
            resolver.cache_clear()
1238
+
1239
def invalidate_cache(self, module_name):
    """Drop the ``installed:<module_name>`` cache entry and forget the module in ``installed_cache``."""
    self._multi_tier_cache.invalidate(f"installed:{module_name}")
    with self._lock:
        self.installed_cache.discard(module_name)
1245
+
1246
+ # --- NEW v3.0.2: Performance Monitoring API ---
1247
+
1248
def get_performance_stats(self):
    """Return the statistics reported by the performance monitor."""
    monitor = self._perf_monitor
    return monitor.get_stats()
1251
+
1252
def clear_performance_stats(self):
    """Reset the performance monitor; returns nothing."""
    monitor = self._perf_monitor
    monitor.clear()
1255
+
1256
+ # --- INTERNAL LOGIC (Enhanced v3.0) ---
1257
+
1258
def _extract_auto_config(self, root_dir):
    """Look for a project-level ``default_enabled`` setting in ``<root_dir>/pyproject.toml``.

    Lookup order:
      1. ``default_enabled`` under ``[tool.xwlazy]``, then the legacy tables
         ``[tool.xwlazylite]`` and ``[tool.titanguardian]``; the first one
         found is returned as-is.
      2. If keyword detection is on, ``[project].keywords``: ``True`` when the
         configured keyword occurs (case-insensitively) as a list element or,
         for a plain string, as a substring.

    Returns:
        The configured value, ``True`` for a keyword match, or ``None`` when
        the file yields no data or holds no relevant setting.
    """
    data = _load_toml_file(Path(root_dir) / "pyproject.toml", verbose_error=False)
    if not data:
        return None

    tool_cfg = data.get("tool", {})
    for section in ("xwlazy", "xwlazylite", "titanguardian"):
        if section in tool_cfg and "default_enabled" in tool_cfg[section]:
            return tool_cfg[section]["default_enabled"]

    if self._keyword_detection_enabled:
        keywords = data.get("project", {}).get("keywords", [])
        if isinstance(keywords, list):
            lowered = [(k if isinstance(k, str) else str(k)).lower() for k in keywords]
            if self._keyword_to_check.lower() in lowered:
                return True
        elif isinstance(keywords, str):
            if self._keyword_to_check.lower() in keywords.lower():
                return True

    return None
1295
+
1296
+ def _check_package_keywords(self, package_name=None):
1297
+ """
1298
+ Check if package has xwlazy-enabled keyword in metadata.
1299
+ NEW v3.0: Keyword-based auto-detection.
1300
+ """
1301
+ if not self._keyword_detection_enabled:
1302
+ return False
1303
+
1304
+ if sys.version_info < (3, 8):
1305
+ return False
1306
+
1307
+ try:
1308
+ if package_name:
1309
+ try:
1310
+ dist = importlib.metadata.distribution(package_name)
1311
+ keywords = dist.metadata.get_all('Keywords', [])
1312
+ if keywords:
1313
+ all_keywords = []
1314
+ for kw in keywords:
1315
+ if isinstance(kw, str):
1316
+ all_keywords.extend(k.strip().lower() for k in kw.split(','))
1317
+ else:
1318
+ all_keywords.append(str(kw).lower())
1319
+ return self._keyword_to_check.lower() in all_keywords
1320
+ except importlib.metadata.PackageNotFoundError:
1321
+ return False
1322
+ else:
1323
+ # Check all installed packages
1324
+ for dist in importlib.metadata.distributions():
1325
+ try:
1326
+ keywords = dist.metadata.get_all('Keywords', [])
1327
+ if keywords:
1328
+ all_keywords = []
1329
+ for kw in keywords:
1330
+ if isinstance(kw, str):
1331
+ all_keywords.extend(k.strip().lower() for k in kw.split(','))
1332
+ else:
1333
+ all_keywords.append(str(kw).lower())
1334
+ if self._keyword_to_check.lower() in all_keywords:
1335
+ return True
1336
+ except Exception as e:
1337
+ # Continue on individual package errors (non-critical)
1338
+ if os.environ.get('XWLAZY_VERBOSE'):
1339
+ err_msg = str(e).encode('ascii', 'replace').decode('ascii')
1340
+ sys.stderr.write(f"[xwlazy] Error checking keywords for package {dist.metadata.get('Name', 'unknown')}: {err_msg}\n")
1341
+ continue
1342
+ except Exception as e:
1343
+ # Non-critical: keyword detection failures don't block execution
1344
+ if os.environ.get('XWLAZY_VERBOSE'):
1345
+ err_msg = str(e).encode('ascii', 'replace').decode('ascii')
1346
+ sys.stderr.write(f"[xwlazy] Error in keyword detection: {err_msg}\n")
1347
+ pass
1348
+
1349
+ return False
1350
+
1351
def _index_manifests(self):
    """Index the dependencies declared in ``requirements.txt`` and ``pyproject.toml``.

    Every requirement that starts with a valid distribution name is passed
    to ``_add_index(name, requirement)``; the full requirement string
    (extras, version specifiers, markers) is kept as the install string.

    Skipped: blank lines, comments, pip option lines (``-r``, ``-e``, ``-c``,
    ``--index-url`` ...) and entries that do not begin with a distribution
    name, such as bare URLs or local paths.
    """
    # Distribution name as defined by PEP 508.
    valid_name = re.compile(r'^[A-Za-z0-9](?:[A-Za-z0-9._-]*[A-Za-z0-9])?$')

    def register(requirement):
        requirement = requirement.strip()
        if not requirement or requirement.startswith('-'):
            return
        # The name ends at the first extras bracket, version operator,
        # parenthesised specifier, marker separator, '@' reference or blank.
        name = re.split(r'[\s\[<>=!~;@(]', requirement, maxsplit=1)[0]
        if valid_name.match(name):
            self._add_index(name, requirement)

    # 1. requirements.txt
    req_file = self.root_dir / "requirements.txt"
    if req_file.exists():
        try:
            with open(req_file, 'r', encoding='utf-8') as f:
                for raw_line in f:
                    # '#' starts a comment only at line start or after
                    # whitespace; inside a URL ('#egg=...') it is data.
                    register(re.sub(r'(^|\s+)#.*$', '', raw_line))
        except (OSError, UnicodeDecodeError) as e:
            if os.environ.get('XWLAZY_VERBOSE'):
                err_msg = str(e).encode('ascii', 'replace').decode('ascii')
                sys.stderr.write(f"[xwlazy] Error reading requirements.txt: {err_msg}\n")

    # 2. pyproject.toml
    data = _load_toml_file(self.root_dir / "pyproject.toml", verbose_error=True)
    if data:
        project = data.get("project", {})

        for dep in project.get("dependencies", []):
            if isinstance(dep, str):
                register(dep)

        for group_deps in project.get("optional-dependencies", {}).values():
            for dep in group_deps:
                if isinstance(dep, str):
                    register(dep)
1396
+
1397
def _add_index(self, pkg, install_str):
    """Record ``install_str`` for ``pkg`` in the manifest index and reset the resolver cache.

    The key is ``pkg`` lower-cased with dashes turned into underscores.
    When ``install_str`` contains no version operator, ``HARD_MAPPINGS`` is
    searched for an entry whose package name or import name equals the key;
    the first such entry that does carry a version replaces ``install_str``.
    """
    def _normalise(name):
        return name.lower().replace('-', '_')

    pkg_key = _normalise(pkg)

    if not re.search(r'[<>=!~]', install_str):
        for import_name, package_spec in HARD_MAPPINGS.items():
            same_package = _normalise(_extract_package_name(package_spec)) == pkg_key
            if (same_package or _normalise(import_name) == pkg_key) and re.search(r'[<>=!~]', package_spec):
                install_str = package_spec  # pinned spec from the mapping table
                break

    self.manifest_index[pkg_key] = install_str

    resolver = getattr(self, '_resolve_target_cached', None)
    if hasattr(resolver, 'cache_clear'):
        resolver.cache_clear()
1428
+
1429
+ def _resolve_target(self, fullname):
1430
+ """Resolves target with Dot-Notation Walk-up (cached)."""
1431
+ manifest_key = frozenset(self.manifest_index.items())
1432
+ return self._resolve_target_cached(manifest_key, fullname)
1433
+
1434
@lru_cache(maxsize=512)
def _resolve_target_cached(self, manifest_items, fullname):
    """Cached lookup of the install target for the import name ``fullname``.

    The dotted name is tried from its longest form down to the top-level
    package.  For each prefix:

    1. the manifest (``manifest_items``, a frozenset of ``(key, install_str)``
       pairs) is consulted under the lower-cased, dash-to-underscore key;
    2. otherwise ``HARD_MAPPINGS`` is consulted; if the mapped package is in
       the manifest, the manifest's install string is preferred.

    When nothing matches, ``_generate_fallback_candidates(fullname)`` is
    returned as-is if it is non-empty.

    Returns:
        An install string, the fallback candidates, or ``None``.
    """
    manifest = dict(manifest_items)
    segments = fullname.split('.')

    for depth in reversed(range(1, len(segments) + 1)):
        prefix = '.'.join(segments[:depth])

        # Manifest entries (requirements.txt / pyproject.toml) take priority.
        manifest_key = prefix.lower().replace('-', '_')
        if manifest_key in manifest:
            return manifest[manifest_key]

        if prefix not in HARD_MAPPINGS:
            continue

        mapped = HARD_MAPPINGS[prefix]
        mapped_key = _extract_package_name(mapped).lower().replace('-', '_')
        # Prefer the project's own pin for the mapped package when present.
        return manifest[mapped_key] if mapped_key in manifest else mapped

    candidates = _generate_fallback_candidates(fullname)
    return candidates if candidates else None
1476
+
1477
def _get_policy(self, top_module):
    """
    Return the policy dict for ``top_module``.

    A per-package entry in ``self.package_policies`` is returned as-is;
    otherwise a fresh default policy is built (blocking mode, pip strategy,
    allowed, enabled according to ``self.default_enabled``).
    """
    overrides = self.package_policies
    if top_module not in overrides:
        return {
            "enabled": self.default_enabled,
            "mode": "blocking",
            "strategy": "pip",
            "allow": True
        }
    return overrides[top_module]
+
1488
+ # --- NEW v3.0: Lockfile Support ---
1489
+
1490
def _load_lockfile(self):
    """
    Merge the lockfile's "packages" list into ``self.installed_cache``.

    Does nothing when the lockfile is absent or parses to an empty value.
    Any error is swallowed and reported on stderr only when XWLAZY_VERBOSE
    is set.
    """
    lock_path = self._lockfile_path
    if not lock_path.exists():
        return

    try:
        parsed = _read_toml_simple(self._lockfile_path)
        if not parsed:
            return
        # Restore installed packages from lockfile
        restored = parsed.get("packages", [])
        with self._lock:
            self.installed_cache.update(restored)
    except Exception as e:
        if os.environ.get('XWLAZY_VERBOSE'):
            sys.stderr.write(f"[xwlazy] Error reading lockfile: {e}\n")
+
1506
def _save_lockfile(self):
    """
    Write the installed-package set and install counters to the lockfile.

    The payload holds a format version, an ISO timestamp, the sorted
    package names and the install/failure totals from ``self.stats``.
    Any error is swallowed and reported on stderr only when XWLAZY_VERBOSE
    is set.

    NOTE(review): the state snapshot is taken under ``self._lock``; the
    write is assumed to happen after the lock is released (indentation was
    lost in the recovered source) - confirm.
    """
    try:
        with self._lock:
            snapshot = {
                "version": "1.0",
                "generated": datetime.now().isoformat(),
                "packages": sorted(list(self.installed_cache)),
                "statistics": dict(
                    total_installs=self.stats['installs'],
                    total_failures=self.stats['failures'],
                ),
            }
        _write_toml_simple(snapshot, self._lockfile_path)
    except Exception as e:
        if os.environ.get('XWLAZY_VERBOSE'):
            sys.stderr.write(f"[xwlazy] Failed to write lockfile: {e}\n")
+
1524
def _read_lockfile(self):
    """Return whatever ``_read_toml_simple`` parses from the lockfile path."""
    contents = _read_toml_simple(self._lockfile_path)
    return contents
+
1528
+ # --- STRATEGY IMPLEMENTATIONS ---
1529
+
1530
def _detect_venv(self):
    """
    Detect whether Python is running inside a virtual environment.

    Three signals are combined:
    - ``sys.real_prefix`` exists (old-style virtualenv);
    - ``sys.prefix`` differs from ``sys.base_prefix`` (venv/virtualenv);
    - the ``VIRTUAL_ENV`` environment variable is set to a non-empty value.

    An empty ``VIRTUAL_ENV`` is treated as unset, so that the ``in_venv``
    flag and the interpreter lookup below agree with each other.

    Returns:
        tuple: ``(in_venv, venv_python)`` where ``in_venv`` is a bool and
        ``venv_python`` is the Path of the interpreter inside
        ``$VIRTUAL_ENV`` (``Scripts/python.exe`` on win32, ``bin/python``
        elsewhere) if that file exists, otherwise None.
    """
    venv_root = os.environ.get('VIRTUAL_ENV')

    in_venv = bool(
        hasattr(sys, 'real_prefix')
        or (hasattr(sys, 'base_prefix') and sys.prefix != sys.base_prefix)
        or venv_root
    )

    venv_python = None
    if venv_root:
        venv_path = Path(venv_root)
        if sys.platform == 'win32':
            candidate = venv_path / 'Scripts' / 'python.exe'
        else:
            candidate = venv_path / 'bin' / 'python'
        # Only report an interpreter that is actually present on disk.
        if candidate.exists():
            venv_python = candidate

    return in_venv, venv_python
+
1563
def _is_package_installed_in_venv(self, package_name: str) -> bool:
    """
    Check whether a distribution is installed where this process should use it.

    Outside a virtual environment (per ``self._detect_venv()``) any
    distribution visible to ``importlib.metadata`` counts.  Inside a virtual
    environment the distribution must additionally live under
    ``sys.prefix``, so copies in user or system site-packages do not count.

    The containment test compares resolved paths component-wise.  A plain
    substring test would wrongly accept a sibling directory whose name
    merely starts with the venv path (``/x/venv`` vs ``/x/venv2/...``).

    Args:
        package_name: Distribution name to look up (e.g. "pandas").

    Returns:
        bool: True if the distribution is found (and, in a venv, located
        under the venv prefix).  If the location cannot be determined the
        distribution is assumed to be installed.
    """
    in_venv, _venv_python = self._detect_venv()

    try:
        dist = importlib.metadata.distribution(package_name)
    except importlib.metadata.PackageNotFoundError:
        return False

    if dist is None:
        return False

    # Not in a venv: being discoverable at all is sufficient.
    if not in_venv:
        return True

    try:
        # locate_file("") yields the directory the distribution is installed into.
        dist_file = dist.locate_file("")
        if dist_file is None:
            # Fallback: try the metadata file, then the prefix itself.
            try:
                metadata_file = dist.locate_file("METADATA")
                dist_file = Path(metadata_file) if metadata_file else Path(sys.prefix)
            except Exception:
                dist_file = Path(sys.prefix)

        dist_path = (Path(dist_file) if dist_file else Path(sys.prefix)).resolve()
        venv_prefix = Path(sys.prefix).resolve()

        # Site-packages live below the venv prefix; compare by path ancestry.
        return dist_path == venv_prefix or venv_prefix in dist_path.parents

    except Exception as e:
        # Location unknown: assume installed rather than trigger a reinstall.
        if os.environ.get('XWLAZY_VERBOSE'):
            err_msg = str(e).encode('ascii', 'replace').decode('ascii')
            sys.stderr.write(f"[xwlazy] Could not verify venv location for {package_name}, assuming installed: {err_msg}\n")
        return True  # Safe fallback: assume installed if we can't verify
+
1631
def _run_pip_install(self, install_str, extra_args=None):
    """
    Invoke ``python -m pip install <install_str>`` in a subprocess.

    The interpreter is the one found inside ``$VIRTUAL_ENV`` when
    ``self._detect_venv()`` reports one, otherwise ``sys.executable``.
    pip installs dependencies by default; to keep it that way any
    ``--no-deps`` / ``--no-dependencies`` entry in ``extra_args`` is
    dropped.  When running in a virtual environment ``--user`` is dropped
    as well so the install lands in the venv's site-packages.

    Args:
        install_str: Requirement passed to pip (e.g. "pandas>=2.0.0").
        extra_args: Optional extra pip arguments
            (e.g. ["--no-index", "--find-links", ...]).

    Raises:
        subprocess.CalledProcessError: pip exited with a non-zero status
            (stderr is captured on the exception).
        subprocess.TimeoutExpired: pip did not finish within 120 seconds.
    """
    in_venv, venv_python = self._detect_venv()
    interpreter = str(venv_python) if venv_python else sys.executable

    # Flags that must never reach pip from this entry point.
    forbidden = ("--no-deps", "--no-dependencies")
    if in_venv:
        forbidden += ("--user",)

    command = [interpreter, "-m", "pip", "install", install_str]
    if extra_args:
        command.extend([flag for flag in extra_args if flag not in forbidden])

    subprocess.run(
        command,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.PIPE,
        check=True, timeout=120
    )
+
1685
def _strategy_pip(self, install_str):
    """Plain strategy: install ``install_str`` with pip's default settings."""
    return_value = self._run_pip_install(install_str)
    del return_value  # the strategy itself reports nothing back
+
1688
def _strategy_wheel(self, install_str):
    """
    Wheel strategy: install only from the local ``wheels`` cache directory.

    pip is run with ``--no-index --find-links <XWLAZY_CACHE_DIR>/wheels``.

    Raises:
        FileNotFoundError: the wheel directory does not exist.
    """
    wheel_dir = XWLAZY_CACHE_DIR / "wheels"
    if not wheel_dir.exists():
        raise FileNotFoundError(f"Wheel directory not found: {wheel_dir}")
    offline_args = ["--no-index", "--find-links", str(wheel_dir)]
    self._run_pip_install(install_str, offline_args)
+
1695
def _strategy_cached(self, install_str):
    """Cached strategy: run pip with ``--no-index`` so no package index is contacted."""
    offline_only = ["--no-index"]
    self._run_pip_install(install_str, offline_only)
+
1698
def _strategy_smart(self, install_str):
    """
    Smart strategy: prefer the local wheel cache, fall back to plain pip.

    Any exception from the wheel strategy triggers the fallback; it is
    reported on stderr (ASCII-sanitised) only when XWLAZY_VERBOSE is set.
    Exceptions from the pip fallback propagate to the caller.
    """
    try:
        self._strategy_wheel(install_str)
        return
    except Exception as e:
        # Falling back is the expected path when no wheel is available.
        if os.environ.get('XWLAZY_VERBOSE'):
            err_msg = str(e).encode('ascii', 'replace').decode('ascii')
            sys.stderr.write(f"[xwlazy] Wheel strategy failed for {install_str}, falling back to pip: {err_msg}\n")
    self._strategy_pip(install_str)
+
1709
def _is_stdlib_module(self, module_name: str) -> bool:
    """
    Check if a module belongs to the Python standard library.

    Only the root package (text before the first dot) is examined.  The
    checks, in order:

    1. ``sys.builtin_module_names``;
    2. ``sys.stdlib_module_names`` when the interpreter provides it
       (Python 3.10+).  This also covers frozen stdlib modules, whose
       spec origin is the string ``'frozen'`` rather than a file path and
       which the path heuristic below cannot recognise;
    3. a path heuristic on the module spec: an origin of None counts as
       stdlib, an origin under site-packages/dist-packages does not, and
       an origin under ``<prefix>/lib`` for any of the interpreter's
       prefixes does.  Paths are case-normalised first so that ``Lib`` on
       Windows matches.

    NOTE(review): namespace packages also have ``origin is None`` and are
    therefore reported as stdlib by step 3; that pre-existing behaviour is
    kept because callers may rely on it - confirm whether it is intended.

    Returns:
        bool: True if the module is judged to be part of the standard
        library, False otherwise (including when the lookup itself fails).
    """
    import importlib.util

    # Get root module name (first part before dot)
    root_module = module_name.split('.')[0]

    if root_module in sys.builtin_module_names:
        return True

    # Authoritative list where available (absent before Python 3.10).
    known_stdlib = getattr(sys, 'stdlib_module_names', None)
    if known_stdlib is not None and root_module in known_stdlib:
        return True

    try:
        spec = importlib.util.find_spec(root_module)
        if spec is None:
            return False

        if spec.origin is None:
            return True

        origin = os.path.normcase(spec.origin).replace('\\', '/')
        if 'site-packages' in origin or 'dist-packages' in origin:
            return False

        # Is the module under one of the interpreter's own lib directories?
        for prefix in (sys.prefix, sys.base_prefix, sys.exec_prefix, sys.base_exec_prefix):
            lib_dir = os.path.normcase(f'{prefix}/lib').replace('\\', '/')
            if lib_dir in origin:
                return True
    except Exception:
        # A failing lookup is treated as "not stdlib".
        pass

    return False
+
1749
def _perform_install(self, install_str, mod_name):
    """
    Install the distribution for ``mod_name`` using the policy's strategy.

    Steps:
    1. Consult the multi-tier cache under ``installed:<top_module>``; a
       cached True/False short-circuits as "installed"/"known failure".
    2. Skip the install if the package is already present (metadata lookup
       via ``_is_package_installed_in_venv``, or an importable non-stdlib
       module).
    3. Otherwise try each candidate with the configured strategy
       ("wheel", "cached", "smart", anything else -> pip) until one
       succeeds; on success update caches, stats and the lockfile.
    4. If every candidate fails, record the failure in
       ``failed_installs`` and the cache.

    In all install attempts the module is removed from ``installing_now``
    and an audit entry is written.

    Args:
        install_str: A requirement string, or a list of candidate
            requirement strings tried in order.
        mod_name: Dotted import name that triggered the install; only its
            first segment is used for bookkeeping.

    Returns:
        None.  Install errors are recorded, not raised.
    """
    start = time.time()
    success = False

    top_module = mod_name.split('.')[0]
    policy = self._get_policy(top_module)
    strategy_name = policy.get("strategy", "pip")

    # --- 1. Multi-tier cache ---------------------------------------------
    cache_key = f"installed:{top_module}"
    cached_result = self._multi_tier_cache.get(cache_key)
    if cached_result is not None:
        if cached_result:
            # Already installed (from L1/L2 cache)
            with self._lock:
                self.installing_now.discard(top_module)
                self.installed_cache.add(top_module)
                self.stats['cache_hits'] += 1
            self._perf_monitor.record_cache_hit()
            self._perf_monitor.record_access(top_module)
        else:
            # Known failure (from cache)
            with self._lock:
                self.installing_now.discard(top_module)
                self.failed_installs.add(top_module)
        return

    # --- 2. Already installed? -------------------------------------------
    package_name = _extract_package_name(top_module)
    is_installed = bool(package_name) and self._is_package_installed_in_venv(package_name)

    if not is_installed:
        # Importable and not stdlib -> treat as installed even if the
        # distribution name could not be resolved.
        try:
            spec = importlib.util.find_spec(top_module)
            if spec is not None and spec.loader is not None:
                if not self._is_stdlib_module(top_module):
                    is_installed = True
        except Exception:
            # If check fails, proceed with installation
            pass

    if is_installed:
        with self._lock:
            self.installing_now.discard(top_module)
            self.installed_cache.add(top_module)
        self._multi_tier_cache.set(cache_key, True)
        with self._lock:
            self.stats['cache_hits'] += 1
        self._perf_monitor.record_cache_hit()
        self._perf_monitor.record_access(top_module)
        return

    self._perf_monitor.record_cache_miss()
    with self._lock:
        self.stats['cache_misses'] += 1

    # --- 3. Try candidates -----------------------------------------------
    candidates = install_str if isinstance(install_str, list) else [install_str]
    last_error = None
    verbose = os.environ.get('XWLAZY_VERBOSE')

    # Unknown strategy names fall back to plain pip.
    strategy_method = {
        "wheel": "_strategy_wheel",
        "cached": "_strategy_cached",
        "smart": "_strategy_smart",
    }.get(strategy_name, "_strategy_pip")
    run_strategy = getattr(self, strategy_method)

    for candidate in candidates:
        try:
            run_strategy(candidate)
        except subprocess.CalledProcessError as e:
            last_error = e
            if verbose:
                sys.stderr.write(f"[xwlazy] Install Failed ({strategy_name}) for {candidate}: {e.stderr.decode('utf-8', errors='replace') if e.stderr else str(e)}\n")
            continue
        except subprocess.TimeoutExpired as e:
            # Keep the exception *instance* so the summary below can
            # recognise a timeout with isinstance().
            last_error = e
            if verbose:
                sys.stderr.write(f"[xwlazy] Install Timeout for {candidate} (exceeded 120s)\n")
            continue
        except Exception as e:
            last_error = e
            if verbose:
                sys.stderr.write(f"[xwlazy] Unexpected Error installing {candidate}: {type(e).__name__}: {e}\n")
            continue

        try:
            success = True
            with self._lock:
                self.installed_cache.add(top_module)
                self.stats["strategies_used"][strategy_name] += 1

            # Store in multi-tier cache (L1 + L2)
            self._multi_tier_cache.set(cache_key, True)

            duration_so_far = time.time() - start
            self._perf_monitor.record_load_time(top_module, duration_so_far)
            self._perf_monitor.record_access(top_module)

            # Make the freshly installed package visible to the import system.
            importlib.invalidate_caches()

            self._save_lockfile()
        except Exception as e:
            # Bookkeeping failed after a successful install: as before,
            # log it and move on to the next candidate.
            last_error = e
            if verbose:
                sys.stderr.write(f"[xwlazy] Unexpected Error installing {candidate}: {type(e).__name__}: {e}\n")
            continue

        install_str = candidate  # Use successful candidate for logging
        break

    # --- 4. All candidates failed ----------------------------------------
    if not success:
        with self._lock:
            self.failed_installs.add(top_module)
        self._multi_tier_cache.set(cache_key, False)
        if last_error and verbose:
            last_candidate = candidates[-1]
            if isinstance(last_error, subprocess.CalledProcessError):
                sys.stderr.write(f"[xwlazy] All candidates failed. Last attempt ({last_candidate}): {last_error.stderr.decode('utf-8', errors='replace') if last_error.stderr else str(last_error)}\n")
            elif isinstance(last_error, subprocess.TimeoutExpired):
                sys.stderr.write(f"[xwlazy] All candidates timed out. Last attempt: {last_candidate}\n")
            else:
                sys.stderr.write(f"[xwlazy] All candidates failed. Last attempt ({last_candidate}): {type(last_error).__name__}: {last_error}\n")
        install_str = candidates[-1] if candidates else install_str  # Use last candidate for logging

    # Cleanup: Always remove from installing_now
    with self._lock:
        self.installing_now.discard(top_module)

    duration = time.time() - start
    self._log_audit(install_str, success, duration, strategy_name)

    # Record in adaptive learner (if one is configured)
    if self._learner:
        self._learner.record_import(top_module, duration)

    if success:
        sys.stdout.write(f"\r[OK] [xwlazy] Installed: {install_str} via {strategy_name} ({round(duration,2)}s)\n")
+
1913
def _log_audit(self, pkg, success, duration, strategy="unknown"):
    """
    Record one install attempt in the in-memory stats and the audit log file.

    Under ``self._lock`` the install/failure counters, the accumulated time
    (milliseconds) and the history list are updated.  The entry is then
    appended to the data read from ``self._audit_log_path`` and written
    back.  File errors are swallowed and reported on stderr only when
    XWLAZY_VERBOSE is set.

    NOTE(review): the file read-modify-write is assumed to run after the
    lock is released (indentation was lost in the recovered source).
    """
    status = "success" if success else "failed"
    entry = {
        "timestamp": datetime.now().isoformat(),
        "package": pkg,
        "status": status,
        "duration": round(duration, 3),
        "strategy": strategy
    }

    with self._lock:
        counters = self.stats
        counters['installs'] += 1 if success else 0
        counters['failures'] += 1 if not success else 0
        counters['total_time_ms'] += int(duration * 1000)
        counters['history'].append(entry)

    try:
        existing = _read_toml_simple(self._audit_log_path)
        if existing is None:
            records = []
        elif isinstance(existing, list):
            records = existing
        else:
            # A single non-list document is wrapped; an empty one is dropped.
            records = [existing] if existing else []
        records.append(entry)
        _write_toml_simple(records, self._audit_log_path)
    except Exception as e:
        if os.environ.get('XWLAZY_VERBOSE'):
            sys.stderr.write(f"[xwlazy] Failed to write audit log: {e}\n")
+
1943
def find_spec(self, fullname, path, target=None):
    """
    Meta-path finder hook: install the distribution behind a missing import.

    Returns None in every case except 'lazy' mode, where a spec wrapping a
    ``LazyLoader`` is returned; a None result leaves the actual import to
    the remaining finders on ``sys.meta_path``.

    Flow:
    1. Bail out early when XWLAZY_DISABLE=1, for built-in modules, for
       underscore-prefixed modules not listed in the manifest, and when
       this thread is already inside this hook (re-entrancy guard).
    2. Apply the per-package policy, the global deny list and the
       externally-managed-environment check.
    3. Walk the dotted name from longest prefix to shortest looking for an
       ``installed:<prefix>`` cache entry; verify a positive entry with
       ``importlib.util.find_spec`` (with this finder temporarily removed
       from ``sys.meta_path``), honour a negative one.
    4. On a cache miss, probe ``importlib.metadata`` and
       ``importlib.util.find_spec`` to see whether the package is already
       present.
    5. Resolve the install target, wait for a concurrent install of the
       same top-level module (at most 30 s), then either hand back a lazy
       spec or install synchronously.

    In blocking mode the module is removed from ``installing_now`` on
    every exit path, including the early "already stdlib/installed"
    returns.

    Args:
        fullname: Dotted name of the module being imported.
        path: Unused; part of the finder protocol.
        target: Unused; part of the finder protocol.
    """
    # Check if xwlazy is disabled via environment variable
    if os.environ.get('XWLAZY_DISABLE') == '1':
        return None

    top_module = fullname.split('.')[0]
    if top_module in sys.builtin_module_names:
        return None

    # Underscore-prefixed modules are only handled when explicitly listed.
    if top_module.startswith('_'):
        top_key = top_module.lower().replace('-', '_')
        if top_key not in self.manifest_index:
            return None

    # Re-entrancy guard: the probes below import things themselves.
    if hasattr(threading.current_thread(), '_xwlazy_active'):
        return None
    threading.current_thread()._xwlazy_active = True

    def _mark_installed():
        """Record top_module as installed in both caches."""
        self._multi_tier_cache.set(f"installed:{top_module}", True)
        with self._lock:
            self.installed_cache.add(top_module)

    try:
        policy = self._get_policy(top_module)

        if not policy['enabled']:
            if os.environ.get('XWLAZY_VERBOSE'):
                print(f"[SKIP] [xwlazy] Skipped: {top_module} (disabled per-package policy)")
            return None

        if not policy['allow']:
            if os.environ.get('XWLAZY_VERBOSE'):
                print(f"[DENY] [xwlazy] Denied: {top_module} (security policy)")
            return None

        with self._lock:
            if top_module in self.global_deny_list:
                if os.environ.get('XWLAZY_VERBOSE'):
                    print(f"[DENY] [xwlazy] Denied: {top_module} (global deny list)")
                return None

        if self._is_externally_managed:
            if os.environ.get('XWLAZY_VERBOSE'):
                print(f"[SKIP] [xwlazy] Skipped: {top_module} (PEP 668 externally-managed)")
            return None

        # Check the cache at every level of the dotted name, longest first,
        # e.g. exonware.xwlazy.core -> exonware.xwlazy -> exonware.
        parts = fullname.split('.')
        cached_result = None
        cached_module = None
        for i in range(len(parts), 0, -1):
            prefix = '.'.join(parts[:i])
            cache_key = f"installed:{prefix}"
            result = self._multi_tier_cache.get(cache_key)
            if result is not None:
                cached_result = result
                cached_module = prefix
                break  # Found a cache entry at this level

        if cached_result is not None:
            if cached_result:
                # Cache says installed - verify with the other finders only.
                was_in_path = False
                try:
                    was_in_path = self in sys.meta_path
                    if was_in_path:
                        try:
                            sys.meta_path.remove(self)
                        except ValueError:
                            was_in_path = False

                    try:
                        if importlib.util.find_spec(cached_module):
                            # Verified: cache was correct - return early
                            return None
                        # NOTE(review): a falsy result (module not found)
                        # leaves the positive cache entry in place and
                        # falls through to target resolution.
                    except (ImportError, AttributeError, ValueError):
                        # Lookup failed - cache might be stale, invalidate and continue
                        if os.environ.get('XWLAZY_VERBOSE'):
                            print(f"[INFO] [xwlazy] Cache says {cached_module} is installed, but find_spec({cached_module}) failed - invalidating cache")
                        self._multi_tier_cache.invalidate(f"installed:{cached_module}")
                        cached_result = None  # Treat as cache miss
                finally:
                    if was_in_path and self not in sys.meta_path:
                        sys.meta_path.insert(0, self)
            else:
                # Known failure (from cache)
                if os.environ.get('XWLAZY_VERBOSE'):
                    print(f"[SKIP] [xwlazy] Skipped: {cached_module} (cached failure)")
                return None

        # Cache miss (or invalidated entry): probe metadata and importability.
        if cached_result is None:
            was_in_path = False
            try:
                was_in_path = self in sys.meta_path
                if was_in_path:
                    try:
                        sys.meta_path.remove(self)
                    except ValueError:
                        was_in_path = False

                # Check 1: distribution metadata
                try:
                    package_name = _extract_package_name(top_module)
                    if package_name:
                        if self._is_package_installed_in_venv(package_name):
                            _mark_installed()
                            return None
                    # Try module name as package name
                    try:
                        importlib.metadata.distribution(top_module)
                        _mark_installed()
                        return None
                    except importlib.metadata.PackageNotFoundError:
                        # Not found in metadata - continue to find_spec check
                        pass
                except Exception:
                    # If metadata check fails, continue to find_spec check
                    pass

                # Check 2: importability
                try:
                    if importlib.util.find_spec(fullname):
                        # Importable - but in a venv it must be installed in the venv
                        package_name = _extract_package_name(top_module)
                        if package_name and self._is_package_installed_in_venv(package_name):
                            _mark_installed()
                            return None
                        elif not package_name:
                            # Name unresolved but module importable - mark as installed
                            _mark_installed()
                            return None
                        # else: importable but not installed in venv - proceed with installation
                except (ImportError, AttributeError, ValueError) as e:
                    if os.environ.get('XWLAZY_VERBOSE'):
                        sys.stderr.write(f"[xwlazy] find_spec check failed for {fullname}: {e}\n")
            finally:
                if was_in_path and self not in sys.meta_path:
                    sys.meta_path.insert(0, self)

        # Watched prefixes (serialization modules) are only logged for now.
        is_serialization = self._watched_prefixes.is_watched(fullname)
        if is_serialization and os.environ.get('XWLAZY_VERBOSE'):
            print(f"[OK] [xwlazy] Detected serialization module: {top_module} (watched prefix)")

        install_target = self._resolve_target(fullname)
        if not install_target:
            return None

        # Cache miss - record it
        self._perf_monitor.record_cache_miss()
        with self._lock:
            self.stats["cache_misses"] += 1

        with self._lock:
            if top_module in self.installing_now:
                # Wait for concurrent installation (max 30s); the lock is
                # dropped while sleeping so the installer can make progress.
                start_wait = time.time()
                while top_module in self.installing_now and (time.time() - start_wait) < 30:
                    self._lock.release()
                    time.sleep(0.1)
                    self._lock.acquire()
                # Whatever the outcome of the other install, this call
                # defers to the remaining finders.
                cached_result_after_wait = self._multi_tier_cache.get(cache_key)
                if cached_result_after_wait:
                    return None
                if top_module in self.installed_cache:
                    return None
                if top_module in self.failed_installs:
                    return None
                return None

            if top_module in self.failed_installs:
                return None
            self.installing_now.add(top_module)

        mode = policy['mode']

        if mode == 'lazy':
            # The loader is handed the install target; installing_now is
            # left set here (presumably cleared once the loader installs).
            return spec_from_loader(fullname, LazyLoader(self, fullname, install_target))

        # Blocking mode: a single try/finally guarantees the in-progress
        # marker is cleared on every exit path.
        try:
            # Pre-check: stdlib or already importable and installed?
            try:
                if self._is_stdlib_module(top_module):
                    # Standard library module - don't try to install
                    with self._lock:
                        self.installed_cache.add(top_module)
                    self._multi_tier_cache.set(f"installed:{top_module}", True)
                    return None

                spec = importlib.util.find_spec(top_module)
                if spec is not None and spec.loader is not None:
                    if self._is_stdlib_module(top_module):
                        with self._lock:
                            self.installed_cache.add(top_module)
                        self._multi_tier_cache.set(f"installed:{top_module}", True)
                        return None

                    package_name = _extract_package_name(top_module)
                    if package_name:
                        if self._is_package_installed_in_venv(package_name):
                            with self._lock:
                                self.installed_cache.add(top_module)
                            self._multi_tier_cache.set(f"installed:{top_module}", True)
                            return None
                        # else: importable but not installed in venv - install into venv
                    else:
                        # Name unresolved but module importable - treat as installed
                        with self._lock:
                            self.installed_cache.add(top_module)
                        self._multi_tier_cache.set(f"installed:{top_module}", True)
                        return None
            except Exception:
                # If check fails, proceed with installation attempt
                pass

            # Final check before announcing/performing the install.
            package_name = _extract_package_name(top_module)
            is_installed = False

            # Method 1: by distribution name, then by module name
            if package_name:
                try:
                    if self._is_package_installed_in_venv(package_name):
                        is_installed = True
                    else:
                        if self._is_package_installed_in_venv(top_module):
                            is_installed = True
                except Exception:
                    pass

            # Method 2: importable and not stdlib
            if not is_installed:
                try:
                    spec = importlib.util.find_spec(top_module)
                    if spec is not None and spec.loader is not None:
                        if not self._is_stdlib_module(top_module):
                            is_installed = True
                except Exception:
                    pass

            # Method 3: direct metadata lookup by module name
            if not is_installed:
                try:
                    importlib.metadata.distribution(top_module)
                    is_installed = True
                except importlib.metadata.PackageNotFoundError:
                    pass
                except Exception:
                    pass

            if is_installed:
                # Already installed - mark and skip (no message)
                with self._lock:
                    self.installed_cache.add(top_module)
                self._multi_tier_cache.set(f"installed:{top_module}", True)
                return None

            # Only show INSTALL message if XWLAZY_VERBOSE is enabled
            if os.environ.get('XWLAZY_VERBOSE'):
                sys.stdout.write(f"[INSTALL] [xwlazy] Blocking Install: {top_module} (strategy: {policy['strategy']})...\n")
            self._perform_install(install_target, top_module)
            return None
        finally:
            with self._lock:
                self.installing_now.discard(top_module)

    finally:
        if hasattr(threading.current_thread(), '_xwlazy_active'):
            del threading.current_thread()._xwlazy_active
+
2257
+ # =============================================================================
2258
+ # ACTIVATION (Enhanced v3.0)
2259
+ # =============================================================================
2260
+
2261
# Process-wide XWLazy singleton; stays None until hook() creates it.
_instance = None


def hook(root=".", default_enabled=True, enable_global_hook=True, enable_learning=False):
    """
    Return the shared XWLazy instance, creating and registering it on first use.

    The arguments are only consumed when the singleton is created. Once an
    instance exists, later calls return it unchanged and ignore them.

    Args:
        root: Root directory to search for manifests
        default_enabled: Opt-in vs Opt-out mode
        enable_global_hook: Install global __import__ hook
        enable_learning: Enable adaptive learning

    Returns:
        The module-level XWLazy singleton.
    """
    global _instance
    if _instance:
        return _instance

    _instance = XWLazy(root, default_enabled, enable_global_hook, enable_learning)
    # Index 0 puts the new finder ahead of every finder already registered.
    sys.meta_path.insert(0, _instance)
    return _instance
2278
+
2279
def auto_enable_lazy(package_name=None, mode="smart", root="."):
    """
    ONE-LINE ACTIVATION! Auto-enable lazy installation for a package.

    Usage in any library's __init__.py:
        from exonware.xwlazy import auto_enable_lazy
        auto_enable_lazy(__package__)

    Or call it without arguments to detect the package from the caller:
        auto_enable_lazy()

    Args:
        package_name: Package name. When None it is taken from the calling
            frame's ``__package__``, falling back to the first component of
            the caller's ``__name__``.
        mode: Installation mode ("smart", "pip", "wheel", "cached")
        root: Root directory for manifest files (forwarded to hook())

    Returns:
        The XWLazy singleton obtained from hook().
    """
    if package_name is None:
        try:
            caller_globals = inspect.currentframe().f_back.f_globals
            package_name = caller_globals.get('__package__')
            if not package_name:
                package_name = caller_globals.get('__name__', '').split('.')[0]
        except Exception as exc:
            # Best effort only: without a usable caller frame we continue
            # with no package name instead of failing activation.
            if os.environ.get('XWLAZY_VERBOSE'):
                # Force ASCII so the message is safe on any console encoding.
                err_msg = str(exc).encode('ascii', 'replace').decode('ascii')
                sys.stderr.write(f"[xwlazy] Could not auto-detect package name from frame: {err_msg}\n")
            package_name = None

    # Reuse the singleton or create it on first call.
    lazy = hook(root=root, default_enabled=True, enable_global_hook=True)

    # Keyword detection wins over explicit per-package configuration.
    keyword_hit = lazy._check_package_keywords(package_name) or lazy._check_package_keywords()
    if keyword_hit:
        lazy.default_enabled = True
        if os.environ.get('XWLAZY_VERBOSE'):
            print(f"[OK] [xwlazy] Auto-enabled via keyword detection for: {package_name or 'current package'}")
        return lazy

    if package_name:
        # "smart" also switches the package into lazy mode; any other value
        # is passed through as the install strategy.
        if mode == "smart":
            options = {"mode": "lazy", "install_strategy": "smart"}
        else:
            options = {"install_strategy": mode}
        lazy.configure(package_name, enabled=True, **options)

    return lazy
2331
+
2332
def attach(package_name, submodules=None, submod_attrs=None):
    """
    Build lazy-loading hooks for a package (lazy-loader compatible API).

    Args:
        package_name: Importable name of the package the hooks are for.
        submodules: Iterable of submodule names imported on first access.
        submod_attrs: Mapping of submodule name -> iterable of attribute
            names that are fetched from that submodule on first access.

    Returns:
        Tuple ``(__getattr__, __dir__, __all__)``, where ``__all__`` is the
        sorted list of every lazily available name.
    """
    lazy_submodules = set() if submodules is None else set(submodules)

    # attribute name -> submodule that provides it (later entries win).
    attr_owner = {}
    if submod_attrs is not None:
        for owner, names in submod_attrs.items():
            for attr_name in names:
                attr_owner[attr_name] = owner

    exported = sorted(lazy_submodules.union(attr_owner))

    def __getattr__(name):
        if name in lazy_submodules:
            return importlib.import_module(f"{package_name}.{name}")
        if name not in attr_owner:
            raise AttributeError(f"module {package_name!r} has no attribute {name!r}")

        owner = attr_owner[name]
        value = getattr(importlib.import_module(f"{package_name}.{owner}"), name)
        if owner == name:
            # The attribute shares its name with its submodule: store the
            # attribute on the package so it is found there instead of the module.
            sys.modules[package_name].__dict__[name] = value
        return value

    def __dir__():
        return list(exported)

    return __getattr__, __dir__, list(exported)
2367
+
2368
+ # =============================================================================
2369
+ # ADDITIONAL PUBLIC API FUNCTIONS (Rich API v3.0)
2370
+ # =============================================================================
2371
+
2372
def enable_keyword_detection(enabled=True, keyword="xwlazy-enabled"):
    """
    Enable/disable keyword-based auto-detection on the singleton.

    Does nothing when no XWLazy instance exists yet.

    Args:
        enabled: Value stored in the instance's ``_keyword_detection_enabled``.
        keyword: Value stored in the instance's ``_keyword_to_check``.
    """
    global _instance
    if not _instance:
        return
    _instance._keyword_detection_enabled = enabled
    _instance._keyword_to_check = keyword
2378
+
2379
def is_keyword_detection_enabled():
    """
    Check if keyword detection is enabled.

    Returns:
        The singleton's ``_keyword_detection_enabled`` flag, or True when no
        instance has been created yet.
    """
    return _instance._keyword_detection_enabled if _instance else True
2385
+
2386
def check_package_keywords(package_name=None, keyword="xwlazy-enabled"):
    """
    Check if package has keyword in metadata.

    Args:
        package_name: Package to check; forwarded to the singleton.
        keyword: Accepted for API compatibility.

    Returns:
        The singleton's ``_check_package_keywords(package_name)`` result, or
        False when no instance exists.
    """
    if not _instance:
        return False
    # NOTE(review): `keyword` is never forwarded. The check presumably uses
    # whatever keyword the instance is already configured with - confirm.
    return _instance._check_package_keywords(package_name)
2392
+
2393
def enable_learning(enabled=True):
    """
    Enable/disable adaptive learning on the singleton.

    Enabling creates an AdaptiveLearner only when the instance has none.
    Disabling drops the learner. Does nothing when no instance exists.

    Args:
        enabled: True to turn learning on, False to turn it off.
    """
    global _instance
    if not _instance:
        return

    has_learner = bool(_instance._learner)
    if not enabled:
        _instance._learner = None
        _instance._enable_learning = False
    elif not has_learner:
        _instance._learner = AdaptiveLearner()
        _instance._enable_learning = True
2402
+
2403
def predict_next_imports(current_module=None, limit=5):
    """
    Predict likely next imports based on patterns.

    Args:
        current_module: Forwarded to the learner's ``predict_next_imports``.
        limit: Forwarded to the learner's ``predict_next_imports``.

    Returns:
        The learner's prediction, or an empty list when there is no
        singleton or it has no learner.
    """
    learner = _instance._learner if _instance else None
    if not learner:
        return []
    return learner.predict_next_imports(current_module, limit)
2409
+
2410
def get_all_stats():
    """
    Get comprehensive statistics from singleton instance.

    Returns:
        The singleton's ``get_stats()`` result, or an empty dict when no
        instance exists.
    """
    if not _instance:
        return {}
    return _instance.get_stats()
2416
+
2417
def generate_sbom(output_path=None):
    """
    Generate SBOM from singleton instance.

    Args:
        output_path: Forwarded unchanged to the instance's ``generate_sbom``.

    Returns:
        The instance's result, or None when no instance exists.
    """
    return _instance.generate_sbom(output_path) if _instance else None
2423
+
2424
def get_lockfile():
    """
    Get lockfile contents from singleton instance.

    Returns:
        The instance's ``get_lockfile()`` result, or None when no instance
        exists.
    """
    return _instance.get_lockfile() if _instance else None
2430
+
2431
def save_lockfile():
    """
    Save lockfile from singleton instance.

    Returns:
        Whatever the instance's ``save_lockfile()`` returns, or None when no
        instance exists.
    """
    if not _instance:
        return None
    return _instance.save_lockfile()
2436
+
2437
def is_externally_managed():
    """
    Check if environment is externally managed (PEP 668).

    PEP 668 puts the ``EXTERNALLY-MANAGED`` marker file in the interpreter's
    stdlib directory (``sysconfig.get_path("stdlib")``) and applies it only
    outside a virtual environment. Looking in ``sys.prefix`` alone misses the
    marker wherever the stdlib directory is not ``sys.prefix`` itself.

    Returns:
        True when a marker file applies to the running interpreter,
        otherwise False.
    """
    import sysconfig  # local import keeps this block self-contained

    marker = "EXTERNALLY-MANAGED"

    # Legacy location checked by earlier versions; kept so any setup that
    # returned True before still does.
    if (Path(sys.prefix) / marker).exists():
        return True

    # Inside a virtual environment the base interpreter's marker does not
    # apply (PEP 668: only enforced when sys.prefix == sys.base_prefix).
    if sys.prefix != getattr(sys, "base_prefix", sys.prefix):
        return False

    stdlib_dir = sysconfig.get_path("stdlib")
    return bool(stdlib_dir) and (Path(stdlib_dir) / marker).exists()
2440
+
2441
def install_global_import_hook():
    """
    Install global __import__ hook manually.

    Delegates to ``_install_global_import_hook`` with the singleton.
    Does nothing when no XWLazy instance exists yet.
    """
    global _instance
    if not _instance:
        return
    _install_global_import_hook(_instance)
2446
+
2447
+ # --- NEW v3.0.2: Watched Prefixes API (Top-Level) ---
2448
+
2449
def add_watched_prefix(prefix):
    """
    Add a watched prefix for special handling (serialization modules, etc.).

    Args:
        prefix: Forwarded to the singleton's ``add_watched_prefix``.

    Does nothing when no XWLazy instance exists yet.
    """
    global _instance
    if not _instance:
        return
    _instance.add_watched_prefix(prefix)
2454
+
2455
def remove_watched_prefix(prefix):
    """
    Remove a watched prefix.

    Args:
        prefix: Forwarded to the singleton's ``remove_watched_prefix``.

    Does nothing when no XWLazy instance exists yet.
    """
    global _instance
    if not _instance:
        return
    _instance.remove_watched_prefix(prefix)
2460
+
2461
def get_watched_prefixes():
    """
    Get all watched prefixes.

    Returns:
        The singleton's ``get_watched_prefixes()`` result, or a fresh list
        built from ``SERIALIZATION_PREFIXES`` when no instance exists.
    """
    if not _instance:
        return [*SERIALIZATION_PREFIXES]
    return _instance.get_watched_prefixes()
2467
+
2468
def is_module_watched(module_name):
    """
    Check if a module matches any watched prefix.

    Args:
        module_name: Dotted module name to test.

    Returns:
        The singleton's ``is_watched(module_name)`` result. Without an
        instance, whether the top-level component of ``module_name`` is in
        ``SERIALIZATION_PREFIXES``.
    """
    if _instance:
        return _instance.is_watched(module_name)
    top_level = module_name.split('.')[0]
    return top_level in SERIALIZATION_PREFIXES
2474
+
2475
+ # --- NEW v3.0.2: Cache Management API (Top-Level) ---
2476
+
2477
def get_cache_stats():
    """
    Get multi-tier cache statistics.

    Returns:
        The singleton's ``get_cache_stats()`` result, or an empty dict when
        no instance exists.
    """
    if not _instance:
        return {}
    return _instance.get_cache_stats()
2483
+
2484
def clear_cache():
    """
    Clear all caches (L1 + L2 + resolution cache).

    Delegates to the singleton's ``clear_cache()``.
    Does nothing when no XWLazy instance exists yet.
    """
    global _instance
    if not _instance:
        return
    _instance.clear_cache()
2489
+
2490
def invalidate_cache(module_name):
    """
    Invalidate cache for a specific module.

    Args:
        module_name: Forwarded to the singleton's ``invalidate_cache``.

    Does nothing when no XWLazy instance exists yet.
    """
    global _instance
    if not _instance:
        return
    _instance.invalidate_cache(module_name)
2495
+
2496
+ # --- NEW v3.0.2: Performance Monitoring API (Top-Level) ---
2497
+
2498
def get_performance_stats():
    """
    Get enhanced performance monitoring statistics.

    Returns:
        The singleton's ``get_performance_stats()`` result, or an empty dict
        when no instance exists.
    """
    if not _instance:
        return {}
    return _instance.get_performance_stats()
2504
+
2505
def clear_performance_stats():
    """
    Clear all performance statistics on the singleton instance.

    Does nothing when xwlazy has not been activated yet: with no instance
    there are no statistics to clear. Resetting counters must not create the
    singleton or install import hooks as a side effect, so this behaves like
    the other singleton wrappers (for example clear_cache()).
    """
    global _instance
    if _instance:
        _instance.clear_performance_stats()
2513
+
2514
def uninstall_global_import_hook():
    """
    Uninstall global __import__ hook.

    Thin public wrapper around the module-private
    ``_uninstall_global_import_hook()``; its return value is discarded.
    """
    _uninstall_global_import_hook()
    return None
2517
+
2518
def is_global_import_hook_installed():
    """
    Check if global __import__ hook is installed.

    Returns:
        The current value of the module-level
        ``_global_import_hook_installed`` flag.
    """
    installed = _global_import_hook_installed
    return installed
2521
+
2522
+ # Export public API
2523
__all__ = [
    # Core activation
    'hook',
    'auto_enable_lazy',
    'attach',
    # Class
    'XWLazy',
    # Keyword detection
    'enable_keyword_detection',
    'is_keyword_detection_enabled',
    'check_package_keywords',
    # Adaptive learning
    'enable_learning',
    'predict_next_imports',
    # Statistics & monitoring
    'get_all_stats',
    'generate_sbom',
    # Lockfile support
    'get_lockfile',
    'save_lockfile',
    # Global import hook
    'install_global_import_hook',
    'uninstall_global_import_hook',
    'is_global_import_hook_installed',
    # Watched prefixes
    'add_watched_prefix',
    'remove_watched_prefix',
    'get_watched_prefixes',
    'is_module_watched',
    # Cache management
    'get_cache_stats',
    'clear_cache',
    'invalidate_cache',
    # Performance monitoring
    'get_performance_stats',
    'clear_performance_stats',
    # Utility
    'is_externally_managed',
]