rust-crate-pipeline 1.4.0__py3-none-any.whl → 1.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. rust_crate_pipeline/__init__.py +18 -27
  2. rust_crate_pipeline/__main__.py +1 -0
  3. rust_crate_pipeline/ai_processing.py +718 -596
  4. rust_crate_pipeline/analysis.py +330 -363
  5. rust_crate_pipeline/azure_ai_processing.py +462 -0
  6. rust_crate_pipeline/config.py +46 -28
  7. rust_crate_pipeline/core/__init__.py +19 -0
  8. rust_crate_pipeline/core/canon_registry.py +133 -0
  9. rust_crate_pipeline/core/irl_engine.py +256 -0
  10. rust_crate_pipeline/core/sacred_chain.py +117 -0
  11. rust_crate_pipeline/crate_analysis.py +54 -0
  12. rust_crate_pipeline/crate_list.txt +424 -0
  13. rust_crate_pipeline/github_token_checker.py +108 -112
  14. rust_crate_pipeline/main.py +329 -109
  15. rust_crate_pipeline/network.py +317 -308
  16. rust_crate_pipeline/pipeline.py +317 -375
  17. rust_crate_pipeline/production_config.py +24 -27
  18. rust_crate_pipeline/progress_monitor.py +334 -0
  19. rust_crate_pipeline/scraping/__init__.py +13 -0
  20. rust_crate_pipeline/scraping/unified_scraper.py +259 -0
  21. rust_crate_pipeline/unified_llm_processor.py +637 -0
  22. rust_crate_pipeline/unified_pipeline.py +548 -0
  23. rust_crate_pipeline/utils/file_utils.py +32 -5
  24. rust_crate_pipeline/utils/logging_utils.py +21 -16
  25. rust_crate_pipeline/version.py +79 -47
  26. rust_crate_pipeline-1.4.2.dist-info/METADATA +515 -0
  27. rust_crate_pipeline-1.4.2.dist-info/RECORD +31 -0
  28. rust_crate_pipeline-1.4.0.dist-info/METADATA +0 -585
  29. rust_crate_pipeline-1.4.0.dist-info/RECORD +0 -19
  30. {rust_crate_pipeline-1.4.0.dist-info → rust_crate_pipeline-1.4.2.dist-info}/WHEEL +0 -0
  31. {rust_crate_pipeline-1.4.0.dist-info → rust_crate_pipeline-1.4.2.dist-info}/entry_points.txt +0 -0
  32. {rust_crate_pipeline-1.4.0.dist-info → rust_crate_pipeline-1.4.2.dist-info}/licenses/LICENSE +0 -0
  33. {rust_crate_pipeline-1.4.0.dist-info → rust_crate_pipeline-1.4.2.dist-info}/top_level.txt +0 -0
@@ -4,11 +4,12 @@ import time
4
4
  import psutil
5
5
  import logging
6
6
  from functools import wraps
7
- from typing import Optional
7
+ from typing import Any, Callable, Dict, Optional, Union
8
8
 
9
9
 
10
- def configure_logging(log_dir: Optional[str] = None,
11
- log_level: int = logging.INFO) -> logging.Logger:
10
+ def configure_logging(
11
+ log_dir: Optional[str] = None, log_level: int = logging.INFO
12
+ ) -> logging.Logger:
12
13
  """
13
14
  Configure global logging with file and console handlers
14
15
 
@@ -25,54 +26,58 @@ def configure_logging(log_dir: Optional[str] = None,
25
26
  # Console handler
26
27
  console_handler = logging.StreamHandler()
27
28
  console_handler.setLevel(log_level)
28
- console_format = logging.Formatter(
29
- "%(asctime)s [%(levelname)s] %(message)s")
29
+ console_format = logging.Formatter("%(asctime)s [%(levelname)s] %(message)s")
30
30
  console_handler.setFormatter(console_format)
31
31
  logger.addHandler(console_handler)
32
32
 
33
33
  # File handler
34
34
  if log_dir:
35
35
  log_file = os.path.join(
36
- log_dir, f"pipeline_{
37
- time.strftime('%Y%m%d-%H%M%S')}.log")
36
+ log_dir,
37
+ f"pipeline_{
38
+ time.strftime('%Y%m%d-%H%M%S')}.log",
39
+ )
38
40
  file_handler = logging.FileHandler(log_file)
39
41
  file_handler.setLevel(log_level)
40
42
  file_format = logging.Formatter(
41
- "%(asctime)s [%(levelname)s] %(name)s: %(message)s")
43
+ "%(asctime)s [%(levelname)s] %(name)s: %(message)s"
44
+ )
42
45
  file_handler.setFormatter(file_format)
43
46
  logger.addHandler(file_handler)
44
47
 
45
48
  return logger
46
49
 
47
50
 
48
def log_execution_time(func: Callable[..., Any]) -> Callable[..., Any]:
    """Decorator that logs how long the wrapped function takes to run.

    Each call to the wrapped function is timed with ``time.time()`` and the
    elapsed duration is written to the root logger at INFO level. The wrapped
    function's return value is passed through unchanged.

    Args:
        func: The callable to wrap.

    Returns:
        The wrapped callable, with ``__name__``/``__doc__`` preserved via
        ``functools.wraps``.
    """

    @wraps(func)
    def wrapper(*args: Any, **kwargs: Any) -> Any:
        # BUG FIX: the wrapper was annotated "-> None" although it returns
        # the wrapped function's result; the correct annotation is "-> Any".
        start_time = time.time()
        result = func(*args, **kwargs)
        end_time = time.time()
        # Lazy %-style args: the message is only built if INFO is enabled.
        logging.info(
            "%s executed in %.2f seconds", func.__name__, end_time - start_time
        )
        return result

    return wrapper
59
63
 
60
64
 
61
def log_resource_usage() -> Dict[str, Any]:
    """Log and return current resource utilization (CPU, memory, disk).

    Samples CPU percentage, virtual memory, and disk usage of the current
    working directory's filesystem via ``psutil``, emits a one-line INFO
    summary, and returns the raw figures.

    Returns:
        Dict with keys ``cpu_percent``, ``memory_percent``, ``disk_percent``
        (percentages) and ``memory_available``, ``disk_free`` (bytes).
    """
    cpu_percent = psutil.cpu_percent()
    mem = psutil.virtual_memory()
    disk = psutil.disk_usage(".")

    # BUG FIX: the previous f-string had replacement expressions spanning
    # multiple source lines, which is a SyntaxError before Python 3.12
    # (PEP 701). Implicit string concatenation keeps each {...} on one line
    # and renders the identical message.
    logging.info(
        f"Resource Usage - CPU: {cpu_percent}%, "
        f"Memory: {mem.percent}%, Disk: {disk.percent}%"
    )

    return {
        "cpu_percent": cpu_percent,
        "memory_percent": mem.percent,
        "disk_percent": disk.percent,
        "memory_available": mem.available,
        "disk_free": disk.free,
    }
@@ -1,47 +1,79 @@
1
- """Version information for rust-crate-pipeline."""
2
-
3
- __version__ = "1.4.0"
4
- __version_info__ = tuple(int(x) for x in __version__.split("."))
5
-
6
- # Version history
7
- # 1.4.0 - Major Release: Rule Zero Compliance Audit Complete
8
- # - Completed comprehensive Rule Zero alignment audit
9
- # - Eliminated all code redundancy and dead code
10
- # - Achieved 100% test coverage (22/22 tests passing)
11
- # - Refactored to pure asyncio architecture (thread-free)
12
- # - Suppressed Pydantic deprecation warnings
13
- # - Full production readiness with Docker support
14
- # - Enhanced documentation with PyPI cross-references
15
- # - Certified Rule Zero compliance across all four principles
16
- # 1.3.1 - Bug Fix Release: Crawl4AI Integration Cleanup
17
- # - Fixed CSS selector syntax errors in Crawl4AI integration
18
- # - Cleaned up duplicate and obsolete test files
19
- # - Resolved import conflicts between workspace and integration configs
20
- # - Improved error handling in enhanced scraping module
21
- # - Standardized on direct llama.cpp approach (removed Ollama dependencies)
22
- # - Enhanced Rule Zero compliance with transparent cleanup process
23
- # 1.3.0 - Quality & Integration Release: Comprehensive code quality improvements
24
- # - Fixed all critical PEP 8 violations (F821, F811, E114)
25
- # - Enhanced error handling with graceful dependency fallbacks
26
- # - Improved module integration and import path resolution
27
- # - Added comprehensive test validation (21/21 tests passing)
28
- # - Enhanced async support and Unicode handling
29
- # - Production-ready CLI interfaces with robust error handling
30
- # - Full Rule Zero compliance validation
31
- # 1.2.0 - Major release: Production-ready, cleaned codebase
32
- # - Unified documentation into single comprehensive README
33
- # - Removed all non-essential development and test files
34
- # - Optimized for PyPI distribution and Docker deployment
35
- # - Enhanced GitHub token integration and setup
36
- # 1.1.2 - Production release: Cleaned up non-essential files
37
- # - Unified documentation into single README
38
- # - Optimized for PyPI distribution
39
- # 1.1.1 - Bug fix: Added missing python-dateutil dependency
40
- # - Fixed relativedelta import error
41
- # 1.1.0 - Updated author and contact information
42
- # - Enhanced package configuration
43
- # 0.1.0 - Initial release
44
- # - Core pipeline functionality
45
- # - AI-powered metadata enrichment
46
- # - Dependency analysis
47
- # - PyPI package setup
1
+ from typing import Dict, List, Tuple, Optional, Any
2
+ """Version information for rust-crate-pipeline."""
3
+
4
+ __version__ = "1.4.2"
5
+ __version_info__ = tuple(int(x) for x in __version__.split("-")[0].split("."))
6
+ __author__ = "SigilDERG Team"
7
+ __email__ = "sigilderg@example.com"
8
+
9
+ # Version history
10
+ # 1.4.2 - Maintenance Release
11
+ # - Updated project to version 1.4.2
12
+ # - General maintenance and dependency updates
13
+ # 1.2.5-dev.20250621 - Dev branch: experimental, not a formal
14
+ # release. Originated from v1.2.5.
15
+ # 1.2.5 - Last official release.
16
+ # 1.5.1 - Configuration Standardization Release: Model Path Consistency
17
+ # - Standardized all configuration to use GGUF model paths
18
+ # - Updated CLI defaults for --crawl4ai-model to
19
+ # ~/models/deepseek/deepseek-coder-6.7b-instruct.Q4_K_M.gguf
20
+ # - Enhanced Rule Zero alignment with transparent configuration practices
21
+ # - Updated all test files to use consistent GGUF model path references
22
+ # - Comprehensive documentation updates for proper model configuration
23
+ # - Removed inconsistent Ollama references in favor of llama-cpp-python
24
+ # - Ensured CLI help text and JSON examples reflect correct model paths
25
+ # 1.5.0 - Major Release: Enhanced Web Scraping with Crawl4AI Integration
26
+ # - Integrated Crawl4AI for advanced web scraping capabilities
27
+ # - Added JavaScript-rendered content extraction via Playwright
28
+ # - Enhanced README parsing with LLM-powered content analysis
29
+ # - New CLI options: --enable-crawl4ai, --disable-crawl4ai, --crawl4ai-model
30
+ # - Enhanced configuration with local GGUF model paths and crawl4ai_timeout
31
+ # - Comprehensive test coverage for all Crawl4AI features
32
+ # - Rule Zero compliant with full transparency and audit trails
33
+ # 1.4.0 - Major Release: Rule Zero Compliance Audit Complete
34
+ # - Completed comprehensive Rule Zero alignment audit
35
+ # - Eliminated all code redundancy and dead code
36
+ # - Achieved 100% test coverage (22/22 tests passing)
37
+ # - Refactored to pure asyncio architecture (thread-free)
38
+ # - Suppressed Pydantic deprecation warnings
39
+ # - Full production readiness with Docker support
40
+ # - Enhanced documentation with PyPI cross-references
41
+ # - Certified Rule Zero compliance across all four principles
42
+ # 1.3.1 - Bug Fix Release: Crawl4AI Integration Cleanup
43
+ # - Fixed CSS selector syntax errors in Crawl4AI integration
44
+ # - Cleaned up duplicate and obsolete test files
45
+ # - Resolved import conflicts between workspace and integration configs
46
+ # - Improved error handling in enhanced scraping module
47
+ # - Standardized on direct llama.cpp approach (removed Ollama dependencies)
48
+ # - Enhanced Rule Zero compliance with transparent cleanup process
49
+ # - Fixed type annotation compatibility issues
50
+ # - Fixed Python 3.9 compatibility for type annotations
51
+ # - Updated dict[str, Any] to "dict[str, Any]" format
52
+ # - Fixed Union type expressions in conditional imports
53
+ # - Resolved IDE linter errors in network.py, pipeline.py, and production_config.py
54
+ # - Improved code quality and maintainability
55
+ # 1.3.0 - Quality & Integration Release: Comprehensive code quality improvements
56
+ # - Fixed all critical PEP 8 violations (F821, F811, E114)
57
+ # - Enhanced error handling with graceful dependency fallbacks
58
+ # - Improved module integration and import path resolution
59
+ # - Added comprehensive test validation (21/21 tests passing)
60
+ # - Enhanced async support and Unicode handling
61
+ # - Production-ready CLI interfaces with robust error handling
62
+ # - Full Rule Zero compliance validation
63
+ # 1.2.0 - Major release: Production-ready, cleaned codebase
64
+ # - Unified documentation into single comprehensive README
65
+ # - Removed all non-essential development and test files
66
+ # - Optimized for PyPI distribution and Docker deployment
67
+ # - Enhanced GitHub token integration and setup
68
+ # 1.1.2 - Production release: Cleaned up non-essential files
69
+ # - Unified documentation into single README
70
+ # - Optimized for PyPI distribution
71
+ # 1.1.1 - Bug fix: Added missing python-dateutil dependency
72
+ # - Fixed relativedelta import error
73
+ # 1.1.0 - Updated author and contact information
74
+ # - Enhanced package configuration
75
+ # 0.1.0 - Initial release
76
+ # - Core pipeline functionality
77
+ # - AI-powered metadata enrichment
78
+ # - Dependency analysis
79
+ # - PyPI package setup