rust-crate-pipeline 1.4.0__py3-none-any.whl → 1.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. rust_crate_pipeline/__init__.py +18 -27
  2. rust_crate_pipeline/__main__.py +1 -0
  3. rust_crate_pipeline/ai_processing.py +718 -596
  4. rust_crate_pipeline/analysis.py +330 -363
  5. rust_crate_pipeline/azure_ai_processing.py +462 -0
  6. rust_crate_pipeline/config.py +46 -28
  7. rust_crate_pipeline/core/__init__.py +19 -0
  8. rust_crate_pipeline/core/canon_registry.py +133 -0
  9. rust_crate_pipeline/core/irl_engine.py +256 -0
  10. rust_crate_pipeline/core/sacred_chain.py +117 -0
  11. rust_crate_pipeline/crate_analysis.py +54 -0
  12. rust_crate_pipeline/crate_list.txt +424 -0
  13. rust_crate_pipeline/github_token_checker.py +108 -112
  14. rust_crate_pipeline/main.py +329 -109
  15. rust_crate_pipeline/network.py +317 -308
  16. rust_crate_pipeline/pipeline.py +300 -375
  17. rust_crate_pipeline/production_config.py +24 -27
  18. rust_crate_pipeline/progress_monitor.py +334 -0
  19. rust_crate_pipeline/scraping/__init__.py +13 -0
  20. rust_crate_pipeline/scraping/unified_scraper.py +259 -0
  21. rust_crate_pipeline/unified_llm_processor.py +637 -0
  22. rust_crate_pipeline/unified_pipeline.py +548 -0
  23. rust_crate_pipeline/utils/file_utils.py +32 -5
  24. rust_crate_pipeline/utils/logging_utils.py +21 -16
  25. rust_crate_pipeline/version.py +76 -47
  26. rust_crate_pipeline-1.4.1.dist-info/METADATA +515 -0
  27. rust_crate_pipeline-1.4.1.dist-info/RECORD +31 -0
  28. rust_crate_pipeline-1.4.0.dist-info/METADATA +0 -585
  29. rust_crate_pipeline-1.4.0.dist-info/RECORD +0 -19
  30. {rust_crate_pipeline-1.4.0.dist-info → rust_crate_pipeline-1.4.1.dist-info}/WHEEL +0 -0
  31. {rust_crate_pipeline-1.4.0.dist-info → rust_crate_pipeline-1.4.1.dist-info}/entry_points.txt +0 -0
  32. {rust_crate_pipeline-1.4.0.dist-info → rust_crate_pipeline-1.4.1.dist-info}/licenses/LICENSE +0 -0
  33. {rust_crate_pipeline-1.4.0.dist-info → rust_crate_pipeline-1.4.1.dist-info}/top_level.txt +0 -0
@@ -4,11 +4,12 @@ import time
4
4
  import psutil
5
5
  import logging
6
6
  from functools import wraps
7
- from typing import Optional
7
+ from typing import Any, Callable, Dict, Optional, Union
8
8
 
9
9
 
10
- def configure_logging(log_dir: Optional[str] = None,
11
- log_level: int = logging.INFO) -> logging.Logger:
10
+ def configure_logging(
11
+ log_dir: Optional[str] = None, log_level: int = logging.INFO
12
+ ) -> logging.Logger:
12
13
  """
13
14
  Configure global logging with file and console handlers
14
15
 
@@ -25,54 +26,58 @@ def configure_logging(log_dir: Optional[str] = None,
25
26
  # Console handler
26
27
  console_handler = logging.StreamHandler()
27
28
  console_handler.setLevel(log_level)
28
- console_format = logging.Formatter(
29
- "%(asctime)s [%(levelname)s] %(message)s")
29
+ console_format = logging.Formatter("%(asctime)s [%(levelname)s] %(message)s")
30
30
  console_handler.setFormatter(console_format)
31
31
  logger.addHandler(console_handler)
32
32
 
33
33
  # File handler
34
34
  if log_dir:
35
35
  log_file = os.path.join(
36
- log_dir, f"pipeline_{
37
- time.strftime('%Y%m%d-%H%M%S')}.log")
36
+ log_dir,
37
+ f"pipeline_{
38
+ time.strftime('%Y%m%d-%H%M%S')}.log",
39
+ )
38
40
  file_handler = logging.FileHandler(log_file)
39
41
  file_handler.setLevel(log_level)
40
42
  file_format = logging.Formatter(
41
- "%(asctime)s [%(levelname)s] %(name)s: %(message)s")
43
+ "%(asctime)s [%(levelname)s] %(name)s: %(message)s"
44
+ )
42
45
  file_handler.setFormatter(file_format)
43
46
  logger.addHandler(file_handler)
44
47
 
45
48
  return logger
46
49
 
47
50
 
48
- def log_execution_time(func):
51
+ def log_execution_time(func: Callable[..., Any]) -> Callable[..., Any]:
49
52
  """Decorator to log function execution time"""
53
+
50
54
  @wraps(func)
51
- def wrapper(*args, **kwargs):
55
+ def wrapper(*args, **kwargs) -> None:
52
56
  start_time = time.time()
53
57
  result = func(*args, **kwargs)
54
58
  end_time = time.time()
55
- logging.info(
56
- f"{func.__name__} executed in {end_time - start_time:.2f} seconds")
59
+ logging.info(f"{func.__name__} executed in {end_time - start_time:.2f} seconds")
57
60
  return result
61
+
58
62
  return wrapper
59
63
 
60
64
 
61
- def log_resource_usage():
65
+ def log_resource_usage() -> Dict[str, Any]:
62
66
  """Log current resource utilization (CPU, memory, disk)"""
63
67
  cpu_percent = psutil.cpu_percent()
64
68
  mem = psutil.virtual_memory()
65
- disk = psutil.disk_usage('.')
69
+ disk = psutil.disk_usage(".")
66
70
 
67
71
  logging.info(
68
72
  f"Resource Usage - CPU: {cpu_percent}%, Memory: {
69
73
  mem.percent}%, Disk: {
70
- disk.percent}%")
74
+ disk.percent}%"
75
+ )
71
76
 
72
77
  return {
73
78
  "cpu_percent": cpu_percent,
74
79
  "memory_percent": mem.percent,
75
80
  "disk_percent": disk.percent,
76
81
  "memory_available": mem.available,
77
- "disk_free": disk.free
82
+ "disk_free": disk.free,
78
83
  }
@@ -1,47 +1,76 @@
1
- """Version information for rust-crate-pipeline."""
2
-
3
- __version__ = "1.4.0"
4
- __version_info__ = tuple(int(x) for x in __version__.split("."))
5
-
6
- # Version history
7
- # 1.4.0 - Major Release: Rule Zero Compliance Audit Complete
8
- # - Completed comprehensive Rule Zero alignment audit
9
- # - Eliminated all code redundancy and dead code
10
- # - Achieved 100% test coverage (22/22 tests passing)
11
- # - Refactored to pure asyncio architecture (thread-free)
12
- # - Suppressed Pydantic deprecation warnings
13
- # - Full production readiness with Docker support
14
- # - Enhanced documentation with PyPI cross-references
15
- # - Certified Rule Zero compliance across all four principles
16
- # 1.3.1 - Bug Fix Release: Crawl4AI Integration Cleanup
17
- # - Fixed CSS selector syntax errors in Crawl4AI integration
18
- # - Cleaned up duplicate and obsolete test files
19
- # - Resolved import conflicts between workspace and integration configs
20
- # - Improved error handling in enhanced scraping module
21
- # - Standardized on direct llama.cpp approach (removed Ollama dependencies)
22
- # - Enhanced Rule Zero compliance with transparent cleanup process
23
- # 1.3.0 - Quality & Integration Release: Comprehensive code quality improvements
24
- # - Fixed all critical PEP 8 violations (F821, F811, E114)
25
- # - Enhanced error handling with graceful dependency fallbacks
26
- # - Improved module integration and import path resolution
27
- # - Added comprehensive test validation (21/21 tests passing)
28
- # - Enhanced async support and Unicode handling
29
- # - Production-ready CLI interfaces with robust error handling
30
- # - Full Rule Zero compliance validation
31
- # 1.2.0 - Major release: Production-ready, cleaned codebase
32
- # - Unified documentation into single comprehensive README
33
- # - Removed all non-essential development and test files
34
- # - Optimized for PyPI distribution and Docker deployment
35
- # - Enhanced GitHub token integration and setup
36
- # 1.1.2 - Production release: Cleaned up non-essential files
37
- # - Unified documentation into single README
38
- # - Optimized for PyPI distribution
39
- # 1.1.1 - Bug fix: Added missing python-dateutil dependency
40
- # - Fixed relativedelta import error
41
- # 1.1.0 - Updated author and contact information
42
- # - Enhanced package configuration
43
- # 0.1.0 - Initial release
44
- # - Core pipeline functionality
45
- # - AI-powered metadata enrichment
46
- # - Dependency analysis
47
- # - PyPI package setup
1
+ from typing import Dict, List, Tuple, Optional, Any
2
+ """Version information for rust-crate-pipeline."""
3
+
4
+ __version__ = "1.4.1"
5
+ __version_info__ = tuple(int(x) for x in __version__.split("-")[0].split("."))
6
+ __author__ = "SigilDERG Team"
7
+ __email__ = "sigilderg@example.com"
8
+
9
+ # Version history
10
+ # 1.2.5-dev.20250621 - Dev branch: experimental, not a formal
11
+ # release. Originated from v1.2.5.
12
+ # 1.2.5 - Last official release.
13
+ # 1.5.1 - Configuration Standardization Release: Model Path Consistency
14
+ # - Standardized all configuration to use GGUF model paths
15
+ # - Updated CLI defaults for --crawl4ai-model to
16
+ # ~/models/deepseek/deepseek-coder-6.7b-instruct.Q4_K_M.gguf
17
+ # - Enhanced Rule Zero alignment with transparent configuration practices
18
+ # - Updated all test files to use consistent GGUF model path references
19
+ # - Comprehensive documentation updates for proper model configuration
20
+ # - Removed inconsistent Ollama references in favor of llama-cpp-python
21
+ # - Ensured CLI help text and JSON examples reflect correct model paths
22
+ # 1.5.0 - Major Release: Enhanced Web Scraping with Crawl4AI Integration
23
+ # - Integrated Crawl4AI for advanced web scraping capabilities
24
+ # - Added JavaScript-rendered content extraction via Playwright
25
+ # - Enhanced README parsing with LLM-powered content analysis
26
+ # - New CLI options: --enable-crawl4ai, --disable-crawl4ai, --crawl4ai-model
27
+ # - Enhanced configuration with local GGUF model paths and crawl4ai_timeout
28
+ # - Comprehensive test coverage for all Crawl4AI features
29
+ # - Rule Zero compliant with full transparency and audit trails
30
+ # 1.4.0 - Major Release: Rule Zero Compliance Audit Complete
31
+ # - Completed comprehensive Rule Zero alignment audit
32
+ # - Eliminated all code redundancy and dead code
33
+ # - Achieved 100% test coverage (22/22 tests passing)
34
+ # - Refactored to pure asyncio architecture (thread-free)
35
+ # - Suppressed Pydantic deprecation warnings
36
+ # - Full production readiness with Docker support
37
+ # - Enhanced documentation with PyPI cross-references
38
+ # - Certified Rule Zero compliance across all four principles
39
+ # 1.3.1 - Bug Fix Release: Crawl4AI Integration Cleanup
40
+ # - Fixed CSS selector syntax errors in Crawl4AI integration
41
+ # - Cleaned up duplicate and obsolete test files
42
+ # - Resolved import conflicts between workspace and integration configs
43
+ # - Improved error handling in enhanced scraping module
44
+ # - Standardized on direct llama.cpp approach (removed Ollama dependencies)
45
+ # - Enhanced Rule Zero compliance with transparent cleanup process
46
+ # - Fixed type annotation compatibility issues
47
+ # - Fixed Python 3.9 compatibility for type annotations
48
+ # - Updated dict[str, Any] to "dict[str, Any]" format
49
+ # - Fixed Union type expressions in conditional imports
50
+ # - Resolved IDE linter errors in network.py, pipeline.py, and production_config.py
51
+ # - Improved code quality and maintainability
52
+ # 1.3.0 - Quality & Integration Release: Comprehensive code quality improvements
53
+ # - Fixed all critical PEP 8 violations (F821, F811, E114)
54
+ # - Enhanced error handling with graceful dependency fallbacks
55
+ # - Improved module integration and import path resolution
56
+ # - Added comprehensive test validation (21/21 tests passing)
57
+ # - Enhanced async support and Unicode handling
58
+ # - Production-ready CLI interfaces with robust error handling
59
+ # - Full Rule Zero compliance validation
60
+ # 1.2.0 - Major release: Production-ready, cleaned codebase
61
+ # - Unified documentation into single comprehensive README
62
+ # - Removed all non-essential development and test files
63
+ # - Optimized for PyPI distribution and Docker deployment
64
+ # - Enhanced GitHub token integration and setup
65
+ # 1.1.2 - Production release: Cleaned up non-essential files
66
+ # - Unified documentation into single README
67
+ # - Optimized for PyPI distribution
68
+ # 1.1.1 - Bug fix: Added missing python-dateutil dependency
69
+ # - Fixed relativedelta import error
70
+ # 1.1.0 - Updated author and contact information
71
+ # - Enhanced package configuration
72
+ # 0.1.0 - Initial release
73
+ # - Core pipeline functionality
74
+ # - AI-powered metadata enrichment
75
+ # - Dependency analysis
76
+ # - PyPI package setup