rust-crate-pipeline 1.4.0__py3-none-any.whl → 1.4.1__py3-none-any.whl
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- rust_crate_pipeline/__init__.py +18 -27
- rust_crate_pipeline/__main__.py +1 -0
- rust_crate_pipeline/ai_processing.py +718 -596
- rust_crate_pipeline/analysis.py +330 -363
- rust_crate_pipeline/azure_ai_processing.py +462 -0
- rust_crate_pipeline/config.py +46 -28
- rust_crate_pipeline/core/__init__.py +19 -0
- rust_crate_pipeline/core/canon_registry.py +133 -0
- rust_crate_pipeline/core/irl_engine.py +256 -0
- rust_crate_pipeline/core/sacred_chain.py +117 -0
- rust_crate_pipeline/crate_analysis.py +54 -0
- rust_crate_pipeline/crate_list.txt +424 -0
- rust_crate_pipeline/github_token_checker.py +108 -112
- rust_crate_pipeline/main.py +329 -109
- rust_crate_pipeline/network.py +317 -308
- rust_crate_pipeline/pipeline.py +300 -375
- rust_crate_pipeline/production_config.py +24 -27
- rust_crate_pipeline/progress_monitor.py +334 -0
- rust_crate_pipeline/scraping/__init__.py +13 -0
- rust_crate_pipeline/scraping/unified_scraper.py +259 -0
- rust_crate_pipeline/unified_llm_processor.py +637 -0
- rust_crate_pipeline/unified_pipeline.py +548 -0
- rust_crate_pipeline/utils/file_utils.py +32 -5
- rust_crate_pipeline/utils/logging_utils.py +21 -16
- rust_crate_pipeline/version.py +76 -47
- rust_crate_pipeline-1.4.1.dist-info/METADATA +515 -0
- rust_crate_pipeline-1.4.1.dist-info/RECORD +31 -0
- rust_crate_pipeline-1.4.0.dist-info/METADATA +0 -585
- rust_crate_pipeline-1.4.0.dist-info/RECORD +0 -19
- {rust_crate_pipeline-1.4.0.dist-info → rust_crate_pipeline-1.4.1.dist-info}/WHEEL +0 -0
- {rust_crate_pipeline-1.4.0.dist-info → rust_crate_pipeline-1.4.1.dist-info}/entry_points.txt +0 -0
- {rust_crate_pipeline-1.4.0.dist-info → rust_crate_pipeline-1.4.1.dist-info}/licenses/LICENSE +0 -0
- {rust_crate_pipeline-1.4.0.dist-info → rust_crate_pipeline-1.4.1.dist-info}/top_level.txt +0 -0
@@ -4,11 +4,12 @@ import time
|
|
4
4
|
import psutil
|
5
5
|
import logging
|
6
6
|
from functools import wraps
|
7
|
-
from typing import Optional
|
7
|
+
from typing import Any, Callable, Dict, Optional, Union
|
8
8
|
|
9
9
|
|
10
|
-
def configure_logging(
|
11
|
-
|
10
|
+
def configure_logging(
|
11
|
+
log_dir: Optional[str] = None, log_level: int = logging.INFO
|
12
|
+
) -> logging.Logger:
|
12
13
|
"""
|
13
14
|
Configure global logging with file and console handlers
|
14
15
|
|
@@ -25,54 +26,58 @@ def configure_logging(log_dir: Optional[str] = None,
|
|
25
26
|
# Console handler
|
26
27
|
console_handler = logging.StreamHandler()
|
27
28
|
console_handler.setLevel(log_level)
|
28
|
-
console_format = logging.Formatter(
|
29
|
-
"%(asctime)s [%(levelname)s] %(message)s")
|
29
|
+
console_format = logging.Formatter("%(asctime)s [%(levelname)s] %(message)s")
|
30
30
|
console_handler.setFormatter(console_format)
|
31
31
|
logger.addHandler(console_handler)
|
32
32
|
|
33
33
|
# File handler
|
34
34
|
if log_dir:
|
35
35
|
log_file = os.path.join(
|
36
|
-
log_dir,
|
37
|
-
|
36
|
+
log_dir,
|
37
|
+
f"pipeline_{
|
38
|
+
time.strftime('%Y%m%d-%H%M%S')}.log",
|
39
|
+
)
|
38
40
|
file_handler = logging.FileHandler(log_file)
|
39
41
|
file_handler.setLevel(log_level)
|
40
42
|
file_format = logging.Formatter(
|
41
|
-
"%(asctime)s [%(levelname)s] %(name)s: %(message)s"
|
43
|
+
"%(asctime)s [%(levelname)s] %(name)s: %(message)s"
|
44
|
+
)
|
42
45
|
file_handler.setFormatter(file_format)
|
43
46
|
logger.addHandler(file_handler)
|
44
47
|
|
45
48
|
return logger
|
46
49
|
|
47
50
|
|
48
|
-
def log_execution_time(func):
|
51
|
+
def log_execution_time(func: Callable[..., Any]) -> Callable[..., Any]:
|
49
52
|
"""Decorator to log function execution time"""
|
53
|
+
|
50
54
|
@wraps(func)
|
51
|
-
def wrapper(*args, **kwargs):
|
55
|
+
def wrapper(*args, **kwargs) -> None:
|
52
56
|
start_time = time.time()
|
53
57
|
result = func(*args, **kwargs)
|
54
58
|
end_time = time.time()
|
55
|
-
logging.info(
|
56
|
-
f"{func.__name__} executed in {end_time - start_time:.2f} seconds")
|
59
|
+
logging.info(f"{func.__name__} executed in {end_time - start_time:.2f} seconds")
|
57
60
|
return result
|
61
|
+
|
58
62
|
return wrapper
|
59
63
|
|
60
64
|
|
61
|
-
def log_resource_usage():
|
65
|
+
def log_resource_usage() -> Dict[str, Any]:
|
62
66
|
"""Log current resource utilization (CPU, memory, disk)"""
|
63
67
|
cpu_percent = psutil.cpu_percent()
|
64
68
|
mem = psutil.virtual_memory()
|
65
|
-
disk = psutil.disk_usage(
|
69
|
+
disk = psutil.disk_usage(".")
|
66
70
|
|
67
71
|
logging.info(
|
68
72
|
f"Resource Usage - CPU: {cpu_percent}%, Memory: {
|
69
73
|
mem.percent}%, Disk: {
|
70
|
-
disk.percent}%"
|
74
|
+
disk.percent}%"
|
75
|
+
)
|
71
76
|
|
72
77
|
return {
|
73
78
|
"cpu_percent": cpu_percent,
|
74
79
|
"memory_percent": mem.percent,
|
75
80
|
"disk_percent": disk.percent,
|
76
81
|
"memory_available": mem.available,
|
77
|
-
"disk_free": disk.free
|
82
|
+
"disk_free": disk.free,
|
78
83
|
}
|
rust_crate_pipeline/version.py
CHANGED
@@ -1,47 +1,76 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
#
|
10
|
-
#
|
11
|
-
#
|
12
|
-
#
|
13
|
-
#
|
14
|
-
# -
|
15
|
-
# -
|
16
|
-
#
|
17
|
-
# -
|
18
|
-
# -
|
19
|
-
# -
|
20
|
-
# -
|
21
|
-
# -
|
22
|
-
#
|
23
|
-
#
|
24
|
-
# -
|
25
|
-
# - Enhanced
|
26
|
-
# -
|
27
|
-
# -
|
28
|
-
# -
|
29
|
-
# -
|
30
|
-
#
|
31
|
-
#
|
32
|
-
# -
|
33
|
-
# -
|
34
|
-
# -
|
35
|
-
# -
|
36
|
-
#
|
37
|
-
# -
|
38
|
-
# -
|
39
|
-
# 1.
|
40
|
-
# - Fixed
|
41
|
-
#
|
42
|
-
# -
|
43
|
-
#
|
44
|
-
# -
|
45
|
-
# -
|
46
|
-
# -
|
47
|
-
# -
|
1
|
+
from typing import Dict, List, Tuple, Optional, Any
|
2
|
+
"""Version information for rust-crate-pipeline."""
|
3
|
+
|
4
|
+
__version__ = "1.4.1"
|
5
|
+
__version_info__ = tuple(int(x) for x in __version__.split("-")[0].split("."))
|
6
|
+
__author__ = "SigilDERG Team"
|
7
|
+
__email__ = "sigilderg@example.com"
|
8
|
+
|
9
|
+
# Version history
|
10
|
+
# 1.2.5-dev.20250621 - Dev branch: experimental, not a formal
|
11
|
+
# release. Originated from v1.2.5.
|
12
|
+
# 1.2.5 - Last official release.
|
13
|
+
# 1.5.1 - Configuration Standardization Release: Model Path Consistency
|
14
|
+
# - Standardized all configuration to use GGUF model paths
|
15
|
+
# - Updated CLI defaults for --crawl4ai-model to
|
16
|
+
# ~/models/deepseek/deepseek-coder-6.7b-instruct.Q4_K_M.gguf
|
17
|
+
# - Enhanced Rule Zero alignment with transparent configuration practices
|
18
|
+
# - Updated all test files to use consistent GGUF model path references
|
19
|
+
# - Comprehensive documentation updates for proper model configuration
|
20
|
+
# - Removed inconsistent Ollama references in favor of llama-cpp-python
|
21
|
+
# - Ensured CLI help text and JSON examples reflect correct model paths
|
22
|
+
# 1.5.0 - Major Release: Enhanced Web Scraping with Crawl4AI Integration
|
23
|
+
# - Integrated Crawl4AI for advanced web scraping capabilities
|
24
|
+
# - Added JavaScript-rendered content extraction via Playwright
|
25
|
+
# - Enhanced README parsing with LLM-powered content analysis
|
26
|
+
# - New CLI options: --enable-crawl4ai, --disable-crawl4ai, --crawl4ai-model
|
27
|
+
# - Enhanced configuration with local GGUF model paths and crawl4ai_timeout
|
28
|
+
# - Comprehensive test coverage for all Crawl4AI features
|
29
|
+
# - Rule Zero compliant with full transparency and audit trails
|
30
|
+
# 1.4.0 - Major Release: Rule Zero Compliance Audit Complete
|
31
|
+
# - Completed comprehensive Rule Zero alignment audit
|
32
|
+
# - Eliminated all code redundancy and dead code
|
33
|
+
# - Achieved 100% test coverage (22/22 tests passing)
|
34
|
+
# - Refactored to pure asyncio architecture (thread-free)
|
35
|
+
# - Suppressed Pydantic deprecation warnings
|
36
|
+
# - Full production readiness with Docker support
|
37
|
+
# - Enhanced documentation with PyPI cross-references
|
38
|
+
# - Certified Rule Zero compliance across all four principles
|
39
|
+
# 1.3.1 - Bug Fix Release: Crawl4AI Integration Cleanup
|
40
|
+
# - Fixed CSS selector syntax errors in Crawl4AI integration
|
41
|
+
# - Cleaned up duplicate and obsolete test files
|
42
|
+
# - Resolved import conflicts between workspace and integration configs
|
43
|
+
# - Improved error handling in enhanced scraping module
|
44
|
+
# - Standardized on direct llama.cpp approach (removed Ollama dependencies)
|
45
|
+
# - Enhanced Rule Zero compliance with transparent cleanup process
|
46
|
+
# - Fixed type annotation compatibility issues
|
47
|
+
# - Fixed Python 3.9 compatibility for type annotations
|
48
|
+
# - Updated dict[str, Any] to "dict[str, Any]" format
|
49
|
+
# - Fixed Union type expressions in conditional imports
|
50
|
+
# - Resolved IDE linter errors in network.py, pipeline.py, and production_config.py
|
51
|
+
# - Improved code quality and maintainability
|
52
|
+
# 1.3.0 - Quality & Integration Release: Comprehensive code quality improvements
|
53
|
+
# - Fixed all critical PEP 8 violations (F821, F811, E114)
|
54
|
+
# - Enhanced error handling with graceful dependency fallbacks
|
55
|
+
# - Improved module integration and import path resolution
|
56
|
+
# - Added comprehensive test validation (21/21 tests passing)
|
57
|
+
# - Enhanced async support and Unicode handling
|
58
|
+
# - Production-ready CLI interfaces with robust error handling
|
59
|
+
# - Full Rule Zero compliance validation
|
60
|
+
# 1.2.0 - Major release: Production-ready, cleaned codebase
|
61
|
+
# - Unified documentation into single comprehensive README
|
62
|
+
# - Removed all non-essential development and test files
|
63
|
+
# - Optimized for PyPI distribution and Docker deployment
|
64
|
+
# - Enhanced GitHub token integration and setup
|
65
|
+
# 1.1.2 - Production release: Cleaned up non-essential files
|
66
|
+
# - Unified documentation into single README
|
67
|
+
# - Optimized for PyPI distribution
|
68
|
+
# 1.1.1 - Bug fix: Added missing python-dateutil dependency
|
69
|
+
# - Fixed relativedelta import error
|
70
|
+
# 1.1.0 - Updated author and contact information
|
71
|
+
# - Enhanced package configuration
|
72
|
+
# 0.1.0 - Initial release
|
73
|
+
# - Core pipeline functionality
|
74
|
+
# - AI-powered metadata enrichment
|
75
|
+
# - Dependency analysis
|
76
|
+
# - PyPI package setup
|