rust-crate-pipeline 1.3.0__py3-none-any.whl → 1.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rust_crate_pipeline/core/canon_registry.py +4 -4
- rust_crate_pipeline/core/sacred_chain.py +1 -1
- rust_crate_pipeline/main.py +1 -2
- rust_crate_pipeline/network.py +7 -7
- rust_crate_pipeline/pipeline.py +3 -3
- rust_crate_pipeline/production_config.py +3 -3
- rust_crate_pipeline/unified_pipeline.py +1 -1
- rust_crate_pipeline/version.py +7 -1
- {rust_crate_pipeline-1.3.0.dist-info → rust_crate_pipeline-1.3.2.dist-info}/METADATA +28 -2
- {rust_crate_pipeline-1.3.0.dist-info → rust_crate_pipeline-1.3.2.dist-info}/RECORD +14 -14
- {rust_crate_pipeline-1.3.0.dist-info → rust_crate_pipeline-1.3.2.dist-info}/WHEEL +0 -0
- {rust_crate_pipeline-1.3.0.dist-info → rust_crate_pipeline-1.3.2.dist-info}/entry_points.txt +0 -0
- {rust_crate_pipeline-1.3.0.dist-info → rust_crate_pipeline-1.3.2.dist-info}/licenses/LICENSE +0 -0
- {rust_crate_pipeline-1.3.0.dist-info → rust_crate_pipeline-1.3.2.dist-info}/top_level.txt +0 -0
@@ -26,7 +26,7 @@ class CanonRegistry:
|
|
26
26
|
def __init__(self) -> None:
|
27
27
|
self.canon_entries: Dict[str, CanonEntry] = {}
|
28
28
|
self.authority_chain: List[str] = []
|
29
|
-
self.version = "1.
|
29
|
+
self.version = "1.3.0"
|
30
30
|
self.logger = logging.getLogger(__name__)
|
31
31
|
|
32
32
|
self._initialize_default_canon()
|
@@ -36,7 +36,7 @@ class CanonRegistry:
|
|
36
36
|
"crates.io": {
|
37
37
|
"authority_level": 10,
|
38
38
|
"base_url": "https://crates.io/api/v1/",
|
39
|
-
"version": "1.0",
|
39
|
+
"version": "1.3.0",
|
40
40
|
"last_validated": datetime.now(timezone.utc).isoformat(),
|
41
41
|
},
|
42
42
|
"github.com": {
|
@@ -48,13 +48,13 @@ class CanonRegistry:
|
|
48
48
|
"lib.rs": {
|
49
49
|
"authority_level": 6,
|
50
50
|
"base_url": "https://lib.rs/",
|
51
|
-
"version": "1.0",
|
51
|
+
"version": "1.3.0",
|
52
52
|
"last_validated": datetime.now(timezone.utc).isoformat(),
|
53
53
|
},
|
54
54
|
"docs.rs": {
|
55
55
|
"authority_level": 7,
|
56
56
|
"base_url": "https://docs.rs/",
|
57
|
-
"version": "1.0",
|
57
|
+
"version": "1.3.0",
|
58
58
|
"last_validated": datetime.now(timezone.utc).isoformat(),
|
59
59
|
},
|
60
60
|
}
|
@@ -55,7 +55,7 @@ class SacredChainBase(ABC):
|
|
55
55
|
|
56
56
|
def __init__(self) -> None:
|
57
57
|
self.execution_log: List[SacredChainTrace] = []
|
58
|
-
self.canon_version = "1.
|
58
|
+
self.canon_version = "1.3.0"
|
59
59
|
|
60
60
|
def generate_execution_id(self, input_data: str) -> str:
|
61
61
|
timestamp = datetime.now(timezone.utc).isoformat()
|
rust_crate_pipeline/main.py
CHANGED
@@ -442,8 +442,7 @@ def main() -> None:
|
|
442
442
|
if hasattr(args, "enable_sigil_protocol") and args.enable_sigil_protocol:
|
443
443
|
logging.info("Sigil Protocol mode requested")
|
444
444
|
logging.debug(
|
445
|
-
f"Sigil available: {_sigil_available}, SigilCompliantPipeline: {
|
446
|
-
SigilCompliantPipeline is not None}"
|
445
|
+
f"Sigil available: {_sigil_available}, SigilCompliantPipeline: {SigilCompliantPipeline is not None}"
|
447
446
|
)
|
448
447
|
|
449
448
|
# Import Sigil enhanced pipeline
|
rust_crate_pipeline/network.py
CHANGED
@@ -20,7 +20,7 @@ class GitHubBatchClient:
|
|
20
20
|
# Simple headers without dependency on HTTPClientUtils
|
21
21
|
self.headers = {
|
22
22
|
"Accept": "application/vnd.github.v3+json",
|
23
|
-
"User-Agent": "SigilDERG-Data-Production/1.
|
23
|
+
"User-Agent": "SigilDERG-Data-Production/1.3.2",
|
24
24
|
}
|
25
25
|
if config.github_token:
|
26
26
|
self.headers["Authorization"] = f"token {config.github_token}"
|
@@ -51,7 +51,7 @@ class GitHubBatchClient:
|
|
51
51
|
except Exception:
|
52
52
|
pass
|
53
53
|
|
54
|
-
def get_repo_stats(self, owner: str, repo: str) -> dict[str, Any]:
|
54
|
+
def get_repo_stats(self, owner: str, repo: str) -> "dict[str, Any]":
|
55
55
|
"""Get repository statistics"""
|
56
56
|
try:
|
57
57
|
url = f"https://api.github.com/repos/{owner}/{repo}"
|
@@ -68,11 +68,11 @@ class GitHubBatchClient:
|
|
68
68
|
logging.error(f"Error fetching repo stats: {str(e)}")
|
69
69
|
return {}
|
70
70
|
|
71
|
-
def batch_get_repo_stats(self, repo_list: list[str]) -> dict[str, dict[str, Any]]:
|
71
|
+
def batch_get_repo_stats(self, repo_list: "list[str]") -> "dict[str, dict[str, Any]]":
|
72
72
|
"""Get statistics for multiple repositories in a batch"""
|
73
73
|
self.check_rate_limit()
|
74
74
|
|
75
|
-
results: dict[str, dict[str, Any]] = {}
|
75
|
+
results: "dict[str, dict[str, Any]]" = {}
|
76
76
|
for repo_url in repo_list:
|
77
77
|
# Extract owner/repo from URL
|
78
78
|
match = re.search(r"github\.com/([^/]+)/([^/\.]+)", repo_url)
|
@@ -96,9 +96,9 @@ class CrateAPIClient:
|
|
96
96
|
self.config = config
|
97
97
|
# Simple session without dependency on HTTPClientUtils
|
98
98
|
self.session = requests.Session()
|
99
|
-
self.session.headers.update({"User-Agent": "SigilDERG-Data-Production/1.
|
99
|
+
self.session.headers.update({"User-Agent": "SigilDERG-Data-Production/1.3.2"})
|
100
100
|
|
101
|
-
def fetch_crate_metadata(self, crate_name: str) -> dict[str, Any] | None:
|
101
|
+
def fetch_crate_metadata(self, crate_name: str) -> "dict[str, Any] | None":
|
102
102
|
"""Fetch metadata with retry logic"""
|
103
103
|
for attempt in range(self.config.max_retries):
|
104
104
|
try:
|
@@ -114,7 +114,7 @@ class CrateAPIClient:
|
|
114
114
|
time.sleep(wait)
|
115
115
|
return None
|
116
116
|
|
117
|
-
def _fetch_metadata(self, crate_name: str) -> dict[str, Any] | None:
|
117
|
+
def _fetch_metadata(self, crate_name: str) -> "dict[str, Any] | None":
|
118
118
|
"""Enhanced metadata fetching that tries multiple sources"""
|
119
119
|
# First try crates.io (primary source)
|
120
120
|
try:
|
rust_crate_pipeline/pipeline.py
CHANGED
@@ -69,11 +69,11 @@ class CrateDataPipeline:
|
|
69
69
|
|
70
70
|
self.crates = self._get_crate_list()
|
71
71
|
self.output_dir = self._create_output_dir()
|
72
|
-
self.enhanced_scraper:
|
72
|
+
self.enhanced_scraper: Any = (
|
73
73
|
self._initialize_enhanced_scraper()
|
74
74
|
)
|
75
75
|
|
76
|
-
def _initialize_enhanced_scraper(self) ->
|
76
|
+
def _initialize_enhanced_scraper(self) -> Any:
|
77
77
|
"""Initializes the CrateDocumentationScraper if available and enabled."""
|
78
78
|
if (
|
79
79
|
not ENHANCED_SCRAPING_AVAILABLE
|
@@ -236,7 +236,7 @@ class CrateDataPipeline:
|
|
236
236
|
def _integrate_scraping_results(
|
237
237
|
self,
|
238
238
|
crate: CrateMetadata,
|
239
|
-
scraping_results: "Dict[str,
|
239
|
+
scraping_results: "Dict[str, Any]",
|
240
240
|
) -> None:
|
241
241
|
"""
|
242
242
|
Integrates enhanced scraping results into the crate metadata.
|
@@ -28,7 +28,7 @@ def configure_production_logging() -> None:
|
|
28
28
|
|
29
29
|
|
30
30
|
# Production-optimized settings
|
31
|
-
PRODUCTION_SETTINGS: dict[str, Any] = {
|
31
|
+
PRODUCTION_SETTINGS: "dict[str, Any]" = {
|
32
32
|
# Reduced retries to minimize warnings
|
33
33
|
"max_retries": 2,
|
34
34
|
"validation_retries": 2,
|
@@ -48,7 +48,7 @@ PRODUCTION_SETTINGS: dict[str, Any] = {
|
|
48
48
|
}
|
49
49
|
|
50
50
|
|
51
|
-
def get_production_config() -> dict[str, Any]:
|
51
|
+
def get_production_config() -> "dict[str, Any]":
|
52
52
|
"""Get production configuration dictionary"""
|
53
53
|
return PRODUCTION_SETTINGS.copy()
|
54
54
|
|
@@ -58,7 +58,7 @@ def is_production() -> bool:
|
|
58
58
|
return os.getenv("PRODUCTION", "false").lower() == "true"
|
59
59
|
|
60
60
|
|
61
|
-
def setup_production_environment() -> dict[str, Any]:
|
61
|
+
def setup_production_environment() -> "dict[str, Any]":
|
62
62
|
"""Set up the complete production environment"""
|
63
63
|
configure_production_logging()
|
64
64
|
|
rust_crate_pipeline/version.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
from typing import Dict, List, Tuple, Optional, Any
|
2
2
|
"""Version information for rust-crate-pipeline."""
|
3
3
|
|
4
|
-
__version__ = "1.3.0"
|
4
|
+
__version__ = "1.3.2"
|
5
5
|
__version_info__ = tuple(int(x) for x in __version__.split("-")[0].split("."))
|
6
6
|
|
7
7
|
# Version history
|
@@ -41,6 +41,12 @@ __version_info__ = tuple(int(x) for x in __version__.split("-")[0].split("."))
|
|
41
41
|
# - Improved error handling in enhanced scraping module
|
42
42
|
# - Standardized on direct llama.cpp approach (removed Ollama dependencies)
|
43
43
|
# - Enhanced Rule Zero compliance with transparent cleanup process
|
44
|
+
# - Fixed type annotation compatibility issues
|
45
|
+
# - Fixed Python 3.9 compatibility for type annotations
|
46
|
+
# - Updated dict[str, Any] to "dict[str, Any]" format
|
47
|
+
# - Fixed Union type expressions in conditional imports
|
48
|
+
# - Resolved IDE linter errors in network.py, pipeline.py, and production_config.py
|
49
|
+
# - Improved code quality and maintainability
|
44
50
|
# 1.3.0 - Quality & Integration Release: Comprehensive code quality improvements
|
45
51
|
# - Fixed all critical PEP 8 violations (F821, F811, E114)
|
46
52
|
# - Enhanced error handling with graceful dependency fallbacks
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: rust-crate-pipeline
|
3
|
-
Version: 1.3.0
|
3
|
+
Version: 1.3.2
|
4
4
|
Summary: A comprehensive system for gathering, enriching, and analyzing metadata for Rust crates using AI-powered insights
|
5
5
|
Home-page: https://github.com/Superuser666-Sigil/SigilDERG-Data_Production
|
6
6
|
Author: SuperUser666-Sigil
|
@@ -59,10 +59,13 @@ The Rust Crate Pipeline is designed to collect, process, and enrich metadata fro
|
|
59
59
|
|
60
60
|
- **Web Scraping**: Automated collection of crate metadata from crates.io using Crawl4AI
|
61
61
|
- **AI Enrichment**: Local and Azure OpenAI-powered analysis of crate descriptions, features, and documentation
|
62
|
+
- **Multi-Provider LLM Support**: Unified LLM processor supporting OpenAI, Azure OpenAI, Ollama, LM Studio, and LiteLLM
|
62
63
|
- **Cargo Testing**: Automated cargo build, test, and audit execution for comprehensive crate analysis
|
63
64
|
- **Dependency Analysis**: Deep analysis of crate dependencies and their relationships
|
64
65
|
- **Batch Processing**: Efficient processing of multiple crates with configurable batch sizes
|
65
66
|
- **Data Export**: Structured output in JSON format for further analysis
|
67
|
+
- **RAG Cache**: Intelligent caching with Rule Zero policies and architectural patterns
|
68
|
+
- **Docker Support**: Containerized deployment with optimized Docker configurations
|
66
69
|
|
67
70
|
## Installation
|
68
71
|
|
@@ -96,6 +99,10 @@ export AZURE_OPENAI_API_VERSION="2024-02-15-preview"
|
|
96
99
|
|
97
100
|
# PyPI API Token (optional, for publishing)
|
98
101
|
export PYPI_API_TOKEN="your_pypi_token"
|
102
|
+
|
103
|
+
# LiteLLM Configuration (optional, for multi-provider LLM support)
|
104
|
+
export LITELLM_MODEL="deepseek-coder:33b"
|
105
|
+
export LITELLM_BASE_URL="http://localhost:11434" # For Ollama
|
99
106
|
```
|
100
107
|
|
101
108
|
### Configuration File
|
@@ -155,6 +162,25 @@ python -m rust_crate_pipeline --checkpoint-interval 5
|
|
155
162
|
python -m rust_crate_pipeline --log-level DEBUG
|
156
163
|
```
|
157
164
|
|
165
|
+
#### Multi-Provider LLM Support
|
166
|
+
|
167
|
+
```bash
|
168
|
+
# Use OpenAI
|
169
|
+
python -m rust_crate_pipeline.unified_llm_processor --provider openai --model-name gpt-4
|
170
|
+
|
171
|
+
# Use Azure OpenAI
|
172
|
+
python -m rust_crate_pipeline.unified_llm_processor --provider azure --model-name gpt-4
|
173
|
+
|
174
|
+
# Use Ollama (local)
|
175
|
+
python -m rust_crate_pipeline.unified_llm_processor --provider ollama --model-name deepseek-coder:33b
|
176
|
+
|
177
|
+
# Use LM Studio
|
178
|
+
python -m rust_crate_pipeline.unified_llm_processor --provider openai --base-url http://localhost:1234/v1 --model-name local-model
|
179
|
+
|
180
|
+
# Use LiteLLM
|
181
|
+
python -m rust_crate_pipeline.unified_llm_processor --provider litellm --model-name deepseek-coder:33b
|
182
|
+
```
|
183
|
+
|
158
184
|
#### Production Mode
|
159
185
|
|
160
186
|
```bash
|
@@ -282,7 +308,7 @@ pyright rust_crate_pipeline/
|
|
282
308
|
|
283
309
|
## Requirements
|
284
310
|
|
285
|
-
- Python 3.
|
311
|
+
- Python 3.12+
|
286
312
|
- Rust toolchain (for cargo testing)
|
287
313
|
- Git (for GitHub API access)
|
288
314
|
- Internet connection (for web scraping and API calls)
|
@@ -7,24 +7,24 @@ rust_crate_pipeline/config.py,sha256=Fw3fRKCZawKaLQi7YqsmNNku4whZi89mWzr8BVRNS5E
|
|
7
7
|
rust_crate_pipeline/crate_analysis.py,sha256=GsoXemJ9VFyAbb4Sm5gY5ToTqNtOA4pI38AtngAQONk,2090
|
8
8
|
rust_crate_pipeline/crate_list.txt,sha256=W3NxDtxvihyKp9SN85FYXX6p8Hh49IFih1M4-c-CynM,4334
|
9
9
|
rust_crate_pipeline/github_token_checker.py,sha256=COXXS9uoLV9WYIcT02C-bV5uH3fa9D9HJImc07vMjLs,3766
|
10
|
-
rust_crate_pipeline/main.py,sha256=
|
11
|
-
rust_crate_pipeline/network.py,sha256=
|
12
|
-
rust_crate_pipeline/pipeline.py,sha256=
|
13
|
-
rust_crate_pipeline/production_config.py,sha256=
|
10
|
+
rust_crate_pipeline/main.py,sha256=iGYEAYvXkoFFvaA6DIVGiUL3wLhiCzatB6Fvf-Yrj2A,18858
|
11
|
+
rust_crate_pipeline/network.py,sha256=SSSolG5QdK4m9E77Ko3m-8DM1xz1Ha9XP9yeLSCfuaU,13308
|
12
|
+
rust_crate_pipeline/pipeline.py,sha256=YN6PEhg0Si_oo6-Wtm_PviytzWzpQupTPC2e4L1F7XE,16349
|
13
|
+
rust_crate_pipeline/production_config.py,sha256=uWylP9AIZZx7-9aT4sFmAKEEW9miJDxaiek8VE6WP-0,2372
|
14
14
|
rust_crate_pipeline/unified_llm_processor.py,sha256=eo7KotNuqwc7_hgpFm18QLokFoufFslnvi8TnDsSYEg,25064
|
15
|
-
rust_crate_pipeline/unified_pipeline.py,sha256=
|
16
|
-
rust_crate_pipeline/version.py,sha256=
|
15
|
+
rust_crate_pipeline/unified_pipeline.py,sha256=2yglmXVlQfSkVq0HVTPonDee6VxWaQWZw0X2l4lLBGw,23704
|
16
|
+
rust_crate_pipeline/version.py,sha256=f9QzOtJjGR2-83eFezB55H6KgfM0Gi5XAPOooI5Y5_M,4489
|
17
17
|
rust_crate_pipeline/core/__init__.py,sha256=Sq4HWdANGqoYln7JdCog7m3BsGeR3tHdseeflvNetoQ,509
|
18
|
-
rust_crate_pipeline/core/canon_registry.py,sha256=
|
18
|
+
rust_crate_pipeline/core/canon_registry.py,sha256=36tmt_wU6-kSyZnGfh53N64C7E3G-QR7GFbr9epj4zg,4700
|
19
19
|
rust_crate_pipeline/core/irl_engine.py,sha256=QRZUdkN24W9XutLkj8JDplEz6FmnquUrwKsl0s2zRr4,10491
|
20
|
-
rust_crate_pipeline/core/sacred_chain.py,sha256=
|
20
|
+
rust_crate_pipeline/core/sacred_chain.py,sha256=6s4gFLDT6KUwuu0Fpxu6h_YHlsEvHZb3CQw4tRHGyDU,3773
|
21
21
|
rust_crate_pipeline/scraping/__init__.py,sha256=ySkTRg7nIxgcbHJQ3L1XzcrOo281NZu07-XtiGi-558,307
|
22
22
|
rust_crate_pipeline/scraping/unified_scraper.py,sha256=ZE2gkc0vQ3BOLdSX_IV-kMe8QAm2Av4M7VqpkxEKyT4,9965
|
23
23
|
rust_crate_pipeline/utils/file_utils.py,sha256=tMaCPy7ghs9x4Hxu_sviX8MXU2sBjNvohUrvt4MejoM,2853
|
24
24
|
rust_crate_pipeline/utils/logging_utils.py,sha256=e5jG0Yd6k3exgAdbVca46kWADJ_Qz8UJ3yEJzwTqPyI,2452
|
25
|
-
rust_crate_pipeline-1.3.
|
26
|
-
rust_crate_pipeline-1.3.
|
27
|
-
rust_crate_pipeline-1.3.
|
28
|
-
rust_crate_pipeline-1.3.
|
29
|
-
rust_crate_pipeline-1.3.
|
30
|
-
rust_crate_pipeline-1.3.
|
25
|
+
rust_crate_pipeline-1.3.2.dist-info/licenses/LICENSE,sha256=tpd4XNpbssrSx9-iErATOLrOh0ivNPfO2I5MAPUpats,1088
|
26
|
+
rust_crate_pipeline-1.3.2.dist-info/METADATA,sha256=sMYLvaOvjRgZNP1iHuNb0NEWxgqY_uqzdx_PGnWtMT0,11254
|
27
|
+
rust_crate_pipeline-1.3.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
28
|
+
rust_crate_pipeline-1.3.2.dist-info/entry_points.txt,sha256=9Rr_IRuFRIridXxUSdEJbB3ba0NnpEfKmknZXFdYRC0,70
|
29
|
+
rust_crate_pipeline-1.3.2.dist-info/top_level.txt,sha256=GUdB7RyxHLhijQxui_KTy3B8p_L2APui9C6RYa0FuaE,20
|
30
|
+
rust_crate_pipeline-1.3.2.dist-info/RECORD,,
|
File without changes
|
{rust_crate_pipeline-1.3.0.dist-info → rust_crate_pipeline-1.3.2.dist-info}/entry_points.txt
RENAMED
File without changes
|
{rust_crate_pipeline-1.3.0.dist-info → rust_crate_pipeline-1.3.2.dist-info}/licenses/LICENSE
RENAMED
File without changes
|
File without changes
|