rust-crate-pipeline 1.3.6__py3-none-any.whl → 1.4.1__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -26,7 +26,7 @@ class CanonRegistry:
26
26
  def __init__(self) -> None:
27
27
  self.canon_entries: Dict[str, CanonEntry] = {}
28
28
  self.authority_chain: List[str] = []
29
- self.version = "1.3.0"
29
+ self.version = "1.4.0"
30
30
  self.logger = logging.getLogger(__name__)
31
31
 
32
32
  self._initialize_default_canon()
@@ -36,7 +36,7 @@ class CanonRegistry:
36
36
  "crates.io": {
37
37
  "authority_level": 10,
38
38
  "base_url": "https://crates.io/api/v1/",
39
- "version": "1.3.0",
39
+ "version": "1.4.0",
40
40
  "last_validated": datetime.now(timezone.utc).isoformat(),
41
41
  },
42
42
  "github.com": {
@@ -1,108 +1,108 @@
1
- from typing import Dict, List, Tuple, Optional, Any
2
- # github_token_checker.py
3
- """
4
- GitHub Token Checker Module
5
- Lightweight version of the token checker for integration into the main pipeline.
6
- """
7
-
8
- import os
9
- import sys
10
- import requests
11
- import logging
12
-
13
-
14
- def check_github_token_quick() -> None:
15
- """Quick check if GitHub token is available and valid"""
16
- token = os.getenv("GITHUB_TOKEN")
17
-
18
- if not token:
19
- return False, "GITHUB_TOKEN environment variable not set"
20
-
21
- if len(token) < 20:
22
- return False, "GITHUB_TOKEN seems too short - may be invalid"
23
-
24
- try:
25
- # Quick API check
26
- headers = {
27
- "Accept": "application/vnd.github.v3+json",
28
- "Authorization": f"token {token}",
29
- }
30
-
31
- response = requests.get(
32
- "https://api.github.com/rate_limit", headers=headers, timeout=10
33
- )
34
-
35
- if response.status_code == 200:
36
- data = response.json()
37
- remaining = data["resources"]["core"]["remaining"]
38
- return True, f"Token valid, {remaining} API calls remaining"
39
- elif response.status_code == 401:
40
- return False, "GitHub token is invalid or expired"
41
- else:
42
- return (
43
- False,
44
- f"GitHub API returned status code: {response.status_code}",
45
- )
46
- except requests.RequestException as e:
47
- return False, f"API request failed: {e}"
48
- except Exception as e:
49
- return False, f"Error checking token: {str(e)}"
50
-
51
-
52
- def prompt_for_token_setup() -> None:
53
- """Prompt user to set up GitHub token"""
54
- print("\n" + "=" * 60)
55
- print("[KEY] GitHub Token Required")
56
- print("=" * 60)
57
- print("\nThe Rust Crate Pipeline requires a GitHub Personal Access Token")
58
- print("to access repository information and avoid rate limits.")
59
- print("\n[GUIDE] Quick Setup:")
60
- print("1. Get token: https://github.com/settings/tokens")
61
- print("2. Required scopes: public_repo, read:user")
62
- print("3. Set in environment:")
63
- print(' export GITHUB_TOKEN="your_token_here"')
64
- print("\n[TOOLS] Setup Scripts Available:")
65
- print(" ./setup_github_token.sh (Interactive setup)")
66
- print(" python3 check_github_token.py (Full verification)")
67
- print("\n" + "=" * 60)
68
-
69
- # Ask if user wants to continue without token (limited functionality)
70
- response = input("\nContinue without GitHub token? (y/N): ").strip().lower()
71
-
72
- if response in ["y", "yes"]:
73
- print("[WARNING] Running with limited GitHub API access (60 requests/hour)")
74
- print(" You may encounter rate limit warnings.")
75
- return True
76
- else:
77
- print("\n[STOP] Please set up your GitHub token and try again.")
78
- return False
79
-
80
-
81
- def check_and_setup_github_token() -> None:
82
- """Checks and sets up the GitHub token."""
83
- is_valid, message = check_github_token_quick()
84
-
85
- if is_valid:
86
- logging.debug(f"GitHub token check: {message}")
87
- return True
88
-
89
- # Token is missing or invalid
90
- logging.warning(f"GitHub token issue: {message}")
91
-
92
- # Check if we're in a non-interactive environment
93
- if not sys.stdin.isatty():
94
- logging.error("GitHub token not configured and running in non-interactive mode")
95
- logging.error("Set GITHUB_TOKEN environment variable before running")
96
- return False
97
-
98
- # Interactive prompt
99
- return prompt_for_token_setup()
100
-
101
-
102
- if __name__ == "__main__":
103
- # Allow running this module directly for testing
104
- is_valid, message = check_github_token_quick()
105
- print(f"Token check: {'[OK]' if is_valid else '[FAIL]'} {message}")
106
-
107
- if not is_valid:
108
- check_and_setup_github_token()
1
+ from typing import Dict, List, Tuple, Optional, Any
2
+ # github_token_checker.py
3
+ """
4
+ GitHub Token Checker Module
5
+ Lightweight version of the token checker for integration into the main pipeline.
6
+ """
7
+
8
+ import os
9
+ import sys
10
+ import requests
11
+ import logging
12
+
13
+
14
+ def check_github_token_quick() -> tuple[bool, str]:
15
+ """Quick check if GitHub token is available and valid"""
16
+ token = os.getenv("GITHUB_TOKEN")
17
+
18
+ if not token:
19
+ return False, "GITHUB_TOKEN environment variable not set"
20
+
21
+ if len(token) < 20:
22
+ return False, "GITHUB_TOKEN seems too short - may be invalid"
23
+
24
+ try:
25
+ # Quick API check
26
+ headers = {
27
+ "Accept": "application/vnd.github.v3+json",
28
+ "Authorization": f"token {token}",
29
+ }
30
+
31
+ response = requests.get(
32
+ "https://api.github.com/rate_limit", headers=headers, timeout=10
33
+ )
34
+
35
+ if response.status_code == 200:
36
+ data = response.json()
37
+ remaining = data["resources"]["core"]["remaining"]
38
+ return True, f"Token valid, {remaining} API calls remaining"
39
+ elif response.status_code == 401:
40
+ return False, "GitHub token is invalid or expired"
41
+ else:
42
+ return (
43
+ False,
44
+ f"GitHub API returned status code: {response.status_code}",
45
+ )
46
+ except requests.RequestException as e:
47
+ return False, f"API request failed: {e}"
48
+ except Exception as e:
49
+ return False, f"Error checking token: {str(e)}"
50
+
51
+
52
+ def prompt_for_token_setup() -> bool:
53
+ """Prompt user to set up GitHub token"""
54
+ print("\n" + "=" * 60)
55
+ print("[KEY] GitHub Token Required")
56
+ print("=" * 60)
57
+ print("\nThe Rust Crate Pipeline requires a GitHub Personal Access Token")
58
+ print("to access repository information and avoid rate limits.")
59
+ print("\n[GUIDE] Quick Setup:")
60
+ print("1. Get token: https://github.com/settings/tokens")
61
+ print("2. Required scopes: public_repo, read:user")
62
+ print("3. Set in environment:")
63
+ print(' export GITHUB_TOKEN="your_token_here"')
64
+ print("\n[TOOLS] Setup Scripts Available:")
65
+ print(" ./setup_github_token.sh (Interactive setup)")
66
+ print(" python3 check_github_token.py (Full verification)")
67
+ print("\n" + "=" * 60)
68
+
69
+ # Ask if user wants to continue without token (limited functionality)
70
+ response = input("\nContinue without GitHub token? (y/N): ").strip().lower()
71
+
72
+ if response in ["y", "yes"]:
73
+ print("[WARNING] Running with limited GitHub API access (60 requests/hour)")
74
+ print(" You may encounter rate limit warnings.")
75
+ return True
76
+ else:
77
+ print("\n[STOP] Please set up your GitHub token and try again.")
78
+ return False
79
+
80
+
81
+ def check_and_setup_github_token() -> bool:
82
+ """Checks and sets up the GitHub token."""
83
+ is_valid, message = check_github_token_quick()
84
+
85
+ if is_valid:
86
+ logging.debug(f"GitHub token check: {message}")
87
+ return True
88
+
89
+ # Token is missing or invalid
90
+ logging.warning(f"GitHub token issue: {message}")
91
+
92
+ # Check if we're in a non-interactive environment
93
+ if not sys.stdin.isatty():
94
+ logging.error("GitHub token not configured and running in non-interactive mode")
95
+ logging.error("Set GITHUB_TOKEN environment variable before running")
96
+ return False
97
+
98
+ # Interactive prompt
99
+ return prompt_for_token_setup()
100
+
101
+
102
+ if __name__ == "__main__":
103
+ # Allow running this module directly for testing
104
+ is_valid, message = check_github_token_quick()
105
+ print(f"Token check: {'[OK]' if is_valid else '[FAIL]'} {message}")
106
+
107
+ if not is_valid:
108
+ check_and_setup_github_token()
@@ -1,7 +1,7 @@
1
1
  from typing import Dict, List, Tuple, Optional, Any
2
2
  """Version information for rust-crate-pipeline."""
3
3
 
4
- __version__ = "1.3.6"
4
+ __version__ = "1.4.1"
5
5
  __version_info__ = tuple(int(x) for x in __version__.split("-")[0].split("."))
6
6
  __author__ = "SigilDERG Team"
7
7
  __email__ = "sigilderg@example.com"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rust-crate-pipeline
3
- Version: 1.3.6
3
+ Version: 1.4.1
4
4
  Summary: A comprehensive pipeline for analyzing Rust crates with AI enrichment and enhanced scraping
5
5
  Home-page: https://github.com/SigilDERG/rust-crate-pipeline
6
6
  Author: SigilDERG Team
@@ -386,6 +386,13 @@ docker run -it -v $(pwd):/app rust-crate-pipeline
386
386
 
387
387
  ## Recent Improvements
388
388
 
389
+ ### Version 1.4.0
390
+ - **Security**: Robust Ed25519/RSA cryptographic signing and provenance
391
+ - **Automation**: Automated RAG and provenance workflows
392
+ - **CI/CD**: Improved GitHub Actions for validation and publishing
393
+ - **Docker**: Updated Docker image and compose for new version
394
+ - **Bug Fixes**: Workflow and validation fixes for Ed25519
395
+
389
396
  ### Version 1.3.6
390
397
  - **Python 3.12+ Requirement**: Updated to use modern type annotations and language features
391
398
  - **Type Safety**: Enhanced type annotations throughout the codebase with modern syntax
@@ -453,4 +460,56 @@ Or, text attribution:
453
460
 
454
461
  ```
455
462
  This project uses Crawl4AI (https://github.com/unclecode/crawl4ai) for web data extraction.
456
- ```
463
+ ```
464
+
465
+ ## 🚀 Unified, Cross-Platform, Multi-Provider LLM Support
466
+
467
+ This project supports **all major LLM providers** (cloud and local) on **Mac, Linux, and Windows** using a single, unified interface. All LLM calls are routed through the `UnifiedLLMProcessor` and `LLMConfig` abstractions, ensuring:
468
+
469
+ - **One code path for all providers:** Azure OpenAI, OpenAI, Anthropic, Google, Cohere, HuggingFace, Ollama, LM Studio, and any OpenAI-compatible endpoint.
470
+ - **Cross-platform compatibility:** Works out of the box on Mac, Linux, and Windows.
471
+ - **Configurable via CLI and config files:** Select provider, model, API key, endpoint, and provider-specific options at runtime.
472
+ - **Easy extensibility:** Add new providers by updating your config or CLI arguments—no code changes needed.
473
+
474
+ ### 📖 Provider Setup & Usage
475
+ - See [`README_LLM_PROVIDERS.md`](./README_LLM_PROVIDERS.md) for full details, setup instructions, and usage examples for every supported provider.
476
+ - Run `python run_pipeline_with_llm.py --help` for CLI options and provider-specific arguments.
477
+
478
+ ### 🧩 Example Usage
479
+ ```bash
480
+ # Azure OpenAI
481
+ python run_pipeline_with_llm.py --llm-provider azure --llm-model gpt-4o --crates tokio
482
+
483
+ # Ollama (local)
484
+ python run_pipeline_with_llm.py --llm-provider ollama --llm-model llama2 --crates serde
485
+
486
+ # OpenAI API
487
+ python run_pipeline_with_llm.py --llm-provider openai --llm-model gpt-4 --llm-api-key YOUR_KEY --crates tokio
488
+
489
+ # Anthropic Claude
490
+ python run_pipeline_with_llm.py --llm-provider anthropic --llm-model claude-3-sonnet --llm-api-key YOUR_KEY --crates serde
491
+ ```
492
+
493
+ ### 🔒 Security & Best Practices
494
+ - Store API keys as environment variables.
495
+ - Use local providers (Ollama, LM Studio) for full privacy—no data leaves your machine.
496
+ - All LLM calls are routed through a single, auditable interface for maximum maintainability and security.
497
+
498
+ ### 🧪 Testing
499
+ - Run `python test_unified_llm.py` to verify provider support and configuration.
500
+
501
+ For more, see [`README_LLM_PROVIDERS.md`](./README_LLM_PROVIDERS.md) and the CLI help output.
502
+
503
+ ## Public RAG Database Hash Verification
504
+
505
+ The canonical hash of the RAG SQLite database (`sigil_rag_cache.db`) is stored in the public file `sigil_rag_cache.hash`.
506
+
507
+ - **Purpose:** Anyone can verify the integrity of the RAG database by comparing its SHA256 hash to the value in `sigil_rag_cache.hash`.
508
+ - **How to verify:**
509
+
510
+ ```sh
511
+ python audits/validate_db_hash.py --db sigil_rag_cache.db --expected-hash "$(cat sigil_rag_cache.hash)"
512
+ ```
513
+
514
+ - **CI/CD:** The GitHub Actions workflow `.github/workflows/validate-db-hash.yml` automatically checks this on every push.
515
+ - **No secrets required:** The hash is public and verifiable by anyone.
@@ -1,12 +1,12 @@
1
1
  rust_crate_pipeline/__init__.py,sha256=ZJCApGu8h2Rn5-dkoBLXOpdoeD6b36w76--o0fEismQ,1749
2
2
  rust_crate_pipeline/__main__.py,sha256=PexSWQYtbFQg5P36WEnJ0X-oAtT8WDej3bIJoSAcCCQ,157
3
- rust_crate_pipeline/ai_processing.py,sha256=MP6VcvV3Jw2Pjof3NrewjTmO8ruVyJKcJGa9zhS_2eY,24140
3
+ rust_crate_pipeline/ai_processing.py,sha256=Q_jmIL0OzFcP6zSKTgrIikUTHuUB3Py4MqwLXmB7-KQ,29057
4
4
  rust_crate_pipeline/analysis.py,sha256=_cmjynLWaQbGIdLQHU3P3rfqHB3gcNNgCdzStbsKrdw,17021
5
- rust_crate_pipeline/azure_ai_processing.py,sha256=kxbHGNSRSD_5KNkL2ihqCASJq8kdnb_N9u1-ogXbneE,16449
5
+ rust_crate_pipeline/azure_ai_processing.py,sha256=h2ZUaFPt5LmTH--5CXfXBdbKnoJA4Ha8zCfbLawhDz8,16409
6
6
  rust_crate_pipeline/config.py,sha256=Fw3fRKCZawKaLQi7YqsmNNku4whZi89mWzr8BVRNS5E,3009
7
7
  rust_crate_pipeline/crate_analysis.py,sha256=GsoXemJ9VFyAbb4Sm5gY5ToTqNtOA4pI38AtngAQONk,2090
8
8
  rust_crate_pipeline/crate_list.txt,sha256=W3NxDtxvihyKp9SN85FYXX6p8Hh49IFih1M4-c-CynM,4334
9
- rust_crate_pipeline/github_token_checker.py,sha256=COXXS9uoLV9WYIcT02C-bV5uH3fa9D9HJImc07vMjLs,3766
9
+ rust_crate_pipeline/github_token_checker.py,sha256=0IpTh78DSaw4znaed031cSVSZDsi92eDManPzRIIN3Y,3670
10
10
  rust_crate_pipeline/main.py,sha256=iGYEAYvXkoFFvaA6DIVGiUL3wLhiCzatB6Fvf-Yrj2A,18858
11
11
  rust_crate_pipeline/network.py,sha256=mWjiRvOX31piBZ2QiJ-F75DBD4l6cqzTXcQdJvHxe90,12718
12
12
  rust_crate_pipeline/pipeline.py,sha256=CqPHLLRvMOpy-3ONL6hnPahV6Vh6S4M8oDsHd_lDrPc,16203
@@ -14,18 +14,18 @@ rust_crate_pipeline/production_config.py,sha256=uWylP9AIZZx7-9aT4sFmAKEEW9miJDxa
14
14
  rust_crate_pipeline/progress_monitor.py,sha256=5K9KP-Xggi1JEINfRmq2W-wGUHtNIBTcocpDtB1t8iM,13743
15
15
  rust_crate_pipeline/unified_llm_processor.py,sha256=eo7KotNuqwc7_hgpFm18QLokFoufFslnvi8TnDsSYEg,25064
16
16
  rust_crate_pipeline/unified_pipeline.py,sha256=2yglmXVlQfSkVq0HVTPonDee6VxWaQWZw0X2l4lLBGw,23704
17
- rust_crate_pipeline/version.py,sha256=izXdwKOkBxecVcCuMmOVbZnu5y-hHZZkEg39LmBPnis,4481
17
+ rust_crate_pipeline/version.py,sha256=whkmTDquEVytez4svUFUBfbfK0EOvDTPA8K5TuZffbE,4481
18
18
  rust_crate_pipeline/core/__init__.py,sha256=Sq4HWdANGqoYln7JdCog7m3BsGeR3tHdseeflvNetoQ,509
19
- rust_crate_pipeline/core/canon_registry.py,sha256=36tmt_wU6-kSyZnGfh53N64C7E3G-QR7GFbr9epj4zg,4700
19
+ rust_crate_pipeline/core/canon_registry.py,sha256=_3cu0akJvLc7ZnomMaLeMa8adOBYn1dtjpB0yE3vGL8,4700
20
20
  rust_crate_pipeline/core/irl_engine.py,sha256=QRZUdkN24W9XutLkj8JDplEz6FmnquUrwKsl0s2zRr4,10491
21
21
  rust_crate_pipeline/core/sacred_chain.py,sha256=6s4gFLDT6KUwuu0Fpxu6h_YHlsEvHZb3CQw4tRHGyDU,3773
22
22
  rust_crate_pipeline/scraping/__init__.py,sha256=ySkTRg7nIxgcbHJQ3L1XzcrOo281NZu07-XtiGi-558,307
23
23
  rust_crate_pipeline/scraping/unified_scraper.py,sha256=ZE2gkc0vQ3BOLdSX_IV-kMe8QAm2Av4M7VqpkxEKyT4,9965
24
24
  rust_crate_pipeline/utils/file_utils.py,sha256=tMaCPy7ghs9x4Hxu_sviX8MXU2sBjNvohUrvt4MejoM,2853
25
25
  rust_crate_pipeline/utils/logging_utils.py,sha256=e5jG0Yd6k3exgAdbVca46kWADJ_Qz8UJ3yEJzwTqPyI,2452
26
- rust_crate_pipeline-1.3.6.dist-info/licenses/LICENSE,sha256=tpd4XNpbssrSx9-iErATOLrOh0ivNPfO2I5MAPUpats,1088
27
- rust_crate_pipeline-1.3.6.dist-info/METADATA,sha256=BWUkQKtJCbXt1KZbmdofzy0eC4LpoeMBLKVXu3H3hD0,14539
28
- rust_crate_pipeline-1.3.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
29
- rust_crate_pipeline-1.3.6.dist-info/entry_points.txt,sha256=9Rr_IRuFRIridXxUSdEJbB3ba0NnpEfKmknZXFdYRC0,70
30
- rust_crate_pipeline-1.3.6.dist-info/top_level.txt,sha256=GUdB7RyxHLhijQxui_KTy3B8p_L2APui9C6RYa0FuaE,20
31
- rust_crate_pipeline-1.3.6.dist-info/RECORD,,
26
+ rust_crate_pipeline-1.4.1.dist-info/licenses/LICENSE,sha256=tpd4XNpbssrSx9-iErATOLrOh0ivNPfO2I5MAPUpats,1088
27
+ rust_crate_pipeline-1.4.1.dist-info/METADATA,sha256=OY5aKfWvpdRnLr9oKJ0SyX1N6evt1IYvu4J4GuFjwy0,17605
28
+ rust_crate_pipeline-1.4.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
29
+ rust_crate_pipeline-1.4.1.dist-info/entry_points.txt,sha256=9Rr_IRuFRIridXxUSdEJbB3ba0NnpEfKmknZXFdYRC0,70
30
+ rust_crate_pipeline-1.4.1.dist-info/top_level.txt,sha256=GUdB7RyxHLhijQxui_KTy3B8p_L2APui9C6RYa0FuaE,20
31
+ rust_crate_pipeline-1.4.1.dist-info/RECORD,,