rust-crate-pipeline: rust_crate_pipeline-1.4.0-py3-none-any.whl → rust_crate_pipeline-1.4.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. rust_crate_pipeline/__init__.py +18 -27
  2. rust_crate_pipeline/__main__.py +1 -0
  3. rust_crate_pipeline/ai_processing.py +718 -596
  4. rust_crate_pipeline/analysis.py +330 -363
  5. rust_crate_pipeline/azure_ai_processing.py +462 -0
  6. rust_crate_pipeline/config.py +46 -28
  7. rust_crate_pipeline/core/__init__.py +19 -0
  8. rust_crate_pipeline/core/canon_registry.py +133 -0
  9. rust_crate_pipeline/core/irl_engine.py +256 -0
  10. rust_crate_pipeline/core/sacred_chain.py +117 -0
  11. rust_crate_pipeline/crate_analysis.py +54 -0
  12. rust_crate_pipeline/crate_list.txt +424 -0
  13. rust_crate_pipeline/github_token_checker.py +108 -112
  14. rust_crate_pipeline/main.py +329 -109
  15. rust_crate_pipeline/network.py +317 -308
  16. rust_crate_pipeline/pipeline.py +300 -375
  17. rust_crate_pipeline/production_config.py +24 -27
  18. rust_crate_pipeline/progress_monitor.py +334 -0
  19. rust_crate_pipeline/scraping/__init__.py +13 -0
  20. rust_crate_pipeline/scraping/unified_scraper.py +259 -0
  21. rust_crate_pipeline/unified_llm_processor.py +637 -0
  22. rust_crate_pipeline/unified_pipeline.py +548 -0
  23. rust_crate_pipeline/utils/file_utils.py +32 -5
  24. rust_crate_pipeline/utils/logging_utils.py +21 -16
  25. rust_crate_pipeline/version.py +76 -47
  26. rust_crate_pipeline-1.4.1.dist-info/METADATA +515 -0
  27. rust_crate_pipeline-1.4.1.dist-info/RECORD +31 -0
  28. rust_crate_pipeline-1.4.0.dist-info/METADATA +0 -585
  29. rust_crate_pipeline-1.4.0.dist-info/RECORD +0 -19
  30. {rust_crate_pipeline-1.4.0.dist-info → rust_crate_pipeline-1.4.1.dist-info}/WHEEL +0 -0
  31. {rust_crate_pipeline-1.4.0.dist-info → rust_crate_pipeline-1.4.1.dist-info}/entry_points.txt +0 -0
  32. {rust_crate_pipeline-1.4.0.dist-info → rust_crate_pipeline-1.4.1.dist-info}/licenses/LICENSE +0 -0
  33. {rust_crate_pipeline-1.4.0.dist-info → rust_crate_pipeline-1.4.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,515 @@
1
+ Metadata-Version: 2.4
2
+ Name: rust-crate-pipeline
3
+ Version: 1.4.1
4
+ Summary: A comprehensive pipeline for analyzing Rust crates with AI enrichment and enhanced scraping
5
+ Home-page: https://github.com/SigilDERG/rust-crate-pipeline
6
+ Author: SigilDERG Team
7
+ Author-email: SigilDERG Team <sigilderg@example.com>
8
+ License: MIT
9
+ Project-URL: Homepage, https://github.com/Superuser666-Sigil/SigilDERG-Data_Production
10
+ Project-URL: Documentation, https://github.com/Superuser666-Sigil/SigilDERG-Data_Production#readme
11
+ Project-URL: Repository, https://github.com/Superuser666-Sigil/SigilDERG-Data_Production
12
+ Project-URL: Bug Tracker, https://github.com/Superuser666-Sigil/SigilDERG-Data_Production/issues
13
+ Keywords: rust,crates,analysis,ai,pipeline,scraping
14
+ Classifier: Development Status :: 4 - Beta
15
+ Classifier: Intended Audience :: Developers
16
+ Classifier: License :: OSI Approved :: MIT License
17
+ Classifier: Programming Language :: Python :: 3
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Programming Language :: Python :: 3.13
20
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
21
+ Classifier: Topic :: Software Development :: Quality Assurance
22
+ Requires-Python: >=3.12
23
+ Description-Content-Type: text/markdown
24
+ License-File: LICENSE
25
+ Requires-Dist: requests>=2.28.0
26
+ Requires-Dist: requests-cache>=1.0.0
27
+ Requires-Dist: beautifulsoup4>=4.11.0
28
+ Requires-Dist: crawl4ai>=0.6.0
29
+ Requires-Dist: playwright>=1.49.0
30
+ Requires-Dist: tqdm>=4.64.0
31
+ Requires-Dist: llama-cpp-python>=0.2.0
32
+ Requires-Dist: tiktoken>=0.5.0
33
+ Requires-Dist: psutil>=5.9.0
34
+ Requires-Dist: python-dateutil>=2.8.0
35
+ Requires-Dist: litellm>=1.0.0
36
+ Provides-Extra: dev
37
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
38
+ Requires-Dist: black>=22.0.0; extra == "dev"
39
+ Requires-Dist: isort>=5.10.0; extra == "dev"
40
+ Provides-Extra: advanced
41
+ Requires-Dist: radon>=6.0.0; extra == "advanced"
42
+ Requires-Dist: rustworkx>=0.13.0; extra == "advanced"
43
+ Dynamic: author
44
+ Dynamic: home-page
45
+ Dynamic: license-file
46
+ Dynamic: requires-python
47
+
48
+ # Rust Crate Pipeline
49
+
50
+ A comprehensive system for gathering, enriching, and analyzing metadata for Rust crates using AI-powered insights, web scraping, and dependency analysis.
51
+
52
+ ## Overview
53
+
54
+ The Rust Crate Pipeline is designed to collect, process, and enrich metadata from Rust crates available on crates.io. It combines web scraping, AI-powered analysis, and cargo testing to provide comprehensive insights into Rust ecosystem packages.
55
+
56
+ ## Features
57
+
58
+ - **Enhanced Web Scraping**: Automated collection of crate metadata from crates.io using Crawl4AI with Playwright
59
+ - **AI Enrichment**: Local and Azure OpenAI-powered analysis of crate descriptions, features, and documentation
60
+ - **Multi-Provider LLM Support**: Unified LLM processor supporting OpenAI, Azure OpenAI, Ollama, LM Studio, and LiteLLM
61
+ - **Cargo Testing**: Automated cargo build, test, and audit execution for comprehensive crate analysis
62
+ - **Dependency Analysis**: Deep analysis of crate dependencies and their relationships
63
+ - **Batch Processing**: Efficient processing of multiple crates with configurable batch sizes
64
+ - **Data Export**: Structured output in JSON format for further analysis
65
+ - **RAG Cache**: Intelligent caching with Rule Zero policies and architectural patterns
66
+ - **Docker Support**: Containerized deployment with optimized Docker configurations
67
+ - **Real-time Progress Monitoring**: CLI-based progress tracking with ASCII status indicators
68
+ - **Cross-platform Compatibility**: Full Unicode symbol replacement for better encoding support
69
+
70
+ ## Requirements
71
+
72
+ - **Python 3.12+**: Required for modern type annotations and language features
73
+ - **Git**: For cloning repositories during analysis
74
+ - **Cargo**: For Rust crate testing and analysis
75
+ - **Playwright**: Automatically installed for enhanced web scraping
76
+
77
+ ## Installation
78
+
79
+ ```bash
80
+ # Clone the repository
81
+ git clone https://github.com/Superuser666-Sigil/SigilDERG-Data_Production.git
82
+ cd SigilDERG-Data_Production
83
+
84
+ # Install in development mode (includes all dependencies)
85
+ pip install -e .
86
+
87
+ # Install Playwright browsers for enhanced scraping
88
+ playwright install
89
+ ```
90
+
91
+ ### Automatic Dependency Installation
92
+
93
+ The package automatically installs all required dependencies including:
94
+ - `crawl4ai` for web scraping
95
+ - `playwright` for enhanced browser automation
96
+ - `requests` for HTTP requests
97
+ - `aiohttp` for async operations
98
+ - And all other required packages
99
+
100
+ ## Configuration
101
+
102
+ ### Environment Variables
103
+
104
+ Set the following environment variables for full functionality:
105
+
106
+ ```bash
107
+ # GitHub Personal Access Token (required for API access)
108
+ export GITHUB_TOKEN="your_github_token_here"
109
+
110
+ # Azure OpenAI (optional, for cloud AI processing)
111
+ export AZURE_OPENAI_ENDPOINT="https://your-resource.openai.azure.com/"
112
+ export AZURE_OPENAI_API_KEY="your_azure_openai_key"
113
+ export AZURE_OPENAI_DEPLOYMENT_NAME="your_deployment_name"
114
+ export AZURE_OPENAI_API_VERSION="2024-02-15-preview"
115
+
116
+ # PyPI API Token (optional, for publishing)
117
+ export PYPI_API_TOKEN="your_pypi_token"
118
+
119
+ # LiteLLM Configuration (optional, for multi-provider LLM support)
120
+ export LITELLM_MODEL="deepseek-coder:33b"
121
+ export LITELLM_BASE_URL="http://localhost:11434" # For Ollama
122
+ ```
123
+
124
+ ### Configuration File
125
+
126
+ Create a `config.json` file for custom settings:
127
+
128
+ ```json
129
+ {
130
+ "batch_size": 10,
131
+ "n_workers": 4,
132
+ "max_retries": 3,
133
+ "checkpoint_interval": 10,
134
+ "use_azure_openai": true,
135
+ "crawl4ai_config": {
136
+ "max_pages": 5,
137
+ "concurrency": 2
138
+ }
139
+ }
140
+ ```
141
+
142
+ ## Usage
143
+
144
+ ### Command Line Interface
145
+
146
+ #### Basic Usage
147
+
148
+ ```bash
149
+ # Run with default settings
150
+ python -m rust_crate_pipeline
151
+
152
+ # Run with custom batch size
153
+ python -m rust_crate_pipeline --batch-size 20
154
+
155
+ # Run with specific workers
156
+ python -m rust_crate_pipeline --workers 8
157
+
158
+ # Use configuration file
159
+ python -m rust_crate_pipeline --config-file config.json
160
+ ```
161
+
162
+ #### Advanced Options
163
+
164
+ ```bash
165
+ # Enable Azure OpenAI processing
166
+ python -m rust_crate_pipeline --enable-azure-openai
167
+
168
+ # Set custom model path for local AI
169
+ python -m rust_crate_pipeline --model-path /path/to/model.gguf
170
+
171
+ # Configure token limits
172
+ python -m rust_crate_pipeline --max-tokens 2048
173
+
174
+ # Set checkpoint interval
175
+ python -m rust_crate_pipeline --checkpoint-interval 5
176
+
177
+ # Enable verbose logging
178
+ python -m rust_crate_pipeline --log-level DEBUG
179
+
180
+ # Enable enhanced scraping with Playwright
181
+ python -m rust_crate_pipeline --enable-enhanced-scraping
182
+
183
+ # Set output directory for results
184
+ python -m rust_crate_pipeline --output-path ./results
185
+ ```
186
+
187
+ #### Enhanced Scraping
188
+
189
+ The pipeline now supports enhanced web scraping using Playwright for better data extraction:
190
+
191
+ ```bash
192
+ # Enable enhanced scraping (default)
193
+ python -m rust_crate_pipeline --enable-enhanced-scraping
194
+
195
+ # Use basic scraping only
196
+ python -m rust_crate_pipeline --disable-enhanced-scraping
197
+
198
+ # Configure scraping options
199
+ python -m rust_crate_pipeline --scraping-config '{"max_pages": 10, "concurrency": 3}'
200
+ ```
201
+
202
+ #### Multi-Provider LLM Support
203
+
204
+ ```bash
205
+ # Use OpenAI
206
+ python -m rust_crate_pipeline.unified_llm_processor --provider openai --model-name gpt-4
207
+
208
+ # Use Azure OpenAI
209
+ python -m rust_crate_pipeline.unified_llm_processor --provider azure --model-name gpt-4
210
+
211
+ # Use Ollama (local)
212
+ python -m rust_crate_pipeline.unified_llm_processor --provider ollama --model-name deepseek-coder:33b
213
+
214
+ # Use LM Studio
215
+ python -m rust_crate_pipeline.unified_llm_processor --provider openai --base-url http://localhost:1234/v1 --model-name local-model
216
+
217
+ # Use LiteLLM
218
+ python -m rust_crate_pipeline.unified_llm_processor --provider litellm --model-name deepseek-coder:33b
219
+ ```
220
+
221
+ #### Production Mode
222
+
223
+ ```bash
224
+ # Run production pipeline with optimizations
225
+ python run_production.py
226
+
227
+ # Run with Sigil Protocol integration
228
+ python -m rust_crate_pipeline --enable-sigil-protocol
229
+ ```
230
+
231
+ ### Programmatic Usage
232
+
233
+ ```python
234
+ from rust_crate_pipeline import CrateDataPipeline
235
+ from rust_crate_pipeline.config import PipelineConfig
236
+
237
+ # Create configuration
238
+ config = PipelineConfig(
239
+ batch_size=10,
240
+ n_workers=4,
241
+ use_azure_openai=True
242
+ )
243
+
244
+ # Initialize pipeline
245
+ pipeline = CrateDataPipeline(config)
246
+
247
+ # Run pipeline
248
+ import asyncio
249
+ result = asyncio.run(pipeline.run())
250
+ ```
251
+
252
+ ## Sample Data
253
+
254
+ ### Input: Crate List
255
+
256
+ The pipeline processes crates from `rust_crate_pipeline/crate_list.txt`:
257
+
258
+ ```
259
+ tokio
260
+ serde
261
+ reqwest
262
+ actix-web
263
+ clap
264
+ ```
265
+
266
+ ### Output: Enriched Crate Data
267
+
268
+ ```json
269
+ {
270
+ "name": "tokio",
271
+ "version": "1.35.1",
272
+ "description": "An asynchronous runtime for Rust",
273
+ "downloads": 125000000,
274
+ "github_stars": 21500,
275
+ "keywords": ["async", "runtime", "tokio", "futures"],
276
+ "categories": ["asynchronous", "network-programming"],
277
+ "features": {
278
+ "full": ["all features enabled"],
279
+ "rt": ["runtime features"],
280
+ "macros": ["macro support"]
281
+ },
282
+ "readme_summary": "Tokio is an asynchronous runtime for Rust that provides the building blocks for writing network applications.",
283
+ "use_case": "Networking",
284
+ "factual_counterfactual": "✅ Factual: Tokio provides async I/O primitives\n❌ Counterfactual: Tokio is a synchronous runtime",
285
+ "score": 9.5,
286
+ "cargo_test_results": {
287
+ "build_success": true,
288
+ "test_success": true,
289
+ "audit_clean": true,
290
+ "dependencies": 45
291
+ },
292
+ "ai_insights": {
293
+ "complexity": "High",
294
+ "maturity": "Production Ready",
295
+ "community_health": "Excellent"
296
+ }
297
+ }
298
+ ```
299
+
300
+ ## Architecture
301
+
302
+ ### Core Components
303
+
304
+ - **Pipeline Orchestrator**: Manages the overall data processing workflow
305
+ - **Web Scraper**: Collects crate metadata using Crawl4AI
306
+ - **AI Enricher**: Enhances data with local or cloud AI analysis
307
+ - **Cargo Analyzer**: Executes cargo commands for comprehensive testing
308
+ - **Data Exporter**: Outputs structured results in various formats
309
+
310
+ ### Data Flow
311
+
312
+ 1. **Input**: Crate names from `crate_list.txt`
313
+ 2. **Scraping**: Web scraping of crates.io for metadata
314
+ 3. **Enrichment**: AI-powered analysis and insights
315
+ 4. **Testing**: Cargo build, test, and audit execution
316
+ 5. **Output**: Structured JSON with comprehensive crate analysis
317
+
318
+ ## Development
319
+
320
+ ### Prerequisites
321
+
322
+ - Python 3.12+ (required for modern type annotations)
323
+ - Git for version control
324
+ - Cargo for Rust crate testing
325
+
326
+ ### Running Tests
327
+
328
+ ```bash
329
+ # Run all tests
330
+ pytest tests/
331
+
332
+ # Run specific test module
333
+ pytest tests/test_main_integration.py
334
+
335
+ # Run with coverage
336
+ pytest --cov=rust_crate_pipeline tests/
337
+
338
+ # Run type checking
339
+ pyright rust_crate_pipeline/
340
+
341
+ # Run linting
342
+ flake8 rust_crate_pipeline/
343
+ ```
344
+
345
+ ### Code Quality
346
+
347
+ ```bash
348
+ # Format code
349
+ black rust_crate_pipeline/
350
+
351
+ # Sort imports
352
+ isort rust_crate_pipeline/
353
+
354
+ # Type checking
355
+ pyright rust_crate_pipeline/
356
+
357
+ # Lint code
358
+ flake8 rust_crate_pipeline/
359
+ ```
360
+
361
+ ### Building and Publishing
362
+
363
+ ```bash
364
+ # Build package
365
+ python -m build
366
+
367
+ # Upload to PyPI (requires PYPI_API_TOKEN)
368
+ python -m twine upload dist/*
369
+
370
+ # Create release
371
+ python scripts/create_release.py
372
+ ```
373
+
374
+ ### Docker Development
375
+
376
+ ```bash
377
+ # Build Docker image
378
+ docker build -t rust-crate-pipeline .
379
+
380
+ # Run in Docker
381
+ docker run -it rust-crate-pipeline
382
+
383
+ # Run with volume mount for development
384
+ docker run -it -v $(pwd):/app rust-crate-pipeline
385
+ ```
386
+
387
+ ## Recent Improvements
388
+
389
+ ### Version 1.4.0
390
+ - **Security**: Robust Ed25519/RSA cryptographic signing and provenance
391
+ - **Automation**: Automated RAG and provenance workflows
392
+ - **CI/CD**: Improved GitHub Actions for validation and publishing
393
+ - **Docker**: Updated Docker image and compose for new version
394
+ - **Bug Fixes**: Workflow and validation fixes for Ed25519
395
+
396
+ ### Version 1.3.6
397
+ - **Python 3.12+ Requirement**: Updated to use modern type annotations and language features
398
+ - **Type Safety**: Enhanced type annotations throughout the codebase with modern syntax
399
+ - **Build System**: Updated pyproject.toml and setup.py for better compatibility
400
+
401
+ ### Version 1.3.5
402
+ - **Enhanced Web Scraping**: Added Playwright-based scraping for better data extraction
403
+ - **Unicode Compatibility**: Replaced all Unicode symbols with ASCII equivalents for better cross-platform support
404
+ - **Automatic Dependencies**: All required packages are now automatically installed
405
+ - **Real-time Progress**: Added CLI-based progress monitoring with ASCII status indicators
406
+ - **Docker Optimization**: Updated Dockerfile to include Playwright browser installation
407
+
408
+ ### Version 1.3.4
409
+ - **PEP8 Compliance**: Fixed all Unicode emoji and symbols for better encoding support
410
+ - **Cross-platform Compatibility**: Improved compatibility across different operating systems
411
+ - **Type Safety**: Enhanced type annotations throughout the codebase
412
+
413
+ ### Version 1.3.3
414
+ - **Real-time Progress Monitoring**: Added CLI-only progress tracking feature
415
+ - **Enhanced Logging**: Improved status reporting and error handling
416
+
417
+ ### Version 1.3.2
418
+ - **Multi-Provider LLM Support**: Added support for OpenAI, Azure OpenAI, Ollama, LM Studio, and LiteLLM
419
+ - **Unified LLM Processor**: Centralized LLM processing with provider abstraction
420
+ - **Enhanced Error Handling**: Better error recovery and retry mechanisms
421
+
422
+ ## License
423
+
424
+ MIT License - see LICENSE file for details.
425
+
426
+ ## Contributing
427
+
428
+ 1. Fork the repository
429
+ 2. Create a feature branch
430
+ 3. Make your changes
431
+ 4. Add tests for new functionality
432
+ 5. Submit a pull request
433
+
434
+ ## Support
435
+
436
+ For issues and questions:
437
+ - GitHub Issues: https://github.com/Superuser666-Sigil/SigilDERG-Data_Production/issues
438
+ - Documentation: https://github.com/Superuser666-Sigil/SigilDERG-Data_Production#readme
439
+
440
+ ## API Compliance & Attribution
441
+
442
+ ### crates.io and GitHub API Usage
443
+ - This project accesses crates.io and GitHub APIs for data gathering and verification.
444
+ - **User-Agent:** All requests use:
445
+
446
+ `SigilDERG-Data-Production (Superuser666-Sigil; miragemodularframework@gmail.com; https://github.com/Superuser666-Sigil/SigilDERG-Data_Production)`
447
+ - **Contact:** miragemodularframework@gmail.com
448
+ - **GitHub:** [Superuser666-Sigil/SigilDERG-Data_Production](https://github.com/Superuser666-Sigil/SigilDERG-Data_Production)
449
+ - The project respects all rate limits and crawler policies. If you have questions or concerns, please contact us.
450
+
451
+ ### Crawl4AI Attribution
452
+ This project uses [Crawl4AI](https://github.com/unclecode/crawl4ai) for web data extraction.
453
+
454
+ <!-- Badge Attribution (Disco Theme) -->
455
+ <a href="https://github.com/unclecode/crawl4ai">
456
+ <img src="https://raw.githubusercontent.com/unclecode/crawl4ai/main/docs/assets/powered-by-disco.svg" alt="Powered by Crawl4AI" width="200"/>
457
+ </a>
458
+
459
+ Or, text attribution:
460
+
461
+ ```
462
+ This project uses Crawl4AI (https://github.com/unclecode/crawl4ai) for web data extraction.
463
+ ```
464
+
465
+ ## 🚀 Unified, Cross-Platform, Multi-Provider LLM Support
466
+
467
+ This project supports **all major LLM providers** (cloud and local) on **Mac, Linux, and Windows** using a single, unified interface. All LLM calls are routed through the `UnifiedLLMProcessor` and `LLMConfig` abstractions, ensuring:
468
+
469
+ - **One code path for all providers:** Azure OpenAI, OpenAI, Anthropic, Google, Cohere, HuggingFace, Ollama, LM Studio, and any OpenAI-compatible endpoint.
470
+ - **Cross-platform compatibility:** Works out of the box on Mac, Linux, and Windows.
471
+ - **Configurable via CLI and config files:** Select provider, model, API key, endpoint, and provider-specific options at runtime.
472
+ - **Easy extensibility:** Add new providers by updating your config or CLI arguments—no code changes needed.
473
+
474
+ ### 📖 Provider Setup & Usage
475
+ - See [`README_LLM_PROVIDERS.md`](./README_LLM_PROVIDERS.md) for full details, setup instructions, and usage examples for every supported provider.
476
+ - Run `python run_pipeline_with_llm.py --help` for CLI options and provider-specific arguments.
477
+
478
+ ### 🧩 Example Usage
479
+ ```bash
480
+ # Azure OpenAI
481
+ python run_pipeline_with_llm.py --llm-provider azure --llm-model gpt-4o --crates tokio
482
+
483
+ # Ollama (local)
484
+ python run_pipeline_with_llm.py --llm-provider ollama --llm-model llama2 --crates serde
485
+
486
+ # OpenAI API
487
+ python run_pipeline_with_llm.py --llm-provider openai --llm-model gpt-4 --llm-api-key YOUR_KEY --crates tokio
488
+
489
+ # Anthropic Claude
490
+ python run_pipeline_with_llm.py --llm-provider anthropic --llm-model claude-3-sonnet --llm-api-key YOUR_KEY --crates serde
491
+ ```
492
+
493
+ ### 🔒 Security & Best Practices
494
+ - Store API keys as environment variables.
495
+ - Use local providers (Ollama, LM Studio) for full privacy—no data leaves your machine.
496
+ - All LLM calls are routed through a single, auditable interface for maximum maintainability and security.
497
+
498
+ ### 🧪 Testing
499
+ - Run `python test_unified_llm.py` to verify provider support and configuration.
500
+
501
+ For more, see [`README_LLM_PROVIDERS.md`](./README_LLM_PROVIDERS.md) and the CLI help output.
502
+
503
+ ## Public RAG Database Hash Verification
504
+
505
+ The canonical hash of the RAG SQLite database (`sigil_rag_cache.db`) is stored in the public file `sigil_rag_cache.hash`.
506
+
507
+ - **Purpose:** Anyone can verify the integrity of the RAG database by comparing its SHA256 hash to the value in `sigil_rag_cache.hash`.
508
+ - **How to verify:**
509
+
510
+ ```sh
511
+ python audits/validate_db_hash.py --db sigil_rag_cache.db --expected-hash "$(cat sigil_rag_cache.hash)"
512
+ ```
513
+
514
+ - **CI/CD:** The GitHub Actions workflow `.github/workflows/validate-db-hash.yml` automatically checks this on every push.
515
+ - **No secrets required:** The hash is public and verifiable by anyone.
@@ -0,0 +1,31 @@
1
+ rust_crate_pipeline/__init__.py,sha256=ZJCApGu8h2Rn5-dkoBLXOpdoeD6b36w76--o0fEismQ,1749
2
+ rust_crate_pipeline/__main__.py,sha256=PexSWQYtbFQg5P36WEnJ0X-oAtT8WDej3bIJoSAcCCQ,157
3
+ rust_crate_pipeline/ai_processing.py,sha256=Q_jmIL0OzFcP6zSKTgrIikUTHuUB3Py4MqwLXmB7-KQ,29057
4
+ rust_crate_pipeline/analysis.py,sha256=_cmjynLWaQbGIdLQHU3P3rfqHB3gcNNgCdzStbsKrdw,17021
5
+ rust_crate_pipeline/azure_ai_processing.py,sha256=h2ZUaFPt5LmTH--5CXfXBdbKnoJA4Ha8zCfbLawhDz8,16409
6
+ rust_crate_pipeline/config.py,sha256=Fw3fRKCZawKaLQi7YqsmNNku4whZi89mWzr8BVRNS5E,3009
7
+ rust_crate_pipeline/crate_analysis.py,sha256=GsoXemJ9VFyAbb4Sm5gY5ToTqNtOA4pI38AtngAQONk,2090
8
+ rust_crate_pipeline/crate_list.txt,sha256=W3NxDtxvihyKp9SN85FYXX6p8Hh49IFih1M4-c-CynM,4334
9
+ rust_crate_pipeline/github_token_checker.py,sha256=0IpTh78DSaw4znaed031cSVSZDsi92eDManPzRIIN3Y,3670
10
+ rust_crate_pipeline/main.py,sha256=iGYEAYvXkoFFvaA6DIVGiUL3wLhiCzatB6Fvf-Yrj2A,18858
11
+ rust_crate_pipeline/network.py,sha256=mWjiRvOX31piBZ2QiJ-F75DBD4l6cqzTXcQdJvHxe90,12718
12
+ rust_crate_pipeline/pipeline.py,sha256=CqPHLLRvMOpy-3ONL6hnPahV6Vh6S4M8oDsHd_lDrPc,16203
13
+ rust_crate_pipeline/production_config.py,sha256=uWylP9AIZZx7-9aT4sFmAKEEW9miJDxaiek8VE6WP-0,2372
14
+ rust_crate_pipeline/progress_monitor.py,sha256=5K9KP-Xggi1JEINfRmq2W-wGUHtNIBTcocpDtB1t8iM,13743
15
+ rust_crate_pipeline/unified_llm_processor.py,sha256=eo7KotNuqwc7_hgpFm18QLokFoufFslnvi8TnDsSYEg,25064
16
+ rust_crate_pipeline/unified_pipeline.py,sha256=2yglmXVlQfSkVq0HVTPonDee6VxWaQWZw0X2l4lLBGw,23704
17
+ rust_crate_pipeline/version.py,sha256=whkmTDquEVytez4svUFUBfbfK0EOvDTPA8K5TuZffbE,4481
18
+ rust_crate_pipeline/core/__init__.py,sha256=Sq4HWdANGqoYln7JdCog7m3BsGeR3tHdseeflvNetoQ,509
19
+ rust_crate_pipeline/core/canon_registry.py,sha256=_3cu0akJvLc7ZnomMaLeMa8adOBYn1dtjpB0yE3vGL8,4700
20
+ rust_crate_pipeline/core/irl_engine.py,sha256=QRZUdkN24W9XutLkj8JDplEz6FmnquUrwKsl0s2zRr4,10491
21
+ rust_crate_pipeline/core/sacred_chain.py,sha256=6s4gFLDT6KUwuu0Fpxu6h_YHlsEvHZb3CQw4tRHGyDU,3773
22
+ rust_crate_pipeline/scraping/__init__.py,sha256=ySkTRg7nIxgcbHJQ3L1XzcrOo281NZu07-XtiGi-558,307
23
+ rust_crate_pipeline/scraping/unified_scraper.py,sha256=ZE2gkc0vQ3BOLdSX_IV-kMe8QAm2Av4M7VqpkxEKyT4,9965
24
+ rust_crate_pipeline/utils/file_utils.py,sha256=tMaCPy7ghs9x4Hxu_sviX8MXU2sBjNvohUrvt4MejoM,2853
25
+ rust_crate_pipeline/utils/logging_utils.py,sha256=e5jG0Yd6k3exgAdbVca46kWADJ_Qz8UJ3yEJzwTqPyI,2452
26
+ rust_crate_pipeline-1.4.1.dist-info/licenses/LICENSE,sha256=tpd4XNpbssrSx9-iErATOLrOh0ivNPfO2I5MAPUpats,1088
27
+ rust_crate_pipeline-1.4.1.dist-info/METADATA,sha256=OY5aKfWvpdRnLr9oKJ0SyX1N6evt1IYvu4J4GuFjwy0,17605
28
+ rust_crate_pipeline-1.4.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
29
+ rust_crate_pipeline-1.4.1.dist-info/entry_points.txt,sha256=9Rr_IRuFRIridXxUSdEJbB3ba0NnpEfKmknZXFdYRC0,70
30
+ rust_crate_pipeline-1.4.1.dist-info/top_level.txt,sha256=GUdB7RyxHLhijQxui_KTy3B8p_L2APui9C6RYa0FuaE,20
31
+ rust_crate_pipeline-1.4.1.dist-info/RECORD,,