rust-crate-pipeline 1.4.0__py3-none-any.whl → 1.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rust_crate_pipeline/__init__.py +18 -27
- rust_crate_pipeline/__main__.py +1 -0
- rust_crate_pipeline/ai_processing.py +718 -596
- rust_crate_pipeline/analysis.py +330 -363
- rust_crate_pipeline/azure_ai_processing.py +462 -0
- rust_crate_pipeline/config.py +46 -28
- rust_crate_pipeline/core/__init__.py +19 -0
- rust_crate_pipeline/core/canon_registry.py +133 -0
- rust_crate_pipeline/core/irl_engine.py +256 -0
- rust_crate_pipeline/core/sacred_chain.py +117 -0
- rust_crate_pipeline/crate_analysis.py +54 -0
- rust_crate_pipeline/crate_list.txt +424 -0
- rust_crate_pipeline/github_token_checker.py +108 -112
- rust_crate_pipeline/main.py +329 -109
- rust_crate_pipeline/network.py +317 -308
- rust_crate_pipeline/pipeline.py +300 -375
- rust_crate_pipeline/production_config.py +24 -27
- rust_crate_pipeline/progress_monitor.py +334 -0
- rust_crate_pipeline/scraping/__init__.py +13 -0
- rust_crate_pipeline/scraping/unified_scraper.py +259 -0
- rust_crate_pipeline/unified_llm_processor.py +637 -0
- rust_crate_pipeline/unified_pipeline.py +548 -0
- rust_crate_pipeline/utils/file_utils.py +32 -5
- rust_crate_pipeline/utils/logging_utils.py +21 -16
- rust_crate_pipeline/version.py +76 -47
- rust_crate_pipeline-1.4.1.dist-info/METADATA +515 -0
- rust_crate_pipeline-1.4.1.dist-info/RECORD +31 -0
- rust_crate_pipeline-1.4.0.dist-info/METADATA +0 -585
- rust_crate_pipeline-1.4.0.dist-info/RECORD +0 -19
- {rust_crate_pipeline-1.4.0.dist-info → rust_crate_pipeline-1.4.1.dist-info}/WHEEL +0 -0
- {rust_crate_pipeline-1.4.0.dist-info → rust_crate_pipeline-1.4.1.dist-info}/entry_points.txt +0 -0
- {rust_crate_pipeline-1.4.0.dist-info → rust_crate_pipeline-1.4.1.dist-info}/licenses/LICENSE +0 -0
- {rust_crate_pipeline-1.4.0.dist-info → rust_crate_pipeline-1.4.1.dist-info}/top_level.txt +0 -0
@@ -1,585 +0,0 @@
|
|
1
|
-
Metadata-Version: 2.4
|
2
|
-
Name: rust-crate-pipeline
|
3
|
-
Version: 1.4.0
|
4
|
-
Summary: A comprehensive system for gathering, enriching, and analyzing metadata for Rust crates using AI-powered insights
|
5
|
-
Home-page: https://github.com/Superuser666-Sigil/SigilDERG-Data_Production
|
6
|
-
Author: SuperUser666-Sigil
|
7
|
-
Author-email: SuperUser666-Sigil <miragemodularframework@gmail.com>
|
8
|
-
License-Expression: MIT
|
9
|
-
Project-URL: Homepage, https://github.com/Superuser666-Sigil/SigilDERG-Data_Production
|
10
|
-
Project-URL: Documentation, https://github.com/Superuser666-Sigil/SigilDERG-Data_Production#readme
|
11
|
-
Project-URL: Repository, https://github.com/Superuser666-Sigil/SigilDERG-Data_Production
|
12
|
-
Project-URL: Bug Tracker, https://github.com/Superuser666-Sigil/SigilDERG-Data_Production/issues
|
13
|
-
Keywords: rust,crates,metadata,ai,analysis,pipeline,dependencies
|
14
|
-
Classifier: Development Status :: 4 - Beta
|
15
|
-
Classifier: Intended Audience :: Developers
|
16
|
-
Classifier: Operating System :: OS Independent
|
17
|
-
Classifier: Programming Language :: Python :: 3
|
18
|
-
Classifier: Programming Language :: Python :: 3.8
|
19
|
-
Classifier: Programming Language :: Python :: 3.9
|
20
|
-
Classifier: Programming Language :: Python :: 3.10
|
21
|
-
Classifier: Programming Language :: Python :: 3.11
|
22
|
-
Classifier: Programming Language :: Python :: 3.12
|
23
|
-
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
24
|
-
Classifier: Topic :: Software Development :: Build Tools
|
25
|
-
Classifier: Topic :: Software Development :: Quality Assurance
|
26
|
-
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
27
|
-
Requires-Python: >=3.8
|
28
|
-
Description-Content-Type: text/markdown
|
29
|
-
License-File: LICENSE
|
30
|
-
Requires-Dist: requests>=2.28.0
|
31
|
-
Requires-Dist: requests-cache>=1.0.0
|
32
|
-
Requires-Dist: beautifulsoup4>=4.11.0
|
33
|
-
Requires-Dist: tqdm>=4.64.0
|
34
|
-
Requires-Dist: llama-cpp-python>=0.2.0
|
35
|
-
Requires-Dist: tiktoken>=0.5.0
|
36
|
-
Requires-Dist: psutil>=5.9.0
|
37
|
-
Requires-Dist: python-dateutil>=2.8.0
|
38
|
-
Provides-Extra: dev
|
39
|
-
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
40
|
-
Requires-Dist: black>=22.0.0; extra == "dev"
|
41
|
-
Requires-Dist: isort>=5.10.0; extra == "dev"
|
42
|
-
Provides-Extra: advanced
|
43
|
-
Requires-Dist: radon>=6.0.0; extra == "advanced"
|
44
|
-
Requires-Dist: rustworkx>=0.13.0; extra == "advanced"
|
45
|
-
Dynamic: author
|
46
|
-
Dynamic: home-page
|
47
|
-
Dynamic: license-file
|
48
|
-
Dynamic: requires-python
|
49
|
-
|
50
|
-
# Rust Crate Pipeline
|
51
|
-
|
52
|
-
[Python 3.8+](https://www.python.org/downloads/)
|
53
|
-
[License: MIT](https://opensource.org/licenses/MIT)
|
54
|
-
[PyPI Package](https://pypi.org/project/rust-crate-pipeline/)
|
55
|
-
[Docker Ready](https://docker.com/)
|
56
|
-
[System Audit Report](https://github.com/Superuser666-Sigil/SigilDERG-Data_Production/blob/main/SYSTEM_AUDIT_REPORT.md)
|
57
|
-
|
58
|
-
A production-ready, Rule Zero-compliant pipeline for comprehensive Rust crate analysis, featuring AI-powered insights, dependency mapping, and automated data enrichment. Designed for researchers, developers, and data scientists studying the Rust ecosystem.
|
59
|
-
|
60
|
-
📦 **Available on PyPI:** [rust-crate-pipeline](https://pypi.org/project/rust-crate-pipeline/)
|
61
|
-
|
62
|
-
## 🚀 Quick Start
|
63
|
-
|
64
|
-
### 1. Installation
|
65
|
-
|
66
|
-
#### From PyPI (Recommended)
|
67
|
-
|
68
|
-
```bash
|
69
|
-
pip install rust-crate-pipeline
|
70
|
-
```
|
71
|
-
|
72
|
-
For the latest version, visit: [rust-crate-pipeline on PyPI](https://pypi.org/project/rust-crate-pipeline/)
|
73
|
-
|
74
|
-
#### From Source
|
75
|
-
|
76
|
-
```bash
|
77
|
-
git clone https://github.com/Superuser666-Sigil/SigilDERG-Data_Production.git
|
78
|
-
cd SigilDERG-Data_Production
|
79
|
-
pip install -e .
|
80
|
-
```
|
81
|
-
|
82
|
-
#### Development Installation
|
83
|
-
|
84
|
-
```bash
|
85
|
-
git clone https://github.com/Superuser666-Sigil/SigilDERG-Data_Production.git
|
86
|
-
cd SigilDERG-Data_Production
|
87
|
-
pip install -e ".[dev]"
|
88
|
-
```
|
89
|
-
|
90
|
-
### 2. GitHub Token Setup
|
91
|
-
|
92
|
-
A GitHub Personal Access Token is strongly recommended: without one, the GitHub API allows only 60 requests/hour instead of 5000. To set one up:
|
93
|
-
|
94
|
-
```bash
|
95
|
-
# Interactive setup (Linux/Unix)
|
96
|
-
chmod +x setup_github_token.sh
|
97
|
-
./setup_github_token.sh
|
98
|
-
|
99
|
-
# Manual setup
|
100
|
-
export GITHUB_TOKEN="your_token_here"
|
101
|
-
echo 'export GITHUB_TOKEN="your_token_here"' >> ~/.bashrc
|
102
|
-
|
103
|
-
# Verify setup
|
104
|
-
python3 check_github_token.py
|
105
|
-
```
|
106
|
-
|
107
|
-
**Get your token at**: [GitHub Settings](https://github.com/settings/tokens)
|
108
|
-
**Required scopes**: `public_repo`, `read:user`
|
109
|
-
|
110
|
-
### 3. Basic Usage
|
111
|
-
|
112
|
-
```bash
|
113
|
-
# Standard mode
|
114
|
-
python3 -m rust_crate_pipeline
|
115
|
-
|
116
|
-
# Production mode (reduced warnings, optimized settings)
|
117
|
-
python3 run_production.py
|
118
|
-
|
119
|
-
# Process only 20 crates for testing
|
120
|
-
python3 -m rust_crate_pipeline --limit 20
|
121
|
-
|
122
|
-
# Skip AI processing for faster metadata-only collection
|
123
|
-
python3 -m rust_crate_pipeline --skip-ai --limit 50
|
124
|
-
```
|
125
|
-
|
126
|
-
### 4. Advanced Usage
|
127
|
-
|
128
|
-
```bash
|
129
|
-
# Custom configuration
|
130
|
-
python3 -m rust_crate_pipeline \
|
131
|
-
--limit 100 \
|
132
|
-
--batch-size 5 \
|
133
|
-
--workers 2 \
|
134
|
-
--log-level DEBUG \
|
135
|
-
--output-dir ./results
|
136
|
-
|
137
|
-
# Process specific crates
|
138
|
-
python3 -m rust_crate_pipeline \
|
139
|
-
--crate-list serde tokio actix-web reqwest \
|
140
|
-
--output-dir ./specific_crates
|
141
|
-
|
142
|
-
# Use custom model and config
|
143
|
-
python3 -m rust_crate_pipeline \
|
144
|
-
--model-path ./my-model.gguf \
|
145
|
-
--config-file ./custom_config.json
|
146
|
-
```
|
147
|
-
|
148
|
-
## 🎯 Features
|
149
|
-
|
150
|
-
*Available in the latest version: [rust-crate-pipeline v1.4.0](https://pypi.org/project/rust-crate-pipeline/)*
|
151
|
-
|
152
|
-
### 📊 Data Collection & Analysis
|
153
|
-
|
154
|
-
- **Multi-source metadata**: crates.io, GitHub, lib.rs integration
|
155
|
-
- **Dependency mapping**: Complete dependency graphs and analysis
|
156
|
-
- **Code extraction**: Automatic Rust code example extraction
|
157
|
-
- **Security scanning**: Vulnerability and security pattern analysis
|
158
|
-
- **Performance metrics**: Lines of code, complexity, API surface analysis
|
159
|
-
|
160
|
-
### 🤖 AI-Powered Enrichment
|
161
|
-
|
162
|
-
- **Smart categorization**: Automatic crate classification (Web, ML, Database, etc.)
|
163
|
-
- **Feature summarization**: AI-generated explanations and insights
|
164
|
-
- **Content optimization**: Intelligent README section preservation
|
165
|
-
- **Factual pairs**: Training data generation for fact verification
|
166
|
-
|
167
|
-
### ⚡ Production Features
|
168
|
-
|
169
|
-
- **Automatic GitHub token detection**: Seamless setup and validation
|
170
|
-
- **Smart rate limiting**: Respects GitHub API limits with intelligent backoff
|
171
|
-
- **Robust error handling**: Graceful degradation and comprehensive logging
|
172
|
-
- **Progress checkpointing**: Automatic saving for long-running processes
|
173
|
-
- **Docker ready**: Full container support with optimized configurations
|
174
|
-
|
175
|
-
## 💻 System Requirements
|
176
|
-
|
177
|
-
### Minimum Requirements
|
178
|
-
|
179
|
-
- **Python**: 3.8+
|
180
|
-
- **Memory**: 4GB RAM
|
181
|
-
- **Storage**: 2GB free space
|
182
|
-
- **Network**: Stable internet connection
|
183
|
-
|
184
|
-
### Recommended Setup
|
185
|
-
|
186
|
-
- **Python**: 3.10+
|
187
|
-
- **Memory**: 8GB+ RAM
|
188
|
-
- **Storage**: 10GB+ free space (SSD preferred)
|
189
|
-
- **GitHub Token**: For enhanced API access (5000 vs 60 requests/hour)
|
190
|
-
|
191
|
-
### Dependencies
|
192
|
-
|
193
|
-
Core dependencies are automatically installed:
|
194
|
-
|
195
|
-
```text
|
196
|
-
requests>=2.28.0
|
197
|
-
requests-cache>=1.0.0
|
198
|
-
beautifulsoup4>=4.11.0
|
199
|
-
tqdm>=4.64.0
|
200
|
-
llama-cpp-python>=0.2.0
|
201
|
-
tiktoken>=0.5.0
|
202
|
-
psutil>=5.9.0
|
203
|
-
python-dateutil>=2.8.0
|
204
|
-
```
|
205
|
-
|
206
|
-
## ⚙️ Configuration & Usage
|
207
|
-
|
208
|
-
### Command Line Options
|
209
|
-
|
210
|
-
| Argument | Type | Default | Description |
|
211
|
-
|----------|------|---------|-------------|
|
212
|
-
| `--limit` | int | None | Limit number of crates to process |
|
213
|
-
| `--batch-size` | int | 10 | Crates processed per batch |
|
214
|
-
| `--workers` | int | 4 | Parallel workers for API requests |
|
215
|
-
| `--output-dir` | str | auto | Custom output directory |
|
216
|
-
| `--model-path` | str | default | Path to LLM model file |
|
217
|
-
| `--max-tokens` | int | 256 | Maximum tokens for LLM generation |
|
218
|
-
| `--checkpoint-interval` | int | 10 | Save progress every N crates |
|
219
|
-
| `--log-level` | str | INFO | Logging verbosity |
|
220
|
-
| `--skip-ai` | flag | False | Skip AI enrichment |
|
221
|
-
| `--skip-source-analysis` | flag | False | Skip source code analysis |
|
222
|
-
| `--crate-list` | list | None | Specific crates to process |
|
223
|
-
| `--config-file` | str | None | JSON configuration file |
|
224
|
-
|
225
|
-
### Production Mode
|
226
|
-
|
227
|
-
Production mode provides optimized settings with reduced warnings:
|
228
|
-
|
229
|
-
```bash
|
230
|
-
# Using production launcher
|
231
|
-
python3 run_production.py [OPTIONS]
|
232
|
-
|
233
|
-
# Using environment variable
|
234
|
-
PRODUCTION=true python3 -m rust_crate_pipeline
|
235
|
-
|
236
|
-
# Docker production mode
|
237
|
-
docker run -e PRODUCTION=true -e GITHUB_TOKEN="token" your-image
|
238
|
-
```
|
239
|
-
|
240
|
-
**Production optimizations:**
|
241
|
-
|
242
|
-
- Reduced retry attempts (3→2) to minimize warnings
|
243
|
-
- Smart GitHub API rate limiting with proactive pausing
|
244
|
-
- Enhanced logging with appropriate levels
|
245
|
-
- Optimized timeout and backoff strategies
|
246
|
-
|
247
|
-
### Configuration Files
|
248
|
-
|
249
|
-
Create a JSON configuration file for custom settings:
|
250
|
-
|
251
|
-
```json
|
252
|
-
{
|
253
|
-
"max_retries": 2,
|
254
|
-
"batch_size": 10,
|
255
|
-
"github_min_remaining": 500,
|
256
|
-
"cache_ttl": 7200,
|
257
|
-
"model_path": "~/models/your-model.gguf"
|
258
|
-
}
|
259
|
-
```
|
260
|
-
|
261
|
-
Use with: `python3 -m rust_crate_pipeline --config-file config.json`
|
262
|
-
|
263
|
-
## 🐳 Docker Deployment
|
264
|
-
|
265
|
-
### Using Docker Compose (Recommended)
|
266
|
-
|
267
|
-
```bash
|
268
|
-
# Set up environment
|
269
|
-
echo "GITHUB_TOKEN=your_token_here" > .env
|
270
|
-
|
271
|
-
# Run with compose
|
272
|
-
docker-compose up -d
|
273
|
-
|
274
|
-
# Monitor logs
|
275
|
-
docker-compose logs -f
|
276
|
-
```
|
277
|
-
|
278
|
-
### Manual Docker Commands
|
279
|
-
|
280
|
-
```bash
|
281
|
-
# Build image
|
282
|
-
docker build -t rust-crate-pipeline .
|
283
|
-
|
284
|
-
# Run container
|
285
|
-
docker run -e GITHUB_TOKEN="your_token" \
|
286
|
-
-e PRODUCTION=true \
|
287
|
-
-v $(pwd)/output:/app/output \
|
288
|
-
rust-crate-pipeline
|
289
|
-
|
290
|
-
# Background execution
|
291
|
-
docker run -d --name pipeline \
|
292
|
-
-e GITHUB_TOKEN="your_token" \
|
293
|
-
rust-crate-pipeline
|
294
|
-
```
|
295
|
-
|
296
|
-
### Docker Environment Variables
|
297
|
-
|
298
|
-
| Variable | Description | Default |
|
299
|
-
|----------|-------------|---------|
|
300
|
-
| `GITHUB_TOKEN` | GitHub Personal Access Token | Required |
|
301
|
-
| `PRODUCTION` | Enable production mode | `false` |
|
302
|
-
| `PYTHONUNBUFFERED` | Force unbuffered output | `1` |
|
303
|
-
|
304
|
-
## 📊 Output & Data Format
|
305
|
-
|
306
|
-
### Output Structure
|
307
|
-
|
308
|
-
```text
|
309
|
-
output/
|
310
|
-
├── enriched_crates_YYYYMMDD_HHMMSS.json # Main results
|
311
|
-
├── metadata_YYYYMMDD_HHMMSS.json # Raw metadata
|
312
|
-
├── errors_YYYYMMDD_HHMMSS.log # Error log
|
313
|
-
└── checkpoints/
|
314
|
-
    └── checkpoint_N.json # Progress saves
|
315
|
-
```
|
316
|
-
|
317
|
-
### Data Schema
|
318
|
-
|
319
|
-
Each processed crate includes:
|
320
|
-
|
321
|
-
```json
|
322
|
-
{
|
323
|
-
"name": "serde",
|
324
|
-
"version": "1.0.193",
|
325
|
-
"description": "A generic serialization/deserialization framework",
|
326
|
-
"repository": "https://github.com/serde-rs/serde",
|
327
|
-
"downloads": 50000000,
|
328
|
-
"github_stars": 8500,
|
329
|
-
"category": "Serialization",
|
330
|
-
"use_case": "Data serialization and deserialization",
|
331
|
-
"feature_summary": "Compile-time serialization framework...",
|
332
|
-
"dependencies": [...],
|
333
|
-
"security_analysis": {...},
|
334
|
-
"source_metrics": {...}
|
335
|
-
}
|
336
|
-
```
|
337
|
-
|
338
|
-
## 🔍 Monitoring & Troubleshooting
|
339
|
-
|
340
|
-
### Common Issues & Solutions
|
341
|
-
|
342
|
-
#### GitHub Token Problems
|
343
|
-
|
344
|
-
```bash
|
345
|
-
# Check token status
|
346
|
-
python3 check_github_token.py
|
347
|
-
|
348
|
-
# Common error: Rate limit warnings
|
349
|
-
[WARNING] GitHub API rate limit low: 60 remaining
|
350
|
-
# Solution: Set GITHUB_TOKEN environment variable
|
351
|
-
|
352
|
-
# Common error: Invalid token
|
353
|
-
[ERROR] GitHub token is invalid or expired
|
354
|
-
# Solution: Generate new token at https://github.com/settings/tokens
|
355
|
-
```
|
356
|
-
|
357
|
-
#### LLM Validation Retries
|
358
|
-
|
359
|
-
```bash
|
360
|
-
# Common warning: Validation failures
|
361
|
-
[WARNING] Validation failed on attempt 1/3. Retrying...
|
362
|
-
# Solution: Use production mode to reduce retry warnings
|
363
|
-
PRODUCTION=true python3 -m rust_crate_pipeline
|
364
|
-
```
|
365
|
-
|
366
|
-
#### Resource Issues
|
367
|
-
|
368
|
-
```bash
|
369
|
-
# Memory usage optimization
|
370
|
-
python3 -m rust_crate_pipeline --batch-size 3
|
371
|
-
|
372
|
-
# Disk space monitoring
|
373
|
-
df -h . # Check available space
|
374
|
-
|
375
|
-
# Network timeout handling
|
376
|
-
python3 -m rust_crate_pipeline --log-level DEBUG
|
377
|
-
```
|
378
|
-
|
379
|
-
### Performance Monitoring
|
380
|
-
|
381
|
-
#### Processing Times (Typical)
|
382
|
-
|
383
|
-
- **Metadata only**: 2-3 seconds per crate
|
384
|
-
- **With AI enrichment**: 15-30 seconds per crate
|
385
|
-
- **Full analysis**: 45-60 seconds per crate
|
386
|
-
|
387
|
-
#### Resource Usage
|
388
|
-
|
389
|
-
- **Memory**: 2-4GB during processing
|
390
|
-
- **Storage**: 10-50MB per crate (temporary files)
|
391
|
-
- **Network**: 1-5MB per crate (API calls)
|
392
|
-
|
393
|
-
#### Monitoring Commands
|
394
|
-
|
395
|
-
```bash
|
396
|
-
# Check process status
|
397
|
-
ps aux | grep rust_crate_pipeline
|
398
|
-
|
399
|
-
# Monitor resource usage
|
400
|
-
top -p $(pgrep -f rust_crate_pipeline)
|
401
|
-
|
402
|
-
# Check logs
|
403
|
-
tail -f pipeline.log
|
404
|
-
|
405
|
-
# Docker monitoring
|
406
|
-
docker stats pipeline
|
407
|
-
```
|
408
|
-
|
409
|
-
## 🚀 Deployment Guide
|
410
|
-
|
411
|
-
### SSH/Remote Server Deployment
|
412
|
-
|
413
|
-
```bash
|
414
|
-
# Background execution with logging
|
415
|
-
nohup python3 run_production.py > pipeline.log 2>&1 &
|
416
|
-
|
417
|
-
# Monitor progress
|
418
|
-
tail -f pipeline.log
|
419
|
-
|
420
|
-
# Check process
|
421
|
-
jobs
|
422
|
-
ps aux | grep rust_crate_pipeline
|
423
|
-
```
|
424
|
-
|
425
|
-
### Systemd Service (Linux)
|
426
|
-
|
427
|
-
Create `/etc/systemd/system/rust-crate-pipeline.service`:
|
428
|
-
|
429
|
-
```ini
|
430
|
-
[Unit]
|
431
|
-
Description=Rust Crate Data Pipeline
|
432
|
-
After=network.target
|
433
|
-
|
434
|
-
[Service]
|
435
|
-
Type=simple
|
436
|
-
User=your-username
|
437
|
-
WorkingDirectory=/path/to/pipeline
|
438
|
-
Environment=GITHUB_TOKEN=your_token_here
|
439
|
-
Environment=PRODUCTION=true
|
440
|
-
ExecStart=/usr/bin/python3 run_production.py
|
441
|
-
Restart=on-failure
|
442
|
-
RestartSec=30
|
443
|
-
|
444
|
-
[Install]
|
445
|
-
WantedBy=multi-user.target
|
446
|
-
```
|
447
|
-
|
448
|
-
Enable and start:
|
449
|
-
|
450
|
-
```bash
|
451
|
-
sudo systemctl daemon-reload
|
452
|
-
sudo systemctl enable rust-crate-pipeline
|
453
|
-
sudo systemctl start rust-crate-pipeline
|
454
|
-
sudo systemctl status rust-crate-pipeline
|
455
|
-
```
|
456
|
-
|
457
|
-
## 🏗️ Architecture
|
458
|
-
|
459
|
-
### Core Components
|
460
|
-
|
461
|
-
1. **CrateDataPipeline**: Main orchestration class that coordinates all processing
|
462
|
-
2. **LLMEnricher**: Handles AI-powered enrichment using local LLM models
|
463
|
-
3. **CrateAPIClient**: Manages API interactions with crates.io and fallback sources
|
464
|
-
4. **GitHubBatchClient**: Optimized GitHub API client with rate limiting
|
465
|
-
5. **SourceAnalyzer**: Analyzes source code metrics and complexity
|
466
|
-
6. **SecurityAnalyzer**: Checks for security vulnerabilities and patterns
|
467
|
-
7. **UserBehaviorAnalyzer**: Tracks community engagement and version adoption
|
468
|
-
8. **DependencyAnalyzer**: Builds and analyzes dependency relationships
|
469
|
-
|
470
|
-
### Processing Flow
|
471
|
-
|
472
|
-
```text
|
473
|
-
1. Crate Discovery → 2. Metadata Fetching → 3. AI Enrichment
|
474
|
-
↓ ↓ ↓
|
475
|
-
4. Source Analysis → 5. Security Scanning → 6. Community Analysis
|
476
|
-
↓ ↓ ↓
|
477
|
-
7. Dependency Mapping → 8. Data Aggregation → 9. Report Generation
|
478
|
-
```
|
479
|
-
|
480
|
-
### Project Structure
|
481
|
-
|
482
|
-
```text
|
483
|
-
rust_crate_pipeline/
|
484
|
-
├── __init__.py # Package initialization
|
485
|
-
├── __main__.py # Entry point for python -m execution
|
486
|
-
├── main.py # CLI interface and main execution logic
|
487
|
-
├── config.py # Configuration classes and data models
|
488
|
-
├── pipeline.py # Main orchestration and workflow management
|
489
|
-
├── ai_processing.py # LLM integration and AI-powered enrichment
|
490
|
-
├── network.py # API clients and HTTP request handling
|
491
|
-
├── analysis.py # Source code, security, and dependency analysis
|
492
|
-
├── github_token_checker.py # Token validation and setup
|
493
|
-
├── production_config.py # Production optimizations
|
494
|
-
└── utils/ # Utility functions
|
495
|
-
    ├── logging_utils.py # Logging configuration and decorators
|
496
|
-
    └── file_utils.py # File operations and disk management
|
497
|
-
```
|
498
|
-
|
499
|
-
## 🧪 API Usage
|
500
|
-
|
501
|
-
### Programmatic Usage
|
502
|
-
|
503
|
-
```python
|
504
|
-
from rust_crate_pipeline import CrateDataPipeline, PipelineConfig
|
505
|
-
|
506
|
-
# Create custom configuration
|
507
|
-
config = PipelineConfig(
|
508
|
-
batch_size=5,
|
509
|
-
max_tokens=512,
|
510
|
-
model_path="/path/to/model.gguf"
|
511
|
-
)
|
512
|
-
|
513
|
-
# Initialize and run pipeline
|
514
|
-
pipeline = CrateDataPipeline(config)
|
515
|
-
pipeline.run()
|
516
|
-
|
517
|
-
# Or use individual components
|
518
|
-
from rust_crate_pipeline import LLMEnricher, SourceAnalyzer
|
519
|
-
|
520
|
-
enricher = LLMEnricher(config)
|
521
|
-
analyzer = SourceAnalyzer()
|
522
|
-
```
|
523
|
-
|
524
|
-
### Custom Processing
|
525
|
-
|
526
|
-
```python
|
527
|
-
# Process specific crates with custom options
|
528
|
-
pipeline = CrateDataPipeline(
|
529
|
-
config,
|
530
|
-
limit=50,
|
531
|
-
crate_list=["serde", "tokio", "actix-web"],
|
532
|
-
skip_ai=False,
|
533
|
-
output_dir="./custom_analysis"
|
534
|
-
)
|
535
|
-
```
|
536
|
-
|
537
|
-
## 🔧 Development & Contributing
|
538
|
-
|
539
|
-
### Development Setup
|
540
|
-
|
541
|
-
```bash
|
542
|
-
# Clone and install
|
543
|
-
git clone https://github.com/Superuser666-Sigil/SigilDERG-Data_Production.git
|
544
|
-
cd SigilDERG-Data_Production
|
545
|
-
pip install -r requirements.txt
|
546
|
-
|
547
|
-
# Run tests
|
548
|
-
python3 test_optimizations.py
|
549
|
-
python3 test_token_integration.py
|
550
|
-
|
551
|
-
# Verify installation
|
552
|
-
python3 check_github_token.py
|
553
|
-
```
|
554
|
-
|
555
|
-
### Adding Features
|
556
|
-
|
557
|
-
1. Implement new analyzer in `analysis.py`
|
558
|
-
2. Add configuration options to `config.py`
|
559
|
-
3. Integrate with pipeline in `pipeline.py`
|
560
|
-
4. Add CLI arguments in `main.py`
|
561
|
-
5. Update tests and documentation
|
562
|
-
|
563
|
-
## 📄 License
|
564
|
-
|
565
|
-
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
566
|
-
|
567
|
-
## 🙏 Acknowledgments
|
568
|
-
|
569
|
-
- **Rust Community** for the excellent crates ecosystem
|
570
|
-
- **crates.io** for comprehensive API access
|
571
|
-
- **GitHub** for repository metadata and community data
|
572
|
-
- **Deepseek** for powerful code-focused language models
|
573
|
-
- **llama.cpp** team for efficient local inference
|
574
|
-
|
575
|
-
## 📞 Support
|
576
|
-
|
577
|
-
- **Issues**: Report bugs and request features on the [issue tracker](https://github.com/Superuser666-Sigil/SigilDERG-Data_Production/issues)
|
578
|
-
- **Documentation**: Complete guides and API reference
|
579
|
-
- **Community**: Join discussions and get help
|
580
|
-
|
581
|
-
---
|
582
|
-
|
583
|
-
## Ready to analyze the Rust ecosystem! 🦀✨
|
584
|
-
|
585
|
-
📦 **Get started today:** [Install from PyPI](https://pypi.org/project/rust-crate-pipeline/)
|
@@ -1,19 +0,0 @@
|
|
1
|
-
rust_crate_pipeline/__init__.py,sha256=NxD8_OEGHEHUN9EfJj2S1rRyZ0UMkiF20LNSMnjL9Uk,1939
|
2
|
-
rust_crate_pipeline/__main__.py,sha256=fYgtPofuk4vkwiZ7ELP4GVMNj_QiKmZMSlvhzsNGuDs,155
|
3
|
-
rust_crate_pipeline/ai_processing.py,sha256=sj-qPtIVLuuY_VoWoLbcGQ6_eS_giQyXIPyAGAWOCrs,24814
|
4
|
-
rust_crate_pipeline/analysis.py,sha256=jcHHTBZ_zg5n4VGPXJYM7-NkNeL5hRdgvowkiim0onM,17663
|
5
|
-
rust_crate_pipeline/config.py,sha256=xX4j_vgXaQxVI6Q3UmazzEzFdm6kLhpGbM2Of_fZS6k,2336
|
6
|
-
rust_crate_pipeline/github_token_checker.py,sha256=_cyOiSYc1bCVczr6pUUJc_s822ic7Qi_IW3JtI_4C0w,3796
|
7
|
-
rust_crate_pipeline/main.py,sha256=bemr27xpXIFYEyXtcCQfZpAQ5pPycyiRZKP8nj9kork,10111
|
8
|
-
rust_crate_pipeline/network.py,sha256=MFtn_-9MRBUSehfjLboUBGOMk8gv2edjOjHCR_YEyGc,12677
|
9
|
-
rust_crate_pipeline/pipeline.py,sha256=aOLuIpfvDbPDCvft8ppUa0vRiFVdiz2wltpi26ZJaes,22769
|
10
|
-
rust_crate_pipeline/production_config.py,sha256=24YWT68Fo2Kl8v7Hn1WgqfPrikXma9VZEuEcMr7iDik,2282
|
11
|
-
rust_crate_pipeline/version.py,sha256=4JXcc5UI7bkW_OwMSDTrt2YpSLowN-WFH11PYQDr_BQ,2614
|
12
|
-
rust_crate_pipeline/utils/file_utils.py,sha256=IJOBBp6-w9pnCdqyGcRNwBph_iwI_zzULCdAULGFUy0,2097
|
13
|
-
rust_crate_pipeline/utils/logging_utils.py,sha256=5-o6ohm38sH1ozjZWHPlm9Wj7yILiUzvMsLJDeu11lk,2350
|
14
|
-
rust_crate_pipeline-1.4.0.dist-info/licenses/LICENSE,sha256=tpd4XNpbssrSx9-iErATOLrOh0ivNPfO2I5MAPUpats,1088
|
15
|
-
rust_crate_pipeline-1.4.0.dist-info/METADATA,sha256=srt7t9sB9uJ70LF0jB9gvolb3qb4BLUtdBFYWfiAPDA,17474
|
16
|
-
rust_crate_pipeline-1.4.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
17
|
-
rust_crate_pipeline-1.4.0.dist-info/entry_points.txt,sha256=9Rr_IRuFRIridXxUSdEJbB3ba0NnpEfKmknZXFdYRC0,70
|
18
|
-
rust_crate_pipeline-1.4.0.dist-info/top_level.txt,sha256=GUdB7RyxHLhijQxui_KTy3B8p_L2APui9C6RYa0FuaE,20
|
19
|
-
rust_crate_pipeline-1.4.0.dist-info/RECORD,,
|
File without changes
|
{rust_crate_pipeline-1.4.0.dist-info → rust_crate_pipeline-1.4.1.dist-info}/entry_points.txt
RENAMED
File without changes
|
{rust_crate_pipeline-1.4.0.dist-info → rust_crate_pipeline-1.4.1.dist-info}/licenses/LICENSE
RENAMED
File without changes
|
File without changes
|