rust-crate-pipeline: rust_crate_pipeline-1.2.0-py3-none-any.whl → rust_crate_pipeline-1.2.3-py3-none-any.whl
This diff shows the published contents of two package versions as they appear in their public registry. It is provided for informational purposes only.
- rust_crate_pipeline/ai_processing.py +122 -31
- rust_crate_pipeline/version.py +1 -1
- {rust_crate_pipeline-1.2.0.dist-info → rust_crate_pipeline-1.2.3.dist-info}/METADATA +1 -1
- {rust_crate_pipeline-1.2.0.dist-info → rust_crate_pipeline-1.2.3.dist-info}/RECORD +8 -8
- {rust_crate_pipeline-1.2.0.dist-info → rust_crate_pipeline-1.2.3.dist-info}/WHEEL +0 -0
- {rust_crate_pipeline-1.2.0.dist-info → rust_crate_pipeline-1.2.3.dist-info}/entry_points.txt +0 -0
- {rust_crate_pipeline-1.2.0.dist-info → rust_crate_pipeline-1.2.3.dist-info}/licenses/LICENSE +0 -0
- {rust_crate_pipeline-1.2.0.dist-info → rust_crate_pipeline-1.2.3.dist-info}/top_level.txt +0 -0
@@ -14,11 +14,20 @@ class LLMEnricher:
         self.model = self._load_model()

     def _load_model(self):
+        """Optimized for GCP g2-standard-4 with L4 GPU (24GB VRAM)"""
         return Llama(
             model_path=self.config.model_path,
-            n_ctx=
-            n_batch=
-            n_gpu_layers
+            n_ctx=4096,              # Larger context for L4's 24GB VRAM
+            n_batch=1024,            # Larger batch size for better throughput
+            n_gpu_layers=-1,         # Load ALL layers on GPU (L4 has plenty VRAM)
+            n_threads=4,             # Match the 4 vCPUs
+            n_threads_batch=4,       # Parallel batch processing
+            use_mmap=True,           # Memory-mapped files for efficiency
+            use_mlock=True,          # Lock model in memory
+            rope_scaling_type=1,     # RoPE scaling for longer contexts
+            rope_freq_base=10000.0,  # Base frequency for RoPE
+            flash_attn=True,         # Enable flash attention if available
+            verbose=False            # Reduce logging overhead
         )

     def estimate_tokens(self, text: str) -> int:
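As a reference point, here is a minimal, self-contained sketch of how these llama-cpp-python settings fit together; the model path and prompt are hypothetical placeholders, not values from the package:

```python
# Minimal sketch, assuming llama-cpp-python is installed and a GGUF model
# exists at the (hypothetical) path below.
from llama_cpp import Llama

llm = Llama(
    model_path="/models/model.gguf",  # hypothetical path
    n_ctx=4096,       # context window sized for a 24GB-VRAM GPU
    n_batch=1024,     # larger batches improve prompt-processing throughput
    n_gpu_layers=-1,  # -1 offloads every layer to the GPU
    n_threads=4,      # match the vCPU count
    use_mmap=True,    # memory-map the weights instead of copying them
    use_mlock=True,   # pin the mapped weights so they are never swapped out
    verbose=False,
)

out = llm("Q: What is a Rust crate? A:", max_tokens=64, temperature=0.2)
print(out["choices"][0]["text"])
```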
@@ -165,14 +174,15 @@
         self,
         prompt: str,
         validation_func: Callable[[str], bool],
-        temp: float = 0.2,
-
+        temp: float = 0.2, max_tokens: int = 256,
+        retries: int = 4  # Increased from 2 to 4 for better success rates
     ) -> Optional[str]:
         """Run LLM with validation and automatic retry on failure"""
+        result = None
         for attempt in range(retries):
             try:
-                #
-                adjusted_temp = temp * (1 + (attempt * 0.1))
+                # More generous temperature adjustment for better variety
+                adjusted_temp = temp * (1 + (attempt * 0.2))  # 20% increases instead of 10%
                 result = self.run_llama(prompt, temp=adjusted_temp, max_tokens=max_tokens)

                 # Validate the result
@@ -181,19 +191,19 @@

                 # If we get here, validation failed - use debug level for early attempts
                 if attempt == retries - 1:
-                    logging.
+                    logging.debug(f"All {retries} validation attempts failed, using last available result.")
                 else:
-                    logging.debug(f"Validation failed on attempt {attempt+1}/{retries}. Retrying with
+                    logging.debug(f"Validation failed on attempt {attempt+1}/{retries}. Retrying with adjusted temp={adjusted_temp:.2f}")

-                #
-                if attempt
+                # Only simplify prompt on later attempts (attempt 2+)
+                if attempt >= 2:
                     prompt = self.simplify_prompt(prompt)

             except Exception as e:
                 logging.error(f"Generation error on attempt {attempt+1}: {str(e)}")
-
-
-                time.sleep(
+
+                # More generous backoff - give the model more time
+                time.sleep(2.0 + (attempt * 1.0))  # 2s, 3s, 4s, 5s delays

         # If we exhausted all retries, return the last result even if not perfect
         return result if 'result' in locals() else None
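The loop above is a standard validate-and-retry pattern. A self-contained sketch of the same idea, with a stubbed-out generator standing in for `run_llama` (the stub and names here are illustrative, not code from the package):

```python
import logging
import time
from typing import Callable, Optional

def generate_with_validation(
    generate: Callable[[str, float], Optional[str]],  # stub for run_llama
    validate: Callable[[str], bool],
    prompt: str,
    temp: float = 0.2,
    retries: int = 4,
) -> Optional[str]:
    result = None
    for attempt in range(retries):
        try:
            # Raise temperature 20% per attempt, as in the diff above
            adjusted_temp = temp * (1 + attempt * 0.2)
            result = generate(prompt, adjusted_temp)
            if result and validate(result):
                return result
            logging.debug("Attempt %d/%d failed validation", attempt + 1, retries)
        except Exception as exc:
            logging.error("Generation error on attempt %d: %s", attempt + 1, exc)
            time.sleep(2.0 + attempt * 1.0)  # 2s, 3s, 4s backoff
    return result  # last output, even if it never validated

def _flaky(prompt: str, temp: float) -> str:
    # Toy generator: only "succeeds" once the temperature has been bumped
    return "a valid answer" if temp > 0.25 else ""

print(generate_with_validation(_flaky, lambda s: len(s) > 5, "describe serde"))
```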
@@ -245,11 +255,7 @@
             prompt,
             lambda x: len(x) > 50,
             temp=0.3,
-            max_tokens=300
-        )
-
-        # Extract key dependencies for context
-        key_deps = [dep.get("crate_id") for dep in crate.dependencies[:5] if dep.get("kind") == "normal"]
+            max_tokens=300 )

         # Generate other enrichments
         enriched.feature_summary = self.summarize_features(crate)
@@ -296,13 +302,13 @@
 
     def classify_use_case(self, crate: CrateMetadata, readme_summary: str) -> str:
         """Classify the use case of a crate with rich context"""
-        try:
-            # Calculate available tokens for prompt (classification usually needs ~20 response tokens)
+        try: # Calculate available tokens for prompt (classification usually needs ~20 response tokens)
             available_prompt_tokens = self.config.model_token_limit - 200  # Reserve for response

             joined = ", ".join(crate.keywords[:10]) if crate.keywords else "None"
-            key_deps = [dep.get("crate_id") for dep in crate.dependencies[:5]
-
+            key_deps = [dep.get("crate_id") for dep in crate.dependencies[:5]
+                        if dep.get("kind") == "normal" and dep.get("crate_id")]
+            key_deps_str = ", ".join(str(dep) for dep in key_deps) if key_deps else "None"

             # Adaptively truncate different sections based on importance
             token_budget = available_prompt_tokens - 400  # Reserve tokens for prompt template
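The new `key_deps` comprehension keeps only normal (non-dev, non-build) dependency records that actually carry a `crate_id`. On sample records shaped like the ones the diff accesses (the data below is illustrative, not from the package), it behaves like this:

```python
deps = [
    {"crate_id": "serde", "kind": "normal"},
    {"crate_id": "criterion", "kind": "dev"},
    {"crate_id": "tokio", "kind": "normal"},
]
key_deps = [d.get("crate_id") for d in deps[:5]
            if d.get("kind") == "normal" and d.get("crate_id")]
key_deps_str = ", ".join(str(d) for d in key_deps) if key_deps else "None"
print(key_deps_str)  # -> serde, tokio
```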
@@ -341,13 +347,12 @@
             f"Category (pick only one): [AI, Database, Web Framework, Networking, Serialization, Utilities, DevTools, ML, Cryptography, Unknown]\n"
             f"<|end|>"
         )
-
-        # Validate classification with retry
+        # Validate classification with retry - more generous parameters
         result = self.validate_and_retry(
             prompt,
             validation_func=self.validate_classification,
-            temp=0.1,
-            max_tokens=20
+            temp=0.2,  # Increased from 0.1 for more variety
+            max_tokens=50  # Increased from 20 to allow more complete responses
         )

         return result or "Unknown"
@@ -377,13 +382,12 @@
             f"Create exactly 5 pairs.\n"
             f"<|end|>"
         )
-
-        # Use validation for retry
+        # Use validation for retry - more generous parameters
         result = self.validate_and_retry(
             prompt,
             validation_func=self.validate_factual_pairs,
-            temp=0.6,
-            max_tokens=500
+            temp=0.7,  # Increased from 0.6 for more creativity
+            max_tokens=800  # Increased from 500 for more complete responses
         )

         return result or "Factual pairs generation failed."
@@ -396,3 +400,90 @@
         score = (crate.downloads / 1000) + (crate.github_stars * 10)
         score += len(self.truncate_content(crate.readme, 1000)) / 500
         return round(score, 2)
+
+    def batch_process_prompts(self, prompts: list[tuple[str, float, int]], batch_size: int = 4) -> list[Optional[str]]:
+        """
+        L4 GPU-optimized batch processing for multiple prompts
+        Processes prompts in batches to maximize GPU utilization
+
+        Args:
+            prompts: List of (prompt, temperature, max_tokens) tuples
+            batch_size: Number of prompts to process simultaneously (tuned for L4)
+        """
+        results = []
+
+        # Process in batches optimized for L4's capabilities
+        for i in range(0, len(prompts), batch_size):
+            batch = prompts[i:i + batch_size]
+            batch_results = []
+
+            for prompt, temp, max_tokens in batch:
+                try:
+                    # Prepare prompt with context preservation
+                    if self.estimate_tokens(prompt) > 3500:  # Leave room for response
+                        prompt = self.smart_truncate(prompt, 3500)
+
+                    # Use optimized parameters for L4
+                    output = self.model(
+                        prompt,
+                        max_tokens=max_tokens,
+                        temperature=temp,
+                        top_p=0.95,  # Nucleus sampling for better quality
+                        repeat_penalty=1.1,  # Reduce repetition
+                        stop=["<|end|>", "<|user|>", "<|system|>"],
+                        echo=False,  # Don't echo input
+                        stream=False  # Batch mode, no streaming
+                    )
+
+                    result = self.clean_output(output["choices"][0]["text"])
+                    batch_results.append(result)
+
+                except Exception as e:
+                    logging.warning(f"Batch processing error: {e}")
+                    batch_results.append(None)
+
+            results.extend(batch_results)
+
+            # Small delay between batches to prevent thermal throttling
+            if i + batch_size < len(prompts):
+                time.sleep(0.1)
+
+        return results
+
+    def smart_context_management(self, context_history: list[str], new_prompt: str) -> str:
+        """
+        Intelligent context management for prefix cache optimization
+        Maximizes cache hits by preserving common context patterns
+        """
+        # Calculate available tokens for context
+        base_tokens = self.estimate_tokens(new_prompt)
+        available_context = 4000 - base_tokens  # Leave buffer for response
+
+        if available_context <= 0:
+            return new_prompt
+
+        # Build context from most recent and most relevant history
+        context_parts = []
+        tokens_used = 0
+
+        # Prioritize recent context (better cache hits)
+        for context in reversed(context_history[-5:]):  # Last 5 contexts
+            context_tokens = self.estimate_tokens(context)
+            if tokens_used + context_tokens <= available_context:
+                context_parts.insert(0, context)
+                tokens_used += context_tokens
+            else:
+                # Try to fit truncated version
+                remaining_tokens = available_context - tokens_used
+                if remaining_tokens > 100:  # Only if meaningful space left
+                    truncated = self.smart_truncate(context, remaining_tokens)
+                    if truncated:
+                        context_parts.insert(0, truncated)
+                break
+
+        # Combine context with new prompt
+        if context_parts:
+            full_context = "\n\n---\n\n".join(context_parts)
+            return f"{full_context}\n\n---\n\n{new_prompt}"
+
+        return new_prompt
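A hypothetical usage sketch for the two new methods; the `config` object, prompts, and history are placeholders, and `LLMEnricher(config)` is assumed to match the package's existing constructor:

```python
# Assumed construction; the real config class is defined elsewhere in the package.
enricher = LLMEnricher(config)

# Batch mode: (prompt, temperature, max_tokens) tuples, as the docstring specifies
prompts = [
    ("Summarize the serde crate.", 0.3, 300),
    ("Classify the tokio crate.", 0.2, 50),
]
results = enricher.batch_process_prompts(prompts, batch_size=4)

# Context reuse: the most recent history entries are prepended, separated by
# "---" markers, until the ~4000-token budget is exhausted
history = ["Earlier crate summary ...", "Earlier classification ..."]
full_prompt = enricher.smart_context_management(history, "Summarize the rand crate.")
```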
rust_crate_pipeline/version.py
CHANGED
{rust_crate_pipeline-1.2.0.dist-info → rust_crate_pipeline-1.2.3.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: rust-crate-pipeline
-Version: 1.2.0
+Version: 1.2.3
 Summary: A comprehensive system for gathering, enriching, and analyzing metadata for Rust crates using AI-powered insights
 Home-page: https://github.com/DaveTmire85/SigilDERG-Data_Production
 Author: SuperUser666-Sigil
{rust_crate_pipeline-1.2.0.dist-info → rust_crate_pipeline-1.2.3.dist-info}/RECORD
CHANGED
@@ -1,6 +1,6 @@
 rust_crate_pipeline/__init__.py,sha256=m9fb1WGbyOimxK2e18FSgvLWGYBwbLoHM_mscr-nAPs,1429
 rust_crate_pipeline/__main__.py,sha256=fYgtPofuk4vkwiZ7ELP4GVMNj_QiKmZMSlvhzsNGuDs,155
-rust_crate_pipeline/ai_processing.py,sha256=
+rust_crate_pipeline/ai_processing.py,sha256=B93rCDdxE-UkYMjmT0UotQTahx9-Lgzec7_bjBd3cUs,23240
 rust_crate_pipeline/analysis.py,sha256=ijP4zp3cFnN09nZkeCluyAvbyAtAW_M2YSxALpQX8LY,18615
 rust_crate_pipeline/config.py,sha256=r4Y_5SD-lfrM1112edk9T0S0MiVxaNSSHk4q2yDrM88,1528
 rust_crate_pipeline/github_token_checker.py,sha256=MJqHP8J84NEZ6nzdutpC7iRnsP0kyqscjLUosvmI4MI,3768
@@ -8,12 +8,12 @@ rust_crate_pipeline/main.py,sha256=J8ORQA6s3wyWw2R3oB_IEm2J5tx1CFdspw5kb5Ep8zQ,6
 rust_crate_pipeline/network.py,sha256=t_G8eh_WHNugm_laMftcWVbHsmP0bOlTPnVW9DqF6SU,13375
 rust_crate_pipeline/pipeline.py,sha256=Uwfw4uLL3aN1gJl5xSwvvyaY9ceeP7LVr02IzNx0tPM,12033
 rust_crate_pipeline/production_config.py,sha256=2GT8bxytcrMRrcfjzpay5RTtATE3rbmDvNUBvVhrYSQ,2472
-rust_crate_pipeline/version.py,sha256=
+rust_crate_pipeline/version.py,sha256=r_w4Eokm27opXYKcOCTKax8TO7pFI5E3TkB0L9c62yY,1022
 rust_crate_pipeline/utils/file_utils.py,sha256=lnHeLrt1JYaQhRDKtA1TWR2HIyRO8zwOyWb-KmAmWgk,2126
 rust_crate_pipeline/utils/logging_utils.py,sha256=O4Jnr_k9dBchrVqXf-vqtDKgizDtL_ljh8g7G2VCX_c,2241
-rust_crate_pipeline-1.2.
-rust_crate_pipeline-1.2.
-rust_crate_pipeline-1.2.
-rust_crate_pipeline-1.2.
-rust_crate_pipeline-1.2.
-rust_crate_pipeline-1.2.
+rust_crate_pipeline-1.2.3.dist-info/licenses/LICENSE,sha256=tpd4XNpbssrSx9-iErATOLrOh0ivNPfO2I5MAPUpats,1088
+rust_crate_pipeline-1.2.3.dist-info/METADATA,sha256=1bU7P1g6veyD0hJ78cjGJcVWRTujAF6Q6RL_CV_MVIY,16741
+rust_crate_pipeline-1.2.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+rust_crate_pipeline-1.2.3.dist-info/entry_points.txt,sha256=9Rr_IRuFRIridXxUSdEJbB3ba0NnpEfKmknZXFdYRC0,70
+rust_crate_pipeline-1.2.3.dist-info/top_level.txt,sha256=GUdB7RyxHLhijQxui_KTy3B8p_L2APui9C6RYa0FuaE,20
+rust_crate_pipeline-1.2.3.dist-info/RECORD,,
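Each RECORD row has the form `path,sha256=<digest>,size`, where the digest is the URL-safe base64 encoding of the file's SHA-256 hash with trailing `=` padding stripped (per the wheel spec). A small sketch for verifying one entry of an unpacked wheel:

```python
import base64
import hashlib

def record_digest(path: str) -> str:
    """Return the RECORD-style sha256 digest for a file."""
    with open(path, "rb") as f:
        raw = hashlib.sha256(f.read()).digest()
    return base64.urlsafe_b64encode(raw).rstrip(b"=").decode("ascii")

# Run inside the unpacked 1.2.3 wheel, this should print
# sha256=r_w4Eokm27opXYKcOCTKax8TO7pFI5E3TkB0L9c62yY for version.py
print("sha256=" + record_digest("rust_crate_pipeline/version.py"))
```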
{rust_crate_pipeline-1.2.0.dist-info → rust_crate_pipeline-1.2.3.dist-info}/WHEEL
RENAMED
File without changes

{rust_crate_pipeline-1.2.0.dist-info → rust_crate_pipeline-1.2.3.dist-info}/entry_points.txt
RENAMED
File without changes

{rust_crate_pipeline-1.2.0.dist-info → rust_crate_pipeline-1.2.3.dist-info}/licenses/LICENSE
RENAMED
File without changes

{rust_crate_pipeline-1.2.0.dist-info → rust_crate_pipeline-1.2.3.dist-info}/top_level.txt
RENAMED
File without changes