rust_crate_pipeline-1.2.0-py3-none-any.whl → rust_crate_pipeline-1.2.3-py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
rust_crate_pipeline/ai_processing.py
@@ -14,11 +14,20 @@ class LLMEnricher:
         self.model = self._load_model()
 
     def _load_model(self):
+        """Optimized for GCP g2-standard-4 with L4 GPU (24GB VRAM)"""
         return Llama(
             model_path=self.config.model_path,
-            n_ctx=1024,
-            n_batch=512,
-            n_gpu_layers=32
+            n_ctx=4096,              # Larger context for L4's 24GB VRAM
+            n_batch=1024,            # Larger batch size for better throughput
+            n_gpu_layers=-1,         # Load ALL layers on GPU (L4 has plenty VRAM)
+            n_threads=4,             # Match the 4 vCPUs
+            n_threads_batch=4,       # Parallel batch processing
+            use_mmap=True,           # Memory-mapped files for efficiency
+            use_mlock=True,          # Lock model in memory
+            rope_scaling_type=1,     # RoPE scaling for longer contexts
+            rope_freq_base=10000.0,  # Base frequency for RoPE
+            flash_attn=True,         # Enable flash attention if available
+            verbose=False            # Reduce logging overhead
         )
 
     def estimate_tokens(self, text: str) -> int:
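As a quick sanity check of the new loader settings, the constructor can be exercised standalone with llama-cpp-python. This is a minimal sketch, assuming the package is installed and a local GGUF model exists at the placeholder path below; the path and prompt are illustrative, not part of the pipeline:

    # Minimal sketch: verify the L4-oriented settings load and generate.
    from llama_cpp import Llama

    llm = Llama(
        model_path="./models/model.gguf",  # placeholder path, not from the package
        n_ctx=4096,        # context window matching the new default
        n_batch=1024,
        n_gpu_layers=-1,   # -1 offloads every layer to the GPU
        n_threads=4,
        use_mmap=True,
        use_mlock=True,
        verbose=False,
    )
    out = llm("Q: What is a Rust crate? A:", max_tokens=32, stop=["\n"])
    print(out["choices"][0]["text"].strip())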
@@ -165,14 +174,15 @@ class LLMEnricher:
         self,
         prompt: str,
         validation_func: Callable[[str], bool],
-        temp: float = 0.2,
-        max_tokens: int = 256, retries: int = 2  # Reduced default retries
+        temp: float = 0.2, max_tokens: int = 256,
+        retries: int = 4  # Increased from 2 to 4 for better success rates
     ) -> Optional[str]:
         """Run LLM with validation and automatic retry on failure"""
+        result = None
         for attempt in range(retries):
             try:
-                # Adjust temperature slightly upward on retries to get different results
-                adjusted_temp = temp * (1 + (attempt * 0.1))
+                # More generous temperature adjustment for better variety
+                adjusted_temp = temp * (1 + (attempt * 0.2))  # 20% increases instead of 10%
                 result = self.run_llama(prompt, temp=adjusted_temp, max_tokens=max_tokens)
 
                 # Validate the result
@@ -181,19 +191,19 @@ class LLMEnricher:
 
                 # If we get here, validation failed - use debug level for early attempts
                 if attempt == retries - 1:
-                    logging.warning(f"Final validation attempt failed. Using best available result.")
+                    logging.debug(f"All {retries} validation attempts failed, using last available result.")
                 else:
-                    logging.debug(f"Validation failed on attempt {attempt+1}/{retries}. Retrying with modified parameters.")
+                    logging.debug(f"Validation failed on attempt {attempt+1}/{retries}. Retrying with adjusted temp={adjusted_temp:.2f}")
 
-                # For the last attempt, simplify the prompt
-                if attempt == retries - 2:
+                # Only simplify prompt on later attempts (attempt 2+)
+                if attempt >= 2:
                     prompt = self.simplify_prompt(prompt)
 
             except Exception as e:
                 logging.error(f"Generation error on attempt {attempt+1}: {str(e)}")
-
-                # Reduced backoff to minimize waiting time
-                time.sleep(1.0 + (attempt * 0.5))
+
+                # More generous backoff - give the model more time
+                time.sleep(2.0 + (attempt * 1.0))  # 2s, 3s, 4s, 5s delays
 
         # If we exhausted all retries, return the last result even if not perfect
         return result if 'result' in locals() else None
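The new schedule is easy to verify by hand. For the default temp=0.2 and retries=4, the per-attempt temperatures and error backoffs work out as follows (note that with result = None now initialized up front, the 'result' in locals() guard is redundant but harmless):

    # Worked example of the retry schedule above (base temp 0.2, retries=4).
    base_temp, retries = 0.2, 4
    for attempt in range(retries):
        adjusted_temp = base_temp * (1 + attempt * 0.2)  # 0.20, 0.24, 0.28, 0.32
        backoff = 2.0 + attempt * 1.0                    # 2s, 3s, 4s, 5s on errors
        print(f"attempt {attempt + 1}: temp={adjusted_temp:.2f}, backoff={backoff:.1f}s")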
@@ -245,11 +255,7 @@ class LLMEnricher:
             prompt,
             lambda x: len(x) > 50,
             temp=0.3,
-            max_tokens=300
-        )
-
-        # Extract key dependencies for context
-        key_deps = [dep.get("crate_id") for dep in crate.dependencies[:5] if dep.get("kind") == "normal"]
+            max_tokens=300 )
 
         # Generate other enrichments
         enriched.feature_summary = self.summarize_features(crate)
@@ -296,13 +302,13 @@ class LLMEnricher:
 
     def classify_use_case(self, crate: CrateMetadata, readme_summary: str) -> str:
         """Classify the use case of a crate with rich context"""
-        try:
-            # Calculate available tokens for prompt (classification usually needs ~20 response tokens)
+        try:  # Calculate available tokens for prompt (classification usually needs ~20 response tokens)
             available_prompt_tokens = self.config.model_token_limit - 200  # Reserve for response
 
             joined = ", ".join(crate.keywords[:10]) if crate.keywords else "None"
-            key_deps = [dep.get("crate_id") for dep in crate.dependencies[:5] if dep.get("kind") == "normal"]
-            key_deps_str = ", ".join(key_deps) if key_deps else "None"
+            key_deps = [dep.get("crate_id") for dep in crate.dependencies[:5]
+                        if dep.get("kind") == "normal" and dep.get("crate_id")]
+            key_deps_str = ", ".join(str(dep) for dep in key_deps) if key_deps else "None"
 
             # Adaptively truncate different sections based on importance
             token_budget = available_prompt_tokens - 400  # Reserve tokens for prompt template
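The tightened comprehension now also drops dependencies whose crate_id is missing or falsy, which is what makes the str(dep) join safe. A small self-contained illustration with made-up data:

    # Illustrative behavior of the tightened dependency filter (hypothetical data).
    deps = [
        {"crate_id": "serde", "kind": "normal"},
        {"crate_id": None, "kind": "normal"},      # dropped: falsy crate_id
        {"crate_id": "criterion", "kind": "dev"},  # dropped: not a "normal" dep
    ]
    key_deps = [d.get("crate_id") for d in deps[:5]
                if d.get("kind") == "normal" and d.get("crate_id")]
    print(", ".join(str(d) for d in key_deps) if key_deps else "None")  # -> serde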
@@ -341,13 +347,12 @@ class LLMEnricher:
             f"Category (pick only one): [AI, Database, Web Framework, Networking, Serialization, Utilities, DevTools, ML, Cryptography, Unknown]\n"
             f"<|end|>"
         )
-
-        # Validate classification with retry
+        # Validate classification with retry - more generous parameters
         result = self.validate_and_retry(
             prompt,
             validation_func=self.validate_classification,
-            temp=0.1,
-            max_tokens=20
+            temp=0.2,      # Increased from 0.1 for more variety
+            max_tokens=50  # Increased from 20 to allow more complete responses
         )
 
         return result or "Unknown"
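validate_classification is referenced here but not shown in this diff; a plausible sketch of such a validator, assuming it accepts only the categories enumerated in the prompt above (the function body is hypothetical):

    # Hypothetical validator consistent with the prompt's category list.
    CATEGORIES = {"AI", "Database", "Web Framework", "Networking", "Serialization",
                  "Utilities", "DevTools", "ML", "Cryptography", "Unknown"}

    def validate_classification(result: str) -> bool:
        return result is not None and result.strip() in CATEGORIES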
@@ -377,13 +382,12 @@ class LLMEnricher:
             f"Create exactly 5 pairs.\n"
             f"<|end|>"
         )
-
-        # Use validation for retry
+        # Use validation for retry - more generous parameters
         result = self.validate_and_retry(
             prompt,
             validation_func=self.validate_factual_pairs,
-            temp=0.6,
-            max_tokens=500
+            temp=0.7,       # Increased from 0.6 for more creativity
+            max_tokens=800  # Increased from 500 for more complete responses
         )
 
         return result or "Factual pairs generation failed."
@@ -396,3 +400,90 @@ class LLMEnricher:
         score = (crate.downloads / 1000) + (crate.github_stars * 10)
         score += len(self.truncate_content(crate.readme, 1000)) / 500
         return round(score, 2)
+
+    def batch_process_prompts(self, prompts: list[tuple[str, float, int]], batch_size: int = 4) -> list[Optional[str]]:
+        """
+        L4 GPU-optimized batch processing for multiple prompts
+        Processes prompts in batches to maximize GPU utilization
+
+        Args:
+            prompts: List of (prompt, temperature, max_tokens) tuples
+            batch_size: Number of prompts to process simultaneously (tuned for L4)
+        """
+        results = []
+
+        # Process in batches optimized for L4's capabilities
+        for i in range(0, len(prompts), batch_size):
+            batch = prompts[i:i + batch_size]
+            batch_results = []
+
+            for prompt, temp, max_tokens in batch:
+                try:
+                    # Prepare prompt with context preservation
+                    if self.estimate_tokens(prompt) > 3500:  # Leave room for response
+                        prompt = self.smart_truncate(prompt, 3500)
+
+                    # Use optimized parameters for L4
+                    output = self.model(
+                        prompt,
+                        max_tokens=max_tokens,
+                        temperature=temp,
+                        top_p=0.95,          # Nucleus sampling for better quality
+                        repeat_penalty=1.1,  # Reduce repetition
+                        stop=["<|end|>", "<|user|>", "<|system|>"],
+                        echo=False,          # Don't echo input
+                        stream=False         # Batch mode, no streaming
+                    )
+
+                    result = self.clean_output(output["choices"][0]["text"])
+                    batch_results.append(result)
+
+                except Exception as e:
+                    logging.warning(f"Batch processing error: {e}")
+                    batch_results.append(None)
+
+            results.extend(batch_results)
+
+            # Small delay between batches to prevent thermal throttling
+            if i + batch_size < len(prompts):
+                time.sleep(0.1)
+
+        return results
+
+    def smart_context_management(self, context_history: list[str], new_prompt: str) -> str:
+        """
+        Intelligent context management for prefix cache optimization
+        Maximizes cache hits by preserving common context patterns
+        """
+        # Calculate available tokens for context
+        base_tokens = self.estimate_tokens(new_prompt)
+        available_context = 4000 - base_tokens  # Leave buffer for response
+
+        if available_context <= 0:
+            return new_prompt
+
+        # Build context from most recent and most relevant history
+        context_parts = []
+        tokens_used = 0
+
+        # Prioritize recent context (better cache hits)
+        for context in reversed(context_history[-5:]):  # Last 5 contexts
+            context_tokens = self.estimate_tokens(context)
+            if tokens_used + context_tokens <= available_context:
+                context_parts.insert(0, context)
+                tokens_used += context_tokens
+            else:
+                # Try to fit truncated version
+                remaining_tokens = available_context - tokens_used
+                if remaining_tokens > 100:  # Only if meaningful space left
+                    truncated = self.smart_truncate(context, remaining_tokens)
+                    if truncated:
+                        context_parts.insert(0, truncated)
+                break
+
+        # Combine context with new prompt
+        if context_parts:
+            full_context = "\n\n---\n\n".join(context_parts)
+            return f"{full_context}\n\n---\n\n{new_prompt}"
+
+        return new_prompt
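A plausible call pattern for the two new helpers, assuming enricher is an already-initialized LLMEnricher (its constructor and config are outside this diff, and the prompts are illustrative):

    # Hypothetical usage; 'enricher' is an already-constructed LLMEnricher.
    prompts = [
        ("Summarize the crate 'serde' in one sentence.", 0.3, 100),
        ("List three common uses of the crate 'tokio'.", 0.4, 150),
    ]
    results = enricher.batch_process_prompts(prompts, batch_size=4)

    history = [r for r in results if r]  # keep successful generations as context
    followup = enricher.smart_context_management(
        history, "Compare the two crates discussed above.")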
rust_crate_pipeline/version.py
@@ -1,6 +1,6 @@
 """Version information for rust-crate-pipeline."""
 
-__version__ = "1.2.0"
+__version__ = "1.2.3"
 __version_info__ = tuple(int(x) for x in __version__.split("."))
 
 # Version history
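The derived tuple makes version comparisons straightforward; with the new string, the derivation above works out to:

    # Worked example of the __version_info__ derivation.
    __version__ = "1.2.3"
    __version_info__ = tuple(int(x) for x in __version__.split("."))  # (1, 2, 3)
    assert __version_info__ >= (1, 2)  # e.g., a simple feature gate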
rust_crate_pipeline-1.2.3.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: rust-crate-pipeline
-Version: 1.2.0
+Version: 1.2.3
 Summary: A comprehensive system for gathering, enriching, and analyzing metadata for Rust crates using AI-powered insights
 Home-page: https://github.com/DaveTmire85/SigilDERG-Data_Production
 Author: SuperUser666-Sigil
rust_crate_pipeline-1.2.3.dist-info/RECORD
@@ -1,6 +1,6 @@
 rust_crate_pipeline/__init__.py,sha256=m9fb1WGbyOimxK2e18FSgvLWGYBwbLoHM_mscr-nAPs,1429
 rust_crate_pipeline/__main__.py,sha256=fYgtPofuk4vkwiZ7ELP4GVMNj_QiKmZMSlvhzsNGuDs,155
-rust_crate_pipeline/ai_processing.py,sha256=Ma5Oo4_pRfhoyvti_ZF6xV9zi4kEukMRzBva76F7cEM,18351
+rust_crate_pipeline/ai_processing.py,sha256=B93rCDdxE-UkYMjmT0UotQTahx9-Lgzec7_bjBd3cUs,23240
 rust_crate_pipeline/analysis.py,sha256=ijP4zp3cFnN09nZkeCluyAvbyAtAW_M2YSxALpQX8LY,18615
 rust_crate_pipeline/config.py,sha256=r4Y_5SD-lfrM1112edk9T0S0MiVxaNSSHk4q2yDrM88,1528
 rust_crate_pipeline/github_token_checker.py,sha256=MJqHP8J84NEZ6nzdutpC7iRnsP0kyqscjLUosvmI4MI,3768
@@ -8,12 +8,12 @@ rust_crate_pipeline/main.py,sha256=J8ORQA6s3wyWw2R3oB_IEm2J5tx1CFdspw5kb5Ep8zQ,6
 rust_crate_pipeline/network.py,sha256=t_G8eh_WHNugm_laMftcWVbHsmP0bOlTPnVW9DqF6SU,13375
 rust_crate_pipeline/pipeline.py,sha256=Uwfw4uLL3aN1gJl5xSwvvyaY9ceeP7LVr02IzNx0tPM,12033
 rust_crate_pipeline/production_config.py,sha256=2GT8bxytcrMRrcfjzpay5RTtATE3rbmDvNUBvVhrYSQ,2472
-rust_crate_pipeline/version.py,sha256=Ne-Iy0D2YOCWyWVo3gFNVhuUg4tBtSnlqGIDUEeWtws,1022
+rust_crate_pipeline/version.py,sha256=r_w4Eokm27opXYKcOCTKax8TO7pFI5E3TkB0L9c62yY,1022
 rust_crate_pipeline/utils/file_utils.py,sha256=lnHeLrt1JYaQhRDKtA1TWR2HIyRO8zwOyWb-KmAmWgk,2126
 rust_crate_pipeline/utils/logging_utils.py,sha256=O4Jnr_k9dBchrVqXf-vqtDKgizDtL_ljh8g7G2VCX_c,2241
-rust_crate_pipeline-1.2.0.dist-info/licenses/LICENSE,sha256=tpd4XNpbssrSx9-iErATOLrOh0ivNPfO2I5MAPUpats,1088
-rust_crate_pipeline-1.2.0.dist-info/METADATA,sha256=0iLlshmEVa7L-CNZp2RtrG2eTyGULwT_wx-GfbckhD4,16741
-rust_crate_pipeline-1.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-rust_crate_pipeline-1.2.0.dist-info/entry_points.txt,sha256=9Rr_IRuFRIridXxUSdEJbB3ba0NnpEfKmknZXFdYRC0,70
-rust_crate_pipeline-1.2.0.dist-info/top_level.txt,sha256=GUdB7RyxHLhijQxui_KTy3B8p_L2APui9C6RYa0FuaE,20
-rust_crate_pipeline-1.2.0.dist-info/RECORD,,
+rust_crate_pipeline-1.2.3.dist-info/licenses/LICENSE,sha256=tpd4XNpbssrSx9-iErATOLrOh0ivNPfO2I5MAPUpats,1088
+rust_crate_pipeline-1.2.3.dist-info/METADATA,sha256=1bU7P1g6veyD0hJ78cjGJcVWRTujAF6Q6RL_CV_MVIY,16741
+rust_crate_pipeline-1.2.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+rust_crate_pipeline-1.2.3.dist-info/entry_points.txt,sha256=9Rr_IRuFRIridXxUSdEJbB3ba0NnpEfKmknZXFdYRC0,70
+rust_crate_pipeline-1.2.3.dist-info/top_level.txt,sha256=GUdB7RyxHLhijQxui_KTy3B8p_L2APui9C6RYa0FuaE,20
+rust_crate_pipeline-1.2.3.dist-info/RECORD,,