rust-crate-pipeline: rust_crate_pipeline-1.2.0-py3-none-any.whl → rust_crate_pipeline-1.2.3-py3-none-any.whl
This diff shows the published contents of two package versions as they appear in their public registry. It is provided for informational purposes only.
- rust_crate_pipeline/ai_processing.py +122 -31
- rust_crate_pipeline/version.py +1 -1
- {rust_crate_pipeline-1.2.0.dist-info → rust_crate_pipeline-1.2.3.dist-info}/METADATA +1 -1
- {rust_crate_pipeline-1.2.0.dist-info → rust_crate_pipeline-1.2.3.dist-info}/RECORD +8 -8
- {rust_crate_pipeline-1.2.0.dist-info → rust_crate_pipeline-1.2.3.dist-info}/WHEEL +0 -0
- {rust_crate_pipeline-1.2.0.dist-info → rust_crate_pipeline-1.2.3.dist-info}/entry_points.txt +0 -0
- {rust_crate_pipeline-1.2.0.dist-info → rust_crate_pipeline-1.2.3.dist-info}/licenses/LICENSE +0 -0
- {rust_crate_pipeline-1.2.0.dist-info → rust_crate_pipeline-1.2.3.dist-info}/top_level.txt +0 -0
@@ -14,11 +14,20 @@ class LLMEnricher:
         self.model = self._load_model()

     def _load_model(self):
+        """Optimized for GCP g2-standard-4 with L4 GPU (24GB VRAM)"""
         return Llama(
             model_path=self.config.model_path,
-            n_ctx=
-            n_batch=
-            n_gpu_layers
+            n_ctx=4096,              # Larger context for L4's 24GB VRAM
+            n_batch=1024,            # Larger batch size for better throughput
+            n_gpu_layers=-1,         # Load ALL layers on GPU (L4 has plenty VRAM)
+            n_threads=4,             # Match the 4 vCPUs
+            n_threads_batch=4,       # Parallel batch processing
+            use_mmap=True,           # Memory-mapped files for efficiency
+            use_mlock=True,          # Lock model in memory
+            rope_scaling_type=1,     # RoPE scaling for longer contexts
+            rope_freq_base=10000.0,  # Base frequency for RoPE
+            flash_attn=True,         # Enable flash attention if available
+            verbose=False            # Reduce logging overhead
         )

     def estimate_tokens(self, text: str) -> int:
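As a reference point, here is a minimal, self-contained sketch of how these llama-cpp-python settings fit together; the model path and prompt are hypothetical placeholders, not values from the package:

```python
# Minimal sketch, assuming llama-cpp-python is installed and a GGUF model
# exists at the (hypothetical) path below.
from llama_cpp import Llama

llm = Llama(
    model_path="/models/model.gguf",  # hypothetical path
    n_ctx=4096,       # context window sized for a 24GB-VRAM GPU
    n_batch=1024,     # larger batches improve prompt-processing throughput
    n_gpu_layers=-1,  # -1 offloads every layer to the GPU
    n_threads=4,      # match the vCPU count
    use_mmap=True,    # memory-map the weights instead of copying them
    use_mlock=True,   # pin the mapped weights so they are never swapped out
    verbose=False,
)

out = llm("Q: What is a Rust crate? A:", max_tokens=64, temperature=0.2)
print(out["choices"][0]["text"])
```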
@@ -165,14 +174,15 @@
         self,
         prompt: str,
         validation_func: Callable[[str], bool],
-        temp: float = 0.2,
-
+        temp: float = 0.2, max_tokens: int = 256,
+        retries: int = 4  # Increased from 2 to 4 for better success rates
     ) -> Optional[str]:
         """Run LLM with validation and automatic retry on failure"""
+        result = None
         for attempt in range(retries):
             try:
-                #
-                adjusted_temp = temp * (1 + (attempt * 0.1))
+                # More generous temperature adjustment for better variety
+                adjusted_temp = temp * (1 + (attempt * 0.2))  # 20% increases instead of 10%
                 result = self.run_llama(prompt, temp=adjusted_temp, max_tokens=max_tokens)

                 # Validate the result
@@ -181,19 +191,19 @@

                 # If we get here, validation failed - use debug level for early attempts
                 if attempt == retries - 1:
-                    logging.
+                    logging.debug(f"All {retries} validation attempts failed, using last available result.")
                 else:
-                    logging.debug(f"Validation failed on attempt {attempt+1}/{retries}. Retrying with
+                    logging.debug(f"Validation failed on attempt {attempt+1}/{retries}. Retrying with adjusted temp={adjusted_temp:.2f}")

-                #
-                if attempt
+                # Only simplify prompt on later attempts (attempt 2+)
+                if attempt >= 2:
                     prompt = self.simplify_prompt(prompt)

             except Exception as e:
                 logging.error(f"Generation error on attempt {attempt+1}: {str(e)}")
-
-
-                time.sleep(
+
+                # More generous backoff - give the model more time
+                time.sleep(2.0 + (attempt * 1.0))  # 2s, 3s, 4s, 5s delays

         # If we exhausted all retries, return the last result even if not perfect
         return result if 'result' in locals() else None
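The loop above is a standard validate-and-retry pattern. A self-contained sketch of the same idea, with a stubbed-out generator standing in for `run_llama` (the stub and names here are illustrative, not code from the package):

```python
import logging
import time
from typing import Callable, Optional

def generate_with_validation(
    generate: Callable[[str, float], Optional[str]],  # stub for run_llama
    validate: Callable[[str], bool],
    prompt: str,
    temp: float = 0.2,
    retries: int = 4,
) -> Optional[str]:
    result = None
    for attempt in range(retries):
        try:
            # Raise temperature 20% per attempt, as in the diff above
            adjusted_temp = temp * (1 + attempt * 0.2)
            result = generate(prompt, adjusted_temp)
            if result and validate(result):
                return result
            logging.debug("Attempt %d/%d failed validation", attempt + 1, retries)
        except Exception as exc:
            logging.error("Generation error on attempt %d: %s", attempt + 1, exc)
            time.sleep(2.0 + attempt * 1.0)  # 2s, 3s, 4s backoff
    return result  # last output, even if it never validated

def _flaky(prompt: str, temp: float) -> str:
    # Toy generator: only "succeeds" once the temperature has been bumped
    return "a valid answer" if temp > 0.25 else ""

print(generate_with_validation(_flaky, lambda s: len(s) > 5, "describe serde"))
```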
@@ -245,11 +255,7 @@
             prompt,
             lambda x: len(x) > 50,
             temp=0.3,
-            max_tokens=300
-        )
-
-        # Extract key dependencies for context
-        key_deps = [dep.get("crate_id") for dep in crate.dependencies[:5] if dep.get("kind") == "normal"]
+            max_tokens=300 )

         # Generate other enrichments
         enriched.feature_summary = self.summarize_features(crate)
@@ -296,13 +302,13 @@
 
     def classify_use_case(self, crate: CrateMetadata, readme_summary: str) -> str:
         """Classify the use case of a crate with rich context"""
-        try:
-            # Calculate available tokens for prompt (classification usually needs ~20 response tokens)
+        try: # Calculate available tokens for prompt (classification usually needs ~20 response tokens)
             available_prompt_tokens = self.config.model_token_limit - 200  # Reserve for response

             joined = ", ".join(crate.keywords[:10]) if crate.keywords else "None"
-            key_deps = [dep.get("crate_id") for dep in crate.dependencies[:5]
-
+            key_deps = [dep.get("crate_id") for dep in crate.dependencies[:5]
+                        if dep.get("kind") == "normal" and dep.get("crate_id")]
+            key_deps_str = ", ".join(str(dep) for dep in key_deps) if key_deps else "None"

             # Adaptively truncate different sections based on importance
             token_budget = available_prompt_tokens - 400  # Reserve tokens for prompt template
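The new `key_deps` comprehension keeps only normal (non-dev, non-build) dependency records that actually carry a `crate_id`. On sample records shaped like the ones the diff accesses (the data below is illustrative, not from the package), it behaves like this:

```python
deps = [
    {"crate_id": "serde", "kind": "normal"},
    {"crate_id": "criterion", "kind": "dev"},
    {"crate_id": "tokio", "kind": "normal"},
]
key_deps = [d.get("crate_id") for d in deps[:5]
            if d.get("kind") == "normal" and d.get("crate_id")]
key_deps_str = ", ".join(str(d) for d in key_deps) if key_deps else "None"
print(key_deps_str)  # -> serde, tokio
```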
@@ -341,13 +347,12 @@
             f"Category (pick only one): [AI, Database, Web Framework, Networking, Serialization, Utilities, DevTools, ML, Cryptography, Unknown]\n"
             f"<|end|>"
         )
-
-        # Validate classification with retry
+        # Validate classification with retry - more generous parameters
         result = self.validate_and_retry(
             prompt,
             validation_func=self.validate_classification,
-            temp=0.1,
-            max_tokens=20
+            temp=0.2,  # Increased from 0.1 for more variety
+            max_tokens=50  # Increased from 20 to allow more complete responses
         )

         return result or "Unknown"
@@ -377,13 +382,12 @@
             f"Create exactly 5 pairs.\n"
             f"<|end|>"
         )
-
-        # Use validation for retry
+        # Use validation for retry - more generous parameters
         result = self.validate_and_retry(
             prompt,
             validation_func=self.validate_factual_pairs,
-            temp=0.6,
-            max_tokens=500
+            temp=0.7,  # Increased from 0.6 for more creativity
+            max_tokens=800  # Increased from 500 for more complete responses
         )

         return result or "Factual pairs generation failed."
@@ -396,3 +400,90 @@
         score = (crate.downloads / 1000) + (crate.github_stars * 10)
         score += len(self.truncate_content(crate.readme, 1000)) / 500
         return round(score, 2)
+
+    def batch_process_prompts(self, prompts: list[tuple[str, float, int]], batch_size: int = 4) -> list[Optional[str]]:
+        """
+        L4 GPU-optimized batch processing for multiple prompts
+        Processes prompts in batches to maximize GPU utilization
+
+        Args:
+            prompts: List of (prompt, temperature, max_tokens) tuples
+            batch_size: Number of prompts to process simultaneously (tuned for L4)
+        """
+        results = []
+
+        # Process in batches optimized for L4's capabilities
+        for i in range(0, len(prompts), batch_size):
+            batch = prompts[i:i + batch_size]
+            batch_results = []
+
+            for prompt, temp, max_tokens in batch:
+                try:
+                    # Prepare prompt with context preservation
+                    if self.estimate_tokens(prompt) > 3500:  # Leave room for response
+                        prompt = self.smart_truncate(prompt, 3500)
+
+                    # Use optimized parameters for L4
+                    output = self.model(
+                        prompt,
+                        max_tokens=max_tokens,
+                        temperature=temp,
+                        top_p=0.95,  # Nucleus sampling for better quality
+                        repeat_penalty=1.1,  # Reduce repetition
+                        stop=["<|end|>", "<|user|>", "<|system|>"],
+                        echo=False,  # Don't echo input
+                        stream=False  # Batch mode, no streaming
+                    )
+
+                    result = self.clean_output(output["choices"][0]["text"])
+                    batch_results.append(result)
+
+                except Exception as e:
+                    logging.warning(f"Batch processing error: {e}")
+                    batch_results.append(None)
+
+            results.extend(batch_results)
+
+            # Small delay between batches to prevent thermal throttling
+            if i + batch_size < len(prompts):
+                time.sleep(0.1)
+
+        return results
+
+    def smart_context_management(self, context_history: list[str], new_prompt: str) -> str:
+        """
+        Intelligent context management for prefix cache optimization
+        Maximizes cache hits by preserving common context patterns
+        """
+        # Calculate available tokens for context
+        base_tokens = self.estimate_tokens(new_prompt)
+        available_context = 4000 - base_tokens  # Leave buffer for response
+
+        if available_context <= 0:
+            return new_prompt
+
+        # Build context from most recent and most relevant history
+        context_parts = []
+        tokens_used = 0
+
+        # Prioritize recent context (better cache hits)
+        for context in reversed(context_history[-5:]):  # Last 5 contexts
+            context_tokens = self.estimate_tokens(context)
+            if tokens_used + context_tokens <= available_context:
+                context_parts.insert(0, context)
+                tokens_used += context_tokens
+            else:
+                # Try to fit truncated version
+                remaining_tokens = available_context - tokens_used
+                if remaining_tokens > 100:  # Only if meaningful space left
+                    truncated = self.smart_truncate(context, remaining_tokens)
+                    if truncated:
+                        context_parts.insert(0, truncated)
+                break
+
+        # Combine context with new prompt
+        if context_parts:
+            full_context = "\n\n---\n\n".join(context_parts)
+            return f"{full_context}\n\n---\n\n{new_prompt}"
+
+        return new_prompt
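A hypothetical usage sketch for the two new methods; the `config` object, prompts, and history are placeholders, and `LLMEnricher(config)` is assumed to match the package's existing constructor:

```python
# Assumed construction; the real config class is defined elsewhere in the package.
enricher = LLMEnricher(config)

# Batch mode: (prompt, temperature, max_tokens) tuples, as the docstring specifies
prompts = [
    ("Summarize the serde crate.", 0.3, 300),
    ("Classify the tokio crate.", 0.2, 50),
]
results = enricher.batch_process_prompts(prompts, batch_size=4)

# Context reuse: the most recent history entries are prepended, separated by
# "---" markers, until the ~4000-token budget is exhausted
history = ["Earlier crate summary ...", "Earlier classification ..."]
full_prompt = enricher.smart_context_management(history, "Summarize the rand crate.")
```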
rust_crate_pipeline/version.py
CHANGED
{rust_crate_pipeline-1.2.0.dist-info → rust_crate_pipeline-1.2.3.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: rust-crate-pipeline
-Version: 1.2.0
+Version: 1.2.3
 Summary: A comprehensive system for gathering, enriching, and analyzing metadata for Rust crates using AI-powered insights
 Home-page: https://github.com/DaveTmire85/SigilDERG-Data_Production
 Author: SuperUser666-Sigil
{rust_crate_pipeline-1.2.0.dist-info → rust_crate_pipeline-1.2.3.dist-info}/RECORD
CHANGED
@@ -1,6 +1,6 @@
 rust_crate_pipeline/__init__.py,sha256=m9fb1WGbyOimxK2e18FSgvLWGYBwbLoHM_mscr-nAPs,1429
 rust_crate_pipeline/__main__.py,sha256=fYgtPofuk4vkwiZ7ELP4GVMNj_QiKmZMSlvhzsNGuDs,155
-rust_crate_pipeline/ai_processing.py,sha256=
+rust_crate_pipeline/ai_processing.py,sha256=B93rCDdxE-UkYMjmT0UotQTahx9-Lgzec7_bjBd3cUs,23240
 rust_crate_pipeline/analysis.py,sha256=ijP4zp3cFnN09nZkeCluyAvbyAtAW_M2YSxALpQX8LY,18615
 rust_crate_pipeline/config.py,sha256=r4Y_5SD-lfrM1112edk9T0S0MiVxaNSSHk4q2yDrM88,1528
 rust_crate_pipeline/github_token_checker.py,sha256=MJqHP8J84NEZ6nzdutpC7iRnsP0kyqscjLUosvmI4MI,3768
@@ -8,12 +8,12 @@ rust_crate_pipeline/main.py,sha256=J8ORQA6s3wyWw2R3oB_IEm2J5tx1CFdspw5kb5Ep8zQ,6
 rust_crate_pipeline/network.py,sha256=t_G8eh_WHNugm_laMftcWVbHsmP0bOlTPnVW9DqF6SU,13375
 rust_crate_pipeline/pipeline.py,sha256=Uwfw4uLL3aN1gJl5xSwvvyaY9ceeP7LVr02IzNx0tPM,12033
 rust_crate_pipeline/production_config.py,sha256=2GT8bxytcrMRrcfjzpay5RTtATE3rbmDvNUBvVhrYSQ,2472
-rust_crate_pipeline/version.py,sha256=
+rust_crate_pipeline/version.py,sha256=r_w4Eokm27opXYKcOCTKax8TO7pFI5E3TkB0L9c62yY,1022
 rust_crate_pipeline/utils/file_utils.py,sha256=lnHeLrt1JYaQhRDKtA1TWR2HIyRO8zwOyWb-KmAmWgk,2126
 rust_crate_pipeline/utils/logging_utils.py,sha256=O4Jnr_k9dBchrVqXf-vqtDKgizDtL_ljh8g7G2VCX_c,2241
-rust_crate_pipeline-1.2.
-rust_crate_pipeline-1.2.
-rust_crate_pipeline-1.2.
-rust_crate_pipeline-1.2.
-rust_crate_pipeline-1.2.
-rust_crate_pipeline-1.2.
+rust_crate_pipeline-1.2.3.dist-info/licenses/LICENSE,sha256=tpd4XNpbssrSx9-iErATOLrOh0ivNPfO2I5MAPUpats,1088
+rust_crate_pipeline-1.2.3.dist-info/METADATA,sha256=1bU7P1g6veyD0hJ78cjGJcVWRTujAF6Q6RL_CV_MVIY,16741
+rust_crate_pipeline-1.2.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+rust_crate_pipeline-1.2.3.dist-info/entry_points.txt,sha256=9Rr_IRuFRIridXxUSdEJbB3ba0NnpEfKmknZXFdYRC0,70
+rust_crate_pipeline-1.2.3.dist-info/top_level.txt,sha256=GUdB7RyxHLhijQxui_KTy3B8p_L2APui9C6RYa0FuaE,20
+rust_crate_pipeline-1.2.3.dist-info/RECORD,,
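Each RECORD row has the form `path,sha256=<digest>,size`, where the digest is the URL-safe base64 encoding of the file's SHA-256 hash with trailing `=` padding stripped (per the wheel spec). A small sketch for verifying one entry of an unpacked wheel:

```python
import base64
import hashlib

def record_digest(path: str) -> str:
    """Return the RECORD-style sha256 digest for a file."""
    with open(path, "rb") as f:
        raw = hashlib.sha256(f.read()).digest()
    return base64.urlsafe_b64encode(raw).rstrip(b"=").decode("ascii")

# Run inside the unpacked 1.2.3 wheel, this should print
# sha256=r_w4Eokm27opXYKcOCTKax8TO7pFI5E3TkB0L9c62yY for version.py
print("sha256=" + record_digest("rust_crate_pipeline/version.py"))
```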
{rust_crate_pipeline-1.2.0.dist-info → rust_crate_pipeline-1.2.3.dist-info}/WHEEL
RENAMED
File without changes

{rust_crate_pipeline-1.2.0.dist-info → rust_crate_pipeline-1.2.3.dist-info}/entry_points.txt
RENAMED
File without changes

{rust_crate_pipeline-1.2.0.dist-info → rust_crate_pipeline-1.2.3.dist-info}/licenses/LICENSE
RENAMED
File without changes

{rust_crate_pipeline-1.2.0.dist-info → rust_crate_pipeline-1.2.3.dist-info}/top_level.txt
RENAMED
File without changes