rust-crate-pipeline 1.2.6-py3-none-any.whl → 1.5.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,24 +2,41 @@
 import re
 import time
 import logging
-import tiktoken
-from typing import Callable, Optional
-from llama_cpp import Llama
+from typing import Callable, Optional, Any, Dict, List
 from .config import PipelineConfig, CrateMetadata, EnrichedCrate
 
+# Optional imports with fallbacks
+_ai_dependencies_available = True
+try:
+    import tiktoken
+    from llama_cpp import Llama
+except ImportError as e:
+    logging.warning(f"AI dependencies not available: {e}")
+    tiktoken = None
+    Llama = None
+    _ai_dependencies_available = False
+
+
 class LLMEnricher:
     def __init__(self, config: PipelineConfig):
+        if not _ai_dependencies_available:
+            raise ImportError("AI dependencies (tiktoken, llama_cpp) are not available. Please install them to use LLMEnricher.")
+
         self.config = config
-        self.tokenizer = tiktoken.get_encoding("cl100k_base")
+        self.tokenizer = tiktoken.get_encoding("cl100k_base")  # type: ignore
         self.model = self._load_model()
-
+
     def _load_model(self):
         """Optimized for GCP g2-standard-4 with L4 GPU (24GB VRAM)"""
-        return Llama(
+        if not _ai_dependencies_available:
+            raise ImportError("Cannot load model: AI dependencies not available")
+
+        return Llama(  # type: ignore
             model_path=self.config.model_path,
             n_ctx=4096,  # Larger context for L4's 24GB VRAM
             n_batch=1024,  # Larger batch size for better throughput
-            n_gpu_layers=-1,  # Load ALL layers on GPU (L4 has plenty VRAM)
+            # Load ALL layers on GPU (L4 has plenty VRAM)
+            n_gpu_layers=-1,
             n_threads=4,  # Match the 4 vCPUs
             n_threads_batch=4,  # Parallel batch processing
             use_mmap=True,  # Memory-mapped files for efficiency
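
The hunk above replaces the hard `tiktoken` / `llama_cpp` imports with a guarded import, so the module stays importable when the AI extras are missing and only `LLMEnricher` itself refuses to construct. A minimal standalone sketch of that pattern, using hypothetical module and class names rather than the package's own:

```python
import logging

_heavy_deps_available = True
try:
    import heavy_dep  # hypothetical optional dependency
except ImportError as exc:
    logging.warning(f"Optional dependency not available: {exc}")
    heavy_dep = None
    _heavy_deps_available = False


class NeedsHeavyDep:
    """Only this class needs the optional dependency; importing the module does not."""

    def __init__(self) -> None:
        if not _heavy_deps_available:
            raise ImportError("heavy_dep is not installed; install it to use NeedsHeavyDep.")
        self.backend = heavy_dep
```
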
@@ -37,7 +54,7 @@ class LLMEnricher:
         """Truncate content to fit within token limit"""
         paragraphs = content.split("\n\n")
         result, current_tokens = "", 0
-
+
         for para in paragraphs:
             tokens = len(self.tokenizer.encode(para))
             if current_tokens + tokens <= max_tokens:
@@ -51,27 +68,33 @@ class LLMEnricher:
         """Intelligently truncate content to preserve the most important parts"""
         if not content:
             return ""
-
+
         # If content is short enough, return it all
         if len(self.tokenizer.encode(content)) <= max_tokens:
             return content
-
+
         # Split into sections based on markdown headers
         sections = []
-        current_section = {"heading": "Introduction", "content": "", "priority": 10}
-
+        current_section = {
+            "heading": "Introduction",
+            "content": "",
+            "priority": 10}
+
         for line in content.splitlines():
             if re.match(r'^#+\s+', line):  # It's a header
                 # Save previous section if not empty
                 if current_section["content"].strip():
                     sections.append(current_section)
-
+
                 # Create new section with appropriate priority
                 heading = re.sub(r'^#+\s+', '', line)
                 priority = 5  # Default priority
-
+
                 # Assign priority based on content type
-                if re.search(r'\b(usage|example|getting started)\b', heading, re.I):
+                if re.search(
+                        r'\b(usage|example|getting started)\b',
+                        heading,
+                        re.I):
                     priority = 10
                 elif re.search(r'\b(feature|overview|about)\b', heading, re.I):
                     priority = 9
@@ -79,91 +102,122 @@ class LLMEnricher:
                     priority = 8
                 elif re.search(r'\b(api|interface)\b', heading, re.I):
                     priority = 7
-
-                current_section = {"heading": heading, "content": line + "\n", "priority": priority}
+
+                current_section = {
+                    "heading": heading,
+                    "content": line + "\n",
+                    "priority": priority}
             else:
                 current_section["content"] += line + "\n"
-
+
                 # Boost priority if code block is found
                 if "```rust" in line or "```no_run" in line:
-                    current_section["priority"] = max(current_section["priority"], 8)
-
+                    current_section["priority"] = max(
+                        current_section["priority"], 8)
+
         # Add the last section
         if current_section["content"].strip():
             sections.append(current_section)
-
+
         # Sort sections by priority (highest first)
         sections.sort(key=lambda x: x["priority"], reverse=True)
-
+
         # Build the result, respecting token limits
         result = ""
         tokens_used = 0
-
+
         for section in sections:
             section_text = f"## {section['heading']}\n{section['content']}\n"
             section_tokens = len(self.tokenizer.encode(section_text))
-
+
             if tokens_used + section_tokens <= max_tokens:
                 result += section_text
                 tokens_used += section_tokens
             elif tokens_used < max_tokens - 100:  # If we can fit a truncated version
                 # Take what we can
                 remaining_tokens = max_tokens - tokens_used
-                truncated_text = self.tokenizer.decode(self.tokenizer.encode(section_text)[:remaining_tokens])
+                truncated_text = self.tokenizer.decode(
+                    self.tokenizer.encode(section_text)[:remaining_tokens])
                 result += truncated_text
                 break
-
+
         return result
 
     def clean_output(self, output: str, task: str = "general") -> str:
         """Task-specific output cleaning"""
         if not output:
             return ""
-
+
         # Remove any remaining prompt artifacts
         output = output.split("<|end|>")[0].strip()
-
+
         if task == "classification":
             # For classification tasks, extract just the category
-            categories = ["AI", "Database", "Web Framework", "Networking", "Serialization",
-                          "Utilities", "DevTools", "ML", "Cryptography", "Unknown"]
+            categories = [
+                "AI",
+                "Database",
+                "Web Framework",
+                "Networking",
+                "Serialization",
+                "Utilities",
+                "DevTools",
+                "ML",
+                "Cryptography",
+                "Unknown"]
             for category in categories:
-                if re.search(r'\b' + re.escape(category) + r'\b', output, re.IGNORECASE):
+                if re.search(
+                        r'\b' +
+                        re.escape(category) +
+                        r'\b',
+                        output,
+                        re.IGNORECASE):
                     return category
             return "Unknown"
-
+
         elif task == "factual_pairs":
             # For factual pairs, ensure proper formatting
             pairs = []
-            facts = re.findall(r'✅\s*Factual:?\s*(.*?)(?=❌|\Z)', output, re.DOTALL)
-            counterfacts = re.findall(r'❌\s*Counterfactual:?\s*(.*?)(?=✅|\Z)', output, re.DOTALL)
-
+            facts = re.findall(
+                r'✅\s*Factual:?\s*(.*?)(?=❌|\Z)',
+                output,
+                re.DOTALL)
+            counterfacts = re.findall(
+                r'❌\s*Counterfactual:?\s*(.*?)(?=✅|\Z)', output, re.DOTALL)
+
             # Pair them up
             for i in range(min(len(facts), len(counterfacts))):
-                pairs.append(f"✅ Factual: {facts[i].strip()}\n❌ Counterfactual: {counterfacts[i].strip()}")
-
+                pairs.append(
+                    f"✅ Factual: {
+                        facts[i].strip()}\n❌ Counterfactual: {
+                        counterfacts[i].strip()}")
+
             return "\n\n".join(pairs)
-
+
         else:
             # General cleaning - more permissive than before
-            lines = [line.strip() for line in output.splitlines() if line.strip()]
+            lines = [line.strip()
+                     for line in output.splitlines() if line.strip()]
             return "\n".join(lines)
 
-    def run_llama(self, prompt: str, temp: float = 0.2, max_tokens: int = 256) -> Optional[str]:
+    def run_llama(self, prompt: str, temp: float = 0.2,
+                  max_tokens: int = 256) -> Optional[str]:
         """Run the LLM with customizable parameters per task"""
         try:
             token_count = self.estimate_tokens(prompt)
             if token_count > self.config.prompt_token_margin:
-                logging.warning(f"Prompt too long ({token_count} tokens). Truncating.")
-                prompt = self.truncate_content(prompt, self.config.prompt_token_margin - 100)
-
+                logging.warning(
+                    f"Prompt too long ({token_count} tokens). Truncating.")
+                prompt = self.truncate_content(
+                    prompt, self.config.prompt_token_margin - 100)
+
             output = self.model(
                 prompt,
                 max_tokens=max_tokens,
                 temperature=temp,
-                stop=["<|end|>", "<|user|>", "<|system|>"]  # Stop at these tokens
+                # Stop at these tokens
+                stop=["<|end|>", "<|user|>", "<|system|>"]
             )
-
+
             raw_text = output["choices"][0]["text"]
             return self.clean_output(raw_text)
         except Exception as e:
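
The `clean_output` changes in this hunk only re-wrap the ✅/❌ regexes; their behaviour is unchanged. A small self-contained check of what those patterns extract (the sample text is made up for illustration):

```python
import re

sample = (
    "✅ Factual: serde provides a generic serialization framework.\n"
    "❌ Counterfactual: serde ships its own async runtime.\n"
)
facts = re.findall(r'✅\s*Factual:?\s*(.*?)(?=❌|\Z)', sample, re.DOTALL)
counterfacts = re.findall(r'❌\s*Counterfactual:?\s*(.*?)(?=✅|\Z)', sample, re.DOTALL)
# One fact and one counterfactual; clean_output then zips them into
# "✅ Factual: ... / ❌ Counterfactual: ..." pairs.
print(len(facts), len(counterfacts))  # -> 1 1
```
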
@@ -174,7 +228,7 @@ class LLMEnricher:
         self,
         prompt: str,
         validation_func: Callable[[str], bool],
-        temp: float = 0.2, max_tokens: int = 256,
+        temp: float = 0.2, max_tokens: int = 256,
         retries: int = 4  # Increased from 2 to 4 for better success rates
     ) -> Optional[str]:
         """Run LLM with validation and automatic retry on failure"""
@@ -182,58 +236,89 @@ class LLMEnricher:
         for attempt in range(retries):
             try:
                 # More generous temperature adjustment for better variety
-                adjusted_temp = temp * (1 + (attempt * 0.2))  # 20% increases instead of 10%
-                result = self.run_llama(prompt, temp=adjusted_temp, max_tokens=max_tokens)
-
+                # 20% increases instead of 10%
+                adjusted_temp = temp * (1 + (attempt * 0.2))
+                result = self.run_llama(
+                    prompt, temp=adjusted_temp, max_tokens=max_tokens)
+
                 # Validate the result
                 if result and validation_func(result):
                     return result
-
-                # If we get here, validation failed - use debug level for early attempts
+
+                # If we get here, validation failed - use debug level for early
+                # attempts
                 if attempt == retries - 1:
-                    logging.debug(f"All {retries} validation attempts failed, using last available result.")
+                    logging.debug(
+                        f"All {retries} validation attempts failed, using last available result.")
                 else:
-                    logging.debug(f"Validation failed on attempt {attempt+1}/{retries}. Retrying with adjusted temp={adjusted_temp:.2f}")
-
+                    logging.debug(
+                        f"Validation failed on attempt {
+                            attempt + 1}/{retries}. Retrying with adjusted temp={
+                            adjusted_temp:.2f}")
+
                 # Only simplify prompt on later attempts (attempt 2+)
                 if attempt >= 2:
                     prompt = self.simplify_prompt(prompt)
-
+
             except Exception as e:
-                logging.error(f"Generation error on attempt {attempt+1}: {str(e)}")
-
+                logging.error(
+                    f"Generation error on attempt {
+                        attempt +
+                        1}: {
+                        str(e)}")
+
                 # More generous backoff - give the model more time
                 time.sleep(2.0 + (attempt * 1.0))  # 2s, 3s, 4s, 5s delays
-
-        # If we exhausted all retries, return the last result even if not perfect
+
+        # If we exhausted all retries, return the last result even if not
+        # perfect
         return result if 'result' in locals() else None
 
     def simplify_prompt(self, prompt: str) -> str:
         """Simplify a prompt by removing examples and reducing context"""
         # Remove few-shot examples
-        prompt = re.sub(r'# Example [0-9].*?(?=# Crate to Classify|\Z)', '', prompt, flags=re.DOTALL)
-
+        prompt = re.sub(
+            r'# Example [0-9].*?(?=# Crate to Classify|\Z)',
+            '',
+            prompt,
+            flags=re.DOTALL)
+
         # Make instructions more direct
-        prompt = re.sub(r'<\|system\|>.*?<\|user\|>', '<|system|>Be concise.\n<|user|>', prompt, flags=re.DOTALL)
-
+        prompt = re.sub(
+            r'<\|system\|>.*?<\|user\|>',
+            '<|system|>Be concise.\n<|user|>',
+            prompt,
+            flags=re.DOTALL)
+
         return prompt
 
     def validate_classification(self, result: str) -> bool:
         """Ensure a valid category was returned"""
         if not result:
             return False
-        valid_categories = ["AI", "Database", "Web Framework", "Networking", "Serialization",
-                            "Utilities", "DevTools", "ML", "Cryptography", "Unknown"]
-        return any(category.lower() == result.strip().lower() for category in valid_categories)
+        valid_categories = [
+            "AI",
+            "Database",
+            "Web Framework",
+            "Networking",
+            "Serialization",
+            "Utilities",
+            "DevTools",
+            "ML",
+            "Cryptography",
+            "Unknown"]
+        return any(category.lower() == result.strip().lower()
+                   for category in valid_categories)
 
     def validate_factual_pairs(self, result: str) -> bool:
         """Ensure exactly 5 factual/counterfactual pairs exist"""
         if not result:
             return False
-
+
         facts = re.findall(r'✅\s*Factual:?\s*(.*?)(?=❌|\Z)', result, re.DOTALL)
-        counterfacts = re.findall(r'❌\s*Counterfactual:?\s*(.*?)(?=✅|\Z)', result, re.DOTALL)
-
+        counterfacts = re.findall(
+            r'❌\s*Counterfactual:?\s*(.*?)(?=✅|\Z)', result, re.DOTALL)
+
         return len(facts) >= 3 and len(counterfacts) >= 3  # At least 3 pairs
 
     def enrich_crate(self, crate: CrateMetadata) -> EnrichedCrate:
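
The `validate_and_retry` hunks above are line-wrapping only: the retry loop still raises the temperature by 20% per attempt, backs off 2s/3s/4s/5s after errors, and simplifies the prompt from the third attempt on. A condensed sketch of that control flow, with `generate` and `is_valid` as placeholder callables rather than the class's own methods:

```python
import time
from typing import Callable, Optional


def retry_with_validation(
        generate: Callable[[float], Optional[str]],
        is_valid: Callable[[str], bool],
        temp: float = 0.2,
        retries: int = 4) -> Optional[str]:
    result: Optional[str] = None
    for attempt in range(retries):
        try:
            adjusted_temp = temp * (1 + attempt * 0.2)  # 0.2, 0.24, 0.28, 0.32
            result = generate(adjusted_temp)
            if result and is_valid(result):
                return result
            # (prompt simplification on later attempts omitted in this sketch)
        except Exception:
            time.sleep(2.0 + attempt * 1.0)  # 2s, 3s, 4s, 5s backoff
    # Exhausted retries: hand back the last attempt even if it never validated.
    return result
```
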
@@ -241,31 +326,32 @@ class LLMEnricher:
         # Convert CrateMetadata to EnrichedCrate
         enriched_dict = crate.__dict__.copy()
         enriched = EnrichedCrate(**enriched_dict)
-
+
         try:
             # Generate README summary first
             if crate.readme:
                 readme_content = self.smart_truncate(crate.readme, 2000)
                 prompt = (
-                    f"<|system|>Extract key features from README.\n"
+                    "<|system|>Extract key features from README.\n"
                     f"<|user|>Summarize key aspects of this Rust crate from its README:\n{readme_content}\n"
-                    f"<|end|>"
+                    "<|end|>"
                 )
                 enriched.readme_summary = self.validate_and_retry(
-                    prompt,
-                    lambda x: len(x) > 50,
-                    temp=0.3,
-                    max_tokens=300 )
-
+                    prompt,
+                    lambda x: len(x) > 50,
+                    temp=0.3,
+                    max_tokens=300)
+
             # Generate other enrichments
             enriched.feature_summary = self.summarize_features(crate)
             enriched.use_case = self.classify_use_case(
-                crate,
+                crate,
                 enriched.readme_summary or ""
             )
             enriched.score = self.score_crate(crate)
-            enriched.factual_counterfactual = self.generate_factual_pairs(crate)
-
+            enriched.factual_counterfactual = self.generate_factual_pairs(
+                crate)
+
             return enriched
         except Exception as e:
             logging.error(f"Failed to enrich {crate.name}: {str(e)}")
@@ -276,7 +362,7 @@ class LLMEnricher:
         try:
             if not crate.features:
                 return "No features documented for this crate."
-
+
             # Format features with their dependencies
             feature_text = ""
             for f in crate.features[:8]:  # Limit to 8 features for context size
@@ -284,77 +370,86 @@ class LLMEnricher:
                 deps = f.get("dependencies", [])
                 deps_str = ", ".join(deps) if deps else "none"
                 feature_text += f"- {feature_name} (dependencies: {deps_str})\n"
-
+
             prompt = (
-                f"<|system|>You are a Rust programming expert analyzing crate features.\n"
+                "<|system|>You are a Rust programming expert analyzing crate features.\n"
                 f"<|user|>For the Rust crate `{crate.name}`, explain these features and what functionality they provide:\n\n"
                 f"{feature_text}\n\n"
-                f"Provide a concise explanation of each feature's purpose and when a developer would enable it.\n"
-                f"<|end|>"
+                "Provide a concise explanation of each feature's purpose and when a developer would enable it.\n"
+                "<|end|>"
             )
-
+
             # Use moderate temperature for informative but natural explanation
             result = self.run_llama(prompt, temp=0.2, max_tokens=350)
             return result or "Feature summary not available."
         except Exception as e:
-            logging.warning(f"Feature summarization failed for {crate.name}: {str(e)}")
+            logging.warning(
+                f"Feature summarization failed for {
+                    crate.name}: {
+                    str(e)}")
             return "Feature summary not available."
 
-    def classify_use_case(self, crate: CrateMetadata, readme_summary: str) -> str:
+    def classify_use_case(
+            self,
+            crate: CrateMetadata,
+            readme_summary: str) -> str:
         """Classify the use case of a crate with rich context"""
         try:  # Calculate available tokens for prompt (classification usually needs ~20 response tokens)
             available_prompt_tokens = self.config.model_token_limit - 200  # Reserve for response
-
-            joined = ", ".join(crate.keywords[:10]) if crate.keywords else "None"
-            key_deps = [dep.get("crate_id") for dep in crate.dependencies[:5]
-                        if dep.get("kind") == "normal" and dep.get("crate_id")]
-            key_deps_str = ", ".join(str(dep) for dep in key_deps) if key_deps else "None"
-
+
+            joined = ", ".join(
+                crate.keywords[:10]) if crate.keywords else "None"
+            key_deps = [dep.get("crate_id") for dep in crate.dependencies[:5]
+                        if dep.get("kind") == "normal" and dep.get("crate_id")]
+            key_deps_str = ", ".join(str(dep)
+                                     for dep in key_deps) if key_deps else "None"
+
             # Adaptively truncate different sections based on importance
-            token_budget = available_prompt_tokens - 400  # Reserve tokens for prompt template
-
+            token_budget = available_prompt_tokens - \
+                400  # Reserve tokens for prompt template
+
             # Allocate different percentages to each section
             desc_tokens = int(token_budget * 0.2)
             readme_tokens = int(token_budget * 0.6)
-
+
             desc = self.truncate_content(crate.description, desc_tokens)
             readme_summary = self.smart_truncate(readme_summary, readme_tokens)
-
+
             # Few-shot prompting with examples
             prompt = (
-                f"<|system|>You are a Rust expert classifying crates into the most appropriate category.\n"
-                f"<|user|>\n"
-                f"# Example 1\n"
-                f"Crate: `tokio`\n"
-                f"Description: An asynchronous runtime for the Rust programming language\n"
-                f"Keywords: async, runtime, futures\n"
-                f"Key Dependencies: mio, bytes, parking_lot\n"
-                f"Category: Networking\n\n"
-
-                f"# Example 2\n"
-                f"Crate: `serde`\n"
-                f"Description: A generic serialization/deserialization framework\n"
-                f"Keywords: serde, serialization\n"
-                f"Key Dependencies: serde_derive\n"
-                f"Category: Serialization\n\n"
-
-                f"# Crate to Classify\n"
+                "<|system|>You are a Rust expert classifying crates into the most appropriate category.\n"
+                "<|user|>\n"
+                "# Example 1\n"
+                "Crate: `tokio`\n"
+                "Description: An asynchronous runtime for the Rust programming language\n"
+                "Keywords: async, runtime, futures\n"
+                "Key Dependencies: mio, bytes, parking_lot\n"
+                "Category: Networking\n\n"
+
+                "# Example 2\n"
+                "Crate: `serde`\n"
+                "Description: A generic serialization/deserialization framework\n"
+                "Keywords: serde, serialization\n"
+                "Key Dependencies: serde_derive\n"
+                "Category: Serialization\n\n"
+
+                "# Crate to Classify\n"
                 f"Crate: `{crate.name}`\n"
                 f"Description: {desc}\n"
                 f"Keywords: {joined}\n"
                 f"README Summary: {readme_summary}\n"
                 f"Key Dependencies: {key_deps_str}\n\n"
-                f"Category (pick only one): [AI, Database, Web Framework, Networking, Serialization, Utilities, DevTools, ML, Cryptography, Unknown]\n"
-                f"<|end|>"
+                "Category (pick only one): [AI, Database, Web Framework, Networking, Serialization, Utilities, DevTools, ML, Cryptography, Unknown]\n"
+                "<|end|>"
             )
-            # Validate classification with retry - more generous parameters
+            # Validate classification with retry - more generous parameters
             result = self.validate_and_retry(
-                prompt,
+                prompt,
                 validation_func=self.validate_classification,
                 temp=0.2,  # Increased from 0.1 for more variety
                 max_tokens=50  # Increased from 20 to allow more complete responses
             )
-
+
             return result or "Unknown"
         except Exception as e:
             logging.error(f"Classification failed for {crate.name}: {str(e)}")
@@ -364,35 +459,39 @@ class LLMEnricher:
         """Generate factual/counterfactual pairs with retry and validation"""
         try:
             desc = self.truncate_content(crate.description, 300)
-            readme_summary = self.truncate_content(getattr(crate, 'readme_summary', '') or '', 300)
-
+            readme_summary = self.truncate_content(
+                getattr(crate, 'readme_summary', '') or '', 300)
+
             prompt = (
-                f"<|system|>Create exactly 5 factual/counterfactual pairs for the Rust crate. "
-                f"Factual statements must be true. Counterfactuals should be plausible but incorrect - "
-                f"make them subtle and convincing rather than simple negations.\n"
-                f"<|user|>\n"
+                "<|system|>Create exactly 5 factual/counterfactual pairs for the Rust crate. "
+                "Factual statements must be true. Counterfactuals should be plausible but incorrect - "
+                "make them subtle and convincing rather than simple negations.\n"
+                "<|user|>\n"
                 f"Crate: {crate.name}\n"
                 f"Description: {desc}\n"
                 f"Repo: {crate.repository}\n"
                 f"README Summary: {readme_summary}\n"
                 f"Key Features: {', '.join([f.get('name', '') for f in crate.features[:5]])}\n\n"
-                f"Format each pair as:\n"
-                f"✅ Factual: [true statement about the crate]\n"
-                f"❌ Counterfactual: [plausible but false statement]\n\n"
-                f"Create exactly 5 pairs.\n"
-                f"<|end|>"
+                "Format each pair as:\n"
+                "✅ Factual: [true statement about the crate]\n"
+                "❌ Counterfactual: [plausible but false statement]\n\n"
+                "Create exactly 5 pairs.\n"
+                "<|end|>"
             )
-            # Use validation for retry - more generous parameters
+            # Use validation for retry - more generous parameters
             result = self.validate_and_retry(
-                prompt,
-                validation_func=self.validate_factual_pairs,
+                prompt,
+                validation_func=self.validate_factual_pairs,
                 temp=0.7,  # Increased from 0.6 for more creativity
                 max_tokens=800  # Increased from 500 for more complete responses
             )
-
+
             return result or "Factual pairs generation failed."
         except Exception as e:
-            logging.error(f"Exception in factual_pairs for {crate.name}: {str(e)}")
+            logging.error(
+                f"Exception in factual_pairs for {
+                    crate.name}: {
+                    str(e)}")
             return "Factual pairs generation failed."
 
     def score_crate(self, crate: CrateMetadata) -> float:
@@ -401,28 +500,33 @@ class LLMEnricher:
         score += len(self.truncate_content(crate.readme, 1000)) / 500
         return round(score, 2)
 
-    def batch_process_prompts(self, prompts: list[tuple[str, float, int]], batch_size: int = 4) -> list[Optional[str]]:
+    def batch_process_prompts(self,
+                              prompts: list[tuple[str,
+                                                  float,
+                                                  int]],
+                              batch_size: int = 4) -> list[Optional[str]]:
         """
         L4 GPU-optimized batch processing for multiple prompts
         Processes prompts in batches to maximize GPU utilization
-
+
         Args:
             prompts: List of (prompt, temperature, max_tokens) tuples
             batch_size: Number of prompts to process simultaneously (tuned for L4)
         """
         results = []
-
+
         # Process in batches optimized for L4's capabilities
         for i in range(0, len(prompts), batch_size):
             batch = prompts[i:i + batch_size]
             batch_results = []
-
+
             for prompt, temp, max_tokens in batch:
                 try:
                     # Prepare prompt with context preservation
-                    if self.estimate_tokens(prompt) > 3500:  # Leave room for response
+                    if self.estimate_tokens(
+                            prompt) > 3500:  # Leave room for response
                         prompt = self.smart_truncate(prompt, 3500)
-
+
                     # Use optimized parameters for L4
                     output = self.model(
                         prompt,
@@ -434,23 +538,26 @@ class LLMEnricher:
                         echo=False,  # Don't echo input
                         stream=False  # Batch mode, no streaming
                     )
-
+
                     result = self.clean_output(output["choices"][0]["text"])
                     batch_results.append(result)
-
+
                 except Exception as e:
                     logging.warning(f"Batch processing error: {e}")
                     batch_results.append(None)
-
+
             results.extend(batch_results)
-
+
             # Small delay between batches to prevent thermal throttling
             if i + batch_size < len(prompts):
                 time.sleep(0.1)
-
+
         return results
 
-    def smart_context_management(self, context_history: list[str], new_prompt: str) -> str:
+    def smart_context_management(
+            self,
+            context_history: list[str],
+            new_prompt: str) -> str:
         """
         Intelligent context management for prefix cache optimization
         Maximizes cache hits by preserving common context patterns
@@ -458,14 +565,14 @@ class LLMEnricher:
         # Calculate available tokens for context
        base_tokens = self.estimate_tokens(new_prompt)
         available_context = 4000 - base_tokens  # Leave buffer for response
-
+
         if available_context <= 0:
             return new_prompt
-
+
         # Build context from most recent and most relevant history
         context_parts = []
         tokens_used = 0
-
+
         # Prioritize recent context (better cache hits)
         for context in reversed(context_history[-5:]):  # Last 5 contexts
             context_tokens = self.estimate_tokens(context)
@@ -480,10 +587,10 @@ class LLMEnricher:
                 if truncated:
                     context_parts.insert(0, truncated)
                 break
-
+
         # Combine context with new prompt
         if context_parts:
             full_context = "\n\n---\n\n".join(context_parts)
             return f"{full_context}\n\n---\n\n{new_prompt}"
-
+
         return new_prompt
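
The `smart_context_management` hunks are again wrapping-only: the method still budgets roughly 4000 tokens, keeps the most recent five history entries that fit, and joins them with `---` separators so shared prefixes can be reused by the prefix cache. A minimal illustration of that joining behaviour (the token estimator here is a crude stand-in, not the class's tokenizer-based estimate, and the truncated-fit branch is omitted):

```python
def rough_tokens(text: str) -> int:
    return len(text) // 4  # crude stand-in for a real token count


def build_prompt(history: list[str], new_prompt: str, limit: int = 4000) -> str:
    budget = limit - rough_tokens(new_prompt)
    parts: list[str] = []
    used = 0
    for ctx in reversed(history[-5:]):      # most recent entries first
        cost = rough_tokens(ctx)
        if used + cost <= budget:
            parts.insert(0, ctx)            # keep chronological order
            used += cost
    return "\n\n---\n\n".join(parts + [new_prompt]) if parts else new_prompt


print(build_prompt(["earlier crate summary"], "Classify the `rand` crate."))
```
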