rust-crate-pipeline 1.4.0-py3-none-any.whl → 1.4.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. rust_crate_pipeline/__init__.py +18 -27
  2. rust_crate_pipeline/__main__.py +1 -0
  3. rust_crate_pipeline/ai_processing.py +718 -596
  4. rust_crate_pipeline/analysis.py +330 -363
  5. rust_crate_pipeline/azure_ai_processing.py +462 -0
  6. rust_crate_pipeline/config.py +46 -28
  7. rust_crate_pipeline/core/__init__.py +19 -0
  8. rust_crate_pipeline/core/canon_registry.py +133 -0
  9. rust_crate_pipeline/core/irl_engine.py +256 -0
  10. rust_crate_pipeline/core/sacred_chain.py +117 -0
  11. rust_crate_pipeline/crate_analysis.py +54 -0
  12. rust_crate_pipeline/crate_list.txt +424 -0
  13. rust_crate_pipeline/github_token_checker.py +108 -112
  14. rust_crate_pipeline/main.py +329 -109
  15. rust_crate_pipeline/network.py +317 -308
  16. rust_crate_pipeline/pipeline.py +300 -375
  17. rust_crate_pipeline/production_config.py +24 -27
  18. rust_crate_pipeline/progress_monitor.py +334 -0
  19. rust_crate_pipeline/scraping/__init__.py +13 -0
  20. rust_crate_pipeline/scraping/unified_scraper.py +259 -0
  21. rust_crate_pipeline/unified_llm_processor.py +637 -0
  22. rust_crate_pipeline/unified_pipeline.py +548 -0
  23. rust_crate_pipeline/utils/file_utils.py +32 -5
  24. rust_crate_pipeline/utils/logging_utils.py +21 -16
  25. rust_crate_pipeline/version.py +76 -47
  26. rust_crate_pipeline-1.4.1.dist-info/METADATA +515 -0
  27. rust_crate_pipeline-1.4.1.dist-info/RECORD +31 -0
  28. rust_crate_pipeline-1.4.0.dist-info/METADATA +0 -585
  29. rust_crate_pipeline-1.4.0.dist-info/RECORD +0 -19
  30. {rust_crate_pipeline-1.4.0.dist-info → rust_crate_pipeline-1.4.1.dist-info}/WHEEL +0 -0
  31. {rust_crate_pipeline-1.4.0.dist-info → rust_crate_pipeline-1.4.1.dist-info}/entry_points.txt +0 -0
  32. {rust_crate_pipeline-1.4.0.dist-info → rust_crate_pipeline-1.4.1.dist-info}/licenses/LICENSE +0 -0
  33. {rust_crate_pipeline-1.4.0.dist-info → rust_crate_pipeline-1.4.1.dist-info}/top_level.txt +0 -0
rust_crate_pipeline/unified_pipeline.py (new file)
@@ -0,0 +1,548 @@
+ import asyncio
+ import json
+ import logging
+ import time
+ import argparse
+ from pathlib import Path
+ from typing import Dict, List, Optional, Any, Union, TYPE_CHECKING
+
+ from .config import PipelineConfig
+ from .core import IRLEngine, CanonRegistry, SacredChainTrace, TrustVerdict
+ from .scraping import UnifiedScraper, ScrapingResult
+ from .crate_analysis import CrateAnalyzer
+
+ # Import Azure OpenAI enricher if available
+ try:
+     from .azure_ai_processing import AzureOpenAIEnricher
+     AZURE_OPENAI_AVAILABLE = True
+ except ImportError:
+     AZURE_OPENAI_AVAILABLE = False
+     AzureOpenAIEnricher = None  # type: ignore  # Fallback for type checkers; see below
+
+ # Import unified LLM processor
+ try:
+     from .unified_llm_processor import UnifiedLLMProcessor, create_llm_processor_from_args, LLMConfig
+     UNIFIED_LLM_AVAILABLE = True
+ except ImportError:
+     UNIFIED_LLM_AVAILABLE = False
+     UnifiedLLMProcessor = None  # type: ignore
+     create_llm_processor_from_args = None  # type: ignore
+     LLMConfig = None  # type: ignore
+
+ if TYPE_CHECKING:
+     from .azure_ai_processing import AzureOpenAIEnricher  # type: ignore[import]
+     from .unified_llm_processor import UnifiedLLMProcessor, LLMConfig  # type: ignore[import]
+
+
+ class UnifiedSigilPipeline:
+
+     def __init__(self, config: PipelineConfig, llm_config: Optional[Any] = None) -> None:
+         self.config = config
+         self.logger = logging.getLogger(__name__)
+         self.irl_engine: Optional[IRLEngine] = None
+         self.scraper: Optional[UnifiedScraper] = None
+         self.canon_registry: CanonRegistry = CanonRegistry()
+
+         # Initialize AI components
+         self.ai_enricher: Optional[Any] = None
+         self.unified_llm_processor: Optional[Any] = None
+         self.crate_analyzer: Optional[CrateAnalyzer] = None
+
+         # Store LLM config for later use
+         self.llm_config = llm_config
+
+         self._initialize_components()
+
+     def _initialize_components(self) -> None:
+         try:
+             self.irl_engine = IRLEngine(self.config, self.canon_registry)
+             self.logger.info("✅ IRL Engine initialized successfully")
+
+             scraper_config = {
+                 "verbose": False,
+                 "word_count_threshold": 10,
+                 "crawl_config": {
+                     "max_retries": self.config.max_retries,
+                     "timeout": self.config.crawl4ai_timeout,
+                 }
+             }
+             self.scraper = UnifiedScraper(scraper_config)
+             self.logger.info("✅ Unified Scraper initialized successfully")
+
+             # Initialize unified LLM processor if available
+             if UNIFIED_LLM_AVAILABLE and self.llm_config:
+                 try:
+                     if UnifiedLLMProcessor is not None:
+                         self.unified_llm_processor = UnifiedLLMProcessor(self.llm_config)
+                         self.logger.info(f"✅ Unified LLM Processor initialized with provider: {self.llm_config.provider}")
+                     else:
+                         self.logger.warning("⚠️ UnifiedLLMProcessor is None at runtime; skipping initialization.")
+                 except Exception as e:
+                     self.logger.warning(f"⚠️ Failed to initialize Unified LLM Processor: {e}")
+
+             # Initialize Azure OpenAI enricher if available and configured (fallback)
+             elif AZURE_OPENAI_AVAILABLE and self.config.use_azure_openai:
+                 try:
+                     if AzureOpenAIEnricher is not None:
+                         self.ai_enricher = AzureOpenAIEnricher(self.config)  # type: ignore
+                         self.logger.info("✅ Azure OpenAI Enricher initialized successfully")
+                     else:
+                         self.logger.warning("⚠️ AzureOpenAIEnricher is None at runtime; skipping initialization.")
+                 except Exception as e:
+                     self.logger.warning(f"⚠️ Failed to initialize Azure OpenAI Enricher: {e}")
+
+         except Exception as e:
+             self.logger.error(f"❌ Failed to initialize pipeline components: {e}")
+             raise
+
+     async def __aenter__(self) -> "UnifiedSigilPipeline":
+         if self.irl_engine:
+             await self.irl_engine.__aenter__()
+         if self.scraper:
+             await self.scraper.__aenter__()
+         return self
+
+     async def __aexit__(self, exc_type: Optional[type], exc_val: Optional[Exception], exc_tb: Optional[Any]) -> None:
+         if self.irl_engine:
+             await self.irl_engine.__aexit__(exc_type, exc_val, exc_tb)
+         if self.scraper:
+             await self.scraper.__aexit__(exc_type, exc_val, exc_tb)
+
+     async def analyze_crate(self, crate_name: str) -> SacredChainTrace:
+         if not crate_name or not isinstance(crate_name, str):
+             raise ValueError("crate_name must be a non-empty string")
+
+         self.logger.info(f"🔍 Starting analysis of crate: {crate_name}")
+
+         try:
+             documentation_results = await self._gather_documentation(crate_name)
+
+             sacred_chain_trace = await self._perform_sacred_chain_analysis(
+                 crate_name, documentation_results
+             )
+
+             await self._generate_analysis_report(crate_name, sacred_chain_trace)
+
+             self.logger.info(f"✅ Analysis completed for {crate_name}")
+             return sacred_chain_trace
+
+         except Exception as e:
+             self.logger.error(f"❌ Analysis failed for {crate_name}: {e}")
+             raise RuntimeError(f"Analysis failed for {crate_name}: {str(e)}")
+
+     async def _gather_documentation(self, crate_name: str) -> Dict[str, ScrapingResult]:
+         if not self.scraper:
+             raise RuntimeError("Scraper not initialized")
+
+         self.logger.info(f"📚 Gathering documentation for {crate_name}")
+
+         try:
+             results = await self.scraper.scrape_crate_documentation(crate_name)
+
+             successful_sources = [source for source, result in results.items()
+                                   if result.error is None]
+             failed_sources = [source for source, result in results.items()
+                               if result.error is not None]
+
+             self.logger.info(f"✅ Successfully scraped {len(successful_sources)} sources: {successful_sources}")
+             if failed_sources:
+                 self.logger.warning(f"⚠️ Failed to scrape {len(failed_sources)} sources: {failed_sources}")
+
+             return results
+
+         except Exception as e:
+             self.logger.error(f"❌ Documentation gathering failed: {e}")
+             raise
+
+     async def _perform_sacred_chain_analysis(
+         self, crate_name: str, documentation_results: Dict[str, ScrapingResult]
+     ) -> SacredChainTrace:
+         if not self.irl_engine:
+             raise RuntimeError("IRL Engine not initialized")
+
+         self.logger.info(f"🔗 Performing Sacred Chain analysis for {crate_name}")
+
+         try:
+             sacred_chain_trace = await self.irl_engine.analyze_with_sacred_chain(crate_name)
+
+             successful_docs = [result for result in documentation_results.values()
+                                if result.error is None]
+             if successful_docs:
+                 avg_quality = sum(doc.quality_score for doc in successful_docs) / len(successful_docs)
+                 sacred_chain_trace.audit_info["documentation_quality"] = avg_quality
+                 sacred_chain_trace.audit_info["documentation_sources"] = list(documentation_results.keys())
+
+             # Add crate analysis results if available
+             await self._add_crate_analysis_results(crate_name, sacred_chain_trace)
+
+             # Add AI enrichment if available
+             await self._add_ai_enrichment(crate_name, sacred_chain_trace)
+
+             return sacred_chain_trace
+
+         except Exception as e:
+             self.logger.error(f"❌ Sacred Chain analysis failed: {e}")
+             raise
+
+     async def _add_crate_analysis_results(self, crate_name: str, trace: SacredChainTrace) -> None:
+         """Add cargo analysis results to the sacred chain trace"""
+         try:
+             # For now, we'll use a temporary directory approach
+             # In a real implementation, you'd download/extract the crate first
+             self.logger.info(f"🔍 Adding crate analysis results for {crate_name}")
+
+             # This would be implemented based on your crate source strategy
+             # For now, we'll add a placeholder
+             trace.audit_info["crate_analysis"] = {
+                 "status": "not_implemented",
+                 "note": "Crate analysis requires downloading/extracting the crate source"
+             }
+
+         except Exception as e:
+             self.logger.warning(f"⚠️ Failed to add crate analysis results: {e}")
+
+     async def _add_ai_enrichment(self, crate_name: str, trace: SacredChainTrace) -> None:
+         """Add AI enrichment results to the sacred chain trace"""
+         # Use unified LLM processor if available, otherwise fall back to Azure OpenAI
+         if self.unified_llm_processor:
+             await self._add_unified_llm_enrichment(crate_name, trace)
+         elif self.ai_enricher:
+             await self._add_azure_openai_enrichment(crate_name, trace)
+         else:
+             self.logger.info("ℹ️ No AI enricher available, skipping AI enrichment")
+
+     async def _add_unified_llm_enrichment(self, crate_name: str, trace: SacredChainTrace) -> None:
+         """Add enrichment using unified LLM processor"""
+         if not self.unified_llm_processor:
+             return
+
+         try:
+             self.logger.info(f"🤖 Adding unified LLM enrichment for {crate_name}")
+
+             # Create a mock crate metadata for AI analysis
+             # In a real implementation, this would come from your scraping results
+             from .config import CrateMetadata
+
+             mock_crate = CrateMetadata(
+                 name=crate_name,
+                 version="unknown",
+                 description=trace.suggestion or "No description available",
+                 repository="",
+                 keywords=[],
+                 categories=[],
+                 readme="",
+                 downloads=0,
+                 github_stars=0,
+                 dependencies=[],
+                 features={},
+                 code_snippets=[],
+                 readme_sections={},
+                 librs_downloads=None,
+                 source="crates.io",
+                 enhanced_scraping={},
+                 enhanced_features=[],
+                 enhanced_dependencies=[]
+             )
+
+             # Enrich the crate using unified LLM processor
+             enriched_crate = self.unified_llm_processor.enrich_crate(mock_crate)
+
+             # Add enrichment results to trace
+             trace.audit_info["ai_enrichment"] = {
+                 "provider": self.llm_config.provider if self.llm_config else "unknown",
+                 "model": self.llm_config.model if self.llm_config else "unknown",
+                 "readme_summary": enriched_crate.readme_summary,
+                 "use_case": enriched_crate.use_case,
+                 "score": enriched_crate.score,
+                 "factual_counterfactual": enriched_crate.factual_counterfactual
+             }
+
+             self.logger.info(f"✅ Unified LLM enrichment completed for {crate_name}")
+
+         except Exception as e:
+             self.logger.warning(f"⚠️ Failed to add unified LLM enrichment: {e}")
+
+     async def _add_azure_openai_enrichment(self, crate_name: str, trace: SacredChainTrace) -> None:
+         """Add enrichment using Azure OpenAI (fallback method)"""
+         if not self.ai_enricher:
+             return
+
+         try:
+             self.logger.info(f"🤖 Adding Azure OpenAI enrichment for {crate_name}")
+
+             # Create a mock crate metadata for AI analysis
+             # In a real implementation, this would come from your scraping results
+             from .config import CrateMetadata
+
+             mock_crate = CrateMetadata(
+                 name=crate_name,
+                 version="unknown",
+                 description=trace.suggestion or "No description available",
+                 repository="",
+                 keywords=[],
+                 categories=[],
+                 readme="",
+                 downloads=0,
+                 github_stars=0,
+                 dependencies=[],
+                 features={},
+                 code_snippets=[],
+                 readme_sections={},
+                 librs_downloads=None,
+                 source="crates.io",
+                 enhanced_scraping={},
+                 enhanced_features=[],
+                 enhanced_dependencies=[]
+             )
+
+             # Enrich the crate using Azure OpenAI
+             enriched_crate = self.ai_enricher.enrich_crate(mock_crate)
+
+             # Add enrichment results to trace
+             trace.audit_info["ai_enrichment"] = {
+                 "provider": "azure_openai",
+                 "model": self.config.azure_openai_deployment_name,
+                 "readme_summary": enriched_crate.readme_summary,
+                 "use_case": enriched_crate.use_case,
+                 "score": enriched_crate.score,
+                 "factual_counterfactual": enriched_crate.factual_counterfactual
+             }
+
+             self.logger.info(f"✅ Azure OpenAI enrichment completed for {crate_name}")
+
+         except Exception as e:
+             self.logger.warning(f"⚠️ Failed to add Azure OpenAI enrichment: {e}")
+
+     async def _generate_analysis_report(self, crate_name: str, trace: SacredChainTrace) -> None:
+         report_data = {
+             "crate_name": crate_name,
+             "analysis_timestamp": trace.timestamp,
+             "execution_id": trace.execution_id,
+             "verdict": trace.verdict.value,
+             "irl_score": trace.irl_score,
+             "suggestion": trace.suggestion,
+             "context_sources": trace.context_sources,
+             "reasoning_steps": trace.reasoning_steps,
+             "audit_info": trace.audit_info,
+             "canon_version": trace.canon_version,
+         }
+
+         report_file = Path(f"analysis_report_{crate_name}_{int(time.time())}.json")
+         try:
+             with open(report_file, "w") as f:
+                 json.dump(report_data, f, indent=2)
+             self.logger.info(f"📄 Analysis report saved: {report_file}")
+         except IOError as e:
+             self.logger.error(f"❌ Failed to save analysis report: {e}")
+
+     async def analyze_multiple_crates(self, crate_names: List[str]) -> Dict[str, SacredChainTrace]:
+         if not crate_names:
+             return {}
+
+         self.logger.info(f"🚀 Starting concurrent analysis of {len(crate_names)} crates")
+
+         semaphore = asyncio.Semaphore(self.config.n_workers)
+
+         async def analyze_single_crate(crate_name: str) -> "tuple[str, SacredChainTrace]":
+             async with semaphore:
+                 try:
+                     trace = await self.analyze_crate(crate_name)
+                     return crate_name, trace
+                 except Exception as e:
+                     self.logger.error(f"❌ Analysis failed for {crate_name}: {e}")
+                     error_trace = SacredChainTrace(
+                         input_data=crate_name,
+                         context_sources=[],
+                         reasoning_steps=[f"Analysis failed: {str(e)}"],
+                         suggestion="DEFER: Analysis failed",
+                         verdict=TrustVerdict.DEFER,
+                         audit_info={"error": str(e)},
+                         irl_score=0.0,
+                         execution_id=f"error-{int(time.time())}",
+                         timestamp=time.strftime("%Y-%m-%dT%H:%M:%SZ"),
+                         canon_version="1.3.0",
+                     )
+                     return crate_name, error_trace
+
+         tasks = [analyze_single_crate(name) for name in crate_names]
+         results = await asyncio.gather(*tasks, return_exceptions=True)
+
+         analysis_results: Dict[str, SacredChainTrace] = {}
+         for result in results:
+             if isinstance(result, tuple):
+                 crate_name, trace = result
+                 analysis_results[crate_name] = trace
+             else:
+                 self.logger.error(f"❌ Unexpected result type: {type(result)}")
+
+         self.logger.info(f"✅ Completed analysis of {len(analysis_results)} crates")
+         return analysis_results
+
+
+     def get_pipeline_summary(self) -> Dict[str, Any]:
+         """Get a summary of the pipeline configuration and status"""
+         summary = {
+             "pipeline_version": "1.3.0",
+             "components": {
+                 "irl_engine": self.irl_engine is not None,
+                 "scraper": self.scraper is not None,
+                 "canon_registry": self.canon_registry is not None,
+             },
+             "ai_components": {
+                 "unified_llm_processor": self.unified_llm_processor is not None,
+                 "azure_openai_enricher": self.ai_enricher is not None,
+                 "crate_analyzer": self.crate_analyzer is not None,
+             },
+             "configuration": {
+                 "max_tokens": self.config.max_tokens,
+                 "checkpoint_interval": self.config.checkpoint_interval,
+                 "batch_size": self.config.batch_size,
+                 "enable_crawl4ai": self.config.enable_crawl4ai,
+             }
+         }
+
+         # Add LLM configuration if available
+         if self.llm_config:
+             summary["llm_configuration"] = {
+                 "provider": self.llm_config.provider,
+                 "model": self.llm_config.model,
+                 "temperature": self.llm_config.temperature,
+                 "max_tokens": self.llm_config.max_tokens,
+                 "timeout": self.llm_config.timeout,
+                 "max_retries": self.llm_config.max_retries
+             }
+         elif self.config.use_azure_openai:
+             summary["llm_configuration"] = {
+                 "provider": "azure_openai",
+                 "model": self.config.azure_openai_deployment_name,
+                 "endpoint": self.config.azure_openai_endpoint,
+                 "max_tokens": self.config.max_tokens
+             }
+
+         return summary
+
+
+ def create_pipeline_from_args(args: argparse.Namespace) -> UnifiedSigilPipeline:
+     """Create pipeline from command line arguments"""
+     # Create base config
+     config = PipelineConfig()
+
+     # Create LLM config if LLM arguments are provided
+     llm_config = None
+     if hasattr(args, 'llm_provider') and args.llm_provider:
+         if UNIFIED_LLM_AVAILABLE and LLMConfig is not None:
+             llm_config = LLMConfig(
+                 provider=args.llm_provider,
+                 model=args.llm_model or "gpt-4",
+                 api_base=getattr(args, 'llm_api_base', None),
+                 api_key=getattr(args, 'llm_api_key', None),
+                 temperature=getattr(args, 'llm_temperature', 0.2),
+                 max_tokens=getattr(args, 'llm_max_tokens', 256),
+                 timeout=getattr(args, 'llm_timeout', 30),
+                 max_retries=getattr(args, 'llm_max_retries', 3),
+                 # Provider-specific settings
+                 azure_deployment=getattr(args, 'azure_deployment', None),
+                 azure_api_version=getattr(args, 'azure_api_version', None),
+                 ollama_host=getattr(args, 'ollama_host', None),
+                 lmstudio_host=getattr(args, 'lmstudio_host', None)
+             )
+         else:
+             logging.warning("Unified LLM processor not available, falling back to Azure OpenAI")
+
+     return UnifiedSigilPipeline(config, llm_config)
+
+
+ def add_llm_arguments(parser: argparse.ArgumentParser) -> None:
+     """Add LLM-related command line arguments to the parser"""
+     llm_group = parser.add_argument_group('LLM Configuration')
+
+     llm_group.add_argument(
+         '--llm-provider',
+         choices=['azure', 'ollama', 'lmstudio', 'openai', 'anthropic', 'google', 'cohere', 'huggingface'],
+         help='LLM provider to use (default: azure)'
+     )
+
+     llm_group.add_argument(
+         '--llm-model',
+         help='Model name/identifier (e.g., gpt-4, llama2, claude-3)'
+     )
+
+     llm_group.add_argument(
+         '--llm-api-base',
+         help='API base URL (for local providers or custom endpoints)'
+     )
+
+     llm_group.add_argument(
+         '--llm-api-key',
+         help='API key (if required by provider)'
+     )
+
+     llm_group.add_argument(
+         '--llm-temperature',
+         type=float,
+         default=0.2,
+         help='Temperature for LLM generation (default: 0.2)'
+     )
+
+     llm_group.add_argument(
+         '--llm-max-tokens',
+         type=int,
+         default=256,
+         help='Maximum tokens for LLM generation (default: 256)'
+     )
+
+     llm_group.add_argument(
+         '--llm-timeout',
+         type=int,
+         default=30,
+         help='Timeout for LLM API calls in seconds (default: 30)'
+     )
+
+     llm_group.add_argument(
+         '--llm-max-retries',
+         type=int,
+         default=3,
+         help='Maximum retries for LLM API calls (default: 3)'
+     )
+
+     # Provider-specific arguments
+     azure_group = parser.add_argument_group('Azure OpenAI Configuration')
+     azure_group.add_argument(
+         '--azure-deployment',
+         help='Azure OpenAI deployment name'
+     )
+     azure_group.add_argument(
+         '--azure-api-version',
+         help='Azure OpenAI API version'
+     )
+
+     ollama_group = parser.add_argument_group('Ollama Configuration')
+     ollama_group.add_argument(
+         '--ollama-host',
+         default='http://localhost:11434',
+         help='Ollama host URL (default: http://localhost:11434)'
+     )
+
+     lmstudio_group = parser.add_argument_group('LM Studio Configuration')
+     lmstudio_group.add_argument(
+         '--lmstudio-host',
+         default='http://localhost:1234/v1',
+         help='LM Studio host URL (default: http://localhost:1234/v1)'
+     )
+
+
+ async def quick_analyze_crate(crate_name: str, config: Optional[PipelineConfig] = None, llm_config: Optional[Any] = None) -> SacredChainTrace:
+     """Quick analysis of a single crate"""
+     if config is None:
+         config = PipelineConfig()
+
+     async with UnifiedSigilPipeline(config, llm_config) as pipeline:
+         return await pipeline.analyze_crate(crate_name)
+
+
+ async def batch_analyze_crates(crate_names: List[str], config: Optional[PipelineConfig] = None, llm_config: Optional[Any] = None) -> Dict[str, SacredChainTrace]:
+     """Batch analysis of multiple crates"""
+     if config is None:
+         config = PipelineConfig()
+
+     async with UnifiedSigilPipeline(config, llm_config) as pipeline:
+         return await pipeline.analyze_multiple_crates(crate_names)
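For orientation, here is a minimal driver sketch showing how the new module's public entry points fit together: add_llm_arguments() registers the CLI flags defined above, create_pipeline_from_args() builds a UnifiedSigilPipeline from the parsed namespace, and quick_analyze_crate() runs one context-managed analysis. This is hypothetical caller code, not part of the wheel; it assumes the package is installed and that the PipelineConfig() defaults are usable in your environment.

    # Hypothetical driver for the new rust_crate_pipeline.unified_pipeline module.
    import argparse
    import asyncio

    from rust_crate_pipeline.unified_pipeline import (
        add_llm_arguments,
        create_pipeline_from_args,
        quick_analyze_crate,
    )

    async def main() -> None:
        parser = argparse.ArgumentParser(description="Analyze a Rust crate")
        parser.add_argument("crate", help="crate name, e.g. serde")
        add_llm_arguments(parser)  # --llm-provider, --llm-model, etc., as defined above
        args = parser.parse_args()

        pipeline = create_pipeline_from_args(args)   # PipelineConfig() plus optional LLMConfig
        print(pipeline.get_pipeline_summary())       # component and configuration overview

        trace = await quick_analyze_crate(args.crate)  # builds its own pipeline internally
        print(trace.verdict, trace.irl_score)

    if __name__ == "__main__":
        asyncio.run(main())

Batch work goes through analyze_multiple_crates(), which caps concurrency with asyncio.Semaphore(self.config.n_workers) and converts per-crate failures into DEFER error traces instead of aborting the whole batch.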
rust_crate_pipeline/utils/file_utils.py
@@ -3,7 +3,7 @@ import json
  import os
  import shutil
  from datetime import datetime
- from typing import List, Dict
+ from typing import Any, Union


  def create_output_dir(base_name: str = "crate_data") -> str:
@@ -22,7 +22,7 @@ def create_output_dir(base_name: str = "crate_data") -> str:
      return output_dir


- def save_checkpoint(data: List[Dict], prefix: str, output_dir: str) -> str:
+ def save_checkpoint(data: list[dict], prefix: str, output_dir: str) -> str:
      """
      Save processing checkpoint with status metadata

@@ -45,7 +45,7 @@ def save_checkpoint(data: List[Dict], prefix: str, output_dir: str) -> str:
      status = {
          "timestamp": timestamp,
          "total_items": len(data),
-         "checkpoint_file": filename
+         "checkpoint_file": filename,
      }

      status_file = os.path.join(output_dir, f"{prefix}_status_{timestamp}.json")
@@ -55,7 +55,7 @@ def save_checkpoint(data: List[Dict], prefix: str, output_dir: str) -> str:
      return filename


- def safe_file_cleanup(path: str):
+ def safe_file_cleanup(path: str) -> None:
      """Safely remove files or directories"""
      try:
          if os.path.isfile(path):
@@ -70,7 +70,34 @@ def disk_space_check(min_free_gb: float = 1.0) -> bool:
      """Check if sufficient disk space is available"""
      try:
          free_bytes = shutil.disk_usage(".").free
-         free_gb = free_bytes / (1024 ** 3)
+         free_gb = free_bytes / (1024**3)
          return free_gb >= min_free_gb
      except Exception:
          return True  # Assume OK if check fails
+
+
+ def load_rule_zero_typing_quick_lookup(
+     path: Union[str, None] = None,
+ ) -> dict[str, Any]:
+     """
+     Load the Rule Zero Python Typing & PEP8 Quick Lookup Table as a dict.
+
+     Args:
+         path: Optional path to the quick lookup JSON file.
+
+     Returns:
+         Dictionary with quick lookup entries.
+     """
+     if not path:
+         path = os.path.abspath(
+             os.path.join(
+                 os.path.dirname(__file__),
+                 "../../rule_zero_typing_quick_lookup.json",
+             )
+         )
+     try:
+         with open(path, encoding="utf-8") as f:
+             return json.load(f)
+     except Exception as e:
+         print(f"Failed to load Rule Zero typing quick lookup: {e}")
+         return {}
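And a short usage sketch for the updated file_utils helpers (again hypothetical caller code, not shipped in the wheel; the checkpoint payload is illustrative). Note that the default lookup path resolves two directories above utils/, i.e. alongside the installed package, which is why the loader degrades to an empty dict when the JSON file is absent.

    # Hypothetical usage of rust_crate_pipeline.utils.file_utils after this change.
    from rust_crate_pipeline.utils.file_utils import (
        create_output_dir,
        load_rule_zero_typing_quick_lookup,
        save_checkpoint,
    )

    out_dir = create_output_dir("crate_data")  # output directory (see create_output_dir above)
    checkpoint_file = save_checkpoint(
        [{"crate": "serde", "status": "enriched"}],  # data is now annotated as list[dict]
        "enriched",  # filename prefix
        out_dir,
    )

    lookup = load_rule_zero_typing_quick_lookup()  # {} if the lookup JSON cannot be read
    print(checkpoint_file, len(lookup))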