haoline-0.3.0-py3-none-any.whl

This diff shows the contents of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects changes between package versions as they appear in their public registries.
Files changed (70)
  1. haoline/.streamlit/config.toml +10 -0
  2. haoline/__init__.py +248 -0
  3. haoline/analyzer.py +935 -0
  4. haoline/cli.py +2712 -0
  5. haoline/compare.py +811 -0
  6. haoline/compare_visualizations.py +1564 -0
  7. haoline/edge_analysis.py +525 -0
  8. haoline/eval/__init__.py +131 -0
  9. haoline/eval/adapters.py +844 -0
  10. haoline/eval/cli.py +390 -0
  11. haoline/eval/comparison.py +542 -0
  12. haoline/eval/deployment.py +633 -0
  13. haoline/eval/schemas.py +833 -0
  14. haoline/examples/__init__.py +15 -0
  15. haoline/examples/basic_inspection.py +74 -0
  16. haoline/examples/compare_models.py +117 -0
  17. haoline/examples/hardware_estimation.py +78 -0
  18. haoline/format_adapters.py +1001 -0
  19. haoline/formats/__init__.py +123 -0
  20. haoline/formats/coreml.py +250 -0
  21. haoline/formats/gguf.py +483 -0
  22. haoline/formats/openvino.py +255 -0
  23. haoline/formats/safetensors.py +273 -0
  24. haoline/formats/tflite.py +369 -0
  25. haoline/hardware.py +2307 -0
  26. haoline/hierarchical_graph.py +462 -0
  27. haoline/html_export.py +1573 -0
  28. haoline/layer_summary.py +769 -0
  29. haoline/llm_summarizer.py +465 -0
  30. haoline/op_icons.py +618 -0
  31. haoline/operational_profiling.py +1492 -0
  32. haoline/patterns.py +1116 -0
  33. haoline/pdf_generator.py +265 -0
  34. haoline/privacy.py +250 -0
  35. haoline/pydantic_models.py +241 -0
  36. haoline/report.py +1923 -0
  37. haoline/report_sections.py +539 -0
  38. haoline/risks.py +521 -0
  39. haoline/schema.py +523 -0
  40. haoline/streamlit_app.py +2024 -0
  41. haoline/tests/__init__.py +4 -0
  42. haoline/tests/conftest.py +123 -0
  43. haoline/tests/test_analyzer.py +868 -0
  44. haoline/tests/test_compare_visualizations.py +293 -0
  45. haoline/tests/test_edge_analysis.py +243 -0
  46. haoline/tests/test_eval.py +604 -0
  47. haoline/tests/test_format_adapters.py +460 -0
  48. haoline/tests/test_hardware.py +237 -0
  49. haoline/tests/test_hardware_recommender.py +90 -0
  50. haoline/tests/test_hierarchical_graph.py +326 -0
  51. haoline/tests/test_html_export.py +180 -0
  52. haoline/tests/test_layer_summary.py +428 -0
  53. haoline/tests/test_llm_patterns.py +540 -0
  54. haoline/tests/test_llm_summarizer.py +339 -0
  55. haoline/tests/test_patterns.py +774 -0
  56. haoline/tests/test_pytorch.py +327 -0
  57. haoline/tests/test_report.py +383 -0
  58. haoline/tests/test_risks.py +398 -0
  59. haoline/tests/test_schema.py +417 -0
  60. haoline/tests/test_tensorflow.py +380 -0
  61. haoline/tests/test_visualizations.py +316 -0
  62. haoline/universal_ir.py +856 -0
  63. haoline/visualizations.py +1086 -0
  64. haoline/visualize_yolo.py +44 -0
  65. haoline/web.py +110 -0
  66. haoline-0.3.0.dist-info/METADATA +471 -0
  67. haoline-0.3.0.dist-info/RECORD +70 -0
  68. haoline-0.3.0.dist-info/WHEEL +4 -0
  69. haoline-0.3.0.dist-info/entry_points.txt +5 -0
  70. haoline-0.3.0.dist-info/licenses/LICENSE +22 -0
haoline/llm_summarizer.py
@@ -0,0 +1,465 @@
+ # Copyright (c) 2025 HaoLine Contributors
+ # SPDX-License-Identifier: MIT
+
+ """
+ LLM Summarizer module for HaoLine.
+
+ Generates human-readable model summaries using LLM APIs (OpenAI, etc.).
+ Takes the structured JSON report and produces:
+ - Short summary (1-2 sentences) for quick overview
+ - Detailed summary (paragraph) for model cards
+
+ Usage:
+     summarizer = LLMSummarizer()  # Uses OPENAI_API_KEY env var
+     result = summarizer.summarize(report)
+     print(result.short_summary)
+     print(result.detailed_summary)
+ """
+
+ from __future__ import annotations
+
+ import json
+ import logging
+ import os
+ from dataclasses import dataclass
+ from typing import TYPE_CHECKING, Any, ClassVar
+
+ if TYPE_CHECKING:
+     from .report import InspectionReport
+
+ # Check for OpenAI availability
+ _OPENAI_AVAILABLE = False
+ try:
+     import openai
+     from openai import OpenAI
+
+     _OPENAI_AVAILABLE = True
+ except ImportError:
+     openai = None  # type: ignore
+     OpenAI = None  # type: ignore
+
+
+ def is_available() -> bool:
+     """Check if LLM summarization is available (openai package installed)."""
+     return _OPENAI_AVAILABLE
+
+
+ def has_api_key() -> bool:
+     """Check if OpenAI API key is configured."""
+     return bool(os.environ.get("OPENAI_API_KEY"))
+
+
+ @dataclass
+ class LLMSummary:
+     """Container for LLM-generated summaries."""
+
+     short_summary: str  # 1-2 sentences
+     detailed_summary: str  # Full paragraph
+     model_used: str  # e.g., "gpt-4o-mini"
+     tokens_used: int  # Total tokens consumed
+     success: bool  # Whether summarization succeeded
+     error_message: str | None = None  # Error details if failed
+
+
+ # Prompt templates for model summarization
+ SYSTEM_PROMPT = """You are an expert ML engineer analyzing ONNX model architectures.
+ Your task is to provide clear, accurate summaries of model structure and characteristics.
+ Be concise but informative. Focus on:
+ - Architecture type and key patterns (CNN, Transformer, RNN, hybrid)
+ - Model size, computational complexity, and memory requirements
+ - Hardware deployment considerations (VRAM, latency, bottlenecks)
+ - Quantization status and precision characteristics
+ - KV cache requirements for transformer/LLM models
+ - Potential use cases based on structure
+ - Any notable characteristics, risks, or optimization opportunities
+
+ Respond in plain text without markdown formatting."""
+
+ SHORT_SUMMARY_PROMPT = """Based on this ONNX model analysis, write a 1-2 sentence summary.
+ Focus on: what type of model this is, its size, quantization status, and primary use case.
+
+ Model Analysis:
+ {report_json}
+
+ Write only the summary, no preamble or explanation."""
+
+ DETAILED_SUMMARY_PROMPT = """Based on this ONNX model analysis, write a detailed paragraph (4-6 sentences).
+ Include:
+ 1. Architecture type and structure (e.g., CNN, Transformer, hybrid, LLM)
+ 2. Model complexity (parameters, FLOPs, model size, peak memory)
+ 3. Precision and quantization status (FP32, FP16, INT8, mixed precision)
+ 4. Key architectural patterns detected (attention heads, residual blocks, etc.)
+ 5. Hardware deployment analysis:
+    - VRAM requirements and whether it fits on target GPU
+    - Bottleneck classification (compute-bound vs memory-bound)
+    - Theoretical latency and throughput
+ 6. For transformers: KV cache requirements per token and full context
+ 7. Any risk signals, deployment concerns, or optimization recommendations
+
+ Model Analysis:
+ {report_json}
+
+ Write only the summary paragraph, no preamble or bullet points."""
+
+
+ class LLMSummarizer:
+     """
+     Generate human-readable summaries of ONNX models using LLM APIs.
+
+     Supports OpenAI API with graceful fallback when unavailable.
+
+     Example:
+         summarizer = LLMSummarizer()
+         result = summarizer.summarize(report)
+         if result.success:
+             print(result.detailed_summary)
+     """
+
+     DEFAULT_MODEL: ClassVar[str] = "gpt-4o-mini"  # Cost-effective, fast, good quality
+     FALLBACK_MODELS: ClassVar[list[str]] = [
+         "gpt-3.5-turbo",
+         "gpt-4o",
+     ]  # Fallbacks if primary fails
+
+     def __init__(
+         self,
+         api_key: str | None = None,
+         model: str | None = None,
+         logger: logging.Logger | None = None,
+     ):
+         """
+         Initialize the LLM summarizer.
+
+         Args:
+             api_key: OpenAI API key. If None, reads from OPENAI_API_KEY env var.
+             model: Model to use. If None, uses gpt-4o-mini.
+             logger: Logger for diagnostic output.
+         """
+         self.logger = logger or logging.getLogger("haoline.llm")
+         self.model = model or self.DEFAULT_MODEL
+
+         if not _OPENAI_AVAILABLE:
+             self.client = None
+             self.logger.warning("openai package not installed. LLM summarization disabled.")
+             return
+
+         # Get API key from parameter or environment
+         resolved_key = api_key or os.environ.get("OPENAI_API_KEY")
+         if not resolved_key:
+             self.client = None
+             self.logger.warning("No OpenAI API key found. Set OPENAI_API_KEY environment variable.")
+             return
+
+         self.client = OpenAI(api_key=resolved_key)
+         self.logger.debug(f"LLM summarizer initialized with model: {self.model}")
+
+     def is_configured(self) -> bool:
+         """Check if the summarizer is properly configured and ready to use."""
+         return self.client is not None
+
+     def summarize(self, report: InspectionReport) -> LLMSummary:
+         """
+         Generate both short and detailed summaries for a model report.
+
+         Args:
+             report: The inspection report to summarize.
+
+         Returns:
+             LLMSummary with both summaries and metadata.
+         """
+         if not self.is_configured():
+             return LLMSummary(
+                 short_summary="",
+                 detailed_summary="",
+                 model_used="",
+                 tokens_used=0,
+                 success=False,
+                 error_message="LLM summarizer not configured. Install openai and set OPENAI_API_KEY.",
+             )
+
+         # Prepare a condensed version of the report for the prompt
+         report_json = self._prepare_report_for_prompt(report)
+
+         total_tokens = 0
+         short_summary = ""
+         detailed_summary = ""
+         error_message = None
+
+         # Generate short summary
+         try:
+             short_summary, tokens = self._generate_completion(
+                 SHORT_SUMMARY_PROMPT.format(report_json=report_json)
+             )
+             total_tokens += tokens
+             self.logger.debug(f"Short summary generated ({tokens} tokens)")
+         except Exception as e:
+             self.logger.warning(f"Failed to generate short summary: {e}")
+             error_message = str(e)
+
+         # Generate detailed summary
+         try:
+             detailed_summary, tokens = self._generate_completion(
+                 DETAILED_SUMMARY_PROMPT.format(report_json=report_json)
+             )
+             total_tokens += tokens
+             self.logger.debug(f"Detailed summary generated ({tokens} tokens)")
+         except Exception as e:
+             self.logger.warning(f"Failed to generate detailed summary: {e}")
+             if not error_message:
+                 error_message = str(e)
+
+         success = bool(short_summary or detailed_summary)
+
+         return LLMSummary(
+             short_summary=short_summary,
+             detailed_summary=detailed_summary,
+             model_used=self.model,
+             tokens_used=total_tokens,
+             success=success,
+             error_message=error_message if not success else None,
+         )
+
+     def generate_short_summary(self, report: InspectionReport) -> str:
+         """Generate only a short summary (1-2 sentences)."""
+         if not self.is_configured():
+             return ""
+
+         report_json = self._prepare_report_for_prompt(report)
+         try:
+             summary, _ = self._generate_completion(
+                 SHORT_SUMMARY_PROMPT.format(report_json=report_json)
+             )
+             return summary
+         except Exception as e:
+             self.logger.error(f"Failed to generate short summary: {e}")
+             return ""
+
+     def generate_detailed_summary(self, report: InspectionReport) -> str:
+         """Generate only a detailed summary (paragraph)."""
+         if not self.is_configured():
+             return ""
+
+         report_json = self._prepare_report_for_prompt(report)
+         try:
+             summary, _ = self._generate_completion(
+                 DETAILED_SUMMARY_PROMPT.format(report_json=report_json)
+             )
+             return summary
+         except Exception as e:
+             self.logger.error(f"Failed to generate detailed summary: {e}")
+             return ""
+
+     def _generate_completion(self, user_prompt: str) -> tuple[str, int]:
+         """
+         Call the OpenAI API to generate a completion.
+
+         Args:
+             user_prompt: The user prompt to send.
+
+         Returns:
+             Tuple of (response_text, tokens_used)
+
+         Raises:
+             Exception: If API call fails after retries.
+             RuntimeError: If client is not configured.
+         """
+         if self.client is None:
+             raise RuntimeError("LLM client is not configured")
+
+         try:
+             response = self.client.chat.completions.create(
+                 model=self.model,
+                 messages=[
+                     {"role": "system", "content": SYSTEM_PROMPT},
+                     {"role": "user", "content": user_prompt},
+                 ],
+                 max_tokens=500,
+                 temperature=0.3,  # Lower temperature for more consistent outputs
+             )
+
+             content = response.choices[0].message.content or ""
+             tokens = response.usage.total_tokens if response.usage else 0
+
+             return content.strip(), tokens
+
+         except openai.RateLimitError as e:
+             self.logger.warning(f"Rate limit hit: {e}. Consider adding retry logic.")
+             raise
+
+         except openai.APIConnectionError as e:
+             self.logger.error(f"API connection error: {e}")
+             raise
+
+         except openai.AuthenticationError as e:
+             self.logger.error(f"Authentication failed: {e}. Check your API key.")
+             raise
+
+         except Exception as e:
+             self.logger.error(f"Unexpected error calling OpenAI API: {e}")
+             raise
+
+     def _prepare_report_for_prompt(self, report: InspectionReport) -> str:
+         """
+         Prepare a condensed version of the report for LLM consumption.
+
+         Keeps the most relevant information while staying within token limits.
+         Includes all analysis sections: metrics, precision, memory, hardware, KV cache, etc.
+         """
+         # Build a focused summary dict
+         summary: dict[str, Any] = {
+             "model_name": report.metadata.path.split("/")[-1].split("\\")[-1],
+             "producer": f"{report.metadata.producer_name} {report.metadata.producer_version}".strip(),
+             "opsets": report.metadata.opsets,
+         }
+
+         if report.graph_summary:
+             summary["graph"] = {
+                 "nodes": report.graph_summary.num_nodes,
+                 "inputs": report.graph_summary.num_inputs,
+                 "outputs": report.graph_summary.num_outputs,
+                 "initializers": report.graph_summary.num_initializers,
+                 "input_shapes": report.graph_summary.input_shapes,
+                 "output_shapes": report.graph_summary.output_shapes,
+                 "top_operators": dict(
+                     sorted(report.graph_summary.op_type_counts.items(), key=lambda x: -x[1])[:10]
+                 ),
+             }
+
+         if report.param_counts:
+             param_summary: dict[str, Any] = {
+                 "total": report.param_counts.total,
+                 "by_op_type": dict(
+                     sorted(report.param_counts.by_op_type.items(), key=lambda x: -x[1])[:5]
+                 ),
+             }
+             # Precision breakdown (Story 41.5: LLM prompt enhancement)
+             if report.param_counts.precision_breakdown:
+                 param_summary["precision_breakdown"] = report.param_counts.precision_breakdown
+             if report.param_counts.is_quantized:
+                 param_summary["is_quantized"] = True
+             if report.param_counts.quantized_ops:
+                 param_summary["quantized_ops"] = report.param_counts.quantized_ops[:5]
+             # Shared weights
+             if report.param_counts.num_shared_weights > 0:
+                 param_summary["num_shared_weights"] = report.param_counts.num_shared_weights
+             summary["parameters"] = param_summary
+
+         if report.flop_counts:
+             summary["flops"] = {
+                 "total": report.flop_counts.total,
+                 "by_op_type": dict(
+                     sorted(report.flop_counts.by_op_type.items(), key=lambda x: -x[1])[:5]
+                 ),
+             }
+
+         if report.memory_estimates:
+             mem = report.memory_estimates
+             memory_summary: dict[str, Any] = {
+                 "model_size_bytes": mem.model_size_bytes,
+                 "peak_activation_bytes": mem.peak_activation_bytes,
+             }
+             # KV Cache for transformers (Story 41.5)
+             if mem.kv_cache_bytes_per_token > 0:
+                 memory_summary["kv_cache"] = {
+                     "bytes_per_token": mem.kv_cache_bytes_per_token,
+                     "bytes_full_context": mem.kv_cache_bytes_full_context,
+                 }
+                 if mem.kv_cache_config:
+                     memory_summary["kv_cache"]["config"] = mem.kv_cache_config
+             # Memory breakdown by op type (Story 41.5)
+             if mem.breakdown:
+                 bd = mem.breakdown
+                 if bd.weights_by_op_type:
+                     memory_summary["weights_by_op_type"] = dict(
+                         sorted(bd.weights_by_op_type.items(), key=lambda x: -x[1])[:5]
+                     )
+                 if bd.activations_by_op_type:
+                     memory_summary["activations_by_op_type"] = dict(
+                         sorted(bd.activations_by_op_type.items(), key=lambda x: -x[1])[:5]
+                     )
+             summary["memory"] = memory_summary
+
+         summary["architecture_type"] = report.architecture_type
+
+         if report.detected_blocks:
+             block_types: dict[str, int] = {}
+             for block in report.detected_blocks:
+                 block_types[block.block_type] = block_types.get(block.block_type, 0) + 1
+             summary["detected_blocks"] = block_types
+
+         if report.risk_signals:
+             summary["risks"] = [
+                 {"id": r.id, "severity": r.severity, "description": r.description}
+                 for r in report.risk_signals[:5]  # Top 5 risks
+             ]
+
+         if report.hardware_estimates:
+             hw = report.hardware_estimates
+             hw_summary: dict[str, Any] = {
+                 "device": hw.device,
+                 "precision": hw.precision,
+                 "batch_size": hw.batch_size,
+                 "vram_required_bytes": hw.vram_required_bytes,
+                 "fits_in_vram": hw.fits_in_vram,
+                 "theoretical_latency_ms": round(hw.theoretical_latency_ms, 2),
+                 "bottleneck": hw.bottleneck,
+             }
+             # Extended hardware metrics (Story 41.5)
+             if hasattr(hw, "compute_utilization_estimate"):
+                 hw_summary["compute_utilization"] = round(hw.compute_utilization_estimate * 100, 1)
+             if hasattr(hw, "gpu_saturation"):
+                 hw_summary["gpu_saturation_percent"] = round(hw.gpu_saturation * 100, 2)
+             if hasattr(hw, "throughput_fps"):
+                 hw_summary["throughput_fps"] = round(hw.throughput_fps, 1)
+             summary["hardware_estimates"] = hw_summary
+
+         # System requirements if available
+         if hasattr(report, "system_requirements") and report.system_requirements:
+             sr = report.system_requirements
+             summary["system_requirements"] = {
+                 "minimum": {
+                     "gpu": sr.minimum.gpu,
+                     "vram_gb": sr.minimum.vram_gb,
+                     "description": sr.minimum.description,
+                 },
+                 "recommended": {
+                     "gpu": sr.recommended.gpu,
+                     "vram_gb": sr.recommended.vram_gb,
+                     "description": sr.recommended.description,
+                 },
+             }
+
+         # Bottleneck analysis with recommendations (Story 41.5.7)
+         if hasattr(report, "bottleneck_analysis") and report.bottleneck_analysis:
+             ba = report.bottleneck_analysis
+             summary["bottleneck_analysis"] = {
+                 "type": ba.bottleneck_type,
+                 "compute_ratio": ba.compute_ratio,
+                 "memory_ratio": ba.memory_ratio,
+                 "efficiency_percent": ba.efficiency_percent,
+                 "recommendations": ba.recommendations[:3],  # Top 3 recommendations
+             }
+
+         return json.dumps(summary, indent=2)
+
+
+ def summarize_report(
+     report: InspectionReport,
+     api_key: str | None = None,
+     model: str | None = None,
+     logger: logging.Logger | None = None,
+ ) -> LLMSummary:
+     """
+     Convenience function to generate LLM summaries for a report.
+
+     Args:
+         report: The inspection report to summarize.
+         api_key: OpenAI API key (optional, uses env var if not provided).
+         model: Model to use (optional, defaults to gpt-4o-mini).
+         logger: Logger for output.
+
+     Returns:
+         LLMSummary with results.
+     """
+     summarizer = LLMSummarizer(api_key=api_key, model=model, logger=logger)
+     return summarizer.summarize(report)
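
For orientation, a minimal usage sketch based on the module's own docstring (not part of the package diff). It assumes an InspectionReport named report has already been produced elsewhere by HaoLine's analyzer:

    from haoline import llm_summarizer

    if llm_summarizer.is_available() and llm_summarizer.has_api_key():
        summarizer = llm_summarizer.LLMSummarizer()  # defaults to gpt-4o-mini, reads OPENAI_API_KEY
        result = summarizer.summarize(report)  # report: an InspectionReport from the analyzer
        if result.success:
            print(result.short_summary)
            print(result.detailed_summary)
        else:
            print(f"Summarization failed: {result.error_message}")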