empathy-framework 3.7.0__py3-none-any.whl → 3.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (274)
  1. coach_wizards/code_reviewer_README.md +60 -0
  2. coach_wizards/code_reviewer_wizard.py +180 -0
  3. {empathy_framework-3.7.0.dist-info → empathy_framework-3.8.0.dist-info}/METADATA +148 -11
  4. empathy_framework-3.8.0.dist-info/RECORD +333 -0
  5. {empathy_framework-3.7.0.dist-info → empathy_framework-3.8.0.dist-info}/top_level.txt +5 -1
  6. empathy_healthcare_plugin/monitors/__init__.py +9 -0
  7. empathy_healthcare_plugin/monitors/clinical_protocol_monitor.py +315 -0
  8. empathy_healthcare_plugin/monitors/monitoring/__init__.py +44 -0
  9. empathy_healthcare_plugin/monitors/monitoring/protocol_checker.py +300 -0
  10. empathy_healthcare_plugin/monitors/monitoring/protocol_loader.py +214 -0
  11. empathy_healthcare_plugin/monitors/monitoring/sensor_parsers.py +306 -0
  12. empathy_healthcare_plugin/monitors/monitoring/trajectory_analyzer.py +389 -0
  13. empathy_llm_toolkit/agent_factory/__init__.py +53 -0
  14. empathy_llm_toolkit/agent_factory/adapters/__init__.py +85 -0
  15. empathy_llm_toolkit/agent_factory/adapters/autogen_adapter.py +312 -0
  16. empathy_llm_toolkit/agent_factory/adapters/crewai_adapter.py +454 -0
  17. empathy_llm_toolkit/agent_factory/adapters/haystack_adapter.py +298 -0
  18. empathy_llm_toolkit/agent_factory/adapters/langchain_adapter.py +362 -0
  19. empathy_llm_toolkit/agent_factory/adapters/langgraph_adapter.py +333 -0
  20. empathy_llm_toolkit/agent_factory/adapters/native.py +228 -0
  21. empathy_llm_toolkit/agent_factory/adapters/wizard_adapter.py +426 -0
  22. empathy_llm_toolkit/agent_factory/base.py +305 -0
  23. empathy_llm_toolkit/agent_factory/crews/__init__.py +67 -0
  24. empathy_llm_toolkit/agent_factory/crews/code_review.py +1113 -0
  25. empathy_llm_toolkit/agent_factory/crews/health_check.py +1246 -0
  26. empathy_llm_toolkit/agent_factory/crews/refactoring.py +1128 -0
  27. empathy_llm_toolkit/agent_factory/crews/security_audit.py +1018 -0
  28. empathy_llm_toolkit/agent_factory/decorators.py +286 -0
  29. empathy_llm_toolkit/agent_factory/factory.py +558 -0
  30. empathy_llm_toolkit/agent_factory/framework.py +192 -0
  31. empathy_llm_toolkit/agent_factory/memory_integration.py +324 -0
  32. empathy_llm_toolkit/agent_factory/resilient.py +320 -0
  33. empathy_llm_toolkit/cli/__init__.py +8 -0
  34. empathy_llm_toolkit/cli/sync_claude.py +487 -0
  35. empathy_llm_toolkit/code_health.py +150 -3
  36. empathy_llm_toolkit/config/__init__.py +29 -0
  37. empathy_llm_toolkit/config/unified.py +295 -0
  38. empathy_llm_toolkit/routing/__init__.py +32 -0
  39. empathy_llm_toolkit/routing/model_router.py +362 -0
  40. empathy_llm_toolkit/security/IMPLEMENTATION_SUMMARY.md +413 -0
  41. empathy_llm_toolkit/security/PHASE2_COMPLETE.md +384 -0
  42. empathy_llm_toolkit/security/PHASE2_SECRETS_DETECTOR_COMPLETE.md +271 -0
  43. empathy_llm_toolkit/security/QUICK_REFERENCE.md +316 -0
  44. empathy_llm_toolkit/security/README.md +262 -0
  45. empathy_llm_toolkit/security/__init__.py +62 -0
  46. empathy_llm_toolkit/security/audit_logger.py +929 -0
  47. empathy_llm_toolkit/security/audit_logger_example.py +152 -0
  48. empathy_llm_toolkit/security/pii_scrubber.py +640 -0
  49. empathy_llm_toolkit/security/secrets_detector.py +678 -0
  50. empathy_llm_toolkit/security/secrets_detector_example.py +304 -0
  51. empathy_llm_toolkit/security/secure_memdocs.py +1192 -0
  52. empathy_llm_toolkit/security/secure_memdocs_example.py +278 -0
  53. empathy_llm_toolkit/wizards/__init__.py +38 -0
  54. empathy_llm_toolkit/wizards/base_wizard.py +364 -0
  55. empathy_llm_toolkit/wizards/customer_support_wizard.py +190 -0
  56. empathy_llm_toolkit/wizards/healthcare_wizard.py +362 -0
  57. empathy_llm_toolkit/wizards/patient_assessment_README.md +64 -0
  58. empathy_llm_toolkit/wizards/patient_assessment_wizard.py +193 -0
  59. empathy_llm_toolkit/wizards/technology_wizard.py +194 -0
  60. empathy_os/__init__.py +52 -52
  61. empathy_os/adaptive/__init__.py +13 -0
  62. empathy_os/adaptive/task_complexity.py +127 -0
  63. empathy_os/cache/__init__.py +117 -0
  64. empathy_os/cache/base.py +166 -0
  65. empathy_os/cache/dependency_manager.py +253 -0
  66. empathy_os/cache/hash_only.py +248 -0
  67. empathy_os/cache/hybrid.py +390 -0
  68. empathy_os/cache/storage.py +282 -0
  69. empathy_os/cli.py +118 -8
  70. empathy_os/cli_unified.py +121 -1
  71. empathy_os/config/__init__.py +63 -0
  72. empathy_os/config/xml_config.py +239 -0
  73. empathy_os/config.py +2 -1
  74. empathy_os/dashboard/__init__.py +15 -0
  75. empathy_os/dashboard/server.py +743 -0
  76. empathy_os/memory/__init__.py +195 -0
  77. empathy_os/memory/claude_memory.py +466 -0
  78. empathy_os/memory/config.py +224 -0
  79. empathy_os/memory/control_panel.py +1298 -0
  80. empathy_os/memory/edges.py +179 -0
  81. empathy_os/memory/graph.py +567 -0
  82. empathy_os/memory/long_term.py +1194 -0
  83. empathy_os/memory/nodes.py +179 -0
  84. empathy_os/memory/redis_bootstrap.py +540 -0
  85. empathy_os/memory/security/__init__.py +31 -0
  86. empathy_os/memory/security/audit_logger.py +930 -0
  87. empathy_os/memory/security/pii_scrubber.py +640 -0
  88. empathy_os/memory/security/secrets_detector.py +678 -0
  89. empathy_os/memory/short_term.py +2119 -0
  90. empathy_os/memory/storage/__init__.py +15 -0
  91. empathy_os/memory/summary_index.py +583 -0
  92. empathy_os/memory/unified.py +619 -0
  93. empathy_os/metrics/__init__.py +12 -0
  94. empathy_os/metrics/prompt_metrics.py +190 -0
  95. empathy_os/models/__init__.py +136 -0
  96. empathy_os/models/__main__.py +13 -0
  97. empathy_os/models/cli.py +655 -0
  98. empathy_os/models/empathy_executor.py +354 -0
  99. empathy_os/models/executor.py +252 -0
  100. empathy_os/models/fallback.py +671 -0
  101. empathy_os/models/provider_config.py +563 -0
  102. empathy_os/models/registry.py +382 -0
  103. empathy_os/models/tasks.py +302 -0
  104. empathy_os/models/telemetry.py +548 -0
  105. empathy_os/models/token_estimator.py +378 -0
  106. empathy_os/models/validation.py +274 -0
  107. empathy_os/monitoring/__init__.py +52 -0
  108. empathy_os/monitoring/alerts.py +23 -0
  109. empathy_os/monitoring/alerts_cli.py +268 -0
  110. empathy_os/monitoring/multi_backend.py +271 -0
  111. empathy_os/monitoring/otel_backend.py +363 -0
  112. empathy_os/optimization/__init__.py +19 -0
  113. empathy_os/optimization/context_optimizer.py +272 -0
  114. empathy_os/plugins/__init__.py +28 -0
  115. empathy_os/plugins/base.py +361 -0
  116. empathy_os/plugins/registry.py +268 -0
  117. empathy_os/project_index/__init__.py +30 -0
  118. empathy_os/project_index/cli.py +335 -0
  119. empathy_os/project_index/crew_integration.py +430 -0
  120. empathy_os/project_index/index.py +425 -0
  121. empathy_os/project_index/models.py +501 -0
  122. empathy_os/project_index/reports.py +473 -0
  123. empathy_os/project_index/scanner.py +538 -0
  124. empathy_os/prompts/__init__.py +61 -0
  125. empathy_os/prompts/config.py +77 -0
  126. empathy_os/prompts/context.py +177 -0
  127. empathy_os/prompts/parser.py +285 -0
  128. empathy_os/prompts/registry.py +313 -0
  129. empathy_os/prompts/templates.py +208 -0
  130. empathy_os/resilience/__init__.py +56 -0
  131. empathy_os/resilience/circuit_breaker.py +256 -0
  132. empathy_os/resilience/fallback.py +179 -0
  133. empathy_os/resilience/health.py +300 -0
  134. empathy_os/resilience/retry.py +209 -0
  135. empathy_os/resilience/timeout.py +135 -0
  136. empathy_os/routing/__init__.py +43 -0
  137. empathy_os/routing/chain_executor.py +433 -0
  138. empathy_os/routing/classifier.py +217 -0
  139. empathy_os/routing/smart_router.py +234 -0
  140. empathy_os/routing/wizard_registry.py +307 -0
  141. empathy_os/trust/__init__.py +28 -0
  142. empathy_os/trust/circuit_breaker.py +579 -0
  143. empathy_os/validation/__init__.py +19 -0
  144. empathy_os/validation/xml_validator.py +281 -0
  145. empathy_os/wizard_factory_cli.py +170 -0
  146. empathy_os/workflows/__init__.py +360 -0
  147. empathy_os/workflows/base.py +1660 -0
  148. empathy_os/workflows/bug_predict.py +962 -0
  149. empathy_os/workflows/code_review.py +960 -0
  150. empathy_os/workflows/code_review_adapters.py +310 -0
  151. empathy_os/workflows/code_review_pipeline.py +720 -0
  152. empathy_os/workflows/config.py +600 -0
  153. empathy_os/workflows/dependency_check.py +648 -0
  154. empathy_os/workflows/document_gen.py +1069 -0
  155. empathy_os/workflows/documentation_orchestrator.py +1205 -0
  156. empathy_os/workflows/health_check.py +679 -0
  157. empathy_os/workflows/keyboard_shortcuts/__init__.py +39 -0
  158. empathy_os/workflows/keyboard_shortcuts/generators.py +386 -0
  159. empathy_os/workflows/keyboard_shortcuts/parsers.py +414 -0
  160. empathy_os/workflows/keyboard_shortcuts/prompts.py +295 -0
  161. empathy_os/workflows/keyboard_shortcuts/schema.py +193 -0
  162. empathy_os/workflows/keyboard_shortcuts/workflow.py +505 -0
  163. empathy_os/workflows/manage_documentation.py +804 -0
  164. empathy_os/workflows/new_sample_workflow1.py +146 -0
  165. empathy_os/workflows/new_sample_workflow1_README.md +150 -0
  166. empathy_os/workflows/perf_audit.py +687 -0
  167. empathy_os/workflows/pr_review.py +748 -0
  168. empathy_os/workflows/progress.py +445 -0
  169. empathy_os/workflows/progress_server.py +322 -0
  170. empathy_os/workflows/refactor_plan.py +693 -0
  171. empathy_os/workflows/release_prep.py +808 -0
  172. empathy_os/workflows/research_synthesis.py +404 -0
  173. empathy_os/workflows/secure_release.py +585 -0
  174. empathy_os/workflows/security_adapters.py +297 -0
  175. empathy_os/workflows/security_audit.py +1046 -0
  176. empathy_os/workflows/step_config.py +234 -0
  177. empathy_os/workflows/test5.py +125 -0
  178. empathy_os/workflows/test5_README.md +158 -0
  179. empathy_os/workflows/test_gen.py +1855 -0
  180. empathy_os/workflows/test_lifecycle.py +526 -0
  181. empathy_os/workflows/test_maintenance.py +626 -0
  182. empathy_os/workflows/test_maintenance_cli.py +590 -0
  183. empathy_os/workflows/test_maintenance_crew.py +821 -0
  184. empathy_os/workflows/xml_enhanced_crew.py +285 -0
  185. empathy_software_plugin/cli/__init__.py +120 -0
  186. empathy_software_plugin/cli/inspect.py +362 -0
  187. empathy_software_plugin/cli.py +3 -1
  188. empathy_software_plugin/wizards/__init__.py +42 -0
  189. empathy_software_plugin/wizards/advanced_debugging_wizard.py +392 -0
  190. empathy_software_plugin/wizards/agent_orchestration_wizard.py +511 -0
  191. empathy_software_plugin/wizards/ai_collaboration_wizard.py +503 -0
  192. empathy_software_plugin/wizards/ai_context_wizard.py +441 -0
  193. empathy_software_plugin/wizards/ai_documentation_wizard.py +503 -0
  194. empathy_software_plugin/wizards/base_wizard.py +288 -0
  195. empathy_software_plugin/wizards/book_chapter_wizard.py +519 -0
  196. empathy_software_plugin/wizards/code_review_wizard.py +606 -0
  197. empathy_software_plugin/wizards/debugging/__init__.py +50 -0
  198. empathy_software_plugin/wizards/debugging/bug_risk_analyzer.py +414 -0
  199. empathy_software_plugin/wizards/debugging/config_loaders.py +442 -0
  200. empathy_software_plugin/wizards/debugging/fix_applier.py +469 -0
  201. empathy_software_plugin/wizards/debugging/language_patterns.py +383 -0
  202. empathy_software_plugin/wizards/debugging/linter_parsers.py +470 -0
  203. empathy_software_plugin/wizards/debugging/verification.py +369 -0
  204. empathy_software_plugin/wizards/enhanced_testing_wizard.py +537 -0
  205. empathy_software_plugin/wizards/memory_enhanced_debugging_wizard.py +816 -0
  206. empathy_software_plugin/wizards/multi_model_wizard.py +501 -0
  207. empathy_software_plugin/wizards/pattern_extraction_wizard.py +422 -0
  208. empathy_software_plugin/wizards/pattern_retriever_wizard.py +400 -0
  209. empathy_software_plugin/wizards/performance/__init__.py +9 -0
  210. empathy_software_plugin/wizards/performance/bottleneck_detector.py +221 -0
  211. empathy_software_plugin/wizards/performance/profiler_parsers.py +278 -0
  212. empathy_software_plugin/wizards/performance/trajectory_analyzer.py +429 -0
  213. empathy_software_plugin/wizards/performance_profiling_wizard.py +305 -0
  214. empathy_software_plugin/wizards/prompt_engineering_wizard.py +425 -0
  215. empathy_software_plugin/wizards/rag_pattern_wizard.py +461 -0
  216. empathy_software_plugin/wizards/security/__init__.py +32 -0
  217. empathy_software_plugin/wizards/security/exploit_analyzer.py +290 -0
  218. empathy_software_plugin/wizards/security/owasp_patterns.py +241 -0
  219. empathy_software_plugin/wizards/security/vulnerability_scanner.py +604 -0
  220. empathy_software_plugin/wizards/security_analysis_wizard.py +322 -0
  221. empathy_software_plugin/wizards/security_learning_wizard.py +740 -0
  222. empathy_software_plugin/wizards/tech_debt_wizard.py +726 -0
  223. empathy_software_plugin/wizards/testing/__init__.py +27 -0
  224. empathy_software_plugin/wizards/testing/coverage_analyzer.py +459 -0
  225. empathy_software_plugin/wizards/testing/quality_analyzer.py +531 -0
  226. empathy_software_plugin/wizards/testing/test_suggester.py +533 -0
  227. empathy_software_plugin/wizards/testing_wizard.py +274 -0
  228. hot_reload/README.md +473 -0
  229. hot_reload/__init__.py +62 -0
  230. hot_reload/config.py +84 -0
  231. hot_reload/integration.py +228 -0
  232. hot_reload/reloader.py +298 -0
  233. hot_reload/watcher.py +179 -0
  234. hot_reload/websocket.py +176 -0
  235. scaffolding/README.md +589 -0
  236. scaffolding/__init__.py +35 -0
  237. scaffolding/__main__.py +14 -0
  238. scaffolding/cli.py +240 -0
  239. test_generator/__init__.py +38 -0
  240. test_generator/__main__.py +14 -0
  241. test_generator/cli.py +226 -0
  242. test_generator/generator.py +325 -0
  243. test_generator/risk_analyzer.py +216 -0
  244. workflow_patterns/__init__.py +33 -0
  245. workflow_patterns/behavior.py +249 -0
  246. workflow_patterns/core.py +76 -0
  247. workflow_patterns/output.py +99 -0
  248. workflow_patterns/registry.py +255 -0
  249. workflow_patterns/structural.py +288 -0
  250. workflow_scaffolding/__init__.py +11 -0
  251. workflow_scaffolding/__main__.py +12 -0
  252. workflow_scaffolding/cli.py +206 -0
  253. workflow_scaffolding/generator.py +265 -0
  254. agents/code_inspection/patterns/inspection/recurring_B112.json +0 -18
  255. agents/code_inspection/patterns/inspection/recurring_F541.json +0 -16
  256. agents/code_inspection/patterns/inspection/recurring_FORMAT.json +0 -25
  257. agents/code_inspection/patterns/inspection/recurring_bug_20250822_def456.json +0 -16
  258. agents/code_inspection/patterns/inspection/recurring_bug_20250915_abc123.json +0 -16
  259. agents/code_inspection/patterns/inspection/recurring_bug_20251212_3c5b9951.json +0 -16
  260. agents/code_inspection/patterns/inspection/recurring_bug_20251212_97c0f72f.json +0 -16
  261. agents/code_inspection/patterns/inspection/recurring_bug_20251212_a0871d53.json +0 -16
  262. agents/code_inspection/patterns/inspection/recurring_bug_20251212_a9b6ec41.json +0 -16
  263. agents/code_inspection/patterns/inspection/recurring_bug_null_001.json +0 -16
  264. agents/code_inspection/patterns/inspection/recurring_builtin.json +0 -16
  265. agents/compliance_anticipation_agent.py +0 -1422
  266. agents/compliance_db.py +0 -339
  267. agents/epic_integration_wizard.py +0 -530
  268. agents/notifications.py +0 -291
  269. agents/trust_building_behaviors.py +0 -872
  270. empathy_framework-3.7.0.dist-info/RECORD +0 -105
  271. {empathy_framework-3.7.0.dist-info → empathy_framework-3.8.0.dist-info}/WHEEL +0 -0
  272. {empathy_framework-3.7.0.dist-info → empathy_framework-3.8.0.dist-info}/entry_points.txt +0 -0
  273. {empathy_framework-3.7.0.dist-info → empathy_framework-3.8.0.dist-info}/licenses/LICENSE +0 -0
  274. /empathy_os/{monitoring.py → agent_monitoring.py} +0 -0
@@ -0,0 +1,1069 @@
1
+ """Document Generation Workflow
2
+
3
+ A cost-optimized, enterprise-safe documentation pipeline:
4
+ 1. Haiku: Generate outline from code/specs (cheap, fast)
5
+ 2. Sonnet: Write each section (capable, chunked for large projects)
6
+ 3. Opus: Final review + consistency polish (premium, chunked if needed)
7
+
8
+ Enterprise Features:
9
+ - Auto-scaling tokens based on project complexity
10
+ - Chunked polish for large documents
11
+ - Cost guardrails with configurable max_cost
12
+ - Graceful degradation with partial results on errors
13
+
14
+ Copyright 2025 Smart-AI-Memory
15
+ Licensed under Fair Source License 0.9
16
+ """
17
+
18
import logging
import re
from datetime import datetime
from pathlib import Path
from typing import Any

from .base import BaseWorkflow, ModelTier
from .step_config import WorkflowStepConfig
25
+
26
+ logger = logging.getLogger(__name__)
27
+
28
# Approximate cost per 1K tokens (USD) - used for cost estimation
# These are estimates and should be updated as pricing changes.
# Keyed by ModelTier; each entry splits input vs. output token pricing,
# consumed by DocumentGenerationWorkflow._estimate_cost.
TOKEN_COSTS = {
    ModelTier.CHEAP: {"input": 0.00025, "output": 0.00125},  # Haiku
    ModelTier.CAPABLE: {"input": 0.003, "output": 0.015},  # Sonnet
    ModelTier.PREMIUM: {"input": 0.015, "output": 0.075},  # Opus
}
35
+
36
# Define step configurations for executor-based execution
# Note: max_tokens for polish is dynamically set based on input size.
# Only the "polish" step is configured here; presumably the outline/write
# stages are driven by the class-level tier_map instead — confirm against
# the executor integration.
DOC_GEN_STEPS = {
    "polish": WorkflowStepConfig(
        name="polish",
        task_type="final_review",  # Premium tier task
        tier_hint="premium",
        description="Polish and improve documentation for consistency and quality",
        max_tokens=20000,  # Increased to handle large chunked documents
    ),
}
47
+
48
+
49
class DocumentGenerationWorkflow(BaseWorkflow):
    """Multi-tier document generation workflow.

    Uses cheap models for outlining, capable models for content
    generation, and premium models for final polish and consistency
    review.

    Usage:
        workflow = DocumentGenerationWorkflow()
        result = await workflow.execute(
            source_code="...",
            doc_type="api_reference",
            audience="developers"
        )
    """

    # Registry metadata for this workflow.
    name = "doc-gen"
    description = "Cost-optimized documentation generation pipeline"
    # Stage execution order; each stage name maps to a model tier below.
    stages = ["outline", "write", "polish"]
    # NOTE(review): class-level mutable dict shared by all instances.
    # should_skip_stage() writes to the "polish" entry; such writes should
    # rebind an instance attribute rather than mutate this dict in place,
    # or the downgrade leaks across instances.
    tier_map = {
        "outline": ModelTier.CHEAP,
        "write": ModelTier.CAPABLE,
        "polish": ModelTier.PREMIUM,
    }
73
+
74
    def __init__(
        self,
        skip_polish_threshold: int = 1000,
        max_sections: int = 10,
        max_write_tokens: int | None = None,  # Auto-scaled if None
        section_focus: list[str] | None = None,
        chunked_generation: bool = True,
        sections_per_chunk: int = 3,
        max_cost: float = 5.0,  # Cost guardrail in USD
        cost_warning_threshold: float = 0.8,  # Warn at 80% of max_cost
        graceful_degradation: bool = True,  # Return partial results on error
        export_path: str | Path | None = None,  # Export docs to file (e.g., "docs/generated")
        max_display_chars: int = 45000,  # Max chars before chunking output
        **kwargs: Any,
    ):
        """Initialize workflow with enterprise-safe defaults.

        Args:
            skip_polish_threshold: Skip premium polish for docs under this
                token count (they're already good enough).
            max_sections: Maximum number of sections to generate.
            max_write_tokens: Maximum tokens for content generation.
                If None, auto-scales based on section count (recommended).
            section_focus: Optional list of specific sections to generate
                (e.g., ["Testing Guide", "API Reference"]).
            chunked_generation: If True, generates large docs in chunks to avoid
                truncation (default True).
            sections_per_chunk: Number of sections to generate per chunk (default 3).
            max_cost: Maximum cost in USD before stopping (default $5).
                Set to 0 to disable cost limits.
            cost_warning_threshold: Percentage of max_cost to trigger warning (default 0.8).
            graceful_degradation: If True, return partial results on errors
                instead of failing completely (default True).
            export_path: Optional directory to export generated docs (e.g., "docs/generated").
                If provided, documentation will be saved to a file automatically.
            max_display_chars: Maximum characters before splitting output into chunks
                for display (default 45000). Helps avoid terminal/UI truncation.

        """
        super().__init__(**kwargs)
        self.skip_polish_threshold = skip_polish_threshold
        self.max_sections = max_sections
        self._user_max_write_tokens = max_write_tokens  # Store user preference
        # Effective write budget; _write() overwrites this via _auto_scale_tokens
        # unless the user supplied an explicit value.
        self.max_write_tokens = max_write_tokens or 16000  # Will be auto-scaled
        self.section_focus = section_focus
        self.chunked_generation = chunked_generation
        self.sections_per_chunk = sections_per_chunk
        self.max_cost = max_cost
        self.cost_warning_threshold = cost_warning_threshold
        self.graceful_degradation = graceful_degradation
        # Normalize to Path once so export code can assume Path semantics.
        self.export_path = Path(export_path) if export_path else None
        self.max_display_chars = max_display_chars
        # Mutable run state: populated by _write/_track_cost during execute().
        self._total_content_tokens: int = 0
        self._accumulated_cost: float = 0.0
        self._cost_warning_issued: bool = False
        self._partial_results: dict = {}
130
+
131
+ def _estimate_cost(self, tier: ModelTier, input_tokens: int, output_tokens: int) -> float:
132
+ """Estimate cost for a given tier and token counts."""
133
+ costs = TOKEN_COSTS.get(tier, TOKEN_COSTS[ModelTier.CAPABLE])
134
+ input_cost = (input_tokens / 1000) * costs["input"]
135
+ output_cost = (output_tokens / 1000) * costs["output"]
136
+ return input_cost + output_cost
137
+
138
+ def _track_cost(
139
+ self,
140
+ tier: ModelTier,
141
+ input_tokens: int,
142
+ output_tokens: int,
143
+ ) -> tuple[float, bool]:
144
+ """Track accumulated cost and check against limits.
145
+
146
+ Returns:
147
+ Tuple of (cost_for_this_call, should_stop)
148
+
149
+ """
150
+ cost = self._estimate_cost(tier, input_tokens, output_tokens)
151
+ self._accumulated_cost += cost
152
+
153
+ # Check warning threshold
154
+ if (
155
+ self.max_cost > 0
156
+ and not self._cost_warning_issued
157
+ and self._accumulated_cost >= self.max_cost * self.cost_warning_threshold
158
+ ):
159
+ self._cost_warning_issued = True
160
+ logger.warning(
161
+ f"Doc-gen cost approaching limit: ${self._accumulated_cost:.2f} "
162
+ f"of ${self.max_cost:.2f} ({self.cost_warning_threshold * 100:.0f}% threshold)",
163
+ )
164
+
165
+ # Check if we should stop
166
+ should_stop = self.max_cost > 0 and self._accumulated_cost >= self.max_cost
167
+ if should_stop:
168
+ logger.warning(
169
+ f"Doc-gen cost limit reached: ${self._accumulated_cost:.2f} >= ${self.max_cost:.2f}",
170
+ )
171
+
172
+ return cost, should_stop
173
+
174
+ def _auto_scale_tokens(self, section_count: int) -> int:
175
+ """Auto-scale max_write_tokens based on section count.
176
+
177
+ Enterprise projects may have 20+ sections requiring more tokens.
178
+ """
179
+ if self._user_max_write_tokens is not None:
180
+ return self._user_max_write_tokens # User override
181
+
182
+ # Base: 2000 tokens per section, minimum 16000, maximum 64000
183
+ scaled = max(16000, min(64000, section_count * 2000))
184
+ logger.info(f"Auto-scaled max_write_tokens to {scaled} for {section_count} sections")
185
+ return scaled
186
+
187
+ def _export_document(
188
+ self,
189
+ document: str,
190
+ doc_type: str,
191
+ report: str | None = None,
192
+ ) -> tuple[Path | None, Path | None]:
193
+ """Export generated documentation to file.
194
+
195
+ Args:
196
+ document: The generated documentation content
197
+ doc_type: Document type for naming
198
+ report: Optional report to save alongside document
199
+
200
+ Returns:
201
+ Tuple of (doc_path, report_path) or (None, None) if export disabled
202
+
203
+ """
204
+ if not self.export_path:
205
+ return None, None
206
+
207
+ # Create export directory
208
+ self.export_path.mkdir(parents=True, exist_ok=True)
209
+
210
+ # Generate filename with timestamp
211
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
212
+ safe_doc_type = doc_type.replace(" ", "_").replace("/", "-").lower()
213
+ doc_filename = f"{safe_doc_type}_{timestamp}.md"
214
+ report_filename = f"{safe_doc_type}_{timestamp}_report.txt"
215
+
216
+ doc_path = self.export_path / doc_filename
217
+ report_path = self.export_path / report_filename if report else None
218
+
219
+ # Write document
220
+ try:
221
+ doc_path.write_text(document, encoding="utf-8")
222
+ logger.info(f"Documentation exported to: {doc_path}")
223
+
224
+ # Write report if provided
225
+ if report and report_path:
226
+ report_path.write_text(report, encoding="utf-8")
227
+ logger.info(f"Report exported to: {report_path}")
228
+
229
+ return doc_path, report_path
230
+ except Exception as e:
231
+ logger.error(f"Failed to export documentation: {e}")
232
+ return None, None
233
+
234
+ def _chunk_output_for_display(self, content: str, chunk_prefix: str = "PART") -> list[str]:
235
+ """Split large output into displayable chunks.
236
+
237
+ Args:
238
+ content: The content to chunk
239
+ chunk_prefix: Prefix for chunk headers
240
+
241
+ Returns:
242
+ List of content chunks, each under max_display_chars
243
+
244
+ """
245
+ if len(content) <= self.max_display_chars:
246
+ return [content]
247
+
248
+ chunks = []
249
+ # Try to split on section boundaries (## headers)
250
+ import re
251
+
252
+ sections = re.split(r"(?=^## )", content, flags=re.MULTILINE)
253
+
254
+ current_chunk = ""
255
+ chunk_num = 1
256
+
257
+ for section in sections:
258
+ # If adding this section would exceed limit, save current chunk
259
+ if current_chunk and len(current_chunk) + len(section) > self.max_display_chars:
260
+ chunks.append(
261
+ f"{'=' * 60}\n{chunk_prefix} {chunk_num} of {{total}}\n{'=' * 60}\n\n"
262
+ + current_chunk,
263
+ )
264
+ chunk_num += 1
265
+ current_chunk = section
266
+ else:
267
+ current_chunk += section
268
+
269
+ # Add final chunk
270
+ if current_chunk:
271
+ chunks.append(
272
+ f"{'=' * 60}\n{chunk_prefix} {chunk_num} of {{total}}\n{'=' * 60}\n\n"
273
+ + current_chunk,
274
+ )
275
+
276
+ # Update total count in all chunks
277
+ total = len(chunks)
278
+ chunks = [chunk.format(total=total) for chunk in chunks]
279
+
280
+ return chunks
281
+
282
+ def should_skip_stage(self, stage_name: str, input_data: Any) -> tuple[bool, str | None]:
283
+ """Skip polish for short documents."""
284
+ if stage_name == "polish":
285
+ if self._total_content_tokens < self.skip_polish_threshold:
286
+ self.tier_map["polish"] = ModelTier.CAPABLE
287
+ return False, None
288
+ return False, None
289
+
290
+ async def run_stage(
291
+ self,
292
+ stage_name: str,
293
+ tier: ModelTier,
294
+ input_data: Any,
295
+ ) -> tuple[Any, int, int]:
296
+ """Execute a document generation stage."""
297
+ if stage_name == "outline":
298
+ return await self._outline(input_data, tier)
299
+ if stage_name == "write":
300
+ return await self._write(input_data, tier)
301
+ if stage_name == "polish":
302
+ return await self._polish(input_data, tier)
303
+ raise ValueError(f"Unknown stage: {stage_name}")
304
+
305
+ async def _outline(self, input_data: dict, tier: ModelTier) -> tuple[dict, int, int]:
306
+ """Generate document outline from source."""
307
+ source_code = input_data.get("source_code", "")
308
+ target = input_data.get("target", "")
309
+ doc_type = input_data.get("doc_type", "general")
310
+ audience = input_data.get("audience", "developers")
311
+
312
+ # Use target if source_code not provided
313
+ content_to_document = source_code or target
314
+
315
+ # If target looks like a file path and source_code wasn't provided, read the file
316
+ if not source_code and target:
317
+ from pathlib import Path
318
+
319
+ target_path = Path(target)
320
+ if target_path.exists() and target_path.is_file():
321
+ try:
322
+ content_to_document = target_path.read_text(encoding="utf-8")
323
+ # Prepend file info for context
324
+ content_to_document = f"# File: {target}\n\n{content_to_document}"
325
+ except Exception as e:
326
+ # If we can't read the file, log and use the path as-is
327
+ import logging
328
+
329
+ logging.getLogger(__name__).warning(f"Could not read file {target}: {e}")
330
+ elif target_path.suffix in (
331
+ ".py",
332
+ ".js",
333
+ ".ts",
334
+ ".tsx",
335
+ ".java",
336
+ ".go",
337
+ ".rs",
338
+ ".md",
339
+ ".txt",
340
+ ):
341
+ # Looks like a file path but doesn't exist - warn
342
+ import logging
343
+
344
+ logging.getLogger(__name__).warning(
345
+ f"Target appears to be a file path but doesn't exist: {target}",
346
+ )
347
+
348
+ system = """You are a technical writer. Create a detailed outline for documentation.
349
+
350
+ Based on the content provided, generate an outline with:
351
+ 1. Logical section structure (5-8 sections)
352
+ 2. Brief description of each section's purpose
353
+ 3. Key points to cover in each section
354
+
355
+ Format as a numbered list with section titles and descriptions."""
356
+
357
+ user_message = f"""Create a documentation outline:
358
+
359
+ Document Type: {doc_type}
360
+ Target Audience: {audience}
361
+
362
+ Content to document:
363
+ {content_to_document[:4000]}"""
364
+
365
+ response, input_tokens, output_tokens = await self._call_llm(
366
+ tier,
367
+ system,
368
+ user_message,
369
+ max_tokens=1000,
370
+ )
371
+
372
+ return (
373
+ {
374
+ "outline": response,
375
+ "doc_type": doc_type,
376
+ "audience": audience,
377
+ "content_to_document": content_to_document,
378
+ },
379
+ input_tokens,
380
+ output_tokens,
381
+ )
382
+
383
+ def _parse_outline_sections(self, outline: str) -> list[str]:
384
+ """Parse top-level section titles from the outline.
385
+
386
+ Only matches main sections like "1. Introduction", "2. Setup", etc.
387
+ Ignores sub-sections like "2.1 Prerequisites" or nested items.
388
+ """
389
+ import re
390
+
391
+ sections = []
392
+ # Match only top-level sections: digit followed by period and space/letter
393
+ # e.g., "1. Introduction" but NOT "1.1 Sub-section" or "2.1.3 Deep"
394
+ top_level_pattern = re.compile(r"^(\d+)\.\s+([A-Za-z].*)")
395
+
396
+ for line in outline.split("\n"):
397
+ stripped = line.strip()
398
+ match = top_level_pattern.match(stripped)
399
+ if match:
400
+ # section_num = match.group(1) - not needed, only extracting title
401
+ title = match.group(2).strip()
402
+ # Remove any trailing description after " - "
403
+ if " - " in title:
404
+ title = title.split(" - ")[0].strip()
405
+ sections.append(title)
406
+
407
+ return sections
408
+
409
    async def _write(self, input_data: dict, tier: ModelTier) -> tuple[dict, int, int]:
        """Write content based on the outline.

        Parses section titles from the outline, auto-scales the token budget,
        and either delegates to chunked generation (large outlines) or makes
        a single LLM call for the whole document. Records the output token
        count in ``_total_content_tokens`` for the polish-skip decision.

        Returns:
            (stage output dict with "draft_document", input_tokens, output_tokens)

        """
        outline = input_data.get("outline", "")
        doc_type = input_data.get("doc_type", "general")
        audience = input_data.get("audience", "developers")
        content_to_document = input_data.get("content_to_document", "")

        # Parse sections from outline
        sections = self._parse_outline_sections(outline)

        # Auto-scale tokens based on section count
        self.max_write_tokens = self._auto_scale_tokens(len(sections))

        # Use chunked generation for large outlines (more than sections_per_chunk * 2)
        use_chunking = (
            self.chunked_generation
            and len(sections) > self.sections_per_chunk * 2
            and not self.section_focus  # Don't chunk if already focused
        )

        if use_chunking:
            return await self._write_chunked(
                sections,
                outline,
                doc_type,
                audience,
                content_to_document,
                tier,
            )

        # Handle section_focus for targeted generation
        section_instruction = ""
        if self.section_focus:
            sections_list = ", ".join(self.section_focus)
            section_instruction = f"""
IMPORTANT: Focus ONLY on generating these specific sections:
{sections_list}

Generate comprehensive, detailed content for each of these sections."""

        system = f"""You are a technical writer. Write comprehensive documentation.

Based on the outline provided, write full content for each section:
1. Use clear, professional language
2. Include code examples where appropriate
3. Use markdown formatting
4. Be thorough and detailed - do NOT truncate sections
5. Target the specified audience
6. Complete ALL sections before stopping
{section_instruction}

Write the complete document with all sections."""

        user_message = f"""Write documentation based on this outline:

Document Type: {doc_type}
Target Audience: {audience}

Outline:
{outline}

Source content for reference:
{content_to_document[:5000]}"""

        response, input_tokens, output_tokens = await self._call_llm(
            tier,
            system,
            user_message,
            max_tokens=self.max_write_tokens,
        )

        # Feeds should_skip_stage(): short drafts get a cheaper polish tier.
        self._total_content_tokens = output_tokens

        return (
            {
                "draft_document": response,
                "doc_type": doc_type,
                "audience": audience,
                "outline": outline,
                "chunked": False,
            },
            input_tokens,
            output_tokens,
        )
493
+
494
    async def _write_chunked(
        self,
        sections: list[str],
        outline: str,
        doc_type: str,
        audience: str,
        content_to_document: str,
        tier: ModelTier,
    ) -> tuple[dict, int, int]:
        """Generate documentation in chunks to avoid truncation.

        Enterprise-safe: includes cost tracking and graceful degradation.

        Args:
            sections: Top-level section titles to generate.
            outline: Full outline text, included in every chunk prompt for
                context.
            doc_type: Document type label used in the prompts.
            audience: Target audience label used in the prompts.
            content_to_document: Source material; truncated to 3000 chars
                per chunk prompt.
            tier: Model tier for each chunk's LLM call.

        Returns:
            Tuple of (result dict, total input tokens, total output tokens).
            The result records chunk progress ("chunk_count",
            "chunks_completed", "stopped_early", "accumulated_cost") and a
            "warning" key when generation stopped early.
        """
        all_content: list[str] = []
        total_input_tokens: int = 0
        total_output_tokens: int = 0
        stopped_early: bool = False
        error_message: str | None = None

        # Split sections into chunks of self.sections_per_chunk titles each.
        chunks = []
        for i in range(0, len(sections), self.sections_per_chunk):
            chunks.append(sections[i : i + self.sections_per_chunk])

        logger.info(f"Generating documentation in {len(chunks)} chunks")

        for chunk_idx, chunk_sections in enumerate(chunks):
            sections_list = ", ".join(chunk_sections)

            # Build context about what came before
            previous_context = ""
            if chunk_idx > 0 and all_content:
                # Include last 500 chars of previous content for continuity
                previous_context = f"""
Previous sections already written (for context/continuity):
...{all_content[-1][-500:]}

Continue with the next sections, maintaining consistent style and terminology."""

            system = f"""You are a technical writer. Write comprehensive documentation.

Write ONLY the following sections (you are generating part {chunk_idx + 1} of {len(chunks)}):
{sections_list}

Requirements:
1. Use clear, professional language
2. Include code examples where appropriate
3. Use markdown formatting with ## headers
4. Be thorough and detailed - complete each section fully
5. Target {audience} audience
6. Write ONLY these specific sections, nothing else"""

            user_message = f"""Write documentation for these specific sections:

Document Type: {doc_type}
Target Audience: {audience}

Sections to write: {sections_list}

Full outline (for context):
{outline}

Source content for reference:
{content_to_document[:3000]}
{previous_context}"""

            try:
                # Per-chunk budget: an even share of the total plus headroom.
                response, input_tokens, output_tokens = await self._call_llm(
                    tier,
                    system,
                    user_message,
                    max_tokens=self.max_write_tokens // len(chunks) + 2000,
                )

                # Track cost and check limits
                _, should_stop = self._track_cost(tier, input_tokens, output_tokens)

                all_content.append(response)
                total_input_tokens += input_tokens
                total_output_tokens += output_tokens

                logger.info(
                    f"Chunk {chunk_idx + 1}/{len(chunks)} complete: "
                    f"{len(response)} chars, {output_tokens} tokens, "
                    f"cost so far: ${self._accumulated_cost:.2f}",
                )

                # Check cost limit; this chunk's output is kept, the rest skipped.
                if should_stop:
                    stopped_early = True
                    remaining = len(chunks) - chunk_idx - 1
                    error_message = (
                        f"Cost limit reached (${self._accumulated_cost:.2f}). "
                        f"Stopped after {chunk_idx + 1}/{len(chunks)} chunks. "
                        f"{remaining} chunks not generated."
                    )
                    logger.warning(error_message)
                    break

            except Exception as e:
                # With graceful_degradation on, keep whatever was generated
                # so far and surface the error as a warning; otherwise re-raise.
                error_message = f"Error generating chunk {chunk_idx + 1}: {e}"
                logger.error(error_message)
                if not self.graceful_degradation:
                    raise
                stopped_early = True
                break

        # Combine all chunks
        combined_document = "\n\n".join(all_content)
        self._total_content_tokens = total_output_tokens

        # Store partial results for graceful degradation
        self._partial_results = {
            "draft_document": combined_document,
            "sections_completed": len(all_content),
            "sections_total": len(chunks),
        }

        result = {
            "draft_document": combined_document,
            "doc_type": doc_type,
            "audience": audience,
            "outline": outline,
            "chunked": True,
            "chunk_count": len(chunks),
            "chunks_completed": len(all_content),
            "stopped_early": stopped_early,
            "accumulated_cost": self._accumulated_cost,
        }

        if error_message:
            result["warning"] = error_message

        return (result, total_input_tokens, total_output_tokens)
628
+
629
    async def _polish(self, input_data: dict, tier: ModelTier) -> tuple[dict, int, int]:
        """Final review and consistency polish using LLM.

        Enterprise-safe: chunks large documents to avoid truncation.
        Supports XML-enhanced prompts when enabled in workflow config.

        Args:
            input_data: Write-stage output; reads "draft_document",
                "doc_type", and "audience".
            tier: Model tier used for the polish LLM call.

        Returns:
            Tuple of (result dict with polished "document",
            "formatted_report", optional XML fields and export paths,
            input_tokens, output_tokens).
        """
        draft_document = input_data.get("draft_document", "")
        doc_type = input_data.get("doc_type", "general")
        audience = input_data.get("audience", "developers")

        # Check if document is too large and needs chunked polishing
        # Rough estimate: 4 chars per token, 10k tokens threshold for chunking
        estimated_tokens = len(draft_document) // 4
        needs_chunked_polish = estimated_tokens > 10000

        if needs_chunked_polish:
            logger.info(
                f"Large document detected (~{estimated_tokens} tokens). "
                "Using chunked polish for enterprise safety.",
            )
            return await self._polish_chunked(input_data, tier)

        # Build input payload for prompt
        input_payload = f"""Document Type: {doc_type}
Target Audience: {audience}

Draft:
{draft_document}"""

        # Check if XML prompts are enabled
        if self._is_xml_enabled():
            # Use XML-enhanced prompt
            user_message = self._render_xml_prompt(
                role="senior technical editor",
                goal="Polish and improve the documentation for consistency and quality",
                instructions=[
                    "Standardize terminology and formatting",
                    "Improve clarity and flow",
                    "Add missing cross-references",
                    "Fix grammatical issues",
                    "Identify gaps and add helpful notes",
                    "Ensure examples are complete and accurate",
                ],
                constraints=[
                    "Maintain the original structure and intent",
                    "Keep content appropriate for the target audience",
                    "Preserve code examples while improving explanations",
                ],
                input_type="documentation_draft",
                input_payload=input_payload,
                extra={
                    "doc_type": doc_type,
                    "audience": audience,
                },
            )
            system = None  # XML prompt includes all context
        else:
            # Use legacy plain text prompts
            system = """You are a senior technical editor. Polish and improve the documentation:

1. CONSISTENCY:
   - Standardize terminology
   - Fix formatting inconsistencies
   - Ensure consistent code style

2. QUALITY:
   - Improve clarity and flow
   - Add missing cross-references
   - Fix grammatical issues

3. COMPLETENESS:
   - Identify gaps
   - Add helpful notes or warnings
   - Ensure examples are complete

Return the polished document with improvements noted at the end."""

            user_message = f"""Polish this documentation:

{input_payload}"""

        # Calculate polish tokens based on draft size (at least as much as write stage)
        polish_max_tokens = max(self.max_write_tokens, 20000)

        # Try executor-based execution first (Phase 3 pattern)
        if self._executor is not None or self._api_key:
            try:
                step = DOC_GEN_STEPS["polish"]
                # Override step max_tokens with dynamic value.
                # NOTE(review): DOC_GEN_STEPS["polish"] appears to be shared
                # module-level state, so this mutation persists across calls
                # — confirm that is intended.
                step.max_tokens = polish_max_tokens
                response, input_tokens, output_tokens, cost = await self.run_step_with_executor(
                    step=step,
                    prompt=user_message,
                    system=system,
                )
                # The returned `cost` is not used on this path.
            except Exception:
                # Fall back to legacy _call_llm if executor fails
                response, input_tokens, output_tokens = await self._call_llm(
                    tier,
                    system or "",
                    user_message,
                    max_tokens=polish_max_tokens,
                )
        else:
            # Legacy path for backward compatibility
            response, input_tokens, output_tokens = await self._call_llm(
                tier,
                system or "",
                user_message,
                max_tokens=polish_max_tokens,
            )

        # Parse XML response if enforcement is enabled
        parsed_data = self._parse_xml_response(response)

        result = {
            "document": response,
            "doc_type": doc_type,
            "audience": audience,
            "model_tier_used": tier.value,
        }

        # Merge parsed XML data if available
        if parsed_data.get("xml_parsed"):
            result.update(
                {
                    "xml_parsed": True,
                    "summary": parsed_data.get("summary"),
                    "findings": parsed_data.get("findings", []),
                    "checklist": parsed_data.get("checklist", []),
                },
            )

        # Add formatted report for human readability
        result["formatted_report"] = format_doc_gen_report(result, input_data)

        # Export documentation if export_path is configured
        doc_path, report_path = self._export_document(
            document=response,
            doc_type=doc_type,
            report=result["formatted_report"],
        )
        if doc_path:
            result["export_path"] = str(doc_path)
            result["report_path"] = str(report_path) if report_path else None
            logger.info(f"Documentation saved to: {doc_path}")

        # Chunk output for display if needed
        output_chunks = self._chunk_output_for_display(
            result["formatted_report"],
            chunk_prefix="DOC OUTPUT",
        )
        if len(output_chunks) > 1:
            result["output_chunks"] = output_chunks
            result["output_chunk_count"] = len(output_chunks)
            logger.info(
                f"Report split into {len(output_chunks)} chunks for display "
                f"(total {len(result['formatted_report'])} chars)",
            )

        return (result, input_tokens, output_tokens)
790
+
791
    async def _polish_chunked(self, input_data: dict, tier: ModelTier) -> tuple[dict, int, int]:
        """Polish large documents in chunks to avoid truncation.

        Splits the document by section headers and polishes each chunk
        separately, then combines the results.

        Args:
            input_data: Write-stage output; reads "draft_document",
                "doc_type", and "audience".
            tier: Model tier for each chunk's LLM call.

        Returns:
            Tuple of (result dict with combined "document",
            "formatted_report", chunk bookkeeping and optional export
            paths, total_input_tokens, total_output_tokens).
        """
        import re

        draft_document = input_data.get("draft_document", "")
        doc_type = input_data.get("doc_type", "general")
        audience = input_data.get("audience", "developers")

        # Split document by major section headers (## headers); the
        # lookahead keeps each "## " header attached to its own chunk.
        sections = re.split(r"(?=^## )", draft_document, flags=re.MULTILINE)
        sections = [s.strip() for s in sections if s.strip()]

        if len(sections) <= 1:
            # If we can't split by sections, split by character count
            chunk_size = 15000  # ~3750 tokens per chunk
            sections = [
                draft_document[i : i + chunk_size]
                for i in range(0, len(draft_document), chunk_size)
            ]

        logger.info(f"Polishing document in {len(sections)} chunks")

        polished_chunks: list[str] = []
        total_input_tokens: int = 0
        total_output_tokens: int = 0

        for chunk_idx, section in enumerate(sections):
            system = """You are a senior technical editor. Polish this section of documentation:

1. Standardize terminology and formatting
2. Improve clarity and flow
3. Fix grammatical issues
4. Ensure code examples are complete and accurate

Return ONLY the polished section. Do not add commentary."""

            user_message = f"""Polish this documentation section (part {chunk_idx + 1} of {len(sections)}):

Document Type: {doc_type}
Target Audience: {audience}

Section to polish:
{section}"""

            try:
                response, input_tokens, output_tokens = await self._call_llm(
                    tier,
                    system,
                    user_message,
                    max_tokens=8000,
                )

                # Track cost
                _, should_stop = self._track_cost(tier, input_tokens, output_tokens)

                polished_chunks.append(response)
                total_input_tokens += input_tokens
                total_output_tokens += output_tokens

                logger.info(
                    f"Polish chunk {chunk_idx + 1}/{len(sections)} complete, "
                    f"cost so far: ${self._accumulated_cost:.2f}",
                )

                if should_stop:
                    logger.warning(
                        f"Cost limit reached during polish. "
                        f"Returning {len(polished_chunks)}/{len(sections)} polished chunks.",
                    )
                    # Add remaining sections unpolished so no content is lost.
                    polished_chunks.extend(sections[chunk_idx + 1 :])
                    break

            except Exception as e:
                logger.error(f"Error polishing chunk {chunk_idx + 1}: {e}")
                if self.graceful_degradation:
                    # Keep original section on error
                    polished_chunks.append(section)
                else:
                    raise

        # Combine polished chunks
        polished_document = "\n\n".join(polished_chunks)

        result = {
            "document": polished_document,
            "doc_type": doc_type,
            "audience": audience,
            "model_tier_used": tier.value,
            "polish_chunked": True,
            "polish_chunks": len(sections),
            "accumulated_cost": self._accumulated_cost,
        }

        # Add formatted report
        result["formatted_report"] = format_doc_gen_report(result, input_data)

        # Export documentation if export_path is configured
        doc_path, report_path = self._export_document(
            document=polished_document,
            doc_type=doc_type,
            report=result["formatted_report"],
        )
        if doc_path:
            result["export_path"] = str(doc_path)
            result["report_path"] = str(report_path) if report_path else None
            logger.info(f"Documentation saved to: {doc_path}")

        # Chunk output for display if needed
        output_chunks = self._chunk_output_for_display(
            result["formatted_report"],
            chunk_prefix="DOC OUTPUT",
        )
        if len(output_chunks) > 1:
            result["output_chunks"] = output_chunks
            result["output_chunk_count"] = len(output_chunks)
            logger.info(
                f"Report split into {len(output_chunks)} chunks for display "
                f"(total {len(result['formatted_report'])} chars)",
            )

        return (result, total_input_tokens, total_output_tokens)
917
+
918
+
919
def format_doc_gen_report(result: dict, input_data: dict) -> str:
    """Format document generation output as a human-readable report.

    Builds a plain-text report with header, outline preview, generated
    document, statistics, export info, warnings, and a scope notice when
    the document looks incomplete relative to the planned outline.

    Args:
        result: The polish stage result (reads "doc_type", "audience",
            "document", "accumulated_cost", "polish_chunked",
            "export_path", "report_path", "warning", "model_tier_used").
        input_data: Input data from previous stages (reads "outline",
            "chunked", "chunk_count", "chunks_completed", "stopped_early",
            "warning").

    Returns:
        Formatted report string

    """
    import re

    lines = []

    # Header
    doc_type = result.get("doc_type", "general").replace("_", " ").title()
    audience = result.get("audience", "developers").title()

    lines.append("=" * 60)
    lines.append("DOCUMENTATION GENERATION REPORT")
    lines.append("=" * 60)
    lines.append("")
    lines.append(f"Document Type: {doc_type}")
    lines.append(f"Target Audience: {audience}")
    lines.append("")

    # Outline summary
    outline = input_data.get("outline", "")
    if outline:
        lines.append("-" * 60)
        lines.append("DOCUMENT OUTLINE")
        lines.append("-" * 60)
        # Show just a preview of the outline
        outline_lines = outline.split("\n")[:10]
        lines.extend(outline_lines)
        if len(outline.split("\n")) > 10:
            lines.append("...")
        lines.append("")

    # Generated document
    document = result.get("document", "")
    if document:
        lines.append("-" * 60)
        lines.append("GENERATED DOCUMENTATION")
        lines.append("-" * 60)
        lines.append("")
        lines.append(document)
        lines.append("")

    # Statistics
    word_count = len(document.split()) if document else 0
    # Count only real top-level markdown headers ("## Title" lines).
    # The previous substring count (document.count("##")) also matched
    # "###" sub-headers and "##" inside code fences, inflating the number
    # the truncation heuristic below compares against planned sections.
    # This matches how _polish_chunked splits sections (r"(?=^## )").
    section_count = len(re.findall(r"^## ", document, flags=re.MULTILINE)) if document else 0
    was_chunked = input_data.get("chunked", False)
    chunk_count = input_data.get("chunk_count", 0)
    chunks_completed = input_data.get("chunks_completed", chunk_count)
    stopped_early = input_data.get("stopped_early", False)
    accumulated_cost = result.get("accumulated_cost", 0)
    polish_chunked = result.get("polish_chunked", False)

    lines.append("-" * 60)
    lines.append("STATISTICS")
    lines.append("-" * 60)
    lines.append(f"Word Count: {word_count}")
    lines.append(f"Section Count: ~{section_count}")
    if was_chunked:
        if stopped_early:
            lines.append(
                f"Generation Mode: Chunked ({chunks_completed}/{chunk_count} chunks completed)",
            )
        else:
            lines.append(f"Generation Mode: Chunked ({chunk_count} chunks)")
    if polish_chunked:
        polish_chunks = result.get("polish_chunks", 0)
        lines.append(f"Polish Mode: Chunked ({polish_chunks} sections)")
    if accumulated_cost > 0:
        lines.append(f"Estimated Cost: ${accumulated_cost:.2f}")
    lines.append("")

    # Export info
    export_path = result.get("export_path")
    if export_path:
        lines.append("-" * 60)
        lines.append("FILE EXPORT")
        lines.append("-" * 60)
        lines.append(f"Documentation saved to: {export_path}")
        report_path = result.get("report_path")
        if report_path:
            lines.append(f"Report saved to: {report_path}")
        lines.append("")
        lines.append("Full documentation is available in the exported file.")
        lines.append("")

    # Warning notice (cost limit, errors, etc.)
    warning = input_data.get("warning") or result.get("warning")
    if warning or stopped_early:
        lines.append("-" * 60)
        lines.append("⚠️ WARNING")
        lines.append("-" * 60)
        if warning:
            lines.append(warning)
        if stopped_early and not warning:
            lines.append("Generation stopped early due to cost or error limits.")
        lines.append("")

    # Truncation detection: heuristic signals that generation was cut off.
    truncation_indicators = [
        document.rstrip().endswith("..."),
        document.rstrip().endswith("-"),
        "```" in document and document.count("```") % 2 != 0,  # Unclosed code block
        any(
            phrase in document.lower()
            for phrase in ["continued in", "see next section", "to be continued"]
        ),
    ]

    # Count planned sections from outline (top-level only, e.g. "1. Intro")
    planned_sections = 0
    top_level_pattern = re.compile(r"^(\d+)\.\s+([A-Za-z].*)")
    if outline:
        for line in outline.split("\n"):
            stripped = line.strip()
            if top_level_pattern.match(stripped):
                planned_sections += 1

    # Truncated if any textual indicator fires, or clearly fewer generated
    # sections than planned (with one section of slack).
    is_truncated = any(truncation_indicators) or (
        planned_sections > 0 and section_count < planned_sections - 1
    )

    if is_truncated or planned_sections > section_count + 1:
        lines.append("-" * 60)
        lines.append("SCOPE NOTICE")
        lines.append("-" * 60)
        lines.append("⚠️ DOCUMENTATION MAY BE INCOMPLETE")
        if planned_sections > 0:
            lines.append(f"  Planned sections: {planned_sections}")
            lines.append(f"  Generated sections: {section_count}")
        lines.append("")
        lines.append("To generate missing sections, re-run with section_focus:")
        lines.append("  workflow = DocumentGenerationWorkflow(")
        lines.append('      section_focus=["Testing Guide", "API Reference"]')
        lines.append("  )")
        lines.append("")

    # Footer
    lines.append("=" * 60)
    model_tier = result.get("model_tier_used", "unknown")
    lines.append(f"Generated using {model_tier} tier model")
    lines.append("=" * 60)

    return "\n".join(lines)