empathy-framework 2.4.0-py3-none-any.whl → 3.8.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- coach_wizards/__init__.py +13 -12
- coach_wizards/accessibility_wizard.py +12 -12
- coach_wizards/api_wizard.py +12 -12
- coach_wizards/base_wizard.py +26 -20
- coach_wizards/cicd_wizard.py +15 -13
- coach_wizards/code_reviewer_README.md +60 -0
- coach_wizards/code_reviewer_wizard.py +180 -0
- coach_wizards/compliance_wizard.py +12 -12
- coach_wizards/database_wizard.py +12 -12
- coach_wizards/debugging_wizard.py +12 -12
- coach_wizards/documentation_wizard.py +12 -12
- coach_wizards/generate_wizards.py +1 -2
- coach_wizards/localization_wizard.py +101 -19
- coach_wizards/migration_wizard.py +12 -12
- coach_wizards/monitoring_wizard.py +12 -12
- coach_wizards/observability_wizard.py +12 -12
- coach_wizards/performance_wizard.py +12 -12
- coach_wizards/prompt_engineering_wizard.py +661 -0
- coach_wizards/refactoring_wizard.py +12 -12
- coach_wizards/scaling_wizard.py +12 -12
- coach_wizards/security_wizard.py +12 -12
- coach_wizards/testing_wizard.py +12 -12
- empathy_framework-3.8.2.dist-info/METADATA +1176 -0
- empathy_framework-3.8.2.dist-info/RECORD +333 -0
- empathy_framework-3.8.2.dist-info/entry_points.txt +22 -0
- {empathy_framework-2.4.0.dist-info → empathy_framework-3.8.2.dist-info}/top_level.txt +5 -1
- empathy_healthcare_plugin/__init__.py +1 -2
- empathy_healthcare_plugin/monitors/__init__.py +9 -0
- empathy_healthcare_plugin/monitors/clinical_protocol_monitor.py +315 -0
- empathy_healthcare_plugin/monitors/monitoring/__init__.py +44 -0
- empathy_healthcare_plugin/monitors/monitoring/protocol_checker.py +300 -0
- empathy_healthcare_plugin/monitors/monitoring/protocol_loader.py +214 -0
- empathy_healthcare_plugin/monitors/monitoring/sensor_parsers.py +306 -0
- empathy_healthcare_plugin/monitors/monitoring/trajectory_analyzer.py +389 -0
- empathy_llm_toolkit/__init__.py +7 -7
- empathy_llm_toolkit/agent_factory/__init__.py +53 -0
- empathy_llm_toolkit/agent_factory/adapters/__init__.py +85 -0
- empathy_llm_toolkit/agent_factory/adapters/autogen_adapter.py +312 -0
- empathy_llm_toolkit/agent_factory/adapters/crewai_adapter.py +454 -0
- empathy_llm_toolkit/agent_factory/adapters/haystack_adapter.py +298 -0
- empathy_llm_toolkit/agent_factory/adapters/langchain_adapter.py +362 -0
- empathy_llm_toolkit/agent_factory/adapters/langgraph_adapter.py +333 -0
- empathy_llm_toolkit/agent_factory/adapters/native.py +228 -0
- empathy_llm_toolkit/agent_factory/adapters/wizard_adapter.py +426 -0
- empathy_llm_toolkit/agent_factory/base.py +305 -0
- empathy_llm_toolkit/agent_factory/crews/__init__.py +67 -0
- empathy_llm_toolkit/agent_factory/crews/code_review.py +1113 -0
- empathy_llm_toolkit/agent_factory/crews/health_check.py +1246 -0
- empathy_llm_toolkit/agent_factory/crews/refactoring.py +1128 -0
- empathy_llm_toolkit/agent_factory/crews/security_audit.py +1018 -0
- empathy_llm_toolkit/agent_factory/decorators.py +286 -0
- empathy_llm_toolkit/agent_factory/factory.py +558 -0
- empathy_llm_toolkit/agent_factory/framework.py +192 -0
- empathy_llm_toolkit/agent_factory/memory_integration.py +324 -0
- empathy_llm_toolkit/agent_factory/resilient.py +320 -0
- empathy_llm_toolkit/claude_memory.py +14 -15
- empathy_llm_toolkit/cli/__init__.py +8 -0
- empathy_llm_toolkit/cli/sync_claude.py +487 -0
- empathy_llm_toolkit/code_health.py +186 -28
- empathy_llm_toolkit/config/__init__.py +29 -0
- empathy_llm_toolkit/config/unified.py +295 -0
- empathy_llm_toolkit/contextual_patterns.py +11 -12
- empathy_llm_toolkit/core.py +168 -53
- empathy_llm_toolkit/git_pattern_extractor.py +17 -13
- empathy_llm_toolkit/levels.py +6 -13
- empathy_llm_toolkit/pattern_confidence.py +14 -18
- empathy_llm_toolkit/pattern_resolver.py +10 -12
- empathy_llm_toolkit/pattern_summary.py +16 -14
- empathy_llm_toolkit/providers.py +194 -28
- empathy_llm_toolkit/routing/__init__.py +32 -0
- empathy_llm_toolkit/routing/model_router.py +362 -0
- empathy_llm_toolkit/security/IMPLEMENTATION_SUMMARY.md +413 -0
- empathy_llm_toolkit/security/PHASE2_COMPLETE.md +384 -0
- empathy_llm_toolkit/security/PHASE2_SECRETS_DETECTOR_COMPLETE.md +271 -0
- empathy_llm_toolkit/security/QUICK_REFERENCE.md +316 -0
- empathy_llm_toolkit/security/README.md +262 -0
- empathy_llm_toolkit/security/__init__.py +62 -0
- empathy_llm_toolkit/security/audit_logger.py +929 -0
- empathy_llm_toolkit/security/audit_logger_example.py +152 -0
- empathy_llm_toolkit/security/pii_scrubber.py +640 -0
- empathy_llm_toolkit/security/secrets_detector.py +678 -0
- empathy_llm_toolkit/security/secrets_detector_example.py +304 -0
- empathy_llm_toolkit/security/secure_memdocs.py +1192 -0
- empathy_llm_toolkit/security/secure_memdocs_example.py +278 -0
- empathy_llm_toolkit/session_status.py +20 -22
- empathy_llm_toolkit/state.py +28 -21
- empathy_llm_toolkit/wizards/__init__.py +38 -0
- empathy_llm_toolkit/wizards/base_wizard.py +364 -0
- empathy_llm_toolkit/wizards/customer_support_wizard.py +190 -0
- empathy_llm_toolkit/wizards/healthcare_wizard.py +362 -0
- empathy_llm_toolkit/wizards/patient_assessment_README.md +64 -0
- empathy_llm_toolkit/wizards/patient_assessment_wizard.py +193 -0
- empathy_llm_toolkit/wizards/technology_wizard.py +194 -0
- empathy_os/__init__.py +125 -84
- empathy_os/adaptive/__init__.py +13 -0
- empathy_os/adaptive/task_complexity.py +127 -0
- empathy_os/{monitoring.py → agent_monitoring.py} +28 -28
- empathy_os/cache/__init__.py +117 -0
- empathy_os/cache/base.py +166 -0
- empathy_os/cache/dependency_manager.py +253 -0
- empathy_os/cache/hash_only.py +248 -0
- empathy_os/cache/hybrid.py +390 -0
- empathy_os/cache/storage.py +282 -0
- empathy_os/cli.py +1516 -70
- empathy_os/cli_unified.py +597 -0
- empathy_os/config/__init__.py +63 -0
- empathy_os/config/xml_config.py +239 -0
- empathy_os/config.py +95 -37
- empathy_os/coordination.py +72 -68
- empathy_os/core.py +94 -107
- empathy_os/cost_tracker.py +74 -55
- empathy_os/dashboard/__init__.py +15 -0
- empathy_os/dashboard/server.py +743 -0
- empathy_os/discovery.py +17 -14
- empathy_os/emergence.py +21 -22
- empathy_os/exceptions.py +18 -30
- empathy_os/feedback_loops.py +30 -33
- empathy_os/levels.py +32 -35
- empathy_os/leverage_points.py +31 -32
- empathy_os/logging_config.py +19 -16
- empathy_os/memory/__init__.py +195 -0
- empathy_os/memory/claude_memory.py +466 -0
- empathy_os/memory/config.py +224 -0
- empathy_os/memory/control_panel.py +1298 -0
- empathy_os/memory/edges.py +179 -0
- empathy_os/memory/graph.py +567 -0
- empathy_os/memory/long_term.py +1194 -0
- empathy_os/memory/nodes.py +179 -0
- empathy_os/memory/redis_bootstrap.py +540 -0
- empathy_os/memory/security/__init__.py +31 -0
- empathy_os/memory/security/audit_logger.py +930 -0
- empathy_os/memory/security/pii_scrubber.py +640 -0
- empathy_os/memory/security/secrets_detector.py +678 -0
- empathy_os/memory/short_term.py +2119 -0
- empathy_os/memory/storage/__init__.py +15 -0
- empathy_os/memory/summary_index.py +583 -0
- empathy_os/memory/unified.py +619 -0
- empathy_os/metrics/__init__.py +12 -0
- empathy_os/metrics/prompt_metrics.py +190 -0
- empathy_os/models/__init__.py +136 -0
- empathy_os/models/__main__.py +13 -0
- empathy_os/models/cli.py +655 -0
- empathy_os/models/empathy_executor.py +354 -0
- empathy_os/models/executor.py +252 -0
- empathy_os/models/fallback.py +671 -0
- empathy_os/models/provider_config.py +563 -0
- empathy_os/models/registry.py +382 -0
- empathy_os/models/tasks.py +302 -0
- empathy_os/models/telemetry.py +548 -0
- empathy_os/models/token_estimator.py +378 -0
- empathy_os/models/validation.py +274 -0
- empathy_os/monitoring/__init__.py +52 -0
- empathy_os/monitoring/alerts.py +23 -0
- empathy_os/monitoring/alerts_cli.py +268 -0
- empathy_os/monitoring/multi_backend.py +271 -0
- empathy_os/monitoring/otel_backend.py +363 -0
- empathy_os/optimization/__init__.py +19 -0
- empathy_os/optimization/context_optimizer.py +272 -0
- empathy_os/pattern_library.py +30 -29
- empathy_os/persistence.py +35 -37
- empathy_os/platform_utils.py +261 -0
- empathy_os/plugins/__init__.py +28 -0
- empathy_os/plugins/base.py +361 -0
- empathy_os/plugins/registry.py +268 -0
- empathy_os/project_index/__init__.py +30 -0
- empathy_os/project_index/cli.py +335 -0
- empathy_os/project_index/crew_integration.py +430 -0
- empathy_os/project_index/index.py +425 -0
- empathy_os/project_index/models.py +501 -0
- empathy_os/project_index/reports.py +473 -0
- empathy_os/project_index/scanner.py +538 -0
- empathy_os/prompts/__init__.py +61 -0
- empathy_os/prompts/config.py +77 -0
- empathy_os/prompts/context.py +177 -0
- empathy_os/prompts/parser.py +285 -0
- empathy_os/prompts/registry.py +313 -0
- empathy_os/prompts/templates.py +208 -0
- empathy_os/redis_config.py +144 -58
- empathy_os/redis_memory.py +79 -77
- empathy_os/resilience/__init__.py +56 -0
- empathy_os/resilience/circuit_breaker.py +256 -0
- empathy_os/resilience/fallback.py +179 -0
- empathy_os/resilience/health.py +300 -0
- empathy_os/resilience/retry.py +209 -0
- empathy_os/resilience/timeout.py +135 -0
- empathy_os/routing/__init__.py +43 -0
- empathy_os/routing/chain_executor.py +433 -0
- empathy_os/routing/classifier.py +217 -0
- empathy_os/routing/smart_router.py +234 -0
- empathy_os/routing/wizard_registry.py +307 -0
- empathy_os/templates.py +19 -14
- empathy_os/trust/__init__.py +28 -0
- empathy_os/trust/circuit_breaker.py +579 -0
- empathy_os/trust_building.py +67 -58
- empathy_os/validation/__init__.py +19 -0
- empathy_os/validation/xml_validator.py +281 -0
- empathy_os/wizard_factory_cli.py +170 -0
- empathy_os/{workflows.py → workflow_commands.py} +131 -37
- empathy_os/workflows/__init__.py +360 -0
- empathy_os/workflows/base.py +1660 -0
- empathy_os/workflows/bug_predict.py +962 -0
- empathy_os/workflows/code_review.py +960 -0
- empathy_os/workflows/code_review_adapters.py +310 -0
- empathy_os/workflows/code_review_pipeline.py +720 -0
- empathy_os/workflows/config.py +600 -0
- empathy_os/workflows/dependency_check.py +648 -0
- empathy_os/workflows/document_gen.py +1069 -0
- empathy_os/workflows/documentation_orchestrator.py +1205 -0
- empathy_os/workflows/health_check.py +679 -0
- empathy_os/workflows/keyboard_shortcuts/__init__.py +39 -0
- empathy_os/workflows/keyboard_shortcuts/generators.py +386 -0
- empathy_os/workflows/keyboard_shortcuts/parsers.py +414 -0
- empathy_os/workflows/keyboard_shortcuts/prompts.py +295 -0
- empathy_os/workflows/keyboard_shortcuts/schema.py +193 -0
- empathy_os/workflows/keyboard_shortcuts/workflow.py +505 -0
- empathy_os/workflows/manage_documentation.py +804 -0
- empathy_os/workflows/new_sample_workflow1.py +146 -0
- empathy_os/workflows/new_sample_workflow1_README.md +150 -0
- empathy_os/workflows/perf_audit.py +687 -0
- empathy_os/workflows/pr_review.py +748 -0
- empathy_os/workflows/progress.py +445 -0
- empathy_os/workflows/progress_server.py +322 -0
- empathy_os/workflows/refactor_plan.py +693 -0
- empathy_os/workflows/release_prep.py +808 -0
- empathy_os/workflows/research_synthesis.py +404 -0
- empathy_os/workflows/secure_release.py +585 -0
- empathy_os/workflows/security_adapters.py +297 -0
- empathy_os/workflows/security_audit.py +1046 -0
- empathy_os/workflows/step_config.py +234 -0
- empathy_os/workflows/test5.py +125 -0
- empathy_os/workflows/test5_README.md +158 -0
- empathy_os/workflows/test_gen.py +1855 -0
- empathy_os/workflows/test_lifecycle.py +526 -0
- empathy_os/workflows/test_maintenance.py +626 -0
- empathy_os/workflows/test_maintenance_cli.py +590 -0
- empathy_os/workflows/test_maintenance_crew.py +821 -0
- empathy_os/workflows/xml_enhanced_crew.py +285 -0
- empathy_software_plugin/__init__.py +1 -2
- empathy_software_plugin/cli/__init__.py +120 -0
- empathy_software_plugin/cli/inspect.py +362 -0
- empathy_software_plugin/cli.py +49 -27
- empathy_software_plugin/plugin.py +4 -8
- empathy_software_plugin/wizards/__init__.py +42 -0
- empathy_software_plugin/wizards/advanced_debugging_wizard.py +392 -0
- empathy_software_plugin/wizards/agent_orchestration_wizard.py +511 -0
- empathy_software_plugin/wizards/ai_collaboration_wizard.py +503 -0
- empathy_software_plugin/wizards/ai_context_wizard.py +441 -0
- empathy_software_plugin/wizards/ai_documentation_wizard.py +503 -0
- empathy_software_plugin/wizards/base_wizard.py +288 -0
- empathy_software_plugin/wizards/book_chapter_wizard.py +519 -0
- empathy_software_plugin/wizards/code_review_wizard.py +606 -0
- empathy_software_plugin/wizards/debugging/__init__.py +50 -0
- empathy_software_plugin/wizards/debugging/bug_risk_analyzer.py +414 -0
- empathy_software_plugin/wizards/debugging/config_loaders.py +442 -0
- empathy_software_plugin/wizards/debugging/fix_applier.py +469 -0
- empathy_software_plugin/wizards/debugging/language_patterns.py +383 -0
- empathy_software_plugin/wizards/debugging/linter_parsers.py +470 -0
- empathy_software_plugin/wizards/debugging/verification.py +369 -0
- empathy_software_plugin/wizards/enhanced_testing_wizard.py +537 -0
- empathy_software_plugin/wizards/memory_enhanced_debugging_wizard.py +816 -0
- empathy_software_plugin/wizards/multi_model_wizard.py +501 -0
- empathy_software_plugin/wizards/pattern_extraction_wizard.py +422 -0
- empathy_software_plugin/wizards/pattern_retriever_wizard.py +400 -0
- empathy_software_plugin/wizards/performance/__init__.py +9 -0
- empathy_software_plugin/wizards/performance/bottleneck_detector.py +221 -0
- empathy_software_plugin/wizards/performance/profiler_parsers.py +278 -0
- empathy_software_plugin/wizards/performance/trajectory_analyzer.py +429 -0
- empathy_software_plugin/wizards/performance_profiling_wizard.py +305 -0
- empathy_software_plugin/wizards/prompt_engineering_wizard.py +425 -0
- empathy_software_plugin/wizards/rag_pattern_wizard.py +461 -0
- empathy_software_plugin/wizards/security/__init__.py +32 -0
- empathy_software_plugin/wizards/security/exploit_analyzer.py +290 -0
- empathy_software_plugin/wizards/security/owasp_patterns.py +241 -0
- empathy_software_plugin/wizards/security/vulnerability_scanner.py +604 -0
- empathy_software_plugin/wizards/security_analysis_wizard.py +322 -0
- empathy_software_plugin/wizards/security_learning_wizard.py +740 -0
- empathy_software_plugin/wizards/tech_debt_wizard.py +726 -0
- empathy_software_plugin/wizards/testing/__init__.py +27 -0
- empathy_software_plugin/wizards/testing/coverage_analyzer.py +459 -0
- empathy_software_plugin/wizards/testing/quality_analyzer.py +531 -0
- empathy_software_plugin/wizards/testing/test_suggester.py +533 -0
- empathy_software_plugin/wizards/testing_wizard.py +274 -0
- hot_reload/README.md +473 -0
- hot_reload/__init__.py +62 -0
- hot_reload/config.py +84 -0
- hot_reload/integration.py +228 -0
- hot_reload/reloader.py +298 -0
- hot_reload/watcher.py +179 -0
- hot_reload/websocket.py +176 -0
- scaffolding/README.md +589 -0
- scaffolding/__init__.py +35 -0
- scaffolding/__main__.py +14 -0
- scaffolding/cli.py +240 -0
- test_generator/__init__.py +38 -0
- test_generator/__main__.py +14 -0
- test_generator/cli.py +226 -0
- test_generator/generator.py +325 -0
- test_generator/risk_analyzer.py +216 -0
- workflow_patterns/__init__.py +33 -0
- workflow_patterns/behavior.py +249 -0
- workflow_patterns/core.py +76 -0
- workflow_patterns/output.py +99 -0
- workflow_patterns/registry.py +255 -0
- workflow_patterns/structural.py +288 -0
- workflow_scaffolding/__init__.py +11 -0
- workflow_scaffolding/__main__.py +12 -0
- workflow_scaffolding/cli.py +206 -0
- workflow_scaffolding/generator.py +265 -0
- agents/code_inspection/patterns/inspection/recurring_B112.json +0 -18
- agents/code_inspection/patterns/inspection/recurring_F541.json +0 -16
- agents/code_inspection/patterns/inspection/recurring_FORMAT.json +0 -25
- agents/code_inspection/patterns/inspection/recurring_bug_20250822_def456.json +0 -16
- agents/code_inspection/patterns/inspection/recurring_bug_20250915_abc123.json +0 -16
- agents/code_inspection/patterns/inspection/recurring_bug_20251212_3c5b9951.json +0 -16
- agents/code_inspection/patterns/inspection/recurring_bug_20251212_97c0f72f.json +0 -16
- agents/code_inspection/patterns/inspection/recurring_bug_20251212_a0871d53.json +0 -16
- agents/code_inspection/patterns/inspection/recurring_bug_20251212_a9b6ec41.json +0 -16
- agents/code_inspection/patterns/inspection/recurring_bug_null_001.json +0 -16
- agents/code_inspection/patterns/inspection/recurring_builtin.json +0 -16
- agents/compliance_anticipation_agent.py +0 -1427
- agents/epic_integration_wizard.py +0 -541
- agents/trust_building_behaviors.py +0 -891
- empathy_framework-2.4.0.dist-info/METADATA +0 -485
- empathy_framework-2.4.0.dist-info/RECORD +0 -102
- empathy_framework-2.4.0.dist-info/entry_points.txt +0 -6
- empathy_llm_toolkit/htmlcov/status.json +0 -1
- empathy_llm_toolkit/security/htmlcov/status.json +0 -1
- {empathy_framework-2.4.0.dist-info → empathy_framework-3.8.2.dist-info}/WHEEL +0 -0
- {empathy_framework-2.4.0.dist-info → empathy_framework-3.8.2.dist-info}/licenses/LICENSE +0 -0
empathy_os/workflows/document_gen.py (new file)
@@ -0,0 +1,1069 @@
"""Document Generation Workflow

A cost-optimized, enterprise-safe documentation pipeline:
1. Haiku: Generate outline from code/specs (cheap, fast)
2. Sonnet: Write each section (capable, chunked for large projects)
3. Opus: Final review + consistency polish (premium, chunked if needed)

Enterprise Features:
- Auto-scaling tokens based on project complexity
- Chunked polish for large documents
- Cost guardrails with configurable max_cost
- Graceful degradation with partial results on errors

Copyright 2025 Smart-AI-Memory
Licensed under Fair Source License 0.9
"""

import logging
from datetime import datetime
from pathlib import Path
from typing import Any

from .base import BaseWorkflow, ModelTier
from .step_config import WorkflowStepConfig

logger = logging.getLogger(__name__)

# Approximate cost per 1K tokens (USD) - used for cost estimation
# These are estimates and should be updated as pricing changes
TOKEN_COSTS = {
    ModelTier.CHEAP: {"input": 0.00025, "output": 0.00125},  # Haiku
    ModelTier.CAPABLE: {"input": 0.003, "output": 0.015},  # Sonnet
    ModelTier.PREMIUM: {"input": 0.015, "output": 0.075},  # Opus
}

# Define step configurations for executor-based execution
# Note: max_tokens for polish is dynamically set based on input size
DOC_GEN_STEPS = {
    "polish": WorkflowStepConfig(
        name="polish",
        task_type="final_review",  # Premium tier task
        tier_hint="premium",
        description="Polish and improve documentation for consistency and quality",
        max_tokens=20000,  # Increased to handle large chunked documents
    ),
}


class DocumentGenerationWorkflow(BaseWorkflow):
    """Multi-tier document generation workflow.

    Uses cheap models for outlining, capable models for content
    generation, and premium models for final polish and consistency
    review.

    Usage:
        workflow = DocumentGenerationWorkflow()
        result = await workflow.execute(
            source_code="...",
            doc_type="api_reference",
            audience="developers"
        )
    """

    name = "doc-gen"
    description = "Cost-optimized documentation generation pipeline"
    stages = ["outline", "write", "polish"]
    tier_map = {
        "outline": ModelTier.CHEAP,
        "write": ModelTier.CAPABLE,
        "polish": ModelTier.PREMIUM,
    }

    def __init__(
        self,
        skip_polish_threshold: int = 1000,
        max_sections: int = 10,
        max_write_tokens: int | None = None,  # Auto-scaled if None
        section_focus: list[str] | None = None,
        chunked_generation: bool = True,
        sections_per_chunk: int = 3,
        max_cost: float = 5.0,  # Cost guardrail in USD
        cost_warning_threshold: float = 0.8,  # Warn at 80% of max_cost
        graceful_degradation: bool = True,  # Return partial results on error
        export_path: str | Path | None = None,  # Export docs to file (e.g., "docs/generated")
        max_display_chars: int = 45000,  # Max chars before chunking output
        **kwargs: Any,
    ):
        """Initialize workflow with enterprise-safe defaults.

        Args:
            skip_polish_threshold: Skip premium polish for docs under this
                token count (they're already good enough).
            max_sections: Maximum number of sections to generate.
            max_write_tokens: Maximum tokens for content generation.
                If None, auto-scales based on section count (recommended).
            section_focus: Optional list of specific sections to generate
                (e.g., ["Testing Guide", "API Reference"]).
            chunked_generation: If True, generates large docs in chunks to avoid
                truncation (default True).
            sections_per_chunk: Number of sections to generate per chunk (default 3).
            max_cost: Maximum cost in USD before stopping (default $5).
                Set to 0 to disable cost limits.
            cost_warning_threshold: Percentage of max_cost to trigger warning (default 0.8).
            graceful_degradation: If True, return partial results on errors
                instead of failing completely (default True).
            export_path: Optional directory to export generated docs (e.g., "docs/generated").
                If provided, documentation will be saved to a file automatically.
            max_display_chars: Maximum characters before splitting output into chunks
                for display (default 45000). Helps avoid terminal/UI truncation.

        """
        super().__init__(**kwargs)
        self.skip_polish_threshold = skip_polish_threshold
        self.max_sections = max_sections
        self._user_max_write_tokens = max_write_tokens  # Store user preference
        self.max_write_tokens = max_write_tokens or 16000  # Will be auto-scaled
        self.section_focus = section_focus
        self.chunked_generation = chunked_generation
        self.sections_per_chunk = sections_per_chunk
        self.max_cost = max_cost
        self.cost_warning_threshold = cost_warning_threshold
        self.graceful_degradation = graceful_degradation
        self.export_path = Path(export_path) if export_path else None
        self.max_display_chars = max_display_chars
        self._total_content_tokens: int = 0
        self._accumulated_cost: float = 0.0
        self._cost_warning_issued: bool = False
        self._partial_results: dict = {}

    def _estimate_cost(self, tier: ModelTier, input_tokens: int, output_tokens: int) -> float:
        """Estimate cost for a given tier and token counts."""
        costs = TOKEN_COSTS.get(tier, TOKEN_COSTS[ModelTier.CAPABLE])
        input_cost = (input_tokens / 1000) * costs["input"]
        output_cost = (output_tokens / 1000) * costs["output"]
        return input_cost + output_cost

    def _track_cost(
        self,
        tier: ModelTier,
        input_tokens: int,
        output_tokens: int,
    ) -> tuple[float, bool]:
        """Track accumulated cost and check against limits.

        Returns:
            Tuple of (cost_for_this_call, should_stop)

        """
        cost = self._estimate_cost(tier, input_tokens, output_tokens)
        self._accumulated_cost += cost

        # Check warning threshold
        if (
            self.max_cost > 0
            and not self._cost_warning_issued
            and self._accumulated_cost >= self.max_cost * self.cost_warning_threshold
        ):
            self._cost_warning_issued = True
            logger.warning(
                f"Doc-gen cost approaching limit: ${self._accumulated_cost:.2f} "
                f"of ${self.max_cost:.2f} ({self.cost_warning_threshold * 100:.0f}% threshold)",
            )

        # Check if we should stop
        should_stop = self.max_cost > 0 and self._accumulated_cost >= self.max_cost
        if should_stop:
            logger.warning(
                f"Doc-gen cost limit reached: ${self._accumulated_cost:.2f} >= ${self.max_cost:.2f}",
            )

        return cost, should_stop

    def _auto_scale_tokens(self, section_count: int) -> int:
        """Auto-scale max_write_tokens based on section count.

        Enterprise projects may have 20+ sections requiring more tokens.
        """
        if self._user_max_write_tokens is not None:
            return self._user_max_write_tokens  # User override

        # Base: 2000 tokens per section, minimum 16000, maximum 64000
        scaled = max(16000, min(64000, section_count * 2000))
        logger.info(f"Auto-scaled max_write_tokens to {scaled} for {section_count} sections")
        return scaled

    def _export_document(
        self,
        document: str,
        doc_type: str,
        report: str | None = None,
    ) -> tuple[Path | None, Path | None]:
        """Export generated documentation to file.

        Args:
            document: The generated documentation content
            doc_type: Document type for naming
            report: Optional report to save alongside document

        Returns:
            Tuple of (doc_path, report_path) or (None, None) if export disabled

        """
        if not self.export_path:
            return None, None

        # Create export directory
        self.export_path.mkdir(parents=True, exist_ok=True)

        # Generate filename with timestamp
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        safe_doc_type = doc_type.replace(" ", "_").replace("/", "-").lower()
        doc_filename = f"{safe_doc_type}_{timestamp}.md"
        report_filename = f"{safe_doc_type}_{timestamp}_report.txt"

        doc_path = self.export_path / doc_filename
        report_path = self.export_path / report_filename if report else None

        # Write document
        try:
            doc_path.write_text(document, encoding="utf-8")
            logger.info(f"Documentation exported to: {doc_path}")

            # Write report if provided
            if report and report_path:
                report_path.write_text(report, encoding="utf-8")
                logger.info(f"Report exported to: {report_path}")

            return doc_path, report_path
        except Exception as e:
            logger.error(f"Failed to export documentation: {e}")
            return None, None

    def _chunk_output_for_display(self, content: str, chunk_prefix: str = "PART") -> list[str]:
        """Split large output into displayable chunks.

        Args:
            content: The content to chunk
            chunk_prefix: Prefix for chunk headers

        Returns:
            List of content chunks, each under max_display_chars

        """
        if len(content) <= self.max_display_chars:
            return [content]

        chunks = []
        # Try to split on section boundaries (## headers)
        import re

        sections = re.split(r"(?=^## )", content, flags=re.MULTILINE)

        current_chunk = ""
        chunk_num = 1

        for section in sections:
            # If adding this section would exceed limit, save current chunk
            if current_chunk and len(current_chunk) + len(section) > self.max_display_chars:
                chunks.append(
                    f"{'=' * 60}\n{chunk_prefix} {chunk_num} of {{total}}\n{'=' * 60}\n\n"
                    + current_chunk,
                )
                chunk_num += 1
                current_chunk = section
            else:
                current_chunk += section

        # Add final chunk
        if current_chunk:
            chunks.append(
                f"{'=' * 60}\n{chunk_prefix} {chunk_num} of {{total}}\n{'=' * 60}\n\n"
                + current_chunk,
            )

        # Update total count in all chunks
        total = len(chunks)
        chunks = [chunk.format(total=total) for chunk in chunks]

        return chunks

    def should_skip_stage(self, stage_name: str, input_data: Any) -> tuple[bool, str | None]:
        """Skip polish for short documents."""
        if stage_name == "polish":
            if self._total_content_tokens < self.skip_polish_threshold:
                self.tier_map["polish"] = ModelTier.CAPABLE
                return False, None
        return False, None

    async def run_stage(
        self,
        stage_name: str,
        tier: ModelTier,
        input_data: Any,
    ) -> tuple[Any, int, int]:
        """Execute a document generation stage."""
        if stage_name == "outline":
            return await self._outline(input_data, tier)
        if stage_name == "write":
            return await self._write(input_data, tier)
        if stage_name == "polish":
            return await self._polish(input_data, tier)
        raise ValueError(f"Unknown stage: {stage_name}")

    async def _outline(self, input_data: dict, tier: ModelTier) -> tuple[dict, int, int]:
        """Generate document outline from source."""
        source_code = input_data.get("source_code", "")
        target = input_data.get("target", "")
        doc_type = input_data.get("doc_type", "general")
        audience = input_data.get("audience", "developers")

        # Use target if source_code not provided
        content_to_document = source_code or target

        # If target looks like a file path and source_code wasn't provided, read the file
        if not source_code and target:
            from pathlib import Path

            target_path = Path(target)
            if target_path.exists() and target_path.is_file():
                try:
                    content_to_document = target_path.read_text(encoding="utf-8")
                    # Prepend file info for context
                    content_to_document = f"# File: {target}\n\n{content_to_document}"
                except Exception as e:
                    # If we can't read the file, log and use the path as-is
                    import logging

                    logging.getLogger(__name__).warning(f"Could not read file {target}: {e}")
            elif target_path.suffix in (
                ".py",
                ".js",
                ".ts",
                ".tsx",
                ".java",
                ".go",
                ".rs",
                ".md",
                ".txt",
            ):
                # Looks like a file path but doesn't exist - warn
                import logging

                logging.getLogger(__name__).warning(
                    f"Target appears to be a file path but doesn't exist: {target}",
                )

        system = """You are a technical writer. Create a detailed outline for documentation.

Based on the content provided, generate an outline with:
1. Logical section structure (5-8 sections)
2. Brief description of each section's purpose
3. Key points to cover in each section

Format as a numbered list with section titles and descriptions."""

        user_message = f"""Create a documentation outline:

Document Type: {doc_type}
Target Audience: {audience}

Content to document:
{content_to_document[:4000]}"""

        response, input_tokens, output_tokens = await self._call_llm(
            tier,
            system,
            user_message,
            max_tokens=1000,
        )

        return (
            {
                "outline": response,
                "doc_type": doc_type,
                "audience": audience,
                "content_to_document": content_to_document,
            },
            input_tokens,
            output_tokens,
        )

    def _parse_outline_sections(self, outline: str) -> list[str]:
        """Parse top-level section titles from the outline.

        Only matches main sections like "1. Introduction", "2. Setup", etc.
        Ignores sub-sections like "2.1 Prerequisites" or nested items.
        """
        import re

        sections = []
        # Match only top-level sections: digit followed by period and space/letter
        # e.g., "1. Introduction" but NOT "1.1 Sub-section" or "2.1.3 Deep"
        top_level_pattern = re.compile(r"^(\d+)\.\s+([A-Za-z].*)")

        for line in outline.split("\n"):
            stripped = line.strip()
            match = top_level_pattern.match(stripped)
            if match:
                # section_num = match.group(1) - not needed, only extracting title
                title = match.group(2).strip()
                # Remove any trailing description after " - "
                if " - " in title:
                    title = title.split(" - ")[0].strip()
                sections.append(title)

        return sections

    async def _write(self, input_data: dict, tier: ModelTier) -> tuple[dict, int, int]:
        """Write content based on the outline."""
        outline = input_data.get("outline", "")
        doc_type = input_data.get("doc_type", "general")
        audience = input_data.get("audience", "developers")
        content_to_document = input_data.get("content_to_document", "")

        # Parse sections from outline
        sections = self._parse_outline_sections(outline)

        # Auto-scale tokens based on section count
        self.max_write_tokens = self._auto_scale_tokens(len(sections))

        # Use chunked generation for large outlines (more than sections_per_chunk * 2)
        use_chunking = (
            self.chunked_generation
            and len(sections) > self.sections_per_chunk * 2
            and not self.section_focus  # Don't chunk if already focused
        )

        if use_chunking:
            return await self._write_chunked(
                sections,
                outline,
                doc_type,
                audience,
                content_to_document,
                tier,
            )

        # Handle section_focus for targeted generation
        section_instruction = ""
        if self.section_focus:
            sections_list = ", ".join(self.section_focus)
            section_instruction = f"""
IMPORTANT: Focus ONLY on generating these specific sections:
{sections_list}

Generate comprehensive, detailed content for each of these sections."""

        system = f"""You are a technical writer. Write comprehensive documentation.

Based on the outline provided, write full content for each section:
1. Use clear, professional language
2. Include code examples where appropriate
3. Use markdown formatting
4. Be thorough and detailed - do NOT truncate sections
5. Target the specified audience
6. Complete ALL sections before stopping
{section_instruction}

Write the complete document with all sections."""

        user_message = f"""Write documentation based on this outline:

Document Type: {doc_type}
Target Audience: {audience}

Outline:
{outline}

Source content for reference:
{content_to_document[:5000]}"""

        response, input_tokens, output_tokens = await self._call_llm(
            tier,
            system,
            user_message,
            max_tokens=self.max_write_tokens,
        )

        self._total_content_tokens = output_tokens

        return (
            {
                "draft_document": response,
                "doc_type": doc_type,
                "audience": audience,
                "outline": outline,
                "chunked": False,
            },
            input_tokens,
            output_tokens,
        )

    async def _write_chunked(
        self,
        sections: list[str],
        outline: str,
        doc_type: str,
        audience: str,
        content_to_document: str,
        tier: ModelTier,
    ) -> tuple[dict, int, int]:
        """Generate documentation in chunks to avoid truncation.

        Enterprise-safe: includes cost tracking and graceful degradation.
        """
        all_content: list[str] = []
        total_input_tokens: int = 0
        total_output_tokens: int = 0
        stopped_early: bool = False
        error_message: str | None = None

        # Split sections into chunks
        chunks = []
        for i in range(0, len(sections), self.sections_per_chunk):
            chunks.append(sections[i : i + self.sections_per_chunk])

        logger.info(f"Generating documentation in {len(chunks)} chunks")

        for chunk_idx, chunk_sections in enumerate(chunks):
            sections_list = ", ".join(chunk_sections)

            # Build context about what came before
            previous_context = ""
            if chunk_idx > 0 and all_content:
                # Include last 500 chars of previous content for continuity
                previous_context = f"""
Previous sections already written (for context/continuity):
...{all_content[-1][-500:]}

Continue with the next sections, maintaining consistent style and terminology."""

            system = f"""You are a technical writer. Write comprehensive documentation.

Write ONLY the following sections (you are generating part {chunk_idx + 1} of {len(chunks)}):
{sections_list}

Requirements:
1. Use clear, professional language
2. Include code examples where appropriate
3. Use markdown formatting with ## headers
4. Be thorough and detailed - complete each section fully
5. Target {audience} audience
6. Write ONLY these specific sections, nothing else"""

            user_message = f"""Write documentation for these specific sections:

Document Type: {doc_type}
Target Audience: {audience}

Sections to write: {sections_list}

Full outline (for context):
{outline}

Source content for reference:
{content_to_document[:3000]}
{previous_context}"""

            try:
                response, input_tokens, output_tokens = await self._call_llm(
                    tier,
                    system,
                    user_message,
                    max_tokens=self.max_write_tokens // len(chunks) + 2000,
                )

                # Track cost and check limits
                _, should_stop = self._track_cost(tier, input_tokens, output_tokens)

                all_content.append(response)
                total_input_tokens += input_tokens
                total_output_tokens += output_tokens

                logger.info(
                    f"Chunk {chunk_idx + 1}/{len(chunks)} complete: "
                    f"{len(response)} chars, {output_tokens} tokens, "
                    f"cost so far: ${self._accumulated_cost:.2f}",
                )

                # Check cost limit
                if should_stop:
                    stopped_early = True
                    remaining = len(chunks) - chunk_idx - 1
                    error_message = (
                        f"Cost limit reached (${self._accumulated_cost:.2f}). "
                        f"Stopped after {chunk_idx + 1}/{len(chunks)} chunks. "
                        f"{remaining} chunks not generated."
                    )
                    logger.warning(error_message)
                    break

            except Exception as e:
                error_message = f"Error generating chunk {chunk_idx + 1}: {e}"
                logger.error(error_message)
                if not self.graceful_degradation:
                    raise
                stopped_early = True
                break

        # Combine all chunks
        combined_document = "\n\n".join(all_content)
        self._total_content_tokens = total_output_tokens

        # Store partial results for graceful degradation
        self._partial_results = {
            "draft_document": combined_document,
            "sections_completed": len(all_content),
            "sections_total": len(chunks),
        }

        result = {
            "draft_document": combined_document,
            "doc_type": doc_type,
            "audience": audience,
            "outline": outline,
            "chunked": True,
            "chunk_count": len(chunks),
            "chunks_completed": len(all_content),
            "stopped_early": stopped_early,
            "accumulated_cost": self._accumulated_cost,
        }

        if error_message:
            result["warning"] = error_message

        return (result, total_input_tokens, total_output_tokens)

    async def _polish(self, input_data: dict, tier: ModelTier) -> tuple[dict, int, int]:
        """Final review and consistency polish using LLM.

        Enterprise-safe: chunks large documents to avoid truncation.
        Supports XML-enhanced prompts when enabled in workflow config.
        """
        draft_document = input_data.get("draft_document", "")
        doc_type = input_data.get("doc_type", "general")
        audience = input_data.get("audience", "developers")

        # Check if document is too large and needs chunked polishing
        # Rough estimate: 4 chars per token, 10k tokens threshold for chunking
        estimated_tokens = len(draft_document) // 4
        needs_chunked_polish = estimated_tokens > 10000

        if needs_chunked_polish:
            logger.info(
                f"Large document detected (~{estimated_tokens} tokens). "
                "Using chunked polish for enterprise safety.",
            )
            return await self._polish_chunked(input_data, tier)

        # Build input payload for prompt
        input_payload = f"""Document Type: {doc_type}
Target Audience: {audience}

Draft:
{draft_document}"""

        # Check if XML prompts are enabled
        if self._is_xml_enabled():
            # Use XML-enhanced prompt
            user_message = self._render_xml_prompt(
                role="senior technical editor",
                goal="Polish and improve the documentation for consistency and quality",
                instructions=[
                    "Standardize terminology and formatting",
                    "Improve clarity and flow",
                    "Add missing cross-references",
                    "Fix grammatical issues",
                    "Identify gaps and add helpful notes",
                    "Ensure examples are complete and accurate",
                ],
                constraints=[
                    "Maintain the original structure and intent",
                    "Keep content appropriate for the target audience",
                    "Preserve code examples while improving explanations",
                ],
                input_type="documentation_draft",
                input_payload=input_payload,
                extra={
                    "doc_type": doc_type,
                    "audience": audience,
                },
            )
            system = None  # XML prompt includes all context
        else:
            # Use legacy plain text prompts
            system = """You are a senior technical editor. Polish and improve the documentation:

1. CONSISTENCY:
- Standardize terminology
- Fix formatting inconsistencies
- Ensure consistent code style

2. QUALITY:
- Improve clarity and flow
- Add missing cross-references
- Fix grammatical issues

3. COMPLETENESS:
- Identify gaps
- Add helpful notes or warnings
- Ensure examples are complete

Return the polished document with improvements noted at the end."""

            user_message = f"""Polish this documentation:

{input_payload}"""

        # Calculate polish tokens based on draft size (at least as much as write stage)
        polish_max_tokens = max(self.max_write_tokens, 20000)

        # Try executor-based execution first (Phase 3 pattern)
        if self._executor is not None or self._api_key:
            try:
                step = DOC_GEN_STEPS["polish"]
                # Override step max_tokens with dynamic value
                step.max_tokens = polish_max_tokens
                response, input_tokens, output_tokens, cost = await self.run_step_with_executor(
                    step=step,
                    prompt=user_message,
                    system=system,
                )
            except Exception:
                # Fall back to legacy _call_llm if executor fails
                response, input_tokens, output_tokens = await self._call_llm(
                    tier,
                    system or "",
                    user_message,
                    max_tokens=polish_max_tokens,
                )
        else:
            # Legacy path for backward compatibility
            response, input_tokens, output_tokens = await self._call_llm(
                tier,
                system or "",
                user_message,
                max_tokens=polish_max_tokens,
            )

        # Parse XML response if enforcement is enabled
        parsed_data = self._parse_xml_response(response)

        result = {
            "document": response,
            "doc_type": doc_type,
            "audience": audience,
            "model_tier_used": tier.value,
        }

        # Merge parsed XML data if available
        if parsed_data.get("xml_parsed"):
            result.update(
                {
                    "xml_parsed": True,
                    "summary": parsed_data.get("summary"),
                    "findings": parsed_data.get("findings", []),
                    "checklist": parsed_data.get("checklist", []),
                },
            )

        # Add formatted report for human readability
        result["formatted_report"] = format_doc_gen_report(result, input_data)

        # Export documentation if export_path is configured
        doc_path, report_path = self._export_document(
            document=response,
            doc_type=doc_type,
            report=result["formatted_report"],
        )
        if doc_path:
            result["export_path"] = str(doc_path)
            result["report_path"] = str(report_path) if report_path else None
            logger.info(f"Documentation saved to: {doc_path}")

        # Chunk output for display if needed
        output_chunks = self._chunk_output_for_display(
            result["formatted_report"],
            chunk_prefix="DOC OUTPUT",
        )
        if len(output_chunks) > 1:
            result["output_chunks"] = output_chunks
            result["output_chunk_count"] = len(output_chunks)
            logger.info(
                f"Report split into {len(output_chunks)} chunks for display "
                f"(total {len(result['formatted_report'])} chars)",
            )

        return (result, input_tokens, output_tokens)

    async def _polish_chunked(self, input_data: dict, tier: ModelTier) -> tuple[dict, int, int]:
        """Polish large documents in chunks to avoid truncation.

        Splits the document by section headers and polishes each chunk separately,
        then combines the results.
        """
        import re

        draft_document = input_data.get("draft_document", "")
        doc_type = input_data.get("doc_type", "general")
        audience = input_data.get("audience", "developers")

        # Split document by major section headers (## headers)
        sections = re.split(r"(?=^## )", draft_document, flags=re.MULTILINE)
        sections = [s.strip() for s in sections if s.strip()]

        if len(sections) <= 1:
            # If we can't split by sections, split by character count
            chunk_size = 15000  # ~3750 tokens per chunk
            sections = [
                draft_document[i : i + chunk_size]
                for i in range(0, len(draft_document), chunk_size)
            ]

        logger.info(f"Polishing document in {len(sections)} chunks")

        polished_chunks: list[str] = []
        total_input_tokens: int = 0
        total_output_tokens: int = 0

        for chunk_idx, section in enumerate(sections):
            system = """You are a senior technical editor. Polish this section of documentation:

1. Standardize terminology and formatting
2. Improve clarity and flow
3. Fix grammatical issues
4. Ensure code examples are complete and accurate

Return ONLY the polished section. Do not add commentary."""

            user_message = f"""Polish this documentation section (part {chunk_idx + 1} of {len(sections)}):

Document Type: {doc_type}
Target Audience: {audience}

Section to polish:
{section}"""

            try:
                response, input_tokens, output_tokens = await self._call_llm(
                    tier,
                    system,
                    user_message,
                    max_tokens=8000,
                )

                # Track cost
                _, should_stop = self._track_cost(tier, input_tokens, output_tokens)

                polished_chunks.append(response)
                total_input_tokens += input_tokens
                total_output_tokens += output_tokens

                logger.info(
                    f"Polish chunk {chunk_idx + 1}/{len(sections)} complete, "
                    f"cost so far: ${self._accumulated_cost:.2f}",
                )

                if should_stop:
                    logger.warning(
                        f"Cost limit reached during polish. "
                        f"Returning {len(polished_chunks)}/{len(sections)} polished chunks.",
                    )
                    # Add remaining sections unpolished
                    polished_chunks.extend(sections[chunk_idx + 1 :])
                    break

            except Exception as e:
                logger.error(f"Error polishing chunk {chunk_idx + 1}: {e}")
                if self.graceful_degradation:
                    # Keep original section on error
                    polished_chunks.append(section)
                else:
                    raise

        # Combine polished chunks
        polished_document = "\n\n".join(polished_chunks)

        result = {
            "document": polished_document,
            "doc_type": doc_type,
            "audience": audience,
            "model_tier_used": tier.value,
            "polish_chunked": True,
            "polish_chunks": len(sections),
            "accumulated_cost": self._accumulated_cost,
        }

        # Add formatted report
        result["formatted_report"] = format_doc_gen_report(result, input_data)

        # Export documentation if export_path is configured
        doc_path, report_path = self._export_document(
            document=polished_document,
            doc_type=doc_type,
            report=result["formatted_report"],
        )
        if doc_path:
            result["export_path"] = str(doc_path)
            result["report_path"] = str(report_path) if report_path else None
            logger.info(f"Documentation saved to: {doc_path}")

        # Chunk output for display if needed
        output_chunks = self._chunk_output_for_display(
            result["formatted_report"],
            chunk_prefix="DOC OUTPUT",
        )
        if len(output_chunks) > 1:
            result["output_chunks"] = output_chunks
            result["output_chunk_count"] = len(output_chunks)
            logger.info(
                f"Report split into {len(output_chunks)} chunks for display "
                f"(total {len(result['formatted_report'])} chars)",
            )

        return (result, total_input_tokens, total_output_tokens)


def format_doc_gen_report(result: dict, input_data: dict) -> str:
    """Format document generation output as a human-readable report.

    Args:
        result: The polish stage result
        input_data: Input data from previous stages

    Returns:
        Formatted report string

    """
    lines = []

    # Header
    doc_type = result.get("doc_type", "general").replace("_", " ").title()
    audience = result.get("audience", "developers").title()

    lines.append("=" * 60)
    lines.append("DOCUMENTATION GENERATION REPORT")
    lines.append("=" * 60)
    lines.append("")
    lines.append(f"Document Type: {doc_type}")
    lines.append(f"Target Audience: {audience}")
    lines.append("")

    # Outline summary
    outline = input_data.get("outline", "")
    if outline:
        lines.append("-" * 60)
        lines.append("DOCUMENT OUTLINE")
        lines.append("-" * 60)
        # Show just a preview of the outline
        outline_lines = outline.split("\n")[:10]
        lines.extend(outline_lines)
        if len(outline.split("\n")) > 10:
            lines.append("...")
        lines.append("")

    # Generated document
    document = result.get("document", "")
    if document:
        lines.append("-" * 60)
        lines.append("GENERATED DOCUMENTATION")
        lines.append("-" * 60)
        lines.append("")
        lines.append(document)
        lines.append("")

    # Statistics
    word_count = len(document.split()) if document else 0
    section_count = document.count("##") if document else 0  # Count markdown headers
    was_chunked = input_data.get("chunked", False)
    chunk_count = input_data.get("chunk_count", 0)
    chunks_completed = input_data.get("chunks_completed", chunk_count)
    stopped_early = input_data.get("stopped_early", False)
    accumulated_cost = result.get("accumulated_cost", 0)
    polish_chunked = result.get("polish_chunked", False)

    lines.append("-" * 60)
    lines.append("STATISTICS")
    lines.append("-" * 60)
    lines.append(f"Word Count: {word_count}")
    lines.append(f"Section Count: ~{section_count}")
    if was_chunked:
        if stopped_early:
            lines.append(
                f"Generation Mode: Chunked ({chunks_completed}/{chunk_count} chunks completed)",
            )
        else:
            lines.append(f"Generation Mode: Chunked ({chunk_count} chunks)")
    if polish_chunked:
        polish_chunks = result.get("polish_chunks", 0)
        lines.append(f"Polish Mode: Chunked ({polish_chunks} sections)")
    if accumulated_cost > 0:
        lines.append(f"Estimated Cost: ${accumulated_cost:.2f}")
    lines.append("")

    # Export info
    export_path = result.get("export_path")
    if export_path:
        lines.append("-" * 60)
        lines.append("FILE EXPORT")
        lines.append("-" * 60)
        lines.append(f"Documentation saved to: {export_path}")
        report_path = result.get("report_path")
        if report_path:
            lines.append(f"Report saved to: {report_path}")
        lines.append("")
        lines.append("Full documentation is available in the exported file.")
        lines.append("")

    # Warning notice (cost limit, errors, etc.)
    warning = input_data.get("warning") or result.get("warning")
    if warning or stopped_early:
        lines.append("-" * 60)
        lines.append("⚠️ WARNING")
        lines.append("-" * 60)
        if warning:
            lines.append(warning)
        if stopped_early and not warning:
            lines.append("Generation stopped early due to cost or error limits.")
        lines.append("")

    # Truncation detection and scope notice
    truncation_indicators = [
        document.rstrip().endswith("..."),
        document.rstrip().endswith("-"),
        "```" in document and document.count("```") % 2 != 0,  # Unclosed code block
        any(
            phrase in document.lower()
            for phrase in ["continued in", "see next section", "to be continued"]
        ),
    ]

    # Count planned sections from outline (top-level only)
    import re

    planned_sections = 0
    top_level_pattern = re.compile(r"^(\d+)\.\s+([A-Za-z].*)")
    if outline:
        for line in outline.split("\n"):
            stripped = line.strip()
            if top_level_pattern.match(stripped):
                planned_sections += 1

    is_truncated = any(truncation_indicators) or (
        planned_sections > 0 and section_count < planned_sections - 1
    )

    if is_truncated or planned_sections > section_count + 1:
        lines.append("-" * 60)
        lines.append("SCOPE NOTICE")
        lines.append("-" * 60)
        lines.append("⚠️ DOCUMENTATION MAY BE INCOMPLETE")
        if planned_sections > 0:
            lines.append(f"  Planned sections: {planned_sections}")
            lines.append(f"  Generated sections: {section_count}")
        lines.append("")
        lines.append("To generate missing sections, re-run with section_focus:")
        lines.append("  workflow = DocumentGenerationWorkflow(")
        lines.append('      section_focus=["Testing Guide", "API Reference"]')
        lines.append("  )")
        lines.append("")

    # Footer
    lines.append("=" * 60)
    model_tier = result.get("model_tier_used", "unknown")
    lines.append(f"Generated using {model_tier} tier model")
    lines.append("=" * 60)

    return "\n".join(lines)
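
For quick reference, a minimal usage sketch of the new workflow above. It relies only on what the hunk itself shows (the class docstring's Usage example and the __init__ parameters); the import path is inferred from the file listing (empathy_os/workflows/document_gen.py), and the shape of the returned result dict is an assumption, not confirmed package behavior.

import asyncio

# Import path inferred from the file listing above; not verified against the wheel
from empathy_os.workflows.document_gen import DocumentGenerationWorkflow


async def main() -> None:
    # max_cost and export_path mirror the __init__ parameters shown in the diff
    workflow = DocumentGenerationWorkflow(
        max_cost=5.0,  # stop once estimated spend reaches $5 (0 disables the guardrail)
        export_path="docs/generated",  # polished document is written here automatically
    )
    # execute() call mirrors the class docstring's Usage example
    result = await workflow.execute(
        source_code="def add(a: int, b: int) -> int:\n    return a + b",
        doc_type="api_reference",
        audience="developers",
    )
    # "formatted_report" is built by format_doc_gen_report() in the polish stage;
    # whether execute() surfaces it at the top level is an assumption
    print(result.get("formatted_report", result))


asyncio.run(main())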