empathy-framework 5.0.3__py3-none-any.whl → 5.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. {empathy_framework-5.0.3.dist-info → empathy_framework-5.1.0.dist-info}/METADATA +259 -142
  2. {empathy_framework-5.0.3.dist-info → empathy_framework-5.1.0.dist-info}/RECORD +56 -26
  3. empathy_framework-5.1.0.dist-info/licenses/LICENSE +201 -0
  4. empathy_framework-5.1.0.dist-info/licenses/LICENSE_CHANGE_ANNOUNCEMENT.md +101 -0
  5. empathy_os/__init__.py +1 -1
  6. empathy_os/cli/commands/batch.py +5 -5
  7. empathy_os/cli/commands/routing.py +1 -1
  8. empathy_os/cli/commands/workflow.py +2 -1
  9. empathy_os/cli/parsers/cache 2.py +65 -0
  10. empathy_os/cli_minimal.py +3 -3
  11. empathy_os/cli_router 2.py +416 -0
  12. empathy_os/dashboard/__init__.py +1 -2
  13. empathy_os/dashboard/app 2.py +512 -0
  14. empathy_os/dashboard/app.py +1 -1
  15. empathy_os/dashboard/simple_server 2.py +403 -0
  16. empathy_os/dashboard/standalone_server 2.py +536 -0
  17. empathy_os/memory/types 2.py +441 -0
  18. empathy_os/models/__init__.py +19 -0
  19. empathy_os/models/adaptive_routing 2.py +437 -0
  20. empathy_os/models/auth_cli.py +444 -0
  21. empathy_os/models/auth_strategy.py +450 -0
  22. empathy_os/project_index/scanner_parallel 2.py +291 -0
  23. empathy_os/telemetry/agent_coordination 2.py +478 -0
  24. empathy_os/telemetry/agent_coordination.py +3 -3
  25. empathy_os/telemetry/agent_tracking 2.py +350 -0
  26. empathy_os/telemetry/agent_tracking.py +1 -2
  27. empathy_os/telemetry/approval_gates 2.py +563 -0
  28. empathy_os/telemetry/event_streaming 2.py +405 -0
  29. empathy_os/telemetry/event_streaming.py +3 -3
  30. empathy_os/telemetry/feedback_loop 2.py +557 -0
  31. empathy_os/telemetry/feedback_loop.py +1 -1
  32. empathy_os/vscode_bridge 2.py +173 -0
  33. empathy_os/workflows/__init__.py +8 -0
  34. empathy_os/workflows/autonomous_test_gen.py +569 -0
  35. empathy_os/workflows/bug_predict.py +45 -0
  36. empathy_os/workflows/code_review.py +92 -22
  37. empathy_os/workflows/document_gen.py +594 -62
  38. empathy_os/workflows/llm_base.py +363 -0
  39. empathy_os/workflows/perf_audit.py +69 -0
  40. empathy_os/workflows/progressive/README 2.md +454 -0
  41. empathy_os/workflows/progressive/__init__ 2.py +92 -0
  42. empathy_os/workflows/progressive/cli 2.py +242 -0
  43. empathy_os/workflows/progressive/core 2.py +488 -0
  44. empathy_os/workflows/progressive/orchestrator 2.py +701 -0
  45. empathy_os/workflows/progressive/reports 2.py +528 -0
  46. empathy_os/workflows/progressive/telemetry 2.py +280 -0
  47. empathy_os/workflows/progressive/test_gen 2.py +514 -0
  48. empathy_os/workflows/progressive/workflow 2.py +628 -0
  49. empathy_os/workflows/release_prep.py +54 -0
  50. empathy_os/workflows/security_audit.py +154 -79
  51. empathy_os/workflows/test_gen.py +60 -0
  52. empathy_os/workflows/test_gen_behavioral.py +477 -0
  53. empathy_os/workflows/test_gen_parallel.py +341 -0
  54. empathy_framework-5.0.3.dist-info/licenses/LICENSE +0 -139
  55. {empathy_framework-5.0.3.dist-info → empathy_framework-5.1.0.dist-info}/WHEEL +0 -0
  56. {empathy_framework-5.0.3.dist-info → empathy_framework-5.1.0.dist-info}/entry_points.txt +0 -0
  57. {empathy_framework-5.0.3.dist-info → empathy_framework-5.1.0.dist-info}/top_level.txt +0 -0
@@ -86,6 +86,7 @@ class DocumentGenerationWorkflow(BaseWorkflow):
86
86
  graceful_degradation: bool = True, # Return partial results on error
87
87
  export_path: str | Path | None = None, # Export docs to file (e.g., "docs/generated")
88
88
  max_display_chars: int = 45000, # Max chars before chunking output
89
+ enable_auth_strategy: bool = True, # Enable intelligent auth routing
89
90
  **kwargs: Any,
90
91
  ):
91
92
  """Initialize workflow with enterprise-safe defaults.
@@ -110,6 +111,8 @@ class DocumentGenerationWorkflow(BaseWorkflow):
110
111
  If provided, documentation will be saved to a file automatically.
111
112
  max_display_chars: Maximum characters before splitting output into chunks
112
113
  for display (default 45000). Helps avoid terminal/UI truncation.
114
+ enable_auth_strategy: If True, use intelligent subscription vs API routing
115
+ based on module size (default True).
113
116
 
114
117
  """
115
118
  super().__init__(**kwargs)
@@ -125,10 +128,12 @@ class DocumentGenerationWorkflow(BaseWorkflow):
125
128
  self.graceful_degradation = graceful_degradation
126
129
  self.export_path = Path(export_path) if export_path else None
127
130
  self.max_display_chars = max_display_chars
131
+ self.enable_auth_strategy = enable_auth_strategy
128
132
  self._total_content_tokens: int = 0
129
133
  self._accumulated_cost: float = 0.0
130
134
  self._cost_warning_issued: bool = False
131
135
  self._partial_results: dict = {}
136
+ self._auth_mode_used: str | None = None # Track which auth was recommended
132
137
 
133
138
  def _estimate_cost(self, tier: ModelTier, input_tokens: int, output_tokens: int) -> float:
134
139
  """Estimate cost for a given tier and token counts."""
@@ -308,6 +313,8 @@ class DocumentGenerationWorkflow(BaseWorkflow):
308
313
 
309
314
  async def _outline(self, input_data: dict, tier: ModelTier) -> tuple[dict, int, int]:
310
315
  """Generate document outline from source."""
316
+ from pathlib import Path
317
+
311
318
  source_code = input_data.get("source_code", "")
312
319
  target = input_data.get("target", "")
313
320
  doc_type = input_data.get("doc_type", "general")
@@ -318,8 +325,6 @@ class DocumentGenerationWorkflow(BaseWorkflow):
318
325
 
319
326
  # If target looks like a file path and source_code wasn't provided, read the file
320
327
  if not source_code and target:
321
- from pathlib import Path
322
-
323
328
  target_path = Path(target)
324
329
  if target_path.exists() and target_path.is_file():
325
330
  try:
@@ -349,22 +354,108 @@ class DocumentGenerationWorkflow(BaseWorkflow):
349
354
  f"Target appears to be a file path but doesn't exist: {target}",
350
355
  )
351
356
 
352
- system = """You are a technical writer. Create a detailed outline for documentation.
357
+ # === AUTH STRATEGY INTEGRATION ===
358
+ # Detect module size and recommend auth mode (first stage only)
359
+ if self.enable_auth_strategy:
360
+ try:
361
+ from empathy_os.models import (
362
+ count_lines_of_code,
363
+ get_auth_strategy,
364
+ get_module_size_category,
365
+ )
366
+
367
+ # Calculate module size
368
+ module_lines = 0
369
+ if target and Path(target).exists():
370
+ module_lines = count_lines_of_code(target)
371
+ elif content_to_document:
372
+ # Count from source code content
373
+ module_lines = len(
374
+ [
375
+ line
376
+ for line in content_to_document.split("\n")
377
+ if line.strip() and not line.strip().startswith("#")
378
+ ]
379
+ )
380
+
381
+ if module_lines > 0:
382
+ # Get auth strategy (first-time setup if needed)
383
+ strategy = get_auth_strategy()
353
384
 
354
- Based on the content provided, generate an outline with:
355
- 1. Logical section structure (5-8 sections)
356
- 2. Brief description of each section's purpose
357
- 3. Key points to cover in each section
385
+ # Get recommended auth mode
386
+ recommended_mode = strategy.get_recommended_mode(module_lines)
387
+ self._auth_mode_used = recommended_mode.value
358
388
 
359
- Format as a numbered list with section titles and descriptions."""
389
+ # Get size category
390
+ size_category = get_module_size_category(module_lines)
360
391
 
361
- user_message = f"""Create a documentation outline:
392
+ # Log recommendation
393
+ logger.info(
394
+ f"Module: {target or 'source'} ({module_lines} LOC, {size_category})"
395
+ )
396
+ logger.info(f"Recommended auth mode: {recommended_mode.value}")
397
+
398
+ # Get cost estimate
399
+ cost_estimate = strategy.estimate_cost(module_lines, recommended_mode)
400
+
401
+ if recommended_mode.value == "subscription":
402
+ logger.info(
403
+ f"Cost: {cost_estimate['quota_cost']} "
404
+ f"(fits in {cost_estimate['fits_in_context']} context)"
405
+ )
406
+ else: # API
407
+ logger.info(
408
+ f"Cost: ~${cost_estimate['monetary_cost']:.4f} "
409
+ f"(1M context window)"
410
+ )
411
+
412
+ except Exception as e:
413
+ # Don't fail workflow if auth strategy fails
414
+ logger.warning(f"Auth strategy detection failed: {e}")
415
+
416
+ system = """You are an expert technical writer specializing in API Reference documentation.
417
+
418
+ IMPORTANT: This is API REFERENCE documentation, not a tutorial. Focus on documenting EVERY function/class with structured Args/Returns/Raises format.
419
+
420
+ Create a detailed, structured outline for API Reference documentation:
421
+
422
+ 1. **Logical Section Structure** (emphasize API reference sections):
423
+ - Overview/Introduction (brief)
424
+ - Quick Start (1 complete example)
425
+ - API Reference - Functions (one subsection per function with Args/Returns/Raises)
426
+ - API Reference - Classes (one subsection per class with Args/Returns/Raises for methods)
427
+ - Usage Examples (showing how to combine multiple functions)
428
+ - Additional reference sections as needed
429
+
430
+ 2. **For Each Section**:
431
+ - Clear purpose and what readers will learn
432
+ - Specific topics to cover
433
+ - Types of examples to include (with actual code)
434
+
435
+ 3. **Key Requirements**:
436
+ - Include sections for real, copy-paste ready code examples
437
+ - Plan for comprehensive API documentation with all parameters
438
+ - Include edge cases and error handling examples
439
+ - Add best practices and common patterns
440
+
441
+ Format as a numbered list with section titles and detailed descriptions."""
442
+
443
+ user_message = f"""Create a comprehensive documentation outline:
362
444
 
363
445
  Document Type: {doc_type}
364
446
  Target Audience: {audience}
365
447
 
448
+ IMPORTANT: This documentation should be production-ready with:
449
+ - Real, executable code examples (not placeholders)
450
+ - Complete API reference with parameter types and descriptions
451
+ - Usage guides showing common patterns
452
+ - Edge case handling and error scenarios
453
+ - Best practices for the target audience
454
+
366
455
  Content to document:
367
- {content_to_document[:4000]}"""
456
+ {content_to_document[:4000]}
457
+
458
+ Generate an outline that covers all these aspects comprehensively."""
368
459
 
369
460
  response, input_tokens, output_tokens = await self._call_llm(
370
461
  tier,
@@ -450,29 +541,118 @@ IMPORTANT: Focus ONLY on generating these specific sections:
450
541
 
451
542
  Generate comprehensive, detailed content for each of these sections."""
452
543
 
453
- system = f"""You are a technical writer. Write comprehensive documentation.
544
+ system = f"""You are an expert technical writer creating comprehensive developer documentation.
545
+
546
+ YOUR TASK HAS TWO CRITICAL PHASES - YOU MUST COMPLETE BOTH:
547
+
548
+ ═══════════════════════════════════════════════════════════════
549
+ PHASE 1: Write Comprehensive Documentation
550
+ ═══════════════════════════════════════════════════════════════
551
+
552
+ Write clear, helpful documentation with:
553
+ - Overview and introduction explaining what this code does
554
+ - Real, executable code examples (NOT placeholders - use actual code from source)
555
+ - Usage guides showing how to use the code in real scenarios
556
+ - Best practices and common patterns
557
+ - Step-by-step instructions where helpful
558
+ - Tables, diagrams, and visual aids as appropriate
559
+ - Clear explanations appropriate for {audience}
560
+
561
+ Do this naturally - write the kind of documentation that helps developers understand and use the code effectively.
562
+
563
+ ═══════════════════════════════════════════════════════════════
564
+ PHASE 2: Add Structured API Reference Sections (MANDATORY)
565
+ ═══════════════════════════════════════════════════════════════
566
+
567
+ After writing the comprehensive documentation above, you MUST add structured API reference sections for EVERY function and class method.
568
+
569
+ For EACH function/method in the source code, add this EXACT structure:
570
+
571
+ ---
572
+ ### `function_name()`
573
+
574
+ **Function Signature:**
575
+ ```python
576
+ def function_name(param1: type, param2: type = default) -> return_type
577
+ ```
578
+
579
+ **Description:**
580
+ [Brief description of what the function does - 1-2 sentences]
581
+
582
+ **Args:**
583
+ - `param1` (`type`): Clear description of this parameter
584
+ - `param2` (`type`, optional): Description. Defaults to `default`.
585
+
586
+ **Returns:**
587
+ - `return_type`: Description of the return value
588
+
589
+ **Raises:**
590
+ - `ExceptionType`: Description of when and why this exception occurs
591
+ - `AnotherException`: Another exception case
592
+
593
+ **Example:**
594
+ ```python
595
+ from module import function_name
596
+
597
+ # Show real usage with actual code
598
+ result = function_name(actual_value, param2=123)
599
+ print(result)
600
+ ```
601
+ ---
602
+
603
+ CRITICAL RULES FOR PHASE 2:
604
+ - Include **Args:** header for ALL functions (write "None" if no parameters)
605
+ - Include **Returns:** header for ALL functions (write "None" if void/no return)
606
+ - Include **Raises:** header for ALL functions (write "None" if no exceptions)
607
+ - Use backticks for code: `param_name` (`type`)
608
+ - Document EVERY public function and method you see in the source code
454
609
 
455
- Based on the outline provided, write full content for each section:
456
- 1. Use clear, professional language
457
- 2. Include code examples where appropriate
458
- 3. Use markdown formatting
459
- 4. Be thorough and detailed - do NOT truncate sections
460
- 5. Target the specified audience
461
- 6. Complete ALL sections before stopping
462
610
  {section_instruction}
463
611
 
464
- Write the complete document with all sections."""
612
+ ═══════════════════════════════════════════════════════════════
613
+ REMINDER: BOTH PHASES ARE MANDATORY
614
+ ═══════════════════════════════════════════════════════════════
615
+
616
+ 1. Write comprehensive documentation (Phase 1) - what you do naturally
617
+ 2. Add structured API reference sections (Phase 2) - for every function/method
618
+
619
+ Do NOT skip Phase 2 after completing Phase 1. Both phases are required for complete documentation."""
465
620
 
466
- user_message = f"""Write documentation based on this outline:
621
+ user_message = f"""Write comprehensive, production-ready documentation in TWO PHASES:
467
622
 
468
623
  Document Type: {doc_type}
469
624
  Target Audience: {audience}
470
625
 
471
- Outline:
626
+ Outline to follow:
472
627
  {outline}
473
628
 
474
- Source content for reference:
475
- {content_to_document[:5000]}"""
629
+ Source code to document (extract actual class names, function signatures, parameters):
630
+ {content_to_document[:5000]}
631
+
632
+ ═══════════════════════════════════════════════════════════════
633
+ YOUR TASK:
634
+ ═══════════════════════════════════════════════════════════════
635
+
636
+ PHASE 1: Write comprehensive documentation
637
+ - Use the outline above as your guide
638
+ - Include real, executable code examples from the source
639
+ - Show usage patterns, best practices, common workflows
640
+ - Write clear explanations that help developers understand the code
641
+
642
+ PHASE 2: Add structured API reference sections
643
+ - For EACH function/method in the source code, add:
644
+ - Function signature
645
+ - Description
646
+ - **Args:** section (every parameter with type and description)
647
+ - **Returns:** section (return type and description)
648
+ - **Raises:** section (exceptions that can occur)
649
+ - Example code snippet
650
+
651
+ ═══════════════════════════════════════════════════════════════
652
+ IMPORTANT: Complete BOTH phases. Don't stop after Phase 1.
653
+ ═══════════════════════════════════════════════════════════════
654
+
655
+ Generate the complete documentation now, ensuring both comprehensive content AND structured API reference sections."""
476
656
 
477
657
  response, input_tokens, output_tokens = await self._call_llm(
478
658
  tier,
@@ -490,6 +670,7 @@ Source content for reference:
490
670
  "audience": audience,
491
671
  "outline": outline,
492
672
  "chunked": False,
673
+ "source_code": content_to_document, # Pass through for API reference generation
493
674
  },
494
675
  input_tokens,
495
676
  output_tokens,
@@ -534,32 +715,72 @@ Previous sections already written (for context/continuity):
534
715
 
535
716
  Continue with the next sections, maintaining consistent style and terminology."""
536
717
 
537
- system = f"""You are a technical writer. Write comprehensive documentation.
718
+ system = f"""You are an expert technical writer creating comprehensive developer documentation.
538
719
 
539
- Write ONLY the following sections (you are generating part {chunk_idx + 1} of {len(chunks)}):
540
- {sections_list}
720
+ Write ONLY these sections (part {chunk_idx + 1} of {len(chunks)}): {sections_list}
721
+
722
+ YOUR TASK FOR THESE SECTIONS (TWO PHASES):
723
+
724
+ ═══════════════════════════════════════════════════════════════
725
+ PHASE 1: Comprehensive Content
726
+ ═══════════════════════════════════════════════════════════════
727
+ - Write clear explanations and overviews
728
+ - Include real, executable code examples (extract from source)
729
+ - Show usage patterns and workflows
730
+ - Add best practices and common patterns
731
+ - Professional language for {audience}
732
+
733
+ ═══════════════════════════════════════════════════════════════
734
+ PHASE 2: Structured API Reference
735
+ ═══════════════════════════════════════════════════════════════
736
+ For EACH function/method in these sections, add:
737
+
738
+ ### `function_name()`
541
739
 
542
- Requirements:
543
- 1. Use clear, professional language
544
- 2. Include code examples where appropriate
545
- 3. Use markdown formatting with ## headers
546
- 4. Be thorough and detailed - complete each section fully
547
- 5. Target {audience} audience
548
- 6. Write ONLY these specific sections, nothing else"""
740
+ **Function Signature:**
741
+ ```python
742
+ def function_name(params) -> return_type
743
+ ```
549
744
 
550
- user_message = f"""Write documentation for these specific sections:
745
+ **Description:**
746
+ [Brief description]
747
+
748
+ **Args:**
749
+ - `param` (`type`): Description
750
+
751
+ **Returns:**
752
+ - `type`: Description
753
+
754
+ **Raises:**
755
+ - `Exception`: When it occurs
756
+
757
+ **Example:**
758
+ ```python
759
+ # Real usage example
760
+ ```
761
+
762
+ ═══════════════════════════════════════════════════════════════
763
+ Complete BOTH phases for these sections.
764
+ ═══════════════════════════════════════════════════════════════"""
765
+
766
+ user_message = f"""Write comprehensive documentation for these sections in TWO PHASES:
767
+
768
+ Sections to write: {sections_list}
551
769
 
552
770
  Document Type: {doc_type}
553
771
  Target Audience: {audience}
554
772
 
555
- Sections to write: {sections_list}
773
+ Source code (extract actual functions/classes from here):
774
+ {content_to_document[:3000]}
556
775
 
557
776
  Full outline (for context):
558
777
  {outline}
778
+ {previous_context}
559
779
 
560
- Source content for reference:
561
- {content_to_document[:3000]}
562
- {previous_context}"""
780
+ PHASE 1: Write comprehensive content with real code examples
781
+ PHASE 2: Add structured API reference sections with **Args:**, **Returns:**, **Raises:**
782
+
783
+ Generate complete sections now, ensuring both phases are complete."""
563
784
 
564
785
  try:
565
786
  response, input_tokens, output_tokens = await self._call_llm(
@@ -623,6 +844,7 @@ Source content for reference:
623
844
  "chunks_completed": len(all_content),
624
845
  "stopped_early": stopped_early,
625
846
  "accumulated_cost": self._accumulated_cost,
847
+ "source_code": content_to_document, # Pass through for API reference generation
626
848
  }
627
849
 
628
850
  if error_message:
@@ -688,28 +910,82 @@ Draft:
688
910
  system = None # XML prompt includes all context
689
911
  else:
690
912
  # Use legacy plain text prompts
691
- system = """You are a senior technical editor. Polish and improve the documentation:
692
-
693
- 1. CONSISTENCY:
694
- - Standardize terminology
913
+ system = """You are a senior technical editor specializing in developer documentation.
914
+
915
+ Polish and improve this documentation. The writer was asked to complete TWO PHASES:
916
+ - Phase 1: Comprehensive content with real examples
917
+ - Phase 2: Structured API reference sections with **Args:**, **Returns:**, **Raises:**
918
+
919
+ Your job is to verify BOTH phases are complete and polish to production quality.
920
+
921
+ ═══════════════════════════════════════════════════════════════
922
+ CRITICAL: Verify Phase 2 Completion
923
+ ═══════════════════════════════════════════════════════════════
924
+
925
+ 1. **Check for Missing API Reference Sections**:
926
+ - Scan the entire document for all functions and methods
927
+ - EVERY function MUST have these sections:
928
+ - **Args:** (write "None" if no parameters)
929
+ - **Returns:** (write "None" if void)
930
+ - **Raises:** (write "None" if no exceptions)
931
+ - If ANY function is missing these sections, ADD them now
932
+ - Format: **Args:**, **Returns:**, **Raises:** (bold headers with colons)
933
+
934
+ 2. **Polish API Reference Sections**:
935
+ - Verify all parameters have types in backticks: `param` (`type`)
936
+ - Ensure return values are clearly described
937
+ - Check exception documentation is complete
938
+ - Validate code examples in each function section
939
+
940
+ 3. **Polish General Content**:
941
+ - Verify code examples are complete and runnable
942
+ - Ensure proper imports and setup code
943
+ - Replace any placeholders with real code
944
+ - Standardize terminology throughout
695
945
  - Fix formatting inconsistencies
696
- - Ensure consistent code style
697
-
698
- 2. QUALITY:
699
946
  - Improve clarity and flow
700
- - Add missing cross-references
701
- - Fix grammatical issues
947
+ - Add cross-references between sections
948
+
949
+ 4. **Production Readiness**:
950
+ - Remove any TODO or placeholder comments
951
+ - Ensure professional tone
952
+ - Add helpful notes, tips, and warnings
953
+ - Verify edge cases are covered
954
+
955
+ ═══════════════════════════════════════════════════════════════
956
+ Return the complete, polished document. Add a brief "## Polish Notes" section at the end summarizing improvements made."""
957
+
958
+ user_message = f"""Polish this documentation to production quality.
959
+
960
+ The writer was asked to complete TWO PHASES:
961
+ 1. Comprehensive content with real examples
962
+ 2. Structured API reference with **Args:**, **Returns:**, **Raises:** for every function
702
963
 
703
- 3. COMPLETENESS:
704
- - Identify gaps
705
- - Add helpful notes or warnings
706
- - Ensure examples are complete
964
+ Verify BOTH phases are complete, then polish:
707
965
 
708
- Return the polished document with improvements noted at the end."""
966
+ {input_payload}
709
967
 
710
- user_message = f"""Polish this documentation:
968
+ ═══════════════════════════════════════════════════════════════
969
+ YOUR TASKS:
970
+ ═══════════════════════════════════════════════════════════════
711
971
 
712
- {input_payload}"""
972
+ 1. SCAN for missing API reference sections
973
+ - Find every function/method in the document
974
+ - Check if it has **Args:**, **Returns:**, **Raises:** sections
975
+ - ADD these sections if missing (use "None" if no parameters/returns/exceptions)
976
+
977
+ 2. POLISH existing content
978
+ - Verify code examples are complete and runnable
979
+ - Ensure terminology is consistent
980
+ - Fix formatting issues
981
+ - Improve clarity and flow
982
+
983
+ 3. VALIDATE production readiness
984
+ - Remove TODOs and placeholders
985
+ - Add warnings and best practices
986
+ - Ensure professional tone
987
+
988
+ Return the complete, polished documentation with all API reference sections present."""
713
989
 
714
990
  # Calculate polish tokens based on draft size (at least as much as write stage)
715
991
  polish_max_tokens = max(self.max_write_tokens, 20000)
@@ -745,11 +1021,25 @@ Return the polished document with improvements noted at the end."""
745
1021
  # Parse XML response if enforcement is enabled
746
1022
  parsed_data = self._parse_xml_response(response)
747
1023
 
1024
+ # Add structured API reference sections (Step 4: Post-processing)
1025
+ source_code = input_data.get("source_code", "")
1026
+ if source_code:
1027
+ logger.info("Adding structured API reference sections to polished document...")
1028
+ response = await self._add_api_reference_sections(
1029
+ narrative_doc=response,
1030
+ source_code=source_code,
1031
+ tier=ModelTier.CHEAP, # Use cheap tier for structured extraction
1032
+ )
1033
+ else:
1034
+ logger.warning("No source code available for API reference generation")
1035
+
748
1036
  result = {
749
1037
  "document": response,
750
1038
  "doc_type": doc_type,
751
1039
  "audience": audience,
752
1040
  "model_tier_used": tier.value,
1041
+ "accumulated_cost": self._accumulated_cost, # Track total cost
1042
+ "auth_mode_used": self._auth_mode_used, # Track recommended auth mode
753
1043
  }
754
1044
 
755
1045
  # Merge parsed XML data if available
@@ -823,22 +1113,46 @@ Return the polished document with improvements noted at the end."""
823
1113
  total_output_tokens: int = 0
824
1114
 
825
1115
  for chunk_idx, section in enumerate(sections):
826
- system = """You are a senior technical editor. Polish this section of documentation:
1116
+ system = """You are a senior technical editor specializing in developer documentation.
1117
+
1118
+ Polish this section to production quality. The writer was asked to complete TWO PHASES:
1119
+ 1. Comprehensive content with real examples
1120
+ 2. Structured API reference with **Args:**, **Returns:**, **Raises:** for every function
827
1121
 
828
- 1. Standardize terminology and formatting
829
- 2. Improve clarity and flow
830
- 3. Fix grammatical issues
831
- 4. Ensure code examples are complete and accurate
1122
+ Verify both phases are complete in this section:
832
1123
 
833
- Return ONLY the polished section. Do not add commentary."""
1124
+ ═══════════════════════════════════════════════════════════════
1125
+ CRITICAL: Check for Missing API Reference Format
1126
+ ═══════════════════════════════════════════════════════════════
834
1127
 
835
- user_message = f"""Polish this documentation section (part {chunk_idx + 1} of {len(sections)}):
1128
+ 1. **Scan for functions/methods in this section**
1129
+ - If any function is missing **Args:**, **Returns:**, **Raises:** sections, ADD them
1130
+ - Format: **Args:**, **Returns:**, **Raises:** (bold headers with colons)
1131
+ - Write "None" if no parameters/returns/exceptions
1132
+
1133
+ 2. **Polish API Documentation**:
1134
+ - Verify parameters documented with types in backticks
1135
+ - Ensure return values and exceptions are clear
1136
+ - Validate code examples are complete
1137
+
1138
+ 3. **Polish General Content**:
1139
+ - Ensure all examples are runnable with proper imports
1140
+ - Standardize terminology and formatting
1141
+ - Fix grammatical issues
1142
+ - Remove TODOs and placeholders
1143
+
1144
+ Return ONLY the polished section. Do not add commentary about changes."""
1145
+
1146
+ user_message = f"""Polish this section to production quality (part {chunk_idx + 1} of {len(sections)}):
836
1147
 
837
1148
  Document Type: {doc_type}
838
1149
  Target Audience: {audience}
839
1150
 
840
1151
  Section to polish:
841
- {section}"""
1152
+ {section}
1153
+
1154
+ Check if all functions have **Args:**, **Returns:**, **Raises:** sections - add if missing.
1155
+ Make all code examples complete and executable."""
842
1156
 
843
1157
  try:
844
1158
  response, input_tokens, output_tokens = await self._call_llm(
@@ -880,6 +1194,18 @@ Section to polish:
880
1194
  # Combine polished chunks
881
1195
  polished_document = "\n\n".join(polished_chunks)
882
1196
 
1197
+ # Add structured API reference sections (Step 4: Post-processing)
1198
+ source_code = input_data.get("source_code", "")
1199
+ if source_code:
1200
+ logger.info("Adding structured API reference sections to chunked polished document...")
1201
+ polished_document = await self._add_api_reference_sections(
1202
+ narrative_doc=polished_document,
1203
+ source_code=source_code,
1204
+ tier=ModelTier.CHEAP, # Use cheap tier for structured extraction
1205
+ )
1206
+ else:
1207
+ logger.warning("No source code available for API reference generation")
1208
+
883
1209
  result = {
884
1210
  "document": polished_document,
885
1211
  "doc_type": doc_type,
@@ -919,6 +1245,212 @@ Section to polish:
919
1245
 
920
1246
  return (result, total_input_tokens, total_output_tokens)
921
1247
 
1248
+ def _extract_functions_from_source(self, source_code: str) -> list[dict]:
1249
+ """Extract function information from source code using AST.
1250
+
1251
+ Args:
1252
+ source_code: Python source code to parse
1253
+
1254
+ Returns:
1255
+ List of dicts with function information (name, args, returns, docstring)
1256
+ """
1257
+ import ast
1258
+
1259
+ functions = []
1260
+
1261
+ try:
1262
+ tree = ast.parse(source_code)
1263
+ except SyntaxError as e:
1264
+ logger.warning(f"Failed to parse source code: {e}")
1265
+ return functions
1266
+
1267
+ for node in ast.walk(tree):
1268
+ # Extract top-level functions and class methods
1269
+ if isinstance(node, ast.FunctionDef):
1270
+ # Skip private functions (starting with _)
1271
+ if node.name.startswith("_"):
1272
+ continue
1273
+
1274
+ # Extract function signature
1275
+ args_list = []
1276
+ for arg in node.args.args:
1277
+ arg_name = arg.arg
1278
+ # Get type annotation if available
1279
+ arg_type = ast.unparse(arg.annotation) if arg.annotation else "Any"
1280
+ args_list.append({"name": arg_name, "type": arg_type})
1281
+
1282
+ # Extract return type
1283
+ return_type = ast.unparse(node.returns) if node.returns else "Any"
1284
+
1285
+ # Extract docstring
1286
+ docstring = ast.get_docstring(node) or ""
1287
+
1288
+ functions.append({
1289
+ "name": node.name,
1290
+ "args": args_list,
1291
+ "return_type": return_type,
1292
+ "docstring": docstring,
1293
+ "lineno": node.lineno,
1294
+ })
1295
+
1296
+ return functions
1297
+
1298
async def _generate_api_section_for_function(
    self,
    func_info: dict,
    tier: ModelTier,
) -> str:
    """Generate structured API reference section for a single function.

    This is a focused prompt that ONLY asks for Args/Returns/Raises format,
    not narrative documentation. If the LLM call (or cost tracking) raises,
    the error is logged and a minimal section built purely from
    ``func_info`` is returned instead.

    Args:
        func_info: Function information from AST extraction; the keys
            ``name``, ``args``, ``return_type`` and ``docstring`` are read
        tier: Model tier to use for generation

    Returns:
        Markdown formatted API reference section
    """
    func_name = func_info["name"]
    args_list = func_info["args"]
    return_type = func_info["return_type"]
    docstring = func_info["docstring"]

    # Build function signature
    args_str = ", ".join(f"{arg['name']}: {arg['type']}" for arg in args_list)
    signature = f"def {func_name}({args_str}) -> {return_type}"

    system = """You are an API documentation generator. Output ONLY structured API reference sections in the EXACT format specified below.

CRITICAL: Do NOT write explanatory text, questions, or narrative. Output ONLY the formatted section.

REQUIRED FORMAT (copy this structure EXACTLY, replace bracketed content):

### `function_name()`

**Function Signature:**
```python
def function_name(param: type) -> return_type
```

**Description:**
Brief 1-2 sentence description.

**Args:**
- `param_name` (`type`): Parameter description

**Returns:**
- `return_type`: Return value description

**Raises:**
- `ExceptionType`: When this exception occurs

IMPORTANT:
- Use "**Args:**" (NOT "Parameters" or "params")
- Write "None" if no Args/Returns/Raises
- NO conversational text - just the formatted section"""

    user_message = f"""Generate API reference section using EXACT format specified in system prompt.

Function:
```python
{signature}
```

Docstring:
{docstring if docstring else "No docstring"}

Output the formatted section EXACTLY as shown in system prompt. Use **Args:** (not Parameters). NO conversational text."""

    try:
        response, input_tokens, output_tokens = await self._call_llm(
            tier,
            system,
            user_message,
            max_tokens=1000,  # Small response - just the structured section
        )

        # Track cost
        self._track_cost(tier, input_tokens, output_tokens)

        return response

    except Exception as e:
        logger.error(f"Failed to generate API section for {func_name}: {e}")

        # Minimal fallback built from what AST extraction already knows.
        # The parameters are listed rather than claiming "None": the
        # signature above shows them, so "None" would contradict it.
        # The implicit receiver (self/cls) is not a caller-facing argument.
        documented_args = [
            arg for arg in args_list if arg["name"] not in ("self", "cls")
        ]
        if documented_args:
            args_block = "\n".join(
                f"- `{arg['name']}` (`{arg['type']}`): No description available."
                for arg in documented_args
            )
        else:
            args_block = "None"

        # First sentence of the docstring, when there is one.
        description = (
            docstring.split('.')[0] if docstring else "No description available."
        )

        return f"""### `{func_name}()`

**Function Signature:**
```python
{signature}
```

**Description:**
{description}

**Args:**
{args_block}

**Returns:**
- `{return_type}`: Return value

**Raises:**
None
"""
1401
+
1402
async def _add_api_reference_sections(
    self,
    narrative_doc: str,
    source_code: str,
    tier: ModelTier,
) -> str:
    """Append a structured API reference appendix to narrative documentation.

    Step 4 of the pipeline (after outline, write and polish): the public
    functions found in ``source_code`` are each turned into a structured
    reference section, and those sections are appended after the narrative
    under an "API Reference" heading.

    Args:
        narrative_doc: The polished narrative documentation
        source_code: Original source code to extract functions from
        tier: Model tier to use for API section generation

    Returns:
        The narrative followed by the API reference appendix, or the
        narrative unchanged when no public functions are found
    """
    logger.info("Adding structured API reference sections...")

    public_functions = self._extract_functions_from_source(source_code)
    if not public_functions:
        # Nothing to document: hand the narrative back untouched.
        logger.warning("No public functions found in source code")
        return narrative_doc

    logger.info(f"Found {len(public_functions)} public functions to document")

    # Sections are generated one at a time, in extraction order.
    rendered = []
    for info in public_functions:
        logger.debug(f"Generating API reference for {info['name']}()")
        rendered.append(
            await self._generate_api_section_for_function(info, tier)
        )

    # Narrative, horizontal rule, heading, intro line, then the sections.
    combined = "".join(
        (
            narrative_doc,
            "\n\n---\n\n",
            "## API Reference\n\n",
            "Complete structured reference for all public functions:\n\n",
            "\n\n".join(rendered),
        )
    )

    logger.info(f"Added {len(rendered)} API reference sections")

    return combined
1453
+
922
1454
 
923
1455
  def format_doc_gen_report(result: dict, input_data: dict) -> str:
924
1456
  """Format document generation output as a human-readable report.