empathy-framework 5.0.3__py3-none-any.whl → 5.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {empathy_framework-5.0.3.dist-info → empathy_framework-5.1.1.dist-info}/METADATA +259 -142
- {empathy_framework-5.0.3.dist-info → empathy_framework-5.1.1.dist-info}/RECORD +58 -28
- empathy_framework-5.1.1.dist-info/licenses/LICENSE +201 -0
- empathy_framework-5.1.1.dist-info/licenses/LICENSE_CHANGE_ANNOUNCEMENT.md +101 -0
- empathy_os/__init__.py +1 -1
- empathy_os/cli/commands/batch.py +5 -5
- empathy_os/cli/commands/routing.py +1 -1
- empathy_os/cli/commands/workflow.py +2 -1
- empathy_os/cli/parsers/cache 2.py +65 -0
- empathy_os/cli_minimal.py +3 -3
- empathy_os/cli_router 2.py +416 -0
- empathy_os/cli_router.py +12 -0
- empathy_os/dashboard/__init__.py +1 -2
- empathy_os/dashboard/app 2.py +512 -0
- empathy_os/dashboard/app.py +1 -1
- empathy_os/dashboard/simple_server 2.py +403 -0
- empathy_os/dashboard/standalone_server 2.py +536 -0
- empathy_os/memory/types 2.py +441 -0
- empathy_os/meta_workflows/intent_detector.py +71 -0
- empathy_os/models/__init__.py +19 -0
- empathy_os/models/adaptive_routing 2.py +437 -0
- empathy_os/models/auth_cli.py +444 -0
- empathy_os/models/auth_strategy.py +450 -0
- empathy_os/project_index/scanner_parallel 2.py +291 -0
- empathy_os/telemetry/agent_coordination 2.py +478 -0
- empathy_os/telemetry/agent_coordination.py +3 -3
- empathy_os/telemetry/agent_tracking 2.py +350 -0
- empathy_os/telemetry/agent_tracking.py +1 -2
- empathy_os/telemetry/approval_gates 2.py +563 -0
- empathy_os/telemetry/event_streaming 2.py +405 -0
- empathy_os/telemetry/event_streaming.py +3 -3
- empathy_os/telemetry/feedback_loop 2.py +557 -0
- empathy_os/telemetry/feedback_loop.py +1 -1
- empathy_os/vscode_bridge 2.py +173 -0
- empathy_os/workflows/__init__.py +8 -0
- empathy_os/workflows/autonomous_test_gen.py +569 -0
- empathy_os/workflows/bug_predict.py +45 -0
- empathy_os/workflows/code_review.py +92 -22
- empathy_os/workflows/document_gen.py +594 -62
- empathy_os/workflows/llm_base.py +363 -0
- empathy_os/workflows/perf_audit.py +69 -0
- empathy_os/workflows/progressive/README 2.md +454 -0
- empathy_os/workflows/progressive/__init__ 2.py +92 -0
- empathy_os/workflows/progressive/cli 2.py +242 -0
- empathy_os/workflows/progressive/core 2.py +488 -0
- empathy_os/workflows/progressive/orchestrator 2.py +701 -0
- empathy_os/workflows/progressive/reports 2.py +528 -0
- empathy_os/workflows/progressive/telemetry 2.py +280 -0
- empathy_os/workflows/progressive/test_gen 2.py +514 -0
- empathy_os/workflows/progressive/workflow 2.py +628 -0
- empathy_os/workflows/release_prep.py +54 -0
- empathy_os/workflows/security_audit.py +154 -79
- empathy_os/workflows/test_gen.py +60 -0
- empathy_os/workflows/test_gen_behavioral.py +477 -0
- empathy_os/workflows/test_gen_parallel.py +341 -0
- empathy_framework-5.0.3.dist-info/licenses/LICENSE +0 -139
- {empathy_framework-5.0.3.dist-info → empathy_framework-5.1.1.dist-info}/WHEEL +0 -0
- {empathy_framework-5.0.3.dist-info → empathy_framework-5.1.1.dist-info}/entry_points.txt +0 -0
- {empathy_framework-5.0.3.dist-info → empathy_framework-5.1.1.dist-info}/top_level.txt +0 -0
|
@@ -86,6 +86,7 @@ class DocumentGenerationWorkflow(BaseWorkflow):
|
|
|
86
86
|
graceful_degradation: bool = True, # Return partial results on error
|
|
87
87
|
export_path: str | Path | None = None, # Export docs to file (e.g., "docs/generated")
|
|
88
88
|
max_display_chars: int = 45000, # Max chars before chunking output
|
|
89
|
+
enable_auth_strategy: bool = True, # Enable intelligent auth routing
|
|
89
90
|
**kwargs: Any,
|
|
90
91
|
):
|
|
91
92
|
"""Initialize workflow with enterprise-safe defaults.
|
|
@@ -110,6 +111,8 @@ class DocumentGenerationWorkflow(BaseWorkflow):
|
|
|
110
111
|
If provided, documentation will be saved to a file automatically.
|
|
111
112
|
max_display_chars: Maximum characters before splitting output into chunks
|
|
112
113
|
for display (default 45000). Helps avoid terminal/UI truncation.
|
|
114
|
+
enable_auth_strategy: If True, use intelligent subscription vs API routing
|
|
115
|
+
based on module size (default True).
|
|
113
116
|
|
|
114
117
|
"""
|
|
115
118
|
super().__init__(**kwargs)
|
|
@@ -125,10 +128,12 @@ class DocumentGenerationWorkflow(BaseWorkflow):
|
|
|
125
128
|
self.graceful_degradation = graceful_degradation
|
|
126
129
|
self.export_path = Path(export_path) if export_path else None
|
|
127
130
|
self.max_display_chars = max_display_chars
|
|
131
|
+
self.enable_auth_strategy = enable_auth_strategy
|
|
128
132
|
self._total_content_tokens: int = 0
|
|
129
133
|
self._accumulated_cost: float = 0.0
|
|
130
134
|
self._cost_warning_issued: bool = False
|
|
131
135
|
self._partial_results: dict = {}
|
|
136
|
+
self._auth_mode_used: str | None = None # Track which auth was recommended
|
|
132
137
|
|
|
133
138
|
def _estimate_cost(self, tier: ModelTier, input_tokens: int, output_tokens: int) -> float:
|
|
134
139
|
"""Estimate cost for a given tier and token counts."""
|
|
@@ -308,6 +313,8 @@ class DocumentGenerationWorkflow(BaseWorkflow):
|
|
|
308
313
|
|
|
309
314
|
async def _outline(self, input_data: dict, tier: ModelTier) -> tuple[dict, int, int]:
|
|
310
315
|
"""Generate document outline from source."""
|
|
316
|
+
from pathlib import Path
|
|
317
|
+
|
|
311
318
|
source_code = input_data.get("source_code", "")
|
|
312
319
|
target = input_data.get("target", "")
|
|
313
320
|
doc_type = input_data.get("doc_type", "general")
|
|
@@ -318,8 +325,6 @@ class DocumentGenerationWorkflow(BaseWorkflow):
|
|
|
318
325
|
|
|
319
326
|
# If target looks like a file path and source_code wasn't provided, read the file
|
|
320
327
|
if not source_code and target:
|
|
321
|
-
from pathlib import Path
|
|
322
|
-
|
|
323
328
|
target_path = Path(target)
|
|
324
329
|
if target_path.exists() and target_path.is_file():
|
|
325
330
|
try:
|
|
@@ -349,22 +354,108 @@ class DocumentGenerationWorkflow(BaseWorkflow):
|
|
|
349
354
|
f"Target appears to be a file path but doesn't exist: {target}",
|
|
350
355
|
)
|
|
351
356
|
|
|
352
|
-
|
|
357
|
+
# === AUTH STRATEGY INTEGRATION ===
|
|
358
|
+
# Detect module size and recommend auth mode (first stage only)
|
|
359
|
+
if self.enable_auth_strategy:
|
|
360
|
+
try:
|
|
361
|
+
from empathy_os.models import (
|
|
362
|
+
count_lines_of_code,
|
|
363
|
+
get_auth_strategy,
|
|
364
|
+
get_module_size_category,
|
|
365
|
+
)
|
|
366
|
+
|
|
367
|
+
# Calculate module size
|
|
368
|
+
module_lines = 0
|
|
369
|
+
if target and Path(target).exists():
|
|
370
|
+
module_lines = count_lines_of_code(target)
|
|
371
|
+
elif content_to_document:
|
|
372
|
+
# Count from source code content
|
|
373
|
+
module_lines = len(
|
|
374
|
+
[
|
|
375
|
+
line
|
|
376
|
+
for line in content_to_document.split("\n")
|
|
377
|
+
if line.strip() and not line.strip().startswith("#")
|
|
378
|
+
]
|
|
379
|
+
)
|
|
380
|
+
|
|
381
|
+
if module_lines > 0:
|
|
382
|
+
# Get auth strategy (first-time setup if needed)
|
|
383
|
+
strategy = get_auth_strategy()
|
|
353
384
|
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
3. Key points to cover in each section
|
|
385
|
+
# Get recommended auth mode
|
|
386
|
+
recommended_mode = strategy.get_recommended_mode(module_lines)
|
|
387
|
+
self._auth_mode_used = recommended_mode.value
|
|
358
388
|
|
|
359
|
-
|
|
389
|
+
# Get size category
|
|
390
|
+
size_category = get_module_size_category(module_lines)
|
|
360
391
|
|
|
361
|
-
|
|
392
|
+
# Log recommendation
|
|
393
|
+
logger.info(
|
|
394
|
+
f"Module: {target or 'source'} ({module_lines} LOC, {size_category})"
|
|
395
|
+
)
|
|
396
|
+
logger.info(f"Recommended auth mode: {recommended_mode.value}")
|
|
397
|
+
|
|
398
|
+
# Get cost estimate
|
|
399
|
+
cost_estimate = strategy.estimate_cost(module_lines, recommended_mode)
|
|
400
|
+
|
|
401
|
+
if recommended_mode.value == "subscription":
|
|
402
|
+
logger.info(
|
|
403
|
+
f"Cost: {cost_estimate['quota_cost']} "
|
|
404
|
+
f"(fits in {cost_estimate['fits_in_context']} context)"
|
|
405
|
+
)
|
|
406
|
+
else: # API
|
|
407
|
+
logger.info(
|
|
408
|
+
f"Cost: ~${cost_estimate['monetary_cost']:.4f} "
|
|
409
|
+
f"(1M context window)"
|
|
410
|
+
)
|
|
411
|
+
|
|
412
|
+
except Exception as e:
|
|
413
|
+
# Don't fail workflow if auth strategy fails
|
|
414
|
+
logger.warning(f"Auth strategy detection failed: {e}")
|
|
415
|
+
|
|
416
|
+
system = """You are an expert technical writer specializing in API Reference documentation.
|
|
417
|
+
|
|
418
|
+
IMPORTANT: This is API REFERENCE documentation, not a tutorial. Focus on documenting EVERY function/class with structured Args/Returns/Raises format.
|
|
419
|
+
|
|
420
|
+
Create a detailed, structured outline for API Reference documentation:
|
|
421
|
+
|
|
422
|
+
1. **Logical Section Structure** (emphasize API reference sections):
|
|
423
|
+
- Overview/Introduction (brief)
|
|
424
|
+
- Quick Start (1 complete example)
|
|
425
|
+
- API Reference - Functions (one subsection per function with Args/Returns/Raises)
|
|
426
|
+
- API Reference - Classes (one subsection per class with Args/Returns/Raises for methods)
|
|
427
|
+
- Usage Examples (showing how to combine multiple functions)
|
|
428
|
+
- Additional reference sections as needed
|
|
429
|
+
|
|
430
|
+
2. **For Each Section**:
|
|
431
|
+
- Clear purpose and what readers will learn
|
|
432
|
+
- Specific topics to cover
|
|
433
|
+
- Types of examples to include (with actual code)
|
|
434
|
+
|
|
435
|
+
3. **Key Requirements**:
|
|
436
|
+
- Include sections for real, copy-paste ready code examples
|
|
437
|
+
- Plan for comprehensive API documentation with all parameters
|
|
438
|
+
- Include edge cases and error handling examples
|
|
439
|
+
- Add best practices and common patterns
|
|
440
|
+
|
|
441
|
+
Format as a numbered list with section titles and detailed descriptions."""
|
|
442
|
+
|
|
443
|
+
user_message = f"""Create a comprehensive documentation outline:
|
|
362
444
|
|
|
363
445
|
Document Type: {doc_type}
|
|
364
446
|
Target Audience: {audience}
|
|
365
447
|
|
|
448
|
+
IMPORTANT: This documentation should be production-ready with:
|
|
449
|
+
- Real, executable code examples (not placeholders)
|
|
450
|
+
- Complete API reference with parameter types and descriptions
|
|
451
|
+
- Usage guides showing common patterns
|
|
452
|
+
- Edge case handling and error scenarios
|
|
453
|
+
- Best practices for the target audience
|
|
454
|
+
|
|
366
455
|
Content to document:
|
|
367
|
-
{content_to_document[:4000]}
|
|
456
|
+
{content_to_document[:4000]}
|
|
457
|
+
|
|
458
|
+
Generate an outline that covers all these aspects comprehensively."""
|
|
368
459
|
|
|
369
460
|
response, input_tokens, output_tokens = await self._call_llm(
|
|
370
461
|
tier,
|
|
@@ -450,29 +541,118 @@ IMPORTANT: Focus ONLY on generating these specific sections:
|
|
|
450
541
|
|
|
451
542
|
Generate comprehensive, detailed content for each of these sections."""
|
|
452
543
|
|
|
453
|
-
system = f"""You are
|
|
544
|
+
system = f"""You are an expert technical writer creating comprehensive developer documentation.
|
|
545
|
+
|
|
546
|
+
YOUR TASK HAS TWO CRITICAL PHASES - YOU MUST COMPLETE BOTH:
|
|
547
|
+
|
|
548
|
+
═══════════════════════════════════════════════════════════════
|
|
549
|
+
PHASE 1: Write Comprehensive Documentation
|
|
550
|
+
═══════════════════════════════════════════════════════════════
|
|
551
|
+
|
|
552
|
+
Write clear, helpful documentation with:
|
|
553
|
+
- Overview and introduction explaining what this code does
|
|
554
|
+
- Real, executable code examples (NOT placeholders - use actual code from source)
|
|
555
|
+
- Usage guides showing how to use the code in real scenarios
|
|
556
|
+
- Best practices and common patterns
|
|
557
|
+
- Step-by-step instructions where helpful
|
|
558
|
+
- Tables, diagrams, and visual aids as appropriate
|
|
559
|
+
- Clear explanations appropriate for {audience}
|
|
560
|
+
|
|
561
|
+
Do this naturally - write the kind of documentation that helps developers understand and use the code effectively.
|
|
562
|
+
|
|
563
|
+
═══════════════════════════════════════════════════════════════
|
|
564
|
+
PHASE 2: Add Structured API Reference Sections (MANDATORY)
|
|
565
|
+
═══════════════════════════════════════════════════════════════
|
|
566
|
+
|
|
567
|
+
After writing the comprehensive documentation above, you MUST add structured API reference sections for EVERY function and class method.
|
|
568
|
+
|
|
569
|
+
For EACH function/method in the source code, add this EXACT structure:
|
|
570
|
+
|
|
571
|
+
---
|
|
572
|
+
### `function_name()`
|
|
573
|
+
|
|
574
|
+
**Function Signature:**
|
|
575
|
+
```python
|
|
576
|
+
def function_name(param1: type, param2: type = default) -> return_type
|
|
577
|
+
```
|
|
578
|
+
|
|
579
|
+
**Description:**
|
|
580
|
+
[Brief description of what the function does - 1-2 sentences]
|
|
581
|
+
|
|
582
|
+
**Args:**
|
|
583
|
+
- `param1` (`type`): Clear description of this parameter
|
|
584
|
+
- `param2` (`type`, optional): Description. Defaults to `default`.
|
|
585
|
+
|
|
586
|
+
**Returns:**
|
|
587
|
+
- `return_type`: Description of the return value
|
|
588
|
+
|
|
589
|
+
**Raises:**
|
|
590
|
+
- `ExceptionType`: Description of when and why this exception occurs
|
|
591
|
+
- `AnotherException`: Another exception case
|
|
592
|
+
|
|
593
|
+
**Example:**
|
|
594
|
+
```python
|
|
595
|
+
from module import function_name
|
|
596
|
+
|
|
597
|
+
# Show real usage with actual code
|
|
598
|
+
result = function_name(actual_value, param2=123)
|
|
599
|
+
print(result)
|
|
600
|
+
```
|
|
601
|
+
---
|
|
602
|
+
|
|
603
|
+
CRITICAL RULES FOR PHASE 2:
|
|
604
|
+
- Include **Args:** header for ALL functions (write "None" if no parameters)
|
|
605
|
+
- Include **Returns:** header for ALL functions (write "None" if void/no return)
|
|
606
|
+
- Include **Raises:** header for ALL functions (write "None" if no exceptions)
|
|
607
|
+
- Use backticks for code: `param_name` (`type`)
|
|
608
|
+
- Document EVERY public function and method you see in the source code
|
|
454
609
|
|
|
455
|
-
Based on the outline provided, write full content for each section:
|
|
456
|
-
1. Use clear, professional language
|
|
457
|
-
2. Include code examples where appropriate
|
|
458
|
-
3. Use markdown formatting
|
|
459
|
-
4. Be thorough and detailed - do NOT truncate sections
|
|
460
|
-
5. Target the specified audience
|
|
461
|
-
6. Complete ALL sections before stopping
|
|
462
610
|
{section_instruction}
|
|
463
611
|
|
|
464
|
-
|
|
612
|
+
═══════════════════════════════════════════════════════════════
|
|
613
|
+
REMINDER: BOTH PHASES ARE MANDATORY
|
|
614
|
+
═══════════════════════════════════════════════════════════════
|
|
615
|
+
|
|
616
|
+
1. Write comprehensive documentation (Phase 1) - what you do naturally
|
|
617
|
+
2. Add structured API reference sections (Phase 2) - for every function/method
|
|
618
|
+
|
|
619
|
+
Do NOT skip Phase 2 after completing Phase 1. Both phases are required for complete documentation."""
|
|
465
620
|
|
|
466
|
-
user_message = f"""Write documentation
|
|
621
|
+
user_message = f"""Write comprehensive, production-ready documentation in TWO PHASES:
|
|
467
622
|
|
|
468
623
|
Document Type: {doc_type}
|
|
469
624
|
Target Audience: {audience}
|
|
470
625
|
|
|
471
|
-
Outline:
|
|
626
|
+
Outline to follow:
|
|
472
627
|
{outline}
|
|
473
628
|
|
|
474
|
-
Source
|
|
475
|
-
{content_to_document[:5000]}
|
|
629
|
+
Source code to document (extract actual class names, function signatures, parameters):
|
|
630
|
+
{content_to_document[:5000]}
|
|
631
|
+
|
|
632
|
+
═══════════════════════════════════════════════════════════════
|
|
633
|
+
YOUR TASK:
|
|
634
|
+
═══════════════════════════════════════════════════════════════
|
|
635
|
+
|
|
636
|
+
PHASE 1: Write comprehensive documentation
|
|
637
|
+
- Use the outline above as your guide
|
|
638
|
+
- Include real, executable code examples from the source
|
|
639
|
+
- Show usage patterns, best practices, common workflows
|
|
640
|
+
- Write clear explanations that help developers understand the code
|
|
641
|
+
|
|
642
|
+
PHASE 2: Add structured API reference sections
|
|
643
|
+
- For EACH function/method in the source code, add:
|
|
644
|
+
- Function signature
|
|
645
|
+
- Description
|
|
646
|
+
- **Args:** section (every parameter with type and description)
|
|
647
|
+
- **Returns:** section (return type and description)
|
|
648
|
+
- **Raises:** section (exceptions that can occur)
|
|
649
|
+
- Example code snippet
|
|
650
|
+
|
|
651
|
+
═══════════════════════════════════════════════════════════════
|
|
652
|
+
IMPORTANT: Complete BOTH phases. Don't stop after Phase 1.
|
|
653
|
+
═══════════════════════════════════════════════════════════════
|
|
654
|
+
|
|
655
|
+
Generate the complete documentation now, ensuring both comprehensive content AND structured API reference sections."""
|
|
476
656
|
|
|
477
657
|
response, input_tokens, output_tokens = await self._call_llm(
|
|
478
658
|
tier,
|
|
@@ -490,6 +670,7 @@ Source content for reference:
|
|
|
490
670
|
"audience": audience,
|
|
491
671
|
"outline": outline,
|
|
492
672
|
"chunked": False,
|
|
673
|
+
"source_code": content_to_document, # Pass through for API reference generation
|
|
493
674
|
},
|
|
494
675
|
input_tokens,
|
|
495
676
|
output_tokens,
|
|
@@ -534,32 +715,72 @@ Previous sections already written (for context/continuity):
|
|
|
534
715
|
|
|
535
716
|
Continue with the next sections, maintaining consistent style and terminology."""
|
|
536
717
|
|
|
537
|
-
system = f"""You are
|
|
718
|
+
system = f"""You are an expert technical writer creating comprehensive developer documentation.
|
|
538
719
|
|
|
539
|
-
Write ONLY
|
|
540
|
-
|
|
720
|
+
Write ONLY these sections (part {chunk_idx + 1} of {len(chunks)}): {sections_list}
|
|
721
|
+
|
|
722
|
+
YOUR TASK FOR THESE SECTIONS (TWO PHASES):
|
|
723
|
+
|
|
724
|
+
═══════════════════════════════════════════════════════════════
|
|
725
|
+
PHASE 1: Comprehensive Content
|
|
726
|
+
═══════════════════════════════════════════════════════════════
|
|
727
|
+
- Write clear explanations and overviews
|
|
728
|
+
- Include real, executable code examples (extract from source)
|
|
729
|
+
- Show usage patterns and workflows
|
|
730
|
+
- Add best practices and common patterns
|
|
731
|
+
- Professional language for {audience}
|
|
732
|
+
|
|
733
|
+
═══════════════════════════════════════════════════════════════
|
|
734
|
+
PHASE 2: Structured API Reference
|
|
735
|
+
═══════════════════════════════════════════════════════════════
|
|
736
|
+
For EACH function/method in these sections, add:
|
|
737
|
+
|
|
738
|
+
### `function_name()`
|
|
541
739
|
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
4. Be thorough and detailed - complete each section fully
|
|
547
|
-
5. Target {audience} audience
|
|
548
|
-
6. Write ONLY these specific sections, nothing else"""
|
|
740
|
+
**Function Signature:**
|
|
741
|
+
```python
|
|
742
|
+
def function_name(params) -> return_type
|
|
743
|
+
```
|
|
549
744
|
|
|
550
|
-
|
|
745
|
+
**Description:**
|
|
746
|
+
[Brief description]
|
|
747
|
+
|
|
748
|
+
**Args:**
|
|
749
|
+
- `param` (`type`): Description
|
|
750
|
+
|
|
751
|
+
**Returns:**
|
|
752
|
+
- `type`: Description
|
|
753
|
+
|
|
754
|
+
**Raises:**
|
|
755
|
+
- `Exception`: When it occurs
|
|
756
|
+
|
|
757
|
+
**Example:**
|
|
758
|
+
```python
|
|
759
|
+
# Real usage example
|
|
760
|
+
```
|
|
761
|
+
|
|
762
|
+
═══════════════════════════════════════════════════════════════
|
|
763
|
+
Complete BOTH phases for these sections.
|
|
764
|
+
═══════════════════════════════════════════════════════════════"""
|
|
765
|
+
|
|
766
|
+
user_message = f"""Write comprehensive documentation for these sections in TWO PHASES:
|
|
767
|
+
|
|
768
|
+
Sections to write: {sections_list}
|
|
551
769
|
|
|
552
770
|
Document Type: {doc_type}
|
|
553
771
|
Target Audience: {audience}
|
|
554
772
|
|
|
555
|
-
|
|
773
|
+
Source code (extract actual functions/classes from here):
|
|
774
|
+
{content_to_document[:3000]}
|
|
556
775
|
|
|
557
776
|
Full outline (for context):
|
|
558
777
|
{outline}
|
|
778
|
+
{previous_context}
|
|
559
779
|
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
780
|
+
PHASE 1: Write comprehensive content with real code examples
|
|
781
|
+
PHASE 2: Add structured API reference sections with **Args:**, **Returns:**, **Raises:**
|
|
782
|
+
|
|
783
|
+
Generate complete sections now, ensuring both phases are complete."""
|
|
563
784
|
|
|
564
785
|
try:
|
|
565
786
|
response, input_tokens, output_tokens = await self._call_llm(
|
|
@@ -623,6 +844,7 @@ Source content for reference:
|
|
|
623
844
|
"chunks_completed": len(all_content),
|
|
624
845
|
"stopped_early": stopped_early,
|
|
625
846
|
"accumulated_cost": self._accumulated_cost,
|
|
847
|
+
"source_code": content_to_document, # Pass through for API reference generation
|
|
626
848
|
}
|
|
627
849
|
|
|
628
850
|
if error_message:
|
|
@@ -688,28 +910,82 @@ Draft:
|
|
|
688
910
|
system = None # XML prompt includes all context
|
|
689
911
|
else:
|
|
690
912
|
# Use legacy plain text prompts
|
|
691
|
-
system = """You are a senior technical editor
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
913
|
+
system = """You are a senior technical editor specializing in developer documentation.
|
|
914
|
+
|
|
915
|
+
Polish and improve this documentation. The writer was asked to complete TWO PHASES:
|
|
916
|
+
- Phase 1: Comprehensive content with real examples
|
|
917
|
+
- Phase 2: Structured API reference sections with **Args:**, **Returns:**, **Raises:**
|
|
918
|
+
|
|
919
|
+
Your job is to verify BOTH phases are complete and polish to production quality.
|
|
920
|
+
|
|
921
|
+
═══════════════════════════════════════════════════════════════
|
|
922
|
+
CRITICAL: Verify Phase 2 Completion
|
|
923
|
+
═══════════════════════════════════════════════════════════════
|
|
924
|
+
|
|
925
|
+
1. **Check for Missing API Reference Sections**:
|
|
926
|
+
- Scan the entire document for all functions and methods
|
|
927
|
+
- EVERY function MUST have these sections:
|
|
928
|
+
- **Args:** (write "None" if no parameters)
|
|
929
|
+
- **Returns:** (write "None" if void)
|
|
930
|
+
- **Raises:** (write "None" if no exceptions)
|
|
931
|
+
- If ANY function is missing these sections, ADD them now
|
|
932
|
+
- Format: **Args:**, **Returns:**, **Raises:** (bold headers with colons)
|
|
933
|
+
|
|
934
|
+
2. **Polish API Reference Sections**:
|
|
935
|
+
- Verify all parameters have types in backticks: `param` (`type`)
|
|
936
|
+
- Ensure return values are clearly described
|
|
937
|
+
- Check exception documentation is complete
|
|
938
|
+
- Validate code examples in each function section
|
|
939
|
+
|
|
940
|
+
3. **Polish General Content**:
|
|
941
|
+
- Verify code examples are complete and runnable
|
|
942
|
+
- Ensure proper imports and setup code
|
|
943
|
+
- Replace any placeholders with real code
|
|
944
|
+
- Standardize terminology throughout
|
|
695
945
|
- Fix formatting inconsistencies
|
|
696
|
-
- Ensure consistent code style
|
|
697
|
-
|
|
698
|
-
2. QUALITY:
|
|
699
946
|
- Improve clarity and flow
|
|
700
|
-
- Add
|
|
701
|
-
|
|
947
|
+
- Add cross-references between sections
|
|
948
|
+
|
|
949
|
+
4. **Production Readiness**:
|
|
950
|
+
- Remove any TODO or placeholder comments
|
|
951
|
+
- Ensure professional tone
|
|
952
|
+
- Add helpful notes, tips, and warnings
|
|
953
|
+
- Verify edge cases are covered
|
|
954
|
+
|
|
955
|
+
═══════════════════════════════════════════════════════════════
|
|
956
|
+
Return the complete, polished document. Add a brief "## Polish Notes" section at the end summarizing improvements made."""
|
|
957
|
+
|
|
958
|
+
user_message = f"""Polish this documentation to production quality.
|
|
959
|
+
|
|
960
|
+
The writer was asked to complete TWO PHASES:
|
|
961
|
+
1. Comprehensive content with real examples
|
|
962
|
+
2. Structured API reference with **Args:**, **Returns:**, **Raises:** for every function
|
|
702
963
|
|
|
703
|
-
|
|
704
|
-
- Identify gaps
|
|
705
|
-
- Add helpful notes or warnings
|
|
706
|
-
- Ensure examples are complete
|
|
964
|
+
Verify BOTH phases are complete, then polish:
|
|
707
965
|
|
|
708
|
-
|
|
966
|
+
{input_payload}
|
|
709
967
|
|
|
710
|
-
|
|
968
|
+
═══════════════════════════════════════════════════════════════
|
|
969
|
+
YOUR TASKS:
|
|
970
|
+
═══════════════════════════════════════════════════════════════
|
|
711
971
|
|
|
712
|
-
|
|
972
|
+
1. SCAN for missing API reference sections
|
|
973
|
+
- Find every function/method in the document
|
|
974
|
+
- Check if it has **Args:**, **Returns:**, **Raises:** sections
|
|
975
|
+
- ADD these sections if missing (use "None" if no parameters/returns/exceptions)
|
|
976
|
+
|
|
977
|
+
2. POLISH existing content
|
|
978
|
+
- Verify code examples are complete and runnable
|
|
979
|
+
- Ensure terminology is consistent
|
|
980
|
+
- Fix formatting issues
|
|
981
|
+
- Improve clarity and flow
|
|
982
|
+
|
|
983
|
+
3. VALIDATE production readiness
|
|
984
|
+
- Remove TODOs and placeholders
|
|
985
|
+
- Add warnings and best practices
|
|
986
|
+
- Ensure professional tone
|
|
987
|
+
|
|
988
|
+
Return the complete, polished documentation with all API reference sections present."""
|
|
713
989
|
|
|
714
990
|
# Calculate polish tokens based on draft size (at least as much as write stage)
|
|
715
991
|
polish_max_tokens = max(self.max_write_tokens, 20000)
|
|
@@ -745,11 +1021,25 @@ Return the polished document with improvements noted at the end."""
|
|
|
745
1021
|
# Parse XML response if enforcement is enabled
|
|
746
1022
|
parsed_data = self._parse_xml_response(response)
|
|
747
1023
|
|
|
1024
|
+
# Add structured API reference sections (Step 4: Post-processing)
|
|
1025
|
+
source_code = input_data.get("source_code", "")
|
|
1026
|
+
if source_code:
|
|
1027
|
+
logger.info("Adding structured API reference sections to polished document...")
|
|
1028
|
+
response = await self._add_api_reference_sections(
|
|
1029
|
+
narrative_doc=response,
|
|
1030
|
+
source_code=source_code,
|
|
1031
|
+
tier=ModelTier.CHEAP, # Use cheap tier for structured extraction
|
|
1032
|
+
)
|
|
1033
|
+
else:
|
|
1034
|
+
logger.warning("No source code available for API reference generation")
|
|
1035
|
+
|
|
748
1036
|
result = {
|
|
749
1037
|
"document": response,
|
|
750
1038
|
"doc_type": doc_type,
|
|
751
1039
|
"audience": audience,
|
|
752
1040
|
"model_tier_used": tier.value,
|
|
1041
|
+
"accumulated_cost": self._accumulated_cost, # Track total cost
|
|
1042
|
+
"auth_mode_used": self._auth_mode_used, # Track recommended auth mode
|
|
753
1043
|
}
|
|
754
1044
|
|
|
755
1045
|
# Merge parsed XML data if available
|
|
@@ -823,22 +1113,46 @@ Return the polished document with improvements noted at the end."""
|
|
|
823
1113
|
total_output_tokens: int = 0
|
|
824
1114
|
|
|
825
1115
|
for chunk_idx, section in enumerate(sections):
|
|
826
|
-
system = """You are a senior technical editor
|
|
1116
|
+
system = """You are a senior technical editor specializing in developer documentation.
|
|
1117
|
+
|
|
1118
|
+
Polish this section to production quality. The writer was asked to complete TWO PHASES:
|
|
1119
|
+
1. Comprehensive content with real examples
|
|
1120
|
+
2. Structured API reference with **Args:**, **Returns:**, **Raises:** for every function
|
|
827
1121
|
|
|
828
|
-
|
|
829
|
-
2. Improve clarity and flow
|
|
830
|
-
3. Fix grammatical issues
|
|
831
|
-
4. Ensure code examples are complete and accurate
|
|
1122
|
+
Verify both phases are complete in this section:
|
|
832
1123
|
|
|
833
|
-
|
|
1124
|
+
═══════════════════════════════════════════════════════════════
|
|
1125
|
+
CRITICAL: Check for Missing API Reference Format
|
|
1126
|
+
═══════════════════════════════════════════════════════════════
|
|
834
1127
|
|
|
835
|
-
|
|
1128
|
+
1. **Scan for functions/methods in this section**
|
|
1129
|
+
- If any function is missing **Args:**, **Returns:**, **Raises:** sections, ADD them
|
|
1130
|
+
- Format: **Args:**, **Returns:**, **Raises:** (bold headers with colons)
|
|
1131
|
+
- Write "None" if no parameters/returns/exceptions
|
|
1132
|
+
|
|
1133
|
+
2. **Polish API Documentation**:
|
|
1134
|
+
- Verify parameters documented with types in backticks
|
|
1135
|
+
- Ensure return values and exceptions are clear
|
|
1136
|
+
- Validate code examples are complete
|
|
1137
|
+
|
|
1138
|
+
3. **Polish General Content**:
|
|
1139
|
+
- Ensure all examples are runnable with proper imports
|
|
1140
|
+
- Standardize terminology and formatting
|
|
1141
|
+
- Fix grammatical issues
|
|
1142
|
+
- Remove TODOs and placeholders
|
|
1143
|
+
|
|
1144
|
+
Return ONLY the polished section. Do not add commentary about changes."""
|
|
1145
|
+
|
|
1146
|
+
user_message = f"""Polish this section to production quality (part {chunk_idx + 1} of {len(sections)}):
|
|
836
1147
|
|
|
837
1148
|
Document Type: {doc_type}
|
|
838
1149
|
Target Audience: {audience}
|
|
839
1150
|
|
|
840
1151
|
Section to polish:
|
|
841
|
-
{section}
|
|
1152
|
+
{section}
|
|
1153
|
+
|
|
1154
|
+
Check if all functions have **Args:**, **Returns:**, **Raises:** sections - add if missing.
|
|
1155
|
+
Make all code examples complete and executable."""
|
|
842
1156
|
|
|
843
1157
|
try:
|
|
844
1158
|
response, input_tokens, output_tokens = await self._call_llm(
|
|
@@ -880,6 +1194,18 @@ Section to polish:
|
|
|
880
1194
|
# Combine polished chunks
|
|
881
1195
|
polished_document = "\n\n".join(polished_chunks)
|
|
882
1196
|
|
|
1197
|
+
# Add structured API reference sections (Step 4: Post-processing)
|
|
1198
|
+
source_code = input_data.get("source_code", "")
|
|
1199
|
+
if source_code:
|
|
1200
|
+
logger.info("Adding structured API reference sections to chunked polished document...")
|
|
1201
|
+
polished_document = await self._add_api_reference_sections(
|
|
1202
|
+
narrative_doc=polished_document,
|
|
1203
|
+
source_code=source_code,
|
|
1204
|
+
tier=ModelTier.CHEAP, # Use cheap tier for structured extraction
|
|
1205
|
+
)
|
|
1206
|
+
else:
|
|
1207
|
+
logger.warning("No source code available for API reference generation")
|
|
1208
|
+
|
|
883
1209
|
result = {
|
|
884
1210
|
"document": polished_document,
|
|
885
1211
|
"doc_type": doc_type,
|
|
@@ -919,6 +1245,212 @@ Section to polish:
|
|
|
919
1245
|
|
|
920
1246
|
return (result, total_input_tokens, total_output_tokens)
|
|
921
1247
|
|
|
1248
|
+
def _extract_functions_from_source(self, source_code: str) -> list[dict]:
    """Extract public function information from source code using AST.

    Every ``def`` and ``async def`` found anywhere in the module (top level,
    class bodies, nested scopes) is reported unless its name starts with an
    underscore. All parameter kinds are captured: positional-only, regular,
    ``*args``, keyword-only and ``**kwargs``. Variadic parameters are
    reported with their ``*`` / ``**`` prefix as part of the name; the bare
    ``*`` separator that precedes keyword-only parameters is not
    represented as an entry of its own.

    Args:
        source_code: Python source code to parse

    Returns:
        List of dicts, ordered by line number, each with the keys
        ``name``, ``args`` (list of ``{"name", "type"}`` dicts),
        ``return_type``, ``docstring`` and ``lineno``. Missing annotations
        are reported as ``"Any"`` and a missing docstring as ``""``.
        An empty list is returned when the source cannot be parsed.
    """
    import ast

    functions: list[dict] = []

    try:
        tree = ast.parse(source_code)
    except (SyntaxError, ValueError) as e:
        # ValueError covers sources containing null bytes on interpreters
        # that do not report that case as a SyntaxError.
        logger.warning(f"Failed to parse source code: {e}")
        return functions

    def _annotation_text(annotation) -> str:
        # Unannotated parameters / returns are documented as "Any".
        return ast.unparse(annotation) if annotation is not None else "Any"

    for node in ast.walk(tree):
        # Coroutines are part of the public API too, so match both kinds.
        if not isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            continue

        # Skip private functions (starting with _)
        if node.name.startswith("_"):
            continue

        params = node.args

        # Positional-only parameters precede the regular ones in a signature.
        args_list = [
            {"name": arg.arg, "type": _annotation_text(arg.annotation)}
            for arg in (*params.posonlyargs, *params.args)
        ]
        if params.vararg is not None:
            args_list.append(
                {
                    "name": f"*{params.vararg.arg}",
                    "type": _annotation_text(params.vararg.annotation),
                }
            )
        args_list.extend(
            {"name": arg.arg, "type": _annotation_text(arg.annotation)}
            for arg in params.kwonlyargs
        )
        if params.kwarg is not None:
            args_list.append(
                {
                    "name": f"**{params.kwarg.arg}",
                    "type": _annotation_text(params.kwarg.annotation),
                }
            )

        functions.append(
            {
                "name": node.name,
                "args": args_list,
                "return_type": _annotation_text(node.returns),
                "docstring": ast.get_docstring(node) or "",
                "lineno": node.lineno,
            }
        )

    # ast.walk is breadth-first, which would list every top-level function
    # before any class method; sort so the reference follows source order.
    functions.sort(key=lambda info: info["lineno"])
    return functions
|
|
1297
|
+
|
|
1298
|
+
async def _generate_api_section_for_function(
    self,
    func_info: dict,
    tier: ModelTier,
) -> str:
    """Generate structured API reference section for a single function.

    This is a focused prompt that ONLY asks for Args/Returns/Raises format,
    not narrative documentation. If the LLM call (or the cost tracking that
    follows it) raises, or the model returns an empty response, a minimal
    section is built locally from the extracted signature so the function
    still appears in the reference.

    Args:
        func_info: Function information from AST extraction; the keys
            ``name``, ``args``, ``return_type`` and ``docstring`` are read
        tier: Model tier to use for generation

    Returns:
        Markdown formatted API reference section
    """
    func_name = func_info["name"]
    args_list = func_info["args"]
    return_type = func_info["return_type"]
    docstring = func_info["docstring"]

    # Build function signature
    args_str = ", ".join(f"{arg['name']}: {arg['type']}" for arg in args_list)
    signature = f"def {func_name}({args_str}) -> {return_type}"

    system = """You are an API documentation generator. Output ONLY structured API reference sections in the EXACT format specified below.

CRITICAL: Do NOT write explanatory text, questions, or narrative. Output ONLY the formatted section.

REQUIRED FORMAT (copy this structure EXACTLY, replace bracketed content):

### `function_name()`

**Function Signature:**
```python
def function_name(param: type) -> return_type
```

**Description:**
Brief 1-2 sentence description.

**Args:**
- `param_name` (`type`): Parameter description

**Returns:**
- `return_type`: Return value description

**Raises:**
- `ExceptionType`: When this exception occurs

IMPORTANT:
- Use "**Args:**" (NOT "Parameters" or "params")
- Write "None" if no Args/Returns/Raises
- NO conversational text - just the formatted section"""

    docstring_text = docstring if docstring else "No docstring"
    user_message = f"""Generate API reference section using EXACT format specified in system prompt.

Function:
```python
{signature}
```

Docstring:
{docstring_text}

Output the formatted section EXACTLY as shown in system prompt. Use **Args:** (not Parameters). NO conversational text."""

    try:
        response, input_tokens, output_tokens = await self._call_llm(
            tier,
            system,
            user_message,
            max_tokens=1000,  # Small response - just the structured section
        )

        # Track cost
        self._track_cost(tier, input_tokens, output_tokens)

        # A blank response would leave an empty entry in the reference, so
        # only accept responses that actually contain text.
        if response and response.strip():
            return response

        logger.warning(f"Empty API section returned for {func_name}; using fallback")

    except Exception as e:
        # Deliberately broad: documentation generation is best-effort and a
        # single failed function must not abort the whole reference.
        logger.error(f"Failed to generate API section for {func_name}: {e}")

    # Minimal fallback built purely from the extracted signature.
    description = docstring.split(".")[0] if docstring else "No description available."

    # List the real parameters instead of claiming there are none; the
    # implicit receiver is not part of the documented call signature.
    documented_args = [arg for arg in args_list if arg["name"] not in ("self", "cls")]
    if documented_args:
        args_block = "\n".join(
            f"- `{arg['name']}` (`{arg['type']}`): No description available."
            for arg in documented_args
        )
    else:
        args_block = "None"

    return f"""### `{func_name}()`

**Function Signature:**
```python
{signature}
```

**Description:**
{description}

**Args:**
{args_block}

**Returns:**
- `{return_type}`: Return value

**Raises:**
None
"""
|
|
1401
|
+
|
|
1402
|
+
async def _add_api_reference_sections(
    self,
    narrative_doc: str,
    source_code: str,
    tier: ModelTier,
) -> str:
    """Append an "API Reference" appendix to the narrative documentation.

    Step 4 of the pipeline (after outline, write and polish): public
    functions are extracted from the source code, one structured section is
    generated per function (sequentially, in extraction order), and the
    sections are appended after a horizontal rule and an
    ``## API Reference`` heading.

    Args:
        narrative_doc: The polished narrative documentation
        source_code: Original source code to extract functions from
        tier: Model tier to use for API section generation

    Returns:
        The narrative document followed by the API reference appendix, or
        the narrative document unchanged when no public functions are found
    """
    logger.info("Adding structured API reference sections...")

    public_functions = self._extract_functions_from_source(source_code)

    # Nothing to document: hand the narrative back untouched.
    if not public_functions:
        logger.warning("No public functions found in source code")
        return narrative_doc

    logger.info(f"Found {len(public_functions)} public functions to document")

    rendered_sections = []
    for info in public_functions:
        logger.debug(f"Generating API reference for {info['name']}()")
        rendered_sections.append(
            await self._generate_api_section_for_function(info, tier)
        )

    # Separator, heading and intro line that precede the generated sections.
    appendix_header = (
        "\n\n---\n\n"
        "## API Reference\n\n"
        "Complete structured reference for all public functions:\n\n"
    )
    combined_doc = narrative_doc + appendix_header + "\n\n".join(rendered_sections)

    logger.info(f"Added {len(rendered_sections)} API reference sections")

    return combined_doc
|
|
1453
|
+
|
|
922
1454
|
|
|
923
1455
|
def format_doc_gen_report(result: dict, input_data: dict) -> str:
|
|
924
1456
|
"""Format document generation output as a human-readable report.
|