docuflow 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,18 @@
1
+ Metadata-Version: 2.4
2
+ Name: docuflow
3
+ Version: 0.3.0
4
+ Summary: AI-Native Documentation & Architecture Maintenance Agent
5
+ Author: DocuFlow Developer
6
+ License: MIT
7
+ Classifier: Programming Language :: Python :: 3
8
+ Classifier: License :: OSI Approved :: MIT License
9
+ Classifier: Operating System :: OS Independent
10
+ Requires-Python: >=3.9
11
+ Description-Content-Type: text/markdown
12
+ Requires-Dist: typer>=0.9.0
13
+ Requires-Dist: rich>=13.0.0
14
+ Requires-Dist: pydantic>=2.0.0
15
+ Requires-Dist: toml>=0.10.2
16
+ Requires-Dist: gitpython>=3.1.30
17
+ Requires-Dist: google-generativeai>=0.3.0
18
+ Requires-Dist: openai>=1.0.0
@@ -0,0 +1,34 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "docuflow"
7
+ version = "0.3.0"
8
+ description = "AI-Native Documentation & Architecture Maintenance Agent"
9
+ readme = "README.md"
10
+ requires-python = ">=3.9"
11
+ license = { text = "MIT" }
12
+ authors = [
13
+ { name = "DocuFlow Developer" }
14
+ ]
15
+ classifiers = [
16
+ "Programming Language :: Python :: 3",
17
+ "License :: OSI Approved :: MIT License",
18
+ "Operating System :: OS Independent",
19
+ ]
20
+ dependencies = [
21
+ "typer>=0.9.0",
22
+ "rich>=13.0.0",
23
+ "pydantic>=2.0.0",
24
+ "toml>=0.10.2",
25
+ "gitpython>=3.1.30",
26
+ "google-generativeai>=0.3.0",
27
+ "openai>=1.0.0",
28
+ ]
29
+
30
+ [project.scripts]
31
+ docuflow = "docuflow.main:app"
32
+
33
+ [tool.setuptools.packages.find]
34
+ where = ["src"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,4 @@
1
# Thin shim for tooling that still executes setup.py directly; setup() is
# called without arguments, so it carries no metadata of its own.
import setuptools

if __name__ == "__main__":
    setuptools.setup()
@@ -0,0 +1,5 @@
1
+ """
2
+ DocuFlow: AI-Native Documentation & Architecture Maintenance Agent
3
+ """
4
+
5
+ __version__ = "0.3.0"
@@ -0,0 +1,179 @@
1
+ import os
2
+ from pathlib import Path
3
+ from typing import List, Optional, Tuple
4
+ from google import generativeai as genai
5
+ from openai import OpenAI
6
+
7
+ from docuflow.config import DocuFlowConfig
8
+ from docuflow.context_builder import ImpactAnalysis
9
+
10
def find_associated_docs(filepath: str, docs_dir: Path) -> List[Path]:
    """
    Find markdown files under ``docs_dir`` that appear to refer to ``filepath``.

    Matching is done on "flattened" text (underscores and hyphens removed,
    lower-cased). A document is associated when the flattened stem of the code
    file (e.g. 'gitutils' for 'git_utils.py') occurs either in the flattened
    document content or in the flattened document filename.

    Args:
        filepath: Path of the changed code file; only its final component is used.
        docs_dir: Root directory that is searched recursively for ``*.md`` files.

    Returns:
        Matching markdown paths in sorted order. Empty when ``docs_dir`` does not
        exist or when the file stem flattens to an empty string.
    """
    associated: List[Path] = []
    if not docs_dir.exists():
        return associated

    def _flatten(text: str) -> str:
        return text.replace("_", "").replace("-", "").lower()

    flat_basename = _flatten(Path(filepath).stem)
    # An empty needle is a substring of every string, so without this guard a
    # path such as "" or "__.py" would be associated with every document.
    if not flat_basename:
        return associated

    # Sorted so the result does not depend on filesystem enumeration order.
    for md_file in sorted(docs_dir.glob("**/*.md")):
        # Skip hidden or temporary files
        if md_file.name.startswith("."):
            continue
        try:
            content = md_file.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError):
            # Unreadable or non-UTF-8 documents cannot be matched; skip them.
            continue

        # The full filename always starts with its stem, so checking the stem
        # alone also covers the 'gitutils.py' form of the reference.
        if flat_basename in _flatten(content) or flat_basename in _flatten(md_file.name):
            associated.append(md_file)

    return associated
48
+
49
def format_ast_summary(analysis: ImpactAnalysis) -> str:
    """
    Render the added, modified and removed entities of ``analysis`` as plain text
    for inclusion in the prompt.

    Each non-empty category contributes a heading followed by one bullet per
    entity; added and modified entities also show their signature. When every
    category is empty a fixed placeholder sentence is returned instead.
    """
    # (heading, entities, whether the signature is part of the bullet)
    sections = (
        ("Added Code Entities:", analysis.added_entities, True),
        ("Modified Code Entities:", analysis.modified_entities, True),
        ("Removed/Deleted Code Entities:", analysis.removed_entities, False),
    )

    lines = []
    for heading, entities, show_signature in sections:
        if not entities:
            continue
        lines.append(heading)
        for entity in entities:
            bullet = f" - {entity.type.capitalize()} `{entity.name}`"
            if show_signature:
                bullet += f" with signature: `{entity.signature}`"
            lines.append(bullet)

    if not lines:
        return "No high-level AST structural changes."
    return "\n".join(lines)
68
+
69
def build_orchestrator_prompt(
    rules_content: str,
    md_content: str,
    md_filename: str,
    analysis: ImpactAnalysis
) -> str:
    """
    Build the full text prompt sent to the documentation agent.

    The prompt embeds, in this order: the styling rules, the name and current
    content of the markdown file to update, the path and raw diff of the changed
    code file, the formatted AST change summary, and a fixed list of instructions.
    """
    # Computed up front so the template below only interpolates ready-made text.
    structural_changes = format_ast_summary(analysis)

    return f"""You are the DocuFlow AI Documentation Agent. Your job is to update the technical documentation markdown file to accurately reflect recent code modifications.

--- SYSTEM STYLING & FORMATTING RULES (documentation-rules.md) ---
{rules_content}

--- TARGET TECHNICAL DOCUMENT TO UPDATE ---
File Name: {md_filename}
Content:
```markdown
{md_content}
```

--- RAW CODE DIFF MODIFICATIONS ---
File: {analysis.filepath}
Diff:
```diff
{analysis.raw_diff}
```

--- EXTRACTED CODE AST CHANGES ---
{structural_changes}

--- MANDATORY INSTRUCTIONS ---
1. Analyze the raw code changes and the high-level AST modifications.
2. Update the target documentation file so it perfectly matches the new code structure (e.g., class names, function parameters, return types, or architectural flows).
3. Perform a NON-DESTRUCTIVE update: only modify, add, or delete details that directly correspond to the code changes. Do NOT touch, rewrite, or delete surrounding unrelated text, descriptions, or headers.
4. Synchronize or update any visual Mermaid diagrams inside the documentation to match the new code relationships or state flows, adhering strictly to the Mermaid standards (e.g., wrap node labels containing special characters in double quotes).
5. Keep formatting intact. Return ONLY the complete, updated markdown content. Do not include any introductory remarks, conversational preambles, or markdown fences wrap outside the file itself.
"""
111
+
112
def _strip_markdown_fences(text: str) -> str:
    """
    Remove a single outer markdown code fence that wraps the whole reply.

    Only a ```markdown / ```md opener (any casing) or a bare ``` opener is
    treated as a wrapper. A reply that opens with a fence for another language
    (```python, ```bash, ...) is returned untouched, because that fence belongs
    to the document itself.
    """
    cleaned = text.strip()
    if not cleaned.startswith("```"):
        return cleaned

    opener, _, remainder = cleaned.partition("\n")
    tag = opener[3:].strip().lower()
    remainder = remainder.rstrip()

    if tag in ("markdown", "md"):
        if remainder.endswith("```"):
            remainder = remainder[:-3]
        return remainder.strip()

    # A bare opener is ambiguous; treat it as a wrapper only when the reply
    # also ends with a closing fence.
    if tag == "" and remainder.endswith("```"):
        return remainder[:-3].strip()

    return cleaned


def execute_llm_update(
    config: DocuFlowConfig,
    prompt: str
) -> Tuple[Optional[str], str]:
    """
    Send ``prompt`` to the configured AI provider and return the updated markdown.

    The provider ('gemini' or 'openai', case-insensitive), model, temperature and
    token limit are read from ``config.ai``. The API key is read from the
    GEMINI_API_KEY or OPENAI_API_KEY environment variable respectively.

    Returns:
        ``(content, "")`` on success, where ``content`` is the reply with any
        outer markdown fence removed; ``(None, error_message)`` when the key is
        missing, the provider is unsupported, the API call raises, or the reply
        is empty.
    """
    provider = config.ai.provider.lower()
    model_name = config.ai.model
    temp = config.ai.temperature
    max_t = config.ai.max_tokens

    if provider == "gemini":
        api_key = os.environ.get("GEMINI_API_KEY")
        if not api_key:
            return None, "GEMINI_API_KEY environment variable is not set."
        try:
            genai.configure(api_key=api_key)
            model = genai.GenerativeModel(
                model_name=model_name,
                generation_config={
                    "temperature": temp,
                    "max_output_tokens": max_t
                }
            )
            raw = model.generate_content(prompt).text
        except Exception as e:
            return None, f"Gemini API call failed: {e}"
        label = "Gemini"

    elif provider == "openai":
        api_key = os.environ.get("OPENAI_API_KEY")
        if not api_key:
            return None, "OPENAI_API_KEY environment variable is not set."
        try:
            client = OpenAI(api_key=api_key)
            response = client.chat.completions.create(
                model=model_name,
                messages=[{"role": "user", "content": prompt}],
                temperature=temp,
                max_tokens=max_t
            )
            raw = response.choices[0].message.content
        except Exception as e:
            return None, f"OpenAI API call failed: {e}"
        label = "OpenAI"

    else:
        return None, f"Unsupported AI provider: {provider}"

    # A missing or blank reply must not be reported as a successful update:
    # the returned content is meant to be the full replacement document.
    content = _strip_markdown_fences(raw or "")
    if not content:
        return None, f"{label} API returned an empty response."
    return content, ""
@@ -0,0 +1,57 @@
1
+ import os
2
+ from pathlib import Path
3
+ from typing import List, Optional
4
+ from pydantic import BaseModel, Field
5
+ import toml
6
+
7
class ProjectConfig(BaseModel):
    """Settings held in the ``project`` field of ``DocuFlowConfig``."""

    # Project name; defaults to "DocuFlow".
    name: str = "DocuFlow"
    # Directory names to watch; a fresh ["src"] list is built per instance.
    watch_dirs: List[str] = Field(default_factory=lambda: ["src"])
10
+
11
class DocumentationConfig(BaseModel):
    """Settings held in the ``documentation`` field of ``DocuFlowConfig``."""

    # Directory that holds the documentation files.
    docs_dir: str = "docs"
    # Glob patterns for documentation files; defaults to markdown only.
    patterns: List[str] = Field(default_factory=lambda: ["*.md"])
    # Locations of agent rule and workflow files.
    rules_dir: str = ".agents/rules"
    workflows_dir: str = ".agents/workflows"
16
+
17
class AIConfig(BaseModel):
    """Settings held in the ``ai`` field of ``DocuFlowConfig``."""

    # Provider name; defaults to "gemini".
    provider: str = "gemini"
    # Model identifier for the chosen provider.
    model: str = "gemini-1.5-pro"
    # Sampling temperature.
    temperature: float = 0.2
    # Token limit for the model output.
    max_tokens: int = 4096
22
+
23
class GitConfig(BaseModel):
    """Settings held in the ``git`` field of ``DocuFlowConfig``."""

    # Branch name used as the comparison target; defaults to "main".
    target_branch: str = "main"
    # Flags for including unstaged and staged changes; both default to True.
    include_unstaged: bool = True
    include_staged: bool = True
28
class DocuFlowConfig(BaseModel):
    """
    Root configuration model.

    Each field is one settings section; a section that is not supplied is
    created from its model's defaults.
    """

    project: ProjectConfig = Field(default_factory=ProjectConfig)
    documentation: DocumentationConfig = Field(default_factory=DocumentationConfig)
    ai: AIConfig = Field(default_factory=AIConfig)
    git: GitConfig = Field(default_factory=GitConfig)
33
+
34
def load_config(config_path: Optional[Path] = None) -> DocuFlowConfig:
    """
    Load the docuflow.toml configuration file.

    Args:
        config_path: Explicit path to the configuration file (a ``str`` is also
            accepted). When omitted, the current working directory and each of
            its parents are searched for ``docuflow.toml``; the nearest wins.

    Returns:
        The parsed configuration. The default configuration is returned when no
        file is found, or when the file cannot be read, parsed or validated; in
        the latter case a warning names the file and the cause.
    """
    import warnings

    if config_path is None:
        # Search upward from the current working directory for docuflow.toml
        current_dir = Path.cwd()
        for directory in (current_dir, *current_dir.parents):
            candidate = directory / "docuflow.toml"
            if candidate.is_file():
                config_path = candidate
                break
    else:
        # Tolerate plain string paths from callers.
        config_path = Path(config_path)

    if config_path is not None and config_path.is_file():
        try:
            with open(config_path, "r", encoding="utf-8") as f:
                data = toml.load(f)
            return DocuFlowConfig(**data)
        except Exception as exc:
            # Still fall back to defaults, but say so: silently ignoring a broken
            # file makes the tool run with settings the user did not choose.
            warnings.warn(
                f"Could not load configuration from {config_path}: {exc}. "
                "Falling back to the default configuration.",
                stacklevel=2,
            )

    return DocuFlowConfig()
@@ -0,0 +1,83 @@
1
+ from pathlib import Path
2
+ from typing import Dict, List, Optional
3
+ from pydantic import BaseModel, Field
4
+
5
+ from docuflow.parser import EntityInfo, parse_code_structure
6
+ from docuflow.git_utils import run_git_command, is_git_repo
7
+
8
class ImpactAnalysis(BaseModel):
    """
    Structural impact of the changes made to one file.

    The three entity lists default to empty and ``raw_diff`` defaults to an
    empty string, so an instance can be built from a filepath alone.
    """

    # Path of the analysed file, as given by the caller.
    filepath: str
    # Entities present only in the new version of the file.
    added_entities: List[EntityInfo] = Field(default_factory=list)
    # Entities present in both versions that were detected as changed.
    modified_entities: List[EntityInfo] = Field(default_factory=list)
    # Entities present only in the old version of the file.
    removed_entities: List[EntityInfo] = Field(default_factory=list)
    # Diff text for the file, stored as received.
    raw_diff: str = ""
17
+
18
def get_git_file_content(filepath: str, ref: str = "HEAD", cwd: Optional[Path] = None) -> str:
    """
    Return the content of ``filepath`` as stored in Git at ``ref``.

    Runs ``git show <ref>:<filepath>``. Any failure (for instance the file not
    existing at that reference because it is newly added) yields an empty string.
    """
    blob_spec = f"{ref}:{filepath}"
    try:
        historical = run_git_command(["show", blob_spec], cwd=cwd)
    except Exception:
        historical = ""
    return historical
27
+
28
def build_impact_analysis(filepath: str, raw_diff: str, base_ref: str = "HEAD", cwd: Optional[Path] = None) -> ImpactAnalysis:
    """
    Compare a file's parsed structure at ``base_ref`` with its current state on disk.

    For ``.py`` files the entities of both versions are indexed by name and
    sorted into added (new name), removed (name gone) and modified (signature,
    docstring or line span differs). Other file types get an analysis that
    carries only the raw diff.

    NOTE(review): entities are keyed by ``name`` alone, so two entities sharing
    a name within one file overwrite each other in the index — confirm whether
    the parser emits qualified names.
    """
    # Baseline from Git first, then the working copy from disk.
    baseline_source = get_git_file_content(filepath, ref=base_ref, cwd=cwd)

    working_copy = (cwd or Path.cwd()) / filepath
    live_source = ""
    if working_copy.is_file():
        try:
            live_source = working_copy.read_text(encoding="utf-8")
        except Exception:
            pass

    # Non-python files just map the raw diff without AST structures
    if not filepath.endswith(".py"):
        return ImpactAnalysis(
            filepath=filepath,
            raw_diff=raw_diff
        )

    before = {entity.name: entity for entity in parse_code_structure(baseline_source)}
    after = {entity.name: entity for entity in parse_code_structure(live_source)}

    def _span(entity):
        return entity.line_end - entity.line_start

    def _differs(current, previous):
        # Signature, docstring, or the number of lines the entity covers.
        return (
            current.signature != previous.signature
            or current.docstring != previous.docstring
            or _span(current) != _span(previous)
        )

    added = [entity for name, entity in after.items() if name not in before]
    modified = [
        entity
        for name, entity in after.items()
        if name in before and _differs(entity, before[name])
    ]
    removed = [entity for name, entity in before.items() if name not in after]

    return ImpactAnalysis(
        filepath=filepath,
        added_entities=added,
        modified_entities=modified,
        removed_entities=removed,
        raw_diff=raw_diff
    )
@@ -0,0 +1,188 @@
1
+ import subprocess
2
+ from pathlib import Path
3
+ from typing import Dict, List, Optional
4
+ from pydantic import BaseModel
5
+
6
class FileChange(BaseModel):
    """
    One changed file as reported by Git, together with its diff text.
    """

    # Path of the changed file.
    filepath: str
    # Status letter: 'A' (Added), 'M' (Modified), 'D' (Deleted), 'R' (Renamed), etc.
    change_type: str
    # Diff text for this file.
    diff: str
    # Module name used to group changes.
    module: str
14
+
15
def run_git_command(args: List[str], cwd: Optional[Path] = None) -> str:
    """
    Execute ``git <args>`` and return its stdout with surrounding whitespace stripped.

    Args:
        args: Arguments passed to git, without the leading ``git``.
        cwd: Directory to run in; defaults to the current working directory.

    Raises:
        RuntimeError: if the working directory does not exist, git is not
            installed, or the command exits with a non-zero status.
    """
    workdir = Path(cwd or Path.cwd())
    # A missing cwd makes subprocess raise FileNotFoundError as well, which would
    # otherwise be reported as "Git executable not found"; check it separately.
    if not workdir.is_dir():
        raise RuntimeError(f"Git working directory does not exist: {workdir}")

    try:
        result = subprocess.run(
            ["git", *args],
            capture_output=True,
            text=True,
            # Decode as UTF-8 regardless of locale and never fail on stray bytes
            # (diffs may contain content in other encodings).
            encoding="utf-8",
            errors="replace",
            check=True,
            cwd=workdir,
        )
    except subprocess.CalledProcessError as e:
        raise RuntimeError(
            f"Git command failed: {' '.join(e.cmd)}\nError: {(e.stderr or '').strip()}"
        ) from e
    except FileNotFoundError as e:
        raise RuntimeError("Git executable not found on system path.") from e
    return result.stdout.strip()
33
+
34
def is_git_repo(cwd: Optional[Path] = None) -> bool:
    """
    Report whether ``cwd`` lies inside a Git work tree.

    Asks ``git rev-parse --is-inside-work-tree``; a failing git command is
    treated as "not a repository".
    """
    try:
        answer = run_git_command(["rev-parse", "--is-inside-work-tree"], cwd=cwd)
    except RuntimeError:
        return False
    return answer == "true"
43
+
44
def get_git_root(cwd: Optional[Path] = None) -> Path:
    """
    Return the resolved path of the repository's top-level directory,
    as reported by ``git rev-parse --show-toplevel``.
    """
    toplevel = run_git_command(["rev-parse", "--show-toplevel"], cwd=cwd)
    root = Path(toplevel)
    return root.resolve()
50
+
51
def extract_module(filepath: str) -> str:
    """
    Derive the grouping module (folder) for a file path.

    Paths three or more components deep map to their first two components,
    paths with exactly two components map to the first one, and anything
    shallower maps to '.':
        'src/auth/login.py' -> 'src/auth'
        'src/main.py'       -> 'src'
        'plan.md'           -> '.'
    """
    segments = Path(filepath).parts
    depth = len(segments)
    if depth <= 1:
        return "."
    if depth == 2:
        return segments[0]
    return str(Path(segments[0], segments[1]))
66
+
67
def parse_name_status_line(line: str) -> Optional[tuple[str, str]]:
    """
    Parse one line of ``git diff --name-status`` output.

    Examples:
        'M\\tsrc/main.py'         -> ('M', 'src/main.py')
        'R100\\told.py\\tnew.py'   -> ('R', 'new.py')

    Returns:
        ``(status_letter, filepath)`` where the status is reduced to its first
        character and the path is the last tab-separated field (the destination
        for renames), or ``None`` for blank or malformed lines.
    """
    # Drop a trailing line terminator so it cannot end up inside the path.
    line = line.rstrip("\r\n")
    if not line.strip():
        return None

    fields = line.split("\t")
    if len(fields) < 2:
        return None

    status_field = fields[0].strip()
    destination = fields[-1]
    # An empty status (line starting with a tab) used to raise IndexError.
    if not status_field or not destination:
        return None

    # Rename/copy statuses carry a similarity score ('R100'); keep the letter only.
    return status_field[0], destination
81
+
82
def get_file_diff(filepath: str, extra_args: List[str], cwd: Optional[Path] = None) -> str:
    """
    Return the ``git diff`` output restricted to ``filepath``.

    ``extra_args`` are placed between ``diff`` and the ``--`` path separator.
    A failing git command yields an empty string.
    """
    command = ["diff"] + extra_args
    command.extend(["--", filepath])
    try:
        return run_git_command(command, cwd=cwd)
    except RuntimeError:
        return ""
91
+
92
def get_unstaged_changes(cwd: Optional[Path] = None) -> List[FileChange]:
    """
    Collect every file listed by ``git diff --name-status`` together with its diff.

    Returns an empty list when ``cwd`` is not inside a Git repository.
    """
    if not is_git_repo(cwd):
        return []

    listing = run_git_command(["diff", "--name-status"], cwd=cwd)
    # Parsed lazily; blank or malformed rows come back as None and are skipped.
    parsed_rows = (parse_name_status_line(row) for row in listing.splitlines())

    return [
        FileChange(
            filepath=path,
            change_type=code,
            diff=get_file_diff(path, [], cwd=cwd),
            module=extract_module(path)
        )
        for code, path in (row for row in parsed_rows if row)
    ]
118
+
119
def get_staged_changes(cwd: Optional[Path] = None) -> List[FileChange]:
    """
    Collect every file listed by ``git diff --cached --name-status`` together
    with its staged diff.

    Returns an empty list when ``cwd`` is not inside a Git repository.
    """
    if not is_git_repo(cwd):
        return []

    staged_flag = ["--cached"]
    listing = run_git_command(["diff"] + staged_flag + ["--name-status"], cwd=cwd)

    collected = []
    for row in listing.splitlines():
        entry = parse_name_status_line(row)
        if entry:
            code, path = entry
            file_diff = get_file_diff(path, ["--cached"], cwd=cwd)
            collected.append(
                FileChange(
                    filepath=path,
                    change_type=code,
                    diff=file_diff,
                    module=extract_module(path)
                )
            )
    return collected
145
+
146
def get_branch_diff(target_branch: str, cwd: Optional[Path] = None) -> List[FileChange]:
    """
    Collect file changes and diffs between a target branch/commit and HEAD.

    The comparison prefers ``<target>...HEAD`` (triple dot), i.e. the changes
    introduced on the current branch since it split from the target. When that
    form fails (for example because no merge base exists), it falls back to a
    direct ``<target> HEAD`` comparison.

    Args:
        target_branch: Branch name or commit to compare against. If it cannot
            be resolved as given, ``origin/<target_branch>`` is tried as well.
        cwd: Directory inside the repository; defaults to the current directory.

    Returns:
        One FileChange per changed file; an empty list outside a Git repository.

    Raises:
        RuntimeError: if the target cannot be resolved or both comparisons fail.
    """
    if not is_git_repo(cwd):
        return []

    # Resolve the reference up front so an unknown target produces a clear error
    # instead of an opaque `git diff` failure.
    base_ref = None
    for candidate in (target_branch, f"origin/{target_branch}"):
        try:
            run_git_command(["rev-parse", "--verify", candidate], cwd=cwd)
        except RuntimeError:
            continue
        base_ref = candidate
        break
    if base_ref is None:
        raise RuntimeError(f"Cannot resolve target branch or commit: {target_branch}")

    range_args = [f"{base_ref}...HEAD"]
    try:
        status_output = run_git_command(["diff"] + range_args + ["--name-status"], cwd=cwd)
    except RuntimeError:
        # Fall back to comparing the two commits directly.
        range_args = [base_ref, "HEAD"]
        status_output = run_git_command(["diff"] + range_args + ["--name-status"], cwd=cwd)

    changes = []
    for line in status_output.splitlines():
        parsed = parse_name_status_line(line)
        if not parsed:
            continue
        status, filepath = parsed
        # Use the same range that produced the file list.
        diff = get_file_diff(filepath, range_args, cwd=cwd)
        changes.append(FileChange(
            filepath=filepath,
            change_type=status,
            diff=diff,
            module=extract_module(filepath)
        ))

    return changes
180
+
181
def group_changes_by_module(changes: List[FileChange]) -> Dict[str, List[FileChange]]:
    """
    Bucket the given changes by their ``module`` attribute.

    Keys appear in order of first occurrence, and each bucket keeps the input
    order of its changes.
    """
    buckets: Dict[str, List[FileChange]] = {}
    for item in changes:
        key = item.module
        if key not in buckets:
            buckets[key] = []
        buckets[key].append(item)
    return buckets