kodit 0.5.0__py3-none-any.whl → 0.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kodit might be problematic. Click here for more details.
- kodit/_version.py +2 -2
- kodit/app.py +10 -12
- kodit/application/factories/server_factory.py +78 -11
- kodit/application/services/commit_indexing_application_service.py +188 -31
- kodit/application/services/enrichment_query_service.py +95 -0
- kodit/config.py +3 -3
- kodit/domain/enrichments/__init__.py +1 -0
- kodit/domain/enrichments/architecture/__init__.py +1 -0
- kodit/domain/enrichments/architecture/architecture.py +20 -0
- kodit/domain/enrichments/architecture/physical/__init__.py +1 -0
- kodit/domain/enrichments/architecture/physical/discovery_notes.py +14 -0
- kodit/domain/enrichments/architecture/physical/formatter.py +11 -0
- kodit/domain/enrichments/architecture/physical/physical.py +17 -0
- kodit/domain/enrichments/development/__init__.py +1 -0
- kodit/domain/enrichments/development/development.py +18 -0
- kodit/domain/enrichments/development/snippet/__init__.py +1 -0
- kodit/domain/enrichments/development/snippet/snippet.py +21 -0
- kodit/domain/enrichments/enricher.py +17 -0
- kodit/domain/enrichments/enrichment.py +39 -0
- kodit/domain/enrichments/request.py +12 -0
- kodit/domain/enrichments/response.py +11 -0
- kodit/domain/enrichments/usage/__init__.py +1 -0
- kodit/domain/enrichments/usage/api_docs.py +19 -0
- kodit/domain/enrichments/usage/usage.py +18 -0
- kodit/domain/protocols.py +7 -6
- kodit/domain/services/enrichment_service.py +9 -30
- kodit/domain/services/physical_architecture_service.py +182 -0
- kodit/domain/tracking/__init__.py +1 -0
- kodit/domain/tracking/resolution_service.py +81 -0
- kodit/domain/tracking/trackable.py +21 -0
- kodit/domain/value_objects.py +6 -23
- kodit/infrastructure/api/v1/dependencies.py +15 -0
- kodit/infrastructure/api/v1/routers/commits.py +81 -0
- kodit/infrastructure/api/v1/routers/repositories.py +99 -0
- kodit/infrastructure/api/v1/schemas/enrichment.py +29 -0
- kodit/infrastructure/cloning/git/git_python_adaptor.py +71 -4
- kodit/infrastructure/enricher/__init__.py +1 -0
- kodit/infrastructure/enricher/enricher_factory.py +53 -0
- kodit/infrastructure/{enrichment/litellm_enrichment_provider.py → enricher/litellm_enricher.py} +20 -33
- kodit/infrastructure/{enrichment/local_enrichment_provider.py → enricher/local_enricher.py} +19 -24
- kodit/infrastructure/enricher/null_enricher.py +36 -0
- kodit/infrastructure/mappers/enrichment_mapper.py +83 -0
- kodit/infrastructure/mappers/snippet_mapper.py +20 -22
- kodit/infrastructure/physical_architecture/__init__.py +1 -0
- kodit/infrastructure/physical_architecture/detectors/__init__.py +1 -0
- kodit/infrastructure/physical_architecture/detectors/docker_compose_detector.py +336 -0
- kodit/infrastructure/physical_architecture/formatters/__init__.py +1 -0
- kodit/infrastructure/physical_architecture/formatters/narrative_formatter.py +149 -0
- kodit/infrastructure/slicing/api_doc_extractor.py +836 -0
- kodit/infrastructure/slicing/ast_analyzer.py +1128 -0
- kodit/infrastructure/slicing/slicer.py +56 -391
- kodit/infrastructure/sqlalchemy/enrichment_v2_repository.py +118 -0
- kodit/infrastructure/sqlalchemy/entities.py +46 -38
- kodit/infrastructure/sqlalchemy/git_branch_repository.py +22 -11
- kodit/infrastructure/sqlalchemy/git_commit_repository.py +23 -14
- kodit/infrastructure/sqlalchemy/git_repository.py +27 -17
- kodit/infrastructure/sqlalchemy/git_tag_repository.py +22 -11
- kodit/infrastructure/sqlalchemy/snippet_v2_repository.py +101 -106
- kodit/migrations/versions/19f8c7faf8b9_add_generic_enrichment_type.py +260 -0
- kodit/utils/dump_config.py +361 -0
- kodit/utils/dump_openapi.py +5 -6
- {kodit-0.5.0.dist-info → kodit-0.5.2.dist-info}/METADATA +1 -1
- {kodit-0.5.0.dist-info → kodit-0.5.2.dist-info}/RECORD +67 -32
- kodit/infrastructure/enrichment/__init__.py +0 -1
- kodit/infrastructure/enrichment/enrichment_factory.py +0 -52
- kodit/infrastructure/enrichment/null_enrichment_provider.py +0 -19
- /kodit/infrastructure/{enrichment → enricher}/utils.py +0 -0
- {kodit-0.5.0.dist-info → kodit-0.5.2.dist-info}/WHEEL +0 -0
- {kodit-0.5.0.dist-info → kodit-0.5.2.dist-info}/entry_points.txt +0 -0
- {kodit-0.5.0.dist-info → kodit-0.5.2.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,836 @@
|
|
|
1
|
+
"""API documentation extractor."""
|
|
2
|
+
|
|
3
|
+
import structlog
|
|
4
|
+
|
|
5
|
+
from kodit.domain.enrichments.usage.api_docs import APIDocEnrichment
|
|
6
|
+
from kodit.domain.entities.git import GitFile
|
|
7
|
+
from kodit.infrastructure.slicing.ast_analyzer import (
|
|
8
|
+
ASTAnalyzer,
|
|
9
|
+
ClassDefinition,
|
|
10
|
+
FunctionDefinition,
|
|
11
|
+
ModuleDefinition,
|
|
12
|
+
ParsedFile,
|
|
13
|
+
TypeDefinition,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class APIDocExtractor:
    """Build API-reference documentation enrichments from source files."""

    # Whitelist of language identifiers for which API docs are produced;
    # extract_api_docs returns nothing for any language outside this set.
    SUPPORTED_LANGUAGES = frozenset(
        ("c", "cpp", "csharp", "go", "java", "javascript", "python", "rust")
    )
|
|
33
|
+
|
|
34
|
+
def __init__(self) -> None:
    """Create the extractor and attach its structured logger."""
    # The logger is named after this module so log records can be traced
    # back to the extractor.
    logger = structlog.get_logger(__name__)
    self.log = logger
|
|
37
|
+
|
|
38
|
+
def extract_api_docs(
    self,
    files: list[GitFile],
    language: str,
    commit_sha: str,
    include_private: bool = False,  # noqa: FBT001, FBT002
) -> list[APIDocEnrichment]:
    """Produce the API documentation enrichment for one language.

    All qualifying modules are rendered into one markdown document, so the
    result holds at most a single enrichment.

    Args:
        files: Git files to analyse.
        language: Programming language of the files.
        commit_sha: Git commit SHA, stored as the enrichment's entity_id.
        include_private: Forwarded to the analyzer's module extraction to
            control whether private functions/classes are included.

    Returns:
        A one-element list with the combined enrichment, or an empty list
        when there are no files, the language is not supported, the
        analyzer raises ValueError, or no module survives filtering.

    """
    if not files:
        return []

    # Languages outside the whitelist never get API docs.
    if language not in self.SUPPORTED_LANGUAGES:
        self.log.debug("Language not supported for API docs", language=language)
        return []

    try:
        analyzer = ASTAnalyzer(language)
        modules = analyzer.extract_module_definitions(
            analyzer.parse_files(files), include_private=include_private
        )
    except ValueError:
        self.log.debug("Unsupported language", language=language)
        return []

    # Keep modules that have content, are not tests, and carry a non-empty
    # module_path (checked in that order).
    documented = []
    for module in modules:
        if not self._has_content(module):
            continue
        if self._is_test_module(module):
            continue
        if not module.module_path:
            continue
        documented.append(module)

    if not documented:
        return []

    # Modules sharing a module_path are merged before rendering.
    markdown_content = self._generate_combined_markdown(
        self._merge_modules(documented),
        language,
    )

    return [
        APIDocEnrichment(
            entity_id=commit_sha,
            language=language,
            content=markdown_content,
        )
    ]
|
|
102
|
+
|
|
103
|
+
def _has_content(self, module: ModuleDefinition) -> bool:
|
|
104
|
+
"""Check if module has any API elements or documentation."""
|
|
105
|
+
return bool(
|
|
106
|
+
module.functions
|
|
107
|
+
or module.classes
|
|
108
|
+
or module.types
|
|
109
|
+
or module.constants
|
|
110
|
+
or module.module_docstring
|
|
111
|
+
)
|
|
112
|
+
|
|
113
|
+
def _is_test_module(self, module: ModuleDefinition) -> bool:
|
|
114
|
+
"""Check if a module appears to be a test module.
|
|
115
|
+
|
|
116
|
+
Detects test modules based on common patterns:
|
|
117
|
+
- Module path contains 'test', 'tests', or '__tests__' directory
|
|
118
|
+
- Files with '_test' suffix (e.g., foo_test.go)
|
|
119
|
+
- Files with 'test_' prefix (e.g., test_foo.py)
|
|
120
|
+
- Files with '.test.' or '.spec.' in name (e.g., foo.test.js)
|
|
121
|
+
- Files with '_mocks' in name
|
|
122
|
+
"""
|
|
123
|
+
from pathlib import Path
|
|
124
|
+
|
|
125
|
+
# Check module_path for test directories
|
|
126
|
+
module_path_lower = module.module_path.lower()
|
|
127
|
+
module_path_parts = module_path_lower.split("/")
|
|
128
|
+
|
|
129
|
+
# Check if any part of the module path is a test directory
|
|
130
|
+
if any(part in ["test", "tests", "__tests__"] for part in module_path_parts):
|
|
131
|
+
return True
|
|
132
|
+
|
|
133
|
+
# Check all files in the module for test file name patterns
|
|
134
|
+
for parsed_file in module.files:
|
|
135
|
+
file_path = Path(parsed_file.git_file.path)
|
|
136
|
+
filename = file_path.name.lower()
|
|
137
|
+
|
|
138
|
+
# Check for test file name patterns
|
|
139
|
+
# Use more specific patterns to avoid false positives
|
|
140
|
+
if (
|
|
141
|
+
filename.endswith(("_test.go", "_test.py"))
|
|
142
|
+
or filename.startswith("test_")
|
|
143
|
+
or ".test." in filename
|
|
144
|
+
or ".spec." in filename
|
|
145
|
+
or "_mocks." in filename
|
|
146
|
+
or "_mock." in filename
|
|
147
|
+
):
|
|
148
|
+
return True
|
|
149
|
+
|
|
150
|
+
return False
|
|
151
|
+
|
|
152
|
+
def _merge_modules(self, modules: list[ModuleDefinition]) -> list[ModuleDefinition]:
|
|
153
|
+
"""Merge modules with the same module_path.
|
|
154
|
+
|
|
155
|
+
This is particularly important for Go where multiple files belong to
|
|
156
|
+
the same package/module.
|
|
157
|
+
"""
|
|
158
|
+
from collections import defaultdict
|
|
159
|
+
|
|
160
|
+
# Group modules by module_path
|
|
161
|
+
modules_by_path: dict[str, list[ModuleDefinition]] = defaultdict(list)
|
|
162
|
+
for module in modules:
|
|
163
|
+
modules_by_path[module.module_path].append(module)
|
|
164
|
+
|
|
165
|
+
# Merge modules with same path
|
|
166
|
+
merged: list[ModuleDefinition] = []
|
|
167
|
+
for module_path, module_group in modules_by_path.items():
|
|
168
|
+
if len(module_group) == 1:
|
|
169
|
+
# No merging needed
|
|
170
|
+
merged.append(module_group[0])
|
|
171
|
+
else:
|
|
172
|
+
# Merge all modules in this group
|
|
173
|
+
merged_module = self._merge_module_group(module_path, module_group)
|
|
174
|
+
merged.append(merged_module)
|
|
175
|
+
|
|
176
|
+
return merged
|
|
177
|
+
|
|
178
|
+
def _merge_module_group(
    self, module_path: str, module_group: list[ModuleDefinition]
) -> ModuleDefinition:
    """Combine several modules that share a path into one ModuleDefinition.

    Files, functions, classes, types and constants are concatenated in
    group order; the docstring is the first non-empty one in the group, or
    an empty string when none of the modules has one.
    """

    def gather(attribute: str) -> list:
        # Flatten one list-valued attribute across the whole group.
        return [item for mod in module_group for item in getattr(mod, attribute)]

    first_docstring = next(
        (mod.module_docstring for mod in module_group if mod.module_docstring),
        "",
    )

    return ModuleDefinition(
        module_path=module_path,
        module_docstring=first_docstring,
        files=gather("files"),
        functions=gather("functions"),
        classes=gather("classes"),
        types=gather("types"),
        constants=gather("constants"),
    )
|
|
224
|
+
|
|
225
|
+
def _is_valid_function_name(self, name: str) -> bool:
|
|
226
|
+
"""Check if a function name should be included in API documentation.
|
|
227
|
+
|
|
228
|
+
Filters out:
|
|
229
|
+
- Names longer than 255 characters (likely minified code)
|
|
230
|
+
- Anonymous or auto-generated function names
|
|
231
|
+
- Short minified names (2-3 chars with digits)
|
|
232
|
+
"""
|
|
233
|
+
if not name:
|
|
234
|
+
return False
|
|
235
|
+
|
|
236
|
+
# Length check - names longer than 255 chars are likely minified code
|
|
237
|
+
if len(name) > 255:
|
|
238
|
+
return False
|
|
239
|
+
|
|
240
|
+
# Skip common anonymous/auto-generated function name patterns
|
|
241
|
+
anonymous_patterns = [
|
|
242
|
+
"anonymous", # Anonymous functions
|
|
243
|
+
"default", # Default export names in some bundlers
|
|
244
|
+
]
|
|
245
|
+
if name.lower() in anonymous_patterns: # noqa: SIM103
|
|
246
|
+
return False
|
|
247
|
+
|
|
248
|
+
return True
|
|
249
|
+
|
|
250
|
+
def _generate_combined_markdown(
|
|
251
|
+
self,
|
|
252
|
+
modules: list[ModuleDefinition],
|
|
253
|
+
language: str,
|
|
254
|
+
) -> str:
|
|
255
|
+
"""Generate Godoc-style markdown for all modules combined.
|
|
256
|
+
|
|
257
|
+
Organizes content by module path, with types and functions grouped
|
|
258
|
+
within each module section.
|
|
259
|
+
"""
|
|
260
|
+
lines = []
|
|
261
|
+
|
|
262
|
+
# Generate index of all modules
|
|
263
|
+
lines.append(f"## {language} Index")
|
|
264
|
+
lines.append("")
|
|
265
|
+
lines.extend(
|
|
266
|
+
f"- [{module.module_path}](#{self._anchor(module.module_path)})"
|
|
267
|
+
for module in sorted(modules, key=lambda m: m.module_path)
|
|
268
|
+
)
|
|
269
|
+
lines.append("")
|
|
270
|
+
|
|
271
|
+
# Generate documentation for each module
|
|
272
|
+
for module in sorted(modules, key=lambda m: m.module_path):
|
|
273
|
+
lines.extend(self._generate_module_section(module))
|
|
274
|
+
|
|
275
|
+
return "\n".join(lines)
|
|
276
|
+
|
|
277
|
+
def _anchor(self, text: str) -> str:
|
|
278
|
+
"""Generate markdown anchor from text.
|
|
279
|
+
|
|
280
|
+
Follows GitHub-flavored markdown heading ID generation:
|
|
281
|
+
- Convert to lowercase
|
|
282
|
+
- Replace spaces with hyphens
|
|
283
|
+
- Remove punctuation except hyphens and underscores
|
|
284
|
+
- Replace slashes and dots with hyphens
|
|
285
|
+
"""
|
|
286
|
+
import re
|
|
287
|
+
|
|
288
|
+
# Convert to lowercase
|
|
289
|
+
anchor = text.lower()
|
|
290
|
+
|
|
291
|
+
# Replace slashes and dots with hyphens
|
|
292
|
+
anchor = anchor.replace("/", "-").replace(".", "-")
|
|
293
|
+
|
|
294
|
+
# Remove any characters that aren't alphanumeric, hyphens, or underscores
|
|
295
|
+
anchor = re.sub(r"[^a-z0-9\-_]", "", anchor)
|
|
296
|
+
|
|
297
|
+
# Replace multiple consecutive hyphens with a single hyphen
|
|
298
|
+
anchor = re.sub(r"-+", "-", anchor)
|
|
299
|
+
|
|
300
|
+
# Strip leading/trailing hyphens
|
|
301
|
+
return anchor.strip("-")
|
|
302
|
+
|
|
303
|
+
def _generate_module_section(self, module: ModuleDefinition) -> list[str]:
    """Build the markdown lines that document one module.

    The section is a "## <module_path>" heading, the module docstring when
    present, then the subsections in godoc order (constants, types,
    functions) followed by the source-file list.
    """
    section = [f"## {module.module_path}", ""]
    if module.module_docstring:
        section += [module.module_docstring, ""]

    renderers = (
        self._format_constants_section,
        self._format_types_section,
        self._format_functions_section,
        self._format_source_files_section,
    )
    for render in renderers:
        section.extend(render(module))

    return section
|
|
321
|
+
|
|
322
|
+
def _format_constants_section(self, module: ModuleDefinition) -> list[str]:
|
|
323
|
+
"""Format constants section for a module."""
|
|
324
|
+
if not module.constants:
|
|
325
|
+
return []
|
|
326
|
+
|
|
327
|
+
lines = ["### Constants", ""]
|
|
328
|
+
for _name, node in module.constants:
|
|
329
|
+
parsed_file = self._find_parsed_file(module, node)
|
|
330
|
+
if parsed_file:
|
|
331
|
+
signature = self._extract_source(parsed_file, node)
|
|
332
|
+
lines.append("```")
|
|
333
|
+
lines.append(signature.strip())
|
|
334
|
+
lines.append("```")
|
|
335
|
+
lines.append("")
|
|
336
|
+
return lines
|
|
337
|
+
|
|
338
|
+
def _format_functions_section(self, module: ModuleDefinition) -> list[str]:
|
|
339
|
+
"""Format functions section for a module."""
|
|
340
|
+
if not module.functions:
|
|
341
|
+
return []
|
|
342
|
+
|
|
343
|
+
# Filter out invalid function names (minified, anonymous, etc.)
|
|
344
|
+
valid_functions = [
|
|
345
|
+
f for f in module.functions if self._is_valid_function_name(f.simple_name)
|
|
346
|
+
]
|
|
347
|
+
|
|
348
|
+
if not valid_functions:
|
|
349
|
+
return []
|
|
350
|
+
|
|
351
|
+
lines = ["### Functions", ""]
|
|
352
|
+
for func in sorted(valid_functions, key=lambda f: f.simple_name):
|
|
353
|
+
lines.extend(self._format_function_standalone(func, module))
|
|
354
|
+
return lines
|
|
355
|
+
|
|
356
|
+
def _format_types_section(self, module: ModuleDefinition) -> list[str]:
|
|
357
|
+
"""Format types section for a module."""
|
|
358
|
+
if not (module.types or module.classes):
|
|
359
|
+
return []
|
|
360
|
+
|
|
361
|
+
lines = ["### Types", ""]
|
|
362
|
+
|
|
363
|
+
# Format type definitions
|
|
364
|
+
for typ in sorted(module.types, key=lambda t: t.simple_name):
|
|
365
|
+
lines.extend(self._format_type(typ, module))
|
|
366
|
+
|
|
367
|
+
# Format class definitions with methods
|
|
368
|
+
for cls in sorted(module.classes, key=lambda c: c.simple_name):
|
|
369
|
+
lines.extend(self._format_class(cls, module))
|
|
370
|
+
|
|
371
|
+
return lines
|
|
372
|
+
|
|
373
|
+
def _format_source_files_section(self, module: ModuleDefinition) -> list[str]:
|
|
374
|
+
"""Format source files section for a module."""
|
|
375
|
+
from pathlib import Path
|
|
376
|
+
|
|
377
|
+
lines = ["### Source Files", ""]
|
|
378
|
+
# Filter out __init__.py files as they're implementation details
|
|
379
|
+
# The module itself represents the package
|
|
380
|
+
non_init_files = [
|
|
381
|
+
parsed
|
|
382
|
+
for parsed in module.files
|
|
383
|
+
if Path(parsed.git_file.path).name != "__init__.py"
|
|
384
|
+
]
|
|
385
|
+
lines.extend(
|
|
386
|
+
f"- `{parsed.git_file.path}`"
|
|
387
|
+
for parsed in sorted(non_init_files, key=lambda f: f.git_file.path)
|
|
388
|
+
)
|
|
389
|
+
lines.append("")
|
|
390
|
+
return lines
|
|
391
|
+
|
|
392
|
+
def _format_function_standalone(
    self, func: FunctionDefinition, module: ModuleDefinition
) -> list[str]:
    """Render one module-level function entry.

    The heading is "#### func (<receiver>) <name>" when the function is
    flagged as a method and a Go receiver type can be extracted from its
    node; otherwise it is "#### <name>". The source text follows in a
    fenced block when the parsed file is found, then the docstring if any.
    """
    parsed_file = self._find_parsed_file_for_function(module, func)

    # The receiver lookup only runs for methods whose file was located.
    receiver_type = None
    if parsed_file and func.is_method:
        receiver_type = self._extract_go_receiver_type(func.node, parsed_file)

    if receiver_type:
        heading = f"#### func ({receiver_type}) {func.simple_name}"
    else:
        heading = f"#### {func.simple_name}"
    entry = [heading, ""]

    if parsed_file:
        signature = self._extract_source(parsed_file, func.node)
        entry += ["```", signature.strip(), "```", ""]

    if func.docstring:
        entry += [func.docstring, ""]

    return entry
|
|
423
|
+
|
|
424
|
+
def _generate_markdown(self, module: ModuleDefinition) -> str:  # noqa: C901
    """Render a standalone Go-Doc style markdown page for one module.

    Layout: "# package <path>" header, optional Overview (module
    docstring), optional Index, then Constants, Functions, Types (type
    definitions followed by classes under a single heading) and finally
    the list of source files.
    """
    page = [f"# package {module.module_path}", ""]

    if module.module_docstring:
        page += ["## Overview", "", module.module_docstring, ""]

    if self._should_generate_index(module):
        page.extend(self._generate_index(module))
        page.append("")

    if module.constants:
        page += ["## Constants", ""]
        for _name, node in module.constants:
            parsed_file = self._find_parsed_file(module, node)
            if not parsed_file:
                # Constants whose file cannot be located are omitted.
                continue
            signature = self._extract_source(parsed_file, node)
            page += ["```", signature.strip(), "```", ""]

    if module.functions:
        page += ["## Functions", ""]
        for func in sorted(module.functions, key=lambda f: f.simple_name):
            page.extend(self._format_function(func, module))

    # Types and classes share one "## Types" heading, emitted once.
    if module.types or module.classes:
        page += ["## Types", ""]
    if module.types:
        for typ in sorted(module.types, key=lambda t: t.simple_name):
            page.extend(self._format_type(typ, module))
    if module.classes:
        for cls in sorted(module.classes, key=lambda c: c.simple_name):
            page.extend(self._format_class(cls, module))

    page += ["## Source Files", ""]
    page.extend(f"- {parsed.git_file.path}" for parsed in module.files)
    page.append("")

    return "\n".join(page)
|
|
485
|
+
|
|
486
|
+
def _should_generate_index(self, module: ModuleDefinition) -> bool:
|
|
487
|
+
"""Check if we should generate an index."""
|
|
488
|
+
total_items = (
|
|
489
|
+
len(module.constants)
|
|
490
|
+
+ len(module.functions)
|
|
491
|
+
+ len(module.types)
|
|
492
|
+
+ len(module.classes)
|
|
493
|
+
)
|
|
494
|
+
return total_items > 3
|
|
495
|
+
|
|
496
|
+
def _generate_index(self, module: ModuleDefinition) -> list[str]:
|
|
497
|
+
"""Generate an index of all public items."""
|
|
498
|
+
lines = ["## Index", ""]
|
|
499
|
+
|
|
500
|
+
if module.constants:
|
|
501
|
+
lines.append("### Constants")
|
|
502
|
+
for name, _ in sorted(module.constants, key=lambda c: c[0]):
|
|
503
|
+
lines.append(f"- `{name}`")
|
|
504
|
+
lines.append("")
|
|
505
|
+
|
|
506
|
+
if module.functions:
|
|
507
|
+
lines.append("### Functions")
|
|
508
|
+
for func in sorted(module.functions, key=lambda f: f.simple_name):
|
|
509
|
+
sig = self._generate_function_signature_short(func)
|
|
510
|
+
lines.append(f"- `{sig}`")
|
|
511
|
+
lines.append("")
|
|
512
|
+
|
|
513
|
+
if module.types or module.classes:
|
|
514
|
+
lines.append("### Types")
|
|
515
|
+
lines.extend(
|
|
516
|
+
f"- `type {typ.simple_name}`"
|
|
517
|
+
for typ in sorted(module.types, key=lambda t: t.simple_name)
|
|
518
|
+
)
|
|
519
|
+
lines.extend(
|
|
520
|
+
f"- `type {cls.simple_name}`"
|
|
521
|
+
for cls in sorted(module.classes, key=lambda c: c.simple_name)
|
|
522
|
+
)
|
|
523
|
+
lines.append("")
|
|
524
|
+
|
|
525
|
+
return lines
|
|
526
|
+
|
|
527
|
+
def _generate_function_signature_short(self, func: FunctionDefinition) -> str:
|
|
528
|
+
"""Generate short function signature for index."""
|
|
529
|
+
params = ", ".join(func.parameters) if func.parameters else "..."
|
|
530
|
+
ret = f" -> {func.return_type}" if func.return_type else ""
|
|
531
|
+
return f"{func.simple_name}({params}){ret}"
|
|
532
|
+
|
|
533
|
+
def _format_function(
    self, func: FunctionDefinition, module: ModuleDefinition
) -> list[str]:
    """Render a function as a Go-Doc style "### func <name>" entry.

    The entry holds the heading, the function's stripped source in a
    fenced block when its parsed file is found, and its docstring when it
    has one.
    """
    entry = [f"### func {func.simple_name}", ""]

    parsed_file = self._find_parsed_file_for_function(module, func)
    if parsed_file:
        source_text = self._extract_source(parsed_file, func.node).strip()
        entry += ["```", source_text, "```", ""]

    if func.docstring:
        entry += [func.docstring, ""]

    return entry
|
|
554
|
+
|
|
555
|
+
def _format_type(self, typ: TypeDefinition, module: ModuleDefinition) -> list[str]:
    """Render a type definition as a Go-Doc style "#### type <name>" entry.

    The entry holds the heading, the type's stripped source in a fenced
    block when its parsed file is found, and its docstring when it has one.
    """
    entry = [f"#### type {typ.simple_name}", ""]

    parsed_file = self._find_parsed_file_for_type(module, typ)
    if parsed_file:
        source_text = self._extract_source(parsed_file, typ.node).strip()
        entry += ["```", source_text, "```", ""]

    if typ.docstring:
        entry += [typ.docstring, ""]

    return entry
|
|
574
|
+
|
|
575
|
+
def _format_class(
    self, cls: ClassDefinition, module: ModuleDefinition
) -> list[str]:
    """Render a class as a Go-Doc style "### type <name>" entry.

    The entry holds the heading, the class's stripped source in a fenced
    block when its parsed file is found, its docstring when present, and
    then every method whose name passes _is_valid_function_name, ordered
    by simple name.
    """
    entry = [f"### type {cls.simple_name}", ""]

    parsed_file = self._find_parsed_file_for_class(module, cls)
    if parsed_file:
        source_text = self._extract_source(parsed_file, cls.node).strip()
        entry += ["```", source_text, "```", ""]

    if cls.docstring:
        entry += [cls.docstring, ""]

    if cls.methods:
        documented_methods = sorted(
            (m for m in cls.methods if self._is_valid_function_name(m.simple_name)),
            key=lambda m: m.simple_name,
        )
        for method in documented_methods:
            entry.extend(self._format_method(method, cls, module))

    return entry
|
|
604
|
+
|
|
605
|
+
def _format_method(
    self,
    method: FunctionDefinition,
    cls: ClassDefinition,
    module: ModuleDefinition,
) -> list[str]:
    """Render a method as a "#### func (<class>) <name>" entry.

    The entry holds the heading, the method's stripped source in a fenced
    block when its parsed file is found, and its docstring when it has one.
    """
    entry = [f"#### func ({cls.simple_name}) {method.simple_name}", ""]

    parsed_file = self._find_parsed_file_for_function(module, method)
    if parsed_file:
        source_text = self._extract_source(parsed_file, method.node).strip()
        entry += ["```", source_text, "```", ""]

    if method.docstring:
        entry += [method.docstring, ""]

    return entry
|
|
629
|
+
|
|
630
|
+
def _extract_go_receiver_type(
|
|
631
|
+
self, node: object, parsed_file: ParsedFile
|
|
632
|
+
) -> str | None:
|
|
633
|
+
"""Extract Go receiver type from method declaration.
|
|
634
|
+
|
|
635
|
+
Returns the receiver type in godoc format.
|
|
636
|
+
Strips the parameter name, keeping only the type.
|
|
637
|
+
"""
|
|
638
|
+
node_type = getattr(node, "type", None)
|
|
639
|
+
if not node_type or node_type != "method_declaration":
|
|
640
|
+
return None
|
|
641
|
+
|
|
642
|
+
# Find the parameter_list that represents the receiver
|
|
643
|
+
for child in node.children: # type: ignore[attr-defined]
|
|
644
|
+
if child.type == "parameter_list":
|
|
645
|
+
# This is the receiver parameter
|
|
646
|
+
for param_child in child.children:
|
|
647
|
+
if param_child.type == "parameter_declaration":
|
|
648
|
+
# Extract the type from the parameter
|
|
649
|
+
return self._extract_go_type_from_param(
|
|
650
|
+
param_child, parsed_file
|
|
651
|
+
)
|
|
652
|
+
# If we found the parameter_list but no parameter, break
|
|
653
|
+
break
|
|
654
|
+
|
|
655
|
+
return None
|
|
656
|
+
|
|
657
|
+
def _extract_go_type_from_param(
|
|
658
|
+
self, param_node: object, parsed_file: ParsedFile
|
|
659
|
+
) -> str | None:
|
|
660
|
+
"""Extract type from Go parameter declaration node."""
|
|
661
|
+
# Look for type children: pointer_type or type_identifier
|
|
662
|
+
for child in param_node.children: # type: ignore[attr-defined]
|
|
663
|
+
if child.type == "pointer_type" and hasattr(child, "start_byte"):
|
|
664
|
+
# Extract the type being pointed to
|
|
665
|
+
start = child.start_byte
|
|
666
|
+
end = child.end_byte
|
|
667
|
+
type_bytes = parsed_file.source_code[start:end]
|
|
668
|
+
try:
|
|
669
|
+
return type_bytes.decode("utf-8")
|
|
670
|
+
except UnicodeDecodeError:
|
|
671
|
+
return None
|
|
672
|
+
if (
|
|
673
|
+
child.type == "type_identifier"
|
|
674
|
+
and hasattr(child, "text")
|
|
675
|
+
and child.text
|
|
676
|
+
):
|
|
677
|
+
# Direct type identifier
|
|
678
|
+
return child.text.decode("utf-8")
|
|
679
|
+
|
|
680
|
+
return None
|
|
681
|
+
|
|
682
|
+
def _find_parsed_file_for_function(
|
|
683
|
+
self, module: ModuleDefinition, func: FunctionDefinition
|
|
684
|
+
) -> ParsedFile | None:
|
|
685
|
+
"""Find the parsed file containing a function definition."""
|
|
686
|
+
# Match by file path from FunctionDefinition
|
|
687
|
+
for parsed in module.files:
|
|
688
|
+
if parsed.path == func.file:
|
|
689
|
+
return parsed
|
|
690
|
+
|
|
691
|
+
# Fallback: if we can't find by file path, this is an error condition
|
|
692
|
+
# Log a warning and return None to make the error visible
|
|
693
|
+
self.log.warning(
|
|
694
|
+
"Could not find parsed file for function",
|
|
695
|
+
module_path=module.module_path,
|
|
696
|
+
function_file=str(func.file),
|
|
697
|
+
file_count=len(module.files),
|
|
698
|
+
)
|
|
699
|
+
return None
|
|
700
|
+
|
|
701
|
+
def _find_parsed_file_for_type(
|
|
702
|
+
self, module: ModuleDefinition, typ: TypeDefinition
|
|
703
|
+
) -> ParsedFile | None:
|
|
704
|
+
"""Find the parsed file containing a type definition."""
|
|
705
|
+
# Match by file path from TypeDefinition
|
|
706
|
+
for parsed in module.files:
|
|
707
|
+
if parsed.path == typ.file:
|
|
708
|
+
return parsed
|
|
709
|
+
|
|
710
|
+
# Fallback: if we can't find by file path, this is an error condition
|
|
711
|
+
# Log a warning and return None to make the error visible
|
|
712
|
+
self.log.warning(
|
|
713
|
+
"Could not find parsed file for type",
|
|
714
|
+
module_path=module.module_path,
|
|
715
|
+
type_file=str(typ.file),
|
|
716
|
+
file_count=len(module.files),
|
|
717
|
+
)
|
|
718
|
+
return None
|
|
719
|
+
|
|
720
|
+
def _find_parsed_file_for_class(
|
|
721
|
+
self, module: ModuleDefinition, cls: ClassDefinition
|
|
722
|
+
) -> ParsedFile | None:
|
|
723
|
+
"""Find the parsed file containing a class definition."""
|
|
724
|
+
# Match by file path from ClassDefinition
|
|
725
|
+
for parsed in module.files:
|
|
726
|
+
if parsed.path == cls.file:
|
|
727
|
+
return parsed
|
|
728
|
+
|
|
729
|
+
# Fallback: if we can't find by file path, this is an error condition
|
|
730
|
+
# Log a warning and return None to make the error visible
|
|
731
|
+
self.log.warning(
|
|
732
|
+
"Could not find parsed file for class",
|
|
733
|
+
module_path=module.module_path,
|
|
734
|
+
class_file=str(cls.file),
|
|
735
|
+
file_count=len(module.files),
|
|
736
|
+
)
|
|
737
|
+
return None
|
|
738
|
+
|
|
739
|
+
def _find_parsed_file(
|
|
740
|
+
self, module: ModuleDefinition, node: object
|
|
741
|
+
) -> ParsedFile | None:
|
|
742
|
+
"""Find the parsed file containing a given node."""
|
|
743
|
+
# First try to match by tree reference
|
|
744
|
+
if hasattr(node, "tree"):
|
|
745
|
+
node_tree = node.tree # type: ignore[attr-defined]
|
|
746
|
+
for parsed in module.files:
|
|
747
|
+
if parsed.tree == node_tree:
|
|
748
|
+
return parsed
|
|
749
|
+
|
|
750
|
+
# Fallback: if we can't find by tree, this is an error condition
|
|
751
|
+
# Log a warning and return None to make the error visible
|
|
752
|
+
self.log.warning(
|
|
753
|
+
"Could not find parsed file for node",
|
|
754
|
+
module_path=module.module_path,
|
|
755
|
+
file_count=len(module.files),
|
|
756
|
+
)
|
|
757
|
+
return None
|
|
758
|
+
|
|
759
|
+
def _extract_source(self, parsed_file: ParsedFile | None, node: object) -> str:
|
|
760
|
+
"""Extract source code for a node."""
|
|
761
|
+
if not parsed_file:
|
|
762
|
+
return "<source unavailable>"
|
|
763
|
+
|
|
764
|
+
if not hasattr(node, "start_byte") or not hasattr(node, "end_byte"):
|
|
765
|
+
return "<source unavailable>"
|
|
766
|
+
|
|
767
|
+
start = node.start_byte # type: ignore[attr-defined]
|
|
768
|
+
end = node.end_byte # type: ignore[attr-defined]
|
|
769
|
+
|
|
770
|
+
try:
|
|
771
|
+
source = parsed_file.source_code[start:end].decode("utf-8")
|
|
772
|
+
# Extract just the signature
|
|
773
|
+
return self._extract_signature_only(source)
|
|
774
|
+
except (UnicodeDecodeError, IndexError):
|
|
775
|
+
return "<source unavailable>"
|
|
776
|
+
|
|
777
|
+
def _extract_signature_only(self, source: str) -> str:
|
|
778
|
+
"""Extract just the signature from a definition.
|
|
779
|
+
|
|
780
|
+
This removes function bodies and only keeps the declaration/signature.
|
|
781
|
+
For Go types (structs, interfaces), includes the full definition.
|
|
782
|
+
"""
|
|
783
|
+
lines = source.split("\n")
|
|
784
|
+
|
|
785
|
+
# Check if this is a Go type definition
|
|
786
|
+
# (starts with type name followed by struct/interface)
|
|
787
|
+
first_line = lines[0].strip() if lines else ""
|
|
788
|
+
is_go_type = any(keyword in first_line for keyword in [" struct", " interface"])
|
|
789
|
+
|
|
790
|
+
if is_go_type:
|
|
791
|
+
# For Go types, include the full definition including the body
|
|
792
|
+
# Find the matching closing brace
|
|
793
|
+
brace_count = 0
|
|
794
|
+
signature_lines = []
|
|
795
|
+
|
|
796
|
+
for line in lines:
|
|
797
|
+
signature_lines.append(line)
|
|
798
|
+
# Count braces to find the end of the type definition
|
|
799
|
+
brace_count += line.count("{") - line.count("}")
|
|
800
|
+
|
|
801
|
+
# If we've closed all braces, we're done
|
|
802
|
+
if brace_count == 0 and "{" in "".join(signature_lines):
|
|
803
|
+
break
|
|
804
|
+
|
|
805
|
+
return "\n".join(signature_lines)
|
|
806
|
+
|
|
807
|
+
# For functions, extract just the signature
|
|
808
|
+
signature_lines = []
|
|
809
|
+
|
|
810
|
+
for line in lines:
|
|
811
|
+
# Stop at the first line that ends a signature
|
|
812
|
+
signature_lines.append(line)
|
|
813
|
+
|
|
814
|
+
# Check for end of signature markers
|
|
815
|
+
stripped = line.strip()
|
|
816
|
+
|
|
817
|
+
# Python: colon ends signature (unless inside brackets)
|
|
818
|
+
if ":" in line:
|
|
819
|
+
open_parens = line.count("(") - line.count(")")
|
|
820
|
+
open_brackets = line.count("[") - line.count("]")
|
|
821
|
+
open_braces = line.count("{") - line.count("}")
|
|
822
|
+
if open_parens == 0 and open_brackets == 0 and open_braces == 0:
|
|
823
|
+
break
|
|
824
|
+
|
|
825
|
+
# Go/Java/C/C++/Rust/JS: opening brace often starts body
|
|
826
|
+
if stripped.endswith("{"):
|
|
827
|
+
# Remove the opening brace for cleaner signatures
|
|
828
|
+
signature_lines[-1] = line.rstrip("{").rstrip()
|
|
829
|
+
break
|
|
830
|
+
|
|
831
|
+
# Go: if signature ends without brace on same line
|
|
832
|
+
if stripped.endswith(")") and not any(c in line for c in ["{", ":"]):
|
|
833
|
+
# Might be complete - check if next line exists
|
|
834
|
+
continue
|
|
835
|
+
|
|
836
|
+
return "\n".join(signature_lines)
|