kodit 0.4.3__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kodit might be problematic. Click here for more details.

Files changed (135) hide show
  1. kodit/_version.py +2 -2
  2. kodit/app.py +51 -23
  3. kodit/application/factories/reporting_factory.py +6 -2
  4. kodit/application/factories/server_factory.py +353 -0
  5. kodit/application/services/code_search_application_service.py +144 -0
  6. kodit/application/services/commit_indexing_application_service.py +700 -0
  7. kodit/application/services/indexing_worker_service.py +13 -44
  8. kodit/application/services/queue_service.py +24 -3
  9. kodit/application/services/reporting.py +0 -2
  10. kodit/application/services/sync_scheduler.py +15 -31
  11. kodit/cli.py +2 -753
  12. kodit/cli_utils.py +2 -9
  13. kodit/config.py +4 -97
  14. kodit/database.py +38 -1
  15. kodit/domain/enrichments/__init__.py +1 -0
  16. kodit/domain/enrichments/architecture/__init__.py +1 -0
  17. kodit/domain/enrichments/architecture/architecture.py +20 -0
  18. kodit/domain/enrichments/architecture/physical/__init__.py +1 -0
  19. kodit/domain/enrichments/architecture/physical/discovery_notes.py +14 -0
  20. kodit/domain/enrichments/architecture/physical/formatter.py +11 -0
  21. kodit/domain/enrichments/architecture/physical/physical.py +17 -0
  22. kodit/domain/enrichments/development/__init__.py +1 -0
  23. kodit/domain/enrichments/development/development.py +18 -0
  24. kodit/domain/enrichments/development/snippet/__init__.py +1 -0
  25. kodit/domain/enrichments/development/snippet/snippet.py +21 -0
  26. kodit/domain/enrichments/enricher.py +17 -0
  27. kodit/domain/enrichments/enrichment.py +39 -0
  28. kodit/domain/enrichments/request.py +12 -0
  29. kodit/domain/enrichments/response.py +11 -0
  30. kodit/domain/enrichments/usage/__init__.py +1 -0
  31. kodit/domain/enrichments/usage/api_docs.py +19 -0
  32. kodit/domain/enrichments/usage/usage.py +18 -0
  33. kodit/domain/{entities.py → entities/__init__.py} +50 -195
  34. kodit/domain/entities/git.py +190 -0
  35. kodit/domain/factories/__init__.py +1 -0
  36. kodit/domain/factories/git_repo_factory.py +76 -0
  37. kodit/domain/protocols.py +264 -64
  38. kodit/domain/services/bm25_service.py +5 -1
  39. kodit/domain/services/embedding_service.py +3 -0
  40. kodit/domain/services/enrichment_service.py +9 -30
  41. kodit/domain/services/git_repository_service.py +429 -0
  42. kodit/domain/services/git_service.py +300 -0
  43. kodit/domain/services/physical_architecture_service.py +182 -0
  44. kodit/domain/services/task_status_query_service.py +2 -2
  45. kodit/domain/value_objects.py +87 -135
  46. kodit/infrastructure/api/client/__init__.py +0 -2
  47. kodit/infrastructure/api/v1/__init__.py +0 -4
  48. kodit/infrastructure/api/v1/dependencies.py +92 -46
  49. kodit/infrastructure/api/v1/routers/__init__.py +0 -6
  50. kodit/infrastructure/api/v1/routers/commits.py +352 -0
  51. kodit/infrastructure/api/v1/routers/queue.py +2 -2
  52. kodit/infrastructure/api/v1/routers/repositories.py +282 -0
  53. kodit/infrastructure/api/v1/routers/search.py +31 -14
  54. kodit/infrastructure/api/v1/schemas/__init__.py +0 -24
  55. kodit/infrastructure/api/v1/schemas/commit.py +96 -0
  56. kodit/infrastructure/api/v1/schemas/context.py +2 -0
  57. kodit/infrastructure/api/v1/schemas/enrichment.py +29 -0
  58. kodit/infrastructure/api/v1/schemas/repository.py +128 -0
  59. kodit/infrastructure/api/v1/schemas/search.py +12 -9
  60. kodit/infrastructure/api/v1/schemas/snippet.py +58 -0
  61. kodit/infrastructure/api/v1/schemas/tag.py +31 -0
  62. kodit/infrastructure/api/v1/schemas/task_status.py +2 -0
  63. kodit/infrastructure/bm25/local_bm25_repository.py +16 -4
  64. kodit/infrastructure/bm25/vectorchord_bm25_repository.py +68 -52
  65. kodit/infrastructure/cloning/git/git_python_adaptor.py +534 -0
  66. kodit/infrastructure/cloning/git/working_copy.py +1 -1
  67. kodit/infrastructure/embedding/embedding_factory.py +3 -2
  68. kodit/infrastructure/embedding/local_vector_search_repository.py +1 -1
  69. kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +111 -84
  70. kodit/infrastructure/enricher/__init__.py +1 -0
  71. kodit/infrastructure/enricher/enricher_factory.py +53 -0
  72. kodit/infrastructure/{enrichment/litellm_enrichment_provider.py → enricher/litellm_enricher.py} +36 -56
  73. kodit/infrastructure/{enrichment/local_enrichment_provider.py → enricher/local_enricher.py} +19 -24
  74. kodit/infrastructure/enricher/null_enricher.py +36 -0
  75. kodit/infrastructure/indexing/fusion_service.py +1 -1
  76. kodit/infrastructure/mappers/enrichment_mapper.py +83 -0
  77. kodit/infrastructure/mappers/git_mapper.py +193 -0
  78. kodit/infrastructure/mappers/snippet_mapper.py +104 -0
  79. kodit/infrastructure/mappers/task_mapper.py +5 -44
  80. kodit/infrastructure/physical_architecture/__init__.py +1 -0
  81. kodit/infrastructure/physical_architecture/detectors/__init__.py +1 -0
  82. kodit/infrastructure/physical_architecture/detectors/docker_compose_detector.py +336 -0
  83. kodit/infrastructure/physical_architecture/formatters/__init__.py +1 -0
  84. kodit/infrastructure/physical_architecture/formatters/narrative_formatter.py +149 -0
  85. kodit/infrastructure/reporting/log_progress.py +8 -5
  86. kodit/infrastructure/reporting/telemetry_progress.py +21 -0
  87. kodit/infrastructure/slicing/api_doc_extractor.py +836 -0
  88. kodit/infrastructure/slicing/ast_analyzer.py +1128 -0
  89. kodit/infrastructure/slicing/slicer.py +87 -421
  90. kodit/infrastructure/sqlalchemy/embedding_repository.py +43 -23
  91. kodit/infrastructure/sqlalchemy/enrichment_v2_repository.py +118 -0
  92. kodit/infrastructure/sqlalchemy/entities.py +402 -158
  93. kodit/infrastructure/sqlalchemy/git_branch_repository.py +274 -0
  94. kodit/infrastructure/sqlalchemy/git_commit_repository.py +346 -0
  95. kodit/infrastructure/sqlalchemy/git_repository.py +262 -0
  96. kodit/infrastructure/sqlalchemy/git_tag_repository.py +268 -0
  97. kodit/infrastructure/sqlalchemy/snippet_v2_repository.py +479 -0
  98. kodit/infrastructure/sqlalchemy/task_repository.py +29 -23
  99. kodit/infrastructure/sqlalchemy/task_status_repository.py +24 -12
  100. kodit/infrastructure/sqlalchemy/unit_of_work.py +10 -14
  101. kodit/mcp.py +12 -30
  102. kodit/migrations/env.py +1 -0
  103. kodit/migrations/versions/04b80f802e0c_foreign_key_review.py +100 -0
  104. kodit/migrations/versions/19f8c7faf8b9_add_generic_enrichment_type.py +260 -0
  105. kodit/migrations/versions/7f15f878c3a1_add_new_git_entities.py +690 -0
  106. kodit/migrations/versions/f9e5ef5e688f_add_git_commits_number.py +43 -0
  107. kodit/py.typed +0 -0
  108. kodit/utils/dump_config.py +361 -0
  109. kodit/utils/dump_openapi.py +6 -4
  110. kodit/utils/path_utils.py +29 -0
  111. {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/METADATA +3 -3
  112. kodit-0.5.1.dist-info/RECORD +168 -0
  113. kodit/application/factories/code_indexing_factory.py +0 -195
  114. kodit/application/services/auto_indexing_service.py +0 -99
  115. kodit/application/services/code_indexing_application_service.py +0 -410
  116. kodit/domain/services/index_query_service.py +0 -70
  117. kodit/domain/services/index_service.py +0 -269
  118. kodit/infrastructure/api/client/index_client.py +0 -57
  119. kodit/infrastructure/api/v1/routers/indexes.py +0 -164
  120. kodit/infrastructure/api/v1/schemas/index.py +0 -101
  121. kodit/infrastructure/bm25/bm25_factory.py +0 -28
  122. kodit/infrastructure/cloning/__init__.py +0 -1
  123. kodit/infrastructure/cloning/metadata.py +0 -98
  124. kodit/infrastructure/enrichment/__init__.py +0 -1
  125. kodit/infrastructure/enrichment/enrichment_factory.py +0 -52
  126. kodit/infrastructure/enrichment/null_enrichment_provider.py +0 -19
  127. kodit/infrastructure/mappers/index_mapper.py +0 -345
  128. kodit/infrastructure/reporting/tdqm_progress.py +0 -38
  129. kodit/infrastructure/slicing/language_detection_service.py +0 -18
  130. kodit/infrastructure/sqlalchemy/index_repository.py +0 -646
  131. kodit-0.4.3.dist-info/RECORD +0 -125
  132. /kodit/infrastructure/{enrichment → enricher}/utils.py +0 -0
  133. {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/WHEEL +0 -0
  134. {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/entry_points.txt +0 -0
  135. {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,836 @@
1
+ """API documentation extractor."""
2
+
3
+ import structlog
4
+
5
+ from kodit.domain.enrichments.usage.api_docs import APIDocEnrichment
6
+ from kodit.domain.entities.git import GitFile
7
+ from kodit.infrastructure.slicing.ast_analyzer import (
8
+ ASTAnalyzer,
9
+ ClassDefinition,
10
+ FunctionDefinition,
11
+ ModuleDefinition,
12
+ ParsedFile,
13
+ TypeDefinition,
14
+ )
15
+
16
+
17
+ class APIDocExtractor:
18
+ """Extract API documentation from code files."""
19
+
20
+ # Languages that should have API docs generated
21
+ SUPPORTED_LANGUAGES = frozenset(
22
+ {
23
+ "c",
24
+ "cpp",
25
+ "csharp",
26
+ "go",
27
+ "java",
28
+ "javascript",
29
+ "python",
30
+ "rust",
31
+ }
32
+ )
33
+
34
def __init__(self) -> None:
    """Set up the extractor with a structlog logger named after this module."""
    logger = structlog.get_logger(__name__)
    self.log = logger
37
+
38
+ def extract_api_docs(
39
+ self,
40
+ files: list[GitFile],
41
+ language: str,
42
+ commit_sha: str,
43
+ include_private: bool = False, # noqa: FBT001, FBT002
44
+ ) -> list[APIDocEnrichment]:
45
+ """Extract API documentation enrichments from files.
46
+
47
+ Returns a single enrichment per language that combines all modules.
48
+
49
+ Args:
50
+ files: List of Git files to extract API docs from
51
+ language: Programming language of the files
52
+ commit_sha: Git commit SHA to use as entity_id
53
+ include_private: Whether to include private functions/classes
54
+
55
+ """
56
+ if not files:
57
+ return []
58
+
59
+ # Filter out languages that shouldn't have API docs
60
+ if language not in self.SUPPORTED_LANGUAGES:
61
+ self.log.debug("Language not supported for API docs", language=language)
62
+ return []
63
+
64
+ try:
65
+ analyzer = ASTAnalyzer(language)
66
+ parsed_files = analyzer.parse_files(files)
67
+ modules = analyzer.extract_module_definitions(
68
+ parsed_files, include_private=include_private
69
+ )
70
+ except ValueError:
71
+ self.log.debug("Unsupported language", language=language)
72
+ return []
73
+
74
+ # Filter modules: must have content, not be tests, and have module_path
75
+ modules_with_content = [
76
+ m
77
+ for m in modules
78
+ if self._has_content(m)
79
+ and not self._is_test_module(m)
80
+ and m.module_path # Exclude modules with empty module_path
81
+ ]
82
+
83
+ if not modules_with_content:
84
+ return []
85
+
86
+ # Merge modules with the same module_path
87
+ merged_modules = self._merge_modules(modules_with_content)
88
+
89
+ # Generate single markdown document for all modules
90
+ markdown_content = self._generate_combined_markdown(
91
+ merged_modules,
92
+ language,
93
+ )
94
+
95
+ enrichment = APIDocEnrichment(
96
+ entity_id=commit_sha,
97
+ language=language,
98
+ content=markdown_content,
99
+ )
100
+
101
+ return [enrichment]
102
+
103
+ def _has_content(self, module: ModuleDefinition) -> bool:
104
+ """Check if module has any API elements or documentation."""
105
+ return bool(
106
+ module.functions
107
+ or module.classes
108
+ or module.types
109
+ or module.constants
110
+ or module.module_docstring
111
+ )
112
+
113
+ def _is_test_module(self, module: ModuleDefinition) -> bool:
114
+ """Check if a module appears to be a test module.
115
+
116
+ Detects test modules based on common patterns:
117
+ - Module path contains 'test', 'tests', or '__tests__' directory
118
+ - Files with '_test' suffix (e.g., foo_test.go)
119
+ - Files with 'test_' prefix (e.g., test_foo.py)
120
+ - Files with '.test.' or '.spec.' in name (e.g., foo.test.js)
121
+ - Files with '_mocks' in name
122
+ """
123
+ from pathlib import Path
124
+
125
+ # Check module_path for test directories
126
+ module_path_lower = module.module_path.lower()
127
+ module_path_parts = module_path_lower.split("/")
128
+
129
+ # Check if any part of the module path is a test directory
130
+ if any(part in ["test", "tests", "__tests__"] for part in module_path_parts):
131
+ return True
132
+
133
+ # Check all files in the module for test file name patterns
134
+ for parsed_file in module.files:
135
+ file_path = Path(parsed_file.git_file.path)
136
+ filename = file_path.name.lower()
137
+
138
+ # Check for test file name patterns
139
+ # Use more specific patterns to avoid false positives
140
+ if (
141
+ filename.endswith(("_test.go", "_test.py"))
142
+ or filename.startswith("test_")
143
+ or ".test." in filename
144
+ or ".spec." in filename
145
+ or "_mocks." in filename
146
+ or "_mock." in filename
147
+ ):
148
+ return True
149
+
150
+ return False
151
+
152
+ def _merge_modules(self, modules: list[ModuleDefinition]) -> list[ModuleDefinition]:
153
+ """Merge modules with the same module_path.
154
+
155
+ This is particularly important for Go where multiple files belong to
156
+ the same package/module.
157
+ """
158
+ from collections import defaultdict
159
+
160
+ # Group modules by module_path
161
+ modules_by_path: dict[str, list[ModuleDefinition]] = defaultdict(list)
162
+ for module in modules:
163
+ modules_by_path[module.module_path].append(module)
164
+
165
+ # Merge modules with same path
166
+ merged: list[ModuleDefinition] = []
167
+ for module_path, module_group in modules_by_path.items():
168
+ if len(module_group) == 1:
169
+ # No merging needed
170
+ merged.append(module_group[0])
171
+ else:
172
+ # Merge all modules in this group
173
+ merged_module = self._merge_module_group(module_path, module_group)
174
+ merged.append(merged_module)
175
+
176
+ return merged
177
+
178
def _merge_module_group(
    self, module_path: str, module_group: list[ModuleDefinition]
) -> ModuleDefinition:
    """Combine several modules that share ``module_path`` into one.

    Files, functions, classes, types and constants are concatenated in
    group order. The docstring is the first non-empty one in the group,
    or an empty string when none has one.
    """
    first_docstring = next(
        (mod.module_docstring for mod in module_group if mod.module_docstring),
        "",
    )
    return ModuleDefinition(
        module_path=module_path,
        module_docstring=first_docstring,
        files=[item for mod in module_group for item in mod.files],
        functions=[item for mod in module_group for item in mod.functions],
        classes=[item for mod in module_group for item in mod.classes],
        types=[item for mod in module_group for item in mod.types],
        constants=[item for mod in module_group for item in mod.constants],
    )
224
+
225
+ def _is_valid_function_name(self, name: str) -> bool:
226
+ """Check if a function name should be included in API documentation.
227
+
228
+ Filters out:
229
+ - Names longer than 255 characters (likely minified code)
230
+ - Anonymous or auto-generated function names
231
+ - Short minified names (2-3 chars with digits)
232
+ """
233
+ if not name:
234
+ return False
235
+
236
+ # Length check - names longer than 255 chars are likely minified code
237
+ if len(name) > 255:
238
+ return False
239
+
240
+ # Skip common anonymous/auto-generated function name patterns
241
+ anonymous_patterns = [
242
+ "anonymous", # Anonymous functions
243
+ "default", # Default export names in some bundlers
244
+ ]
245
+ if name.lower() in anonymous_patterns: # noqa: SIM103
246
+ return False
247
+
248
+ return True
249
+
250
+ def _generate_combined_markdown(
251
+ self,
252
+ modules: list[ModuleDefinition],
253
+ language: str,
254
+ ) -> str:
255
+ """Generate Godoc-style markdown for all modules combined.
256
+
257
+ Organizes content by module path, with types and functions grouped
258
+ within each module section.
259
+ """
260
+ lines = []
261
+
262
+ # Generate index of all modules
263
+ lines.append(f"## {language} Index")
264
+ lines.append("")
265
+ lines.extend(
266
+ f"- [{module.module_path}](#{self._anchor(module.module_path)})"
267
+ for module in sorted(modules, key=lambda m: m.module_path)
268
+ )
269
+ lines.append("")
270
+
271
+ # Generate documentation for each module
272
+ for module in sorted(modules, key=lambda m: m.module_path):
273
+ lines.extend(self._generate_module_section(module))
274
+
275
+ return "\n".join(lines)
276
+
277
+ def _anchor(self, text: str) -> str:
278
+ """Generate markdown anchor from text.
279
+
280
+ Follows GitHub-flavored markdown heading ID generation:
281
+ - Convert to lowercase
282
+ - Replace spaces with hyphens
283
+ - Remove punctuation except hyphens and underscores
284
+ - Replace slashes and dots with hyphens
285
+ """
286
+ import re
287
+
288
+ # Convert to lowercase
289
+ anchor = text.lower()
290
+
291
+ # Replace slashes and dots with hyphens
292
+ anchor = anchor.replace("/", "-").replace(".", "-")
293
+
294
+ # Remove any characters that aren't alphanumeric, hyphens, or underscores
295
+ anchor = re.sub(r"[^a-z0-9\-_]", "", anchor)
296
+
297
+ # Replace multiple consecutive hyphens with a single hyphen
298
+ anchor = re.sub(r"-+", "-", anchor)
299
+
300
+ # Strip leading/trailing hyphens
301
+ return anchor.strip("-")
302
+
303
+ def _generate_module_section(self, module: ModuleDefinition) -> list[str]:
304
+ """Generate markdown section for a single module."""
305
+ lines = []
306
+
307
+ # Module header and docstring
308
+ lines.append(f"## {module.module_path}")
309
+ lines.append("")
310
+ if module.module_docstring:
311
+ lines.append(module.module_docstring)
312
+ lines.append("")
313
+
314
+ # Add subsections in godoc order: constants, types, functions
315
+ lines.extend(self._format_constants_section(module))
316
+ lines.extend(self._format_types_section(module))
317
+ lines.extend(self._format_functions_section(module))
318
+ lines.extend(self._format_source_files_section(module))
319
+
320
+ return lines
321
+
322
+ def _format_constants_section(self, module: ModuleDefinition) -> list[str]:
323
+ """Format constants section for a module."""
324
+ if not module.constants:
325
+ return []
326
+
327
+ lines = ["### Constants", ""]
328
+ for _name, node in module.constants:
329
+ parsed_file = self._find_parsed_file(module, node)
330
+ if parsed_file:
331
+ signature = self._extract_source(parsed_file, node)
332
+ lines.append("```")
333
+ lines.append(signature.strip())
334
+ lines.append("```")
335
+ lines.append("")
336
+ return lines
337
+
338
+ def _format_functions_section(self, module: ModuleDefinition) -> list[str]:
339
+ """Format functions section for a module."""
340
+ if not module.functions:
341
+ return []
342
+
343
+ # Filter out invalid function names (minified, anonymous, etc.)
344
+ valid_functions = [
345
+ f for f in module.functions if self._is_valid_function_name(f.simple_name)
346
+ ]
347
+
348
+ if not valid_functions:
349
+ return []
350
+
351
+ lines = ["### Functions", ""]
352
+ for func in sorted(valid_functions, key=lambda f: f.simple_name):
353
+ lines.extend(self._format_function_standalone(func, module))
354
+ return lines
355
+
356
+ def _format_types_section(self, module: ModuleDefinition) -> list[str]:
357
+ """Format types section for a module."""
358
+ if not (module.types or module.classes):
359
+ return []
360
+
361
+ lines = ["### Types", ""]
362
+
363
+ # Format type definitions
364
+ for typ in sorted(module.types, key=lambda t: t.simple_name):
365
+ lines.extend(self._format_type(typ, module))
366
+
367
+ # Format class definitions with methods
368
+ for cls in sorted(module.classes, key=lambda c: c.simple_name):
369
+ lines.extend(self._format_class(cls, module))
370
+
371
+ return lines
372
+
373
+ def _format_source_files_section(self, module: ModuleDefinition) -> list[str]:
374
+ """Format source files section for a module."""
375
+ from pathlib import Path
376
+
377
+ lines = ["### Source Files", ""]
378
+ # Filter out __init__.py files as they're implementation details
379
+ # The module itself represents the package
380
+ non_init_files = [
381
+ parsed
382
+ for parsed in module.files
383
+ if Path(parsed.git_file.path).name != "__init__.py"
384
+ ]
385
+ lines.extend(
386
+ f"- `{parsed.git_file.path}`"
387
+ for parsed in sorted(non_init_files, key=lambda f: f.git_file.path)
388
+ )
389
+ lines.append("")
390
+ return lines
391
+
392
+ def _format_function_standalone(
393
+ self, func: FunctionDefinition, module: ModuleDefinition
394
+ ) -> list[str]:
395
+ """Format a standalone function."""
396
+ # For Go methods, extract receiver type for godoc-style heading
397
+ parsed_file = self._find_parsed_file_for_function(module, func)
398
+ if parsed_file and func.is_method:
399
+ receiver_type = self._extract_go_receiver_type(func.node, parsed_file)
400
+ if receiver_type:
401
+ heading = f"#### func ({receiver_type}) {func.simple_name}"
402
+ else:
403
+ heading = f"#### {func.simple_name}"
404
+ else:
405
+ heading = f"#### {func.simple_name}"
406
+
407
+ lines = [heading, ""]
408
+
409
+ # Signature
410
+ if parsed_file:
411
+ signature = self._extract_source(parsed_file, func.node)
412
+ lines.append("```")
413
+ lines.append(signature.strip())
414
+ lines.append("```")
415
+ lines.append("")
416
+
417
+ # Documentation
418
+ if func.docstring:
419
+ lines.append(func.docstring)
420
+ lines.append("")
421
+
422
+ return lines
423
+
424
+ def _generate_markdown(self, module: ModuleDefinition) -> str: # noqa: C901
425
+ """Generate Go-Doc style Markdown for a module."""
426
+ lines = []
427
+
428
+ # Header
429
+ lines.append(f"# package {module.module_path}")
430
+ lines.append("")
431
+
432
+ # Overview section (module docstring)
433
+ if module.module_docstring:
434
+ lines.append("## Overview")
435
+ lines.append("")
436
+ lines.append(module.module_docstring)
437
+ lines.append("")
438
+
439
+ # Index
440
+ if self._should_generate_index(module):
441
+ lines.extend(self._generate_index(module))
442
+ lines.append("")
443
+
444
+ # Constants
445
+ if module.constants:
446
+ lines.append("## Constants")
447
+ lines.append("")
448
+ for _name, node in module.constants:
449
+ parsed_file = self._find_parsed_file(module, node)
450
+ if parsed_file:
451
+ signature = self._extract_source(parsed_file, node)
452
+ lines.append("```")
453
+ lines.append(signature.strip())
454
+ lines.append("```")
455
+ lines.append("")
456
+
457
+ # Functions
458
+ if module.functions:
459
+ lines.append("## Functions")
460
+ lines.append("")
461
+ for func in sorted(module.functions, key=lambda f: f.simple_name):
462
+ lines.extend(self._format_function(func, module))
463
+
464
+ # Types
465
+ if module.types:
466
+ lines.append("## Types")
467
+ lines.append("")
468
+ for typ in sorted(module.types, key=lambda t: t.simple_name):
469
+ lines.extend(self._format_type(typ, module))
470
+
471
+ if module.classes:
472
+ if not module.types:
473
+ lines.append("## Types")
474
+ lines.append("")
475
+ for cls in sorted(module.classes, key=lambda c: c.simple_name):
476
+ lines.extend(self._format_class(cls, module))
477
+
478
+ # Source Files
479
+ lines.append("## Source Files")
480
+ lines.append("")
481
+ lines.extend(f"- {parsed.git_file.path}" for parsed in module.files)
482
+ lines.append("")
483
+
484
+ return "\n".join(lines)
485
+
486
+ def _should_generate_index(self, module: ModuleDefinition) -> bool:
487
+ """Check if we should generate an index."""
488
+ total_items = (
489
+ len(module.constants)
490
+ + len(module.functions)
491
+ + len(module.types)
492
+ + len(module.classes)
493
+ )
494
+ return total_items > 3
495
+
496
+ def _generate_index(self, module: ModuleDefinition) -> list[str]:
497
+ """Generate an index of all public items."""
498
+ lines = ["## Index", ""]
499
+
500
+ if module.constants:
501
+ lines.append("### Constants")
502
+ for name, _ in sorted(module.constants, key=lambda c: c[0]):
503
+ lines.append(f"- `{name}`")
504
+ lines.append("")
505
+
506
+ if module.functions:
507
+ lines.append("### Functions")
508
+ for func in sorted(module.functions, key=lambda f: f.simple_name):
509
+ sig = self._generate_function_signature_short(func)
510
+ lines.append(f"- `{sig}`")
511
+ lines.append("")
512
+
513
+ if module.types or module.classes:
514
+ lines.append("### Types")
515
+ lines.extend(
516
+ f"- `type {typ.simple_name}`"
517
+ for typ in sorted(module.types, key=lambda t: t.simple_name)
518
+ )
519
+ lines.extend(
520
+ f"- `type {cls.simple_name}`"
521
+ for cls in sorted(module.classes, key=lambda c: c.simple_name)
522
+ )
523
+ lines.append("")
524
+
525
+ return lines
526
+
527
+ def _generate_function_signature_short(self, func: FunctionDefinition) -> str:
528
+ """Generate short function signature for index."""
529
+ params = ", ".join(func.parameters) if func.parameters else "..."
530
+ ret = f" -> {func.return_type}" if func.return_type else ""
531
+ return f"{func.simple_name}({params}){ret}"
532
+
533
+ def _format_function(
534
+ self, func: FunctionDefinition, module: ModuleDefinition
535
+ ) -> list[str]:
536
+ """Format a function in Go-Doc style."""
537
+ lines = [f"### func {func.simple_name}", ""]
538
+
539
+ # Signature
540
+ parsed_file = self._find_parsed_file_for_function(module, func)
541
+ if parsed_file:
542
+ signature = self._extract_source(parsed_file, func.node)
543
+ lines.append("```")
544
+ lines.append(signature.strip())
545
+ lines.append("```")
546
+ lines.append("")
547
+
548
+ # Documentation
549
+ if func.docstring:
550
+ lines.append(func.docstring)
551
+ lines.append("")
552
+
553
+ return lines
554
+
555
+ def _format_type(self, typ: TypeDefinition, module: ModuleDefinition) -> list[str]:
556
+ """Format a type in Go-Doc style."""
557
+ lines = [f"#### type {typ.simple_name}", ""]
558
+
559
+ # Signature
560
+ parsed_file = self._find_parsed_file_for_type(module, typ)
561
+ if parsed_file:
562
+ signature = self._extract_source(parsed_file, typ.node)
563
+ lines.append("```")
564
+ lines.append(signature.strip())
565
+ lines.append("```")
566
+ lines.append("")
567
+
568
+ # Documentation
569
+ if typ.docstring:
570
+ lines.append(typ.docstring)
571
+ lines.append("")
572
+
573
+ return lines
574
+
575
+ def _format_class(
576
+ self, cls: ClassDefinition, module: ModuleDefinition
577
+ ) -> list[str]:
578
+ """Format a class in Go-Doc style."""
579
+ lines = [f"### type {cls.simple_name}", ""]
580
+
581
+ # Class signature
582
+ parsed_file = self._find_parsed_file_for_class(module, cls)
583
+ if parsed_file:
584
+ signature = self._extract_source(parsed_file, cls.node)
585
+ lines.append("```")
586
+ lines.append(signature.strip())
587
+ lines.append("```")
588
+ lines.append("")
589
+
590
+ # Class documentation
591
+ if cls.docstring:
592
+ lines.append(cls.docstring)
593
+ lines.append("")
594
+
595
+ # Methods - filter out invalid method names
596
+ if cls.methods:
597
+ valid_methods = [
598
+ m for m in cls.methods if self._is_valid_function_name(m.simple_name)
599
+ ]
600
+ for method in sorted(valid_methods, key=lambda m: m.simple_name):
601
+ lines.extend(self._format_method(method, cls, module))
602
+
603
+ return lines
604
+
605
+ def _format_method(
606
+ self,
607
+ method: FunctionDefinition,
608
+ cls: ClassDefinition,
609
+ module: ModuleDefinition,
610
+ ) -> list[str]:
611
+ """Format a method in Go-Doc style."""
612
+ lines = [f"#### func ({cls.simple_name}) {method.simple_name}", ""]
613
+
614
+ # Method signature
615
+ parsed_file = self._find_parsed_file_for_function(module, method)
616
+ if parsed_file:
617
+ signature = self._extract_source(parsed_file, method.node)
618
+ lines.append("```")
619
+ lines.append(signature.strip())
620
+ lines.append("```")
621
+ lines.append("")
622
+
623
+ # Method documentation
624
+ if method.docstring:
625
+ lines.append(method.docstring)
626
+ lines.append("")
627
+
628
+ return lines
629
+
630
+ def _extract_go_receiver_type(
631
+ self, node: object, parsed_file: ParsedFile
632
+ ) -> str | None:
633
+ """Extract Go receiver type from method declaration.
634
+
635
+ Returns the receiver type in godoc format.
636
+ Strips the parameter name, keeping only the type.
637
+ """
638
+ node_type = getattr(node, "type", None)
639
+ if not node_type or node_type != "method_declaration":
640
+ return None
641
+
642
+ # Find the parameter_list that represents the receiver
643
+ for child in node.children: # type: ignore[attr-defined]
644
+ if child.type == "parameter_list":
645
+ # This is the receiver parameter
646
+ for param_child in child.children:
647
+ if param_child.type == "parameter_declaration":
648
+ # Extract the type from the parameter
649
+ return self._extract_go_type_from_param(
650
+ param_child, parsed_file
651
+ )
652
+ # If we found the parameter_list but no parameter, break
653
+ break
654
+
655
+ return None
656
+
657
+ def _extract_go_type_from_param(
658
+ self, param_node: object, parsed_file: ParsedFile
659
+ ) -> str | None:
660
+ """Extract type from Go parameter declaration node."""
661
+ # Look for type children: pointer_type or type_identifier
662
+ for child in param_node.children: # type: ignore[attr-defined]
663
+ if child.type == "pointer_type" and hasattr(child, "start_byte"):
664
+ # Extract the type being pointed to
665
+ start = child.start_byte
666
+ end = child.end_byte
667
+ type_bytes = parsed_file.source_code[start:end]
668
+ try:
669
+ return type_bytes.decode("utf-8")
670
+ except UnicodeDecodeError:
671
+ return None
672
+ if (
673
+ child.type == "type_identifier"
674
+ and hasattr(child, "text")
675
+ and child.text
676
+ ):
677
+ # Direct type identifier
678
+ return child.text.decode("utf-8")
679
+
680
+ return None
681
+
682
+ def _find_parsed_file_for_function(
683
+ self, module: ModuleDefinition, func: FunctionDefinition
684
+ ) -> ParsedFile | None:
685
+ """Find the parsed file containing a function definition."""
686
+ # Match by file path from FunctionDefinition
687
+ for parsed in module.files:
688
+ if parsed.path == func.file:
689
+ return parsed
690
+
691
+ # Fallback: if we can't find by file path, this is an error condition
692
+ # Log a warning and return None to make the error visible
693
+ self.log.warning(
694
+ "Could not find parsed file for function",
695
+ module_path=module.module_path,
696
+ function_file=str(func.file),
697
+ file_count=len(module.files),
698
+ )
699
+ return None
700
+
701
+ def _find_parsed_file_for_type(
702
+ self, module: ModuleDefinition, typ: TypeDefinition
703
+ ) -> ParsedFile | None:
704
+ """Find the parsed file containing a type definition."""
705
+ # Match by file path from TypeDefinition
706
+ for parsed in module.files:
707
+ if parsed.path == typ.file:
708
+ return parsed
709
+
710
+ # Fallback: if we can't find by file path, this is an error condition
711
+ # Log a warning and return None to make the error visible
712
+ self.log.warning(
713
+ "Could not find parsed file for type",
714
+ module_path=module.module_path,
715
+ type_file=str(typ.file),
716
+ file_count=len(module.files),
717
+ )
718
+ return None
719
+
720
def _find_parsed_file_for_class(
    self, module: ModuleDefinition, cls: ClassDefinition
) -> ParsedFile | None:
    """Look up the parsed file that a class definition belongs to.

    The module's files are scanned in order and the first one whose ``path``
    equals ``cls.file`` is returned. If no file matches, a warning is logged
    and ``None`` is returned so the miss stays visible to the caller.
    """
    match = next(
        (candidate for candidate in module.files if candidate.path == cls.file),
        None,
    )

    if match is None:
        # No file matched by path: treat as an error condition and surface it.
        self.log.warning(
            "Could not find parsed file for class",
            module_path=module.module_path,
            class_file=str(cls.file),
            file_count=len(module.files),
        )

    return match
738
+
739
def _find_parsed_file(
    self, module: ModuleDefinition, node: object
) -> ParsedFile | None:
    """Look up the parsed file that owns an arbitrary node.

    Matching is done by comparing the node's ``tree`` attribute with the
    ``tree`` of each file in the module; the first equal one wins. Nodes
    without a ``tree`` attribute, or with a tree that no file owns, produce a
    logged warning and a ``None`` result.
    """
    if hasattr(node, "tree"):
        wanted_tree = node.tree  # type: ignore[attr-defined]
        owner = next(
            (
                candidate
                for candidate in module.files
                if candidate.tree == wanted_tree
            ),
            None,
        )
        if owner is not None:
            return owner

    # Nothing matched by tree: treat as an error condition and surface it.
    self.log.warning(
        "Could not find parsed file for node",
        module_path=module.module_path,
        file_count=len(module.files),
    )
    return None
758
+
759
def _extract_source(self, parsed_file: ParsedFile | None, node: object) -> str:
    """Return the signature text of a node, or a placeholder when unavailable.

    The node's ``start_byte``/``end_byte`` span is sliced out of the parsed
    file's ``source_code``, decoded as UTF-8 and reduced to its signature via
    ``_extract_signature_only``. The placeholder ``"<source unavailable>"`` is
    returned when the parsed file is missing/falsy, when the node lacks either
    byte offset, or when slicing/decoding fails.
    """
    unavailable = "<source unavailable>"

    if not parsed_file or not (
        hasattr(node, "start_byte") and hasattr(node, "end_byte")
    ):
        return unavailable

    span = slice(
        node.start_byte,  # type: ignore[attr-defined]
        node.end_byte,  # type: ignore[attr-defined]
    )

    try:
        raw = parsed_file.source_code[span]
        # Only the declaration is wanted, not the body.
        return self._extract_signature_only(raw.decode("utf-8"))
    except (UnicodeDecodeError, IndexError):
        return unavailable
776
+
777
+ def _extract_signature_only(self, source: str) -> str:
778
+ """Extract just the signature from a definition.
779
+
780
+ This removes function bodies and only keeps the declaration/signature.
781
+ For Go types (structs, interfaces), includes the full definition.
782
+ """
783
+ lines = source.split("\n")
784
+
785
+ # Check if this is a Go type definition
786
+ # (starts with type name followed by struct/interface)
787
+ first_line = lines[0].strip() if lines else ""
788
+ is_go_type = any(keyword in first_line for keyword in [" struct", " interface"])
789
+
790
+ if is_go_type:
791
+ # For Go types, include the full definition including the body
792
+ # Find the matching closing brace
793
+ brace_count = 0
794
+ signature_lines = []
795
+
796
+ for line in lines:
797
+ signature_lines.append(line)
798
+ # Count braces to find the end of the type definition
799
+ brace_count += line.count("{") - line.count("}")
800
+
801
+ # If we've closed all braces, we're done
802
+ if brace_count == 0 and "{" in "".join(signature_lines):
803
+ break
804
+
805
+ return "\n".join(signature_lines)
806
+
807
+ # For functions, extract just the signature
808
+ signature_lines = []
809
+
810
+ for line in lines:
811
+ # Stop at the first line that ends a signature
812
+ signature_lines.append(line)
813
+
814
+ # Check for end of signature markers
815
+ stripped = line.strip()
816
+
817
+ # Python: colon ends signature (unless inside brackets)
818
+ if ":" in line:
819
+ open_parens = line.count("(") - line.count(")")
820
+ open_brackets = line.count("[") - line.count("]")
821
+ open_braces = line.count("{") - line.count("}")
822
+ if open_parens == 0 and open_brackets == 0 and open_braces == 0:
823
+ break
824
+
825
+ # Go/Java/C/C++/Rust/JS: opening brace often starts body
826
+ if stripped.endswith("{"):
827
+ # Remove the opening brace for cleaner signatures
828
+ signature_lines[-1] = line.rstrip("{").rstrip()
829
+ break
830
+
831
+ # Go: if signature ends without brace on same line
832
+ if stripped.endswith(")") and not any(c in line for c in ["{", ":"]):
833
+ # Might be complete - check if next line exists
834
+ continue
835
+
836
+ return "\n".join(signature_lines)