cicada-mcp 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cicada-mcp might be problematic. Click here for more details.
- cicada/__init__.py +30 -0
- cicada/clean.py +297 -0
- cicada/command_logger.py +293 -0
- cicada/dead_code_analyzer.py +282 -0
- cicada/extractors/__init__.py +36 -0
- cicada/extractors/base.py +66 -0
- cicada/extractors/call.py +176 -0
- cicada/extractors/dependency.py +361 -0
- cicada/extractors/doc.py +179 -0
- cicada/extractors/function.py +246 -0
- cicada/extractors/module.py +123 -0
- cicada/extractors/spec.py +151 -0
- cicada/find_dead_code.py +270 -0
- cicada/formatter.py +918 -0
- cicada/git_helper.py +646 -0
- cicada/indexer.py +629 -0
- cicada/install.py +724 -0
- cicada/keyword_extractor.py +364 -0
- cicada/keyword_search.py +553 -0
- cicada/lightweight_keyword_extractor.py +298 -0
- cicada/mcp_server.py +1559 -0
- cicada/mcp_tools.py +291 -0
- cicada/parser.py +124 -0
- cicada/pr_finder.py +435 -0
- cicada/pr_indexer/__init__.py +20 -0
- cicada/pr_indexer/cli.py +62 -0
- cicada/pr_indexer/github_api_client.py +431 -0
- cicada/pr_indexer/indexer.py +297 -0
- cicada/pr_indexer/line_mapper.py +209 -0
- cicada/pr_indexer/pr_index_builder.py +253 -0
- cicada/setup.py +339 -0
- cicada/utils/__init__.py +52 -0
- cicada/utils/call_site_formatter.py +95 -0
- cicada/utils/function_grouper.py +57 -0
- cicada/utils/hash_utils.py +173 -0
- cicada/utils/index_utils.py +290 -0
- cicada/utils/path_utils.py +240 -0
- cicada/utils/signature_builder.py +106 -0
- cicada/utils/storage.py +111 -0
- cicada/utils/subprocess_runner.py +182 -0
- cicada/utils/text_utils.py +90 -0
- cicada/version_check.py +116 -0
- cicada_mcp-0.1.4.dist-info/METADATA +619 -0
- cicada_mcp-0.1.4.dist-info/RECORD +48 -0
- cicada_mcp-0.1.4.dist-info/WHEEL +5 -0
- cicada_mcp-0.1.4.dist-info/entry_points.txt +8 -0
- cicada_mcp-0.1.4.dist-info/licenses/LICENSE +21 -0
- cicada_mcp-0.1.4.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,361 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Dependency extraction logic (alias, import, require, use).
|
|
3
|
+
|
|
4
|
+
Author: Cursor(Auto)
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def extract_aliases(node, source_code: bytes) -> dict:
    """Collect every alias declaration found beneath ``node``.

    Args:
        node: Syntax-tree node to search (the caller passes a module body).
        source_code: Raw bytes the syntax tree was parsed from.

    Returns:
        Mapping of short alias name to full module name.
    """
    found: dict = {}
    _find_aliases_recursive(node, source_code, found)
    return found
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _find_aliases_recursive(node, source_code: bytes, aliases: dict):
    """Walk ``node`` depth-first and merge each ``alias`` call into ``aliases``.

    Args:
        node: Syntax-tree node to inspect and descend into.
        source_code: Raw bytes the syntax tree was parsed from.
        aliases: Output mapping, updated in place with {short_name: full_name}.
    """
    skipped_macros = ("def", "defp", "defmodule")

    def text_of(n) -> str:
        return source_code[n.start_byte : n.end_byte].decode("utf-8")

    if node.type == "call":
        callee = None
        args = None
        # The last matching child of each kind wins, as there is no early exit.
        for part in node.children:
            if part.type == "identifier":
                callee = part
            elif part.type == "arguments":
                args = part

        if callee and args and text_of(callee) == "alias":
            parsed = _parse_alias(args, source_code)
            if parsed:
                aliases.update(parsed)

    for sub in node.children:
        # Do not descend into def/defp/defmodule calls: declarations inside
        # function bodies or nested modules are not collected.
        if sub.type == "call" and any(
            text_of(part) in skipped_macros
            for part in sub.children
            if part.type == "identifier"
        ):
            continue
        _find_aliases_recursive(sub, source_code, aliases)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _parse_alias(arguments_node, source_code: bytes) -> dict | None:
|
|
59
|
+
"""
|
|
60
|
+
Parse an alias declaration.
|
|
61
|
+
|
|
62
|
+
Handles:
|
|
63
|
+
- alias MyApp.User -> {User: MyApp.User}
|
|
64
|
+
- alias MyApp.User, as: U -> {U: MyApp.User}
|
|
65
|
+
- alias MyApp.{User, Post} -> {User: MyApp.User, Post: MyApp.Post}
|
|
66
|
+
"""
|
|
67
|
+
result = {}
|
|
68
|
+
|
|
69
|
+
for arg_child in arguments_node.children:
|
|
70
|
+
# Simple alias: alias MyApp.User
|
|
71
|
+
if arg_child.type == "alias":
|
|
72
|
+
full_name = source_code[arg_child.start_byte : arg_child.end_byte].decode(
|
|
73
|
+
"utf-8"
|
|
74
|
+
)
|
|
75
|
+
# Get the last part as the short name
|
|
76
|
+
short_name = full_name.split(".")[-1]
|
|
77
|
+
result[short_name] = full_name
|
|
78
|
+
|
|
79
|
+
# Alias with tuple: alias MyApp.{User, Post}
|
|
80
|
+
elif arg_child.type == "dot":
|
|
81
|
+
# The dot node contains: alias (module prefix), dot, and tuple
|
|
82
|
+
module_prefix = None
|
|
83
|
+
tuple_node = None
|
|
84
|
+
|
|
85
|
+
for dot_child in arg_child.children:
|
|
86
|
+
if dot_child.type == "alias":
|
|
87
|
+
module_prefix = source_code[
|
|
88
|
+
dot_child.start_byte : dot_child.end_byte
|
|
89
|
+
].decode("utf-8")
|
|
90
|
+
elif dot_child.type == "tuple":
|
|
91
|
+
tuple_node = dot_child
|
|
92
|
+
|
|
93
|
+
if module_prefix and tuple_node:
|
|
94
|
+
# Extract each alias from the tuple
|
|
95
|
+
for tuple_child in tuple_node.children:
|
|
96
|
+
if tuple_child.type == "alias":
|
|
97
|
+
short_name = source_code[
|
|
98
|
+
tuple_child.start_byte : tuple_child.end_byte
|
|
99
|
+
].decode("utf-8")
|
|
100
|
+
full_name = f"{module_prefix}.{short_name}"
|
|
101
|
+
result[short_name] = full_name
|
|
102
|
+
|
|
103
|
+
# Keyword list for 'as:' option
|
|
104
|
+
elif arg_child.type == "keywords":
|
|
105
|
+
# Find the 'as:' keyword
|
|
106
|
+
for kw_child in arg_child.children:
|
|
107
|
+
if kw_child.type == "pair":
|
|
108
|
+
key_text = None
|
|
109
|
+
alias_name = None
|
|
110
|
+
for pair_child in kw_child.children:
|
|
111
|
+
if pair_child.type == "keyword":
|
|
112
|
+
# Get keyword text (e.g., "as:")
|
|
113
|
+
key_text = source_code[
|
|
114
|
+
pair_child.start_byte : pair_child.end_byte
|
|
115
|
+
].decode("utf-8")
|
|
116
|
+
elif pair_child.type == "alias":
|
|
117
|
+
alias_name = source_code[
|
|
118
|
+
pair_child.start_byte : pair_child.end_byte
|
|
119
|
+
].decode("utf-8")
|
|
120
|
+
|
|
121
|
+
# If we found 'as:', update the result to use custom name
|
|
122
|
+
if key_text and "as" in key_text and alias_name:
|
|
123
|
+
# Get the full module name from previous arg
|
|
124
|
+
for prev_arg in arguments_node.children:
|
|
125
|
+
if prev_arg.type == "alias":
|
|
126
|
+
full_name = source_code[
|
|
127
|
+
prev_arg.start_byte : prev_arg.end_byte
|
|
128
|
+
].decode("utf-8")
|
|
129
|
+
# Remove the default short name and add custom one
|
|
130
|
+
result.clear()
|
|
131
|
+
result[alias_name] = full_name
|
|
132
|
+
break
|
|
133
|
+
|
|
134
|
+
return result if result else None
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def extract_imports(node, source_code: bytes) -> list:
    """Collect every import declaration found beneath ``node``.

    Args:
        node: Syntax-tree node to search (the caller passes a module body).
        source_code: Raw bytes the syntax tree was parsed from.

    Returns:
        Imported module names, in the order they are encountered.
    """
    found: list = []
    _find_imports_recursive(node, source_code, found)
    return found
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def _find_imports_recursive(node, source_code: bytes, imports: list):
    """Walk ``node`` depth-first and append each imported module to ``imports``.

    Only the module name is recorded; options such as ``only: [func: 1]``
    are ignored.

    Args:
        node: Syntax-tree node to inspect and descend into.
        source_code: Raw bytes the syntax tree was parsed from.
        imports: Output list, extended in place with module names.
    """
    skipped_macros = ("def", "defp", "defmodule")

    def text_of(n) -> str:
        return source_code[n.start_byte : n.end_byte].decode("utf-8")

    if node.type == "call":
        callee = None
        args = None
        # The last matching child of each kind wins, as there is no early exit.
        for part in node.children:
            if part.type == "identifier":
                callee = part
            elif part.type == "arguments":
                args = part

        if callee and args and text_of(callee) == "import":
            imports.extend(
                text_of(arg) for arg in args.children if arg.type == "alias"
            )

    for sub in node.children:
        # Do not descend into def/defp/defmodule calls: declarations inside
        # function bodies or nested modules are not collected.
        if sub.type == "call" and any(
            text_of(part) in skipped_macros
            for part in sub.children
            if part.type == "identifier"
        ):
            continue
        _find_imports_recursive(sub, source_code, imports)
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
def extract_requires(node, source_code: bytes) -> list:
    """Collect every require declaration found beneath ``node``.

    Args:
        node: Syntax-tree node to search (the caller passes a module body).
        source_code: Raw bytes the syntax tree was parsed from.

    Returns:
        Required module names, in the order they are encountered.
    """
    found: list = []
    _find_requires_recursive(node, source_code, found)
    return found
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def _find_requires_recursive(node, source_code: bytes, requires: list):
    """Walk ``node`` depth-first and append each required module to ``requires``.

    Args:
        node: Syntax-tree node to inspect and descend into.
        source_code: Raw bytes the syntax tree was parsed from.
        requires: Output list, extended in place with module names.
    """
    skipped_macros = ("def", "defp", "defmodule")

    def text_of(n) -> str:
        return source_code[n.start_byte : n.end_byte].decode("utf-8")

    if node.type == "call":
        callee = None
        args = None
        # The last matching child of each kind wins, as there is no early exit.
        for part in node.children:
            if part.type == "identifier":
                callee = part
            elif part.type == "arguments":
                args = part

        if callee and args and text_of(callee) == "require":
            requires.extend(
                text_of(arg) for arg in args.children if arg.type == "alias"
            )

    for sub in node.children:
        # Do not descend into def/defp/defmodule calls: declarations inside
        # function bodies or nested modules are not collected.
        if sub.type == "call" and any(
            text_of(part) in skipped_macros
            for part in sub.children
            if part.type == "identifier"
        ):
            continue
        _find_requires_recursive(sub, source_code, requires)
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
def extract_uses(node, source_code: bytes) -> list:
    """Collect every use declaration found beneath ``node``.

    Args:
        node: Syntax-tree node to search (the caller passes a module body).
        source_code: Raw bytes the syntax tree was parsed from.

    Returns:
        Used module names, in the order they are encountered.
    """
    found: list = []
    _find_uses_recursive(node, source_code, found)
    return found
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
def _find_uses_recursive(node, source_code: bytes, uses: list):
    """Walk ``node`` depth-first and append each used module to ``uses``.

    Args:
        node: Syntax-tree node to inspect and descend into.
        source_code: Raw bytes the syntax tree was parsed from.
        uses: Output list, extended in place with module names.
    """
    skipped_macros = ("def", "defp", "defmodule")

    def text_of(n) -> str:
        return source_code[n.start_byte : n.end_byte].decode("utf-8")

    if node.type == "call":
        callee = None
        args = None
        # The last matching child of each kind wins, as there is no early exit.
        for part in node.children:
            if part.type == "identifier":
                callee = part
            elif part.type == "arguments":
                args = part

        if callee and args and text_of(callee) == "use":
            uses.extend(text_of(arg) for arg in args.children if arg.type == "alias")

    for sub in node.children:
        # Do not descend into def/defp/defmodule calls: declarations inside
        # function bodies or nested modules are not collected.
        if sub.type == "call" and any(
            text_of(part) in skipped_macros
            for part in sub.children
            if part.type == "identifier"
        ):
            continue
        _find_uses_recursive(sub, source_code, uses)
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
def extract_behaviours(node, source_code: bytes) -> list:
    """Collect every @behaviour declaration found beneath ``node``.

    Args:
        node: Syntax-tree node to search (the caller passes a module body).
        source_code: Raw bytes the syntax tree was parsed from.

    Returns:
        Behaviour module names, in the order they are encountered.
    """
    found: list = []
    _find_behaviours_recursive(node, source_code, found)
    return found
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
def _find_behaviours_recursive(node, source_code: bytes, behaviours: list):
    """Walk ``node`` depth-first and append each @behaviour target to ``behaviours``.

    Both ``@behaviour ModuleName`` (alias argument) and
    ``@behaviour :module_name`` (atom argument, stored without its leading
    colon) are recognised.

    Args:
        node: Syntax-tree node to inspect and descend into.
        source_code: Raw bytes the syntax tree was parsed from.
        behaviours: Output list, extended in place with behaviour names.
    """
    skipped_macros = ("def", "defp", "defmodule")

    def text_of(n) -> str:
        return source_code[n.start_byte : n.end_byte].decode("utf-8")

    if node.type == "unary_operator":
        seen_at = False
        attribute_call = None
        # Only a call that follows an "@" token counts as the attribute body.
        for part in node.children:
            if part.type == "@":
                seen_at = True
            elif seen_at and part.type == "call":
                attribute_call = part
                break

        if attribute_call:
            attr_name = None
            args = None
            for part in attribute_call.children:
                if part.type == "identifier":
                    attr_name = text_of(part)
                elif part.type == "arguments":
                    args = part

            if attr_name == "behaviour" and args:
                for arg in args.children:
                    if arg.type == "alias":
                        behaviours.append(text_of(arg))
                    elif arg.type == "atom":
                        behaviours.append(text_of(arg).lstrip(":"))

    for sub in node.children:
        # Do not descend into def/defp/defmodule calls: declarations inside
        # function bodies or nested modules are not collected.
        if sub.type == "call" and any(
            text_of(part) in skipped_macros
            for part in sub.children
            if part.type == "identifier"
        ):
            continue
        _find_behaviours_recursive(sub, source_code, behaviours)
|
cicada/extractors/doc.py
ADDED
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Documentation extraction logic.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import textwrap
|
|
6
|
+
from .base import extract_string_from_arguments
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def extract_docs(node, source_code: bytes) -> dict:
    """Collect every @doc attribute found beneath ``node``.

    Args:
        node: Syntax-tree node to search (the caller passes a module body).
        source_code: Raw bytes the syntax tree was parsed from.

    Returns:
        Mapping of the @doc attribute's line number to its parsed doc info.
    """
    found: dict = {}
    _find_docs_recursive(node, source_code, found)
    return found
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _find_docs_recursive(node, source_code: bytes, docs: dict):
    """Walk ``node`` depth-first and record each @doc attribute in ``docs``.

    Args:
        node: Syntax-tree node to inspect and descend into.
        source_code: Raw bytes the syntax tree was parsed from.
        docs: Output mapping, updated in place; keyed by the 1-based line of
            the attribute, valued by the dict returned from ``_parse_doc``.
    """
    skipped_macros = ("defmodule", "def", "defp")

    def text_of(n) -> str:
        return source_code[n.start_byte : n.end_byte].decode("utf-8")

    # Attributes (@doc, @spec, ...) appear as unary_operator nodes.
    if node.type == "unary_operator":
        at_sign = None
        attribute_call = None
        for part in node.children:
            if part.type == "@":
                at_sign = part
            elif part.type == "call":
                attribute_call = part

        if at_sign and attribute_call:
            for part in attribute_call.children:
                if part.type != "identifier" or text_of(part) != "doc":
                    continue
                # start_point is zero-based; stored line numbers are one-based.
                parsed = _parse_doc(
                    attribute_call, source_code, node.start_point[0] + 1
                )
                if parsed:
                    docs[parsed["line"]] = parsed

    for sub in node.children:
        # Nested modules and function definitions are not searched.
        if sub.type == "call" and any(
            text_of(part) in skipped_macros
            for part in sub.children
            if part.type == "identifier"
        ):
            continue
        _find_docs_recursive(sub, source_code, docs)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _parse_doc(doc_node, source_code: bytes, line: int) -> dict | None:
    """Turn one @doc call node into a doc-info dict.

    Args:
        doc_node: The ``call`` node of the attribute, e.g. doc("text") or
            doc(false).
        source_code: Raw bytes the syntax tree was parsed from.
        line: Line number to record for this attribute.

    Returns:
        ``{"line": line, "text": ...}``, plus an ``"examples"`` key when an
        examples section was found; None when no arguments node yields a
        non-empty string.
    """
    argument_nodes = (
        part for part in doc_node.children if part.type == "arguments"
    )
    for arguments in argument_nodes:
        doc_text = extract_string_from_arguments(arguments, source_code)
        if not doc_text:
            continue

        # Split the examples section off from the main description.
        body, examples = _extract_examples_from_doc(doc_text)
        parsed = {"line": line, "text": body}
        if examples:
            parsed["examples"] = examples
        return parsed

    return None
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _extract_examples_from_doc(doc_text: str) -> tuple[str, str | None]:
|
|
83
|
+
"""
|
|
84
|
+
Extract the ## Examples or # Examples section from doc text.
|
|
85
|
+
|
|
86
|
+
Args:
|
|
87
|
+
doc_text: The full @doc text
|
|
88
|
+
|
|
89
|
+
Returns:
|
|
90
|
+
Tuple of (doc_without_examples, examples_text)
|
|
91
|
+
"""
|
|
92
|
+
import re
|
|
93
|
+
|
|
94
|
+
# Look for ## Examples or # Examples heading (case-insensitive)
|
|
95
|
+
# Match at the start of a line, possibly preceded by whitespace
|
|
96
|
+
examples_pattern = r"^\s*#{1,2}\s+Examples?\s*$"
|
|
97
|
+
|
|
98
|
+
lines = doc_text.split("\n")
|
|
99
|
+
examples_start_idx = None
|
|
100
|
+
|
|
101
|
+
# Find the line where examples section starts
|
|
102
|
+
for i, line in enumerate(lines):
|
|
103
|
+
if re.match(examples_pattern, line, re.IGNORECASE):
|
|
104
|
+
examples_start_idx = i
|
|
105
|
+
break
|
|
106
|
+
|
|
107
|
+
# If no examples section found, return original doc with dedent
|
|
108
|
+
if examples_start_idx is None:
|
|
109
|
+
return textwrap.dedent(doc_text), None
|
|
110
|
+
|
|
111
|
+
# Find where examples section ends (next ## heading or end of doc)
|
|
112
|
+
examples_end_idx = len(lines)
|
|
113
|
+
for i in range(examples_start_idx + 1, len(lines)):
|
|
114
|
+
# Check if this line is another top-level heading (##)
|
|
115
|
+
if re.match(r"^\s*##\s+\w+", lines[i]):
|
|
116
|
+
examples_end_idx = i
|
|
117
|
+
break
|
|
118
|
+
|
|
119
|
+
# Extract the parts
|
|
120
|
+
doc_lines = lines[:examples_start_idx]
|
|
121
|
+
examples_lines = lines[examples_start_idx:examples_end_idx]
|
|
122
|
+
remaining_lines = lines[examples_end_idx:]
|
|
123
|
+
|
|
124
|
+
# Reconstruct doc without examples
|
|
125
|
+
doc_without_examples = "\n".join(doc_lines + remaining_lines).strip()
|
|
126
|
+
|
|
127
|
+
# Dedent the doc text
|
|
128
|
+
if doc_without_examples:
|
|
129
|
+
doc_without_examples = textwrap.dedent(doc_without_examples)
|
|
130
|
+
|
|
131
|
+
# Extract just the examples content (without the heading)
|
|
132
|
+
if len(examples_lines) <= 1:
|
|
133
|
+
return doc_without_examples, None
|
|
134
|
+
|
|
135
|
+
examples_lines_content = examples_lines[1:]
|
|
136
|
+
|
|
137
|
+
# Find minimum indentation from non-empty lines
|
|
138
|
+
min_indent: int | float = float("inf")
|
|
139
|
+
for line in examples_lines_content:
|
|
140
|
+
if line.strip(): # Skip blank lines
|
|
141
|
+
indent = len(line) - len(line.lstrip())
|
|
142
|
+
min_indent = min(min_indent, indent)
|
|
143
|
+
|
|
144
|
+
# Remove the common indentation from all lines
|
|
145
|
+
if min_indent < float("inf"):
|
|
146
|
+
dedented_lines: list[str] = []
|
|
147
|
+
min_indent_int = int(min_indent)
|
|
148
|
+
for line in examples_lines_content:
|
|
149
|
+
if line.strip(): # Non-empty line
|
|
150
|
+
dedented_lines.append(line[min_indent_int:])
|
|
151
|
+
else: # Empty line
|
|
152
|
+
dedented_lines.append(line)
|
|
153
|
+
examples_content = "\n".join(dedented_lines).strip()
|
|
154
|
+
else:
|
|
155
|
+
examples_content = "\n".join(examples_lines_content).strip()
|
|
156
|
+
|
|
157
|
+
return doc_without_examples, examples_content if examples_content else None
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def match_docs_to_functions(functions: list, docs: dict, max_lookback: int = 50):
    """Attach @doc text to functions based on line proximity.

    A @doc appears before its function, possibly with a @spec in between,
    so for each function the nearest doc entry within ``max_lookback`` lines
    above it is used.

    Args:
        functions: Function dicts, each with a ``"line"`` key. Matched entries
            are mutated in place: ``"doc"`` is set, and ``"examples"`` too
            when the doc info carries one.
        docs: Mapping of @doc line number to doc info. A doc info is normally
            a dict with ``"text"`` and optionally ``"examples"``; a bare
            string is still accepted for backward compatibility.
        max_lookback: How many lines above the function to search. Defaults
            to 50 to cover long docs and @spec attributes.
    """
    for func in functions:
        func_line = func["line"]
        # Scan upwards so the closest preceding @doc wins.
        for offset in range(1, max_lookback + 1):
            doc_info = docs.get(func_line - offset)
            if func_line - offset not in docs:
                continue

            if isinstance(doc_info, dict):
                func["doc"] = doc_info.get("text")
                if "examples" in doc_info:
                    func["examples"] = doc_info["examples"]
            else:
                # Backward compatibility: the entry is just the doc string.
                func["doc"] = doc_info
            break
|