sigmap 6.9.0 → 6.10.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +105 -137
- package/CHANGELOG.md +24 -0
- package/README.md +21 -4
- package/gen-context.js +214 -20
- package/package.json +1 -1
- package/packages/cli/package.json +1 -1
- package/packages/core/index.js +1 -0
- package/packages/core/package.json +1 -1
- package/src/discovery/language-detector.js +1 -0
- package/src/discovery/source-root-registry.js +9 -0
- package/src/discovery/source-root-resolver.js +5 -1
- package/src/eval/analyzer.js +1 -0
- package/src/extractors/python.js +33 -2
- package/src/extractors/python_ast.py +348 -0
- package/src/extractors/r.js +136 -0
- package/src/mcp/server.js +1 -1
- package/src/workspace/detector.js +85 -0
package/package.json
CHANGED
package/packages/core/index.js
CHANGED
|
@@ -19,6 +19,7 @@ const EXT_TO_LANG = {
|
|
|
19
19
|
'.java': 'java', '.kt': 'kotlin', '.cs': 'csharp', '.cpp': 'cpp',
|
|
20
20
|
'.c': 'cpp', '.h': 'cpp', '.hpp': 'cpp', '.swift': 'swift',
|
|
21
21
|
'.dart': 'dart', '.scala': 'scala', '.php': 'php',
|
|
22
|
+
'.r': 'r', '.R': 'r',
|
|
22
23
|
};
|
|
23
24
|
|
|
24
25
|
function detectLanguages(cwd) {
|
|
@@ -161,6 +161,15 @@ const REGISTRY = {
|
|
|
161
161
|
srcDirs: ['src/main/scala','src'],
|
|
162
162
|
penalties: ['target'],
|
|
163
163
|
},
|
|
164
|
+
|
|
165
|
+
r: {
|
|
166
|
+
manifestFiles: ['DESCRIPTION','renv.lock'],
|
|
167
|
+
frameworks: {
|
|
168
|
+
shiny: { detectionFiles: ['app.R','ui.R','server.R'], srcDirs: ['R','inst','tests'], entrypoints: ['app.R','server.R'] },
|
|
169
|
+
},
|
|
170
|
+
srcDirs: ['R','src','inst'],
|
|
171
|
+
penalties: ['renv','packrat','.Rcheck'],
|
|
172
|
+
},
|
|
164
173
|
};
|
|
165
174
|
|
|
166
175
|
module.exports = { REGISTRY };
|
|
@@ -181,7 +181,11 @@ function _applySpecialRules(scored, cwd, primaryFw, fwEntry, frameworks) {
|
|
|
181
181
|
function _dedupeNested(scored) {
|
|
182
182
|
const result = [];
|
|
183
183
|
for (const c of scored) {
|
|
184
|
-
const
|
|
184
|
+
const cNorm = c.dir.replace(/\\/g, '/');
|
|
185
|
+
const isNested = result.some(r => {
|
|
186
|
+
const rNorm = r.dir.replace(/\\/g, '/');
|
|
187
|
+
return cNorm.startsWith(rNorm + '/');
|
|
188
|
+
});
|
|
185
189
|
if (!isNested) result.push(c);
|
|
186
190
|
}
|
|
187
191
|
return result;
|
package/src/eval/analyzer.js
CHANGED
package/src/extractors/python.js
CHANGED
|
@@ -1,11 +1,42 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
|
+
const path = require('path');
|
|
4
|
+
|
|
5
|
+
/**
 * Try to extract signatures using the native Python AST extractor.
 *
 * Runs `python3 python_ast.py <filePath>` synchronously with a 5 second
 * timeout and parses its stdout as JSON. The call is strictly best-effort:
 * any failure yields null so the caller can use another extractor instead.
 *
 * @param {string} filePath - Absolute path to the Python file
 * @returns {string[]|null} Non-empty array of signature strings, or null when
 *   the helper could not be run or produced no usable output
 */
function tryNativeExtract(filePath) {
  try {
    const { execFileSync } = require('child_process');
    const scriptPath = path.join(__dirname, 'python_ast.py');
    const result = execFileSync('python3', [scriptPath, filePath], {
      timeout: 5000,
      encoding: 'utf8',
      // Without an explicit stdio the child's stderr is forwarded to our own
      // stderr, so interpreter warnings would leak into the caller's output.
      stdio: ['ignore', 'pipe', 'ignore'],
      windowsHide: true,
    });
    const sigs = JSON.parse(result.trim());
    // Only trust the payload when it has the documented shape.
    const isStringList = Array.isArray(sigs) && sigs.every((sig) => typeof sig === 'string');
    if (isStringList && sigs.length > 0) return sigs;
  } catch (_) {
    // Deliberately ignored: a failed spawn, a timeout or unparsable output
    // all mean "no native result", which is reported as null below.
  }
  return null;
}
|
|
24
|
+
|
|
3
25
|
/**
|
|
4
26
|
* Extract signatures from Python source code.
|
|
27
|
+
* When a real file path is provided, tries the native Python AST extractor first
|
|
28
|
+
* (more accurate for multiline signatures, stacked decorators, and type annotations).
|
|
29
|
+
* Falls back to the regex approach if Python3 is unavailable or returns no results.
|
|
5
30
|
* @param {string} src - Raw file content
|
|
31
|
+
* @param {string} [filePath] - Optional absolute path to the source file
|
|
6
32
|
* @returns {string[]} Array of signature strings
|
|
7
33
|
*/
|
|
8
|
-
function extract(src) {
|
|
34
|
+
function extract(src, filePath) {
|
|
35
|
+
// Prefer native AST extractor when a real file path is available
|
|
36
|
+
if (filePath && typeof filePath === 'string') {
|
|
37
|
+
const native = tryNativeExtract(filePath);
|
|
38
|
+
if (native) return native;
|
|
39
|
+
}
|
|
9
40
|
if (!src || typeof src !== 'string') return [];
|
|
10
41
|
const sigs = [];
|
|
11
42
|
|
|
@@ -200,4 +231,4 @@ function extractDocHint(src, fnName, fnSigLine) {
|
|
|
200
231
|
return sentence.slice(0, 60);
|
|
201
232
|
}
|
|
202
233
|
|
|
203
|
-
module.exports = { extract };
|
|
234
|
+
module.exports = { extract, tryNativeExtract };
|
|
@@ -0,0 +1,348 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
python_ast.py — Native Python AST-based signature extractor for SigMap.
|
|
4
|
+
|
|
5
|
+
More accurate than the JS regex approach:
|
|
6
|
+
- Handles multiline signatures correctly
|
|
7
|
+
- Decorator stacking resolved properly
|
|
8
|
+
- Type annotations extracted from AST nodes
|
|
9
|
+
- No false positives from regex on string contents
|
|
10
|
+
|
|
11
|
+
Usage (invoked by SigMap's python.js extractor, which falls back to its regex parser when this script is unavailable or returns nothing):
|
|
12
|
+
python3 python_ast.py <filepath>
|
|
13
|
+
|
|
14
|
+
Output: a single JSON array of signature strings, printed to stdout
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
import ast
|
|
18
|
+
import json
|
|
19
|
+
import sys
|
|
20
|
+
|
|
21
|
+
MAX_SIGS = 30
|
|
22
|
+
MAX_DOC_HINT_LEN = 60
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def annotation_to_str(node):
    """Render an annotation AST node as source text.

    Returns None when ``node`` is None. ``ast.unparse`` is tried first; if it
    raises for any reason (for example because the running interpreter does
    not provide it), a small manual renderer handles names, dotted attributes,
    subscripts, tuples and constants. Anything else is rendered as "...".
    """
    if node is None:
        return None

    try:
        return ast.unparse(node)
    except Exception:
        pass  # fall through to the manual renderer below

    if isinstance(node, ast.Name):
        return node.id
    if isinstance(node, ast.Attribute):
        owner = annotation_to_str(node.value)
        return f"{owner}.{node.attr}"
    if isinstance(node, ast.Subscript):
        container = annotation_to_str(node.value)
        index = annotation_to_str(node.slice)
        return f"{container}[{index}]"
    if isinstance(node, ast.Index):
        # Wrapper node around the real subscript expression.
        return annotation_to_str(node.value)
    if isinstance(node, ast.Tuple):
        return ", ".join(annotation_to_str(element) for element in node.elts)
    if isinstance(node, ast.Constant):
        return repr(node.value)
    return "..."
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def format_args(args_node):
    """Format an ``ast.arguments`` node into a compact parameter string.

    Each parameter is rendered as ``name``, ``name: annotation`` and/or with a
    ``=...`` suffix when it has a default (the default value itself is not
    shown). Positional-only parameters are followed by ``/`` and keyword-only
    parameters are preceded by ``*args`` or, when there is no ``*args``, by a
    bare ``*`` so the result keeps the calling convention of the original.

    Returns the parameters joined with ", " (an empty string when there are
    none).
    """

    def render(arg, has_default, prefix=""):
        # One parameter: optional */** prefix, annotation and default marker.
        token = f"{prefix}{arg.arg}"
        ann = annotation_to_str(arg.annotation) if arg.annotation else None
        if ann:
            token = f"{token}: {ann}"
        if has_default:
            token = f"{token}=..."
        return token

    parts = []

    # posonlyargs is not present on every supported interpreter's AST.
    posonly = list(getattr(args_node, "posonlyargs", None) or [])
    positional = posonly + list(args_node.args or [])
    defaults = args_node.defaults or []
    # ``defaults`` belongs to the LAST len(defaults) positional parameters,
    # counted across positional-only and regular parameters together.
    default_offset = len(positional) - len(defaults)

    for i, arg in enumerate(positional):
        parts.append(render(arg, i >= default_offset))
        if posonly and i == len(posonly) - 1:
            parts.append("/")

    # *args, or a bare "*" when keyword-only parameters follow without it
    vararg = args_node.vararg
    kwonly = args_node.kwonlyargs or []
    if vararg:
        parts.append(render(vararg, False, "*"))
    elif kwonly:
        parts.append("*")

    # keyword-only args; kw_defaults holds None for "no default"
    kw_defaults = args_node.kw_defaults or []
    for i, arg in enumerate(kwonly):
        has_default = i < len(kw_defaults) and kw_defaults[i] is not None
        parts.append(render(arg, has_default))

    # **kwargs
    kwarg = args_node.kwarg
    if kwarg:
        parts.append(render(kwarg, False, "**"))

    return ", ".join(parts)
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def get_decorator_names(node):
    """Collect the short name of every decorator on a function/class node.

    ``@name`` gives "name", ``@pkg.name`` gives "name" (last attribute only),
    and a call such as ``@name(...)`` or ``@pkg.name(...)`` is reduced to the
    callee first. Decorators of any other shape are left out.
    """

    def short_name(expr):
        if isinstance(expr, ast.Call):
            expr = expr.func  # look at the callee, one level only
        if isinstance(expr, ast.Name):
            return expr.id
        if isinstance(expr, ast.Attribute):
            return expr.attr
        return None

    candidates = (short_name(dec) for dec in node.decorator_list)
    return [name for name in candidates if name is not None]
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def is_dataclass(node):
    """Return True when one of the node's decorators is named ``dataclass``."""
    return any(name == "dataclass" for name in get_decorator_names(node))
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def is_basemodel(bases):
    """Return True if any base's text contains "BaseModel" or "BaseSettings".

    The check is a substring match on the rendered base expression, so
    qualified names such as ``pydantic.BaseModel`` match as well.
    """
    markers = ("BaseModel", "BaseSettings")
    rendered = ((annotation_to_str(base) or "") for base in bases)
    return any(marker in text for text in rendered for marker in markers)
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def is_optional_annotation(annotation):
    """Return True when the annotation's text looks like an optional type.

    Recognized spellings are ``Optional[...]``, a ``Union[...]`` that mentions
    ``None``, and the ``X | None`` / ``None | X`` forms. A missing annotation
    is never optional.
    """
    if annotation is None:
        return False

    text = annotation_to_str(annotation) or ""
    if "Optional[" in text:
        return True
    if "Union[" in text and "None" in text:
        return True
    return "| None" in text or "None |" in text
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def get_docstring_hint(node):
    """Return the first line of the node's docstring, or None.

    The line is cut to at most MAX_DOC_HINT_LEN characters. Any error while
    reading the docstring is swallowed and reported as None.
    """
    try:
        doc = ast.get_docstring(node)
        if not doc:
            return None
        # Slicing is a no-op for lines that are already short enough.
        return doc.strip().splitlines()[0][:MAX_DOC_HINT_LEN]
    except Exception:
        return None
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def extract_dataclass_fields(class_node):
    """Return the class's annotated fields as one comma-separated string.

    Only annotated assignments to a plain name are considered. A field gets a
    "?" suffix when its annotation looks optional or it has a default value.
    """
    rendered = []
    for stmt in class_node.body:
        if not (isinstance(stmt, ast.AnnAssign) and isinstance(stmt.target, ast.Name)):
            continue
        optional = is_optional_annotation(stmt.annotation) or stmt.value is not None
        marker = "?" if optional else ""
        rendered.append(f"{stmt.target.id}{marker}")
    return ", ".join(rendered)
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def extract_basemodel_fields(class_node):
    """Return a compact ``{required*, optional?}`` field summary, or None.

    Required fields (no default, annotation not optional) are listed first
    with a "*" suffix, followed by optional fields with a "?" suffix. None is
    returned when the class body has no annotated name assignments.
    """
    required = []
    optional = []
    for stmt in class_node.body:
        if not (isinstance(stmt, ast.AnnAssign) and isinstance(stmt.target, ast.Name)):
            continue
        if is_optional_annotation(stmt.annotation) or stmt.value is not None:
            optional.append(f"{stmt.target.id}?")
        else:
            required.append(f"{stmt.target.id}*")

    ordered = required + optional
    return "{" + ", ".join(ordered) + "}" if ordered else None
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
def extract_class_constants(class_node):
    """Yield ``NAME=value`` strings for upper-case assignments in a class body.

    Both plain (``NAME = 1``) and annotated (``NAME: int = 1``) assignments to
    simple names are covered. The value is the unparsed source of the
    right-hand side, or "..." when there is none or it cannot be unparsed.
    """

    def render(value_node):
        try:
            return ast.unparse(value_node)
        except Exception:
            return "..."

    for stmt in class_node.body:
        if isinstance(stmt, ast.Assign):
            # A chained assignment (A = B = 1) yields one entry per target.
            for target in stmt.targets:
                if isinstance(target, ast.Name) and target.id.isupper():
                    yield f"{target.id}={render(stmt.value)}"
        elif isinstance(stmt, ast.AnnAssign) and isinstance(stmt.target, ast.Name):
            name = stmt.target.id
            if name.isupper():
                val = render(stmt.value) if stmt.value else "..."
                yield f"{name}={val}"
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
def extract_method_sig(func_node):
    """Build ``[async ]def name(params)[ → return]`` for a function node.

    The result carries no indentation; the caller adds whatever prefix it
    needs.
    """
    pieces = []
    if isinstance(func_node, ast.AsyncFunctionDef):
        pieces.append("async ")
    pieces.append(f"def {func_node.name}({format_args(func_node.args)})")

    returns = annotation_to_str(func_node.returns) if func_node.returns else None
    if returns:
        pieces.append(f" → {returns}")
    return "".join(pieces)
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
def extract_function_sig(func_node, src_lines=None):
    """Build a top-level function signature, with a docstring hint if any.

    The signature has the same ``[async ]def name(params)[ → return]`` shape
    used for methods; when the function has a docstring its first line is
    appended after " # ". ``src_lines`` is accepted but not used.
    """
    signature = extract_method_sig(func_node)
    hint = get_docstring_hint(func_node)
    return f"{signature} # {hint}" if hint else signature
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def extract_fastapi_routes(tree, src_lines):
    """Extract route signatures from top-level decorated functions only.

    A decorator counts as a route when it is a call on an attribute whose name
    is an HTTP method (``@app.get(...)``, ``@router.post(...)``, ...) and its
    path is a string literal, given either as the first positional argument or
    as the ``path=`` keyword. Each match is rendered as
    ``METHOD path → function_name()``. ``src_lines`` is accepted but not used.
    """
    http_methods = {"get", "post", "put", "patch", "delete", "head", "options", "trace"}
    routes = []
    for node in tree.body:
        if not isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            continue
        for dec in node.decorator_list:
            if not (isinstance(dec, ast.Call) and isinstance(dec.func, ast.Attribute)):
                continue
            method = dec.func.attr.lower()
            if method not in http_methods:
                continue
            if dec.args:
                path_node = dec.args[0]
            else:
                # The path may also be passed by keyword: @app.get(path="/x")
                path_node = next((kw.value for kw in dec.keywords if kw.arg == "path"), None)
            # Only literal string paths can be reported; names and other
            # constants (numbers, bytes) are skipped.
            if isinstance(path_node, ast.Constant) and isinstance(path_node.value, str):
                routes.append(f"{method.upper()} {path_node.value} → {node.name}()")
    return routes
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
def extract(filepath):
    """Parse a Python file and return up to MAX_SIGS signature strings.

    Top-level classes contribute a header line (with special forms for
    dataclasses and BaseModel/BaseSettings subclasses), their upper-case
    constants and their public methods plus ``__init__``. Top-level functions
    whose name does not start with "_" contribute one line each. Route
    decorators found on top-level functions are appended at the end.

    Raises whatever ``open`` or ``ast.parse`` raise (for example OSError or
    SyntaxError); the caller is expected to handle that.
    """
    # "utf-8-sig" drops a leading byte-order mark; with plain "utf-8" the BOM
    # would reach ast.parse as U+FEFF and make the whole file unparsable.
    with open(filepath, "r", encoding="utf-8-sig", errors="replace") as f:
        src = f.read()

    tree = ast.parse(src, filename=filepath)
    src_lines = src.splitlines()
    sigs = []

    # Walk top-level statements only
    for node in tree.body:
        if len(sigs) >= MAX_SIGS:
            break

        # Classes
        if isinstance(node, ast.ClassDef):
            bases_str = ", ".join(annotation_to_str(b) for b in node.bases if b)

            if is_dataclass(node):
                fields = extract_dataclass_fields(node)
                sigs.append(f"@dataclass {node.name}({fields})")
            elif is_basemodel(node.bases):
                bm_fields = extract_basemodel_fields(node)
                # Label the class with the first base that triggered the match.
                base_label = next(
                    (annotation_to_str(b) for b in node.bases
                     if "BaseModel" in (annotation_to_str(b) or "") or "BaseSettings" in (annotation_to_str(b) or "")),
                    "BaseModel"
                )
                if bm_fields:
                    sigs.append(f"class {node.name}({base_label}) {bm_fields}")
                else:
                    sigs.append(f"class {node.name}({base_label})")
            else:
                base_part = f"({bases_str})" if bases_str else ""
                sigs.append(f"class {node.name}{base_part}")

            # Class constants
            for const in extract_class_constants(node):
                if len(sigs) >= MAX_SIGS:
                    break
                sigs.append(f" {const}")

            # Methods (skip private except __init__, skip all other dunder)
            for stmt in node.body:
                if len(sigs) >= MAX_SIGS:
                    break
                if not isinstance(stmt, (ast.FunctionDef, ast.AsyncFunctionDef)):
                    continue
                name = stmt.name
                if name.startswith("_") and name != "__init__":
                    continue
                sigs.append(f" {extract_method_sig(stmt)}")

        # Top-level functions
        elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            if node.name.startswith("_"):
                continue
            sigs.append(extract_function_sig(node, src_lines))

    # FastAPI routes (extract top-level decorated functions)
    routes = extract_fastapi_routes(tree, src_lines)
    seen_sigs = set(sigs)
    for route in routes:
        if len(sigs) >= MAX_SIGS:
            break
        if route not in seen_sigs:
            sigs.append(route)
            seen_sigs.add(route)

    return sigs[:MAX_SIGS]
|
|
332
|
+
|
|
333
|
+
|
|
334
|
+
def main():
    """Command-line entry point: print the signatures of argv[1] as JSON.

    Prints "[]" when no file argument was given or when extraction (or
    printing its result) raises any Exception, so stdout always carries a
    JSON array.
    """
    argv = sys.argv
    if len(argv) >= 2:
        try:
            print(json.dumps(extract(argv[1])))
            return
        except Exception:
            pass  # reported as an empty result below
    print("[]")
|
|
345
|
+
|
|
346
|
+
|
|
347
|
+
if __name__ == "__main__":
|
|
348
|
+
main()
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
 * Remove R line comments (`#` to end of line) while leaving `#` characters
 * inside quoted text untouched. Double-quoted and single-quoted strings and
 * backtick-quoted names are recognized, including backslash escapes.
 * Line terminators are kept so line-anchored patterns still work.
 * @param {string} src - Raw R source
 * @returns {string} Source with comments removed
 */
function stripRComments(src) {
  const kept = [];
  let quote = null; // null | '"' | "'" | '`'
  let segStart = 0;
  let i = 0;
  while (i < src.length) {
    const ch = src[i];
    if (quote !== null) {
      if (ch === '\\') {
        i += 2; // skip the escaped character as well
      } else {
        if (ch === quote) quote = null;
        i += 1;
      }
      continue;
    }
    if (ch === '#') {
      kept.push(src.slice(segStart, i));
      while (i < src.length && src[i] !== '\n' && src[i] !== '\r') i += 1;
      segStart = i;
      continue;
    }
    if (ch === '"' || ch === "'" || ch === '`') quote = ch;
    i += 1;
  }
  kept.push(src.slice(segStart));
  return kept.join('');
}

/**
 * Extract signatures from R source code.
 *
 * Reports, in this order: function definitions assigned with `<-`, `=` or
 * `<<-` (names starting with "." are skipped), then `setGeneric`, `setMethod`
 * and `setClass` calls that start a line. At most 30 entries are returned.
 *
 * @param {string} src - Raw file content
 * @returns {string[]} Array of signature strings
 */
function extract(src) {
  if (!src || typeof src !== 'string') return [];
  const sigs = [];

  // Strip line comments. Roxygen2 (#') comments are stripped along with
  // regular ones. The stripping is quote-aware so that a default value such
  // as col = "#fff" is not cut in half.
  const stripped = stripRComments(src);

  // Function definitions:
  //   name <- function(args) { ... }
  //   name = function(args) { ... }
  //   name <<- function(args) { ... }
  // Args may span multiple lines and contain default values, so we need to
  // match a balanced parenthesis group rather than a single line.
  const funcRe = /^(?:[ \t]*)([\w.]+)\s*(?:<<-|<-|=)\s*function\s*\(/gm;
  let m;
  while ((m = funcRe.exec(stripped)) !== null) {
    const name = m[1];
    if (name.startsWith('.')) continue; // private convention
    // lastIndex sits just past the opening '(' matched by the pattern.
    const argsStart = funcRe.lastIndex;
    const args = readBalancedParens(stripped, argsStart - 1);
    if (args === null) continue;
    sigs.push(`${name} <- function(${normalizeParams(args)})`);
  }

  // S4 setMethod / setGeneric:
  //   setGeneric("name", function(args) standardGeneric("name"))
  //   setMethod("name", "ClassName", function(args) { ... })
  for (const sm of stripped.matchAll(/^[ \t]*setGeneric\s*\(\s*["']([\w.]+)["']/gm)) {
    sigs.push(`setGeneric("${sm[1]}")`);
  }
  for (const sm of stripped.matchAll(/^[ \t]*setMethod\s*\(\s*["']([\w.]+)["']\s*,\s*["']([\w.]+)["']/gm)) {
    sigs.push(`setMethod("${sm[1]}", "${sm[2]}")`);
  }

  // S4 class definitions:
  //   setClass("Name", representation(...), ...)
  for (const sm of stripped.matchAll(/^[ \t]*setClass\s*\(\s*["']([\w.]+)["']/gm)) {
    sigs.push(`setClass("${sm[1]}")`);
  }

  return sigs.slice(0, 30);
}
|
|
51
|
+
|
|
52
|
+
/**
 * Return the text between the '(' at `openIdx` and its matching ')'.
 *
 * Parentheses inside single- or double-quoted strings are ignored, and a
 * backslash inside a string skips the character that follows it. The scan
 * stops `cap` characters after `openIdx` (or at the end of `src`), which
 * bounds the work done on malformed input.
 *
 * @param {string} src - Text to scan
 * @param {number} openIdx - Index of the opening '(' character
 * @param {number} [cap=4096] - Maximum distance to scan from `openIdx`
 * @returns {string|null} Inner content without the outer parens, or null when
 *   `src[openIdx]` is not '(' or no matching ')' is found in range
 */
function readBalancedParens(src, openIdx, cap = 4096) {
  if (src[openIdx] !== '(') return null;

  const limit = Math.min(src.length, openIdx + cap);
  let quote = null; // null | '"' | "'"
  let open = 1;

  for (let pos = openIdx + 1; pos < limit; pos++) {
    const c = src[pos];

    if (quote !== null) {
      if (c === '\\') {
        pos++; // together with the loop increment this skips the escaped char
      } else if (c === quote) {
        quote = null;
      }
      continue;
    }

    switch (c) {
      case '"':
      case "'":
        quote = c;
        break;
      case '(':
        open += 1;
        break;
      case ')':
        open -= 1;
        if (open === 0) return src.slice(openIdx + 1, pos);
        break;
      default:
        break;
    }
  }
  return null;
}
|
|
82
|
+
|
|
83
|
+
/**
 * Compress whitespace inside a parameter list, collapse multi-line default
 * expressions onto a single line, and trim. The goal is one-line readable
 * signatures, not a faithful AST.
 *
 * Outside string literals, runs of whitespace become a single space, commas
 * are rendered as ", " and a standalone `=` as " = ". An `=` that belongs to
 * a comparison operator (`==`, `!=`, `<=`, `>=`) is copied unchanged so that
 * default expressions such as `ok = x >= 0` are not broken apart.
 *
 * String literals are copied character-for-character so that commas/equals
 * inside default values like sep = "," don't get respaced.
 *
 * @param {string} raw - Text between the parentheses of a function header
 * @returns {string} Single-line, normalized parameter list
 */
function normalizeParams(raw) {
  let out = '';
  let inString = null; // null | '"' | "'"
  for (let i = 0; i < raw.length; i++) {
    const ch = raw[i];
    if (inString) {
      out += ch;
      if (ch === '\\' && i + 1 < raw.length) {
        out += raw[i + 1]; // keep the escaped character verbatim
        i++;
      } else if (ch === inString) {
        inString = null;
      }
      continue;
    }
    if (ch === '"' || ch === "'") {
      inString = ch;
      out += ch;
      continue;
    }
    if (/\s/.test(ch)) {
      // Outside strings the output can only end in a space we added.
      if (out.length && !out.endsWith(' ')) out += ' ';
      continue;
    }
    if (ch === ',') {
      out = `${out.trimEnd()}, `;
      continue;
    }
    if (ch === '=') {
      const prev = i > 0 ? raw[i - 1] : '';
      const isComparison =
        prev === '=' || prev === '!' || prev === '<' || prev === '>' || raw[i + 1] === '=';
      if (!isComparison) {
        out = `${out.trimEnd()} = `;
        continue;
      }
    }
    out += ch;
  }
  return out.trim();
}
|
|
135
|
+
|
|
136
|
+
module.exports = { extract };
|
package/src/mcp/server.js
CHANGED