codedeep-mcp 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +177 -0
- package/dist/config.js +223 -0
- package/dist/git/analyzer.js +177 -0
- package/dist/git/git-service.js +568 -0
- package/dist/git/head-watcher.js +113 -0
- package/dist/git/runner.js +204 -0
- package/dist/index.js +138 -0
- package/dist/indexer/code-index.js +1801 -0
- package/dist/indexer/complexity.js +633 -0
- package/dist/indexer/extractor.js +354 -0
- package/dist/indexer/languages/cpp.js +934 -0
- package/dist/indexer/languages/csharp.js +854 -0
- package/dist/indexer/languages/dart.js +777 -0
- package/dist/indexer/languages/go.js +665 -0
- package/dist/indexer/languages/java.js +507 -0
- package/dist/indexer/languages/kotlin.js +709 -0
- package/dist/indexer/languages/objc.js +397 -0
- package/dist/indexer/languages/php.js +771 -0
- package/dist/indexer/languages/python.js +455 -0
- package/dist/indexer/languages/ruby.js +697 -0
- package/dist/indexer/languages/rust.js +754 -0
- package/dist/indexer/languages/swift.js +691 -0
- package/dist/indexer/languages/typescript.js +485 -0
- package/dist/indexer/parser.js +175 -0
- package/dist/indexer/pipeline.js +342 -0
- package/dist/indexer/scanner.js +279 -0
- package/dist/indexer/watcher.js +353 -0
- package/dist/logger.js +16 -0
- package/dist/server.js +170 -0
- package/dist/tools/common.js +207 -0
- package/dist/tools/find-references.js +224 -0
- package/dist/tools/find-symbol.js +94 -0
- package/dist/tools/get-context.js +370 -0
- package/dist/tools/impact.js +218 -0
- package/dist/tools/overview.js +482 -0
- package/dist/tools/search-structure.js +303 -0
- package/dist/types.js +61 -0
- package/grammars/tree-sitter-c.wasm +0 -0
- package/grammars/tree-sitter-c_sharp.wasm +0 -0
- package/grammars/tree-sitter-cpp.wasm +0 -0
- package/grammars/tree-sitter-dart.wasm +0 -0
- package/grammars/tree-sitter-go.wasm +0 -0
- package/grammars/tree-sitter-java.wasm +0 -0
- package/grammars/tree-sitter-javascript.wasm +0 -0
- package/grammars/tree-sitter-kotlin.wasm +0 -0
- package/grammars/tree-sitter-objc.wasm +0 -0
- package/grammars/tree-sitter-php.wasm +0 -0
- package/grammars/tree-sitter-python.wasm +0 -0
- package/grammars/tree-sitter-ruby.wasm +0 -0
- package/grammars/tree-sitter-rust.wasm +0 -0
- package/grammars/tree-sitter-swift.wasm +0 -0
- package/grammars/tree-sitter-tsx.wasm +0 -0
- package/grammars/tree-sitter-typescript.wasm +0 -0
- package/package.json +67 -0
|
@@ -0,0 +1,455 @@
|
|
|
1
|
+
import { IMPORT_NAMESPACE, RECEIVER_OPAQUE } from '../../types.js';
|
|
2
|
+
import { SIGNATURE_DISPLAY_CAP, bareDecoratorIdentifier, normalizeSignature, resolveCalls, symbolId, } from '../extractor.js';
|
|
3
|
+
import { computeComplexity } from '../complexity.js';
|
|
4
|
+
// Function-like nodes whose bodies contain calls that shouldn't attribute
|
|
5
|
+
// to an enclosing body. walkDecorators uses this subset so it still
|
|
6
|
+
// descends through class bodies but stops at nested function bodies and
|
|
7
|
+
// lambdas — see the matching TS_FUNCTION_BODY_SKIP_TYPES in typescript.ts.
|
|
8
|
+
// Node types that open a function-like scope: a `def` or a `lambda`.
const PY_FUNCTION_BODY_SKIP_TYPES = new Set(['function_definition', 'lambda']);
// The function-like types plus `class_definition`. Built as an independent
// Set (copy, then add) so the narrower set above is left untouched.
const PY_SKIP_TYPES = new Set(PY_FUNCTION_BODY_SKIP_TYPES).add('class_definition');
|
|
16
|
+
// Call-site selectors: each entry names a node type and how to reach the
// callee node from a node of that type.
const PY_SELECTORS = [
    {
        // A `call` node exposes its callee through the `function` field.
        nodeType: 'call',
        getCallee: (callNode) => {
            return callNode.childForFieldName('function');
        },
    },
    {
        // Decorators delegate callee lookup to the shared extractor helper.
        nodeType: 'decorator',
        getCallee: bareDecoratorIdentifier,
    },
];
|
|
20
|
+
// Peels `parenthesized_expression` (`(a)`, `(super())`) — transparent to receiver
|
|
21
|
+
// IDENTITY — so a parenthesized receiver resolves like its unwrapped form: `(a).x()`
|
|
22
|
+
// like `a.x()`, and `(super()).x()` reaches the super-call drop below. The wrapped
|
|
23
|
+
// expression is the first NON-COMMENT named child (a leading `# c` is a NAMED node;
|
|
24
|
+
// skip it, the Go/TS receiver-unwrap pattern). A genuine expression receiver
|
|
25
|
+
// (`(a + b).x()`) unwraps to a non-identifier/non-call node → stays opaque.
|
|
26
|
+
// Strips any number of enclosing `parenthesized_expression` layers from a
// receiver node and returns the innermost node reached. Inside each layer the
// wrapped expression is the first named child that is not a `comment`. A layer
// with no such child is returned as-is.
function unwrapPyReceiver(node) {
    let current = node;
    for (;;) {
        if (current.type !== 'parenthesized_expression') {
            return current;
        }
        // Skip leading comment nodes to find the wrapped expression.
        let wrapped = current.firstNamedChild;
        for (; wrapped && wrapped.type === 'comment'; wrapped = wrapped.nextNamedSibling) {
            // advance only
        }
        if (!wrapped) {
            return current;
        }
        current = wrapped;
    }
}
|
|
38
|
+
// `self.x()` / `cls.x()` / `obj.x()` carry their literal receiver token; a
|
|
39
|
+
// parenthesized `(a).x()` / `(self).x()` receiver is unwrapped to that token too.
|
|
40
|
+
// Chained and computed receivers (`a.b.c()`, `foo().x()`) carry RECEIVER_OPAQUE so
|
|
41
|
+
// the called method stays findable by name (recall) but never resolves. `super().x()`
|
|
42
|
+
// — including the parenthesized `(super()).x()` form — is parent-class dispatch,
|
|
43
|
+
// dropped (the TS/Java/Dart rule), even though `super()` is syntactically a `call`
|
|
44
|
+
// node like a genuine chain. A computed/non-clean attribute (no `identifier`
|
|
45
|
+
// attribute) emits nothing.
|
|
46
|
+
// Describes a member call `<receiver>.<property>(...)` from its callee node.
// Returns null when the callee is not an `attribute` node, when either the
// `object` or `attribute` field is missing, when the attribute is not a plain
// `identifier`, or when the (unwrapped) receiver is a call to `super`.
// Otherwise returns `{ receiver, property, isSelf }`:
//   - identifier receiver: `receiver` is its text; `isSelf` is true for
//     `self` / `cls`;
//   - any other receiver: `receiver` is RECEIVER_OPAQUE and `isSelf` is false.
function pyMemberCallInfo(callee) {
    if (callee.type !== 'attribute') {
        return null;
    }
    const rawReceiver = callee.childForFieldName('object');
    const member = callee.childForFieldName('attribute');
    if (!(rawReceiver && member && member.type === 'identifier')) {
        return null;
    }
    // Parentheses around the receiver are transparent.
    const receiverNode = unwrapPyReceiver(rawReceiver);
    switch (receiverNode.type) {
        case 'identifier': {
            const token = receiverNode.text;
            return {
                receiver: token,
                property: member.text,
                isSelf: token === 'self' || token === 'cls',
            };
        }
        case 'call': {
            // `super().x()` has no `super` token, only a call whose function
            // is the identifier `super`; such calls are dropped.
            const target = receiverNode.childForFieldName('function');
            if (target?.type === 'identifier' && target.text === 'super') {
                return null;
            }
            break;
        }
        default:
            break;
    }
    return { receiver: RECEIVER_OPAQUE, property: member.text, isSelf: false };
}
|
|
68
|
+
// Dominant Python stdlib/builtin method names (>=4 chars) suppressed when a
|
|
69
|
+
// member call to them is unresolved — capturing chained calls otherwise floods
|
|
70
|
+
// the name-keyed store with `.append()`/`.items()`/`.format()`-style noise.
|
|
71
|
+
// Domain method names are deliberately absent. <=3-char names (`.get`, `.pop`)
|
|
72
|
+
// are gated downstream by SHORT_NAME_THRESHOLD, so they're omitted here.
|
|
73
|
+
//
|
|
74
|
+
// Composition checked against a requests dogfood (per-name member-call flood vs
|
|
75
|
+
// in-repo `def` recall stake). The kept names are canonical-by-usage: even where
|
|
76
|
+
// requests also defines one (its `CaseInsensitiveDict`/`RequestsCookieJar`
|
|
77
|
+
// implement MutableMapping), ~0–12% of `.items()`/`.update()`/`.copy()`/`.values()`
|
|
78
|
+
// sites target it, so capturing would inject mostly-FALSE weak callers (e.g. the
|
|
79
|
+
// `copy.copy()` MODULE function would smear onto `def copy`). `close` was REMOVED
|
|
80
|
+
// (now captured): a distinctive resource-teardown method (Response/Session/
|
|
81
|
+
// HTTPAdapter), ~60% of `.close()` sites have in-repo receivers. NOTE: requests is
|
|
82
|
+
// one small HTTP library — it does NOT exercise the Django/SQLAlchemy/parser
|
|
83
|
+
// collision worry (update/match/search as ORM/parser methods); a confident trim of
|
|
84
|
+
// those needs a flask/django/sqlalchemy dogfood (tracked as a follow-up).
|
|
85
|
+
// Member-call names to suppress. The grouping labels below are informal
// reading aids only; membership and insertion order are what matter.
const PY_IGNORED_MEMBER_CALLEES = new Set([
    // sequence-style
    'append', 'extend', 'insert', 'remove', 'index', 'count', 'sort', 'reverse',
    // mapping / generic container
    'copy', 'clear', 'items', 'keys', 'values', 'update', 'setdefault',
    // text
    'format', 'format_map',
    'strip', 'lstrip', 'rstrip',
    'split', 'rsplit', 'splitlines', 'join', 'replace',
    'encode', 'decode',
    'startswith', 'endswith',
    'lower', 'upper', 'title',
    'find', 'rfind',
    'isdigit', 'isalpha', 'isspace',
    // file-like I/O
    'read', 'readline', 'readlines', 'write', 'flush',
    // regex
    'group', 'groups', 'match', 'search',
    // set
    'union', 'intersection', 'difference', 'discard',
]);
|
|
95
|
+
// Cyclomatic decision nodes — codedeep-mcp's convention (radon/McCabe-aligned), since
|
|
96
|
+
// Python is undocumented by SonarQube. Verified against the sonar-python source
|
|
97
|
+
// (metrics/ComplexityVisitor): sonar-python counts only def/if/for/while/ternary/
|
|
98
|
+
// (and|or)/comprehension-if and notably OMITS `elif`, `except`, and `match`/`case`
|
|
99
|
+
// entirely (the visitor has no handler for them — the metric file predates 3.10).
|
|
100
|
+
// codedeep-mcp DELIBERATELY DIVERGES from those omissions and counts every genuine
|
|
101
|
+
// branch (radon-style): `elif_clause` (+1 each), `except_clause` (+1 per clause),
|
|
102
|
+
// `case_clause` (each `match` arm incl. the wildcard `case _:`). `if_clause`
|
|
103
|
+
// covers BOTH comprehension filters (`[x for x in y if c]` — counted by radon AND
|
|
104
|
+
// sonar-python) and match-case guards (`case X if g:`). `else_clause`/
|
|
105
|
+
// `finally_clause`/comprehension-`for` never count. `boolean_operator` is a
|
|
106
|
+
// DISTINCT node (each `and`/`or` nests to its own node → per-operator total),
|
|
107
|
+
// folded straight in — no token-read predicate, unlike the C-family.
|
|
108
|
+
// Node types that each count as one cyclomatic decision point.
const PY_DECISION_NODE_TYPES = new Set([
    // statement-level branches and loops
    'if_statement', 'elif_clause', 'for_statement', 'while_statement',
    // exception handlers
    'except_clause',
    // expression-level and pattern branches
    'conditional_expression', 'case_clause', 'if_clause',
    // each `and` / `or`
    'boolean_operator',
]);
|
|
119
|
+
// Python's logical operator is a distinct `boolean_operator` node (NOT the C-family
|
|
120
|
+
// `binary_expression`), so the shared `cFamilyBooleanOperatorKind` reader won't match
|
|
121
|
+
// it. The `operator` field token is `and`/`or` — returned as the run-collapse KIND so
|
|
122
|
+
// `a and b or c` = 2 (per-operator-kind-change, the engine default + sonar-python).
|
|
123
|
+
// Returns the node type of the `operator` field of a `boolean_operator` node.
// Returns null for every other node type, and when the operator child or its
// type is absent.
function pyBooleanOperatorKind(node) {
    if (node.type !== 'boolean_operator') {
        return null;
    }
    const operatorToken = node.childForFieldName('operator');
    if (operatorToken == null) {
        return null;
    }
    return operatorToken.type ?? null;
}
|
|
128
|
+
// Never-matching sentinel: sonar-python's `flattenOperators` does NOT unwrap
|
|
129
|
+
// parentheses while linearizing a boolean run (it stops at a parenthesized
|
|
130
|
+
// expression), so `(a and b) and c` = 2 — the parenthesized `and` is its own run when
|
|
131
|
+
// the DFS later descends into it. The sentinel makes the engine's skipParens a no-op,
|
|
132
|
+
// the Go/gocognit treatment (NOT TS/Java/complexipy's unwrap). VERIFIED on the oracle.
|
|
133
|
+
const PY_NO_PAREN_SENTINEL = '__py_no_paren__';
// Cognitive-complexity config (SonarSource whitepaper §1.2), VERIFIED-EXACT against
// sonar-python's `CognitiveComplexityVisitor` (clean-room source read + an oracle diff
// on flask/django: 0 mismatches on all 5034 functions WITHOUT a nested scope). The pin
// is sonar-python — SonarQube's own number, and the clean engine fit (vs complexipy's
// quirks). Key choices, all oracle-confirmed:
//   - the `if`/`elif`/`else` chain is a flat sibling list (elifClauseType — the one
//     genuinely-new engine path);
//   - `except` SURCHARGES (catchType, like Java);
//   - booleans count EVERYWHERE per-operator-kind run with NO paren unwrap;
//   - `for`/`while`/`try`-`else` is +1 flat with its body nested (the else_clause
//     dispatch);
//   - `match` is 0 STRUCTURAL with case bodies nested (nestOnlyTypes);
//   - `with` and the `try` body are NOT nested (pass-through — the divergence from
//     complexipy);
//   - loopBodyField nests only the loop body (resolving the loop-header overbump).
// Nested functions/lambdas/classes are EXCLUDED (PY_SKIP_TYPES is the cognitive
// boundary) — the per-symbol-model under-count, like the Java anon-class / TS-arrow
// callback divergence; see the project docs' "Cognitive Complexity Rules".
const PY_COGNITIVE_OPTIONS = {
    // `if` / `elif` / `else` chain
    ifType: 'if_statement',
    conditionField: 'condition',
    consequenceField: 'consequence',
    alternativeField: 'alternative',
    elifClauseType: 'elif_clause',
    elseClauseType: 'else_clause',

    // loops
    loopTypes: new Set(['while_statement', 'for_statement']),
    loopBodyField: 'body',

    // other structures; no node type is registered as a switch
    switchTypes: new Set([]),
    ternaryType: 'conditional_expression',
    catchType: 'except_clause',
    nestOnlyTypes: new Set(['match_statement']),

    // no labeled-jump types are configured, and hasLabel always answers false
    labeledJumpTypes: new Set([]),
    hasLabel() {
        return false;
    },

    // boolean runs: keyed by operator token, with the never-matching paren sentinel
    booleanOperatorKind: pyBooleanOperatorKind,
    parenthesizedType: PY_NO_PAREN_SENTINEL,
};
|
|
166
|
+
// Entry point for one parsed Python file. Walks the root's named children to
// collect symbols, imports and attributable bodies, resolves call references
// over those bodies, runs the complexity pass, and returns
// `{ symbols, references, imports }`.
export function extractPython(tree, content, fileInfo) {
    const root = tree.rootNode;
    const symbols = [];
    const imports = [];
    const bodies = [];
    // Names listed in a module-level `__all__`, or null when there is none.
    const allNames = findAllNames(root);
    root.namedChildren.forEach((statement) => {
        extractTopLevel(statement, content, fileInfo, allNames, symbols, imports, bodies);
    });
    const resolveOptions = { ignoredMemberCallees: PY_IGNORED_MEMBER_CALLEES };
    const references = resolveCalls(
        bodies,
        root,
        symbols,
        fileInfo,
        PY_SELECTORS,
        PY_SKIP_TYPES,
        PY_FUNCTION_BODY_SKIP_TYPES,
        pyMemberCallInfo,
        resolveOptions,
    );
    // The return value of the complexity pass is not used here.
    const complexityOptions = {
        decisionNodeTypes: PY_DECISION_NODE_TYPES,
        skipTypes: PY_SKIP_TYPES,
        cognitive: PY_COGNITIVE_OPTIONS,
    };
    computeComplexity(bodies, symbols, complexityOptions);
    return { symbols, references, imports };
}
|
|
182
|
+
// Dispatches one module-level node to the matching extractor:
//   - function / class definitions (bare or wrapped in `decorated_definition`;
//     for the wrapped form the wrapper is passed as the range node),
//   - assignments (bare or as the first named child of an
//     `expression_statement`),
//   - `import` and `from ... import` statements.
// Every other node type is ignored.
function extractTopLevel(node, content, fileInfo, allNames, outSymbols, outImports, outBodies) {
    const type = node.type;
    if (type === 'function_definition') {
        extractFunction(node, node, content, fileInfo, allNames, outSymbols, outBodies);
    }
    else if (type === 'class_definition') {
        extractClass(node, node, content, fileInfo, allNames, outSymbols, outBodies);
    }
    else if (type === 'decorated_definition') {
        const definition = node.childForFieldName('definition');
        const definitionType = definition?.type;
        if (definitionType === 'function_definition') {
            extractFunction(definition, node, content, fileInfo, allNames, outSymbols, outBodies);
        }
        else if (definitionType === 'class_definition') {
            extractClass(definition, node, content, fileInfo, allNames, outSymbols, outBodies);
        }
    }
    else if (type === 'assignment') {
        extractAssignment(node, content, fileInfo, allNames, outSymbols);
    }
    else if (type === 'expression_statement') {
        const expression = node.firstNamedChild;
        if (expression?.type === 'assignment') {
            extractAssignment(expression, content, fileInfo, allNames, outSymbols);
        }
    }
    else if (type === 'import_statement') {
        extractImport(node, fileInfo, outImports);
    }
    else if (type === 'import_from_statement') {
        extractImportFrom(node, fileInfo, outImports);
    }
}
|
|
222
|
+
// Records a module-level function. `inner` is the `function_definition`;
// `rangeNode` is the node whose span and leading text the symbol covers (the
// caller passes the decorator wrapper for decorated functions). Does nothing
// when the definition has no name. When a body exists it is queued in
// `outBodies` under the new symbol's id.
function extractFunction(inner, rangeNode, content, fileInfo, allNames, outSymbols, outBodies) {
    const nameNode = inner.childForFieldName('name');
    const name = nameNode?.text;
    if (!name) {
        return;
    }
    const signature = pythonSignature(inner, rangeNode, content);
    const fqn = `${fileInfo.path}:${name}`;
    const exported = isExported(name, allNames);
    const symbol = makePythonSymbol(rangeNode, inner, signature, fileInfo, 'function', name, fqn, exported);
    outSymbols.push(symbol);
    const body = inner.childForFieldName('body');
    if (!body) {
        return;
    }
    outBodies.push({ symbolId: symbol.id, body });
}
|
|
233
|
+
// Records a class and its directly contained methods. `inner` is the
// `class_definition`; `rangeNode` is the node whose span the class symbol
// covers. Does nothing when the class has no name; records only the class
// symbol when it has no body.
function extractClass(inner, rangeNode, content, fileInfo, allNames, outSymbols, outBodies) {
    const className = inner.childForFieldName('name')?.text;
    if (!className) {
        return;
    }
    // Methods reuse the class's exported flag.
    const exported = isExported(className, allNames);
    const classSignature = pythonSignature(inner, rangeNode, content);
    const classSym = makePythonSymbol(rangeNode, inner, classSignature, fileInfo, 'class', className, `${fileInfo.path}:${className}`, exported);
    outSymbols.push(classSym);
    const classBody = inner.childForFieldName('body');
    if (!classBody) {
        return;
    }
    // Walk the class body so class-level calls (`class C: x = helper()`,
    // `class C: helper()`) attribute to the class. PY_SKIP_TYPES contains
    // function_definition / lambda / class_definition, so methods,
    // lambdas, and nested classes stay attributed to themselves.
    outBodies.push({ symbolId: classSym.id, body: classBody, className });
    // Shared by plain and decorated methods: `spanNode` supplies the range and
    // leading text, `defNode` is the `function_definition` itself.
    const registerMethod = (spanNode, defNode) => {
        const methodName = defNode.childForFieldName('name')?.text;
        if (!methodName) {
            return;
        }
        const methodSignature = pythonSignature(defNode, spanNode, content);
        const methodSym = makePythonSymbol(spanNode, defNode, methodSignature, fileInfo, 'method', methodName, `${fileInfo.path}:${className}.${methodName}`, exported, className);
        outSymbols.push(methodSym);
        const methodBody = defNode.childForFieldName('body');
        if (methodBody) {
            outBodies.push({ symbolId: methodSym.id, body: methodBody, className });
        }
    };
    for (const member of classBody.namedChildren) {
        switch (member.type) {
            case 'function_definition':
                registerMethod(member, member);
                break;
            case 'decorated_definition': {
                const definition = member.childForFieldName('definition');
                // Only decorated functions become methods here.
                if (definition?.type === 'function_definition') {
                    registerMethod(member, definition);
                }
                break;
            }
            default:
                break;
        }
    }
}
|
|
276
|
+
// Records an assignment as a `variable` symbol. Only assignments whose left
// side is a single `identifier` qualify, and `__all__` itself is never
// recorded.
function extractAssignment(node, content, fileInfo, allNames, outSymbols) {
    const target = node.childForFieldName('left');
    if (target?.type !== 'identifier') {
        return;
    }
    const name = target.text;
    if (name === '__all__') {
        return;
    }
    const kind = 'variable';
    const assignmentText = content.slice(node.startIndex, node.endIndex);
    const signature = normalizeSignature(assignmentText);
    // Inline construction (no doc/range split needed) — but it must mirror
    // makePythonSymbol's contract: hash the FULL signature, store it capped.
    // A module-level `DATA = {...}` literal can run to kilobytes.
    const symbol = {
        id: symbolId(fileInfo.path, name, kind, signature),
        name,
        fqn: `${fileInfo.path}:${name}`,
        kind,
        file: fileInfo.path,
        startLine: node.startPosition.row + 1,
        endLine: node.endPosition.row + 1,
        signature: signature.slice(0, SIGNATURE_DISPLAY_CAP),
        doc: null,
        exported: isExported(name, allNames),
        language: fileInfo.language,
    };
    outSymbols.push(symbol);
}
|
|
302
|
+
// Records one import entry per name in an `import ...` statement.
// `import x` / `import x as y` — both bind a module object, not a callable
// value. kind='module' tells primaryRefMatchesTarget to not admit bare
// `x()` / `y()` as bound through the import.
// Only `aliased_import` and `dotted_name` entries are recorded.
function extractImport(stmt, fileInfo, out) {
    const record = (sourceModule, binding) => {
        out.push({
            file: fileInfo.path,
            sourceModule,
            importedNames: [binding],
            line: stmt.startPosition.row + 1,
        });
    };
    for (const entry of stmt.childrenForFieldName('name')) {
        switch (entry.type) {
            case 'aliased_import': {
                const aliased = readAliased(entry);
                if (aliased) {
                    aliased.kind = 'module';
                    record(aliased.name, aliased);
                }
                break;
            }
            case 'dotted_name': {
                const moduleName = entry.text;
                record(moduleName, { name: moduleName, kind: 'module' });
                break;
            }
            default:
                break;
        }
    }
}
|
|
329
|
+
// Records a single import entry for a `from <module> import ...` statement.
// Does nothing when the statement has no `module_name` field. A wildcard
// import yields one IMPORT_NAMESPACE name; otherwise each imported name
// (aliased or plain) is listed.
function extractImportFrom(stmt, fileInfo, out) {
    const moduleNode = stmt.childForFieldName('module_name');
    if (!moduleNode) {
        return;
    }
    const sourceModule = moduleNode.text;
    // `from . import x` / `from .. import y` — the bare-dot form binds
    // `x`/`y` as submodule objects of the package, not as callable
    // values. tree-sitter-python distinguishes by structure: a bare-dot
    // `relative_import` carries only `import_prefix`, while a named one
    // (`from .pkg import x`) also has a `dotted_name` child.
    const isBareRelative = moduleNode.type === 'relative_import' &&
        moduleNode.namedChildren.every((part) => part.type !== 'dotted_name');
    const isStarImport = stmt.namedChildren.some((part) => part.type === 'wildcard_import');
    const importedNames = [];
    if (isStarImport) {
        importedNames.push({ name: IMPORT_NAMESPACE });
    }
    else {
        for (const entry of stmt.childrenForFieldName('name')) {
            let binding;
            if (entry.type === 'aliased_import') {
                binding = readAliased(entry);
            }
            else {
                binding = { name: entry.text };
            }
            if (binding) {
                if (isBareRelative) {
                    binding.kind = 'module';
                }
                importedNames.push(binding);
            }
        }
    }
    out.push({
        file: fileInfo.path,
        sourceModule,
        importedNames,
        line: stmt.startPosition.row + 1,
    });
}
|
|
365
|
+
// Reads an `aliased_import` node into `{ name, alias }` from its `name` and
// `alias` fields. Returns null when either field is missing.
function readAliased(nameNode) {
    const [original, binding] = ['name', 'alias'].map((field) => nameNode.childForFieldName(field));
    return original && binding
        ? { name: original.text, alias: binding.text }
        : null;
}
|
|
372
|
+
// Builds the normalized signature text of a definition: the source from the
// start of `rangeNode` up to where the definition's body begins, or up to the
// end of `inner` when it has no body.
function pythonSignature(inner, rangeNode, content) {
    const bodyNode = inner.childForFieldName('body');
    let headerEnd;
    if (bodyNode) {
        headerEnd = bodyNode.startIndex;
    }
    else {
        headerEnd = inner.endIndex;
    }
    const headerText = content.slice(rangeNode.startIndex, headerEnd);
    return normalizeSignature(headerText);
}
|
|
377
|
+
// Assembles a symbol record. `rangeNode` supplies the 1-based start/end
// lines; `innerNode` is the definition whose docstring is read. `qualifier`
// (default '') is forwarded to the id hash.
function makePythonSymbol(rangeNode, innerNode, signature, fileInfo, kind, name, fqn, exported, qualifier = '') {
    const { path: file, language } = fileInfo;
    // The id hashes the FULL signature; only the stored copy is capped —
    // otherwise overloads differing past the cap share an id (JG1).
    const id = symbolId(file, name, kind, signature, qualifier);
    const firstRow = rangeNode.startPosition.row;
    const lastRow = rangeNode.endPosition.row;
    const displaySignature = signature.slice(0, SIGNATURE_DISPLAY_CAP);
    return {
        id,
        name,
        fqn,
        kind,
        file,
        startLine: firstRow + 1,
        endLine: lastRow + 1,
        signature: displaySignature,
        doc: extractPythonDoc(innerNode),
        exported,
        language,
    };
}
|
|
394
|
+
// Decides whether `name` counts as exported. When an `allNames` set is
// supplied, membership in it is the sole criterion; otherwise a name is
// exported unless it starts with an underscore.
function isExported(name, allNames) {
    return allNames ? allNames.has(name) : !name.startsWith('_');
}
|
|
399
|
+
// Looks for the first module-level `__all__ = [...]` / `__all__ = (...)`
// assignment and returns the Set of its string entries with quotes stripped.
// Non-string entries are skipped, and `__all__` assignments whose right side
// is not a list or tuple are passed over. Returns null when no usable
// assignment is found.
function findAllNames(rootNode) {
    // A top-level assignment appears either directly or as the first named
    // child of an `expression_statement`.
    const asAssignment = (statement) => {
        if (statement.type === 'assignment') {
            return statement;
        }
        if (statement.type === 'expression_statement') {
            const expression = statement.firstNamedChild;
            return expression?.type === 'assignment' ? expression : null;
        }
        return null;
    };
    for (const statement of rootNode.namedChildren) {
        const assignment = asAssignment(statement);
        if (assignment?.childForFieldName('left')?.text !== '__all__') {
            continue;
        }
        const value = assignment.childForFieldName('right');
        const valueType = value?.type;
        if (valueType !== 'list' && valueType !== 'tuple') {
            continue;
        }
        const names = value.namedChildren
            .filter((item) => item.type === 'string')
            .map((item) => stripPyStringQuotes(item.text));
        return new Set(names);
    }
    return null;
}
|
|
427
|
+
// Returns the first non-blank line (trimmed) of a definition's docstring, or
// null. The docstring is recognised only when the first named child of the
// body is an `expression_statement` whose first named child is a `string`.
function extractPythonDoc(definitionNode) {
    const leading = definitionNode.childForFieldName('body')?.firstNamedChild;
    if (leading?.type !== 'expression_statement') {
        return null;
    }
    const literal = leading.firstNamedChild;
    if (literal?.type !== 'string') {
        return null;
    }
    const summary = stripPyStringQuotes(literal.text)
        .split('\n')
        .map((line) => line.trim())
        .find((line) => line !== '');
    return summary ?? null;
}
|
|
445
|
+
// Removes the literal syntax around a Python string token and returns the raw
// inner text: an optional prefix (any run of f/r/b/u letters, either case)
// and the surrounding quotes — triple quotes first, then single.
//
// The prefix is stripped only when a quote character follows it directly.
// Text that is not a quoted literal is therefore returned unchanged rather
// than losing its leading letters (e.g. `foo` previously came back as `oo`).
// Escape sequences inside the literal are not interpreted.
function stripPyStringQuotes(text) {
    const s = text.replace(/^[fFrRbBuU]+(?=['"])/, '');
    // Triple-quoted forms need at least the two 3-character fences.
    for (const fence of ['"""', "'''"]) {
        if (s.length >= 6 && s.startsWith(fence) && s.endsWith(fence)) {
            return s.slice(3, -3);
        }
    }
    const opener = s[0];
    const isQuote = opener === '"' || opener === "'";
    if (s.length >= 2 && isQuote && opener === s[s.length - 1]) {
        return s.slice(1, -1);
    }
    return s;
}
|