@shrkcrft/graph 0.1.0-alpha.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +30 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +32 -0
- package/dist/indexer/detect-workspace.d.ts +18 -0
- package/dist/indexer/detect-workspace.d.ts.map +1 -0
- package/dist/indexer/detect-workspace.js +80 -0
- package/dist/indexer/extract-csharp-file.d.ts +27 -0
- package/dist/indexer/extract-csharp-file.d.ts.map +1 -0
- package/dist/indexer/extract-csharp-file.js +163 -0
- package/dist/indexer/extract-dart-file.d.ts +28 -0
- package/dist/indexer/extract-dart-file.d.ts.map +1 -0
- package/dist/indexer/extract-dart-file.js +167 -0
- package/dist/indexer/extract-elixir-file.d.ts +27 -0
- package/dist/indexer/extract-elixir-file.d.ts.map +1 -0
- package/dist/indexer/extract-elixir-file.js +164 -0
- package/dist/indexer/extract-go-file.d.ts +28 -0
- package/dist/indexer/extract-go-file.d.ts.map +1 -0
- package/dist/indexer/extract-go-file.js +156 -0
- package/dist/indexer/extract-java-file.d.ts +25 -0
- package/dist/indexer/extract-java-file.d.ts.map +1 -0
- package/dist/indexer/extract-java-file.js +140 -0
- package/dist/indexer/extract-kotlin-file.d.ts +20 -0
- package/dist/indexer/extract-kotlin-file.d.ts.map +1 -0
- package/dist/indexer/extract-kotlin-file.js +158 -0
- package/dist/indexer/extract-php-file.d.ts +26 -0
- package/dist/indexer/extract-php-file.d.ts.map +1 -0
- package/dist/indexer/extract-php-file.js +161 -0
- package/dist/indexer/extract-python-file.d.ts +30 -0
- package/dist/indexer/extract-python-file.d.ts.map +1 -0
- package/dist/indexer/extract-python-file.js +196 -0
- package/dist/indexer/extract-ruby-file.d.ts +29 -0
- package/dist/indexer/extract-ruby-file.d.ts.map +1 -0
- package/dist/indexer/extract-ruby-file.js +151 -0
- package/dist/indexer/extract-rust-file.d.ts +27 -0
- package/dist/indexer/extract-rust-file.d.ts.map +1 -0
- package/dist/indexer/extract-rust-file.js +186 -0
- package/dist/indexer/extract-swift-file.d.ts +27 -0
- package/dist/indexer/extract-swift-file.d.ts.map +1 -0
- package/dist/indexer/extract-swift-file.js +168 -0
- package/dist/indexer/extract-ts-file.d.ts +79 -0
- package/dist/indexer/extract-ts-file.d.ts.map +1 -0
- package/dist/indexer/extract-ts-file.js +403 -0
- package/dist/indexer/incremental-updater.d.ts +41 -0
- package/dist/indexer/incremental-updater.d.ts.map +1 -0
- package/dist/indexer/incremental-updater.js +395 -0
- package/dist/indexer/index-builder.d.ts +23 -0
- package/dist/indexer/index-builder.d.ts.map +1 -0
- package/dist/indexer/index-builder.js +289 -0
- package/dist/indexer/resolve-imports.d.ts +36 -0
- package/dist/indexer/resolve-imports.d.ts.map +1 -0
- package/dist/indexer/resolve-imports.js +144 -0
- package/dist/indexer/unresolved-imports.d.ts +20 -0
- package/dist/indexer/unresolved-imports.d.ts.map +1 -0
- package/dist/indexer/unresolved-imports.js +32 -0
- package/dist/query/cycle-detection.d.ts +40 -0
- package/dist/query/cycle-detection.d.ts.map +1 -0
- package/dist/query/cycle-detection.js +135 -0
- package/dist/query/query-api.d.ts +87 -0
- package/dist/query/query-api.d.ts.map +1 -0
- package/dist/query/query-api.js +232 -0
- package/dist/schema/edge-kind.d.ts +31 -0
- package/dist/schema/edge-kind.d.ts.map +1 -0
- package/dist/schema/edge-kind.js +35 -0
- package/dist/schema/edge.d.ts +22 -0
- package/dist/schema/edge.d.ts.map +1 -0
- package/dist/schema/edge.js +1 -0
- package/dist/schema/file-fingerprint.d.ts +22 -0
- package/dist/schema/file-fingerprint.d.ts.map +1 -0
- package/dist/schema/file-fingerprint.js +1 -0
- package/dist/schema/graph-snapshot.d.ts +18 -0
- package/dist/schema/graph-snapshot.d.ts.map +1 -0
- package/dist/schema/graph-snapshot.js +1 -0
- package/dist/schema/manifest.d.ts +47 -0
- package/dist/schema/manifest.d.ts.map +1 -0
- package/dist/schema/manifest.js +1 -0
- package/dist/schema/node-kind.d.ts +21 -0
- package/dist/schema/node-kind.d.ts.map +1 -0
- package/dist/schema/node-kind.js +27 -0
- package/dist/schema/node.d.ts +26 -0
- package/dist/schema/node.d.ts.map +1 -0
- package/dist/schema/node.js +1 -0
- package/dist/schema/schema-version.d.ts +10 -0
- package/dist/schema/schema-version.d.ts.map +1 -0
- package/dist/schema/schema-version.js +8 -0
- package/dist/store/file-fingerprint.d.ts +8 -0
- package/dist/store/file-fingerprint.d.ts.map +1 -0
- package/dist/store/file-fingerprint.js +64 -0
- package/dist/store/graph-store.d.ts +48 -0
- package/dist/store/graph-store.d.ts.map +1 -0
- package/dist/store/graph-store.js +194 -0
- package/package.json +54 -0
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
import { createHash } from 'node:crypto';
|
|
2
|
+
import { readFileSync } from 'node:fs';
|
|
3
|
+
import { EdgeKind } from "../schema/edge-kind.js";
|
|
4
|
+
import { NodeKind } from "../schema/node-kind.js";
|
|
5
|
+
export const EXTRACT_KOTLIN_FILE_SOURCE = 'extract-kotlin-file@v1';
|
|
6
|
+
/**
 * Regex-based Kotlin extractor.
 *
 * Works line by line and only looks at top-level declarations: a line is
 * ignored when it is empty, starts with a space or tab, or is a `//` comment.
 * Leading annotations (`@Suppress("...")`, `@JvmStatic`) are stripped first.
 *
 * Captured:
 *   - `fun name(...)`, `inline fun`, `suspend fun`                → function
 *   - `class Name`, `data class Name`, `value class Name`,
 *     `inline class Name`, `sealed class Name`, `abstract class Name`,
 *     `open class Name`, `annotation class Name`                  → class
 *   - `enum class Name`                                           → enum
 *   - `interface Name`, `sealed interface Name`                   → interface
 *   - `object Name`                                               → class
 *   - `typealias Name = ...`                                      → type-alias
 *   - `val NAME: T`, `var NAME: T`, `const val NAME`              → const
 *
 * Not matched by the current patterns: extension functions
 * (`fun Receiver.name()`), `fun interface Name`, and declarations whose
 * visibility keyword does not come first among the modifiers.
 *
 * Visibility: Kotlin's default is `public`. A leading `private`, `internal`
 * or `protected` marks the symbol as local; everything else is exported.
 *
 * @param fingerprint File fingerprint; its `path` is used in symbol ids and
 *   it is passed on to `makeFileNode`.
 * @param absPath Absolute path, read as UTF-8 only when `content` is nullish.
 * @param content Optional file text; when given, the file is not read.
 * @returns `{ fileNode, symbolNodes, edges, rawImportSpecifiers,
 *   importBindings, identifierReferences }`; the last two are always empty
 *   for Kotlin. Symbol lines are 1-based.
 */
export function extractKotlinFile(fingerprint, absPath, content) {
    const text = content ?? readFileSync(absPath, 'utf8');
    const fileNode = makeFileNode(fingerprint, text);
    const symbolNodes = [];
    const edges = [];
    // Patterns are built once per call instead of once per source line.
    const annotationsRe = /^(?:@\w+(?:\([^)]*\))?\s+)+/;
    const visibilityRe = /^(public|private|internal|protected)\s+/;
    const modifiersRe = /^(?:inline\s+|suspend\s+|tailrec\s+|infix\s+|operator\s+|external\s+|open\s+|abstract\s+|final\s+|sealed\s+|data\s+|value\s+|enum\s+|annotation\s+|inner\s+|companion\s+|expect\s+|actual\s+|override\s+)+/;
    const enumModifierRe = /(?:^|\s)enum\s/;
    // Tried in order; the first pattern that matches wins.
    const declarations = [
        { pattern: /^fun(?:\s*<[^>]+>)?\s+([A-Za-z_][\w]*)\s*[<(]/, declKind: 'function', isClassKeyword: false },
        { pattern: /^class\s+([A-Za-z_][\w]*)/, declKind: 'class', isClassKeyword: true },
        { pattern: /^interface\s+([A-Za-z_][\w]*)/, declKind: 'interface', isClassKeyword: false },
        { pattern: /^object\s+([A-Za-z_][\w]*)/, declKind: 'class', isClassKeyword: false },
        { pattern: /^typealias\s+([A-Za-z_][\w]*)/, declKind: 'type-alias', isClassKeyword: false },
        { pattern: /^(?:const\s+)?(?:val|var)\s+([A-Za-z_][\w]*)\s*[:=]/, declKind: 'const', isClassKeyword: false },
    ];
    const lines = text.split('\n');
    for (let i = 0; i < lines.length; i += 1) {
        const raw = lines[i];
        if (raw.length === 0)
            continue;
        // Indented lines belong to a body, not to the file's top level.
        if (raw.startsWith(' ') || raw.startsWith('\t'))
            continue;
        if (raw.trimStart().startsWith('//'))
            continue;
        // Strip leading annotations (e.g. `@Suppress("...")`, `@JvmStatic`).
        const stripped = raw.replace(annotationsRe, '');
        // Detect visibility (default to exported).
        const visMatch = visibilityRe.exec(stripped);
        const isExported = !visMatch || visMatch[1] === 'public';
        const afterVis = visMatch ? stripped.slice(visMatch[0].length) : stripped;
        // Strip declaration modifiers, but remember whether `enum` was among
        // them: `enum class Name` must be reported as an enum, not a class.
        const trimmed = afterVis.replace(modifiersRe, '');
        const modifiers = afterVis.slice(0, afterVis.length - trimmed.length);
        const isEnum = enumModifierRe.test(modifiers);
        for (const entry of declarations) {
            const m = entry.pattern.exec(trimmed);
            if (m === null)
                continue;
            const declKind = entry.isClassKeyword && isEnum ? 'enum' : entry.declKind;
            pushSymbol(fingerprint, symbolNodes, edges, fileNode.id, m[1], declKind, i + 1, isExported);
            break;
        }
    }
    return {
        fileNode,
        symbolNodes,
        edges,
        rawImportSpecifiers: scanKotlinImports(text),
        importBindings: [],
        identifierReferences: [],
    };
}
|
|
82
|
+
/**
 * Records one declared Kotlin symbol.
 *
 * Appends a Symbol node to `nodes` and a `DeclaresSymbol` edge (file → symbol)
 * to `edges`. Both arrays are mutated in place; nothing is returned.
 *
 * The symbol id is `symbol:<path>#<name>`, so two declarations with the same
 * name in one file share an id. The edge id is the SHA-1 hex digest of
 * `<fileId>|<symbolId>|<edge kind>`.
 *
 * @param fp File fingerprint; only `fp.path` is read.
 * @param nodes Output array of symbol nodes.
 * @param edges Output array of edges.
 * @param fileId Id of the file node that declares the symbol.
 * @param name Declared identifier.
 * @param declKind Declaration kind label (e.g. 'function', 'class').
 * @param line 1-based line of the declaration.
 * @param isExported Whether the symbol is visible outside the file.
 */
function pushSymbol(fp, nodes, edges, fileId, name, declKind, line, isExported) {
    const visibility = isExported ? 'export' : 'local';
    const symbolId = `symbol:${fp.path}#${name}`;
    nodes.push({
        id: symbolId,
        kind: NodeKind.Symbol,
        label: name,
        path: fp.path,
        line,
        data: { declKind, visibility, isExported, language: 'kotlin' },
    });
    const edgeKey = `${fileId}|${symbolId}|${EdgeKind.DeclaresSymbol}`;
    edges.push({
        id: createHash('sha1').update(edgeKey).digest('hex'),
        from: fileId,
        to: symbolId,
        kind: EdgeKind.DeclaresSymbol,
        source: EXTRACT_KOTLIN_FILE_SOURCE,
        data: { visibility, declKind, line },
    });
}
|
|
101
|
+
/**
 * Builds the File node for a Kotlin source file.
 *
 * The label is the last `/`-separated segment of `fp.path`. The node is tagged
 * `kotlin`, plus `test` when the path looks like a test file. Export/local
 * counters start at 0 and are not updated here. When the text has a
 * `package a.b.c` line, its name is stored as `data.kotlinPackage`.
 *
 * @param fp File fingerprint (`path`, `nodeId`, `sizeBytes`, `sha1` are read).
 * @param text Full file text, used only to find the package declaration.
 * @returns The file node object.
 */
function makeFileNode(fp, text) {
    const segments = fp.path.split('/');
    const label = segments.pop() ?? fp.path;
    const tags = ['kotlin'];
    if (isKotlinTestPath(fp.path)) {
        tags.push('test');
    }
    const packageMatch = /^package\s+([\w.]+)/m.exec(text);
    const packageInfo = packageMatch ? { kotlinPackage: packageMatch[1] } : {};
    const data = {
        language: 'kotlin',
        sizeBytes: fp.sizeBytes,
        sha1: fp.sha1,
        hasDefaultExport: false,
        exportCount: 0,
        localCount: 0,
        reExportCount: 0,
        ...packageInfo,
    };
    return { id: fp.nodeId, kind: NodeKind.File, label, path: fp.path, tags, data };
}
|
|
125
|
+
/**
 * Tells whether a project-relative path looks like a Kotlin test file.
 *
 * A path qualifies when it is under `src/test/`, under any `src/...Test/`
 * directory (e.g. `src/androidTest/`), or when the file name ends in
 * `Test.kt` / `Test.kts`.
 *
 * @param rel Path using `/` separators.
 * @returns `true` when any of the conventions matches.
 */
function isKotlinTestPath(rel) {
    const testPathPatterns = [
        /(?:^|\/)src\/test\//,
        /(?:^|\/)src\/.*Test\//,
        /(?:^|\/)[\w-]+Test\.kts?$/,
    ];
    return testPathPatterns.some((pattern) => pattern.test(rel));
}
|
|
130
|
+
/**
 * Collects the `import` directives of a Kotlin file.
 *
 * Recognised at column 0: `import a.b.C`, `import a.b.*` and
 * `import a.b.C as D`, each optionally followed by a `;` and/or a trailing
 * `//` comment. The alias is not part of the specifier.
 *
 * Line numbers are tracked with a single forward pass over the text, because
 * regex matches arrive in increasing offset order.
 *
 * @param text Full file text.
 * @returns One `{ specifier, line, kind: 'kotlin-import' }` entry per import,
 *   in source order, with 1-based lines and without (specifier, line)
 *   duplicates.
 */
function scanKotlinImports(text) {
    // `[^\S\n]` is "whitespace except newline", so the tail never runs onto
    // the next line; `$` (multiline) also matches in front of a `\r`.
    const importRe = /^import\s+([\w.]+(?:\.\*)?)(?:\s+as\s+\w+)?[^\S\n]*;?[^\S\n]*(?:\/\/.*)?$/gm;
    const found = [];
    const seen = new Set();
    let cursor = 0;
    let line = 1;
    let m;
    while ((m = importRe.exec(text)) !== null) {
        // Count only the newlines between the previous match and this one.
        for (; cursor < m.index; cursor += 1) {
            if (text.charCodeAt(cursor) === 10)
                line += 1;
        }
        const key = `${m[1]}|${line}`;
        if (seen.has(key))
            continue;
        seen.add(key);
        found.push({ specifier: m[1], line, kind: 'kotlin-import' });
    }
    return found;
}
|
|
151
|
+
/**
 * Converts a character offset into a 1-based line number.
 *
 * The result is 1 plus the number of `\n` characters located strictly before
 * `offset`; offsets past the end of the text are clamped to its length.
 *
 * @param text Text the offset refers to.
 * @param offset Zero-based character offset.
 * @returns 1-based line number.
 */
function lineFromOffset(text, offset) {
    const end = Math.min(offset, text.length);
    let line = 1;
    let at = text.indexOf('\n');
    while (at !== -1 && at < end) {
        line += 1;
        at = text.indexOf('\n', at + 1);
    }
    return line;
}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import type { IFileFingerprint } from '../schema/file-fingerprint.js';
|
|
2
|
+
import type { IExtractedFile } from './extract-ts-file.js';
|
|
3
|
+
/** Extractor id and version; stamped on the `source` field of every edge this extractor emits. */
export declare const EXTRACT_PHP_FILE_SOURCE = "extract-php-file@v1";
/**
 * Regex-based PHP extractor.
 *
 * Works line by line. Detected:
 *   - `namespace Path\Sub`                        → namespace symbol
 *   - `[abstract|final|readonly] class Name`      → class
 *   - `interface Name`                            → interface
 *   - `trait Name`                                → class (treated like a mixin)
 *   - `enum Name`                                 → enum
 *   - `function name(…)`                          → function (column 0 only)
 *
 * Namespace and class-like declarations are matched at any indentation (so
 * they are found inside `namespace … {` blocks); functions only at column 0.
 *
 * Imports: `use Path\To\Class;`, `use Path\To\{A, B as C};`,
 *          `use function Foo\bar`, `use const Foo\BAR`.
 *
 * Visibility: every emitted symbol is marked `isExported: true`; the
 * extractor does not look for `private` / `protected`.
 * Class-internal methods are NOT walked here — those live in their
 * class's body, which the framework-scanner can inspect when it
 * needs to.
 *
 * @param fingerprint Fingerprint of the file being indexed.
 * @param absPath Absolute path; read as UTF-8 only when `content` is omitted.
 * @param content Optional file text, used instead of reading `absPath`.
 * @returns The extracted file node, symbols, edges and raw import specifiers;
 *   `importBindings` and `identifierReferences` are always empty for PHP.
 */
export declare function extractPhpFile(fingerprint: IFileFingerprint, absPath: string, content?: string): IExtractedFile;
|
|
26
|
+
//# sourceMappingURL=extract-php-file.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"extract-php-file.d.ts","sourceRoot":"","sources":["../../src/indexer/extract-php-file.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,+BAA+B,CAAC;AAGtE,OAAO,KAAK,EACV,cAAc,EAEf,MAAM,sBAAsB,CAAC;AAE9B,eAAO,MAAM,uBAAuB,wBAAwB,CAAC;AAE7D;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,wBAAgB,cAAc,CAC5B,WAAW,EAAE,gBAAgB,EAC7B,OAAO,EAAE,MAAM,EACf,OAAO,CAAC,EAAE,MAAM,GACf,cAAc,CA+ChB"}
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
import { createHash } from 'node:crypto';
|
|
2
|
+
import { readFileSync } from 'node:fs';
|
|
3
|
+
import { EdgeKind } from "../schema/edge-kind.js";
|
|
4
|
+
import { NodeKind } from "../schema/node-kind.js";
|
|
5
|
+
export const EXTRACT_PHP_FILE_SOURCE = 'extract-php-file@v1';
|
|
6
|
+
/**
 * Regex-based PHP extractor.
 *
 * Works line by line. Lines that are empty or start (after indentation) with
 * `//`, `/*` or a `#` comment are skipped; `#[...]` is a PHP 8 attribute, not
 * a comment, and is stripped before matching. Detected:
 *   - `namespace Path\Sub`                        → namespace symbol
 *   - `[abstract|final|readonly] class Name`      → class
 *   - `interface Name`                            → interface
 *   - `trait Name`                                → class (treated like a mixin)
 *   - `enum Name`                                 → enum
 *   - `function name(…)`                          → function (column 0 only)
 *
 * Namespace and class-like declarations are matched at any indentation (so
 * they are found inside `namespace … {` blocks); functions only at column 0,
 * which keeps class methods out.
 *
 * Imports: `use Path\To\Class;`, `use Path\To\{A, B as C};`,
 *          `use function Foo\bar`, `use const Foo\BAR`.
 *
 * Visibility: every emitted symbol is marked `isExported: true`; the
 * extractor does not look for `private` / `protected`.
 *
 * Known limits: an attribute whose arguments contain `]` is only partly
 * stripped, so a declaration on that same line is not recognised.
 *
 * @param fingerprint File fingerprint; its `path` is used in symbol ids and
 *   it is passed on to `makeFileNode`.
 * @param absPath Absolute path, read as UTF-8 only when `content` is nullish.
 * @param content Optional file text; when given, the file is not read.
 * @returns `{ fileNode, symbolNodes, edges, rawImportSpecifiers,
 *   importBindings, identifierReferences }`; the last two are always empty
 *   for PHP. Symbol lines are 1-based.
 */
export function extractPhpFile(fingerprint, absPath, content) {
    const text = content ?? readFileSync(absPath, 'utf8');
    const fileNode = makeFileNode(fingerprint, text);
    const symbolNodes = [];
    const edges = [];
    // Patterns are built once per call instead of once per source line.
    const namespaceRe = /^namespace\s+([\w\\]+)/;
    const attributesRe = /^(?:#\[[^\]]*\]\s*)+/;
    const typeDeclRe = /^(?:abstract\s+|final\s+|readonly\s+)*\s*(class|interface|trait|enum)\s+([A-Za-z_]\w*)/;
    const functionRe = /^function\s+([A-Za-z_]\w*)\s*\(/;
    const lines = text.split('\n');
    for (let i = 0; i < lines.length; i += 1) {
        const raw = lines[i];
        if (raw.length === 0)
            continue;
        const trimmed = raw.trimStart();
        // `#[` opens an attribute; treating it as a `#` comment would hide
        // every declaration that carries an attribute on the same line.
        const isHashComment = trimmed.startsWith('#') && !trimmed.startsWith('#[');
        if (trimmed.startsWith('//') || isHashComment || trimmed.startsWith('/*'))
            continue;
        // namespace Path\Sub;   OR   namespace Path\Sub { ... }
        let m = namespaceRe.exec(trimmed);
        if (m) {
            pushSymbol(fingerprint, symbolNodes, edges, fileNode.id, m[1], 'namespace', i + 1, true);
            continue;
        }
        // Strip attributes `#[Attr]` (PHP 8 attributes).
        const stripped = trimmed.replace(attributesRe, '');
        // class / interface / trait / enum, allowing modifiers in any order
        m = typeDeclRe.exec(stripped);
        if (m) {
            let declKind = 'class';
            if (m[1] === 'interface' || m[1] === 'enum') {
                declKind = m[1];
            }
            pushSymbol(fingerprint, symbolNodes, edges, fileNode.id, m[2], declKind, i + 1, true);
            continue;
        }
        // File-level `function name(...)`: only at column 0, so indented
        // class methods are never picked up.
        const atColumnZero = !raw.startsWith(' ') && !raw.startsWith('\t');
        if (atColumnZero && stripped.startsWith('function ')) {
            m = functionRe.exec(stripped);
            if (m) {
                pushSymbol(fingerprint, symbolNodes, edges, fileNode.id, m[1], 'function', i + 1, true);
            }
        }
    }
    return {
        fileNode,
        symbolNodes,
        edges,
        rawImportSpecifiers: scanPhpImports(text),
        importBindings: [],
        identifierReferences: [],
    };
}
|
|
74
|
+
/**
 * Records one declared PHP symbol.
 *
 * Pushes a Symbol node onto `nodes` and the matching `DeclaresSymbol` edge
 * (file → symbol) onto `edges`; both arrays are mutated and nothing is
 * returned. The symbol id is `symbol:<path>#<name>`; the edge id is the SHA-1
 * hex digest of `<fileId>|<symbolId>|<edge kind>`.
 *
 * @param fp File fingerprint; only `fp.path` is read.
 * @param nodes Output array of symbol nodes.
 * @param edges Output array of edges.
 * @param fileId Id of the declaring file node.
 * @param name Declared identifier.
 * @param declKind Declaration kind label (e.g. 'class', 'namespace').
 * @param line 1-based line of the declaration.
 * @param isExported Whether the symbol is visible outside the file.
 */
function pushSymbol(fp, nodes, edges, fileId, name, declKind, line, isExported) {
    const visibility = isExported ? 'export' : 'local';
    const symbolNode = {
        id: `symbol:${fp.path}#${name}`,
        kind: NodeKind.Symbol,
        label: name,
        path: fp.path,
        line,
        data: { declKind, visibility, isExported, language: 'php' },
    };
    nodes.push(symbolNode);
    const hash = createHash('sha1');
    hash.update(`${fileId}|${symbolNode.id}|${EdgeKind.DeclaresSymbol}`);
    edges.push({
        id: hash.digest('hex'),
        from: fileId,
        to: symbolNode.id,
        kind: EdgeKind.DeclaresSymbol,
        source: EXTRACT_PHP_FILE_SOURCE,
        data: { visibility, declKind, line },
    });
}
|
|
93
|
+
/**
 * Builds the File node for a PHP source file.
 *
 * The label is the last `/`-separated segment of `fp.path`. The node is tagged
 * `php`, plus `test` when the path looks like a test file. Export/local
 * counters start at 0 and are not updated here. The first `namespace X\Y`
 * found at the start of a line is stored as `data.phpNamespace`.
 *
 * @param fp File fingerprint (`path`, `nodeId`, `sizeBytes`, `sha1` are read).
 * @param text Full file text, used only to find the namespace declaration.
 * @returns The file node object.
 */
function makeFileNode(fp, text) {
    const segments = fp.path.split('/');
    const label = segments.pop() ?? fp.path;
    const tags = ['php'];
    if (isPhpTestPath(fp.path)) {
        tags.push('test');
    }
    const nsMatch = /^namespace\s+([\w\\]+)/m.exec(text);
    const namespaceInfo = nsMatch ? { phpNamespace: nsMatch[1] } : {};
    const data = {
        language: 'php',
        sizeBytes: fp.sizeBytes,
        sha1: fp.sha1,
        hasDefaultExport: false,
        exportCount: 0,
        localCount: 0,
        reExportCount: 0,
        ...namespaceInfo,
    };
    return { id: fp.nodeId, kind: NodeKind.File, label, path: fp.path, tags, data };
}
|
|
117
|
+
/**
 * Tells whether a project-relative path looks like a PHP test file.
 *
 * A path qualifies when it has a `tests/` or `test/` directory segment, or
 * when the file name ends in `Test.php` or `.test.php`.
 *
 * @param rel Path using `/` separators.
 * @returns `true` when any of the conventions matches.
 */
function isPhpTestPath(rel) {
    if (/(?:^|\/)(?:tests|test)\//.test(rel)) {
        return true;
    }
    if (/(?:^|\/)[\w-]+Test\.php$/.test(rel)) {
        return true;
    }
    return /(?:^|\/)[\w-]+\.test\.php$/.test(rel);
}
|
|
122
|
+
/**
 * Collects the `use` imports of a PHP file.
 *
 * Recognised forms (the statement must start its line, indentation allowed):
 *   - `use Path\To\Class;` and `use Path\To\Class as Alias;`
 *   - `use function Path\to\fn;` and `use const Path\TO\CONST;`
 *   - `use A\B, C\D as E;` (comma-separated list, one entry per name)
 *   - `use Path\To\{A, B as C, function f};` (group use, may span lines)
 *
 * Aliases and `function` / `const` qualifiers are not part of the specifier.
 * The reported line is the line holding the `use` keyword: leading whitespace
 * may not cross a newline, so blank lines above a statement no longer shift
 * its line number. Lines are tracked in one forward pass over the text.
 *
 * @param text Full file text.
 * @returns `{ specifier, line, kind: 'php-use' }` entries in source order,
 *   with 1-based lines and without (specifier, line) duplicates.
 */
function scanPhpImports(text) {
    const useRe = /^[^\S\r\n]*use\s+(?:function\s+|const\s+)?([\w\\]+(?:\s+as\s+\w+)?(?:\s*,\s*[\w\\]+(?:\s+as\s+\w+)?)*)(?:\s*\{([^}]*)\})?\s*;/gm;
    const nameRe = /^[\w\\]+$/;
    const found = [];
    const seen = new Set();
    const record = (specifier, line) => {
        const key = `${specifier}|${line}`;
        if (seen.has(key))
            return;
        seen.add(key);
        found.push({ specifier, line, kind: 'php-use' });
    };
    // Drops a leading `function ` / `const ` and a trailing ` as Alias`.
    const clauseName = (clause) => clause.trim().replace(/^(?:function|const)\s+/, '').split(/\s+as\s+/)[0].trim();
    let cursor = 0;
    let line = 1;
    let m;
    while ((m = useRe.exec(text)) !== null) {
        // Count only the newlines between the previous match and this one.
        for (; cursor < m.index; cursor += 1) {
            if (text.charCodeAt(cursor) === 10)
                line += 1;
        }
        if (m[2] !== undefined) {
            // Group use: the head is the shared prefix, usually ending in `\`.
            const base = m[1].trim().replace(/\\$/, '');
            if (!nameRe.test(base))
                continue;
            for (const clause of m[2].split(',')) {
                const name = clauseName(clause);
                if (nameRe.test(name))
                    record(`${base}\\${name}`, line);
            }
        }
        else {
            for (const clause of m[1].split(',')) {
                const name = clauseName(clause).replace(/\\$/, '');
                if (name)
                    record(name, line);
            }
        }
    }
    return found;
}
|
|
154
|
+
/**
 * Maps a character offset to its 1-based line number.
 *
 * Counts the `\n` characters that occur strictly before `offset` (clamped to
 * the text length) and adds one.
 *
 * @param text Text the offset refers to.
 * @param offset Zero-based character offset.
 * @returns 1-based line number.
 */
function lineFromOffset(text, offset) {
    const limit = Math.min(offset, text.length);
    let lineNumber = 1;
    for (let nl = text.indexOf('\n'); nl !== -1 && nl < limit; nl = text.indexOf('\n', nl + 1)) {
        lineNumber += 1;
    }
    return lineNumber;
}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import type { IFileFingerprint } from '../schema/file-fingerprint.js';
|
|
2
|
+
import type { IExtractedFile } from './extract-ts-file.js';
|
|
3
|
+
/** Extractor id and version; stamped on the `source` field of every edge this extractor emits. */
export declare const EXTRACT_PYTHON_FILE_SOURCE = "extract-python-file@v1";
/**
 * Regex-based Python extractor.
 *
 * Module-level constructs only — no class/function body inspection, no
 * cross-reference resolution. The output matches the TS extractor's
 * `IExtractedFile` shape so the indexer can dispatch by language
 * without branching downstream.
 *
 * What we extract:
 *   - `def NAME(...)` and `async def NAME(...)` at column 0 → symbol
 *     (function); sync and async are not distinguished.
 *   - `class NAME(...)` / `class NAME:` at column 0 → symbol (class).
 *   - Top-level CONSTANT_LIKE assignments → symbol (const-ish). Filter:
 *     identifier is UPPERCASE (two or more characters) and at column 0.
 *   - `import X` / `import X as Y` / `from X import ...` (including
 *     relative `.` / `..`) → raw import specifiers. Resolution to a
 *     project-relative file is deferred — Python's import resolution
 *     depends on `sys.path`, which we don't model in the MVP.
 *
 * Every symbol is emitted as exported; a leading underscore is not treated
 * as private.
 *
 * What we don't extract (yet):
 *   - Decorators (FastAPI / Flask / Django routes are framework-scanner
 *     territory).
 *   - Nested defs / classes.
 *   - Type aliases / TypeVar / NewType.
 *
 * @param fingerprint Fingerprint of the file being indexed.
 * @param absPath Absolute path; read as UTF-8 only when `content` is omitted.
 * @param content Optional file text, used instead of reading `absPath`.
 * @returns The extracted file node, symbols, edges and raw import specifiers;
 *   `importBindings` and `identifierReferences` are always empty for Python.
 */
export declare function extractPythonFile(fingerprint: IFileFingerprint, absPath: string, content?: string): IExtractedFile;
|
|
30
|
+
//# sourceMappingURL=extract-python-file.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"extract-python-file.d.ts","sourceRoot":"","sources":["../../src/indexer/extract-python-file.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,+BAA+B,CAAC;AAGtE,OAAO,KAAK,EACV,cAAc,EAEf,MAAM,sBAAsB,CAAC;AAE9B,eAAO,MAAM,0BAA0B,2BAA2B,CAAC;AAEnE;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AACH,wBAAgB,iBAAiB,CAC/B,WAAW,EAAE,gBAAgB,EAC7B,OAAO,EAAE,MAAM,EACf,OAAO,CAAC,EAAE,MAAM,GACf,cAAc,CAyEhB"}
|
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
import { createHash } from 'node:crypto';
|
|
2
|
+
import { readFileSync } from 'node:fs';
|
|
3
|
+
import { EdgeKind } from "../schema/edge-kind.js";
|
|
4
|
+
import { NodeKind } from "../schema/node-kind.js";
|
|
5
|
+
export const EXTRACT_PYTHON_FILE_SOURCE = 'extract-python-file@v1';
|
|
6
|
+
/**
 * Regex-based Python extractor.
 *
 * Module-level constructs only — no class/function body inspection, no
 * cross-reference resolution. The output matches the TS extractor's
 * `IExtractedFile` shape so the indexer can dispatch by language
 * without branching downstream.
 *
 * What we extract:
 *   - `def NAME(...)` and `async def NAME(...)` at column 0 → symbol
 *     (function); sync and async are not distinguished.
 *   - `class NAME(...)` / `class NAME:` at column 0 → symbol (class).
 *   - Top-level CONSTANT_LIKE assignments → symbol (const-ish). Filter:
 *     identifier is UPPERCASE (two or more characters), at column 0, and
 *     followed by a real `=` (optionally after a `: type` annotation), not
 *     by the `==` comparison operator.
 *   - `import X` / `import X as Y` / `from X import ...` (including
 *     relative `.` / `..`) → raw import specifiers. Resolution to a
 *     project-relative file is deferred — Python's import resolution
 *     depends on `sys.path`, which we don't model in the MVP.
 *
 * Every symbol is emitted as exported; a leading underscore is not treated
 * as private.
 *
 * What we don't extract (yet):
 *   - Decorators (FastAPI / Flask / Django routes are framework-scanner
 *     territory).
 *   - Nested defs / classes.
 *   - Type aliases / TypeVar / NewType.
 *
 * @param fingerprint File fingerprint; its `path` is used in symbol ids and
 *   it is passed on to `makeFileNode`.
 * @param absPath Absolute path, read as UTF-8 only when `content` is nullish.
 * @param content Optional file text; when given, the file is not read.
 * @returns `{ fileNode, symbolNodes, edges, rawImportSpecifiers,
 *   importBindings, identifierReferences }`; the last two are always empty
 *   for Python. Symbol lines are 1-based.
 */
export function extractPythonFile(fingerprint, absPath, content) {
    const text = content ?? readFileSync(absPath, 'utf8');
    const fileNode = makeFileNode(fingerprint);
    const symbolNodes = [];
    const edges = [];
    // All patterns are anchored at column 0 and start with different tokens,
    // so at most one of them can match a given line.
    const declarations = [
        { pattern: /^(?:async\s+)?def\s+([A-Za-z_][\w]*)\s*\(/, declKind: 'function' },
        { pattern: /^class\s+([A-Za-z_][\w]*)\s*[\(:]/, declKind: 'class' },
        // `=(?!=)` keeps `NAME == value` from being read as an assignment.
        { pattern: /^([A-Z][A-Z0-9_]+)\s*(?::[^=]+)?=(?!=)/, declKind: 'const' },
    ];
    // Only lines whose first character is `#` are skipped as comments. Text
    // inside multi-line string literals / docstrings is NOT recognised, so a
    // column-0 `def foo(` inside one is still reported (out of scope for the
    // MVP).
    const lines = text.split('\n');
    for (let i = 0; i < lines.length; i += 1) {
        const raw = lines[i];
        if (raw.length === 0 || raw[0] === '#')
            continue;
        const line = i + 1;
        for (const { pattern, declKind } of declarations) {
            const m = pattern.exec(raw);
            if (m === null)
                continue;
            const sym = makeSymbol(fingerprint, m[1], declKind, line);
            symbolNodes.push(sym);
            edges.push(buildEdge(fileNode.id, sym.id, EdgeKind.DeclaresSymbol, {
                visibility: 'export',
                declKind,
                line,
            }));
            break;
        }
    }
    const rawImportSpecifiers = scanPythonImports(text);
    return {
        fileNode,
        symbolNodes,
        edges,
        rawImportSpecifiers,
        importBindings: [],
        identifierReferences: [],
    };
}
|
|
104
|
+
/**
 * Builds the File node for a Python source file.
 *
 * The label is the last `/`-separated segment of `fp.path`. The node is tagged
 * `python`, plus `test` when the path looks like a test file. Export/local
 * counters start at 0 and are not updated here.
 *
 * @param fp File fingerprint (`path`, `nodeId`, `sizeBytes`, `sha1` are read).
 * @returns The file node object.
 */
function makeFileNode(fp) {
    const segments = fp.path.split('/');
    const label = segments.pop() ?? fp.path;
    const tags = ['python'];
    if (isPythonTestPath(fp.path)) {
        tags.push('test');
    }
    const data = {
        language: 'python',
        sizeBytes: fp.sizeBytes,
        sha1: fp.sha1,
        hasDefaultExport: false,
        exportCount: 0,
        localCount: 0,
        reExportCount: 0,
    };
    return { id: fp.nodeId, kind: NodeKind.File, label, path: fp.path, tags, data };
}
|
|
126
|
+
/**
 * Creates the Symbol node for one module-level Python declaration.
 *
 * The id is `symbol:<path>#<name>`. Every symbol is marked as exported.
 *
 * @param fp File fingerprint; only `fp.path` is read.
 * @param name Declared identifier.
 * @param declKind Declaration kind label ('function', 'class' or 'const').
 * @param line 1-based line of the declaration.
 * @returns The symbol node object.
 */
function makeSymbol(fp, name, declKind, line) {
    const data = {
        declKind,
        visibility: 'export',
        isExported: true,
        language: 'python',
    };
    const symbolId = `symbol:${fp.path}#${name}`;
    return { id: symbolId, kind: NodeKind.Symbol, label: name, path: fp.path, line, data };
}
|
|
141
|
+
/**
 * Creates a graph edge attributed to the Python extractor.
 *
 * The edge id is the SHA-1 hex digest of `<from>|<to>|<kind>`, so the same
 * triple always produces the same id. A `data` property is present only when
 * a truthy `data` argument is supplied.
 *
 * @param from Id of the source node.
 * @param to Id of the target node.
 * @param kind Edge kind.
 * @param data Optional payload attached to the edge.
 * @returns The edge object.
 */
function buildEdge(from, to, kind, data) {
    const id = createHash('sha1').update(`${from}|${to}|${kind}`).digest('hex');
    const edge = { id, from, to, kind, source: EXTRACT_PYTHON_FILE_SOURCE };
    return data ? { ...edge, data } : edge;
}
|
|
151
|
+
/**
 * Tells whether a project-relative path looks like a Python test file.
 *
 * Matches the common conventions: a `tests/` or `test/` directory segment,
 * a `test_<name>.py` file, or a `<name>_test.py` file.
 *
 * @param rel Path using `/` separators.
 * @returns `true` when any of the conventions matches.
 */
function isPythonTestPath(rel) {
    const conventions = [
        /(?:^|\/)(?:tests?|test)\//,
        /(?:^|\/)test_[\w-]+\.py$/,
        /(?:^|\/)[\w-]+_test\.py$/,
    ];
    for (const convention of conventions) {
        if (convention.test(rel)) {
            return true;
        }
    }
    return false;
}
|
|
157
|
+
/**
 * Collects the column-0 import statements of a Python file.
 *
 * Recognised forms:
 *   - `from X import ...` → one `from-import` entry with specifier `X`
 *     (`X` may be relative: `.`, `..parent`, or dotted: `pkg.sub`).
 *   - `import X`, `import X as Y`, `import X, Y as Z, W` → one `import`
 *     entry per module; aliases are not part of the specifier.
 *
 * All `from-import` entries come first, followed by all `import` entries;
 * each group is in source order. Line numbers are tracked with one forward
 * pass per group, because regex matches arrive in increasing offset order.
 *
 * @param text Full file text.
 * @returns `{ specifier, line, kind }` entries with 1-based lines and without
 *   (kind, specifier, line) duplicates.
 */
function scanPythonImports(text) {
    // `from X import Y, Z` (X may include dots: `.relative`, `..parent`, `pkg.sub`)
    const fromRe = /^from\s+([.\w]+)\s+import\s+/gm;
    // `import X`, `import X as Y`, `import X, Y as Z`: each list entry may
    // carry its own alias.
    const importRe = /^import\s+([.\w]+(?:[ \t]+as[ \t]+\w+)?(?:\s*,\s*[.\w]+(?:[ \t]+as[ \t]+\w+)?)*)/gm;
    const found = [];
    const seen = new Set();
    const record = (kind, specifier, line) => {
        const key = `${kind}|${specifier}|${line}`;
        if (seen.has(key))
            return;
        seen.add(key);
        found.push({ specifier, line, kind });
    };
    // Runs `re` over the whole text and hands every match, together with its
    // 1-based line, to `onMatch`.
    const scan = (re, onMatch) => {
        let cursor = 0;
        let line = 1;
        let m;
        while ((m = re.exec(text)) !== null) {
            for (; cursor < m.index; cursor += 1) {
                if (text.charCodeAt(cursor) === 10)
                    line += 1;
            }
            onMatch(m, line);
        }
    };
    scan(fromRe, (m, line) => record('from-import', m[1], line));
    scan(importRe, (m, line) => {
        for (const entry of m[1].split(',')) {
            // First token of `name as alias` is the module name.
            const name = entry.trim().split(/\s+/)[0];
            if (name)
                record('import', name, line);
        }
    });
    return found;
}
|
|
189
|
+
/**
 * Returns the 1-based line number that contains a character offset.
 *
 * Equal to one plus the number of `\n` characters found strictly before
 * `offset`; an offset beyond the end of the text is clamped to its length.
 *
 * @param text Text the offset refers to.
 * @param offset Zero-based character offset.
 * @returns 1-based line number.
 */
function lineFromOffset(text, offset) {
    const stop = Math.min(offset, text.length);
    let count = 1;
    let pos = text.indexOf('\n');
    while (pos !== -1 && pos < stop) {
        count += 1;
        pos = text.indexOf('\n', pos + 1);
    }
    return count;
}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
import type { IFileFingerprint } from '../schema/file-fingerprint.js';
|
|
2
|
+
import type { IExtractedFile } from './extract-ts-file.js';
|
|
3
|
+
/**
 * Extractor id and version.
 * NOTE(review): presumably stamped on the `source` field of emitted edges, as
 * the sibling extractors do — confirm against the Ruby implementation.
 */
export declare const EXTRACT_RUBY_FILE_SOURCE = "extract-ruby-file@v1";
/**
 * Regex-based Ruby extractor.
 *
 * Top-level constructs only (column-0):
 *   - `class Name` / `class Name < Base`      → class symbol
 *   - `module Name`                           → module symbol
 *   - `def name` / `def self.name`            → function symbol
 *   - `NAME = …` (uppercase identifier)       → const symbol
 *
 * Ruby has no public/private declarations at the top level — every
 * top-level symbol is reachable. All emitted symbols have
 * `isExported: true` for graph consumers; the `visibility` data field
 * preserves the explicit modifier (`private` / `protected`) when one
 * precedes the def, otherwise defaults to `export`.
 *
 * Imports: `require '...'`, `require_relative '...'`, `load '...'`.
 * Specifiers are stored exactly as written, without quotes.
 *
 * Out of scope:
 *   - Method-body inspection (visibility modifiers inside class bodies
 *     toggle subsequent defs; we don't track that state here).
 *   - Mixins (`include`, `extend`, `prepend`).
 *   - Singleton classes / `class << self` blocks.
 *
 * @param fingerprint Fingerprint of the file being indexed.
 * @param absPath Absolute path of the file.
 * @param content Optional file text. NOTE(review): the sibling extractors use
 *   this in place of reading `absPath` — confirm the Ruby implementation does
 *   the same.
 * @returns The extracted file in the shared `IExtractedFile` shape.
 */
export declare function extractRubyFile(fingerprint: IFileFingerprint, absPath: string, content?: string): IExtractedFile;
|
|
29
|
+
//# sourceMappingURL=extract-ruby-file.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"extract-ruby-file.d.ts","sourceRoot":"","sources":["../../src/indexer/extract-ruby-file.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,+BAA+B,CAAC;AAGtE,OAAO,KAAK,EACV,cAAc,EAEf,MAAM,sBAAsB,CAAC;AAE9B,eAAO,MAAM,wBAAwB,yBAAyB,CAAC;AAE/D;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,wBAAgB,eAAe,CAC7B,WAAW,EAAE,gBAAgB,EAC7B,OAAO,EAAE,MAAM,EACf,OAAO,CAAC,EAAE,MAAM,GACf,cAAc,CA8ChB"}
|