gitnexus 1.6.6-rc.41 → 1.6.6-rc.43
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -0
- package/dist/cli/analyze.d.ts +2 -0
- package/dist/cli/analyze.js +30 -1
- package/dist/cli/cli-message.d.ts +25 -3
- package/dist/cli/index.js +4 -0
- package/dist/core/ingestion/call-extractors/configs/c-cpp.js +157 -0
- package/dist/core/ingestion/languages/c-cpp.js +5 -3
- package/dist/core/ingestion/languages/cpp/captures.js +81 -5
- package/dist/core/ingestion/languages/cpp/query.js +69 -0
- package/dist/core/ingestion/scope-extractor.js +1 -0
- package/dist/core/ingestion/tree-sitter-queries.d.ts +1 -1
- package/dist/core/ingestion/tree-sitter-queries.js +10 -2
- package/dist/core/lbug/lbug-adapter.d.ts +15 -0
- package/dist/core/lbug/lbug-adapter.js +21 -0
- package/dist/core/lbug/lbug-config.d.ts +7 -0
- package/dist/core/lbug/lbug-config.js +82 -2
- package/dist/core/lbug/wal-checkpoint-driver.d.ts +98 -0
- package/dist/core/lbug/wal-checkpoint-driver.js +189 -0
- package/dist/core/run-analyze.js +21 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -158,6 +158,7 @@ gitnexus analyze --skip-agents-md # Preserve custom AGENTS.md/CLAUDE.md gitnexu
|
|
|
158
158
|
gitnexus analyze --verbose # Log skipped files when parsers are unavailable
|
|
159
159
|
gitnexus analyze --max-file-size 1024 # Skip files larger than N KB (default: 512, cap: 32768)
|
|
160
160
|
gitnexus analyze --worker-timeout 60 # Increase worker idle timeout for slow parses
|
|
161
|
+
gitnexus analyze --wal-checkpoint-threshold 67108864 # Set the LadybugDB WAL auto-checkpoint threshold in bytes (default: 67108864 = 64 MiB; -1 keeps Ladybug's stock ~16 MiB)
|
|
161
162
|
gitnexus mcp # Start MCP server (stdio) — serves all indexed repos
|
|
162
163
|
gitnexus serve # Start local HTTP server (multi-repo) for web UI
|
|
163
164
|
gitnexus index # Register an existing .gitnexus/ folder into the global registry
|
|
@@ -307,6 +308,7 @@ Configure the behavior with two environment variables:
|
|
|
307
308
|
|----------|--------|---------|--------|
|
|
308
309
|
| `GITNEXUS_LBUG_EXTENSION_INSTALL` | `auto`, `load-only`, `never` | `auto` | `auto` runs one bounded INSTALL if LOAD fails. `load-only` only uses already-installed extensions (recommended for offline / firewalled environments). `never` skips optional extensions entirely. |
|
|
309
310
|
| `GITNEXUS_LBUG_EXTENSION_INSTALL_TIMEOUT_MS` | positive integer | `15000` | Wall-clock budget for the out-of-process `INSTALL` child before it is killed. |
|
|
311
|
+
| `GITNEXUS_WAL_CHECKPOINT_THRESHOLD` | integer `>= -1` | `67108864` (64 MiB) | LadybugDB WAL auto-checkpoint threshold during analyze (bytes). Auto-checkpoint remains enabled; `-1` keeps Ladybug's stock ~16 MiB. Larger thresholds reduce checkpoint frequency but increase the WAL size at rotation time — choose a smaller value on disk-constrained environments. |
|
|
310
312
|
|
|
311
313
|
```bash
|
|
312
314
|
# Offline/airgapped: never reach the network for extensions
|
package/dist/cli/analyze.d.ts
CHANGED
|
@@ -69,6 +69,8 @@ export interface AnalyzeOptions {
|
|
|
69
69
|
maxFileSize?: string;
|
|
70
70
|
/** Override worker sub-batch idle timeout in seconds. */
|
|
71
71
|
workerTimeout?: string;
|
|
72
|
+
/** Control LadybugDB WAL auto-checkpoint threshold during analyze. */
|
|
73
|
+
walCheckpointThreshold?: string;
|
|
72
74
|
/** Parse worker pool size; 0 disables workers (sequential fallback). */
|
|
73
75
|
workers?: string;
|
|
74
76
|
embeddingThreads?: string;
|
package/dist/cli/analyze.js
CHANGED
|
@@ -12,7 +12,7 @@ import { spawn } from 'child_process';
|
|
|
12
12
|
import v8 from 'v8';
|
|
13
13
|
import cliProgress from 'cli-progress';
|
|
14
14
|
import { closeLbug } from '../core/lbug/lbug-adapter.js';
|
|
15
|
-
import { isWalCorruptionError, WAL_RECOVERY_SUGGESTION } from '../core/lbug/lbug-config.js';
|
|
15
|
+
import { isLbugCheckpointIoError, isWalCorruptionError, parseWalCheckpointThreshold, WAL_RECOVERY_SUGGESTION, } from '../core/lbug/lbug-config.js';
|
|
16
16
|
import { getStoragePaths, getGlobalRegistryPath, RegistryNameCollisionError, AnalysisNotFinalizedError, assertAnalysisFinalized, } from '../storage/repo-manager.js';
|
|
17
17
|
import { getGitRoot, hasGitDir } from '../storage/git.js';
|
|
18
18
|
import { runFullAnalysis } from '../core/run-analyze.js';
|
|
@@ -322,6 +322,14 @@ const forceHeapOOMForTestIfEnabled = () => {
|
|
|
322
322
|
for (;;)
|
|
323
323
|
chunks.push('x'.repeat(1024 * 1024));
|
|
324
324
|
};
|
|
325
|
+
// 64 MiB keeps auto-checkpoint enabled but triggers less frequently than
|
|
326
|
+
// Ladybug's stock ~16 MiB threshold, reducing rename/remove churn on large
|
|
327
|
+
// runs. Also matches the GitNexus default in `lbug-config.ts`.
|
|
328
|
+
//
|
|
329
|
+
// IMPORTANT: keep README examples (`README.md`, `gitnexus/README.md`) and
|
|
330
|
+
// the `DEFAULT_WAL_CHECKPOINT_THRESHOLD` constant in
|
|
331
|
+
// `gitnexus/src/core/lbug/lbug-config.ts` in sync with this value.
|
|
332
|
+
const RECOMMENDED_WAL_CHECKPOINT_THRESHOLD = 64 * 1024 * 1024;
|
|
325
333
|
/** Re-exec the process with a 16GB heap and larger stack if we're currently below that. */
|
|
326
334
|
async function ensureHeap() {
|
|
327
335
|
const nodeOpts = process.env.NODE_OPTIONS || '';
|
|
@@ -378,6 +386,8 @@ const ANALYZE_CLI_ENV_KEYS = [
|
|
|
378
386
|
'GITNEXUS_PROFILE_DEFERRED_SLOW_MS',
|
|
379
387
|
'GITNEXUS_MAX_FILE_SIZE',
|
|
380
388
|
'GITNEXUS_WORKER_SUB_BATCH_TIMEOUT_MS',
|
|
389
|
+
'GITNEXUS_WAL_CHECKPOINT_THRESHOLD',
|
|
390
|
+
'GITNEXUS_WAL_MANUAL_CHECKPOINT',
|
|
381
391
|
'GITNEXUS_EMBEDDING_THREADS',
|
|
382
392
|
'GITNEXUS_EMBEDDING_BATCH_SIZE',
|
|
383
393
|
'GITNEXUS_EMBEDDING_SUB_BATCH_SIZE',
|
|
@@ -452,6 +462,15 @@ const analyzeCommandImpl = async (inputPath, options) => {
|
|
|
452
462
|
}
|
|
453
463
|
process.env.GITNEXUS_WORKER_SUB_BATCH_TIMEOUT_MS = String(Math.round(workerTimeoutSeconds * 1000));
|
|
454
464
|
}
|
|
465
|
+
if (options?.walCheckpointThreshold !== undefined) {
|
|
466
|
+
const parsed = parseWalCheckpointThreshold(options.walCheckpointThreshold);
|
|
467
|
+
if (parsed === undefined) {
|
|
468
|
+
cliError(' --wal-checkpoint-threshold must be an integer >= -1.\n');
|
|
469
|
+
process.exitCode = 1;
|
|
470
|
+
return;
|
|
471
|
+
}
|
|
472
|
+
process.env.GITNEXUS_WAL_CHECKPOINT_THRESHOLD = String(parsed);
|
|
473
|
+
}
|
|
455
474
|
// `--workers` is threaded through `runFullAnalysis` options → PipelineOptions
|
|
456
475
|
// → createWorkerPool, intentionally bypassing the GITNEXUS_WORKER_POOL_SIZE
|
|
457
476
|
// env channel so this CLI surface never mutates `process.env` for pool size.
|
|
@@ -859,6 +878,16 @@ const analyzeCommandImpl = async (inputPath, options) => {
|
|
|
859
878
|
process.exitCode = 1;
|
|
860
879
|
return;
|
|
861
880
|
}
|
|
881
|
+
if (isLbugCheckpointIoError(err)) {
|
|
882
|
+
cliError(` LadybugDB failed while rotating/removing WAL checkpoint files.\n` +
|
|
883
|
+
` This can happen when auto-checkpoint runs at the default threshold (~16MB).\n` +
|
|
884
|
+
` Retry with a larger checkpoint threshold to reduce checkpoint frequency:\n` +
|
|
885
|
+
` gitnexus analyze --wal-checkpoint-threshold ${RECOMMENDED_WAL_CHECKPOINT_THRESHOLD}\n` +
|
|
886
|
+
` (or set GITNEXUS_WAL_CHECKPOINT_THRESHOLD=${RECOMMENDED_WAL_CHECKPOINT_THRESHOLD})\n` +
|
|
887
|
+
` (Try 33554432 = 32 MiB on small-disk / CI runners.)\n`, { recoveryHint: 'wal-checkpoint-threshold' });
|
|
888
|
+
process.exitCode = 1;
|
|
889
|
+
return;
|
|
890
|
+
}
|
|
862
891
|
// HF download failure — show clean guidance without the raw stack trace.
|
|
863
892
|
// Checked before writeFatalToStderr so the user sees one focused message
|
|
864
893
|
// rather than a stack-trace dump followed by a second remediation block.
|
|
@@ -1,15 +1,37 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* String-literal union of all `recoveryHint` tags emitted by the CLI.
|
|
3
|
+
*
|
|
4
|
+
* Centralized so a new recovery branch added in `analyze.ts` cannot land
|
|
5
|
+
* without updating this union — TypeScript will reject the unknown literal
|
|
6
|
+
* passed via `cliError({ recoveryHint: '...' })`. To add a new hint:
|
|
7
|
+
* 1. Add the tag string to this union.
|
|
8
|
+
* 2. Pass it as the `recoveryHint` field at the relevant `cliError`
|
|
9
|
+
* call site.
|
|
10
|
+
*
|
|
11
|
+
* Consumers can import this type to narrow log-record `recoveryHint`
|
|
12
|
+
* fields without restating the literal list.
|
|
13
|
+
*/
|
|
14
|
+
export type RecoveryHint = 'wal-corruption' | 'wal-checkpoint-threshold' | 'heap-oom-respawn' | 'native-worker-abort' | 'hf-endpoint-unreachable' | 'large-repo' | 'npm-resolution' | 'module-not-found';
|
|
15
|
+
/**
|
|
16
|
+
* Common shape for the optional structured-field bag passed to
|
|
17
|
+
* `cliError`/`cliWarn`/`cliInfo`. Typed so the `recoveryHint` slot is
|
|
18
|
+
* checked against the {@link RecoveryHint} union.
|
|
19
|
+
*/
|
|
20
|
+
export interface CliMessageFields extends Record<string, unknown> {
|
|
21
|
+
recoveryHint?: RecoveryHint;
|
|
22
|
+
}
|
|
1
23
|
/**
|
|
2
24
|
* User-facing informational message. Use for banners, listening URLs,
|
|
3
25
|
* and any message the user expects to read in plain text.
|
|
4
26
|
*/
|
|
5
|
-
export declare function cliInfo(msg: string, fields?:
|
|
27
|
+
export declare function cliInfo(msg: string, fields?: CliMessageFields): void;
|
|
6
28
|
/**
|
|
7
29
|
* User-facing warning. Operator-actionable but non-fatal — `cliWarn`
|
|
8
30
|
* indicates the command can still proceed in some form.
|
|
9
31
|
*/
|
|
10
|
-
export declare function cliWarn(msg: string, fields?:
|
|
32
|
+
export declare function cliWarn(msg: string, fields?: CliMessageFields): void;
|
|
11
33
|
/**
|
|
12
34
|
* User-facing error. Indicates the command cannot proceed; usually
|
|
13
35
|
* paired with a non-zero exit code at the call site.
|
|
14
36
|
*/
|
|
15
|
-
export declare function cliError(msg: string, fields?:
|
|
37
|
+
export declare function cliError(msg: string, fields?: CliMessageFields): void;
|
package/dist/cli/index.js
CHANGED
|
@@ -38,6 +38,8 @@ program
|
|
|
38
38
|
.option('-v, --verbose', 'Enable verbose ingestion warnings (default: false)')
|
|
39
39
|
.option('--max-file-size <kb>', 'Skip files larger than this (KB). Default: 512. Hard cap: 32768 (tree-sitter limit).')
|
|
40
40
|
.option('--worker-timeout <seconds>', 'Worker sub-batch idle timeout before retry/fallback. Default: 30.')
|
|
41
|
+
.option('--wal-checkpoint-threshold <bytes>', 'LadybugDB WAL auto-checkpoint threshold in bytes during analyze ' +
|
|
42
|
+
'(integer >= -1; default: 67108864 = 64 MiB; -1 keeps Ladybug stock ~16 MiB).')
|
|
41
43
|
.option('--workers <n>', 'Parse worker pool size. Default: cores-1 capped at 16. Pass 0 to disable workers (sequential).')
|
|
42
44
|
.option('--embedding-threads <n>', 'Limit local ONNX embedding CPU threads')
|
|
43
45
|
.option('--embedding-batch-size <n>', 'Number of nodes per embedding batch')
|
|
@@ -47,6 +49,7 @@ program
|
|
|
47
49
|
' GITNEXUS_NO_GITIGNORE=1 Skip .gitignore parsing (still reads .gitnexusignore)\n' +
|
|
48
50
|
' GITNEXUS_MAX_FILE_SIZE=N Override large-file skip threshold (KB). Default 512, max 32768.\n' +
|
|
49
51
|
' GITNEXUS_WORKER_SUB_BATCH_TIMEOUT_MS=N Worker idle timeout in milliseconds. Default 30000.\n' +
|
|
52
|
+
' GITNEXUS_WAL_CHECKPOINT_THRESHOLD=N LadybugDB WAL auto-checkpoint threshold in bytes (default 67108864 = 64 MiB; -1 keeps Ladybug stock ~16 MiB).\n' +
|
|
50
53
|
' GITNEXUS_WORKER_SUB_BATCH_MAX_BYTES=N Worker job byte budget. Default 8388608.\n' +
|
|
51
54
|
' GITNEXUS_WORKER_POOL_SIZE=N Parse worker count override. Default cores-1 capped at 16.\n' +
|
|
52
55
|
' GITNEXUS_PARSE_CHUNK_CONCURRENCY=N Concurrent in-flight parse chunks. Default 2.\n' +
|
|
@@ -55,6 +58,7 @@ program
|
|
|
55
58
|
' GITNEXUS_WORKER_CONSECUTIVE_FAILURE_THRESHOLD=N Per-slot deaths to trip circuit breaker. Default max(3, poolSize).\n' +
|
|
56
59
|
' GITNEXUS_EMBEDDING_THREADS=N Limit local ONNX CPU threads for --embeddings.\n' +
|
|
57
60
|
' GITNEXUS_SEMANTIC_EXACT_SCAN_LIMIT=N Max embedding chunks for exact-scan fallback. Default 10000.\n' +
|
|
61
|
+
'\nFlags override the corresponding env vars when both are provided.\n' +
|
|
58
62
|
'\nTip: `.gitnexusignore` supports `.gitignore`-style negation. Add e.g.\n' +
|
|
59
63
|
' `!__tests__/` to index a directory that is auto-filtered by default (#771).')
|
|
60
64
|
.action(createLazyAction(() => import('./analyze.js'), 'analyzeCommand'));
|
|
@@ -5,4 +5,161 @@ export const cCallConfig = {
|
|
|
5
5
|
};
|
|
6
6
|
/**
 * Call-extraction configuration for C++ sources.
 *
 * Besides tagging the language, it supplies a language-specific hook that
 * hands each candidate node to `extractCppOperatorCallSite`, which decides
 * whether the node is an operator call site.
 */
export const cppCallConfig = {
    language: SupportedLanguages.CPlusPlus,
    // Thin delegate: all operator-call logic lives in extractCppOperatorCallSite.
    extractLanguageCallSite: (callNode) => extractCppOperatorCallSite(callNode),
};
|
|
12
|
+
/**
 * Describe a C++ binary expression as an overloaded-operator call site.
 *
 * Only two operators are modeled:
 *   - `a + b`  → member call `operator+` on receiver `a` with one argument;
 *                both operands must be plain identifiers.
 *   - `x << y` → free call `operator<<` with two arguments; only the right
 *                operand must be a plain identifier.
 *
 * @param callNode Syntax node to inspect.
 * @returns A call-site descriptor, or `null` when the node is not a
 *   `binary_expression`, when both operands are recognised as built-in
 *   types, when the operator is not modeled, or when the operands are not
 *   simple identifiers.
 */
function extractCppOperatorCallSite(callNode) {
    if (callNode.type !== 'binary_expression' || isPrimitiveOnlyBinaryOperatorCall(callNode)) {
        return null;
    }
    const symbol = callNode.childForFieldName('operator')?.text.trim();
    const rhs = callNode.childForFieldName('right');
    // Deliberately conservative: anything other than bare identifiers is left
    // unresolved rather than guessed at.
    switch (symbol) {
        case '+': {
            const lhs = callNode.childForFieldName('left');
            if (lhs?.type !== 'identifier' || rhs?.type !== 'identifier') {
                return null;
            }
            return {
                calledName: 'operator+',
                callForm: 'member',
                receiverName: lhs.text,
                argCount: 1,
            };
        }
        case '<<': {
            if (rhs?.type !== 'identifier') {
                return null;
            }
            return {
                calledName: 'operator<<',
                callForm: 'free',
                argCount: 2,
            };
        }
        default:
            return null;
    }
}
|
|
44
|
+
/**
 * Report whether both operands of a binary expression are recognised as
 * built-in (primitive) types, in which case no user-defined operator can be
 * the call target.
 *
 * @param callNode A node exposing `left` and `right` fields.
 * @returns `true` only when both operands exist and both are built-in;
 *   `false` when either operand is missing.
 */
function isPrimitiveOnlyBinaryOperatorCall(callNode) {
    const lhs = callNode.childForFieldName('left');
    const rhs = callNode.childForFieldName('right');
    if (lhs === null || rhs === null) {
        return false;
    }
    if (!isBuiltinOperatorOperand(lhs)) {
        return false;
    }
    return isBuiltinOperatorOperand(rhs);
}
|
|
51
|
+
/**
 * Report whether an operand node has a built-in type, judged by the type
 * text that `inferCppOperatorOperandType` infers for it.
 *
 * @param node Operand syntax node.
 * @returns `true` when the inferred type is one of the built-in type names.
 */
function isBuiltinOperatorOperand(node) {
    const inferredType = inferCppOperatorOperandType(node);
    return isBuiltinOperatorType(inferredType);
}
|
|
54
|
+
/**
 * Infer the type text of an operator operand.
 *
 * Literals are classified first; plain identifiers are then resolved through
 * `lookupCppIdentifierType`. Every other node kind is left untyped.
 *
 * @param node Operand syntax node.
 * @returns The inferred type text, or `''` when nothing could be inferred.
 */
function inferCppOperatorOperandType(node) {
    const fromLiteral = inferCppLiteralType(node);
    if (fromLiteral === '') {
        return node.type === 'identifier' ? lookupCppIdentifierType(node) : '';
    }
    return fromLiteral;
}
|
|
62
|
+
function inferCppLiteralType(node) {
|
|
63
|
+
if (node.type === 'number_literal')
|
|
64
|
+
return node.text.includes('.') ? 'double' : 'int';
|
|
65
|
+
if (node.type === 'char_literal')
|
|
66
|
+
return 'char';
|
|
67
|
+
if (node.type === 'true' || node.type === 'false')
|
|
68
|
+
return 'bool';
|
|
69
|
+
return '';
|
|
70
|
+
}
|
|
71
|
+
/**
 * Resolve the declared type text of an identifier by inspecting its nearest
 * enclosing scope.
 *
 * Resolution order:
 *   1. climb to the closest `compound_statement` or `translation_unit`;
 *   2. ask `lookupCppFunctionParameterType` whether the name is a parameter
 *      of the enclosing function;
 *   3. scan the scope's direct `declaration` children for a declarator whose
 *      leaf name matches.
 *
 * Only the direct children of that single scope are examined; declarations
 * in outer blocks are not searched.
 *
 * @param identNode Identifier syntax node.
 * @returns Normalised type text, or `''` when no declaration was found.
 */
function lookupCppIdentifierType(identNode) {
    let enclosing = identNode.parent;
    while (enclosing !== null) {
        if (enclosing.type === 'compound_statement' || enclosing.type === 'translation_unit') {
            break;
        }
        enclosing = enclosing.parent;
    }
    if (enclosing === null) {
        return '';
    }
    const target = identNode.text;
    // Parameters are consulted before local declarations.
    const fromParameter = lookupCppFunctionParameterType(enclosing, target);
    if (fromParameter !== '') {
        return fromParameter;
    }
    for (let index = 0; index < enclosing.childCount; index += 1) {
        const statement = enclosing.child(index);
        if (statement === null || statement.type !== 'declaration') {
            continue;
        }
        const declaredType = statement.childForFieldName('type');
        const declaredName = statement.childForFieldName('declarator');
        const matches = declaredType !== null &&
            declaredName !== null &&
            extractDeclaratorLeafName(declaredName) === target;
        if (matches) {
            return normalizeCppTypeText(declaredType.text);
        }
    }
    return '';
}
|
|
97
|
+
/**
 * Look up the type text of a parameter named `varName` on the nearest
 * function that encloses `scope`.
 *
 * The search climbs from `scope.parent` to the first `function_definition`
 * or `function_declarator` ancestor and inspects only that function's
 * parameter list; it never continues to functions further out.
 *
 * @param scope Scope node whose ancestors are searched.
 * @param varName Parameter name to match.
 * @returns Normalised type text, or `''` when there is no enclosing function,
 *   no parameter list, or no parameter with that name.
 */
function lookupCppFunctionParameterType(scope, varName) {
    let owner = scope.parent;
    while (owner !== null &&
        owner.type !== 'function_definition' &&
        owner.type !== 'function_declarator') {
        owner = owner.parent;
    }
    if (owner === null) {
        return '';
    }
    // A definition wraps its declarator; a bare declarator is used as-is.
    const signature = owner.type === 'function_declarator'
        ? owner
        : findFirstDescendantOfType(owner, 'function_declarator');
    const parameterList = signature?.childForFieldName('parameters') ?? null;
    if (parameterList === null) {
        return '';
    }
    for (let index = 0; index < parameterList.namedChildCount; index += 1) {
        const parameter = parameterList.namedChild(index);
        if (parameter === null || parameter.type !== 'parameter_declaration') {
            continue;
        }
        const nameNode = parameter.childForFieldName('declarator');
        const typeNode = parameter.childForFieldName('type');
        if (nameNode === null || typeNode === null) {
            continue;
        }
        if (extractDeclaratorLeafName(nameNode) === varName) {
            return normalizeCppTypeText(typeNode.text);
        }
    }
    return '';
}
|
|
125
|
+
function findFirstDescendantOfType(node, type) {
|
|
126
|
+
if (node.type === type)
|
|
127
|
+
return node;
|
|
128
|
+
for (let i = 0; i < node.namedChildCount; i++) {
|
|
129
|
+
const found = findFirstDescendantOfType(node.namedChild(i), type);
|
|
130
|
+
if (found !== null)
|
|
131
|
+
return found;
|
|
132
|
+
}
|
|
133
|
+
return null;
|
|
134
|
+
}
|
|
135
|
+
function extractDeclaratorLeafName(node) {
|
|
136
|
+
if (node.type === 'identifier' ||
|
|
137
|
+
node.type === 'field_identifier' ||
|
|
138
|
+
node.type === 'operator_name') {
|
|
139
|
+
return node.text;
|
|
140
|
+
}
|
|
141
|
+
const named = node.namedChildren;
|
|
142
|
+
for (let i = named.length - 1; i >= 0; i--) {
|
|
143
|
+
const name = extractDeclaratorLeafName(named[i]);
|
|
144
|
+
if (name !== '')
|
|
145
|
+
return name;
|
|
146
|
+
}
|
|
147
|
+
return '';
|
|
148
|
+
}
|
|
149
|
+
/**
 * Normalise C++ type text for comparison: remove storage-class and
 * cv-style specifier keywords, collapse whitespace runs to one space, and
 * trim both ends.
 *
 * @param text Raw type text.
 * @returns The cleaned type text (possibly empty).
 */
function normalizeCppTypeText(text) {
    const specifierPattern = /\b(const|volatile|static|extern|register|mutable|inline|constexpr)\b/g;
    // Replace with a space (not '') so neighbouring tokens never fuse together.
    const withoutSpecifiers = text.replace(specifierPattern, ' ');
    const singleSpaced = withoutSpecifiers.replace(/\s+/g, ' ');
    return singleSpaced.trim();
}
|
|
155
|
+
/**
 * Report whether a type text names a fundamental C++ arithmetic type, i.e.
 * one whose operators are built in and can never resolve to a user-defined
 * `operator` overload.
 *
 * The text is split on whitespace and every word must be a fundamental type
 * keyword, so multi-word spellings such as `unsigned int`, `long long` and
 * `long double` are recognised in addition to the single-word names.
 *
 * @param type Type text (expected to be already normalised).
 * @returns `true` for a non-empty text made up solely of fundamental type
 *   keywords; `false` otherwise, including for `''` and non-string input.
 */
function isBuiltinOperatorType(type) {
    if (typeof type !== 'string')
        return false;
    const builtinWords = new Set([
        'bool',
        'char',
        'char8_t',
        'char16_t',
        'char32_t',
        'wchar_t',
        'double',
        'float',
        'int',
        'long',
        'short',
        'signed',
        'unsigned',
    ]);
    const words = type.split(/\s+/).filter((word) => word !== '');
    // An empty text means "type unknown" and must not count as built-in.
    return words.length > 0 && words.every((word) => builtinWords.has(word));
}
|
|
@@ -194,6 +194,7 @@ const cCppExtractFunctionName = (node) => {
|
|
|
194
194
|
if (c?.type === 'qualified_identifier' ||
|
|
195
195
|
c?.type === 'identifier' ||
|
|
196
196
|
c?.type === 'field_identifier' ||
|
|
197
|
+
c?.type === 'operator_name' ||
|
|
197
198
|
c?.type === 'parenthesized_declarator') {
|
|
198
199
|
innerDeclarator = c;
|
|
199
200
|
break;
|
|
@@ -205,7 +206,7 @@ const cCppExtractFunctionName = (node) => {
|
|
|
205
206
|
if (!nameNode) {
|
|
206
207
|
for (let i = 0; i < innerDeclarator.childCount; i++) {
|
|
207
208
|
const c = innerDeclarator.child(i);
|
|
208
|
-
if (c?.type === 'identifier') {
|
|
209
|
+
if (c?.type === 'identifier' || c?.type === 'operator_name') {
|
|
209
210
|
nameNode = c;
|
|
210
211
|
break;
|
|
211
212
|
}
|
|
@@ -217,7 +218,8 @@ const cCppExtractFunctionName = (node) => {
|
|
|
217
218
|
}
|
|
218
219
|
}
|
|
219
220
|
else if (innerDeclarator?.type === 'identifier' ||
|
|
220
|
-
innerDeclarator?.type === 'field_identifier'
|
|
221
|
+
innerDeclarator?.type === 'field_identifier' ||
|
|
222
|
+
innerDeclarator?.type === 'operator_name') {
|
|
221
223
|
// field_identifier is used for method names inside C++ class bodies
|
|
222
224
|
funcName = innerDeclarator.text;
|
|
223
225
|
if (innerDeclarator.type === 'field_identifier')
|
|
@@ -237,7 +239,7 @@ const cCppExtractFunctionName = (node) => {
|
|
|
237
239
|
if (!nameNode) {
|
|
238
240
|
for (let i = 0; i < nestedId.childCount; i++) {
|
|
239
241
|
const c = nestedId.child(i);
|
|
240
|
-
if (c?.type === 'identifier') {
|
|
242
|
+
if (c?.type === 'identifier' || c?.type === 'operator_name') {
|
|
241
243
|
nameNode = c;
|
|
242
244
|
break;
|
|
243
245
|
}
|
|
@@ -135,11 +135,26 @@ export function emitCppScopeCaptures(sourceText, filePath, cachedTree) {
|
|
|
135
135
|
const callAnchor = grouped['@reference.call.free'] ??
|
|
136
136
|
grouped['@reference.call.member'] ??
|
|
137
137
|
grouped['@reference.call.qualified'];
|
|
138
|
+
const operatorAnchor = grouped['@reference.operator'];
|
|
139
|
+
if (operatorAnchor !== undefined) {
|
|
140
|
+
const operatorNode = callAnchor !== undefined
|
|
141
|
+
? findNodeAtRange(tree.rootNode, callAnchor.range, 'binary_expression')
|
|
142
|
+
: null;
|
|
143
|
+
if (operatorNode !== null && isPrimitiveOnlyBinaryOperator(operatorNode))
|
|
144
|
+
continue;
|
|
145
|
+
}
|
|
138
146
|
if (callAnchor !== undefined && grouped['@reference.arity'] === undefined) {
|
|
139
|
-
const callNode = findNodeAtRange(tree.rootNode, callAnchor.range, 'call_expression')
|
|
140
|
-
|
|
147
|
+
const callNode = findNodeAtRange(tree.rootNode, callAnchor.range, 'call_expression') ??
|
|
148
|
+
findNodeAtRange(tree.rootNode, callAnchor.range, 'binary_expression');
|
|
149
|
+
if (callNode?.type === 'call_expression') {
|
|
141
150
|
grouped['@reference.arity'] = syntheticCapture('@reference.arity', callNode, String(computeCppCallArity(callNode)));
|
|
142
151
|
}
|
|
152
|
+
else if (callNode?.type === 'binary_expression') {
|
|
153
|
+
grouped['@reference.arity'] = syntheticCapture('@reference.arity', callNode, grouped['@reference.call.member'] !== undefined ? '1' : '2');
|
|
154
|
+
}
|
|
155
|
+
}
|
|
156
|
+
if (operatorAnchor !== undefined && grouped['@reference.name'] === undefined) {
|
|
157
|
+
grouped['@reference.name'] = syntheticCapture('@reference.name', findNodeAtRange(tree.rootNode, operatorAnchor.range, operatorAnchor.text) ?? tree.rootNode, `operator${operatorAnchor.text}`);
|
|
143
158
|
}
|
|
144
159
|
// ── Enrich constructor calls (new Foo()) with arity ─────────────
|
|
145
160
|
const ctorCallAnchor = grouped['@reference.call.constructor'];
|
|
@@ -153,13 +168,18 @@ export function emitCppScopeCaptures(sourceText, filePath, cachedTree) {
|
|
|
153
168
|
const anyCallAnchor = callAnchor ?? ctorCallAnchor;
|
|
154
169
|
if (anyCallAnchor !== undefined && grouped['@reference.parameter-types'] === undefined) {
|
|
155
170
|
const cNode = findNodeAtRange(tree.rootNode, anyCallAnchor.range, 'call_expression') ??
|
|
156
|
-
findNodeAtRange(tree.rootNode, anyCallAnchor.range, 'new_expression')
|
|
171
|
+
findNodeAtRange(tree.rootNode, anyCallAnchor.range, 'new_expression') ??
|
|
172
|
+
findNodeAtRange(tree.rootNode, anyCallAnchor.range, 'binary_expression');
|
|
157
173
|
if (cNode !== null) {
|
|
158
|
-
const argTypes =
|
|
174
|
+
const argTypes = cNode.type === 'binary_expression'
|
|
175
|
+
? inferCppBinaryOperatorArgTypes(cNode, grouped['@reference.call.free'] !== undefined)
|
|
176
|
+
: inferCppCallArgTypes(cNode);
|
|
159
177
|
if (argTypes !== undefined && argTypes.length > 0) {
|
|
160
178
|
grouped['@reference.parameter-types'] = syntheticCapture('@reference.parameter-types', cNode, JSON.stringify(argTypes));
|
|
161
179
|
}
|
|
162
|
-
const argTypeClasses =
|
|
180
|
+
const argTypeClasses = cNode.type === 'binary_expression'
|
|
181
|
+
? inferCppBinaryOperatorArgTypeClasses(cNode, grouped['@reference.call.free'] !== undefined)
|
|
182
|
+
: inferCppCallArgTypeClasses(cNode);
|
|
163
183
|
if (argTypeClasses !== undefined && argTypeClasses.length > 0) {
|
|
164
184
|
grouped['@reference.parameter-type-classes'] = syntheticCapture('@reference.parameter-type-classes', cNode, JSON.stringify(argTypeClasses));
|
|
165
185
|
}
|
|
@@ -640,6 +660,62 @@ function inferCppCallArgTypeClasses(node) {
|
|
|
640
660
|
}
|
|
641
661
|
return classes.length > 0 ? classes : undefined;
|
|
642
662
|
}
|
|
663
|
+
/**
 * Infer the type text of each operand that acts as an argument of a binary
 * operator call.
 *
 * @param node Binary-expression node.
 * @param includeLeftOperand When truthy the left operand counts as an
 *   argument too; otherwise only the right operand is considered.
 * @returns One inferred type per operand (in left-to-right order), or
 *   `undefined` when the node yields no operands.
 */
function inferCppBinaryOperatorArgTypes(node, includeLeftOperand) {
    const inferred = binaryOperatorOperands(node, includeLeftOperand).map((operand) => inferCppExpressionType(operand));
    return inferred.length === 0 ? undefined : inferred;
}
|
|
670
|
+
/**
 * Infer the type-class descriptor of each operand that acts as an argument
 * of a binary operator call.
 *
 * @param node Binary-expression node.
 * @param includeLeftOperand When truthy the left operand counts as an
 *   argument too; otherwise only the right operand is considered.
 * @returns One type-class descriptor per operand (in left-to-right order),
 *   or `undefined` when the node yields no operands.
 */
function inferCppBinaryOperatorArgTypeClasses(node, includeLeftOperand) {
    const inferred = binaryOperatorOperands(node, includeLeftOperand).map((operand) => inferCppExpressionTypeClass(operand));
    return inferred.length === 0 ? undefined : inferred;
}
|
|
677
|
+
function binaryOperatorOperands(node, includeLeftOperand) {
|
|
678
|
+
const operands = [];
|
|
679
|
+
const left = node.childForFieldName('left');
|
|
680
|
+
const right = node.childForFieldName('right');
|
|
681
|
+
if (includeLeftOperand && left !== null)
|
|
682
|
+
operands.push(left);
|
|
683
|
+
if (right !== null)
|
|
684
|
+
operands.push(right);
|
|
685
|
+
return operands;
|
|
686
|
+
}
|
|
687
|
+
/**
 * Report whether every available operand of a binary expression has a
 * built-in type.
 *
 * @param node Binary-expression node.
 * @returns `true` when at least one operand is present and all present
 *   operands are built-in; `false` otherwise.
 */
function isPrimitiveOnlyBinaryOperator(node) {
    const operands = binaryOperatorOperands(node, true);
    if (operands.length === 0) {
        return false;
    }
    for (const operand of operands) {
        if (!isBuiltinOperatorType(operand)) {
            return false;
        }
    }
    return true;
}
|
|
691
|
+
/**
 * Report whether an operand node's inferred type is one of the single-word
 * built-in type names.
 *
 * @param node Operand syntax node; its type comes from
 *   `inferCppExpressionType`.
 * @returns `true` when the inferred type text exactly equals one of the
 *   listed names.
 */
function isBuiltinOperatorType(node) {
    const builtinNames = [
        'bool',
        'char',
        'double',
        'float',
        'int',
        'long',
        'short',
        'signed',
        'unsigned',
    ];
    return builtinNames.includes(inferCppExpressionType(node));
}
|
|
703
|
+
/**
 * Infer the type text of an operand expression.
 *
 * A literal's type wins; a plain identifier is resolved through
 * `lookupDeclaredTypeForIdentifier`; any other expression is left untyped.
 *
 * @param node Operand syntax node.
 * @returns The inferred type text, or `''` when nothing could be inferred.
 */
function inferCppExpressionType(node) {
    const literal = inferCppLiteralType(node);
    if (literal === '') {
        return node.type === 'identifier' ? lookupDeclaredTypeForIdentifier(node) : '';
    }
    return literal;
}
|
|
711
|
+
/**
 * Infer the type-class descriptor of an operand expression.
 *
 * A literal becomes a plain value class of its literal type; a plain
 * identifier is resolved through `lookupDeclaredTypeClassForIdentifier`;
 * any other expression yields the `'unknown'` type class.
 *
 * @param node Operand syntax node.
 * @returns A type-class descriptor for the operand.
 */
function inferCppExpressionTypeClass(node) {
    const literal = inferCppLiteralType(node);
    if (literal === '') {
        return node.type === 'identifier'
            ? lookupDeclaredTypeClassForIdentifier(node)
            : unknownTypeClass('unknown');
    }
    return valueTypeClass(literal);
}
|
|
643
719
|
/**
 * Build the type-class descriptor for a plain by-value type: no
 * cv-qualification, value indirection, and pointer depth zero.
 *
 * @param base Base type name.
 * @returns A fresh descriptor object for that base type.
 */
function valueTypeClass(base) {
    const descriptor = {
        base: base,
        cv: 'none',
        indirection: 'value',
        pointerDepth: 0,
    };
    return descriptor;
}
|
|
@@ -97,6 +97,12 @@ const CPP_SCOPE_QUERY = `
|
|
|
97
97
|
declarator: (qualified_identifier
|
|
98
98
|
name: (identifier) @declaration.name))) @declaration.method
|
|
99
99
|
|
|
100
|
+
;; Out-of-class operator method: Point::operator+(...)
|
|
101
|
+
(function_definition
|
|
102
|
+
declarator: (function_declarator
|
|
103
|
+
declarator: (qualified_identifier
|
|
104
|
+
name: (operator_name) @declaration.name))) @declaration.method
|
|
105
|
+
|
|
100
106
|
;; ─── Declarations — out-of-class method with pointer return ─────────
|
|
101
107
|
(function_definition
|
|
102
108
|
declarator: (pointer_declarator
|
|
@@ -129,6 +135,11 @@ const CPP_SCOPE_QUERY = `
|
|
|
129
135
|
declarator: (function_declarator
|
|
130
136
|
declarator: (field_identifier) @declaration.name)) @declaration.method
|
|
131
137
|
|
|
138
|
+
;; Inline operator method in class body: Point operator+(Point) const { ... }
|
|
139
|
+
(function_definition
|
|
140
|
+
declarator: (function_declarator
|
|
141
|
+
declarator: (operator_name) @declaration.name)) @declaration.method
|
|
142
|
+
|
|
132
143
|
;; ─── Declarations — inline method with pointer return (field_identifier) ──
|
|
133
144
|
;; Covers: User* lookup(int id) { ... } inside a class body
|
|
134
145
|
;; AST: function_definition > pointer_declarator > function_declarator > field_identifier
|
|
@@ -144,17 +155,49 @@ const CPP_SCOPE_QUERY = `
|
|
|
144
155
|
(function_declarator
|
|
145
156
|
declarator: (field_identifier) @declaration.name))) @declaration.method
|
|
146
157
|
|
|
158
|
+
;; Inline operator method with reference return: Point& operator+=(Point) { ... }
|
|
159
|
+
(field_declaration_list
|
|
160
|
+
(function_definition
|
|
161
|
+
declarator: (reference_declarator
|
|
162
|
+
(function_declarator
|
|
163
|
+
declarator: (operator_name) @declaration.name))) @declaration.method)
|
|
164
|
+
|
|
165
|
+
;; Free operator definition with reference return: std::ostream& operator<<(...) { ... }
|
|
166
|
+
(translation_unit
|
|
167
|
+
(function_definition
|
|
168
|
+
declarator: (reference_declarator
|
|
169
|
+
(function_declarator
|
|
170
|
+
declarator: (operator_name) @declaration.name))) @declaration.function)
|
|
171
|
+
|
|
172
|
+
(namespace_definition
|
|
173
|
+
body: (declaration_list
|
|
174
|
+
(function_definition
|
|
175
|
+
declarator: (reference_declarator
|
|
176
|
+
(function_declarator
|
|
177
|
+
declarator: (operator_name) @declaration.name))) @declaration.function))
|
|
178
|
+
|
|
147
179
|
;; ─── Declarations — function prototype (forward declaration) ────────
|
|
148
180
|
(declaration
|
|
149
181
|
declarator: (function_declarator
|
|
150
182
|
declarator: (identifier) @declaration.name)) @declaration.function
|
|
151
183
|
|
|
184
|
+
;; Free operator prototype: std::ostream& operator<<(std::ostream&, T)
|
|
185
|
+
(declaration
|
|
186
|
+
declarator: (function_declarator
|
|
187
|
+
declarator: (operator_name) @declaration.name)) @declaration.function
|
|
188
|
+
|
|
152
189
|
;; ─── Declarations — function prototype with pointer return ──────────
|
|
153
190
|
(declaration
|
|
154
191
|
declarator: (pointer_declarator
|
|
155
192
|
declarator: (function_declarator
|
|
156
193
|
declarator: (identifier) @declaration.name))) @declaration.function
|
|
157
194
|
|
|
195
|
+
;; Free operator prototype with reference return.
|
|
196
|
+
(declaration
|
|
197
|
+
declarator: (reference_declarator
|
|
198
|
+
(function_declarator
|
|
199
|
+
declarator: (operator_name) @declaration.name))) @declaration.function
|
|
200
|
+
|
|
158
201
|
;; ─── Declarations — typedef ─────────────────────────────────────────
|
|
159
202
|
(type_definition
|
|
160
203
|
declarator: (type_identifier) @declaration.name) @declaration.typedef
|
|
@@ -170,6 +213,11 @@ const CPP_SCOPE_QUERY = `
|
|
|
170
213
|
declarator: (function_declarator
|
|
171
214
|
declarator: (field_identifier) @declaration.name)) @declaration.method
|
|
172
215
|
|
|
216
|
+
;; Operator method prototype in class body: Point operator+(Point) const;
|
|
217
|
+
(field_declaration
|
|
218
|
+
declarator: (function_declarator
|
|
219
|
+
declarator: (operator_name) @declaration.name)) @declaration.method
|
|
220
|
+
|
|
173
221
|
;; Method prototype with pointer return: User* lookup(int id);
|
|
174
222
|
(field_declaration
|
|
175
223
|
declarator: (pointer_declarator
|
|
@@ -182,6 +230,11 @@ const CPP_SCOPE_QUERY = `
|
|
|
182
230
|
(function_declarator
|
|
183
231
|
declarator: (field_identifier) @declaration.name))) @declaration.method
|
|
184
232
|
|
|
233
|
+
(field_declaration
|
|
234
|
+
declarator: (reference_declarator
|
|
235
|
+
(function_declarator
|
|
236
|
+
declarator: (operator_name) @declaration.name))) @declaration.method
|
|
237
|
+
|
|
185
238
|
;; ─── Declarations — fields ──────────────────────────────────────────
|
|
186
239
|
(field_declaration
|
|
187
240
|
declarator: (field_identifier) @declaration.name) @declaration.field
|
|
@@ -472,6 +525,22 @@ const CPP_SCOPE_QUERY = `
|
|
|
472
525
|
argument: (_) @reference.receiver
|
|
473
526
|
field: (field_identifier) @reference.name)) @reference.call.member
|
|
474
527
|
|
|
528
|
+
;; Conservative operator-call support (#1636): model a + b as a
|
|
529
|
+
;; member-style operator+ lookup, and lhs << rhs as a free
|
|
530
|
+
;; operator<< lookup. Free operator+(T,T), member operator<<, and
|
|
531
|
+
;; complex operand expressions remain false negatives for now.
|
|
532
|
+
;; Built-in operators remain unresolved because no user-defined
|
|
533
|
+
;; operator target exists.
|
|
534
|
+
(binary_expression
|
|
535
|
+
left: (_) @reference.receiver
|
|
536
|
+
operator: "+" @reference.operator
|
|
537
|
+
right: (_)) @reference.call.member
|
|
538
|
+
|
|
539
|
+
(binary_expression
|
|
540
|
+
left: (_)
|
|
541
|
+
operator: "<<" @reference.operator
|
|
542
|
+
right: (_)) @reference.call.free
|
|
543
|
+
|
|
475
544
|
;; ─── References — template calls (func<T>()) ────────────────────────
|
|
476
545
|
(call_expression
|
|
477
546
|
function: (template_function
|
|
@@ -4,7 +4,7 @@ export declare const PYTHON_QUERIES = "\n(class_definition\n name: (identifier)
|
|
|
4
4
|
export declare const JAVA_QUERIES = "\n; Classes, Interfaces, Enums, Annotations\n(class_declaration name: (identifier) @name) @definition.class\n(interface_declaration name: (identifier) @name) @definition.interface\n(enum_declaration name: (identifier) @name) @definition.enum\n(annotation_type_declaration name: (identifier) @name) @definition.annotation\n\n; Methods & Constructors\n(method_declaration name: (identifier) @name) @definition.method\n(constructor_declaration name: (identifier) @name) @definition.constructor\n\n; Fields \u2014 typed field declarations inside class bodies\n(field_declaration\n declarator: (variable_declarator\n name: (identifier) @name)) @definition.property\n\n; Imports - capture any import declaration child as source\n(import_declaration (_) @import.source) @import\n\n; Calls\n(method_invocation name: (identifier) @call.name) @call\n(method_invocation object: (_) name: (identifier) @call.name) @call\n(method_reference) @call\n\n; Constructor calls: new Foo()\n(object_creation_expression type: (type_identifier) @call.name) @call\n\n; Local variable declarations inside method bodies\n(local_variable_declaration\n declarator: (variable_declarator\n name: (identifier) @name)) @definition.variable\n\n; Heritage - extends class\n(class_declaration name: (identifier) @heritage.class\n (superclass (type_identifier) @heritage.extends)) @heritage\n\n; Heritage - implements interfaces\n(class_declaration name: (identifier) @heritage.class\n (super_interfaces (type_list (type_identifier) @heritage.implements))) @heritage.impl\n\n; Write access: obj.field = value\n(assignment_expression\n left: (field_access\n object: (_) @assignment.receiver\n field: (identifier) @assignment.property)\n right: (_)) @assignment\n";
|
|
5
5
|
export declare const C_QUERIES = "\n; Functions (direct declarator)\n(function_definition declarator: (function_declarator declarator: (identifier) @name)) @definition.function\n(declaration declarator: (function_declarator declarator: (identifier) @name)) @definition.function\n\n; Functions returning pointers (pointer_declarator wraps function_declarator)\n(function_definition declarator: (pointer_declarator declarator: (function_declarator declarator: (identifier) @name))) @definition.function\n(declaration declarator: (pointer_declarator declarator: (function_declarator declarator: (identifier) @name))) @definition.function\n\n; Functions returning double pointers (nested pointer_declarator)\n(function_definition declarator: (pointer_declarator declarator: (pointer_declarator declarator: (function_declarator declarator: (identifier) @name)))) @definition.function\n\n; Structs, Unions, Enums, Typedefs\n(struct_specifier name: (type_identifier) @name) @definition.struct\n(union_specifier name: (type_identifier) @name) @definition.union\n(enum_specifier name: (type_identifier) @name) @definition.enum\n(type_definition declarator: (type_identifier) @name) @definition.typedef\n\n; Macros\n(preproc_function_def name: (identifier) @name) @definition.macro\n(preproc_def name: (identifier) @name) @definition.macro\n\n; Includes\n(preproc_include path: (_) @import.source) @import\n\n; Calls\n(call_expression function: (identifier) @call.name) @call\n(call_expression function: (field_expression field: (field_identifier) @call.name)) @call\n\n; Variable declarations: int x = 5; or int x;\n(declaration\n declarator: (init_declarator\n declarator: (identifier) @name)) @definition.variable\n";
|
|
6
6
|
export declare const GO_QUERIES = "\n; Functions & Methods\n(function_declaration name: (identifier) @name) @definition.function\n(method_declaration name: (field_identifier) @name) @definition.method\n\n; Types\n(type_declaration (type_spec name: (type_identifier) @name type: (struct_type))) @definition.struct\n(type_declaration (type_spec name: (type_identifier) @name type: (interface_type))) @definition.interface\n\n; Imports\n(import_declaration (import_spec path: (interpreted_string_literal) @import.source)) @import\n(import_declaration (import_spec_list (import_spec path: (interpreted_string_literal) @import.source))) @import\n\n; Struct fields \u2014 named field declarations inside struct types\n(field_declaration_list\n (field_declaration\n name: (field_identifier) @name) @definition.property)\n\n; Struct embedding (anonymous fields = inheritance)\n(type_declaration\n (type_spec\n name: (type_identifier) @heritage.class\n type: (struct_type\n (field_declaration_list\n (field_declaration\n type: (type_identifier) @heritage.extends))))) @definition.struct\n\n; Calls\n(call_expression function: (identifier) @call.name) @call\n(call_expression function: (selector_expression field: (field_identifier) @call.name)) @call\n\n; Const/var declarations\n(const_declaration (const_spec name: (identifier) @name)) @definition.const\n(var_declaration (var_spec name: (identifier) @name)) @definition.variable\n\n; Short variable declaration: x := 5\n(short_var_declaration left: (expression_list (identifier) @name)) @definition.variable\n\n; Struct literal construction: User{Name: \"Alice\"}\n(composite_literal type: (type_identifier) @call.name) @call\n\n; Write access: obj.field = value\n(assignment_statement\n left: (expression_list\n (selector_expression\n operand: (_) @assignment.receiver\n field: (field_identifier) @assignment.property))\n right: (_)) @assignment\n\n; Write access: obj.field++ / obj.field--\n(inc_statement\n (selector_expression\n operand: (_) 
@assignment.receiver\n field: (field_identifier) @assignment.property)) @assignment\n(dec_statement\n (selector_expression\n operand: (_) @assignment.receiver\n field: (field_identifier) @assignment.property)) @assignment\n";
|
|
7
|
-
export declare const CPP_QUERIES = "\n; Classes, Structs, Namespaces\n(class_specifier name: (type_identifier) @name) @definition.class\n(class_specifier\n name: (template_type\n (type_identifier) @name\n (template_argument_list) @template-arguments)) @definition.class\n(struct_specifier name: (type_identifier) @name) @definition.struct\n(struct_specifier\n name: (template_type\n (type_identifier) @name\n (template_argument_list) @template-arguments)) @definition.struct\n(namespace_definition name: (namespace_identifier) @name) @definition.namespace\n(enum_specifier name: (type_identifier) @name) @definition.enum\n\n; Typedefs and unions (common in C-style headers and mixed C/C++ code)\n(type_definition declarator: (type_identifier) @name) @definition.typedef\n(union_specifier name: (type_identifier) @name) @definition.union\n\n; Macros\n(preproc_function_def name: (identifier) @name) @definition.macro\n(preproc_def name: (identifier) @name) @definition.macro\n\n; Functions & Methods (direct declarator)\n(function_definition declarator: (function_declarator declarator: (identifier) @name)) @definition.function\n(function_definition declarator: (function_declarator declarator: (qualified_identifier name: (identifier) @name))) @definition.method\n\n; Functions/methods returning pointers (pointer_declarator wraps function_declarator)\n(function_definition declarator: (pointer_declarator declarator: (function_declarator declarator: (identifier) @name))) @definition.function\n(function_definition declarator: (pointer_declarator declarator: (function_declarator declarator: (qualified_identifier name: (identifier) @name)))) @definition.method\n\n; Functions/methods returning double pointers (nested pointer_declarator)\n(function_definition declarator: (pointer_declarator declarator: (pointer_declarator declarator: (function_declarator declarator: (identifier) @name)))) @definition.function\n(function_definition declarator: (pointer_declarator declarator: 
(pointer_declarator declarator: (function_declarator declarator: (qualified_identifier name: (identifier) @name))))) @definition.method\n\n; Functions/methods returning references (reference_declarator wraps function_declarator)\n(function_definition declarator: (reference_declarator (function_declarator declarator: (identifier) @name))) @definition.function\n(function_definition declarator: (reference_declarator (function_declarator declarator: (qualified_identifier name: (identifier) @name)))) @definition.method\n\n; Destructors (destructor_name is distinct from identifier in tree-sitter-cpp)\n(function_definition declarator: (function_declarator declarator: (qualified_identifier name: (destructor_name) @name))) @definition.method\n\n; Function declarations / prototypes (common in headers)\n(declaration declarator: (function_declarator declarator: (identifier) @name)) @definition.function\n(declaration declarator: (pointer_declarator declarator: (function_declarator declarator: (identifier) @name))) @definition.function\n\n; Class/struct data member fields (Address address; int count;)\n; Uses field_identifier to exclude method declarations (which use function_declarator)\n(field_declaration\n declarator: (field_identifier) @name) @definition.property\n\n; Pointer member fields (Address* address;)\n(field_declaration\n declarator: (pointer_declarator\n declarator: (field_identifier) @name)) @definition.property\n\n; Reference member fields (Address& address;)\n(field_declaration\n declarator: (reference_declarator\n (field_identifier) @name)) @definition.property\n\n; Inline class method declarations (inside class body, no body: void save();)\n; tree-sitter-cpp uses field_identifier (not identifier) for names inside class bodies\n(field_declaration declarator: (function_declarator declarator: [(field_identifier) (identifier)] @name)) @definition.method\n\n; Inline class method declarations returning a pointer (User* lookup();)\n(field_declaration declarator: 
(pointer_declarator declarator: (function_declarator declarator: [(field_identifier) (identifier)] @name))) @definition.method\n\n; Inline class method declarations returning a reference (User& lookup();)\n(field_declaration declarator: (reference_declarator (function_declarator declarator: [(field_identifier) (identifier)] @name))) @definition.method\n\n; Inline class method definitions (inside class body, with body: void Foo() { ... })\n(field_declaration_list\n (function_definition\n declarator: (function_declarator\n declarator: [(field_identifier) (identifier) (operator_name) (destructor_name)] @name)) @definition.method)\n\n; Inline class methods returning a pointer type (User* lookup(int id) { ... })\n(field_declaration_list\n (function_definition\n declarator: (pointer_declarator\n declarator: (function_declarator\n declarator: [(field_identifier) (identifier) (operator_name)] @name))) @definition.method)\n\n; Inline class methods returning a reference type (User& lookup(int id) { ... 
})\n(field_declaration_list\n (function_definition\n declarator: (reference_declarator\n (function_declarator\n declarator: [(field_identifier) (identifier) (operator_name)] @name))) @definition.method)\n\n; Templates\n(template_declaration (class_specifier name: (type_identifier) @name)) @definition.template\n(template_declaration\n (class_specifier\n name: (template_type\n (type_identifier) @name\n (template_argument_list) @template-arguments))) @definition.template\n(template_declaration (function_definition declarator: (function_declarator declarator: (identifier) @name))) @definition.template\n\n; Includes\n(preproc_include path: (_) @import.source) @import\n\n; Calls\n(call_expression function: (identifier) @call.name) @call\n(call_expression function: (field_expression field: (field_identifier) @call.name)) @call\n(call_expression function: (qualified_identifier name: (identifier) @call.name)) @call\n(call_expression function: (template_function name: (identifier) @call.name)) @call\n\n; Constructor calls: new User()\n(new_expression type: (type_identifier) @call.name) @call\n\n; Variable declarations: int x = 5; or auto x = 5;\n(declaration\n declarator: (init_declarator\n declarator: (identifier) @name)) @definition.variable\n\n; Heritage\n(class_specifier name: (type_identifier) @heritage.class\n (base_class_clause (type_identifier) @heritage.extends)) @heritage\n(class_specifier name: (type_identifier) @heritage.class\n (base_class_clause (access_specifier) (type_identifier) @heritage.extends)) @heritage\n\n; Write access: obj.field = value\n(assignment_expression\n left: (field_expression\n argument: (_) @assignment.receiver\n field: (field_identifier) @assignment.property)\n right: (_)) @assignment\n\n";
|
|
7
|
+
export declare const CPP_QUERIES = "\n; Classes, Structs, Namespaces\n(class_specifier name: (type_identifier) @name) @definition.class\n(class_specifier\n name: (template_type\n (type_identifier) @name\n (template_argument_list) @template-arguments)) @definition.class\n(struct_specifier name: (type_identifier) @name) @definition.struct\n(struct_specifier\n name: (template_type\n (type_identifier) @name\n (template_argument_list) @template-arguments)) @definition.struct\n(namespace_definition name: (namespace_identifier) @name) @definition.namespace\n(enum_specifier name: (type_identifier) @name) @definition.enum\n\n; Typedefs and unions (common in C-style headers and mixed C/C++ code)\n(type_definition declarator: (type_identifier) @name) @definition.typedef\n(union_specifier name: (type_identifier) @name) @definition.union\n\n; Macros\n(preproc_function_def name: (identifier) @name) @definition.macro\n(preproc_def name: (identifier) @name) @definition.macro\n\n; Functions & Methods (direct declarator)\n(function_definition declarator: (function_declarator declarator: (identifier) @name)) @definition.function\n(function_definition declarator: (function_declarator declarator: (operator_name) @name)) @definition.function\n(function_definition declarator: (function_declarator declarator: (qualified_identifier name: (identifier) @name))) @definition.method\n(function_definition declarator: (function_declarator declarator: (qualified_identifier name: (operator_name) @name))) @definition.method\n\n; Functions/methods returning pointers (pointer_declarator wraps function_declarator)\n(function_definition declarator: (pointer_declarator declarator: (function_declarator declarator: (identifier) @name))) @definition.function\n(function_definition declarator: (pointer_declarator declarator: (function_declarator declarator: (qualified_identifier name: (identifier) @name)))) @definition.method\n\n; Functions/methods returning double pointers (nested 
pointer_declarator)\n(function_definition declarator: (pointer_declarator declarator: (pointer_declarator declarator: (function_declarator declarator: (identifier) @name)))) @definition.function\n(function_definition declarator: (pointer_declarator declarator: (pointer_declarator declarator: (function_declarator declarator: (qualified_identifier name: (identifier) @name))))) @definition.method\n\n; Functions/methods returning references (reference_declarator wraps function_declarator)\n(function_definition declarator: (reference_declarator (function_declarator declarator: (identifier) @name))) @definition.function\n(function_definition declarator: (reference_declarator (function_declarator declarator: (operator_name) @name))) @definition.function\n(function_definition declarator: (reference_declarator (function_declarator declarator: (qualified_identifier name: (identifier) @name)))) @definition.method\n(function_definition declarator: (reference_declarator (function_declarator declarator: (qualified_identifier name: (operator_name) @name)))) @definition.method\n\n; Destructors (destructor_name is distinct from identifier in tree-sitter-cpp)\n(function_definition declarator: (function_declarator declarator: (qualified_identifier name: (destructor_name) @name))) @definition.method\n\n; Function declarations / prototypes (common in headers)\n(declaration declarator: (function_declarator declarator: (identifier) @name)) @definition.function\n(declaration declarator: (function_declarator declarator: (operator_name) @name)) @definition.function\n(declaration declarator: (pointer_declarator declarator: (function_declarator declarator: (identifier) @name))) @definition.function\n(declaration declarator: (reference_declarator (function_declarator declarator: (operator_name) @name))) @definition.function\n\n; Class/struct data member fields (Address address; int count;)\n; Uses field_identifier to exclude method declarations (which use 
function_declarator)\n(field_declaration\n declarator: (field_identifier) @name) @definition.property\n\n; Pointer member fields (Address* address;)\n(field_declaration\n declarator: (pointer_declarator\n declarator: (field_identifier) @name)) @definition.property\n\n; Reference member fields (Address& address;)\n(field_declaration\n declarator: (reference_declarator\n (field_identifier) @name)) @definition.property\n\n; Inline class method declarations (inside class body, no body: void save();)\n; tree-sitter-cpp uses field_identifier (not identifier) for names inside class bodies\n(field_declaration declarator: (function_declarator declarator: [(field_identifier) (identifier) (operator_name)] @name)) @definition.method\n\n; Inline class method declarations returning a pointer (User* lookup();)\n(field_declaration declarator: (pointer_declarator declarator: (function_declarator declarator: [(field_identifier) (identifier)] @name))) @definition.method\n\n; Inline class method declarations returning a reference (User& lookup();)\n(field_declaration declarator: (reference_declarator (function_declarator declarator: [(field_identifier) (identifier) (operator_name)] @name))) @definition.method\n\n; Inline class method definitions (inside class body, with body: void Foo() { ... })\n(field_declaration_list\n (function_definition\n declarator: (function_declarator\n declarator: [(field_identifier) (identifier) (operator_name) (destructor_name)] @name)) @definition.method)\n\n; Inline class methods returning a pointer type (User* lookup(int id) { ... })\n(field_declaration_list\n (function_definition\n declarator: (pointer_declarator\n declarator: (function_declarator\n declarator: [(field_identifier) (identifier) (operator_name)] @name))) @definition.method)\n\n; Inline class methods returning a reference type (User& lookup(int id) { ... 
})\n(field_declaration_list\n (function_definition\n declarator: (reference_declarator\n (function_declarator\n declarator: [(field_identifier) (identifier) (operator_name)] @name))) @definition.method)\n\n; Templates\n(template_declaration (class_specifier name: (type_identifier) @name)) @definition.template\n(template_declaration\n (class_specifier\n name: (template_type\n (type_identifier) @name\n (template_argument_list) @template-arguments))) @definition.template\n(template_declaration (function_definition declarator: (function_declarator declarator: (identifier) @name))) @definition.template\n\n; Includes\n(preproc_include path: (_) @import.source) @import\n\n; Calls\n(call_expression function: (identifier) @call.name) @call\n(call_expression function: (field_expression field: (field_identifier) @call.name)) @call\n(call_expression function: (qualified_identifier name: (identifier) @call.name)) @call\n(call_expression function: (template_function name: (identifier) @call.name)) @call\n(binary_expression operator: \"+\" @call.name) @call\n(binary_expression operator: \"<<\" @call.name) @call\n\n; Constructor calls: new User()\n(new_expression type: (type_identifier) @call.name) @call\n\n; Variable declarations: int x = 5; or auto x = 5;\n(declaration\n declarator: (init_declarator\n declarator: (identifier) @name)) @definition.variable\n\n; Heritage\n(class_specifier name: (type_identifier) @heritage.class\n (base_class_clause (type_identifier) @heritage.extends)) @heritage\n(class_specifier name: (type_identifier) @heritage.class\n (base_class_clause (access_specifier) (type_identifier) @heritage.extends)) @heritage\n\n; Write access: obj.field = value\n(assignment_expression\n left: (field_expression\n argument: (_) @assignment.receiver\n field: (field_identifier) @assignment.property)\n right: (_)) @assignment\n\n";
|
|
8
8
|
export declare const CSHARP_QUERIES = "\n; Types\n(class_declaration name: (identifier) @name) @definition.class\n(interface_declaration name: (identifier) @name) @definition.interface\n(struct_declaration name: (identifier) @name) @definition.struct\n(enum_declaration name: (identifier) @name) @definition.enum\n(record_declaration name: (identifier) @name) @definition.record\n(delegate_declaration name: (identifier) @name) @definition.delegate\n\n; Namespaces (block form and C# 10+ file-scoped form)\n(namespace_declaration name: (identifier) @name) @definition.namespace\n(namespace_declaration name: (qualified_name) @name) @definition.namespace\n(file_scoped_namespace_declaration name: (identifier) @name) @definition.namespace\n(file_scoped_namespace_declaration name: (qualified_name) @name) @definition.namespace\n\n; Methods & Properties\n(method_declaration name: (identifier) @name) @definition.method\n(local_function_statement name: (identifier) @name) @definition.function\n(constructor_declaration name: (identifier) @name) @definition.constructor\n(property_declaration name: (identifier) @name) @definition.property\n\n; Primary constructors (C# 12): class User(string name, int age) { }\n(class_declaration name: (identifier) @name (parameter_list) @definition.constructor)\n(record_declaration name: (identifier) @name (parameter_list) @definition.constructor)\n\n; Using\n(using_directive (qualified_name) @import.source) @import\n(using_directive (identifier) @import.source) @import\n\n; Calls\n(invocation_expression function: (identifier) @call.name) @call\n(invocation_expression function: (member_access_expression name: (identifier) @call.name)) @call\n\n; Null-conditional method calls: user?.Save()\n; Parses as: invocation_expression \u2192 conditional_access_expression \u2192 member_binding_expression \u2192 identifier\n(invocation_expression\n function: (conditional_access_expression\n (member_binding_expression\n (identifier) @call.name))) @call\n\n; 
Constructor calls: new Foo() and new Foo { Props }\n(object_creation_expression type: (identifier) @call.name) @call\n\n; Target-typed new (C# 9): User u = new(\"x\", 5)\n(variable_declaration type: (identifier) @call.name (variable_declarator (implicit_object_creation_expression) @call))\n\n; Local variable declarations\n(local_declaration_statement\n (variable_declaration\n (variable_declarator\n (identifier) @name))) @definition.variable\n\n; Heritage\n(class_declaration name: (identifier) @heritage.class\n (base_list (identifier) @heritage.extends)) @heritage\n(class_declaration name: (identifier) @heritage.class\n (base_list (generic_name (identifier) @heritage.extends))) @heritage\n\n; Interface inheritance: interface IFoo : IBar / interface IFoo : IBar, IBaz\n; Without these patterns, interface-to-interface relationships are never\n; captured, so transitive \"class X implements IBar\" chains are broken.\n(interface_declaration name: (identifier) @heritage.class\n (base_list (identifier) @heritage.extends)) @heritage\n(interface_declaration name: (identifier) @heritage.class\n (base_list (generic_name (identifier) @heritage.extends))) @heritage\n\n; Write access: obj.field = value\n(assignment_expression\n left: (member_access_expression\n expression: (_) @assignment.receiver\n name: (identifier) @assignment.property)\n right: (_)) @assignment\n";
|
|
9
9
|
export declare const RUST_QUERIES = "\n; Functions & Items\n(function_item name: (identifier) @name) @definition.function\n(function_signature_item name: (identifier) @name) @definition.function\n(struct_item name: (type_identifier) @name) @definition.struct\n(enum_item name: (type_identifier) @name) @definition.enum\n(trait_item name: (type_identifier) @name) @definition.trait\n(impl_item type: (type_identifier) @name !trait) @definition.impl\n(impl_item type: (generic_type type: (type_identifier) @name) !trait) @definition.impl\n(mod_item name: (identifier) @name) @definition.module\n\n; Type aliases, const, static, macros\n(type_item name: (type_identifier) @name) @definition.type\n(const_item name: (identifier) @name) @definition.const\n(static_item name: (identifier) @name) @definition.static\n(macro_definition name: (identifier) @name) @definition.macro\n\n; Use statements\n(use_declaration argument: (_) @import.source) @import\n\n; Calls\n(call_expression function: (identifier) @call.name) @call\n(call_expression function: (field_expression field: (field_identifier) @call.name)) @call\n(call_expression function: (scoped_identifier name: (identifier) @call.name)) @call\n(call_expression function: (generic_function function: (identifier) @call.name)) @call\n\n; Struct literal construction: User { name: value }\n(struct_expression name: (type_identifier) @call.name) @call\n\n; Struct fields \u2014 named field declarations inside struct bodies\n(field_declaration_list\n (field_declaration\n name: (field_identifier) @name) @definition.property)\n\n; Heritage (trait implementation) \u2014 all combinations of concrete/generic trait \u00D7 concrete/generic type\n(impl_item trait: (type_identifier) @heritage.trait type: (type_identifier) @heritage.class) @heritage\n(impl_item trait: (generic_type type: (type_identifier) @heritage.trait) type: (type_identifier) @heritage.class) @heritage\n(impl_item trait: (type_identifier) @heritage.trait type: (generic_type type: 
(type_identifier) @heritage.class)) @heritage\n(impl_item trait: (generic_type type: (type_identifier) @heritage.trait) type: (generic_type type: (type_identifier) @heritage.class)) @heritage\n\n; Write access: obj.field = value\n(assignment_expression\n left: (field_expression\n value: (_) @assignment.receiver\n field: (field_identifier) @assignment.property)\n right: (_)) @assignment\n\n; Write access: obj.field += value (compound assignment)\n(compound_assignment_expr\n left: (field_expression\n value: (_) @assignment.receiver\n field: (field_identifier) @assignment.property)\n right: (_)) @assignment\n";
|
|
10
10
|
export declare const PHP_QUERIES = "\n; \u2500\u2500 Namespace \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n(namespace_definition\n name: (namespace_name) @name) @definition.namespace\n\n; \u2500\u2500 Classes \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n(class_declaration\n name: (name) @name) @definition.class\n\n; \u2500\u2500 Interfaces \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n(interface_declaration\n name: (name) @name) @definition.interface\n\n; \u2500\u2500 Traits \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n(trait_declaration\n name: (name) @name) @definition.trait\n\n; \u2500\u2500 Enums 
(PHP 8.1) \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n(enum_declaration\n name: (name) @name) @definition.enum\n\n; \u2500\u2500 Top-level functions \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n(function_definition\n name: (name) @name) @definition.function\n\n; \u2500\u2500 Methods (including constructors) \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n(method_declaration\n name: (name) @name) @definition.method\n\n; \u2500\u2500 Class properties (including Eloquent $fillable, $casts, etc.) 
\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n(property_declaration\n (property_element\n (variable_name\n (name) @name))) @definition.property\n\n; Constructor property promotion (PHP 8.0+: public Address $address in __construct)\n(method_declaration\n parameters: (formal_parameters\n (property_promotion_parameter\n name: (variable_name\n (name) @name)))) @definition.property\n\n; \u2500\u2500 Imports: use statements \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n; Simple: use App\\Models\\User;\n(namespace_use_declaration\n (namespace_use_clause\n (qualified_name) @import.source)) @import\n\n; \u2500\u2500 Function/method calls \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n; Regular function call: foo()\n(function_call_expression\n function: (name) @call.name) @call\n\n; Method call: $obj->method()\n(member_call_expression\n name: (name) @call.name) @call\n\n; Nullsafe method call: $obj?->method()\n(nullsafe_member_call_expression\n name: (name) @call.name) @call\n\n; Static call: Foo::bar() (php_only uses scoped_call_expression)\n(scoped_call_expression\n name: (name) @call.name) @call\n\n; Constructor call: new User()\n(object_creation_expression (name) @call.name) @call\n\n; Const declarations at class scope\n(const_declaration\n (const_element\n (name) @name)) @definition.const\n\n; \u2500\u2500 Heritage: extends 
\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n(class_declaration\n name: (name) @heritage.class\n (base_clause\n [(name) (qualified_name)] @heritage.extends)) @heritage\n\n; \u2500\u2500 Heritage: implements \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n(class_declaration\n name: (name) @heritage.class\n (class_interface_clause\n [(name) (qualified_name)] @heritage.implements)) @heritage.impl\n\n; \u2500\u2500 Heritage: use trait (must capture enclosing class name) \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n(class_declaration\n name: (name) @heritage.class\n body: (declaration_list\n (use_declaration\n [(name) (qualified_name)] @heritage.trait))) @heritage\n\n; \u2500\u2500 Heritage: trait uses another trait (transitive trait composition) \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n; PHP allows a trait body to contain \"use OtherTrait;\". The trait-uses-trait\n; IMPLEMENTS edge is required by buildPhpMro to compute the full transitive\n; trait closure (depth 3+ chains).\n(trait_declaration\n name: (name) @heritage.class\n body: (declaration_list\n (use_declaration\n [(name) (qualified_name)] @heritage.trait))) @heritage\n\n; PHP HTTP consumers: file_get_contents('/path'), curl_init('/path')\n(function_call_expression\n function: (name) @_php_http (#match? 
@_php_http \"^(file_get_contents|curl_init)$\")\n arguments: (arguments\n (argument (string (string_content) @http_client.url)))) @http_client\n\n; Write access: $obj->field = value\n(assignment_expression\n left: (member_access_expression\n object: (_) @assignment.receiver\n name: (name) @assignment.property)\n right: (_)) @assignment\n\n; Write access: ClassName::$field = value (static property)\n(assignment_expression\n left: (scoped_property_access_expression\n scope: (_) @assignment.receiver\n name: (variable_name (name) @assignment.property))\n right: (_)) @assignment\n";
|
|
@@ -695,7 +695,9 @@ export const CPP_QUERIES = `
|
|
|
695
695
|
|
|
696
696
|
; Functions & Methods (direct declarator)
|
|
697
697
|
(function_definition declarator: (function_declarator declarator: (identifier) @name)) @definition.function
|
|
698
|
+
(function_definition declarator: (function_declarator declarator: (operator_name) @name)) @definition.function
|
|
698
699
|
(function_definition declarator: (function_declarator declarator: (qualified_identifier name: (identifier) @name))) @definition.method
|
|
700
|
+
(function_definition declarator: (function_declarator declarator: (qualified_identifier name: (operator_name) @name))) @definition.method
|
|
699
701
|
|
|
700
702
|
; Functions/methods returning pointers (pointer_declarator wraps function_declarator)
|
|
701
703
|
(function_definition declarator: (pointer_declarator declarator: (function_declarator declarator: (identifier) @name))) @definition.function
|
|
@@ -707,14 +709,18 @@ export const CPP_QUERIES = `
|
|
|
707
709
|
|
|
708
710
|
; Functions/methods returning references (reference_declarator wraps function_declarator)
|
|
709
711
|
(function_definition declarator: (reference_declarator (function_declarator declarator: (identifier) @name))) @definition.function
|
|
712
|
+
(function_definition declarator: (reference_declarator (function_declarator declarator: (operator_name) @name))) @definition.function
|
|
710
713
|
(function_definition declarator: (reference_declarator (function_declarator declarator: (qualified_identifier name: (identifier) @name)))) @definition.method
|
|
714
|
+
(function_definition declarator: (reference_declarator (function_declarator declarator: (qualified_identifier name: (operator_name) @name)))) @definition.method
|
|
711
715
|
|
|
712
716
|
; Destructors (destructor_name is distinct from identifier in tree-sitter-cpp)
|
|
713
717
|
(function_definition declarator: (function_declarator declarator: (qualified_identifier name: (destructor_name) @name))) @definition.method
|
|
714
718
|
|
|
715
719
|
; Function declarations / prototypes (common in headers)
|
|
716
720
|
(declaration declarator: (function_declarator declarator: (identifier) @name)) @definition.function
|
|
721
|
+
(declaration declarator: (function_declarator declarator: (operator_name) @name)) @definition.function
|
|
717
722
|
(declaration declarator: (pointer_declarator declarator: (function_declarator declarator: (identifier) @name))) @definition.function
|
|
723
|
+
(declaration declarator: (reference_declarator (function_declarator declarator: (operator_name) @name))) @definition.function
|
|
718
724
|
|
|
719
725
|
; Class/struct data member fields (Address address; int count;)
|
|
720
726
|
; Uses field_identifier to exclude method declarations (which use function_declarator)
|
|
@@ -733,13 +739,13 @@ export const CPP_QUERIES = `
|
|
|
733
739
|
|
|
734
740
|
; Inline class method declarations (inside class body, no body: void save();)
|
|
735
741
|
; tree-sitter-cpp uses field_identifier (not identifier) for names inside class bodies
|
|
736
|
-
(field_declaration declarator: (function_declarator declarator: [(field_identifier) (identifier)] @name)) @definition.method
|
|
742
|
+
(field_declaration declarator: (function_declarator declarator: [(field_identifier) (identifier) (operator_name)] @name)) @definition.method
|
|
737
743
|
|
|
738
744
|
; Inline class method declarations returning a pointer (User* lookup();)
|
|
739
745
|
(field_declaration declarator: (pointer_declarator declarator: (function_declarator declarator: [(field_identifier) (identifier)] @name))) @definition.method
|
|
740
746
|
|
|
741
747
|
; Inline class method declarations returning a reference (User& lookup();)
|
|
742
|
-
(field_declaration declarator: (reference_declarator (function_declarator declarator: [(field_identifier) (identifier)] @name))) @definition.method
|
|
748
|
+
(field_declaration declarator: (reference_declarator (function_declarator declarator: [(field_identifier) (identifier) (operator_name)] @name))) @definition.method
|
|
743
749
|
|
|
744
750
|
; Inline class method definitions (inside class body, with body: void Foo() { ... })
|
|
745
751
|
(field_declaration_list
|
|
@@ -778,6 +784,8 @@ export const CPP_QUERIES = `
|
|
|
778
784
|
(call_expression function: (field_expression field: (field_identifier) @call.name)) @call
|
|
779
785
|
(call_expression function: (qualified_identifier name: (identifier) @call.name)) @call
|
|
780
786
|
(call_expression function: (template_function name: (identifier) @call.name)) @call
|
|
787
|
+
(binary_expression operator: "+" @call.name) @call
|
|
788
|
+
(binary_expression operator: "<<" @call.name) @call
|
|
781
789
|
|
|
782
790
|
; Constructor calls: new User()
|
|
783
791
|
(new_expression type: (type_identifier) @call.name) @call
|
|
@@ -142,6 +142,21 @@ export declare const fetchExistingEmbeddingHashes: (execQuery: (cypher: string)
|
|
|
142
142
|
* @see safeClose — CHECKPOINT + connection/database close
|
|
143
143
|
*/
|
|
144
144
|
export declare const flushWAL: () => Promise<void>;
|
|
145
|
+
/**
|
|
146
|
+
* Issue a manual `CHECKPOINT` against the current connection and surface
|
|
147
|
+
* any engine error to the caller. Unlike {@link flushWAL}, this variant
|
|
148
|
+
* does NOT swallow Ladybug rename/remove IO failures — the manual
|
|
149
|
+
* checkpoint driver (`wal-checkpoint-driver.ts`) relies on the rejection
|
|
150
|
+
* to drive its bounded retry loop. Returns `false` when no connection is
|
|
151
|
+
* open (the caller treats this as a no-op success — there is no WAL to
|
|
152
|
+
* flush). Returns `true` after a successful CHECKPOINT + drain.
|
|
153
|
+
*
|
|
154
|
+
* The split from `flushWAL` is deliberate: every other CHECKPOINT site
|
|
155
|
+
* (server flush, safeClose) is best-effort and prefers a silent skip;
|
|
156
|
+
* the manual driver, by contrast, must observe failures to decide
|
|
157
|
+
* whether to retry.
|
|
158
|
+
*/
|
|
159
|
+
export declare const tryFlushWAL: () => Promise<boolean>;
|
|
145
160
|
/**
|
|
146
161
|
* Flush the WAL and close the connection and database handles.
|
|
147
162
|
*
|
|
@@ -1334,6 +1334,27 @@ export const flushWAL = async () => {
|
|
|
1334
1334
|
logger.debug(`GitNexus: LadybugDB CHECKPOINT skipped/failed during WAL flush: ${summarizeError(err)}`);
|
|
1335
1335
|
}
|
|
1336
1336
|
};
|
|
1337
|
+
/**
 * Run a manual `CHECKPOINT` on the current connection and let any engine
 * error reject the returned promise.
 *
 * Unlike {@link flushWAL}, nothing is caught here — Ladybug rename/remove
 * IO failures included. The manual checkpoint driver
 * (`wal-checkpoint-driver.ts`) depends on that rejection to drive its
 * bounded retry loop, whereas every other CHECKPOINT site (server flush,
 * safeClose) is best-effort and prefers a silent skip. That difference is
 * the reason this helper exists separately from `flushWAL`.
 *
 * @returns `false` when no connection is open (callers treat this as a
 *   no-op success — there is no WAL to flush); `true` once the CHECKPOINT
 *   query has run and its result has been drained.
 */
export const tryFlushWAL = async () => {
    if (conn) {
        // No try/catch on purpose: the retry driver must see the rejection.
        const pendingResult = await conn.query('CHECKPOINT');
        await drainQueryResult(pendingResult);
        return true;
    }
    return false;
};
|
|
1337
1358
|
/**
|
|
1338
1359
|
* Flush the WAL and close the connection and database handles.
|
|
1339
1360
|
*
|
|
@@ -32,8 +32,15 @@ import type lbug from '@ladybugdb/core';
|
|
|
32
32
|
* integer; anything invalid falls back to the default.
|
|
33
33
|
*/
|
|
34
34
|
export declare const LBUG_MAX_DB_SIZE: number;
|
|
35
|
+
export declare const parseWalCheckpointThreshold: (raw: string | undefined) => number | undefined;
|
|
35
36
|
export declare const WAL_RECOVERY_SUGGESTION = "WAL corruption detected. Run `gitnexus analyze --force` to rebuild the index.";
|
|
36
37
|
export declare function isWalCorruptionError(err: unknown): boolean;
|
|
38
|
+
/**
|
|
39
|
+
* True when `err` looks like a Ladybug WAL-checkpoint rotation/remove IO
|
|
40
|
+
* failure. Tries strict matchers first (renames + removes), then falls
|
|
41
|
+
* back to the permissive matcher.
|
|
42
|
+
*/
|
|
43
|
+
export declare const isLbugCheckpointIoError: (err: unknown) => boolean;
|
|
37
44
|
type LbugModule = typeof lbug;
|
|
38
45
|
export interface LbugDatabaseOptions {
|
|
39
46
|
readOnly?: boolean;
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import fs from 'fs/promises';
|
|
2
2
|
import os from 'os';
|
|
3
3
|
import path from 'path';
|
|
4
|
+
import { logger } from '../logger.js';
|
|
4
5
|
/**
|
|
5
6
|
* Shared configuration for `@ladybugdb/core` `Database` construction.
|
|
6
7
|
*
|
|
@@ -42,6 +43,43 @@ export const LBUG_MAX_DB_SIZE = (() => {
|
|
|
42
43
|
}
|
|
43
44
|
return 16 * 1024 * 1024 * 1024;
|
|
44
45
|
})();
|
|
46
|
+
/**
 * Parse a raw WAL checkpoint threshold (CLI flag or env var text).
 *
 * Accepts an integer `>= -1` after trimming surrounding whitespace. Values
 * outside the safe-integer range are rejected as well: `Number()` has
 * already rounded them, so the byte count handed to the database would not
 * be the one the operator typed.
 *
 * @param {string | undefined} raw Raw text; `undefined` means "not provided".
 * @returns {number | undefined} The parsed threshold, or `undefined` when
 *   the input is missing, blank, not an integer, below -1, or too large to
 *   be represented exactly.
 */
export const parseWalCheckpointThreshold = (raw) => {
    if (raw === undefined)
        return undefined;
    const normalized = raw.trim();
    if (normalized.length === 0)
        return undefined;
    const parsed = Number(normalized);
    // isSafeInteger implies isInteger and also filters NaN / ±Infinity.
    if (!Number.isSafeInteger(parsed) || parsed < -1)
        return undefined;
    return parsed;
};
|
|
57
|
+
/**
|
|
58
|
+
* Default GitNexus WAL auto-checkpoint threshold in bytes (64 MiB).
|
|
59
|
+
*
|
|
60
|
+
* Larger than Ladybug's stock ~16 MiB to reduce checkpoint rename/remove
|
|
61
|
+
* churn under heavy analyze write load — the original race that motivated
|
|
62
|
+
* issue #1741 triggered at the stock threshold. README examples in
|
|
63
|
+
* `README.md` and `gitnexus/README.md` and the recovery hint in
|
|
64
|
+
* `analyze.ts` MUST stay in sync with this value.
|
|
65
|
+
*/
|
|
66
|
+
const DEFAULT_WAL_CHECKPOINT_THRESHOLD = 64 * 1024 * 1024;
|
|
67
|
+
/**
 * Resolve the WAL checkpoint threshold from
 * `GITNEXUS_WAL_CHECKPOINT_THRESHOLD`.
 *
 * Returns the env value when `parseWalCheckpointThreshold` accepts it and
 * `DEFAULT_WAL_CHECKPOINT_THRESHOLD` otherwise. A non-blank value that
 * fails to parse is reported through `logger.warn`; an unset or blank
 * variable falls back silently.
 */
const resolveCheckpointThreshold = () => {
    const envValue = process.env.GITNEXUS_WAL_CHECKPOINT_THRESHOLD;
    const threshold = envValue === undefined ? undefined : parseWalCheckpointThreshold(envValue);
    if (threshold !== undefined) {
        return threshold;
    }
    // Non-empty but unparseable input: warn the operator and fall back. The
    // CLI's `--wal-checkpoint-threshold` validation hard-errors instead, but
    // the env-var path stays soft to preserve "set once in your shell"
    // ergonomics across mixed-version invocations.
    if (envValue !== undefined && envValue.trim().length > 0) {
        logger.warn({ rawValue: envValue, fallback: DEFAULT_WAL_CHECKPOINT_THRESHOLD }, `Ignoring invalid GITNEXUS_WAL_CHECKPOINT_THRESHOLD=${envValue}; expected integer >= -1; falling back to default (${DEFAULT_WAL_CHECKPOINT_THRESHOLD}).`);
    }
    return DEFAULT_WAL_CHECKPOINT_THRESHOLD;
};
|
|
45
83
|
/** Matches WAL corruption errors from the LadybugDB engine. */
|
|
46
84
|
const WAL_CORRUPTION_RE = /corrupt(ed)?\s+wal|invalid\s+wal\s+record|wal.*corrupt|checksum.*wal/i;
|
|
47
85
|
export const WAL_RECOVERY_SUGGESTION = 'WAL corruption detected. Run `gitnexus analyze --force` to rebuild the index.';
|
|
@@ -51,6 +89,48 @@ export function isWalCorruptionError(err) {
|
|
|
51
89
|
const msg = err instanceof Error ? err.message : String(err);
|
|
52
90
|
return WAL_CORRUPTION_RE.test(msg);
|
|
53
91
|
}
|
|
92
|
+
// ─── Ladybug WAL checkpoint IO error matchers ───────────────────────────────
|
|
93
|
+
//
|
|
94
|
+
// Matched against LadybugDB v0.16.1 (see `gitnexus/package.json`
|
|
95
|
+
// @ladybugdb/core). Strict regexes encode local_file_system.cpp wording
|
|
96
|
+
// verified at that version. Two-tier strategy: strict matchers first so we
|
|
97
|
+
// only fire on real checkpoint-rotation shapes; a permissive fallback
|
|
98
|
+
// catches future Ladybug message drift so the recovery hint keeps surfacing
|
|
99
|
+
// even if upstream wording changes.
|
|
100
|
+
//
|
|
101
|
+
// From Ladybug native LocalFileSystem exceptions (`local_file_system.cpp`),
|
|
102
|
+
// surfaced in Node as:
|
|
103
|
+
// "Runtime exception: IO exception: Error renaming file ..."
|
|
104
|
+
// "Runtime exception: IO exception: Error removing directory or file ..."
|
|
105
|
+
// We only match checkpoint-rotation shapes:
|
|
106
|
+
// - "<db>.wal -> <db>.wal.checkpoint" rename failures
|
|
107
|
+
// - "<db>.wal.checkpoint" remove failures
|
|
108
|
+
// Example matches:
|
|
109
|
+
// "Runtime exception: IO exception: Error renaming file /x/lbug.wal to /x/lbug.wal.checkpoint. ErrorMessage: Permission denied"
|
|
110
|
+
// "Runtime exception: IO exception: Error removing directory or file /x/lbug.wal.checkpoint. Error Message: Permission denied"
|
|
111
|
+
// Matching is case-insensitive to remain robust across wrappers/platforms.
|
|
112
|
+
const LBUG_CHECKPOINT_RENAME_RE = /^runtime exception: io exception:\s*error renaming file\s+.+?\.wal\s+to\s+.+?\.wal\.checkpoint(?:\.|\s|$)/i;
|
|
113
|
+
const LBUG_CHECKPOINT_REMOVE_RE = /^runtime exception: io exception:\s*error removing directory or file\s+.+?\.wal\.checkpoint(?:\.|\s|$)/i;
|
|
114
|
+
/**
|
|
115
|
+
* Permissive fallback: any IO-exception-shaped message that mentions a
|
|
116
|
+
* `.wal.checkpoint` path. Catches future Ladybug message drift (different
|
|
117
|
+
* verb, additional preamble, locale variation) so the recovery hint keeps
|
|
118
|
+
* surfacing even if the strict regexes go stale.
|
|
119
|
+
*/
|
|
120
|
+
const LBUG_CHECKPOINT_PERMISSIVE_RE = /io exception.*\.wal\.checkpoint/i;
|
|
121
|
+
/**
 * Decide whether `err` looks like a Ladybug WAL-checkpoint rotation/remove
 * IO failure.
 *
 * The message (or `String(err)` for non-Error values) is tested against the
 * strict rename and remove patterns first, then against the permissive
 * fallback pattern. Falsy inputs never match.
 *
 * @param err Anything that was thrown or rejected.
 * @returns `true` when any of the three patterns matches the message.
 */
export const isLbugCheckpointIoError = (err) => {
    if (!err) {
        return false;
    }
    const text = err instanceof Error ? err.message : String(err);
    // Order mirrors the two-tier strategy: strict shapes, then the fallback.
    const patterns = [
        LBUG_CHECKPOINT_RENAME_RE,
        LBUG_CHECKPOINT_REMOVE_RE,
        LBUG_CHECKPOINT_PERMISSIVE_RE,
    ];
    return patterns.some((pattern) => pattern.test(text));
};
|
|
54
134
|
/**
|
|
55
135
|
* Return true when the error message indicates that a LadybugDB file lock
|
|
56
136
|
* could not be acquired — either at construction time
|
|
@@ -76,8 +156,8 @@ export function createLbugDatabase(lbugModule, databasePath, options = {}) {
|
|
|
76
156
|
// .d.ts declares fewer args than the native constructor accepts.
|
|
77
157
|
return new lbugModule.Database(databasePath, 0, // bufferManagerSize
|
|
78
158
|
false, // enableCompression (pinned for v0.16.0)
|
|
79
|
-
options.readOnly ?? false, LBUG_MAX_DB_SIZE, true, // autoCheckpoint
|
|
80
|
-
|
|
159
|
+
options.readOnly ?? false, LBUG_MAX_DB_SIZE, true, // autoCheckpoint (always on)
|
|
160
|
+
resolveCheckpointThreshold(), // checkpointThreshold (default 64 MiB; override with GITNEXUS_WAL_CHECKPOINT_THRESHOLD; -1 keeps Ladybug stock ~16 MiB)
|
|
81
161
|
options.throwOnWalReplayFailure ?? true, true);
|
|
82
162
|
}
|
|
83
163
|
// ─── Lock-busy retry tuning knobs ───────────────────────────────────────────
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Manual WAL checkpoint driver with bounded retry (#1741 follow-up).
|
|
3
|
+
*
|
|
4
|
+
* Background
|
|
5
|
+
* ----------
|
|
6
|
+
* LadybugDB's native auto-checkpoint runs from inside the C++ engine on a
|
|
7
|
+
* background path that has no JS-side hook for mid-write rotation. When
|
|
8
|
+
* the rename of `<db>.wal` → `<db>.wal.checkpoint` races a transient file
|
|
9
|
+
* lock (Windows Defender, AV scanner, NTFS shadow copy) the engine raises
|
|
10
|
+
* a `Runtime exception: IO exception: Error renaming file …` that aborts
|
|
11
|
+
* the in-flight write. There is no engine-level retry.
|
|
12
|
+
*
|
|
13
|
+
* The auto-checkpoint cannot be made retryable from JS, but a *manual*
|
|
14
|
+
* `CHECKPOINT` query that the JS layer issues itself CAN be wrapped in a
|
|
15
|
+
* bounded retry. By draining the WAL on a tight cadence — more often than
|
|
16
|
+
* the native threshold — the auto-checkpoint almost never has work left
|
|
17
|
+
* to do, so the un-retriable native rename race is moved into the
|
|
18
|
+
* JS-controlled path where this module's retry absorbs it.
|
|
19
|
+
*
|
|
20
|
+
* Design contract
|
|
21
|
+
* ---------------
|
|
22
|
+
* - `autoCheckpoint` stays on (maintainer requirement). This driver is
|
|
23
|
+
* additive: it preempts the native checkpoint, it does not replace it.
|
|
24
|
+
* - The driver runs ONLY during analyze (callers opt-in explicitly). MCP
|
|
25
|
+
* and other long-lived flows continue to rely on the close-time
|
|
26
|
+
* CHECKPOINT in `safeClose`.
|
|
27
|
+
* - Opt-out is via `GITNEXUS_WAL_MANUAL_CHECKPOINT=0`. Default is on.
|
|
28
|
+
* - Retries only fire on `isLbugCheckpointIoError` — every other error
|
|
29
|
+
* surfaces immediately. The retry budget is small (3 attempts) with
|
|
30
|
+
* jittered backoff so a chronic rename failure escalates fast.
|
|
31
|
+
* - Retry attempts log at `debug`; only the final, exhausted failure
|
|
32
|
+
* surfaces to the caller (and is logged at `warn` here for operators).
|
|
33
|
+
*/
|
|
34
|
+
/**
|
|
35
|
+
* Run a single CHECKPOINT with bounded retry on
|
|
36
|
+
* `isLbugCheckpointIoError`. Returns the number of attempts actually
|
|
37
|
+
* spent (1-`CHECKPOINT_RETRY_ATTEMPTS`) on success, or rethrows the last
|
|
38
|
+
* checkpoint error after exhausting the budget. Non-checkpoint errors
|
|
39
|
+
* (e.g. WAL corruption, lock-busy) propagate immediately on the first
|
|
40
|
+
* attempt — those are not what this retry is designed to absorb.
|
|
41
|
+
*
|
|
42
|
+
* The split from `flushWAL` is deliberate: `flushWAL` is the swallow-and-
|
|
43
|
+
* log helper used by `safeClose` and the server's best-effort flush,
|
|
44
|
+
* which by contract cannot fail the surrounding operation. The manual
|
|
45
|
+
* driver MUST observe failures to decide whether to retry, and that is
|
|
46
|
+
* the role of `tryFlushWAL`.
|
|
47
|
+
*
|
|
48
|
+
* Exported for direct unit testing — production callers use
|
|
49
|
+
* {@link startWalCheckpointDriver} or {@link checkpointOnce}.
|
|
50
|
+
*/
|
|
51
|
+
export declare const runCheckpointWithRetry: (options?: {
|
|
52
|
+
/** Override the sleep implementation for tests. */
|
|
53
|
+
sleepFn?: (ms: number) => Promise<void>;
|
|
54
|
+
/** Override the CHECKPOINT call for tests. */
|
|
55
|
+
checkpointFn?: () => Promise<boolean>;
|
|
56
|
+
/** Override the jitter source for tests. Returns a value in [0, 1). */
|
|
57
|
+
randomFn?: () => number;
|
|
58
|
+
}) => Promise<{
|
|
59
|
+
attempts: number;
|
|
60
|
+
flushed: boolean;
|
|
61
|
+
}>;
|
|
62
|
+
/**
|
|
63
|
+
* Single-shot manual checkpoint. Use this when the caller drives the
|
|
64
|
+
* cadence itself (e.g. a phase boundary in `runFullAnalysis`).
|
|
65
|
+
*
|
|
66
|
+
* Honors the `GITNEXUS_WAL_MANUAL_CHECKPOINT=0` opt-out so operators can
|
|
67
|
+
* disable the manual path if it ever interacts badly with a future
|
|
68
|
+
* Ladybug release.
|
|
69
|
+
*/
|
|
70
|
+
export declare const checkpointOnce: () => Promise<void>;
|
|
71
|
+
/**
|
|
72
|
+
* Start a periodic manual checkpoint driver. The returned handle has a
|
|
73
|
+
* `stop()` method that resolves once the in-flight checkpoint (if any)
|
|
74
|
+
* settles, so callers can `await driver.stop()` before close-time
|
|
75
|
+
* `safeClose` and avoid racing the final flush.
|
|
76
|
+
*
|
|
77
|
+
* The first checkpoint fires after `periodMs` (not immediately) so a
|
|
78
|
+
* cold analyze does not pay a CHECKPOINT round trip before any writes
|
|
79
|
+
* have happened.
|
|
80
|
+
*/
|
|
81
|
+
export interface WalCheckpointDriver {
|
|
82
|
+
/** Stop the driver and await any in-flight checkpoint. Idempotent. */
|
|
83
|
+
stop(): Promise<void>;
|
|
84
|
+
}
|
|
85
|
+
export declare const startWalCheckpointDriver: (options?: {
|
|
86
|
+
periodMs?: number;
|
|
87
|
+
}) => WalCheckpointDriver;
|
|
88
|
+
/**
|
|
89
|
+
* Reading `GITNEXUS_WAL_MANUAL_CHECKPOINT` at every call site (rather
|
|
90
|
+
* than caching at module load) keeps `analyzeCommand` env restoration
|
|
91
|
+
* honest: tests that toggle the flag between invocations see the live
|
|
92
|
+
* value, matching the `ANALYZE_CLI_ENV_KEYS` snapshot/restore contract
|
|
93
|
+
* in `analyze.ts`.
|
|
94
|
+
*
|
|
95
|
+
* Accepted opt-out values: '0', 'false', 'off', 'no' (case-insensitive).
|
|
96
|
+
* Anything else — including undefined — leaves the driver enabled.
|
|
97
|
+
*/
|
|
98
|
+
export declare const isManualCheckpointEnabled: () => boolean;
|
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Manual WAL checkpoint driver with bounded retry (#1741 follow-up).
|
|
3
|
+
*
|
|
4
|
+
* Background
|
|
5
|
+
* ----------
|
|
6
|
+
* LadybugDB's native auto-checkpoint runs from inside the C++ engine on a
|
|
7
|
+
* background path that has no JS-side hook for mid-write rotation. When
|
|
8
|
+
* the rename of `<db>.wal` → `<db>.wal.checkpoint` races a transient file
|
|
9
|
+
* lock (Windows Defender, AV scanner, NTFS shadow copy) the engine raises
|
|
10
|
+
* a `Runtime exception: IO exception: Error renaming file …` that aborts
|
|
11
|
+
* the in-flight write. There is no engine-level retry.
|
|
12
|
+
*
|
|
13
|
+
* The auto-checkpoint cannot be made retryable from JS, but a *manual*
|
|
14
|
+
* `CHECKPOINT` query that the JS layer issues itself CAN be wrapped in a
|
|
15
|
+
* bounded retry. By draining the WAL on a tight cadence — more often than
|
|
16
|
+
* the native threshold — the auto-checkpoint almost never has work left
|
|
17
|
+
* to do, so the un-retriable native rename race is moved into the
|
|
18
|
+
* JS-controlled path where this module's retry absorbs it.
|
|
19
|
+
*
|
|
20
|
+
* Design contract
|
|
21
|
+
* ---------------
|
|
22
|
+
* - `autoCheckpoint` stays on (maintainer requirement). This driver is
|
|
23
|
+
* additive: it preempts the native checkpoint, it does not replace it.
|
|
24
|
+
* - The driver runs ONLY during analyze (callers opt-in explicitly). MCP
|
|
25
|
+
* and other long-lived flows continue to rely on the close-time
|
|
26
|
+
* CHECKPOINT in `safeClose`.
|
|
27
|
+
* - Opt-out is via `GITNEXUS_WAL_MANUAL_CHECKPOINT=0`. Default is on.
|
|
28
|
+
* - Retries only fire on `isLbugCheckpointIoError` — every other error
|
|
29
|
+
* surfaces immediately. The retry budget is small (3 attempts) with
|
|
30
|
+
* jittered backoff so a chronic rename failure escalates fast.
|
|
31
|
+
* - Retry attempts log at `debug`; only the final, exhausted failure
|
|
32
|
+
* surfaces to the caller (and is logged at `warn` here for operators).
|
|
33
|
+
*/
|
|
34
|
+
import { logger } from '../logger.js';
|
|
35
|
+
import { tryFlushWAL } from './lbug-adapter.js';
|
|
36
|
+
import { isLbugCheckpointIoError } from './lbug-config.js';
|
|
37
|
+
/**
|
|
38
|
+
* Bounded retry budget. Total worst-case wall time is dominated by the
|
|
39
|
+
* two sleeps between attempts (~250 ms before jitter) plus three CHECKPOINT round
|
|
40
|
+
* trips — small enough to stay invisible during a large analyze, large
|
|
41
|
+
* enough to ride out a single AV scanner sweep on Windows.
|
|
42
|
+
*/
|
|
43
|
+
const CHECKPOINT_RETRY_ATTEMPTS = 3;
|
|
44
|
+
/**
|
|
45
|
+
* Base back-off in ms. Each attempt waits `BASE_DELAYS_MS[attempt-1]`
|
|
46
|
+
* milliseconds before the next try, plus a small jitter to avoid
|
|
47
|
+
* synchronized retries when multiple analyzers ever share a host.
|
|
48
|
+
*/
|
|
49
|
+
const BASE_DELAYS_MS = [50, 200, 500];
|
|
50
|
+
/** Maximum jitter added on top of each base delay. */
|
|
51
|
+
const JITTER_MAX_MS = 50;
|
|
52
|
+
const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
|
|
53
|
+
/**
 * Issue one CHECKPOINT, retrying only when the failure satisfies
 * `isLbugCheckpointIoError`.
 *
 * At most `CHECKPOINT_RETRY_ATTEMPTS` attempts are made. After a retryable
 * failure the function waits for the matching `BASE_DELAYS_MS` entry plus a
 * jitter of `floor(random * JITTER_MAX_MS)` before trying again. Any other
 * error (e.g. WAL corruption, lock-busy) is rethrown on the attempt where
 * it occurs — those are not what this retry is designed to absorb.
 *
 * `tryFlushWAL` is the default checkpoint call rather than `flushWAL`
 * because `flushWAL` is the swallow-and-log helper used by `safeClose` and
 * the server's best-effort flush; this function has to observe failures in
 * order to decide whether to retry.
 *
 * Exported for direct unit testing — production callers use
 * {@link startWalCheckpointDriver} or {@link checkpointOnce}.
 *
 * @param options Optional test seams: `sleepFn` (delay implementation),
 *   `checkpointFn` (the CHECKPOINT call) and `randomFn` (jitter source,
 *   expected to return a value in [0, 1)).
 * @returns `{ attempts, flushed }` — the number of attempts spent and the
 *   value returned by the checkpoint call.
 * @throws The last checkpoint IO error once the budget is exhausted, or any
 *   non-checkpoint error immediately.
 */
export const runCheckpointWithRetry = async (options = {}) => {
    const pause = options.sleepFn ?? sleep;
    const doCheckpoint = options.checkpointFn ?? tryFlushWAL;
    // Math.random is non-cryptographic by design; jitter only avoids
    // synchronized retries between concurrent analyzers.
    const nextRandom = options.randomFn ?? Math.random;
    let attempt = 0;
    for (;;) {
        attempt += 1;
        try {
            const flushed = await doCheckpoint();
            return { attempts: attempt, flushed };
        }
        catch (failure) {
            if (!isLbugCheckpointIoError(failure)) {
                // Not a checkpoint IO error: retrying would only mask the
                // real signal, so hand it straight back to the caller.
                throw failure;
            }
            if (attempt >= CHECKPOINT_RETRY_ATTEMPTS) {
                logger.warn({ attempts: CHECKPOINT_RETRY_ATTEMPTS }, 'GitNexus: manual WAL checkpoint exhausted retry budget — surfacing IO error to caller');
                throw failure;
            }
            const slot = Math.min(attempt - 1, BASE_DELAYS_MS.length - 1);
            const base = BASE_DELAYS_MS[slot] ?? 500;
            const delayMs = base + Math.floor(nextRandom() * JITTER_MAX_MS);
            logger.debug({ attempt, totalAttempts: CHECKPOINT_RETRY_ATTEMPTS, delayMs }, 'GitNexus: WAL checkpoint IO error — retrying');
            await pause(delayMs);
        }
    }
};
|
|
101
|
+
/**
 * Run one manual checkpoint (with the bounded retry of
 * `runCheckpointWithRetry`). Intended for callers that drive the cadence
 * themselves, e.g. at a phase boundary in `runFullAnalysis`.
 *
 * Does nothing when `isManualCheckpointEnabled()` reports the
 * `GITNEXUS_WAL_MANUAL_CHECKPOINT=0` opt-out, so operators can disable the
 * manual path if it ever interacts badly with a future Ladybug release.
 */
export const checkpointOnce = async () => {
    if (isManualCheckpointEnabled()) {
        await runCheckpointWithRetry();
    }
};
|
|
114
|
+
/** Default cadence (ms) for the periodic driver. */
|
|
115
|
+
const DEFAULT_PERIOD_MS = 5_000;
|
|
116
|
+
export const startWalCheckpointDriver = (options = {}) => {
|
|
117
|
+
if (!isManualCheckpointEnabled()) {
|
|
118
|
+
return { stop: async () => undefined };
|
|
119
|
+
}
|
|
120
|
+
const periodMs = options.periodMs ?? DEFAULT_PERIOD_MS;
|
|
121
|
+
let stopped = false;
|
|
122
|
+
let inflight = null;
|
|
123
|
+
const tick = async () => {
|
|
124
|
+
if (stopped)
|
|
125
|
+
return;
|
|
126
|
+
inflight = runCheckpointWithRetry()
|
|
127
|
+
.then(() => undefined)
|
|
128
|
+
.catch((err) => {
|
|
129
|
+
// The retry budget exhausted. The caller's surrounding write
|
|
130
|
+
// will see the same engine error on its next operation and the
|
|
131
|
+
// `analyzeCommand` catch block will emit the recovery hint.
|
|
132
|
+
// Logging here keeps the operator-visible trail without
|
|
133
|
+
// double-logging the user-facing message.
|
|
134
|
+
logger.warn({ err: err instanceof Error ? err.message : String(err) }, 'GitNexus: manual WAL checkpoint failed after retries');
|
|
135
|
+
});
|
|
136
|
+
try {
|
|
137
|
+
await inflight;
|
|
138
|
+
}
|
|
139
|
+
finally {
|
|
140
|
+
inflight = null;
|
|
141
|
+
}
|
|
142
|
+
};
|
|
143
|
+
const handle = setInterval(() => {
|
|
144
|
+
// Fire-and-forget: setInterval cannot await directly. The next tick
|
|
145
|
+
// is guarded by `stopped` and the `inflight` reference.
|
|
146
|
+
void tick();
|
|
147
|
+
}, periodMs);
|
|
148
|
+
// `setInterval` returned by Node is a `Timeout` object with `.unref()`
|
|
149
|
+
// so a hung driver never prevents process exit.
|
|
150
|
+
if (typeof handle.unref === 'function') {
|
|
151
|
+
handle.unref();
|
|
152
|
+
}
|
|
153
|
+
return {
|
|
154
|
+
stop: async () => {
|
|
155
|
+
if (stopped) {
|
|
156
|
+
if (inflight)
|
|
157
|
+
await inflight;
|
|
158
|
+
return;
|
|
159
|
+
}
|
|
160
|
+
stopped = true;
|
|
161
|
+
clearInterval(handle);
|
|
162
|
+
if (inflight) {
|
|
163
|
+
try {
|
|
164
|
+
await inflight;
|
|
165
|
+
}
|
|
166
|
+
catch {
|
|
167
|
+
/* swallowed in tick() — surface path is the surrounding write */
|
|
168
|
+
}
|
|
169
|
+
}
|
|
170
|
+
},
|
|
171
|
+
};
|
|
172
|
+
};
|
|
173
|
+
/**
 * Report whether the manual checkpoint path is enabled.
 *
 * `GITNEXUS_WAL_MANUAL_CHECKPOINT` is read on every call rather than cached
 * at module load, which keeps `analyzeCommand` env restoration honest:
 * tests that toggle the flag between invocations see the live value,
 * matching the `ANALYZE_CLI_ENV_KEYS` snapshot/restore contract in
 * `analyze.ts`.
 *
 * Opt-out values are '0', 'false', 'off' and 'no', compared after trimming
 * and lower-casing. Anything else — including an unset variable — leaves
 * the driver enabled.
 *
 * @returns `false` only for one of the opt-out values above.
 */
export const isManualCheckpointEnabled = () => {
    const flag = process.env.GITNEXUS_WAL_MANUAL_CHECKPOINT;
    switch (flag?.trim().toLowerCase()) {
        case '0':
        case 'false':
        case 'off':
        case 'no':
            return false;
        default:
            return true;
    }
};
|
package/dist/core/run-analyze.js
CHANGED
|
@@ -14,6 +14,7 @@ import { execFileSync } from 'child_process';
|
|
|
14
14
|
import { runPipelineFromRepo } from './ingestion/pipeline.js';
|
|
15
15
|
import { initLbug, loadGraphToLbug, getLbugStats, executeQuery, executeWithReusedStatement, closeLbug, loadCachedEmbeddings, deleteNodesForFile, deleteAllCommunitiesAndProcesses, queryImporters, } from './lbug/lbug-adapter.js';
|
|
16
16
|
import { createSearchFTSIndexes, verifySearchFTSIndexes } from './search/fts-indexes.js';
|
|
17
|
+
import { startWalCheckpointDriver, } from './lbug/wal-checkpoint-driver.js';
|
|
17
18
|
import { getStoragePaths, saveMeta, loadMeta, ensureGitNexusIgnored, registerRepo, cleanupOldKuzuFiles, INCREMENTAL_SCHEMA_VERSION, } from '../storage/repo-manager.js';
|
|
18
19
|
import { computeFileHashes, diffFileHashes } from '../storage/file-hash.js';
|
|
19
20
|
import { extractChangedSubgraph, computeEffectiveWriteSet, } from './incremental/subgraph-extract.js';
|
|
@@ -343,6 +344,15 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
|
|
|
343
344
|
}
|
|
344
345
|
}
|
|
345
346
|
await initLbug(lbugPath);
|
|
347
|
+
// Manual WAL checkpoint driver (#1741): periodically drain the WAL
|
|
348
|
+
// from JS so the un-retriable native auto-checkpoint almost never
|
|
349
|
+
// has work left to do. Failures of the manual CHECKPOINT are absorbed
|
|
350
|
+
// by the driver's bounded retry; the final un-recoverable error still
|
|
351
|
+
// surfaces via the surrounding write that follows the failed flush.
|
|
352
|
+
// Opt-out via `GITNEXUS_WAL_MANUAL_CHECKPOINT=0` (the driver itself
|
|
353
|
+
// returns a no-op handle when disabled). Analyze-only: MCP and serve
|
|
354
|
+
// paths continue to rely on the close-time CHECKPOINT in `safeClose`.
|
|
355
|
+
const walCheckpointDriver = startWalCheckpointDriver();
|
|
346
356
|
try {
|
|
347
357
|
// All work after initLbug is wrapped in try/finally to ensure closeLbug()
|
|
348
358
|
// is called even if an error occurs — the module-level singleton DB handle
|
|
@@ -726,6 +736,9 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
|
|
|
726
736
|
// Best-effort — don't fail the entire analysis for context file issues
|
|
727
737
|
}
|
|
728
738
|
// ── Close LadybugDB ──────────────────────────────────────────────
|
|
739
|
+
// Stop the manual checkpoint driver before closeLbug so its
|
|
740
|
+
// in-flight CHECKPOINT cannot race the `safeClose` CHECKPOINT.
|
|
741
|
+
await walCheckpointDriver.stop();
|
|
729
742
|
await closeLbug();
|
|
730
743
|
progress('done', 100, 'Done');
|
|
731
744
|
return {
|
|
@@ -736,7 +749,14 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
|
|
|
736
749
|
};
|
|
737
750
|
}
|
|
738
751
|
catch (err) {
|
|
739
|
-
// Ensure LadybugDB is closed even on error
|
|
752
|
+
// Ensure LadybugDB is closed even on error. Stop the driver first
|
|
753
|
+
// so its retry loop cannot extend an already-failing analyze.
|
|
754
|
+
try {
|
|
755
|
+
await walCheckpointDriver.stop();
|
|
756
|
+
}
|
|
757
|
+
catch {
|
|
758
|
+
/* swallow — surface path is the rethrow below */
|
|
759
|
+
}
|
|
740
760
|
try {
|
|
741
761
|
await closeLbug();
|
|
742
762
|
}
|
package/package.json
CHANGED