@pugi/cli 0.1.0-beta.100 → 0.1.0-beta.101
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -0
- package/dist/core/codegraph/parser.js +574 -47
- package/dist/core/codegraph/queries/go.scm +57 -0
- package/dist/core/codegraph/queries/javascript.scm +56 -0
- package/dist/core/codegraph/queries/python.scm +55 -0
- package/dist/core/codegraph/queries/rust.scm +63 -0
- package/dist/core/codegraph/queries/typescript.scm +91 -0
- package/dist/core/codegraph/reindex.js +218 -0
- package/dist/core/codegraph/resolve-edges.js +107 -0
- package/dist/core/codegraph/watcher.js +440 -0
- package/dist/core/diagnostics/probes/sandbox.js +7 -12
- package/dist/core/engine/prompts.js +32 -0
- package/dist/core/eval/v1/ledger.js +83 -0
- package/dist/core/eval/v1/runner.js +280 -0
- package/dist/core/eval/v1/scoring.js +68 -0
- package/dist/core/eval/v1/task-loader.js +191 -0
- package/dist/core/eval/v1/types.js +14 -0
- package/dist/core/eval/v1/verifier.js +176 -0
- package/dist/core/eval/v1/yaml-parser.js +250 -0
- package/dist/core/sandboxing/adapter.js +31 -17
- package/dist/core/sandboxing/bubblewrap.js +209 -0
- package/dist/core/sandboxing/index.js +32 -3
- package/dist/core/sandboxing/policy.js +97 -0
- package/dist/core/sandboxing/seatbelt.js +69 -21
- package/dist/core/settings.js +31 -7
- package/dist/runtime/cli.js +58 -0
- package/dist/runtime/commands/eval-v1.js +266 -0
- package/dist/runtime/commands/index-cmd.js +125 -19
- package/dist/runtime/commands/servers-cli.js +182 -0
- package/dist/runtime/version.js +1 -1
- package/dist/tools/bash.js +187 -3
- package/package.json +10 -3
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
; Go symbol + edge query - used by core/codegraph/parser.ts.
|
|
2
|
+
;
|
|
3
|
+
; SymbolKind coverage:
|
|
4
|
+
; function - function_declaration at file top level
|
|
5
|
+
; method - method_declaration (receiver-attached)
|
|
6
|
+
; type - type_declaration (struct, alias, interface base)
|
|
7
|
+
; interface - type_declaration whose underlying type is interface_type
|
|
8
|
+
; variable - top-level var / const single-identifier binding
|
|
9
|
+
; import - import_spec path string OR alias identifier
|
|
10
|
+
;
|
|
11
|
+
; EdgeKind coverage:
|
|
12
|
+
; calls - call_expression with identifier OR selector
|
|
13
|
+
; implements - Go has no `implements` keyword; satisfaction is
|
|
14
|
+
; structural. We do NOT emit implements edges for Go.
|
|
15
|
+
|
|
16
|
+
; Free functions: `func Foo() { ... }`.
(function_declaration
  name: (identifier) @symbol.function.name) @symbol.function.def

; Methods: `func (r T) Foo() { ... }`. Only the method name is captured;
; the receiver type is not part of this pattern.
(method_declaration
  name: (field_identifier) @symbol.method.name) @symbol.method.def

; `type Foo struct { ... }` / `type Foo interface { ... }` / `type Foo Bar`.
; Any type_spec is captured as a `type` symbol regardless of its
; underlying type.
(type_declaration
  (type_spec
    name: (type_identifier) @symbol.type.name)) @symbol.type.def

; `type Foo = X`. tree-sitter-go models alias declarations as a
; `type_alias` node rather than a `type_spec`, so the pattern above never
; matches them; this pattern provides the documented alias coverage.
(type_declaration
  (type_alias
    name: (type_identifier) @symbol.type.name)) @symbol.type.def

; Interfaces: a type_spec whose underlying type is an interface_type. The
; same declaration also matches the generic type_spec pattern above.
(type_declaration
  (type_spec
    name: (type_identifier) @symbol.interface.name
    type: (interface_type))) @symbol.interface.def

; Top-level var / const - single-identifier specs only to dodge noise.
; NOTE(review): nothing in these two patterns anchors them to file scope
; or limits them to one identifier per spec; presumably parser.ts applies
; that filter - confirm.
(var_declaration
  (var_spec
    name: (identifier) @symbol.variable.name)) @symbol.variable.def

(const_declaration
  (const_spec
    name: (identifier) @symbol.variable.name)) @symbol.variable.def

; Imports: `import "fmt"` / `import f "fmt"`. We keep both the quoted
; path and the alias identifier as the symbol name. Reviewer note: the
; quoted form (with double quotes) matches what Go editor tooling
; surfaces - we keep it as-is.
(import_spec
  path: (interpreted_string_literal) @symbol.import.name) @symbol.import.def

; Aliased import: the alias identifier is captured as a second import
; symbol on the same import_spec.
(import_spec
  name: (package_identifier) @symbol.import.name) @symbol.import.def

; Calls: bare (`foo()`) and selector (`pkg.Foo()` / `recv.Method()`).
; For selector calls only the trailing field name is the edge target.
(call_expression
  function: (identifier) @edge.calls.target)

(call_expression
  function: (selector_expression
    field: (field_identifier) @edge.calls.target))
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
; JavaScript symbol + edge query - used by core/codegraph/parser.ts.
|
|
2
|
+
;
|
|
3
|
+
; Capture-name convention matches the TS query; the extractor is
|
|
4
|
+
; language-agnostic at the capture-parsing layer.
|
|
5
|
+
;
|
|
6
|
+
; tree-sitter-javascript covers JS + JSX in a single grammar. .mjs and
|
|
7
|
+
; .cjs route here too. We intentionally do NOT extract `var` declarations -
|
|
8
|
+
; they leak across enclosing scopes and the resulting Symbol rows would
|
|
9
|
+
; be misleading. `let` / `const` only.
|
|
10
|
+
;
|
|
11
|
+
; SymbolKind coverage:
|
|
12
|
+
; function - function_declaration, generator_function_declaration
|
|
13
|
+
; class - class_declaration
|
|
14
|
+
; method - method_definition
|
|
15
|
+
; variable - lexical_declaration single-identifier binding
|
|
16
|
+
; import - import_specifier / default-import identifier
|
|
17
|
+
;
|
|
18
|
+
; (No interface / type kinds in JS - they live in TS-only.)
|
|
19
|
+
;
|
|
20
|
+
; EdgeKind coverage:
|
|
21
|
+
; calls - call_expression bare ID + member-access function
|
|
22
|
+
; extends - class extends parent identifier
|
|
23
|
+
|
|
24
|
+
; Function declarations: `function foo() {}`.
(function_declaration
  name: (identifier) @symbol.function.name) @symbol.function.def

; Generator declarations: `function* foo() {}` - same `function` kind.
(generator_function_declaration
  name: (identifier) @symbol.function.name) @symbol.function.def

; Class declarations: `class Foo {}`.
(class_declaration
  name: (identifier) @symbol.class.name) @symbol.class.def

; Methods whose name is a plain property identifier. Names of any other
; node type are not matched by this pattern.
(method_definition
  name: (property_identifier) @symbol.method.name) @symbol.method.def

; `let` / `const` bindings whose declarator name is a plain identifier;
; destructuring patterns do not match. The `.def` capture spans the whole
; lexical_declaration.
; NOTE(review): the pattern itself is not anchored to the top level -
; presumably parser.ts handles scoping; confirm.
(lexical_declaration
  (variable_declarator
    name: (identifier) @symbol.variable.name)) @symbol.variable.def

; Named imports: `import { a } from 'x'` - captures the specifier's
; `name` field.
(import_specifier
  name: (identifier) @symbol.import.name) @symbol.import.def

; Default import: `import a from 'x'` - the identifier sits directly
; under import_clause.
(import_clause
  (identifier) @symbol.import.name) @symbol.import.def

; Bare call: `foo()`.
(call_expression
  function: (identifier) @edge.calls.target)

; Member call: `obj.foo()` - only the property name is the edge target.
(call_expression
  function: (member_expression
    property: (property_identifier) @edge.calls.target))

; `class A extends B` - edge from A to B. Only matches when the heritage
; expression is a bare identifier.
(class_declaration
  name: (identifier) @edge.extends.from
  (class_heritage
    (identifier) @edge.extends.target))
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
; Python symbol + edge query - used by core/codegraph/parser.ts.
|
|
2
|
+
;
|
|
3
|
+
; SymbolKind coverage:
|
|
4
|
+
; function - function_definition (top-level OR nested - parser.ts
|
|
5
|
+
; computes scope by walking parents so nested defs
|
|
6
|
+
; appear as `Class.method` etc.)
|
|
7
|
+
; class - class_definition
|
|
8
|
+
; variable - top-level identifier assignment
|
|
9
|
+
; import - import_from_statement / import_statement names
|
|
10
|
+
;
|
|
11
|
+
; EdgeKind coverage:
|
|
12
|
+
; calls - call where the function is an identifier or attribute
|
|
13
|
+
; extends - class superclass argument list (`class C(A, B):`)
|
|
14
|
+
;
|
|
15
|
+
; Python has no `implements` concept in the grammar (Protocols + ABCs
|
|
16
|
+
; are runtime checks), so that edge kind stays empty here.
|
|
17
|
+
|
|
18
|
+
; Function definitions: `def foo(): ...`. Matches at any nesting depth.
(function_definition
  name: (identifier) @symbol.function.name) @symbol.function.def

; Class definitions: `class Foo: ...`.
(class_definition
  name: (identifier) @symbol.class.name) @symbol.class.def

; Top-level single-target assignments: `FOO = 1`. Restricted to the
; simplest left side to avoid noise from destructuring.
; NOTE(review): the pattern has no `(module ...)` parent, so on its own it
; also matches assignments nested inside functions and classes; presumably
; parser.ts filters by scope - confirm.
(expression_statement
  (assignment
    left: (identifier) @symbol.variable.name)) @symbol.variable.def

; `from x import a, b as c` - names captured per identifier.
(import_from_statement
  name: (dotted_name (identifier) @symbol.import.name)) @symbol.import.def

; Aliased form (`b as c`): the capture is the identifier inside the
; imported dotted_name, not the `as` alias.
(import_from_statement
  name: (aliased_import (dotted_name (identifier) @symbol.import.name))) @symbol.import.def

; `import foo` (single name). For a dotted name, every identifier child
; of the dotted_name is captured separately.
(import_statement
  name: (dotted_name (identifier) @symbol.import.name)) @symbol.import.def

; Call expressions: bare identifier OR attribute (`foo.bar()`).
(call
  function: (identifier) @edge.calls.target)

; Attribute call: only the trailing attribute name is the edge target.
(call
  function: (attribute
    attribute: (identifier) @edge.calls.target))

; Class superclass: `class C(A, B):` - emit one extends edge per
; argument identifier. The scope walk in parser.ts resolves the
; enclosing class as the source.
(class_definition
  name: (identifier) @edge.extends.from
  superclasses: (argument_list
    (identifier) @edge.extends.target))
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
; Rust symbol + edge query - used by core/codegraph/parser.ts.
|
|
2
|
+
;
|
|
3
|
+
; SymbolKind coverage:
|
|
4
|
+
; function - function_item (`fn foo`). Methods (inside impl_item)
|
|
5
|
+
; also map to `function` for v1; the scope walk in
|
|
6
|
+
; parser.ts disambiguates via the dotted path.
|
|
7
|
+
; class - struct_item. Rust has no `class`; struct is the
|
|
8
|
+
; closest analog so `code_callers` queries work
|
|
9
|
+
; consistently across languages.
|
|
10
|
+
; interface - trait_item.
|
|
11
|
+
; type - enum_item, type_item (type alias).
|
|
12
|
+
; variable - const_item / static_item at the top level.
|
|
13
|
+
; import - use_declaration terminal identifier.
|
|
14
|
+
;
|
|
15
|
+
; EdgeKind coverage:
|
|
16
|
+
; calls - call_expression with identifier OR field-expression call.
|
|
17
|
+
; extends - impl_item that implements a trait for a type. Rust
|
|
18
|
+
; does not inherit; we reuse `extends` here to model
|
|
19
|
+
; trait-implementation as the closest cross-language analog.
|
|
20
|
+
|
|
21
|
+
; `fn foo() { ... }` - matches function_item at any nesting depth.
(function_item
  name: (identifier) @symbol.function.name) @symbol.function.def

; `struct Foo { ... }` - reported under the `class` kind.
(struct_item
  name: (type_identifier) @symbol.class.name) @symbol.class.def

; `trait Foo { ... }` - reported under the `interface` kind.
(trait_item
  name: (type_identifier) @symbol.interface.name) @symbol.interface.def

; `enum Foo { ... }` - reported under the `type` kind.
(enum_item
  name: (type_identifier) @symbol.type.name) @symbol.type.def

; `type Foo = Bar;` - type alias, also the `type` kind.
(type_item
  name: (type_identifier) @symbol.type.name) @symbol.type.def

; const FOO: T = 1; / static FOO: T = 1;
(const_item
  name: (identifier) @symbol.variable.name) @symbol.variable.def

(static_item
  name: (identifier) @symbol.variable.name) @symbol.variable.def

; Imports: tree-sitter-rust models `use a::b::c` as nested
; scoped_use_list / scoped_identifier. We capture the simplest forms.
; First form: the argument is a bare identifier (`use foo;`).
(use_declaration
  argument: (identifier) @symbol.import.name) @symbol.import.def

; Second form: the argument is a scoped_identifier (`use a::b::c;`); only
; its `name` field is captured. Other argument node types are not matched
; by these two patterns.
(use_declaration
  argument: (scoped_identifier
    name: (identifier) @symbol.import.name)) @symbol.import.def

; Calls: bare and method.
(call_expression
  function: (identifier) @edge.calls.target)

; Field-expression call (`recv.method()`): only the field name is the
; edge target.
(call_expression
  function: (field_expression
    field: (field_identifier) @edge.calls.target))

; `impl Trait for Type { ... }` - emit a `extends` edge from Type -> Trait.
; Only matches when both the trait and the type are plain type_identifier
; nodes.
(impl_item
  trait: (type_identifier) @edge.extends.target
  type: (type_identifier) @edge.extends.from)
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
; TypeScript symbol + edge query - used by core/codegraph/parser.ts.
|
|
2
|
+
;
|
|
3
|
+
; Capture naming convention drives the extractor:
|
|
4
|
+
; @symbol.<kind>.def binds to the AST node spanning the whole declaration
|
|
5
|
+
; (used for line/column anchor).
|
|
6
|
+
; @symbol.<kind>.name binds to the identifier node (used for `name`).
|
|
7
|
+
; @edge.<kind>.from enclosing definition that originates the edge.
|
|
8
|
+
; @edge.<kind>.target identifier whose name resolves to the edge target.
|
|
9
|
+
;
|
|
10
|
+
; tree-sitter-typescript ships separate grammars for typescript and tsx; both
|
|
11
|
+
; share the bulk of node names below. The same query runs against both
|
|
12
|
+
; languages - the extractor wires both grammars to this file.
|
|
13
|
+
;
|
|
14
|
+
; SymbolKind coverage (per types.ts):
|
|
15
|
+
; function - function_declaration, generator_function_declaration
|
|
16
|
+
; class - class_declaration, abstract_class_declaration
|
|
17
|
+
; method - method_definition, abstract_method_signature
|
|
18
|
+
; interface - interface_declaration
|
|
19
|
+
; type - type_alias_declaration, enum_declaration
|
|
20
|
+
; variable - lexical_declaration single-identifier binding (let/const)
|
|
21
|
+
; import - import_specifier (named imports) and default-import identifier
|
|
22
|
+
;
|
|
23
|
+
; EdgeKind coverage:
|
|
24
|
+
; calls - call_expression where function is identifier or member.
|
|
25
|
+
; extends - class extends_clause target
|
|
26
|
+
; implements - class implements_clause target (TS-only)
|
|
27
|
+
|
|
28
|
+
; --- functions ---
; `function foo() {}` and `function* foo() {}` both map to `function`.
(function_declaration
  name: (identifier) @symbol.function.name) @symbol.function.def

(generator_function_declaration
  name: (identifier) @symbol.function.name) @symbol.function.def

; --- classes ---
; Concrete and `abstract` classes are separate node types in the grammar;
; both map to the `class` kind.
(class_declaration
  name: (type_identifier) @symbol.class.name) @symbol.class.def

(abstract_class_declaration
  name: (type_identifier) @symbol.class.name) @symbol.class.def

; --- methods (inside class bodies) ---
; Only methods named by a plain property identifier are matched.
(method_definition
  name: (property_identifier) @symbol.method.name) @symbol.method.def

(abstract_method_signature
  name: (property_identifier) @symbol.method.name) @symbol.method.def

; --- interfaces ---
(interface_declaration
  name: (type_identifier) @symbol.interface.name) @symbol.interface.def

; --- types / enums ---
(type_alias_declaration
  name: (type_identifier) @symbol.type.name) @symbol.type.def

(enum_declaration
  name: (identifier) @symbol.type.name) @symbol.type.def

; --- top-level variables (let / const single binding) ---
; Declarators whose name is a destructuring pattern do not match. The
; `.def` capture spans the whole lexical_declaration.
(lexical_declaration
  (variable_declarator
    name: (identifier) @symbol.variable.name)) @symbol.variable.def

; --- imports ---
; Named import specifier (`import { a } from 'x'`).
(import_specifier
  name: (identifier) @symbol.import.name) @symbol.import.def

; Default import (`import a from 'x'`).
(import_clause
  (identifier) @symbol.import.name) @symbol.import.def

; --- calls (edges) ---
; Bare call `foo()` and member call `obj.foo()`; for member calls only the
; property name is the edge target.
(call_expression
  function: (identifier) @edge.calls.target)

(call_expression
  function: (member_expression
    property: (property_identifier) @edge.calls.target))

; --- class extends / implements (edges) ---
; Each edge pattern exists twice: once for class_declaration and once for
; abstract_class_declaration. They are distinct node types, so without the
; abstract variants an `abstract class A extends B implements I` would be
; indexed as a symbol but emit no heritage edges.
(class_declaration
  name: (type_identifier) @edge.extends.from
  (class_heritage
    (extends_clause
      value: [(identifier) (type_identifier)] @edge.extends.target)))

(abstract_class_declaration
  name: (type_identifier) @edge.extends.from
  (class_heritage
    (extends_clause
      value: [(identifier) (type_identifier)] @edge.extends.target)))

(class_declaration
  name: (type_identifier) @edge.implements.from
  (class_heritage
    (implements_clause
      [(type_identifier) (generic_type) (predefined_type)] @edge.implements.target)))

(abstract_class_declaration
  name: (type_identifier) @edge.implements.from
  (class_heritage
    (implements_clause
      [(type_identifier) (generic_type) (predefined_type)] @edge.implements.target)))
|
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pugi local symbol index - reindex orchestrator (PR L1).
|
|
3
|
+
*
|
|
4
|
+
* Wires the workspace walk against `parser.parseFile` and writes the
|
|
5
|
+
* results into `db.ts`. The orchestrator is the only consumer of
|
|
6
|
+
* `PendingEdge` - it resolves names to SQLite row ids per file and
|
|
7
|
+
* drops orphans before calling `insertEdges`. The contract types in
|
|
8
|
+
* `types.ts` stay frozen.
|
|
9
|
+
*
|
|
10
|
+
* Workspace walk is intentionally minimal:
|
|
11
|
+
* - Use `tinyglobby` to enumerate `**\/*.{ext}` for the v1 extension
|
|
12
|
+
* set (see `parser.PARSEABLE_EXTENSIONS`).
|
|
13
|
+
* - Always skip `node_modules`, `.git`, `dist`, `build`, `.next`,
|
|
14
|
+
* `.turbo`, `.cache`, `coverage`, and `.pugi` itself - these are
|
|
15
|
+
* the same exclusions the existing `core/repo-map/scanner.ts`
|
|
16
|
+
* uses, kept inline here so the codegraph module can ship without
|
|
17
|
+
* pulling the broader scanner dependency tree.
|
|
18
|
+
* - Symlinks are not followed - same default as the rest of Pugi.
|
|
19
|
+
*
|
|
20
|
+
* The orchestrator returns a structured summary so the CLI layer
|
|
21
|
+
* can render a single-line operator-facing recap without parsing
|
|
22
|
+
* stdout. Every counter the operator might want is exposed.
|
|
23
|
+
*/
|
|
24
|
+
import { createHash } from 'node:crypto';
|
|
25
|
+
import { readFileSync } from 'node:fs';
|
|
26
|
+
import { resolve, sep } from 'node:path';
|
|
27
|
+
import { glob } from 'tinyglobby';
|
|
28
|
+
import { deleteFile, insertEdges, insertSymbols, upsertFile, } from './db.js';
|
|
29
|
+
import { PARSEABLE_EXTENSIONS, parseFile } from './parser.js';
|
|
30
|
+
/**
 * Directory names that the workspace walk never descends into. The list
 * is frozen so it cannot be modified at runtime.
 */
const PRUNE_DIRS = Object.freeze([
    // Dependencies and version-control metadata.
    'node_modules', '.git',
    // Build output.
    'dist', 'build', '.next',
    // Tool caches and reports.
    '.turbo', '.cache', 'coverage',
    // Pugi's own directory.
    '.pugi',
]);
|
|
42
|
+
/**
 * Walk the workspace, parse every supported file, and write symbols +
 * edges into the open `IndexDB`. The DB is assumed to be open already -
 * the caller (the CLI command) owns its lifecycle.
 *
 * Runs in two passes: pass 1 parses each file and inserts its symbols;
 * pass 2 resolves the name-keyed pending edges to symbol row ids once
 * every symbol of the walk is known.
 *
 * @param db Open index handle; `db.workspaceRoot` is the directory walked.
 * @param opts Optional settings. `opts.quiet === true` is forwarded to the parser.
 * @returns Summary object: `filesScanned`, `filesIndexed` (files that
 *   produced at least one symbol), `symbolsInserted`, `edgesInserted`,
 *   `edgesOrphaned`, `totalMs`, and the per-file `files` entries.
 */
export async function reindexWorkspace(db, opts = {}) {
    const start = Date.now();
    const root = db.workspaceRoot;
    const patterns = PARSEABLE_EXTENSIONS.map((ext) => `**/*${ext}`);
    const matches = await glob(patterns, {
        cwd: root,
        ignore: PRUNE_DIRS.map((d) => `**/${d}/**`),
        onlyFiles: true,
        dot: false,
        followSymbolicLinks: false,
    });
    /** Per-file summary rows returned to the caller. */
    const filesOut = [];
    /**
     * Files that produced symbols and therefore need edge resolution in
     * pass 2. Each item carries its own name -> rowId map (so edges can
     * resolve their source side within the file) plus `outIndex`, the
     * position of its summary row in `filesOut`.
     */
    const work = [];
    /** Cross-file map: target name -> first matching SQLite id. */
    const globalNameMap = new Map();
    let symbolsInserted = 0;
    for (const m of matches) {
        const abs = resolve(root, m);
        const relPath = m.split(sep).join('/');
        let bytes;
        try {
            bytes = readFileSync(abs, 'utf8');
        }
        catch {
            // Disappeared mid-walk or unreadable - skip.
            continue;
        }
        const result = await parseFile(abs, {
            relPath,
            quiet: opts.quiet === true,
            sourceOverride: bytes,
        });
        // Delete prior rows for this file before re-inserting. Cheap on
        // first run because the file row does not exist; cheap on
        // re-runs because of `idx_symbols_file`.
        deleteFile(db, relPath);
        if (result.symbols.length === 0) {
            // Still record the file fingerprint when applicable so the
            // watcher's sha256 short-circuit works on subsequent runs.
            if (result.language !== null) {
                upsertFile(db, {
                    path: relPath,
                    sha256: sha256(bytes),
                    lastIndexedAt: new Date().toISOString(),
                    symbolCount: 0,
                });
            }
            filesOut.push({
                file: relPath,
                language: result.language,
                symbolCount: 0,
                edgeCount: 0,
                parseMs: result.parseMs,
            });
            continue;
        }
        const ids = insertSymbols(db, result.symbols);
        symbolsInserted += ids.length;
        const nameMap = new Map();
        for (let i = 0; i < result.symbols.length; i += 1) {
            const sym = result.symbols[i];
            const id = ids[i];
            if (!sym || id === undefined)
                continue;
            // Per-file canonical key includes scope so two `speak()`
            // methods on different classes do not collide.
            nameMap.set(scopedKey(sym.name, sym.scope), id);
            nameMap.set(sym.name, id);
            // Cross-file: keep the first definition we see. A later file
            // with the same name does not overwrite - we want stability,
            // not last-write-wins.
            if (!globalNameMap.has(sym.name)) {
                globalNameMap.set(sym.name, id);
            }
        }
        upsertFile(db, {
            path: relPath,
            sha256: sha256(bytes),
            lastIndexedAt: new Date().toISOString(),
            symbolCount: ids.length,
        });
        work.push({
            file: relPath,
            pendingEdges: result.pendingEdges,
            nameMap,
            parseMs: result.parseMs,
            // Remember where this file's summary row lands so pass 2 can
            // update it directly instead of searching `filesOut` per file.
            outIndex: filesOut.length,
        });
        filesOut.push({
            file: relPath,
            language: result.language,
            symbolCount: ids.length,
            // edgeCount filled in pass 2.
            edgeCount: 0,
            parseMs: result.parseMs,
        });
    }
    // Second pass: resolve edges. Caller side prefers per-file scope
    // match, falls back to bare name match. Target side prefers
    // per-file match first (intra-file refs), then global (cross-file
    // best effort).
    let edgesInserted = 0;
    let edgesOrphaned = 0;
    for (const item of work) {
        const resolved = [];
        for (const pe of item.pendingEdges) {
            const fromId = item.nameMap.get(scopedKey(pe.fromName, pe.fromScope)) ??
                item.nameMap.get(pe.fromName);
            if (fromId === undefined) {
                // Caller side did not resolve - the .scm query captured
                // an edge whose source declaration is not in the symbol
                // table. Should not happen for well-formed files; the
                // edge is dropped and counted as orphaned.
                edgesOrphaned += 1;
                continue;
            }
            const toId = item.nameMap.get(pe.toName) ?? globalNameMap.get(pe.toName);
            if (toId === undefined) {
                // Target name was not found in this file's symbol table
                // OR the cross-file map. The edge is dropped and counted
                // as orphaned - the agent's code_callers query simply
                // will not see it.
                edgesOrphaned += 1;
                continue;
            }
            // Self-recursion is a real edge: `fn foo() { foo(); }`. Keep
            // it - PR L4 PageRank treats self-loops correctly, and the
            // `code_callers` MCP tool already filters by edge kind so the
            // self-loop does not pollute the caller list.
            resolved.push({
                fromSymbolId: fromId,
                toSymbolId: toId,
                kind: pe.kind,
            });
        }
        if (resolved.length === 0) {
            continue;
        }
        insertEdges(db, resolved);
        edgesInserted += resolved.length;
        const existing = filesOut[item.outIndex];
        if (existing) {
            filesOut[item.outIndex] = { ...existing, edgeCount: resolved.length };
        }
    }
    return {
        filesScanned: matches.length,
        filesIndexed: work.length,
        symbolsInserted,
        edgesInserted,
        edgesOrphaned,
        totalMs: Date.now() - start,
        files: filesOut,
    };
}
|
|
211
|
+
/**
 * Build the per-file lookup key for a symbol: `scope.name` when the
 * scope is non-empty, otherwise just `name`.
 */
function scopedKey(name, scope) {
    if (scope.length > 0) {
        return `${scope}.${name}`;
    }
    return name;
}
|
|
215
|
+
/** Return the SHA-256 digest of `s` as a lowercase hex string. */
function sha256(s) {
    const hasher = createHash('sha256');
    hasher.update(s);
    return hasher.digest('hex');
}
|
|
218
|
+
//# sourceMappingURL=reindex.js.map
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pugi local symbol index - edge resolution helper (PR L2).
|
|
3
|
+
*
|
|
4
|
+
* Shared between `reindex.ts` (workspace walker) and `watcher.ts` (chokidar
|
|
5
|
+
* incremental updater). Resolves the name-keyed `PendingEdge[]` produced by
|
|
6
|
+
* the parser into row-id-keyed `Edge[]` that `db.ts:insertEdges` accepts.
|
|
7
|
+
*
|
|
8
|
+
* Why factored out:
|
|
9
|
+
*
|
|
10
|
+
* - PR L2 originally inlined this in two places; the duplicated bodies
|
|
11
|
+
* drifted on the third change. One reusable resolver keeps the
|
|
12
|
+
* invariants (per-file scoped key precedence, global fall-through,
|
|
13
|
+
* orphan handling) in one location.
|
|
14
|
+
* - The watcher's "global name map" is the live DB rather than an
|
|
15
|
+
* in-memory map built across the full walk - we expose a lookup
|
|
16
|
+
* callback so each caller plugs in its own source of truth.
|
|
17
|
+
*
|
|
18
|
+
* Resolution policy mirrors the inline second pass of `reindex.ts:reindexWorkspace`:
|
|
19
|
+
*
|
|
20
|
+
* 1. Caller side prefers per-file `scope.name` exact, falls back to
|
|
21
|
+
* bare `name`. Unresolved caller -> orphan, edge dropped.
|
|
22
|
+
* 2. Target side prefers the same per-file map (intra-file ref), then
|
|
23
|
+
* asks the cross-file lookup (db query or in-memory map).
|
|
24
|
+
* 3. Self-recursive edges (`fn foo() { foo() }`) are kept - PR L4
|
|
25
|
+
* PageRank treats self-loops correctly and the `code_callers` MCP
|
|
26
|
+
* tool filters by edge kind so it does not pollute caller lists.
|
|
27
|
+
*/
|
|
28
|
+
/**
 * Build the map lookup key for a symbol. A non-empty scope yields
 * `scope.name`; an empty scope yields the bare name, so unscoped symbols
 * need no separate key form.
 */
export function scopedKey(name, scope) {
    if (scope.length > 0) {
        return `${scope}.${name}`;
    }
    return name;
}
|
|
36
|
+
/**
 * Turn a file's name-keyed `PendingEdge[]` into row-id-keyed edges.
 *
 * Source side: looked up in `perFile` by scoped key first, then by bare
 * name. Target side: looked up in `perFile` by bare name first, then via
 * the `crossFile(name)` callback, whose `null` result means "not found".
 * An edge whose source or target cannot be resolved is dropped and
 * counted in `orphaned`.
 *
 * Nothing is written here; the caller decides what to do with the
 * returned batch.
 *
 * @param pendingEdges Edges carrying `fromName`, `fromScope`, `toName`, `kind`.
 * @param perFile Map from scoped key / bare name to symbol row id for this file.
 * @param crossFile Callback resolving a bare name to a row id, or `null`.
 * @returns `{ resolved, orphaned }` - the resolved edges and the drop count.
 */
export function resolvePendingEdges(pendingEdges, perFile, crossFile) {
    const edges = [];
    let dropped = 0;
    for (const { fromName, fromScope, toName, kind } of pendingEdges) {
        const sourceId = perFile.get(scopedKey(fromName, fromScope)) ?? perFile.get(fromName);
        if (sourceId === undefined) {
            // The edge's source declaration is not in this file's symbol
            // table; count it as an orphan and move on.
            dropped += 1;
            continue;
        }
        let targetId = perFile.get(toName);
        if (targetId === undefined) {
            // Not defined in this file - ask the cross-file source.
            const external = crossFile(toName);
            targetId = external === null ? undefined : external;
        }
        if (targetId === undefined) {
            // Unknown both locally and cross-file; count it as an orphan.
            dropped += 1;
            continue;
        }
        edges.push({ fromSymbolId: sourceId, toSymbolId: targetId, kind });
    }
    return { resolved: edges, orphaned: dropped };
}
|
|
83
|
+
/**
 * Build the per-file name map from a symbol batch and the row ids that
 * `insertSymbols` returned for it; `ids[i]` belongs to `symbols[i]`.
 *
 * Each symbol is registered under two keys: its scoped key
 * (`Class.method`) and its bare name. When several symbols in the batch
 * share a key, the one that appears last wins.
 *
 * @param symbols Parsed symbols, each with `name` and `scope`.
 * @param ids Row ids positionally aligned with `symbols`.
 * @returns Map from scoped key / bare name to row id.
 */
export function buildPerFileNameMap(symbols, ids) {
    const byName = new Map();
    for (const [index, symbol] of symbols.entries()) {
        const rowId = ids[index];
        // Skip entries that have no symbol or no matching row id.
        if (!symbol || rowId === undefined) {
            continue;
        }
        byName.set(scopedKey(symbol.name, symbol.scope), rowId);
        byName.set(symbol.name, rowId);
    }
    return byName;
}
|
|
107
|
+
//# sourceMappingURL=resolve-edges.js.map
|