@joycodetech/qmd-ja 2.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +819 -0
- package/LICENSE +21 -0
- package/README.md +1143 -0
- package/bin/qmd +162 -0
- package/dist/ast.d.ts +65 -0
- package/dist/ast.js +334 -0
- package/dist/bench/bench.d.ts +23 -0
- package/dist/bench/bench.js +280 -0
- package/dist/bench/score.d.ts +33 -0
- package/dist/bench/score.js +88 -0
- package/dist/bench/types.d.ts +80 -0
- package/dist/bench/types.js +8 -0
- package/dist/cli/formatter.d.ts +120 -0
- package/dist/cli/formatter.js +355 -0
- package/dist/cli/qmd.d.ts +43 -0
- package/dist/cli/qmd.js +4159 -0
- package/dist/collections.d.ts +166 -0
- package/dist/collections.js +410 -0
- package/dist/db.d.ts +44 -0
- package/dist/db.js +75 -0
- package/dist/index.d.ts +230 -0
- package/dist/index.js +242 -0
- package/dist/llm.d.ts +500 -0
- package/dist/llm.js +1615 -0
- package/dist/maintenance.d.ts +23 -0
- package/dist/maintenance.js +37 -0
- package/dist/mcp/server.d.ts +24 -0
- package/dist/mcp/server.js +702 -0
- package/dist/paths.d.ts +1 -0
- package/dist/paths.js +4 -0
- package/dist/store.d.ts +996 -0
- package/dist/store.js +4208 -0
- package/models/vaporetto-bccwj.model +0 -0
- package/package.json +130 -0
- package/scripts/build.mjs +30 -0
- package/scripts/check-package-grammars.mjs +29 -0
- package/scripts/package-smoke.mjs +65 -0
- package/scripts/test-all.mjs +38 -0
- package/skills/qmd/SKILL.md +295 -0
- package/skills/qmd/references/mcp-setup.md +102 -0
- package/skills/release/SKILL.md +139 -0
- package/skills/release/scripts/install-hooks.sh +38 -0
- package/vendor/vaporetto-node-wasm/package.json +11 -0
- package/vendor/vaporetto-node-wasm/vaporetto_node_wasm.d.ts +19 -0
- package/vendor/vaporetto-node-wasm/vaporetto_node_wasm.js +202 -0
- package/vendor/vaporetto-node-wasm/vaporetto_node_wasm_bg.wasm +0 -0
- package/vendor/vaporetto-node-wasm/vaporetto_node_wasm_bg.wasm.d.ts +13 -0
package/bin/qmd
ADDED
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// 2>/dev/null; if command -v node >/dev/null 2>&1; then exec node "$0" "$@"; else exec bun "$0" "$@"; fi
|
|
3
|
+
// Cross-platform launcher for qmd.
|
|
4
|
+
//
|
|
5
|
+
// Previously this was a POSIX shell script with `#!/bin/sh`, which meant npm
|
|
6
|
+
// on Windows generated shims that tried to route through `/bin/sh` — a path
|
|
7
|
+
// that doesn't exist on Windows, so `qmd` failed immediately after a global
|
|
8
|
+
// install. Rewriting the launcher in Node.js lets npm generate native
|
|
9
|
+
// cmd/ps1/sh shims that invoke `node` directly on every platform.
|
|
10
|
+
|
|
11
|
+
import { spawn, spawnSync } from "node:child_process";
|
|
12
|
+
import { existsSync, realpathSync } from "node:fs";
|
|
13
|
+
import { dirname, resolve } from "node:path";
|
|
14
|
+
import { fileURLToPath } from "node:url";
|
|
15
|
+
|
|
16
|
+
// Resolve symlinks so global installs (npm link / npm install -g) can find
|
|
17
|
+
// the actual package directory instead of the global bin directory.
|
|
18
|
+
const self = realpathSync(fileURLToPath(import.meta.url));
|
|
19
|
+
const pkgDir = resolve(dirname(self), "..");
|
|
20
|
+
const jsEntry = resolve(pkgDir, "dist/cli/qmd.js");
|
|
21
|
+
const tsEntry = resolve(pkgDir, "src/cli/qmd.ts");
|
|
22
|
+
|
|
23
|
+
// MCP stdio reserves stdout exclusively for JSON-RPC frames. node-llama-cpp
|
|
24
|
+
// / llama.cpp / ggml can write native logs directly to stdout before JS-level
|
|
25
|
+
// log handlers are attached, so seed the native quiet env before Node/Bun imports
|
|
26
|
+
// the CLI and its LLM modules. Preserve explicit user values when provided.
|
|
27
|
+
if (process.argv[2] === "mcp") {
  // Quiet defaults for the native logging knobs. A variable that is unset or
  // empty receives the default; any other value the user exported is kept.
  const quietDefaults = [
    ["LLAMA_LOG_LEVEL", "error"],
    ["GGML_LOG_LEVEL", "error"],
    ["GGML_BACKEND_SILENT", "1"],
  ];
  for (const [variable, fallback] of quietDefaults) {
    if (!process.env[variable]) {
      process.env[variable] = fallback;
    }
  }
}
|
|
32
|
+
|
|
33
|
+
// libggml-metal on macOS uses "residency sets" to keep allocated model memory
|
|
34
|
+
// resident across inference requests (180-second keep_alive timer). The
|
|
35
|
+
// process-static device destructor that runs during libc exit() asserts the
|
|
36
|
+
// residency set is empty (ggml-org/llama.cpp#22593); the keep_alive hasn't
|
|
37
|
+
// expired by exit, so the assertion fails and ggml_abort dumps a multi-kB
|
|
38
|
+
// stack trace to stderr even when the user-visible results were already
|
|
39
|
+
// emitted correctly. No JS-side dispose can prevent it because the static
|
|
40
|
+
// destructor runs in __cxa_finalize_ranges, after every JS-reachable cleanup.
|
|
41
|
+
//
|
|
42
|
+
// For QMD's short-lived CLI workflow, residency sets provide no observable
|
|
43
|
+
// performance benefit (subsequent requests don't reuse the warm mapping —
|
|
44
|
+
// measured: identical wall time with and without on M3 Pro), so disable them
|
|
45
|
+
// by default on darwin. The env var must be set BEFORE the native llama.cpp
|
|
46
|
+
// binding loads, which is why it lives here in the launcher rather than in
|
|
47
|
+
// the JS entry point. Opt back in with QMD_METAL_KEEP_RESIDENCY=1 if you
|
|
48
|
+
// run long-lived qmd processes (the MCP daemon may benefit on hot reload)
|
|
49
|
+
// or are triaging an upstream Metal teardown fix.
|
|
50
|
+
// QMD_METAL_KEEP_RESIDENCY=1 is the opt-out; an already non-empty
// GGML_METAL_NO_RESIDENCY is left exactly as the user set it.
const keepMetalResidency = process.env.QMD_METAL_KEEP_RESIDENCY === "1";
if (process.platform === "darwin" && !keepMetalResidency && !process.env.GGML_METAL_NO_RESIDENCY) {
  process.env.GGML_METAL_NO_RESIDENCY = "1";
}
|
|
53
|
+
|
|
54
|
+
/**
 * Probe for a usable `bun` executable by running `bun --version`.
 *
 * The probe is spawned through a shell on Windows only, and its output is
 * discarded.
 *
 * @returns {boolean} true when the probe exits with status 0; false for any
 *   other outcome, including a thrown spawn error.
 */
function hasBun() {
  const viaShell = process.platform === "win32";
  try {
    const { status } = spawnSync("bun", ["--version"], { stdio: "ignore", shell: viaShell });
    return status === 0;
  } catch {
    return false;
  }
}
|
|
62
|
+
|
|
63
|
+
// In published packages, bin/qmd must run dist/. In a git checkout, however,
|
|
64
|
+
// dist/ is often ignored and can be stale after git reset or branch switches.
|
|
65
|
+
// Prefer source mode only for checkouts so ./bin/qmd reflects the checked-out
|
|
66
|
+
// source without changing packaged/runtime behavior.
|
|
67
|
+
//
|
|
68
|
+
// Critical: source-mode detection must NOT trigger when a package manager
|
|
69
|
+
// installed us. `pnpm install -g .` (and `npm install -g .`) copy the entire
|
|
70
|
+
// working tree — including .git/, bun.lock, package-lock.json, src/, and even
|
|
71
|
+
// node_modules/ — into <prefix>/node_modules/@tobilu/qmd/, so .git and a
|
|
72
|
+
// lockfile being present is not a reliable "this is a working tree" signal.
|
|
73
|
+
// What IS reliable: a package-manager install always lands the package
|
|
74
|
+
// directory inside a `node_modules/` segment; a bare working-tree checkout
|
|
75
|
+
// (with `bun link` or a direct path invocation) does not. Gate source mode
|
|
76
|
+
// on that. Allow QMD_SOURCE_MODE=1 / =0 as an explicit override for the
|
|
77
|
+
// rare case where the heuristic disagrees with the user.
|
|
78
|
+
const sourceOverride = process.env.QMD_SOURCE_MODE;
|
|
79
|
+
// Split on both separators: on Windows pkgDir is backslash-delimited, so a "/"-only split would never see the node_modules segment.
const looksInstalled = pkgDir.split(/[\\/]/).includes("node_modules");
|
|
80
|
+
const sourceAllowed = sourceOverride === "1"
|
|
81
|
+
|| (sourceOverride !== "0" && !looksInstalled);
|
|
82
|
+
|
|
83
|
+
let useSourceMode = false;
let sourceRunner = null;
let sourceArgs = [];

// Source mode needs three things: it is permitted, a .git entry exists next to
// the package, and the TypeScript entry point is present.
const isCheckout = sourceAllowed && existsSync(resolve(pkgDir, ".git")) && existsSync(tsEntry);

if (isCheckout) {
  const forwarded = process.argv.slice(2);
  const hasNpmLock = existsSync(resolve(pkgDir, "package-lock.json"));
  const hasBunLock = existsSync(resolve(pkgDir, "bun.lock")) || existsSync(resolve(pkgDir, "bun.lockb"));
  const tsxEntry = resolve(pkgDir, "node_modules/tsx/dist/cli.mjs");
  const tsxAvailable = existsSync(tsxEntry);

  // Run the TypeScript entry under Node through the locally installed tsx CLI.
  const runWithTsx = () => {
    useSourceMode = true;
    sourceRunner = "node";
    sourceArgs = [tsxEntry, tsEntry, ...forwarded];
  };
  // Run the TypeScript entry directly with Bun.
  const runWithBun = () => {
    useSourceMode = true;
    sourceRunner = "bun";
    sourceArgs = [tsEntry, ...forwarded];
  };

  // Precedence mirrors dist mode: package-lock.json beats a Bun lockfile,
  // because an npm/pnpm install ships Node-ABI native modules that Bun would
  // mismatch. Bun is only probed when a Bun lockfile exists. With no decisive
  // lockfile, tsx is used if present; otherwise source mode stays off.
  if (hasNpmLock && tsxAvailable) {
    runWithTsx();
  } else if (hasBunLock && hasBun()) {
    runWithBun();
  } else if (tsxAvailable) {
    runWithTsx();
  }
}
|
|
115
|
+
|
|
116
|
+
if (!useSourceMode && !existsSync(jsEntry)) {
|
|
117
|
+
console.error(`qmd is not built: missing ${jsEntry}`);
|
|
118
|
+
console.error("Run: bun install && bun run build");
|
|
119
|
+
console.error("Or: npm install && npm run build");
|
|
120
|
+
console.error("After building, run: qmd doctor");
|
|
121
|
+
process.exit(1);
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
// Detect the package manager that installed dependencies by checking lockfiles.
|
|
125
|
+
// $BUN_INSTALL is intentionally NOT checked — it only indicates that bun exists
|
|
126
|
+
// on the system, not that it was used to install this package (see #361).
|
|
127
|
+
//
|
|
128
|
+
// package-lock.json takes priority: if it exists, npm installed the native
|
|
129
|
+
// modules for Node. The repo ships bun.lock, so without this check, source
|
|
130
|
+
// builds that use npm would be incorrectly routed to bun, causing ABI
|
|
131
|
+
// mismatches with better-sqlite3 / sqlite-vec (see #381).
|
|
132
|
+
// Bun is chosen only when a Bun lockfile exists and package-lock.json does
// not; every other combination runs under Node.
const npmLockPresent = existsSync(resolve(pkgDir, "package-lock.json"));
const bunLockOnly = !npmLockPresent
  && (existsSync(resolve(pkgDir, "bun.lock")) || existsSync(resolve(pkgDir, "bun.lockb")));
const runnerName = bunLockOnly ? "bun" : "node";

// Source mode brings its own runner and argument list; dist mode runs the
// built entry point with the user's arguments forwarded unchanged.
const runner = useSourceMode ? sourceRunner : runnerName;
const args = useSourceMode ? sourceArgs : [jsEntry, ...process.argv.slice(2)];
// Only the Bun runner is spawned through a shell, and only on Windows.
const needsShell = process.platform === "win32" && runner === "bun";

const child = spawn(runner, args, { stdio: "inherit", shell: needsShell });
|
|
149
|
+
|
|
150
|
+
// Mirror the child's fate: re-raise its terminating signal on this process,
// otherwise exit with its status code (0 when none was reported).
child.on("exit", (exitCode, killSignal) => {
  if (!killSignal) {
    process.exit(exitCode ?? 0);
  } else {
    process.kill(process.pid, killSignal);
  }
});

// The runner itself could not be started: report which one and fail.
child.on("error", (err) => {
  console.error(`qmd: failed to launch ${useSourceMode ? sourceRunner : runnerName}: ${err.message}`);
  process.exit(1);
});
|
package/dist/ast.d.ts
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* AST-aware chunking support via web-tree-sitter.
|
|
3
|
+
*
|
|
4
|
+
* Provides language detection, AST break point extraction for supported
|
|
5
|
+
* code file types, and a stub for future symbol extraction.
|
|
6
|
+
*
|
|
7
|
+
* All functions degrade gracefully: parse failures or unsupported languages
|
|
8
|
+
* return empty arrays, falling back to regex-only chunking.
|
|
9
|
+
*
|
|
10
|
+
* ## Dependency Note
|
|
11
|
+
*
|
|
12
|
+
* Grammar packages (tree-sitter-typescript, etc.) are listed as
|
|
13
|
+
* optionalDependencies with pinned versions. They ship native prebuilds
|
|
14
|
+
* and source files (~72 MB total) but QMD only uses the .wasm files
|
|
15
|
+
* (~5 MB). If install size becomes a concern, the .wasm files can be
|
|
16
|
+
* bundled directly in the repo (e.g. assets/grammars/) and resolved
|
|
17
|
+
* via import.meta.url instead of require.resolve(), eliminating the
|
|
18
|
+
* grammar packages entirely.
|
|
19
|
+
*/
|
|
20
|
+
import type { BreakPoint } from "./store.js";
|
|
21
|
+
export type SupportedLanguage = "typescript" | "tsx" | "javascript" | "python" | "go" | "rust";
|
|
22
|
+
/**
|
|
23
|
+
* Detect language from file path extension.
|
|
24
|
+
* Returns null for unsupported or unknown extensions (including .md).
|
|
25
|
+
*/
|
|
26
|
+
export declare function detectLanguage(filepath: string): SupportedLanguage | null;
|
|
27
|
+
export declare function formatGrammarLoadError(language: SupportedLanguage, err: unknown): string;
|
|
28
|
+
/**
|
|
29
|
+
* Parse a source file and return break points at AST node boundaries.
|
|
30
|
+
*
|
|
31
|
+
* Returns an empty array for unsupported languages, parse failures,
|
|
32
|
+
* or grammar loading failures. Never throws.
|
|
33
|
+
*
|
|
34
|
+
* @param content - The file content to parse.
|
|
35
|
+
* @param filepath - The file path (used for language detection).
|
|
36
|
+
* @returns Array of BreakPoint objects suitable for merging with regex break points.
|
|
37
|
+
*/
|
|
38
|
+
export declare function getASTBreakPoints(content: string, filepath: string): Promise<BreakPoint[]>;
|
|
39
|
+
/**
|
|
40
|
+
* Check which tree-sitter grammars are available.
|
|
41
|
+
* Returns a status object for each supported language.
|
|
42
|
+
*/
|
|
43
|
+
export declare function getASTStatus(): Promise<{
|
|
44
|
+
available: boolean;
|
|
45
|
+
languages: {
|
|
46
|
+
language: SupportedLanguage;
|
|
47
|
+
available: boolean;
|
|
48
|
+
error?: string;
|
|
49
|
+
}[];
|
|
50
|
+
}>;
|
|
51
|
+
/**
|
|
52
|
+
* Metadata about a code symbol within a chunk.
|
|
53
|
+
* Reserved for Phase 2 — in Phase 1, extractSymbols() always returns an empty array.
|
|
54
|
+
*/
|
|
55
|
+
export interface SymbolInfo {
|
|
56
|
+
name: string;
|
|
57
|
+
kind: string;
|
|
58
|
+
signature?: string;
|
|
59
|
+
line: number;
|
|
60
|
+
}
|
|
61
|
+
/**
|
|
62
|
+
* Extract symbol metadata for code within a byte range.
|
|
63
|
+
* Stubbed for Phase 2 — returns empty array.
|
|
64
|
+
*/
|
|
65
|
+
export declare function extractSymbols(_content: string, _language: string, _startPos: number, _endPos: number): SymbolInfo[];
|
package/dist/ast.js
ADDED
|
@@ -0,0 +1,334 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* AST-aware chunking support via web-tree-sitter.
|
|
3
|
+
*
|
|
4
|
+
* Provides language detection, AST break point extraction for supported
|
|
5
|
+
* code file types, and a stub for future symbol extraction.
|
|
6
|
+
*
|
|
7
|
+
* All functions degrade gracefully: parse failures or unsupported languages
|
|
8
|
+
* return empty arrays, falling back to regex-only chunking.
|
|
9
|
+
*
|
|
10
|
+
* ## Dependency Note
|
|
11
|
+
*
|
|
12
|
+
* Grammar packages (tree-sitter-typescript, etc.) are listed as
|
|
13
|
+
* optionalDependencies with pinned versions. They ship native prebuilds
|
|
14
|
+
* and source files (~72 MB total) but QMD only uses the .wasm files
|
|
15
|
+
* (~5 MB). If install size becomes a concern, the .wasm files can be
|
|
16
|
+
* bundled directly in the repo (e.g. assets/grammars/) and resolved
|
|
17
|
+
* via import.meta.url instead of require.resolve(), eliminating the
|
|
18
|
+
* grammar packages entirely.
|
|
19
|
+
*/
|
|
20
|
+
import { createRequire } from "node:module";
|
|
21
|
+
import { extname } from "node:path";
|
|
22
|
+
/** Lowercase file extension (with leading dot) mapped to its language id. */
const EXTENSION_MAP = {
    ".ts": "typescript",
    ".mts": "typescript",
    ".cts": "typescript",
    ".tsx": "tsx",
    ".jsx": "tsx",
    ".js": "javascript",
    ".mjs": "javascript",
    ".cjs": "javascript",
    ".py": "python",
    ".go": "go",
    ".rs": "rust",
};
/**
 * Pick the AST language for a file from its extension.
 *
 * The extension is compared case-insensitively.
 *
 * @param filepath - Path or file name to inspect.
 * @returns The language id, or null when the extension has no entry in
 *   EXTENSION_MAP (for example `.md`, or a name with no extension).
 */
export function detectLanguage(filepath) {
    const suffix = extname(filepath).toLowerCase();
    const language = EXTENSION_MAP[suffix];
    if (language == null) {
        return null;
    }
    return language;
}
|
|
43
|
+
// =============================================================================
|
|
44
|
+
// Grammar Resolution
|
|
45
|
+
// =============================================================================
|
|
46
|
+
/**
 * Grammar source per language: the npm package, the wasm file inside it, and
 * the package version quoted in repair hints. The `javascript` entry points at
 * the same wasm file as `typescript`.
 */
const GRAMMAR_MAP = {
    typescript: { pkg: "tree-sitter-typescript", wasm: "tree-sitter-typescript.wasm", version: "0.23.2" },
    tsx: { pkg: "tree-sitter-typescript", wasm: "tree-sitter-tsx.wasm", version: "0.23.2" },
    javascript: { pkg: "tree-sitter-typescript", wasm: "tree-sitter-typescript.wasm", version: "0.23.2" },
    python: { pkg: "tree-sitter-python", wasm: "tree-sitter-python.wasm", version: "0.23.4" },
    go: { pkg: "tree-sitter-go", wasm: "tree-sitter-go.wasm", version: "0.23.4" },
    rust: { pkg: "tree-sitter-rust", wasm: "tree-sitter-rust.wasm", version: "0.24.0" },
};
/**
 * Build the user-facing message for a grammar that could not be loaded.
 *
 * @param language - Key of GRAMMAR_MAP identifying the grammar.
 * @param err - The failure; an Error contributes its message, anything else
 *   is stringified.
 * @returns One line naming the package and wasm file, the failure detail, the
 *   regex-chunking fallback, and a repair command pinned to the mapped version.
 */
export function formatGrammarLoadError(language, err) {
    const { pkg, wasm, version } = GRAMMAR_MAP[language];
    const reason = err instanceof Error ? err.message : String(err);
    return [
        `${pkg}/${wasm} failed to load (${reason}); falling back to regex chunking. `,
        `Repair a broken global install with: bun add ${pkg}@${version}`,
    ].join("");
}
|
|
63
|
+
// =============================================================================
|
|
64
|
+
// Per-Language Query Definitions
|
|
65
|
+
// =============================================================================
|
|
66
|
+
/**
|
|
67
|
+
* Tree-sitter S-expression queries for each language.
|
|
68
|
+
* Each capture name maps to a break point score via SCORE_MAP.
|
|
69
|
+
*
|
|
70
|
+
* For TypeScript/JavaScript, we match export_statement wrappers to get the
|
|
71
|
+
* correct start position (before `export`), plus bare declarations for
|
|
72
|
+
* non-exported code.
|
|
73
|
+
*/
|
|
74
|
+
const LANGUAGE_QUERIES = {
|
|
75
|
+
typescript: `
|
|
76
|
+
(export_statement) @export
|
|
77
|
+
(class_declaration) @class
|
|
78
|
+
(function_declaration) @func
|
|
79
|
+
(method_definition) @method
|
|
80
|
+
(interface_declaration) @iface
|
|
81
|
+
(type_alias_declaration) @type
|
|
82
|
+
(enum_declaration) @enum
|
|
83
|
+
(import_statement) @import
|
|
84
|
+
(lexical_declaration (variable_declarator value: (arrow_function))) @func
|
|
85
|
+
(lexical_declaration (variable_declarator value: (function_expression))) @func
|
|
86
|
+
`,
|
|
87
|
+
tsx: `
|
|
88
|
+
(export_statement) @export
|
|
89
|
+
(class_declaration) @class
|
|
90
|
+
(function_declaration) @func
|
|
91
|
+
(method_definition) @method
|
|
92
|
+
(interface_declaration) @iface
|
|
93
|
+
(type_alias_declaration) @type
|
|
94
|
+
(enum_declaration) @enum
|
|
95
|
+
(import_statement) @import
|
|
96
|
+
(lexical_declaration (variable_declarator value: (arrow_function))) @func
|
|
97
|
+
(lexical_declaration (variable_declarator value: (function_expression))) @func
|
|
98
|
+
`,
|
|
99
|
+
javascript: `
|
|
100
|
+
(export_statement) @export
|
|
101
|
+
(class_declaration) @class
|
|
102
|
+
(function_declaration) @func
|
|
103
|
+
(method_definition) @method
|
|
104
|
+
(import_statement) @import
|
|
105
|
+
(lexical_declaration (variable_declarator value: (arrow_function))) @func
|
|
106
|
+
(lexical_declaration (variable_declarator value: (function_expression))) @func
|
|
107
|
+
`,
|
|
108
|
+
python: `
|
|
109
|
+
(class_definition) @class
|
|
110
|
+
(function_definition) @func
|
|
111
|
+
(decorated_definition) @decorated
|
|
112
|
+
(import_statement) @import
|
|
113
|
+
(import_from_statement) @import
|
|
114
|
+
`,
|
|
115
|
+
go: `
|
|
116
|
+
(type_declaration) @type
|
|
117
|
+
(function_declaration) @func
|
|
118
|
+
(method_declaration) @method
|
|
119
|
+
(import_declaration) @import
|
|
120
|
+
`,
|
|
121
|
+
rust: `
|
|
122
|
+
(struct_item) @struct
|
|
123
|
+
(impl_item) @impl
|
|
124
|
+
(function_item) @func
|
|
125
|
+
(trait_item) @trait
|
|
126
|
+
(enum_item) @enum
|
|
127
|
+
(use_declaration) @import
|
|
128
|
+
(type_item) @type
|
|
129
|
+
(mod_item) @mod
|
|
130
|
+
`,
|
|
131
|
+
};
|
|
132
|
+
/**
|
|
133
|
+
* Score mapping from capture names to break point scores.
|
|
134
|
+
* Aligned with the markdown BREAK_PATTERNS scale (h1=100, h2=90, etc.)
|
|
135
|
+
* so findBestCutoff() decay works unchanged.
|
|
136
|
+
*/
|
|
137
|
+
const SCORE_MAP = {
|
|
138
|
+
class: 100,
|
|
139
|
+
iface: 100,
|
|
140
|
+
struct: 100,
|
|
141
|
+
trait: 100,
|
|
142
|
+
impl: 100,
|
|
143
|
+
mod: 100,
|
|
144
|
+
export: 90,
|
|
145
|
+
func: 90,
|
|
146
|
+
method: 90,
|
|
147
|
+
decorated: 90,
|
|
148
|
+
type: 80,
|
|
149
|
+
enum: 80,
|
|
150
|
+
import: 60,
|
|
151
|
+
};
|
|
152
|
+
// =============================================================================
|
|
153
|
+
// Parser Caching & Initialization
|
|
154
|
+
// =============================================================================
|
|
155
|
+
let ParserClass = null;
|
|
156
|
+
let LanguageClass = null;
|
|
157
|
+
let QueryClass = null;
|
|
158
|
+
let initPromise = null;
|
|
159
|
+
/** Languages that have already failed to load — warn only once per process. */
|
|
160
|
+
const failedLanguages = new Set();
|
|
161
|
+
/** Last grammar load error by language, for status output. */
|
|
162
|
+
const grammarLoadErrors = new Map();
|
|
163
|
+
/** Cached grammar load promises. */
|
|
164
|
+
const grammarCache = new Map();
|
|
165
|
+
/** Cached compiled queries per language. */
|
|
166
|
+
const queryCache = new Map();
|
|
167
|
+
/**
 * Lazily import and initialise web-tree-sitter.
 *
 * The first call starts the dynamic import, stores the Parser, Language and
 * Query constructors in the module-level slots, and runs Parser.init(). Every
 * call returns that same promise, so a rejected initialisation stays rejected.
 */
async function ensureInit() {
    if (initPromise) {
        return initPromise;
    }
    const boot = async () => {
        const { Parser, Language, Query } = await import("web-tree-sitter");
        ParserClass = Parser;
        LanguageClass = Language;
        QueryClass = Query;
        await ParserClass.init();
    };
    initPromise = boot();
    return initPromise;
}
|
|
182
|
+
/**
 * Locate a grammar's .wasm file on disk.
 *
 * Resolution goes through a `require` created relative to this module, so the
 * file is looked up as `<package>/<wasm file>` among installed dependencies.
 *
 * @param language - Key of GRAMMAR_MAP.
 * @returns The resolved path; resolution failures propagate as thrown errors.
 */
function resolveGrammarPath(language) {
    const grammar = GRAMMAR_MAP[language];
    const specifier = `${grammar.pkg}/${grammar.wasm}`;
    return createRequire(import.meta.url).resolve(specifier);
}
|
|
191
|
+
/**
 * Load the grammar for a language, sharing one in-flight or completed load
 * per wasm file.
 *
 * A language that has failed before short-circuits to null. On a new failure
 * the language is marked failed, the cached promise is dropped, the formatted
 * error is stored for status reporting, and a warning is printed.
 *
 * @param language - Key of GRAMMAR_MAP.
 * @returns The loaded grammar, or null when it is unavailable.
 */
async function loadGrammar(language) {
    if (failedLanguages.has(language)) {
        return null;
    }
    // Languages that share a wasm file also share the cache entry.
    const { wasm: cacheKey } = GRAMMAR_MAP[language];
    let pending = grammarCache.get(cacheKey);
    if (pending === undefined) {
        pending = (async () => LanguageClass.load(resolveGrammarPath(language)))();
        grammarCache.set(cacheKey, pending);
    }
    try {
        return await pending;
    }
    catch (err) {
        const message = formatGrammarLoadError(language, err);
        failedLanguages.add(language);
        grammarCache.delete(cacheKey);
        grammarLoadErrors.set(language, message);
        console.warn(`[qmd] AST grammar unavailable for ${language}: ${message}`);
        return null;
    }
}
|
|
217
|
+
/**
 * Return the compiled tree-sitter query for a language, compiling and caching
 * it on first use.
 *
 * @param language - Key of LANGUAGE_QUERIES and of the query cache.
 * @param grammar - Loaded grammar the query source is compiled against.
 * @returns The cached query object. A compile error propagates and nothing is
 *   cached for that language.
 */
function getQuery(language, grammar) {
    const cached = queryCache.get(language);
    if (cached !== undefined) {
        return cached;
    }
    const compiled = new QueryClass(grammar, LANGUAGE_QUERIES[language]);
    queryCache.set(language, compiled);
    return compiled;
}
|
|
228
|
+
// =============================================================================
|
|
229
|
+
// AST Break Point Extraction
|
|
230
|
+
// =============================================================================
|
|
231
|
+
/**
 * Parse a source file and return break points at AST node boundaries.
 *
 * Returns an empty array for unsupported languages, parse failures,
 * or grammar loading failures. Never throws: any error is logged as a
 * warning and turned into an empty result.
 *
 * The parser and the syntax tree are released on every exit path, including
 * when query compilation or capture evaluation throws.
 *
 * @param content - The file content to parse.
 * @param filepath - The file path (used for language detection).
 * @returns Array of BreakPoint objects (`pos`, `score`, `type`) sorted by
 *   ascending position, suitable for merging with regex break points.
 */
export async function getASTBreakPoints(content, filepath) {
    const language = detectLanguage(filepath);
    if (!language)
        return [];
    // Declared outside the try so the finally block can release whatever was
    // actually created before a failure.
    let parser = null;
    let tree = null;
    try {
        await ensureInit();
        const grammar = await loadGrammar(language);
        if (!grammar)
            return [];
        parser = new ParserClass();
        parser.setLanguage(grammar);
        tree = parser.parse(content);
        if (!tree)
            return [];
        const query = getQuery(language, grammar);
        const captures = query.captures(tree.rootNode);
        // Deduplicate: several captures can start at the same offset; keep
        // only the highest-scoring one per start position. Capture names
        // without a SCORE_MAP entry score 20.
        const seen = new Map();
        for (const cap of captures) {
            const pos = cap.node.startIndex;
            const score = SCORE_MAP[cap.name] ?? 20;
            const existing = seen.get(pos);
            if (!existing || score > existing.score) {
                seen.set(pos, { pos, score, type: `ast:${cap.name}` });
            }
        }
        return Array.from(seen.values()).sort((a, b) => a.pos - b.pos);
    }
    catch (err) {
        console.warn(`[qmd] AST parse failed for ${filepath}, falling back to regex: ${err instanceof Error ? err.message : err}`);
        return [];
    }
    finally {
        // Release the tree first, then the parser. Cleanup is best-effort so
        // that a failing delete() cannot break the never-throws contract.
        for (const handle of [tree, parser]) {
            try {
                handle?.delete();
            }
            catch {
                // Ignored: nothing useful can be done if release fails.
            }
        }
    }
}
|
|
281
|
+
// =============================================================================
|
|
282
|
+
// Health / Status
|
|
283
|
+
// =============================================================================
|
|
284
|
+
/**
 * Report AST support for every language in GRAMMAR_MAP.
 *
 * If web-tree-sitter itself cannot be initialised, every language is reported
 * unavailable with the same init error. Otherwise each grammar is loaded in
 * turn and its query compiled; a language counts as available only when both
 * steps succeed.
 *
 * @returns `available` is true when at least one language is usable;
 *   `languages` holds one entry per language, with `error` set on failures.
 */
export async function getASTStatus() {
    const supported = Object.keys(GRAMMAR_MAP);
    try {
        await ensureInit();
    }
    catch (err) {
        const error = `web-tree-sitter init failed: ${err instanceof Error ? err.message : err}`;
        return {
            available: false,
            languages: supported.map((language) => ({ language, available: false, error })),
        };
    }
    const languages = [];
    for (const language of supported) {
        let entry;
        try {
            const grammar = await loadGrammar(language);
            if (grammar) {
                // Compiling the query is part of the check; it throws on error.
                getQuery(language, grammar);
                entry = { language, available: true };
            }
            else {
                entry = {
                    language,
                    available: false,
                    error: grammarLoadErrors.get(language) ?? "grammar failed to load",
                };
            }
        }
        catch (err) {
            entry = {
                language,
                available: false,
                error: err instanceof Error ? err.message : String(err),
            };
        }
        languages.push(entry);
    }
    return {
        available: languages.some((entry) => entry.available),
        languages,
    };
}
|
|
328
|
+
/**
 * Placeholder for Phase 2 symbol extraction.
 *
 * All four arguments are accepted and ignored for now.
 *
 * @returns Always a new, empty array.
 */
export function extractSymbols(_content, _language, _startPos, _endPos) {
    const symbols = [];
    return symbols;
}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* QMD Benchmark Harness
|
|
3
|
+
*
|
|
4
|
+
* Runs queries from a fixture file against multiple search backends
|
|
5
|
+
* and measures precision@k, recall, MRR, F1, and latency.
|
|
6
|
+
*
|
|
7
|
+
* Usage:
|
|
8
|
+
* qmd bench <fixture.json> [--json] [--collection <name>]
|
|
9
|
+
*
|
|
10
|
+
* Backends tested:
|
|
11
|
+
* - bm25: BM25 keyword search (searchLex)
|
|
12
|
+
* - vector: Vector similarity search (searchVector)
|
|
13
|
+
* - hybrid: BM25 + vector RRF fusion without reranking
|
|
14
|
+
* - full: Full hybrid pipeline with LLM reranking
|
|
15
|
+
*/
|
|
16
|
+
import type { BenchmarkResult } from "./types.js";
|
|
17
|
+
export declare function runBenchmark(fixturePath: string, options?: {
|
|
18
|
+
json?: boolean;
|
|
19
|
+
collection?: string;
|
|
20
|
+
backends?: string[];
|
|
21
|
+
dbPath?: string;
|
|
22
|
+
configPath?: string;
|
|
23
|
+
}): Promise<BenchmarkResult>;
|