@joycodetech/qmd-ja 2.5.3-ja.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +821 -0
- package/LICENSE +21 -0
- package/README.md +1143 -0
- package/bin/qmd-ja +162 -0
- package/dist/ast.d.ts +65 -0
- package/dist/ast.js +334 -0
- package/dist/bench/bench.d.ts +23 -0
- package/dist/bench/bench.js +280 -0
- package/dist/bench/score.d.ts +33 -0
- package/dist/bench/score.js +88 -0
- package/dist/bench/types.d.ts +80 -0
- package/dist/bench/types.js +8 -0
- package/dist/cli/formatter.d.ts +120 -0
- package/dist/cli/formatter.js +355 -0
- package/dist/cli/qmd.d.ts +43 -0
- package/dist/cli/qmd.js +4179 -0
- package/dist/collections.d.ts +166 -0
- package/dist/collections.js +410 -0
- package/dist/db.d.ts +44 -0
- package/dist/db.js +75 -0
- package/dist/index.d.ts +230 -0
- package/dist/index.js +242 -0
- package/dist/llm.d.ts +500 -0
- package/dist/llm.js +1615 -0
- package/dist/maintenance.d.ts +23 -0
- package/dist/maintenance.js +37 -0
- package/dist/mcp/server.d.ts +24 -0
- package/dist/mcp/server.js +702 -0
- package/dist/paths.d.ts +1 -0
- package/dist/paths.js +4 -0
- package/dist/store.d.ts +1002 -0
- package/dist/store.js +4208 -0
- package/models/vaporetto-bccwj.model +0 -0
- package/package.json +130 -0
- package/scripts/build.mjs +30 -0
- package/scripts/check-package-grammars.mjs +29 -0
- package/scripts/package-smoke.mjs +65 -0
- package/scripts/test-all.mjs +38 -0
- package/skills/qmd/SKILL.md +295 -0
- package/skills/qmd/references/mcp-setup.md +102 -0
- package/skills/release/SKILL.md +139 -0
- package/skills/release/scripts/install-hooks.sh +38 -0
- package/vendor/vaporetto-node-wasm/LICENSE +22 -0
- package/vendor/vaporetto-node-wasm/package.json +11 -0
- package/vendor/vaporetto-node-wasm/vaporetto_node_wasm.d.ts +19 -0
- package/vendor/vaporetto-node-wasm/vaporetto_node_wasm.js +202 -0
- package/vendor/vaporetto-node-wasm/vaporetto_node_wasm_bg.wasm +0 -0
- package/vendor/vaporetto-node-wasm/vaporetto_node_wasm_bg.wasm.d.ts +13 -0
|
Binary file
|
package/package.json
ADDED
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@joycodetech/qmd-ja",
|
|
3
|
+
"version": "2.5.3-ja.3",
|
|
4
|
+
"description": "Japanese-enhanced fork of qmd — On-device hybrid search with Vaporetto WASM morphological tokenizer for accurate Japanese BM25 full-text search",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"main": "dist/index.js",
|
|
7
|
+
"types": "dist/index.d.ts",
|
|
8
|
+
"exports": {
|
|
9
|
+
".": {
|
|
10
|
+
"import": "./dist/index.js",
|
|
11
|
+
"types": "./dist/index.d.ts"
|
|
12
|
+
}
|
|
13
|
+
},
|
|
14
|
+
"bin": {
|
|
15
|
+
"qmd-ja": "bin/qmd-ja"
|
|
16
|
+
},
|
|
17
|
+
"files": [
|
|
18
|
+
"bin/",
|
|
19
|
+
"dist/",
|
|
20
|
+
"vendor/vaporetto-node-wasm/vaporetto_node_wasm.js",
|
|
21
|
+
"vendor/vaporetto-node-wasm/vaporetto_node_wasm_bg.wasm",
|
|
22
|
+
"vendor/vaporetto-node-wasm/vaporetto_node_wasm.d.ts",
|
|
23
|
+
"vendor/vaporetto-node-wasm/vaporetto_node_wasm_bg.wasm.d.ts",
|
|
24
|
+
"vendor/vaporetto-node-wasm/package.json",
|
|
25
|
+
"models/vaporetto-bccwj.model",
|
|
26
|
+
"skills/",
|
|
27
|
+
"scripts/build.mjs",
|
|
28
|
+
"scripts/check-package-grammars.mjs",
|
|
29
|
+
"scripts/package-smoke.mjs",
|
|
30
|
+
"scripts/test-all.mjs",
|
|
31
|
+
"LICENSE",
|
|
32
|
+
"CHANGELOG.md"
|
|
33
|
+
],
|
|
34
|
+
"scripts": {
|
|
35
|
+
"prepare": "[ -d .git ] && ./scripts/install-hooks.sh || true",
|
|
36
|
+
"build": "node scripts/build.mjs",
|
|
37
|
+
"test": "node scripts/test-all.mjs",
|
|
38
|
+
"test:types": "node ./node_modules/typescript/bin/tsc -p tsconfig.build.json --noEmit",
|
|
39
|
+
"test:node": "node ./node_modules/vitest/vitest.mjs run --reporter=verbose --testTimeout 60000",
|
|
40
|
+
"test:bun": "bun test --timeout 60000 --preload ./src/test-preload.ts",
|
|
41
|
+
"test:unit": "CI=true node ./node_modules/vitest/vitest.mjs run --reporter=verbose --testTimeout 60000 test/ && CI=true bun test --timeout 60000 --preload ./src/test-preload.ts test/",
|
|
42
|
+
"test:package": "node scripts/package-smoke.mjs",
|
|
43
|
+
"qmd": "tsx src/cli/qmd.ts",
|
|
44
|
+
"index": "tsx src/cli/qmd.ts index",
|
|
45
|
+
"vector": "tsx src/cli/qmd.ts vector",
|
|
46
|
+
"search": "tsx src/cli/qmd.ts search",
|
|
47
|
+
"vsearch": "tsx src/cli/qmd.ts vsearch",
|
|
48
|
+
"rerank": "tsx src/cli/qmd.ts rerank",
|
|
49
|
+
"inspector": "npx @modelcontextprotocol/inspector tsx src/cli/qmd.ts mcp",
|
|
50
|
+
"release": "./scripts/release.sh",
|
|
51
|
+
"smoke:package-grammars": "node scripts/check-package-grammars.mjs"
|
|
52
|
+
},
|
|
53
|
+
"publishConfig": {
|
|
54
|
+
"access": "public"
|
|
55
|
+
},
|
|
56
|
+
"repository": {
|
|
57
|
+
"type": "git",
|
|
58
|
+
"url": "git+https://github.com/joycodetech/qmd-ja.git"
|
|
59
|
+
},
|
|
60
|
+
"homepage": "https://github.com/joycodetech/qmd-ja#readme",
|
|
61
|
+
"bugs": {
|
|
62
|
+
"url": "https://github.com/joycodetech/qmd-ja/issues"
|
|
63
|
+
},
|
|
64
|
+
"dependencies": {
|
|
65
|
+
"@modelcontextprotocol/sdk": "1.29.0",
|
|
66
|
+
"@types/kuromoji": "^0.1.3",
|
|
67
|
+
"better-sqlite3": "12.10.0",
|
|
68
|
+
"fast-glob": "3.3.3",
|
|
69
|
+
"kuromoji": "^0.1.2",
|
|
70
|
+
"node-llama-cpp": "3.18.1",
|
|
71
|
+
"picomatch": "4.0.4",
|
|
72
|
+
"sqlite-vec": "0.1.9",
|
|
73
|
+
"tree-sitter-go": "0.25.0",
|
|
74
|
+
"tree-sitter-python": "0.25.0",
|
|
75
|
+
"tree-sitter-rust": "0.24.0",
|
|
76
|
+
"tree-sitter-typescript": "0.23.2",
|
|
77
|
+
"web-tree-sitter": "0.26.8",
|
|
78
|
+
"yaml": "2.9.0",
|
|
79
|
+
"zod": "4.2.1"
|
|
80
|
+
},
|
|
81
|
+
"optionalDependencies": {
|
|
82
|
+
"sqlite-vec-darwin-arm64": "0.1.9",
|
|
83
|
+
"sqlite-vec-darwin-x64": "0.1.9",
|
|
84
|
+
"sqlite-vec-linux-arm64": "0.1.9",
|
|
85
|
+
"sqlite-vec-linux-x64": "0.1.9",
|
|
86
|
+
"sqlite-vec-windows-x64": "0.1.9"
|
|
87
|
+
},
|
|
88
|
+
"devDependencies": {
|
|
89
|
+
"@types/better-sqlite3": "7.6.13",
|
|
90
|
+
"tsx": "4.21.0",
|
|
91
|
+
"vitest": "3.2.4"
|
|
92
|
+
},
|
|
93
|
+
"pnpm": {
|
|
94
|
+
"onlyBuiltDependencies": [
|
|
95
|
+
"better-sqlite3",
|
|
96
|
+
"esbuild",
|
|
97
|
+
"node-llama-cpp",
|
|
98
|
+
"tree-sitter-go",
|
|
99
|
+
"tree-sitter-javascript",
|
|
100
|
+
"tree-sitter-python",
|
|
101
|
+
"tree-sitter-rust",
|
|
102
|
+
"tree-sitter-typescript"
|
|
103
|
+
]
|
|
104
|
+
},
|
|
105
|
+
"peerDependencies": {
|
|
106
|
+
"typescript": "^5.9.3"
|
|
107
|
+
},
|
|
108
|
+
"engines": {
|
|
109
|
+
"node": ">=22.0.0"
|
|
110
|
+
},
|
|
111
|
+
"keywords": [
|
|
112
|
+
"markdown",
|
|
113
|
+
"search",
|
|
114
|
+
"fts",
|
|
115
|
+
"full-text-search",
|
|
116
|
+
"vector",
|
|
117
|
+
"semantic-search",
|
|
118
|
+
"sqlite",
|
|
119
|
+
"bm25",
|
|
120
|
+
"embeddings",
|
|
121
|
+
"rag",
|
|
122
|
+
"mcp",
|
|
123
|
+
"reranking",
|
|
124
|
+
"knowledge-base",
|
|
125
|
+
"local-ai",
|
|
126
|
+
"llm"
|
|
127
|
+
],
|
|
128
|
+
"author": "Koz Oda <oss@joycodetech.jp>",
|
|
129
|
+
"license": "MIT"
|
|
130
|
+
}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import { spawnSync } from "node:child_process";
|
|
3
|
+
import { chmodSync, readFileSync, renameSync, writeFileSync } from "node:fs";
|
|
4
|
+
import { join } from "node:path";
|
|
5
|
+
import { fileURLToPath } from "node:url";
|
|
6
|
+
|
|
7
|
+
const root = join(fileURLToPath(new URL("..", import.meta.url)));
|
|
8
|
+
|
|
9
|
+
function run(command, args, options = {}) {
|
|
10
|
+
const result = spawnSync(command, args, {
|
|
11
|
+
cwd: root,
|
|
12
|
+
stdio: "inherit",
|
|
13
|
+
shell: process.platform === "win32",
|
|
14
|
+
...options,
|
|
15
|
+
});
|
|
16
|
+
if (result.status !== 0) {
|
|
17
|
+
process.exit(result.status ?? 1);
|
|
18
|
+
}
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
run(process.execPath, [join(root, "node_modules", "typescript", "bin", "tsc"), "-p", "tsconfig.build.json"]);
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
const cliPath = join(root, "dist", "cli", "qmd.js");
|
|
25
|
+
const tmpPath = `${cliPath}.tmp`;
|
|
26
|
+
const built = readFileSync(cliPath, "utf8");
|
|
27
|
+
const withoutExistingShebang = built.startsWith("#!") ? built.slice(built.indexOf("\n") + 1) : built;
|
|
28
|
+
writeFileSync(tmpPath, `#!/usr/bin/env node\n${withoutExistingShebang}`);
|
|
29
|
+
renameSync(tmpPath, cliPath);
|
|
30
|
+
chmodSync(cliPath, 0o755);
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import { createRequire } from "node:module";
|
|
3
|
+
|
|
4
|
+
const require = createRequire(import.meta.url);
|
|
5
|
+
|
|
6
|
+
const grammars = [
|
|
7
|
+
"tree-sitter-typescript/tree-sitter-typescript.wasm",
|
|
8
|
+
"tree-sitter-typescript/tree-sitter-tsx.wasm",
|
|
9
|
+
"tree-sitter-python/tree-sitter-python.wasm",
|
|
10
|
+
"tree-sitter-go/tree-sitter-go.wasm",
|
|
11
|
+
"tree-sitter-rust/tree-sitter-rust.wasm",
|
|
12
|
+
];
|
|
13
|
+
|
|
14
|
+
let ok = true;
|
|
15
|
+
for (const grammar of grammars) {
|
|
16
|
+
try {
|
|
17
|
+
const resolved = require.resolve(grammar);
|
|
18
|
+
console.log(`ok ${grammar} -> ${resolved}`);
|
|
19
|
+
} catch (err) {
|
|
20
|
+
ok = false;
|
|
21
|
+
console.error(`missing ${grammar}`);
|
|
22
|
+
console.error(err instanceof Error ? err.message : String(err));
|
|
23
|
+
}
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
if (!ok) {
|
|
27
|
+
console.error("\nAST grammar package smoke check failed. Run `bun install` locally or repair a broken global install with the matching `bun add tree-sitter-...@<version>` command shown by `qmd status`.");
|
|
28
|
+
process.exit(1);
|
|
29
|
+
}
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import { spawnSync } from "node:child_process";
|
|
3
|
+
import { existsSync, readFileSync, statSync } from "node:fs";
|
|
4
|
+
import { join } from "node:path";
|
|
5
|
+
import { fileURLToPath } from "node:url";
|
|
6
|
+
|
|
7
|
+
const root = fileURLToPath(new URL("..", import.meta.url));
|
|
8
|
+
const pkg = JSON.parse(readFileSync(join(root, "package.json"), "utf8"));
|
|
9
|
+
|
|
10
|
+
function run(label, command, args, options = {}) {
|
|
11
|
+
console.log(`==> ${label}`);
|
|
12
|
+
const { quiet, ...spawnOptions } = options;
|
|
13
|
+
const result = spawnSync(command, args, {
|
|
14
|
+
cwd: root,
|
|
15
|
+
stdio: quiet ? "pipe" : "inherit",
|
|
16
|
+
shell: process.platform === "win32",
|
|
17
|
+
...spawnOptions,
|
|
18
|
+
});
|
|
19
|
+
if (result.status !== 0) {
|
|
20
|
+
console.error(`Package smoke failed: ${label}`);
|
|
21
|
+
if (quiet) {
|
|
22
|
+
if (result.stdout) process.stderr.write(result.stdout);
|
|
23
|
+
if (result.stderr) process.stderr.write(result.stderr);
|
|
24
|
+
}
|
|
25
|
+
process.exit(result.status ?? 1);
|
|
26
|
+
}
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
function assertPath(path, label = path) {
|
|
30
|
+
const full = join(root, path);
|
|
31
|
+
if (!existsSync(full)) {
|
|
32
|
+
console.error(`Package smoke failed: missing ${label} (${path})`);
|
|
33
|
+
process.exit(1);
|
|
34
|
+
}
|
|
35
|
+
return full;
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
run("build compiled package", process.execPath, ["scripts/build.mjs"]);
|
|
39
|
+
run("AST grammar runtime packages", process.execPath, ["scripts/check-package-grammars.mjs"]);
|
|
40
|
+
|
|
41
|
+
for (const entry of pkg.files ?? []) {
|
|
42
|
+
assertPath(entry.replace(/\/$/, ""), `package.json files[] entry ${entry}`);
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
for (const [name, binPath] of Object.entries(pkg.bin ?? {})) {
|
|
46
|
+
const full = assertPath(binPath, `bin ${name}`);
|
|
47
|
+
const mode = statSync(full).mode;
|
|
48
|
+
if ((mode & 0o111) === 0) {
|
|
49
|
+
console.error(`Package smoke failed: bin ${name} is not executable (${binPath})`);
|
|
50
|
+
process.exit(1);
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
assertPath("dist/index.js", "compiled main export");
|
|
55
|
+
assertPath("dist/index.d.ts", "compiled type export");
|
|
56
|
+
assertPath("dist/cli/qmd.js", "compiled CLI");
|
|
57
|
+
|
|
58
|
+
run("compiled CLI under Node", process.execPath, ["dist/cli/qmd.js", "--help"], { quiet: true });
|
|
59
|
+
// Exercise the shell wrapper this package actually ships: package.json "bin" maps qmd-ja -> bin/qmd-ja.
run("package wrapper", "sh", ["bin/qmd-ja", "--help"], { quiet: true });
|
|
60
|
+
|
|
61
|
+
if (process.env.QMD_SKIP_BUN_SMOKE === "1") {
|
|
62
|
+
console.log("==> compiled CLI under Bun (skipped by QMD_SKIP_BUN_SMOKE=1)");
|
|
63
|
+
} else {
|
|
64
|
+
run("compiled CLI under Bun", "bun", ["dist/cli/qmd.js", "--help"], { quiet: true });
|
|
65
|
+
}
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import { spawnSync } from "node:child_process";
|
|
3
|
+
import { join } from "node:path";
|
|
4
|
+
import { fileURLToPath } from "node:url";
|
|
5
|
+
|
|
6
|
+
const root = fileURLToPath(new URL("..", import.meta.url));
|
|
7
|
+
|
|
8
|
+
// Mirror the bin/qmd-ja wrapper's darwin Metal residency mitigation for test subprocesses.
|
|
9
|
+
// libggml-metal asserts on a non-empty residency set during its static
|
|
10
|
+
// destructor (ggml-org/llama.cpp#22593, fix open as #22595) and dumps a
|
|
11
|
+
// multi-kB backtrace at process exit even when tests pass. The env var must
|
|
12
|
+
// be set BEFORE the subprocess starts because libggml-metal reads it via
|
|
13
|
+
// libc getenv at module-load time. Opt out with QMD_METAL_KEEP_RESIDENCY=1.
|
|
14
|
+
const darwinMetalEnv =
|
|
15
|
+
process.platform === "darwin" && process.env.QMD_METAL_KEEP_RESIDENCY !== "1"
|
|
16
|
+
? { GGML_METAL_NO_RESIDENCY: "1" }
|
|
17
|
+
: {};
|
|
18
|
+
|
|
19
|
+
function run(label, command, args, options = {}) {
|
|
20
|
+
console.log(`==> ${label}`);
|
|
21
|
+
const { env: extraEnv, ...spawnOptions } = options;
|
|
22
|
+
const result = spawnSync(command, args, {
|
|
23
|
+
cwd: root,
|
|
24
|
+
stdio: "inherit",
|
|
25
|
+
shell: process.platform === "win32",
|
|
26
|
+
env: { ...process.env, ...darwinMetalEnv, ...(extraEnv ?? {}) },
|
|
27
|
+
...spawnOptions,
|
|
28
|
+
});
|
|
29
|
+
if (result.status !== 0) {
|
|
30
|
+
console.error(`Test task failed: ${label}`);
|
|
31
|
+
process.exit(result.status ?? 1);
|
|
32
|
+
}
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
run("TypeScript build typecheck", process.execPath, [join(root, "node_modules", "typescript", "bin", "tsc"), "-p", "tsconfig.build.json", "--noEmit"]);
|
|
36
|
+
run("Vitest suite under Node", process.execPath, [join(root, "node_modules", "vitest", "vitest.mjs"), "run", "--reporter=verbose", "--testTimeout", "60000", "test/"], { env: { CI: "true" } });
|
|
37
|
+
run("Bun test suite", "bun", ["test", "--timeout", "60000", "--preload", "./src/test-preload.ts", "test/"], { env: { CI: "true" } });
|
|
38
|
+
run("Package smoke", process.execPath, ["scripts/package-smoke.mjs"]);
|
|
@@ -0,0 +1,295 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: qmd
|
|
3
|
+
description: Search local markdown knowledge bases, notes, docs, and wikis with QMD. Use when users ask to find notes, retrieve documents, inspect a wiki, answer from indexed markdown, or set up QMD access.
|
|
4
|
+
license: MIT
|
|
5
|
+
compatibility: Requires qmd CLI or MCP server. Install via `npm install -g @tobilu/qmd`.
|
|
6
|
+
metadata:
|
|
7
|
+
author: tobi
|
|
8
|
+
version: "2.2.0"
|
|
9
|
+
allowed-tools: Bash(qmd:*), mcp__qmd__*
|
|
10
|
+
---
|
|
11
|
+
|
|
12
|
+
# QMD - Query Markdown Documents
|
|
13
|
+
|
|
14
|
+
## How search works
|
|
15
|
+
|
|
16
|
+
QMD searches local markdown collections: notes, docs, wikis, transcripts, and
|
|
17
|
+
project knowledge bases. Use it before web search when the answer may already be
|
|
18
|
+
in indexed local files.
|
|
19
|
+
|
|
20
|
+
The workflow is always:
|
|
21
|
+
|
|
22
|
+
1. Search for candidate documents.
|
|
23
|
+
2. Retrieve the full source with `qmd get` or `qmd multi-get`.
|
|
24
|
+
3. Answer from retrieved text, citing paths or docids.
|
|
25
|
+
|
|
26
|
+
Do not answer from snippets alone when the user needs facts, decisions, quotes,
|
|
27
|
+
or nuance. Snippets are only leads.
|
|
28
|
+
|
|
29
|
+
Typical loop:
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
qmd search "merchant reality support interviews" -n 5
|
|
33
|
+
# leads: #abc123 concepts/customer-proximity.md; #def432 sources/merchant-call.md
|
|
34
|
+
qmd multi-get "#abc123,#def432" --format md
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
**Default to structured `qmd query` with `intent:`, `lex:`, `vec:`, and `hyde:`
|
|
38
|
+
fields that you write yourself.** You are a better query expander than the
|
|
39
|
+
built-in model: you know the user's actual goal, the domain vocabulary, and the
|
|
40
|
+
nearby-but-wrong concepts to avoid. Do not just paste the user's words into
|
|
41
|
+
`qmd query "..."` and hope the expansion model guesses right — supply the
|
|
42
|
+
`intent:` and craft the lexical and semantic terms deliberately (see
|
|
43
|
+
[Pick the right search mode](#pick-the-right-search-mode)).
|
|
44
|
+
|
|
45
|
+
When reporting what you retrieved, a compact note is enough; do not paste whole
|
|
46
|
+
files unless needed:
|
|
47
|
+
|
|
48
|
+
```text
|
|
49
|
+
Retrieved:
|
|
50
|
+
- #abc123 concepts/customer-proximity.md
|
|
51
|
+
- #def432 sources/merchant-call.md
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
## Pick the right search mode
|
|
55
|
+
|
|
56
|
+
Use **BM25 lexical search** when you know exact words, titles, names, code
|
|
57
|
+
symbols, or rare phrases:
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
qmd search "cockpit OKR Goodhart" -n 10
|
|
61
|
+
qmd search '"AI Before Headcount"' -c concepts -n 5
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
Use **`qmd query` with structured fields** when the user describes an idea
|
|
65
|
+
indirectly, uses different wording than the source, or needs conceptual recall.
|
|
66
|
+
**This is the default mode — write the fields yourself rather than leaning on
|
|
67
|
+
query expansion.** Combine exact anchors with semantic recall:
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
qmd query $'intent: Find the concept note about metrics as instruments without letting OKRs replace judgment.\nlex: cockpit instruments OKR Goodhart metrics judgment\nvec: data informed not metric driven product judgment\nhyde: A concept note says metrics are useful like cockpit instruments, but leaders should remain data-informed rather than metric-driven because OKRs and dashboards can Goodhart product judgment.'
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
Structured query fields (you author each one — do not delegate this to the
|
|
74
|
+
expansion model):
|
|
75
|
+
|
|
76
|
+
- `intent:` states what you are trying to find **and what to avoid**. Always
|
|
77
|
+
supply this. It steers ranking away from nearby-but-wrong concepts.
|
|
78
|
+
- `lex:` exact terms, aliases, titles, code symbols, and rare words you expect
|
|
79
|
+
in the source. This is your own keyword expansion.
|
|
80
|
+
- `vec:` paraphrases the idea in natural language, in source-like wording.
|
|
81
|
+
- `hyde:` describes the document or answer that would satisfy the request.
|
|
82
|
+
|
|
83
|
+
You do not need all four every time, but you should almost always write at least
|
|
84
|
+
`intent:` plus one of `lex:`/`vec:`. A bare `qmd query "the user's sentence"`
|
|
85
|
+
throws away the context only you have and relies on the built-in expander to
|
|
86
|
+
reconstruct it — prefer the structured form.
|
|
87
|
+
|
|
88
|
+
If you genuinely have nothing to expand (a single rare token, a verbatim phrase),
|
|
89
|
+
that is a job for `qmd search`, not bare `qmd query`. To inspect how a structured query was ranked, add `--explain`:
|
|
90
|
+
|
|
91
|
+
```bash
|
|
92
|
+
qmd query --format json --explain $'intent: ...\nlex: ...\nvec: ...' # inspect ranking
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
If `qmd query` is slow or model/GPU setup fails, fall back to `qmd search` with
|
|
96
|
+
better lexical terms.
|
|
97
|
+
|
|
98
|
+
## Retrieve sources
|
|
99
|
+
|
|
100
|
+
Search results include docids like `#abc123` and `qmd://...` paths. Fetch them:
|
|
101
|
+
|
|
102
|
+
```bash
|
|
103
|
+
qmd get "#abc123"
|
|
104
|
+
qmd get qmd://concepts/ai-before-headcount.md
|
|
105
|
+
qmd multi-get "#abc123,#def432" --format md
|
|
106
|
+
qmd multi-get 'concepts/{ai-before-headcount.md,data-informed-not-metric-driven.md}' --format md
|
|
107
|
+
qmd multi-get 'sources/podcast-2025-*.md' -l 80
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
Use `multi-get` when comparing several hits or gathering context across pages.
|
|
111
|
+
|
|
112
|
+
### Output is line-numbered and carries the docid — cite both
|
|
113
|
+
|
|
114
|
+
`get` and `multi-get` are **line-numbered by default** and always print the
|
|
115
|
+
document's `#docid` and `qmd://` path. So `get` output looks like:
|
|
116
|
+
|
|
117
|
+
```text
|
|
118
|
+
qmd://concepts/note.md #abc123
|
|
119
|
+
---
|
|
120
|
+
|
|
121
|
+
1: # Metrics as instruments
|
|
122
|
+
2:
|
|
123
|
+
3: Treat dashboards like cockpit instruments...
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
Cite the docid and exact line numbers in your answer, and use the numbers to ask
|
|
127
|
+
for the next slice. Pass `--no-line-numbers` only when you need raw content to
|
|
128
|
+
copy verbatim (e.g. reproducing a code block).
|
|
129
|
+
|
|
130
|
+
When you need to open or edit the underlying file (e.g. hand a path to `Read`,
|
|
131
|
+
`Edit`, or an editor), add `--full-path`. It replaces the `qmd://` URL + docid
|
|
132
|
+
header with the document's on-disk path, falling back to the canonical header if
|
|
133
|
+
the file no longer exists on disk:
|
|
134
|
+
|
|
135
|
+
```text
|
|
136
|
+
$ qmd get "#abc123" --full-path
|
|
137
|
+
/Users/you/notes/concepts/note.md
|
|
138
|
+
---
|
|
139
|
+
|
|
140
|
+
1: # Metrics as instruments
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
`--full-path` works the same way on `qmd search` and `qmd query`: result paths
|
|
144
|
+
become the file's on-disk path — `./`-prefixed relative path when the file is
|
|
145
|
+
inside `$PWD`, absolute realpath otherwise — and the per-result `#docid` is
|
|
146
|
+
dropped because the path is the identifier. The leading `./` is intentional so
|
|
147
|
+
the output is unambiguously a filesystem path and cannot be mistaken for a bare
|
|
148
|
+
collection-relative string. Default search/query output still uses `qmd://`
|
|
149
|
+
URIs; only opt into `--full-path` when you specifically need a path you can hand
|
|
150
|
+
to a non-QMD tool.
|
|
151
|
+
|
|
152
|
+
### Read line ranges with the `:from:count` suffix — never pipe through `sed`/`head`/`tail`
|
|
153
|
+
|
|
154
|
+
`qmd get` slices files itself. Use the suffix or flags; do **not** shell out to
|
|
155
|
+
`sed -n`, `head`, `tail`, or `awk` to pull a line range. Piping defeats docid
|
|
156
|
+
resolution, virtual-path lookups, line numbering, and the header, and it is
|
|
157
|
+
slower and more error-prone.
|
|
158
|
+
|
|
159
|
+
The most compact form is a `:from:count` suffix right on the path or docid —
|
|
160
|
+
prefer it:
|
|
161
|
+
|
|
162
|
+
```bash
|
|
163
|
+
qmd get "#abc123:120:40" # 40 lines starting at line 120
|
|
164
|
+
qmd get qmd://concepts/note.md:200:60 # lines 200–259
|
|
165
|
+
qmd get "#abc123:120" # from line 120 to end of file
|
|
166
|
+
qmd get "#abc123" --from 120 -l 40 # equivalent, using flags
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
Suffix and flags:
|
|
170
|
+
|
|
171
|
+
- `<path>:<from>:<count>` — start at line `<from>`, read `<count>` lines. **Best
|
|
172
|
+
for reading around a search hit.**
|
|
173
|
+
- `<path>:<from>` — start at `<from>`, read to end of file.
|
|
174
|
+
- `--from <line>` / `-l <lines>` — flag equivalents. Explicit flags override the
|
|
175
|
+
suffix, so `... :5:2 -l 1` reads 1 line.
|
|
176
|
+
- `--no-line-numbers` — drop the `N:` prefixes (line numbers are on by default).
|
|
177
|
+
|
|
178
|
+
Wrong: `qmd get "#abc123" | sed -n '120,160p'`
|
|
179
|
+
Right: `qmd get "#abc123:120:40"`
|
|
180
|
+
|
|
181
|
+
Search results include a `:line` anchor on each hit — feed it straight into
|
|
182
|
+
`qmd get path:line:<n>` to read a window around the match (line numbers in the
|
|
183
|
+
output will start at `line`).
|
|
184
|
+
|
|
185
|
+
## Discover what is indexed
|
|
186
|
+
|
|
187
|
+
```bash
|
|
188
|
+
qmd collection list
|
|
189
|
+
qmd ls
|
|
190
|
+
qmd status
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
Add collection filters when broad searches drift into the wrong corpus:
|
|
194
|
+
|
|
195
|
+
```bash
|
|
196
|
+
qmd search "headcount autonomous agents" -c concepts -n 10
|
|
197
|
+
qmd query "merchant support product reality" -c concepts -c sources -n 10
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
Omit `-c` to search everything.
|
|
201
|
+
|
|
202
|
+
## MCP Tool: `query`
|
|
203
|
+
|
|
204
|
+
When using the MCP server, prefer structured searches:
|
|
205
|
+
|
|
206
|
+
```json
|
|
207
|
+
{
|
|
208
|
+
"searches": [
|
|
209
|
+
{ "type": "lex", "query": "cockpit OKR Goodhart" },
|
|
210
|
+
{ "type": "vec", "query": "data informed not metric driven product judgment" },
|
|
211
|
+
{ "type": "hyde", "query": "A concept note explains that metrics are useful as instruments, but leaders should not let OKRs or dashboards replace judgment." }
|
|
212
|
+
],
|
|
213
|
+
"intent": "Find the concept note about using metrics as instruments without becoming metric-driven.",
|
|
214
|
+
"collections": ["concepts"],
|
|
215
|
+
"limit": 10
|
|
216
|
+
}
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
Query types:
|
|
220
|
+
|
|
221
|
+
- `lex` — BM25 keyword search. Best for exact terms, names, titles, and code.
|
|
222
|
+
- `vec` — vector semantic search. Best for natural-language concepts.
|
|
223
|
+
- `hyde` — vector search using a hypothetical answer/document passage.
|
|
224
|
+
|
|
225
|
+
## Query craft
|
|
226
|
+
|
|
227
|
+
Good QMD searches mix three things:
|
|
228
|
+
|
|
229
|
+
1. **Title/alias anchors:** exact page titles, named entities, phrases.
|
|
230
|
+
2. **Semantic paraphrase:** how a human would describe the idea.
|
|
231
|
+
3. **Negative space:** enough intent to avoid nearby-but-wrong concepts.
|
|
232
|
+
|
|
233
|
+
Examples:
|
|
234
|
+
|
|
235
|
+
```bash
|
|
236
|
+
# Exact-ish title lookup
|
|
237
|
+
qmd search '"arm the rebels" merchants tools big companies' -c concepts
|
|
238
|
+
|
|
239
|
+
# Semantic concept lookup
|
|
240
|
+
qmd query $'intent: Find the customer proximity concept, not generic customer delight.\nlex: support pseudonymous merchant customer interviews\nvec: founder stays close to merchant reality through support and product use'
|
|
241
|
+
|
|
242
|
+
# Source lookup
|
|
243
|
+
qmd search "six-week cadence WhatsApp merchant relationships Shawn Ryan" -c sources -n 10
|
|
244
|
+
```
|
|
245
|
+
|
|
246
|
+
## Setup and maintenance
|
|
247
|
+
|
|
248
|
+
Only mutate indexes when the user asked for setup or maintenance. Searching and
|
|
249
|
+
retrieving are safe; collection/index mutation is not a casual first step.
|
|
250
|
+
|
|
251
|
+
```bash
|
|
252
|
+
npm install -g @tobilu/qmd
|
|
253
|
+
qmd collection add ~/notes --name notes
|
|
254
|
+
qmd update
|
|
255
|
+
qmd embed
|
|
256
|
+
```
|
|
257
|
+
|
|
258
|
+
Health and diagnostics:
|
|
259
|
+
|
|
260
|
+
```bash
|
|
261
|
+
qmd doctor
|
|
262
|
+
qmd status
|
|
263
|
+
qmd pull
|
|
264
|
+
```
|
|
265
|
+
|
|
266
|
+
`qmd doctor` checks config, model cache, device/GPU setup, vector fingerprints,
|
|
267
|
+
and common environment overrides. If a model-backed command fails, run it before
|
|
268
|
+
changing configuration.
|
|
269
|
+
|
|
270
|
+
## MCP setup
|
|
271
|
+
|
|
272
|
+
See `references/mcp-setup.md` for Claude Code, Claude Desktop, OpenClaw, and HTTP
|
|
273
|
+
server configuration.
|
|
274
|
+
|
|
275
|
+
## Pitfalls
|
|
276
|
+
|
|
277
|
+
- **Do not stop at snippets.** Fetch documents before making claims.
|
|
278
|
+
- **Do not slice files with `sed`/`head`/`tail`.** Use the `path:from:count`
|
|
279
|
+
suffix (e.g. `qmd get "#abc123:120:40"`) or `--from`/`-l`. Output is already
|
|
280
|
+
line-numbered; piping breaks docid resolution, the header, and virtual paths.
|
|
281
|
+
- **Do not lean on query expansion.** Write `intent:`/`lex:`/`vec:`/`hyde:`
|
|
282
|
+
yourself. A bare `qmd query "user sentence"` discards the context only you
|
|
283
|
+
have. You expand the query; the model just ranks.
|
|
284
|
+
- **Do not overuse semantic search.** If you know exact titles or terms, BM25 is
|
|
285
|
+
faster and often better.
|
|
286
|
+
- **Do not mutate indexes casually.** `qmd collection add`, `qmd update`, and
|
|
287
|
+
`qmd embed` change local state and can be expensive.
|
|
288
|
+
- **Model-backed commands can be environment-sensitive.** If `qmd query`,
|
|
289
|
+
`qmd vsearch`, or reranking fails because local models/GPU are unavailable,
|
|
290
|
+
use `qmd search` and stronger lexical/structured terms.
|
|
291
|
+
- **Ambiguous user wording needs intent.** Add `intent:` rather than hoping query
|
|
292
|
+
expansion guesses the right domain.
|
|
293
|
+
- **Collection names matter.** Search `concepts` for synthesized wiki pages,
|
|
294
|
+
`sources` for transcripts/raw source pages, and docs collections for code or
|
|
295
|
+
project documentation.
|