@joycodetech/qmd-ja 2.5.3-ja.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. package/CHANGELOG.md +821 -0
  2. package/LICENSE +21 -0
  3. package/README.md +1143 -0
  4. package/bin/qmd-ja +162 -0
  5. package/dist/ast.d.ts +65 -0
  6. package/dist/ast.js +334 -0
  7. package/dist/bench/bench.d.ts +23 -0
  8. package/dist/bench/bench.js +280 -0
  9. package/dist/bench/score.d.ts +33 -0
  10. package/dist/bench/score.js +88 -0
  11. package/dist/bench/types.d.ts +80 -0
  12. package/dist/bench/types.js +8 -0
  13. package/dist/cli/formatter.d.ts +120 -0
  14. package/dist/cli/formatter.js +355 -0
  15. package/dist/cli/qmd.d.ts +43 -0
  16. package/dist/cli/qmd.js +4179 -0
  17. package/dist/collections.d.ts +166 -0
  18. package/dist/collections.js +410 -0
  19. package/dist/db.d.ts +44 -0
  20. package/dist/db.js +75 -0
  21. package/dist/index.d.ts +230 -0
  22. package/dist/index.js +242 -0
  23. package/dist/llm.d.ts +500 -0
  24. package/dist/llm.js +1615 -0
  25. package/dist/maintenance.d.ts +23 -0
  26. package/dist/maintenance.js +37 -0
  27. package/dist/mcp/server.d.ts +24 -0
  28. package/dist/mcp/server.js +702 -0
  29. package/dist/paths.d.ts +1 -0
  30. package/dist/paths.js +4 -0
  31. package/dist/store.d.ts +1002 -0
  32. package/dist/store.js +4208 -0
  33. package/models/vaporetto-bccwj.model +0 -0
  34. package/package.json +130 -0
  35. package/scripts/build.mjs +30 -0
  36. package/scripts/check-package-grammars.mjs +29 -0
  37. package/scripts/package-smoke.mjs +65 -0
  38. package/scripts/test-all.mjs +38 -0
  39. package/skills/qmd/SKILL.md +295 -0
  40. package/skills/qmd/references/mcp-setup.md +102 -0
  41. package/skills/release/SKILL.md +139 -0
  42. package/skills/release/scripts/install-hooks.sh +38 -0
  43. package/vendor/vaporetto-node-wasm/LICENSE +22 -0
  44. package/vendor/vaporetto-node-wasm/package.json +11 -0
  45. package/vendor/vaporetto-node-wasm/vaporetto_node_wasm.d.ts +19 -0
  46. package/vendor/vaporetto-node-wasm/vaporetto_node_wasm.js +202 -0
  47. package/vendor/vaporetto-node-wasm/vaporetto_node_wasm_bg.wasm +0 -0
  48. package/vendor/vaporetto-node-wasm/vaporetto_node_wasm_bg.wasm.d.ts +13 -0
Binary file
package/package.json ADDED
@@ -0,0 +1,130 @@
1
+ {
2
+ "name": "@joycodetech/qmd-ja",
3
+ "version": "2.5.3-ja.3",
4
+ "description": "Japanese-enhanced fork of qmd — On-device hybrid search with Vaporetto WASM morphological tokenizer for accurate Japanese BM25 full-text search",
5
+ "type": "module",
6
+ "main": "dist/index.js",
7
+ "types": "dist/index.d.ts",
8
+ "exports": {
9
+ ".": {
10
+ "import": "./dist/index.js",
11
+ "types": "./dist/index.d.ts"
12
+ }
13
+ },
14
+ "bin": {
15
+ "qmd-ja": "bin/qmd-ja"
16
+ },
17
+ "files": [
18
+ "bin/",
19
+ "dist/",
20
+ "vendor/vaporetto-node-wasm/vaporetto_node_wasm.js",
21
+ "vendor/vaporetto-node-wasm/vaporetto_node_wasm_bg.wasm",
22
+ "vendor/vaporetto-node-wasm/vaporetto_node_wasm.d.ts",
23
+ "vendor/vaporetto-node-wasm/vaporetto_node_wasm_bg.wasm.d.ts",
24
+ "vendor/vaporetto-node-wasm/package.json",
25
+ "models/vaporetto-bccwj.model",
26
+ "skills/",
27
+ "scripts/build.mjs",
28
+ "scripts/check-package-grammars.mjs",
29
+ "scripts/package-smoke.mjs",
30
+ "scripts/test-all.mjs",
31
+ "LICENSE",
32
+ "CHANGELOG.md"
33
+ ],
34
+ "scripts": {
35
+ "prepare": "[ -d .git ] && ./scripts/install-hooks.sh || true",
36
+ "build": "node scripts/build.mjs",
37
+ "test": "node scripts/test-all.mjs",
38
+ "test:types": "node ./node_modules/typescript/bin/tsc -p tsconfig.build.json --noEmit",
39
+ "test:node": "node ./node_modules/vitest/vitest.mjs run --reporter=verbose --testTimeout 60000",
40
+ "test:bun": "bun test --timeout 60000 --preload ./src/test-preload.ts",
41
+ "test:unit": "CI=true node ./node_modules/vitest/vitest.mjs run --reporter=verbose --testTimeout 60000 test/ && CI=true bun test --timeout 60000 --preload ./src/test-preload.ts test/",
42
+ "test:package": "node scripts/package-smoke.mjs",
43
+ "qmd": "tsx src/cli/qmd.ts",
44
+ "index": "tsx src/cli/qmd.ts index",
45
+ "vector": "tsx src/cli/qmd.ts vector",
46
+ "search": "tsx src/cli/qmd.ts search",
47
+ "vsearch": "tsx src/cli/qmd.ts vsearch",
48
+ "rerank": "tsx src/cli/qmd.ts rerank",
49
+ "inspector": "npx @modelcontextprotocol/inspector tsx src/cli/qmd.ts mcp",
50
+ "release": "./scripts/release.sh",
51
+ "smoke:package-grammars": "node scripts/check-package-grammars.mjs"
52
+ },
53
+ "publishConfig": {
54
+ "access": "public"
55
+ },
56
+ "repository": {
57
+ "type": "git",
58
+ "url": "git+https://github.com/joycodetech/qmd-ja.git"
59
+ },
60
+ "homepage": "https://github.com/joycodetech/qmd-ja#readme",
61
+ "bugs": {
62
+ "url": "https://github.com/joycodetech/qmd-ja/issues"
63
+ },
64
+ "dependencies": {
65
+ "@modelcontextprotocol/sdk": "1.29.0",
66
+ "@types/kuromoji": "^0.1.3",
67
+ "better-sqlite3": "12.10.0",
68
+ "fast-glob": "3.3.3",
69
+ "kuromoji": "^0.1.2",
70
+ "node-llama-cpp": "3.18.1",
71
+ "picomatch": "4.0.4",
72
+ "sqlite-vec": "0.1.9",
73
+ "tree-sitter-go": "0.25.0",
74
+ "tree-sitter-python": "0.25.0",
75
+ "tree-sitter-rust": "0.24.0",
76
+ "tree-sitter-typescript": "0.23.2",
77
+ "web-tree-sitter": "0.26.8",
78
+ "yaml": "2.9.0",
79
+ "zod": "4.2.1"
80
+ },
81
+ "optionalDependencies": {
82
+ "sqlite-vec-darwin-arm64": "0.1.9",
83
+ "sqlite-vec-darwin-x64": "0.1.9",
84
+ "sqlite-vec-linux-arm64": "0.1.9",
85
+ "sqlite-vec-linux-x64": "0.1.9",
86
+ "sqlite-vec-windows-x64": "0.1.9"
87
+ },
88
+ "devDependencies": {
89
+ "@types/better-sqlite3": "7.6.13",
90
+ "tsx": "4.21.0",
91
+ "vitest": "3.2.4"
92
+ },
93
+ "pnpm": {
94
+ "onlyBuiltDependencies": [
95
+ "better-sqlite3",
96
+ "esbuild",
97
+ "node-llama-cpp",
98
+ "tree-sitter-go",
99
+ "tree-sitter-javascript",
100
+ "tree-sitter-python",
101
+ "tree-sitter-rust",
102
+ "tree-sitter-typescript"
103
+ ]
104
+ },
105
+ "peerDependencies": {
106
+ "typescript": "^5.9.3"
107
+ },
108
+ "engines": {
109
+ "node": ">=22.0.0"
110
+ },
111
+ "keywords": [
112
+ "markdown",
113
+ "search",
114
+ "fts",
115
+ "full-text-search",
116
+ "vector",
117
+ "semantic-search",
118
+ "sqlite",
119
+ "bm25",
120
+ "embeddings",
121
+ "rag",
122
+ "mcp",
123
+ "reranking",
124
+ "knowledge-base",
125
+ "local-ai",
126
+ "llm"
127
+ ],
128
+ "author": "Koz Oda <oss@joycodetech.jp>",
129
+ "license": "MIT"
130
+ }
package/scripts/build.mjs ADDED
@@ -0,0 +1,30 @@
1
+ #!/usr/bin/env node
2
+ import { spawnSync } from "node:child_process";
3
+ import { chmodSync, readFileSync, renameSync, writeFileSync } from "node:fs";
4
+ import { join } from "node:path";
5
+ import { fileURLToPath } from "node:url";
6
+
7
+ const root = join(fileURLToPath(new URL("..", import.meta.url)));
8
+
9
/**
 * Run a command synchronously from the repository root and abort the build
 * when it does not exit with status 0.
 *
 * @param {string} command Executable to launch.
 * @param {string[]} args Arguments passed to the executable.
 * @param {object} [options] Extra spawnSync options; they override the
 *   defaults set below.
 */
function run(command, args, options = {}) {
  const result = spawnSync(command, args, {
    cwd: root,
    stdio: "inherit",
    // Windows needs a shell to launch .cmd shims, but the shell re-parses the
    // unquoted command line, so an absolute Node path containing spaces would
    // be split. Spawn the current Node binary directly instead.
    shell: process.platform === "win32" && command !== process.execPath,
    ...options,
  });
  if (result.status !== 0) {
    // A null status means the process never ran or was killed; say why
    // instead of exiting silently.
    if (result.error) {
      console.error(`Could not run ${command}: ${result.error.message}`);
    } else if (result.signal) {
      console.error(`${command} was terminated by signal ${result.signal}`);
    }
    process.exit(result.status ?? 1);
  }
}
20
+
21
// Step 1: compile the project with the locally installed TypeScript compiler.
const tscEntry = join(root, "node_modules", "typescript", "bin", "tsc");
run(process.execPath, [tscEntry, "-p", "tsconfig.build.json"]);

// Step 2: give the compiled CLI exactly one Node shebang and mark it
// executable. The new content is written to a sibling temp file and renamed
// over the original.
const cliPath = join(root, "dist", "cli", "qmd.js");
const stagingPath = cliPath + ".tmp";

let cliSource = readFileSync(cliPath, "utf8");
if (cliSource.startsWith("#!")) {
  // Drop the existing first line so the shebang is not duplicated.
  cliSource = cliSource.slice(cliSource.indexOf("\n") + 1);
}

writeFileSync(stagingPath, "#!/usr/bin/env node\n" + cliSource);
renameSync(stagingPath, cliPath);
chmodSync(cliPath, 0o755);
package/scripts/check-package-grammars.mjs ADDED
@@ -0,0 +1,29 @@
1
+ #!/usr/bin/env node
2
+ import { createRequire } from "node:module";
3
+
4
// Smoke check: every tree-sitter grammar .wasm listed below must be
// resolvable from this script's location. Prints one line per grammar and
// exits with status 1 if any of them cannot be resolved.
const require = createRequire(import.meta.url);

const grammars = [
  "tree-sitter-typescript/tree-sitter-typescript.wasm",
  "tree-sitter-typescript/tree-sitter-tsx.wasm",
  "tree-sitter-python/tree-sitter-python.wasm",
  "tree-sitter-go/tree-sitter-go.wasm",
  "tree-sitter-rust/tree-sitter-rust.wasm",
];

// Collect the grammars that fail to resolve, reporting each result as we go.
const unresolved = grammars.filter((grammar) => {
  try {
    console.log(`ok ${grammar} -> ${require.resolve(grammar)}`);
    return false;
  } catch (err) {
    console.error(`missing ${grammar}`);
    console.error(err instanceof Error ? err.message : String(err));
    return true;
  }
});

if (unresolved.length > 0) {
  console.error("\nAST grammar package smoke check failed. Run `bun install` locally or repair a broken global install with the matching `bun add tree-sitter-...@<version>` command shown by `qmd status`.");
  process.exit(1);
}
package/scripts/package-smoke.mjs ADDED
@@ -0,0 +1,65 @@
1
+ #!/usr/bin/env node
2
+ import { spawnSync } from "node:child_process";
3
+ import { existsSync, readFileSync, statSync } from "node:fs";
4
+ import { join } from "node:path";
5
+ import { fileURLToPath } from "node:url";
6
+
7
+ const root = fileURLToPath(new URL("..", import.meta.url));
8
+ const pkg = JSON.parse(readFileSync(join(root, "package.json"), "utf8"));
9
+
10
/**
 * Run one labelled smoke-test step from the package root and exit the script
 * when the step does not finish with status 0.
 *
 * @param {string} label Human-readable step name printed before the run and
 *   in the failure message.
 * @param {string} command Executable to launch.
 * @param {string[]} args Arguments passed to the executable.
 * @param {object} [options] spawnSync options plus `quiet`; when `quiet` is
 *   truthy the child's output is captured and only replayed (to stderr) if
 *   the step fails.
 */
function run(label, command, args, options = {}) {
  console.log(`==> ${label}`);
  const { quiet, ...spawnOptions } = options;
  const result = spawnSync(command, args, {
    cwd: root,
    stdio: quiet ? "pipe" : "inherit",
    // Windows needs a shell to launch .cmd shims, but the shell re-parses the
    // unquoted command line, so an absolute Node path containing spaces would
    // be split. Spawn the current Node binary directly instead.
    shell: process.platform === "win32" && command !== process.execPath,
    ...spawnOptions,
  });
  if (result.status !== 0) {
    console.error(`Package smoke failed: ${label}`);
    // A null status means the process never ran or was killed; say why.
    if (result.error) {
      console.error(`Could not run ${command}: ${result.error.message}`);
    } else if (result.signal) {
      console.error(`${command} was terminated by signal ${result.signal}`);
    }
    if (quiet) {
      if (result.stdout) process.stderr.write(result.stdout);
      if (result.stderr) process.stderr.write(result.stderr);
    }
    process.exit(result.status ?? 1);
  }
}
28
+
29
/**
 * Ensure a path relative to the package root exists, exiting with status 1
 * when it does not.
 *
 * @param {string} path Path relative to the package root.
 * @param {string} [label] Description used in the failure message; defaults
 *   to the path itself.
 * @returns {string} The path joined onto the package root.
 */
function assertPath(path, label = path) {
  const resolved = join(root, path);
  if (existsSync(resolved)) return resolved;

  console.error(`Package smoke failed: missing ${label} (${path})`);
  process.exit(1);
  // Only reached if process.exit is stubbed; keeps the return value unchanged.
  return resolved;
}
37
+
38
// Build first so the compiled artifacts checked below exist.
run("build compiled package", process.execPath, ["scripts/build.mjs"]);
run("AST grammar runtime packages", process.execPath, ["scripts/check-package-grammars.mjs"]);

// Every package.json files[] entry must exist (a trailing "/" marks a directory).
for (const entry of pkg.files ?? []) {
  assertPath(entry.replace(/\/$/, ""), `package.json files[] entry ${entry}`);
}

// Every declared bin must exist and have at least one execute bit set.
for (const [name, binPath] of Object.entries(pkg.bin ?? {})) {
  const full = assertPath(binPath, `bin ${name}`);
  const mode = statSync(full).mode;
  if ((mode & 0o111) === 0) {
    console.error(`Package smoke failed: bin ${name} is not executable (${binPath})`);
    process.exit(1);
  }
}

assertPath("dist/index.js", "compiled main export");
assertPath("dist/index.d.ts", "compiled type export");
assertPath("dist/cli/qmd.js", "compiled CLI");

run("compiled CLI under Node", process.execPath, ["dist/cli/qmd.js", "--help"], { quiet: true });

// Exercise the wrapper(s) declared in package.json instead of a hard-coded
// "bin/qmd": this package registers its wrapper as bin/qmd-ja, so the old
// path no longer exists.
for (const [name, binPath] of Object.entries(pkg.bin ?? {})) {
  run(`package wrapper ${name}`, "sh", [binPath, "--help"], { quiet: true });
}

if (process.env.QMD_SKIP_BUN_SMOKE === "1") {
  console.log("==> compiled CLI under Bun (skipped by QMD_SKIP_BUN_SMOKE=1)");
} else {
  run("compiled CLI under Bun", "bun", ["dist/cli/qmd.js", "--help"], { quiet: true });
}
package/scripts/test-all.mjs ADDED
@@ -0,0 +1,38 @@
1
+ #!/usr/bin/env node
2
+ import { spawnSync } from "node:child_process";
3
+ import { join } from "node:path";
4
+ import { fileURLToPath } from "node:url";
5
+
6
+ const root = fileURLToPath(new URL("..", import.meta.url));
7
+
8
// Apply the CLI wrapper's darwin Metal residency mitigation to test
// subprocesses as well. (The wrapper is referred to upstream as bin/qmd;
// package.json in this package registers it as bin/qmd-ja.)
//
// libggml-metal asserts on a non-empty residency set during its static
// destructor (ggml-org/llama.cpp#22593, fix open as #22595) and dumps a
// multi-kB backtrace at process exit even when tests pass. The variable has
// to be in the environment BEFORE the subprocess starts because libggml-metal
// reads it via libc getenv at module-load time.
//
// Opt out with QMD_METAL_KEEP_RESIDENCY=1.
const keepMetalResidency = process.env.QMD_METAL_KEEP_RESIDENCY === "1";
const darwinMetalEnv =
  process.platform === "darwin" && !keepMetalResidency
    ? { GGML_METAL_NO_RESIDENCY: "1" }
    : {};
18
+
19
/**
 * Run one labelled test task from the repository root and exit the script
 * when the task does not finish with status 0.
 *
 * The child environment is process.env, then the darwin Metal mitigation,
 * then any `options.env` entries, with later sources winning.
 *
 * @param {string} label Human-readable task name printed before the run and
 *   in the failure message.
 * @param {string} command Executable to launch.
 * @param {string[]} args Arguments passed to the executable.
 * @param {object} [options] spawnSync options; `env` is merged as described
 *   above, the rest override the defaults set below.
 */
function run(label, command, args, options = {}) {
  console.log(`==> ${label}`);
  const { env: extraEnv, ...spawnOptions } = options;
  const result = spawnSync(command, args, {
    cwd: root,
    stdio: "inherit",
    // Windows needs a shell to launch .cmd shims, but the shell re-parses the
    // unquoted command line, so an absolute Node path containing spaces would
    // be split. Spawn the current Node binary directly instead.
    shell: process.platform === "win32" && command !== process.execPath,
    env: { ...process.env, ...darwinMetalEnv, ...(extraEnv ?? {}) },
    ...spawnOptions,
  });
  if (result.status !== 0) {
    console.error(`Test task failed: ${label}`);
    // A null status means the process never ran or was killed; say why.
    if (result.error) {
      console.error(`Could not run ${command}: ${result.error.message}`);
    } else if (result.signal) {
      console.error(`${command} was terminated by signal ${result.signal}`);
    }
    process.exit(result.status ?? 1);
  }
}
34
+
35
// Tasks run strictly in this order; run() exits the script on the first failure.
const ciOptions = { env: { CI: "true" } };
const tasks = [
  [
    "TypeScript build typecheck",
    process.execPath,
    [join(root, "node_modules", "typescript", "bin", "tsc"), "-p", "tsconfig.build.json", "--noEmit"],
  ],
  [
    "Vitest suite under Node",
    process.execPath,
    [join(root, "node_modules", "vitest", "vitest.mjs"), "run", "--reporter=verbose", "--testTimeout", "60000", "test/"],
    ciOptions,
  ],
  [
    "Bun test suite",
    "bun",
    ["test", "--timeout", "60000", "--preload", "./src/test-preload.ts", "test/"],
    ciOptions,
  ],
  ["Package smoke", process.execPath, ["scripts/package-smoke.mjs"]],
];

for (const [label, command, args, options] of tasks) {
  run(label, command, args, options);
}
package/skills/qmd/SKILL.md ADDED
@@ -0,0 +1,295 @@
1
+ ---
2
+ name: qmd
3
+ description: Search local markdown knowledge bases, notes, docs, and wikis with QMD. Use when users ask to find notes, retrieve documents, inspect a wiki, answer from indexed markdown, or set up QMD access.
4
+ license: MIT
5
+ compatibility: Requires qmd CLI or MCP server. Install via `npm install -g @tobilu/qmd`.
6
+ metadata:
7
+ author: tobi
8
+ version: "2.2.0"
9
+ allowed-tools: Bash(qmd:*), mcp__qmd__*
10
+ ---
11
+
12
+ # QMD - Query Markdown Documents
13
+
14
+ ## How search works
15
+
16
+ QMD searches local markdown collections: notes, docs, wikis, transcripts, and
17
+ project knowledge bases. Use it before web search when the answer may already be
18
+ in indexed local files.
19
+
20
+ The workflow is always:
21
+
22
+ 1. Search for candidate documents.
23
+ 2. Retrieve the full source with `qmd get` or `qmd multi-get`.
24
+ 3. Answer from retrieved text, citing paths or docids.
25
+
26
+ Do not answer from snippets alone when the user needs facts, decisions, quotes,
27
+ or nuance. Snippets are only leads.
28
+
29
+ Typical loop:
30
+
31
+ ```bash
32
+ qmd search "merchant reality support interviews" -n 5
33
+ # leads: #abc123 concepts/customer-proximity.md; #def432 sources/merchant-call.md
34
+ qmd multi-get "#abc123,#def432" --format md
35
+ ```
36
+
37
+ **Default to structured `qmd query` with `intent:`, `lex:`, `vec:`, and `hyde:`
38
+ fields that you write yourself.** You are a better query expander than the
39
+ built-in model: you know the user's actual goal, the domain vocabulary, and the
40
+ nearby-but-wrong concepts to avoid. Do not just paste the user's words into
41
+ `qmd query "..."` and hope the expansion model guesses right — supply the
42
+ `intent:` and craft the lexical and semantic terms deliberately (see
43
+ [Pick the right search mode](#pick-the-right-search-mode)).
44
+
45
+ When reporting what you retrieved, a compact note is enough; do not paste whole
46
+ files unless needed:
47
+
48
+ ```text
49
+ Retrieved:
50
+ - #abc123 concepts/customer-proximity.md
51
+ - #def432 sources/merchant-call.md
52
+ ```
53
+
54
+ ## Pick the right search mode
55
+
56
+ Use **BM25 lexical search** when you know exact words, titles, names, code
57
+ symbols, or rare phrases:
58
+
59
+ ```bash
60
+ qmd search "cockpit OKR Goodhart" -n 10
61
+ qmd search '"AI Before Headcount"' -c concepts -n 5
62
+ ```
63
+
64
+ Use **`qmd query` with structured fields** when the user describes an idea
65
+ indirectly, uses different wording than the source, or needs conceptual recall.
66
+ **This is the default mode — write the fields yourself rather than leaning on
67
+ query expansion.** Combine exact anchors with semantic recall:
68
+
69
+ ```bash
70
+ qmd query $'intent: Find the concept note about metrics as instruments without letting OKRs replace judgment.\nlex: cockpit instruments OKR Goodhart metrics judgment\nvec: data informed not metric driven product judgment\nhyde: A concept note says metrics are useful like cockpit instruments, but leaders should remain data-informed rather than metric-driven because OKRs and dashboards can Goodhart product judgment.'
71
+ ```
72
+
73
+ Structured query fields (you author each one — do not delegate this to the
74
+ expansion model):
75
+
76
+ - `intent:` states what you are trying to find **and what to avoid**. Always
77
+ supply this. It steers ranking away from nearby-but-wrong concepts.
78
+ - `lex:` exact terms, aliases, titles, code symbols, and rare words you expect
79
+ in the source. This is your own keyword expansion.
80
+ - `vec:` paraphrases the idea in natural language, in source-like wording.
81
+ - `hyde:` describes the document or answer that would satisfy the request.
82
+
83
+ You do not need all four every time, but you should almost always write at least
84
+ `intent:` plus one of `lex:`/`vec:`. A bare `qmd query "the user's sentence"`
85
+ throws away the context only you have and relies on the built-in expander to
86
+ reconstruct it — prefer the structured form.
87
+
88
+ If you genuinely have nothing to expand (a single rare token, a verbatim phrase),
89
+ that is a job for `qmd search`, not bare `qmd query`. To inspect how a structured query was ranked, add `--format json --explain`:
90
+
91
+ ```bash
92
+ qmd query --format json --explain $'intent: ...\nlex: ...\nvec: ...' # inspect ranking
93
+ ```
94
+
95
+ If `qmd query` is slow or model/GPU setup fails, fall back to `qmd search` with
96
+ better lexical terms.
97
+
98
+ ## Retrieve sources
99
+
100
+ Search results include docids like `#abc123` and `qmd://...` paths. Fetch them:
101
+
102
+ ```bash
103
+ qmd get "#abc123"
104
+ qmd get qmd://concepts/ai-before-headcount.md
105
+ qmd multi-get "#abc123,#def432" --format md
106
+ qmd multi-get 'concepts/{ai-before-headcount.md,data-informed-not-metric-driven.md}' --format md
107
+ qmd multi-get 'sources/podcast-2025-*.md' -l 80
108
+ ```
109
+
110
+ Use `multi-get` when comparing several hits or gathering context across pages.
111
+
112
+ ### Output is line-numbered and carries the docid — cite both
113
+
114
+ `get` and `multi-get` are **line-numbered by default** and always print the
115
+ document's `#docid` and `qmd://` path. So `get` output looks like:
116
+
117
+ ```text
118
+ qmd://concepts/note.md #abc123
119
+ ---
120
+
121
+ 1: # Metrics as instruments
122
+ 2:
123
+ 3: Treat dashboards like cockpit instruments...
124
+ ```
125
+
126
+ Cite the docid and exact line numbers in your answer, and use the numbers to ask
127
+ for the next slice. Pass `--no-line-numbers` only when you need raw content to
128
+ copy verbatim (e.g. reproducing a code block).
129
+
130
+ When you need to open or edit the underlying file (e.g. hand a path to `Read`,
131
+ `Edit`, or an editor), add `--full-path`. It replaces the `qmd://` URL + docid
132
+ header with the document's on-disk path, falling back to the canonical header if
133
+ the file no longer exists on disk:
134
+
135
+ ```text
136
+ $ qmd get "#abc123" --full-path
137
+ /Users/you/notes/concepts/note.md
138
+ ---
139
+
140
+ 1: # Metrics as instruments
141
+ ```
142
+
143
+ `--full-path` works the same way on `qmd search` and `qmd query`: result paths
144
+ become the file's on-disk path — `./`-prefixed relative path when the file is
145
+ inside `$PWD`, absolute realpath otherwise — and the per-result `#docid` is
146
+ dropped because the path is the identifier. The leading `./` is intentional so
147
+ the output is unambiguously a filesystem path and cannot be mistaken for a bare
148
+ collection-relative string. Default search/query output still uses `qmd://`
149
+ URIs; only opt into `--full-path` when you specifically need a path you can hand
150
+ to a non-QMD tool.
151
+
152
+ ### Read line ranges with the `:from:count` suffix — never pipe through `sed`/`head`/`tail`
153
+
154
+ `qmd get` slices files itself. Use the suffix or flags; do **not** shell out to
155
+ `sed -n`, `head`, `tail`, or `awk` to pull a line range. Piping defeats docid
156
+ resolution, virtual-path lookups, line numbering, and the header, and it is
157
+ slower and more error-prone.
158
+
159
+ The most compact form is a `:from:count` suffix right on the path or docid —
160
+ prefer it:
161
+
162
+ ```bash
163
+ qmd get "#abc123:120:40" # 40 lines starting at line 120
164
+ qmd get qmd://concepts/note.md:200:60 # lines 200–259
165
+ qmd get "#abc123:120" # from line 120 to end of file
166
+ qmd get "#abc123" --from 120 -l 40 # equivalent, using flags
167
+ ```
168
+
169
+ Suffix and flags:
170
+
171
+ - `<path>:<from>:<count>` — start at line `<from>`, read `<count>` lines. **Best
172
+ for reading around a search hit.**
173
+ - `<path>:<from>` — start at `<from>`, read to end of file.
174
+ - `--from <line>` / `-l <lines>` — flag equivalents. Explicit flags override the
175
+ suffix, so `... :5:2 -l 1` reads 1 line.
176
+ - `--no-line-numbers` — drop the `N:` prefixes (line numbers are on by default).
177
+
178
+ Wrong: `qmd get "#abc123" | sed -n '120,160p'`
179
+ Right: `qmd get "#abc123:120:40"`
180
+
181
+ Search results include a `:line` anchor on each hit — feed it straight into
182
+ `qmd get path:line:<n>` to read a window around the match (line numbers in the
183
+ output will start at `line`).
184
+
185
+ ## Discover what is indexed
186
+
187
+ ```bash
188
+ qmd collection list
189
+ qmd ls
190
+ qmd status
191
+ ```
192
+
193
+ Add collection filters when broad searches drift into the wrong corpus:
194
+
195
+ ```bash
196
+ qmd search "headcount autonomous agents" -c concepts -n 10
197
+ qmd query "merchant support product reality" -c concepts -c sources -n 10
198
+ ```
199
+
200
+ Omit `-c` to search everything.
201
+
202
+ ## MCP Tool: `query`
203
+
204
+ When using the MCP server, prefer structured searches:
205
+
206
+ ```json
207
+ {
208
+ "searches": [
209
+ { "type": "lex", "query": "cockpit OKR Goodhart" },
210
+ { "type": "vec", "query": "data informed not metric driven product judgment" },
211
+ { "type": "hyde", "query": "A concept note explains that metrics are useful as instruments, but leaders should not let OKRs or dashboards replace judgment." }
212
+ ],
213
+ "intent": "Find the concept note about using metrics as instruments without becoming metric-driven.",
214
+ "collections": ["concepts"],
215
+ "limit": 10
216
+ }
217
+ ```
218
+
219
+ Query types:
220
+
221
+ - `lex` — BM25 keyword search. Best for exact terms, names, titles, and code.
222
+ - `vec` — vector semantic search. Best for natural-language concepts.
223
+ - `hyde` — vector search using a hypothetical answer/document passage.
224
+
225
+ ## Query craft
226
+
227
+ Good QMD searches mix three things:
228
+
229
+ 1. **Title/alias anchors:** exact page titles, named entities, phrases.
230
+ 2. **Semantic paraphrase:** how a human would describe the idea.
231
+ 3. **Negative space:** enough intent to avoid nearby-but-wrong concepts.
232
+
233
+ Examples:
234
+
235
+ ```bash
236
+ # Exact-ish title lookup
237
+ qmd search '"arm the rebels" merchants tools big companies' -c concepts
238
+
239
+ # Semantic concept lookup
240
+ qmd query $'intent: Find the customer proximity concept, not generic customer delight.\nlex: support pseudonymous merchant customer interviews\nvec: founder stays close to merchant reality through support and product use'
241
+
242
+ # Source lookup
243
+ qmd search "six-week cadence WhatsApp merchant relationships Shawn Ryan" -c sources -n 10
244
+ ```
245
+
246
+ ## Setup and maintenance
247
+
248
+ Only mutate indexes when the user asked for setup or maintenance. Searching and
249
+ retrieving are safe; collection/index mutation is not a casual first step.
250
+
251
+ ```bash
252
+ npm install -g @tobilu/qmd
253
+ qmd collection add ~/notes --name notes
254
+ qmd update
255
+ qmd embed
256
+ ```
257
+
258
+ Health and diagnostics:
259
+
260
+ ```bash
261
+ qmd doctor
262
+ qmd status
263
+ qmd pull
264
+ ```
265
+
266
+ `qmd doctor` checks config, model cache, device/GPU setup, vector fingerprints,
267
+ and common environment overrides. If a model-backed command fails, run `qmd doctor` before
268
+ changing configuration.
269
+
270
+ ## MCP setup
271
+
272
+ See `references/mcp-setup.md` for Claude Code, Claude Desktop, OpenClaw, and HTTP
273
+ server configuration.
274
+
275
+ ## Pitfalls
276
+
277
+ - **Do not stop at snippets.** Fetch documents before making claims.
278
+ - **Do not slice files with `sed`/`head`/`tail`.** Use the `path:from:count`
279
+ suffix (e.g. `qmd get "#abc123:120:40"`) or `--from`/`-l`. Output is already
280
+ line-numbered; piping breaks docid resolution, the header, and virtual paths.
281
+ - **Do not lean on query expansion.** Write `intent:`/`lex:`/`vec:`/`hyde:`
282
+ yourself. A bare `qmd query "user sentence"` discards the context only you
283
+ have. You expand the query; the model just ranks.
284
+ - **Do not overuse semantic search.** If you know exact titles or terms, BM25 is
285
+ faster and often better.
286
+ - **Do not mutate indexes casually.** `qmd collection add`, `qmd update`, and
287
+ `qmd embed` change local state and can be expensive.
288
+ - **Model-backed commands can be environment-sensitive.** If `qmd query`,
289
+ `qmd vsearch`, or reranking fails because local models/GPU are unavailable,
290
+ use `qmd search` and stronger lexical/structured terms.
291
+ - **Ambiguous user wording needs intent.** Add `intent:` rather than hoping query
292
+ expansion guesses the right domain.
293
+ - **Collection names matter.** Search `concepts` for synthesized wiki pages,
294
+ `sources` for transcripts/raw source pages, and docs collections for code or
295
+ project documentation.