org-qmd 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/qmd ADDED
@@ -0,0 +1,32 @@
#!/bin/sh
# qmd launcher: finds the package directory behind any symlinks, then runs
# dist/cli/qmd.js with the JavaScript runtime that matches how the package's
# dependencies were installed.

# Resolve symlinks so global installs (npm link / npm install -g) can find the
# actual package directory instead of the global bin directory.
SOURCE="$0"
while [ -L "$SOURCE" ]; do
  SOURCE_DIR="$(cd -P "$(dirname "$SOURCE")" && pwd)"
  TARGET="$(readlink "$SOURCE")"
  case "$TARGET" in
    /*) SOURCE="$TARGET" ;;             # absolute link target
    *) SOURCE="$SOURCE_DIR/$TARGET" ;;  # relative to the link's own directory
  esac
done

# Package root: the parent of the directory that holds the resolved script.
DIR="$(cd -P "$(dirname "$SOURCE")/.." && pwd)"
if [ -z "$DIR" ]; then
  # cd failed; without this guard we would try to run "/dist/cli/qmd.js".
  echo "qmd: unable to locate the package directory for $0" >&2
  exit 1
fi
ENTRY="$DIR/dist/cli/qmd.js"

# Detect the runtime used to install this package and use the matching one
# to avoid native module ABI mismatches (e.g., better-sqlite3 compiled for
# bun vs node). Detection is based on lockfiles.
# $BUN_INSTALL is intentionally NOT checked — it only indicates that bun exists
# on the system, not that it was used to install this package (see #361).
#
# package-lock.json takes priority: if it exists, npm installed the native
# modules for Node. The repo ships bun.lock, so without this check, source
# builds that use npm would be incorrectly routed to bun, causing ABI
# mismatches with better-sqlite3 / sqlite-vec (see #381).
#
# bun is only chosen when it is actually on PATH: a bun lockfile on a machine
# without bun cannot mean bun installed the dependencies, and exec'ing a
# missing binary would abort the launcher instead of running the CLI.
if [ ! -f "$DIR/package-lock.json" ] &&
   { [ -f "$DIR/bun.lock" ] || [ -f "$DIR/bun.lockb" ]; } &&
   command -v bun >/dev/null 2>&1; then
  exec bun "$ENTRY" "$@"
fi

# Default runtime: npm lockfile present, no lockfile at all, or bun unavailable.
exec node "$ENTRY" "$@"
package/dist/ast.d.ts ADDED
@@ -0,0 +1,64 @@
1
+ /**
2
+ * AST-aware chunking support via web-tree-sitter.
3
+ *
4
+ * Provides language detection, AST break point extraction for supported
5
+ * code file types, and a stub for future symbol extraction.
6
+ *
7
+ * All functions degrade gracefully: parse failures or unsupported languages
8
+ * return empty arrays, falling back to regex-only chunking.
9
+ *
10
+ * ## Dependency Note
11
+ *
12
+ * Grammar packages (tree-sitter-typescript, etc.) are listed as
13
+ * optionalDependencies with pinned versions. They ship native prebuilds
14
+ * and source files (~72 MB total) but QMD only uses the .wasm files
15
+ * (~5 MB). If install size becomes a concern, the .wasm files can be
16
+ * bundled directly in the repo (e.g. assets/grammars/) and resolved
17
+ * via import.meta.url instead of require.resolve(), eliminating the
18
+ * grammar packages entirely.
19
+ */
20
+ import type { BreakPoint } from "./store.js";
/**
 * Identifiers of the languages that AST-aware chunking recognises.
 * detectLanguage() yields one of these values, or null.
 */
export type SupportedLanguage =
    | "typescript"
    | "tsx"
    | "javascript"
    | "python"
    | "go"
    | "rust";
/**
 * Map a file path to a supported language using its extension.
 *
 * @param filepath - Path whose extension is inspected.
 * @returns The matching language, or null when the extension is unsupported
 *          or unknown (this includes .md).
 */
export declare function detectLanguage(
    filepath: string,
): SupportedLanguage | null;
/**
 * Parse a source file and report break points located at AST node boundaries.
 *
 * Unsupported languages, parse failures and grammar loading failures all
 * produce an empty array. Never throws.
 *
 * @param content - The file content to parse.
 * @param filepath - The file path (used for language detection).
 * @returns BreakPoint objects suitable for merging with regex break points.
 */
export declare function getASTBreakPoints(
    content: string,
    filepath: string,
): Promise<BreakPoint[]>;
/**
 * Report which tree-sitter grammars are available.
 *
 * @returns An overall `available` flag plus one status entry per supported
 *          language; `error` is optional on each entry.
 */
export declare function getASTStatus(): Promise<{
    available: boolean;
    languages: Array<{
        language: SupportedLanguage;
        available: boolean;
        error?: string;
    }>;
}>;
/**
 * Metadata about a code symbol within a chunk.
 *
 * This is the element type of the array returned by extractSymbols(), which
 * is documented as a Phase 2 stub that returns an empty array.
 */
export interface SymbolInfo {
    /** Symbol name. */
    name: string;
    /** Symbol kind, as a free-form string. */
    kind: string;
    /** Optional signature text for the symbol. */
    signature?: string;
    /** Line of the symbol. NOTE(review): 0- vs 1-based is not specified here — confirm. */
    line: number;
}
/**
 * Extract symbol metadata for code within a byte range.
 * Stubbed for Phase 2 — returns an empty array.
 *
 * @param _content - Source text (underscore-prefixed: reserved for Phase 2).
 * @param _language - Language identifier.
 * @param _startPos - Start of the range.
 * @param _endPos - End of the range.
 * @returns Symbol descriptions for the range.
 */
export declare function extractSymbols(
    _content: string,
    _language: string,
    _startPos: number,
    _endPos: number,
): SymbolInfo[];
package/dist/ast.js ADDED
@@ -0,0 +1,324 @@
1
+ /**
2
+ * AST-aware chunking support via web-tree-sitter.
3
+ *
4
+ * Provides language detection, AST break point extraction for supported
5
+ * code file types, and a stub for future symbol extraction.
6
+ *
7
+ * All functions degrade gracefully: parse failures or unsupported languages
8
+ * return empty arrays, falling back to regex-only chunking.
9
+ *
10
+ * ## Dependency Note
11
+ *
12
+ * Grammar packages (tree-sitter-typescript, etc.) are listed as
13
+ * optionalDependencies with pinned versions. They ship native prebuilds
14
+ * and source files (~72 MB total) but QMD only uses the .wasm files
15
+ * (~5 MB). If install size becomes a concern, the .wasm files can be
16
+ * bundled directly in the repo (e.g. assets/grammars/) and resolved
17
+ * via import.meta.url instead of require.resolve(), eliminating the
18
+ * grammar packages entirely.
19
+ */
20
+ import { createRequire } from "node:module";
21
+ import { extname } from "node:path";
/**
 * Lower-cased file extension (leading dot included) -> language identifier.
 * .jsx is routed to the "tsx" language and .mts/.cts to "typescript".
 */
const EXTENSION_MAP = Object.fromEntries([
    [".ts", "typescript"],
    [".tsx", "tsx"],
    [".js", "javascript"],
    [".jsx", "tsx"],
    [".mts", "typescript"],
    [".cts", "typescript"],
    [".mjs", "javascript"],
    [".cjs", "javascript"],
    [".py", "python"],
    [".go", "go"],
    [".rs", "rust"],
]);
/**
 * Detect language from file path extension.
 * The extension is compared case-insensitively.
 * Returns null for unsupported or unknown extensions (including .md).
 */
export function detectLanguage(filepath) {
    const suffix = extname(filepath).toLowerCase();
    const language = EXTENSION_MAP[suffix];
    if (language === undefined || language === null) {
        return null;
    }
    return language;
}
43
+ // =============================================================================
44
+ // Grammar Resolution
45
+ // =============================================================================
/**
 * Language -> grammar location: the npm package that ships the grammar and
 * the .wasm file name inside it. The "javascript" entry points at the same
 * package and wasm file as "typescript".
 */
const GRAMMAR_MAP = (() => {
    const TYPESCRIPT_PACKAGE = "tree-sitter-typescript";
    const locate = (pkg, wasm) => ({ pkg, wasm });
    return {
        typescript: locate(TYPESCRIPT_PACKAGE, "tree-sitter-typescript.wasm"),
        tsx: locate(TYPESCRIPT_PACKAGE, "tree-sitter-tsx.wasm"),
        javascript: locate(TYPESCRIPT_PACKAGE, "tree-sitter-typescript.wasm"),
        python: locate("tree-sitter-python", "tree-sitter-python.wasm"),
        go: locate("tree-sitter-go", "tree-sitter-go.wasm"),
        rust: locate("tree-sitter-rust", "tree-sitter-rust.wasm"),
    };
})();
57
+ // =============================================================================
58
+ // Per-Language Query Definitions
59
+ // =============================================================================
60
+ /**
61
+ * Tree-sitter S-expression queries for each language.
62
+ * Each capture name maps to a break point score via SCORE_MAP.
63
+ *
64
+ * For TypeScript/JavaScript, we match export_statement wrappers to get the
65
+ * correct start position (before `export`), plus bare declarations for
66
+ * non-exported code.
67
+ */
68
+ const LANGUAGE_QUERIES = {
69
+ typescript: `
70
+ (export_statement) @export
71
+ (class_declaration) @class
72
+ (function_declaration) @func
73
+ (method_definition) @method
74
+ (interface_declaration) @iface
75
+ (type_alias_declaration) @type
76
+ (enum_declaration) @enum
77
+ (import_statement) @import
78
+ (lexical_declaration (variable_declarator value: (arrow_function))) @func
79
+ (lexical_declaration (variable_declarator value: (function_expression))) @func
80
+ `,
81
+ tsx: `
82
+ (export_statement) @export
83
+ (class_declaration) @class
84
+ (function_declaration) @func
85
+ (method_definition) @method
86
+ (interface_declaration) @iface
87
+ (type_alias_declaration) @type
88
+ (enum_declaration) @enum
89
+ (import_statement) @import
90
+ (lexical_declaration (variable_declarator value: (arrow_function))) @func
91
+ (lexical_declaration (variable_declarator value: (function_expression))) @func
92
+ `,
93
+ javascript: `
94
+ (export_statement) @export
95
+ (class_declaration) @class
96
+ (function_declaration) @func
97
+ (method_definition) @method
98
+ (import_statement) @import
99
+ (lexical_declaration (variable_declarator value: (arrow_function))) @func
100
+ (lexical_declaration (variable_declarator value: (function_expression))) @func
101
+ `,
102
+ python: `
103
+ (class_definition) @class
104
+ (function_definition) @func
105
+ (decorated_definition) @decorated
106
+ (import_statement) @import
107
+ (import_from_statement) @import
108
+ `,
109
+ go: `
110
+ (type_declaration) @type
111
+ (function_declaration) @func
112
+ (method_declaration) @method
113
+ (import_declaration) @import
114
+ `,
115
+ rust: `
116
+ (struct_item) @struct
117
+ (impl_item) @impl
118
+ (function_item) @func
119
+ (trait_item) @trait
120
+ (enum_item) @enum
121
+ (use_declaration) @import
122
+ (type_item) @type
123
+ (mod_item) @mod
124
+ `,
125
+ };
/**
 * Capture name -> break point score.
 *
 * Scores are grouped into tiers aligned with the markdown BREAK_PATTERNS
 * scale (h1=100, h2=90, etc.) so findBestCutoff() decay works unchanged.
 */
const SCORE_MAP = Object.fromEntries([
    { score: 100, captures: ["class", "iface", "struct", "trait", "impl", "mod"] },
    { score: 90, captures: ["export", "func", "method", "decorated"] },
    { score: 80, captures: ["type", "enum"] },
    { score: 60, captures: ["import"] },
].flatMap((tier) => tier.captures.map((capture) => [capture, tier.score])));
146
+ // =============================================================================
147
+ // Parser Caching & Initialization
148
+ // =============================================================================
/**
 * web-tree-sitter constructors. All three stay null until ensureInit() has
 * imported the module and assigned them.
 */
let ParserClass = null, LanguageClass = null, QueryClass = null;
/** Promise for the one-time web-tree-sitter initialization; null before the first ensureInit() call. */
let initPromise = null;
/** Languages that have already failed to load — warn only once per process. */
const failedLanguages = new Set();
/** Grammar load promises, keyed by .wasm file name. */
const grammarCache = new Map();
/** Compiled queries, keyed by language. */
const queryCache = new Map();
/**
 * Initialize web-tree-sitter exactly once.
 *
 * The first call imports the module, stores its Parser, Language and Query
 * exports in the module-level variables, and awaits Parser.init(). Every
 * call — including the first — returns the same cached promise, so a failed
 * initialization is also remembered.
 */
async function ensureInit() {
    if (initPromise) {
        return initPromise;
    }
    const bootstrap = async () => {
        const treeSitter = await import("web-tree-sitter");
        ParserClass = treeSitter.Parser;
        LanguageClass = treeSitter.Language;
        QueryClass = treeSitter.Query;
        await ParserClass.init();
    };
    initPromise = bootstrap();
    return initPromise;
}
/**
 * Resolve the filesystem path of the grammar .wasm file for a language.
 *
 * The file is looked up as `<package>/<wasm file>` through a require
 * function created relative to this module, so it is found in the installed
 * dependency packages. Throws if the file cannot be resolved.
 */
function resolveGrammarPath(language) {
    const grammar = GRAMMAR_MAP[language];
    const specifier = `${grammar.pkg}/${grammar.wasm}`;
    const localRequire = createRequire(import.meta.url);
    return localRequire.resolve(specifier);
}
/**
 * Load and cache a grammar for the given language.
 *
 * Load promises are cached per .wasm file, so languages that share a wasm
 * file share one load. A language whose load failed is remembered in
 * failedLanguages and returns null immediately on later calls.
 *
 * @param language - Key of GRAMMAR_MAP.
 * @returns The loaded grammar, or null on failure (warns once per language).
 */
async function loadGrammar(language) {
    if (failedLanguages.has(language))
        return null;
    const wasmKey = GRAMMAR_MAP[language].wasm;
    let pending = grammarCache.get(wasmKey);
    if (!pending) {
        // Resolution happens inside the async wrapper so a missing package
        // surfaces as a rejection handled below, not as a synchronous throw.
        pending = (async () => {
            const path = resolveGrammarPath(language);
            return LanguageClass.load(path);
        })();
        grammarCache.set(wasmKey, pending);
    }
    try {
        return await pending;
    }
    catch (err) {
        // Only evict the promise that actually failed; another caller may
        // already have stored a fresh attempt under the same key.
        if (grammarCache.get(wasmKey) === pending) {
            grammarCache.delete(wasmKey);
        }
        // Several callers can be awaiting the same rejected promise. Only the
        // first one to get here records the failure and prints the warning.
        if (!failedLanguages.has(language)) {
            failedLanguages.add(language);
            console.warn(`[qmd] Failed to load tree-sitter grammar for ${language}: ${err}`);
        }
        return null;
    }
}
/**
 * Return the compiled query for a language, compiling and caching it on
 * first use. The query source comes from LANGUAGE_QUERIES; any error raised
 * while constructing the query propagates to the caller and nothing is cached.
 */
function getQuery(language, grammar) {
    if (queryCache.has(language)) {
        return queryCache.get(language);
    }
    const compiled = new QueryClass(grammar, LANGUAGE_QUERIES[language]);
    queryCache.set(language, compiled);
    return queryCache.get(language);
}
218
+ // =============================================================================
219
+ // AST Break Point Extraction
220
+ // =============================================================================
/**
 * Parse a source file and return break points at AST node boundaries.
 *
 * Returns an empty array for unsupported languages, parse failures,
 * or grammar loading failures. Never throws: any error is logged as a
 * warning and an empty array is returned so callers fall back to regex.
 *
 * @param content - The file content to parse.
 * @param filepath - The file path (used for language detection).
 * @returns Array of BreakPoint objects ({ pos, score, type }) sorted by
 *          ascending position, suitable for merging with regex break points.
 */
export async function getASTBreakPoints(content, filepath) {
    const language = detectLanguage(filepath);
    if (!language)
        return [];
    // Declared outside the try block so the finally block can release them
    // on every exit path, including exceptions from query compilation or
    // capture extraction.
    let parser = null;
    let tree = null;
    try {
        await ensureInit();
        const grammar = await loadGrammar(language);
        if (!grammar)
            return [];
        parser = new ParserClass();
        parser.setLanguage(grammar);
        tree = parser.parse(content);
        if (!tree)
            return [];
        const query = getQuery(language, grammar);
        const captures = query.captures(tree.rootNode);
        // Deduplicate: when several captures start at the same position, keep
        // only the highest-scoring one (the first seen wins a tie). Capture
        // names missing from SCORE_MAP get a score of 20.
        const seen = new Map();
        for (const cap of captures) {
            const pos = cap.node.startIndex;
            const score = SCORE_MAP[cap.name] ?? 20;
            const type = `ast:${cap.name}`;
            const existing = seen.get(pos);
            if (!existing || score > existing.score) {
                seen.set(pos, { pos, score, type });
            }
        }
        return Array.from(seen.values()).sort((a, b) => a.pos - b.pos);
    }
    catch (err) {
        console.warn(`[qmd] AST parse failed for ${filepath}, falling back to regex: ${err instanceof Error ? err.message : err}`);
        return [];
    }
    finally {
        // Release the tree first, then the parser. Cleanup is best-effort so
        // it can never break the "never throws" contract.
        for (const resource of [tree, parser]) {
            try {
                resource?.delete();
            }
            catch {
                // Nothing useful can be done if releasing fails.
            }
        }
    }
}
271
+ // =============================================================================
272
+ // Health / Status
273
+ // =============================================================================
/**
 * Report which tree-sitter grammars are usable.
 *
 * If web-tree-sitter itself fails to initialize, every language is reported
 * unavailable with the same init error. Otherwise each language in
 * GRAMMAR_MAP is checked in turn: its grammar must load and its query must
 * compile. `available` at the top level is true when at least one language
 * is available.
 */
export async function getASTStatus() {
    const supported = Object.keys(GRAMMAR_MAP);
    try {
        await ensureInit();
    }
    catch (err) {
        const reason = `web-tree-sitter init failed: ${err instanceof Error ? err.message : err}`;
        return {
            available: false,
            languages: supported.map((language) => ({ language, available: false, error: reason })),
        };
    }
    const languages = [];
    // Checked one at a time, in GRAMMAR_MAP order.
    for (const language of supported) {
        let status;
        try {
            const grammar = await loadGrammar(language);
            if (!grammar) {
                status = { language, available: false, error: "grammar failed to load" };
            }
            else {
                // Also verify the query compiles; a failure lands in the catch below.
                getQuery(language, grammar);
                status = { language, available: true };
            }
        }
        catch (err) {
            const message = err instanceof Error ? err.message : String(err);
            status = { language, available: false, error: message };
        }
        languages.push(status);
    }
    const available = languages.some((entry) => entry.available);
    return { available, languages };
}
/**
 * Extract symbol metadata for code within a byte range.
 * Stubbed for Phase 2 — all arguments are ignored and a new empty array is
 * returned on every call.
 */
export function extractSymbols(_content, _language, _startPos, _endPos) {
    const symbols = [];
    return symbols;
}
@@ -0,0 +1,120 @@
1
+ /**
2
+ * formatter.ts - Output formatting utilities for QMD
3
+ *
4
+ * Provides methods to format search results and documents into various output formats:
5
+ * JSON, CSV, XML, Markdown, files list, and CLI (colored terminal output).
6
+ */
7
+ import type { SearchResult, MultiGetResult, DocumentResult } from "../store.js";
8
+ export type { SearchResult, MultiGetResult, DocumentResult };
/**
 * One file entry in a multi-get result.
 *
 * Discriminated on `skipped`: entries with `skipped: true` additionally
 * carry a `skipReason`; both variants otherwise have the same fields.
 */
export type MultiGetFile =
    | {
        filepath: string;
        displayPath: string;
        title: string;
        body: string;
        context?: string | null;
        skipped: false;
    }
    | {
        filepath: string;
        displayPath: string;
        title: string;
        body: string;
        context?: string | null;
        skipped: true;
        skipReason: string;
    };
/** Identifiers of the output formats accepted by the format* dispatch functions. */
export type OutputFormat =
    | "cli"
    | "csv"
    | "md"
    | "xml"
    | "files"
    | "json";
/**
 * Optional settings accepted by the search-result formatters.
 * NOTE(review): the meaning of each flag is defined by the implementation,
 * which is not part of this declaration file — confirm before relying on it.
 */
export type FormatOptions = {
    full?: boolean;
    query?: string;
    useColor?: boolean;
    lineNumbers?: boolean;
    intent?: string;
};
/**
 * Prefix every line of a text with its line number.
 * Each line becomes: "{lineNum}: {content}"
 *
 * @param text The text to add line numbers to
 * @param startLine Optional number for the first line (default: 1)
 */
export declare function addLineNumbers(
    text: string,
    startLine?: number,
): string;
/**
 * Derive the short docid from a full hash (its first 6 characters).
 */
export declare function getDocid(hash: string): string;
/**
 * Produce the CSV representation of a single value.
 * NOTE(review): quoting rules and null handling are defined by the
 * implementation, which is not visible here.
 */
export declare function escapeCSV(value: string | null | number): string;
/**
 * Produce an XML-safe version of a string.
 * NOTE(review): the exact set of escaped characters is defined by the
 * implementation, which is not visible here.
 */
export declare function escapeXml(str: string): string;
/**
 * Render search results as JSON.
 */
export declare function searchResultsToJson(
    results: SearchResult[],
    opts?: FormatOptions,
): string;
/**
 * Render search results as CSV.
 */
export declare function searchResultsToCsv(
    results: SearchResult[],
    opts?: FormatOptions,
): string;
/**
 * Render search results as a simple files list (docid,score,filepath,context).
 */
export declare function searchResultsToFiles(results: SearchResult[]): string;
/**
 * Render search results as Markdown.
 */
export declare function searchResultsToMarkdown(
    results: SearchResult[],
    opts?: FormatOptions,
): string;
/**
 * Render search results as XML.
 */
export declare function searchResultsToXml(
    results: SearchResult[],
    opts?: FormatOptions,
): string;
/**
 * Render search results for MCP: a simpler CSV format whose rows already
 * carry a pre-extracted snippet.
 */
export declare function searchResultsToMcpCsv(
    results: Array<{
        docid: string;
        file: string;
        title: string;
        score: number;
        context: string | null;
        snippet: string;
    }>,
): string;
/**
 * Render a list of documents as JSON.
 */
export declare function documentsToJson(results: Array<MultiGetFile>): string;
/**
 * Render a list of documents as CSV.
 */
export declare function documentsToCsv(results: Array<MultiGetFile>): string;
/**
 * Render a list of documents as a files list.
 */
export declare function documentsToFiles(results: Array<MultiGetFile>): string;
/**
 * Render a list of documents as Markdown.
 */
export declare function documentsToMarkdown(results: Array<MultiGetFile>): string;
/**
 * Render a list of documents as XML.
 */
export declare function documentsToXml(results: Array<MultiGetFile>): string;
/**
 * Render a single DocumentResult as JSON.
 */
export declare function documentToJson(doc: DocumentResult): string;
/**
 * Render a single DocumentResult as Markdown.
 */
export declare function documentToMarkdown(doc: DocumentResult): string;
/**
 * Render a single DocumentResult as XML.
 */
export declare function documentToXml(doc: DocumentResult): string;
/**
 * Render a single document in the requested output format.
 */
export declare function formatDocument(
    doc: DocumentResult,
    format: OutputFormat,
): string;
/**
 * Render search results in the requested output format.
 */
export declare function formatSearchResults(
    results: SearchResult[],
    format: OutputFormat,
    opts?: FormatOptions,
): string;
/**
 * Render a list of documents in the requested output format.
 */
export declare function formatDocuments(
    results: Array<MultiGetFile>,
    format: OutputFormat,
): string;