@tobilu/qmd 1.0.7 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +39 -1
- package/dist/collections.d.ts +16 -0
- package/dist/collections.js +40 -0
- package/dist/llm.d.ts +1 -0
- package/dist/llm.js +16 -2
- package/dist/mcp.js +143 -93
- package/dist/qmd.js +377 -113
- package/dist/store.d.ts +55 -3
- package/dist/store.js +289 -10
- package/package.json +3 -4
- package/qmd +0 -46
package/dist/qmd.js
CHANGED
|
@@ -1,14 +1,15 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
1
2
|
import { openDatabase } from "./db.js";
|
|
2
3
|
import fastGlob from "fast-glob";
|
|
3
4
|
import { execSync, spawn as nodeSpawn } from "child_process";
|
|
4
5
|
import { fileURLToPath } from "url";
|
|
5
6
|
import { dirname, join as pathJoin } from "path";
|
|
6
7
|
import { parseArgs } from "util";
|
|
7
|
-
import { readFileSync, statSync, existsSync, unlinkSync, writeFileSync, openSync, closeSync, mkdirSync } from "fs";
|
|
8
|
-
import { getPwd, getRealPath, homedir, resolve, enableProductionMode, searchFTS, extractSnippet, getContextForFile, getContextForPath, listCollections, removeCollection, renameCollection, findSimilarFiles, findDocumentByDocid, isDocid, matchFilesByGlob, getHashesNeedingEmbedding, getHashesForEmbedding, clearAllEmbeddings, insertEmbedding, getStatus, hashContent, extractTitle, formatDocForEmbedding, chunkDocumentByTokens, clearCache, getCacheKey, getCachedResult, setCachedResult, getIndexHealth, parseVirtualPath, buildVirtualPath, isVirtualPath, resolveVirtualPath, toVirtualPath, insertContent, insertDocument, findActiveDocument, updateDocumentTitle, updateDocument, deactivateDocument, getActiveDocumentPaths, cleanupOrphanedContent, deleteLLMCache, deleteInactiveDocuments, cleanupOrphanedVectors, vacuumDatabase, getCollectionsWithoutContext, getTopLevelPathsWithoutContext, handelize, hybridQuery, vectorSearchQuery, addLineNumbers, DEFAULT_EMBED_MODEL, DEFAULT_RERANK_MODEL, DEFAULT_GLOB, DEFAULT_MULTI_GET_MAX_BYTES, createStore, getDefaultDbPath, } from "./store.js";
|
|
8
|
+
import { readFileSync, realpathSync, statSync, existsSync, unlinkSync, writeFileSync, openSync, closeSync, mkdirSync } from "fs";
|
|
9
|
+
import { getPwd, getRealPath, homedir, resolve, enableProductionMode, searchFTS, extractSnippet, getContextForFile, getContextForPath, listCollections, removeCollection, renameCollection, findSimilarFiles, findDocumentByDocid, isDocid, matchFilesByGlob, getHashesNeedingEmbedding, getHashesForEmbedding, clearAllEmbeddings, insertEmbedding, getStatus, hashContent, extractTitle, formatDocForEmbedding, chunkDocumentByTokens, clearCache, getCacheKey, getCachedResult, setCachedResult, getIndexHealth, parseVirtualPath, buildVirtualPath, isVirtualPath, resolveVirtualPath, toVirtualPath, insertContent, insertDocument, findActiveDocument, updateDocumentTitle, updateDocument, deactivateDocument, getActiveDocumentPaths, cleanupOrphanedContent, deleteLLMCache, deleteInactiveDocuments, cleanupOrphanedVectors, vacuumDatabase, getCollectionsWithoutContext, getTopLevelPathsWithoutContext, handelize, hybridQuery, vectorSearchQuery, structuredSearch, addLineNumbers, DEFAULT_EMBED_MODEL, DEFAULT_RERANK_MODEL, DEFAULT_GLOB, DEFAULT_MULTI_GET_MAX_BYTES, createStore, getDefaultDbPath, } from "./store.js";
|
|
9
10
|
import { disposeDefaultLlamaCpp, getDefaultLlamaCpp, withLLMSession, pullModels, DEFAULT_EMBED_MODEL_URI, DEFAULT_GENERATE_MODEL_URI, DEFAULT_RERANK_MODEL_URI, DEFAULT_MODEL_CACHE_DIR } from "./llm.js";
|
|
10
11
|
import { formatSearchResults, formatDocuments, escapeXml, escapeCSV, } from "./formatter.js";
|
|
11
|
-
import { getCollection as getCollectionFromYaml, listCollections as yamlListCollections, addContext as yamlAddContext, removeContext as yamlRemoveContext, setGlobalContext, listAllContexts, setConfigIndexName, } from "./collections.js";
|
|
12
|
+
import { getCollection as getCollectionFromYaml, listCollections as yamlListCollections, getDefaultCollectionNames, addContext as yamlAddContext, removeContext as yamlRemoveContext, setGlobalContext, listAllContexts, setConfigIndexName, } from "./collections.js";
|
|
12
13
|
// Enable production mode - allows using default database path
|
|
13
14
|
// Tests must set INDEX_PATH or use createStore() with explicit path
|
|
14
15
|
enableProductionMode();
|
|
@@ -155,6 +156,11 @@ function formatTimeAgo(date) {
|
|
|
155
156
|
const days = Math.floor(hours / 24);
|
|
156
157
|
return `${days}d ago`;
|
|
157
158
|
}
|
|
159
|
+
/**
 * Format an elapsed duration for progress output.
 *
 * Durations that round to less than one second are rendered as whole
 * milliseconds (e.g. "42ms"); anything longer is rendered in seconds with
 * one decimal place (e.g. "1.5s").
 *
 * @param {number} ms - Elapsed time in milliseconds; may be fractional.
 * @returns {string} Human-readable duration string.
 */
function formatMs(ms) {
    // Round before comparing so fractional timings print as "12ms" instead of
    // "12.345678ms", and so 999.6 is reported as "1.0s" rather than "1000ms".
    const wholeMs = Math.round(ms);
    if (wholeMs < 1000)
        return `${wholeMs}ms`;
    return `${(ms / 1000).toFixed(1)}s`;
}
|
|
158
164
|
function formatBytes(bytes) {
|
|
159
165
|
if (bytes < 1024)
|
|
160
166
|
return `${bytes} B`;
|
|
@@ -308,6 +314,37 @@ async function showStatus() {
|
|
|
308
314
|
catch {
|
|
309
315
|
// Don't fail status if LLM init fails
|
|
310
316
|
}
|
|
317
|
+
// Tips section
|
|
318
|
+
const tips = [];
|
|
319
|
+
// Check for collections without context
|
|
320
|
+
const collectionsWithoutContext = collections.filter(col => {
|
|
321
|
+
const contexts = contextsByCollection.get(col.name) || [];
|
|
322
|
+
return contexts.length === 0;
|
|
323
|
+
});
|
|
324
|
+
if (collectionsWithoutContext.length > 0) {
|
|
325
|
+
const names = collectionsWithoutContext.map(c => c.name).slice(0, 3).join(', ');
|
|
326
|
+
const more = collectionsWithoutContext.length > 3 ? ` +${collectionsWithoutContext.length - 3} more` : '';
|
|
327
|
+
tips.push(`Add context to collections for better search results: ${names}${more}`);
|
|
328
|
+
tips.push(` ${c.dim}qmd context add qmd://<name>/ "What this collection contains"${c.reset}`);
|
|
329
|
+
tips.push(` ${c.dim}qmd context add qmd://<name>/meeting-notes "Weekly team meeting notes"${c.reset}`);
|
|
330
|
+
}
|
|
331
|
+
// Check for collections without update commands
|
|
332
|
+
const collectionsWithoutUpdate = collections.filter(col => {
|
|
333
|
+
const yamlCol = getCollectionFromYaml(col.name);
|
|
334
|
+
return !yamlCol?.update;
|
|
335
|
+
});
|
|
336
|
+
if (collectionsWithoutUpdate.length > 0 && collections.length > 1) {
|
|
337
|
+
const names = collectionsWithoutUpdate.map(c => c.name).slice(0, 3).join(', ');
|
|
338
|
+
const more = collectionsWithoutUpdate.length > 3 ? ` +${collectionsWithoutUpdate.length - 3} more` : '';
|
|
339
|
+
tips.push(`Add update commands to keep collections fresh: ${names}${more}`);
|
|
340
|
+
tips.push(` ${c.dim}qmd collection update-cmd <name> 'git stash && git pull --rebase --ff-only && git stash pop'${c.reset}`);
|
|
341
|
+
}
|
|
342
|
+
if (tips.length > 0) {
|
|
343
|
+
console.log(`\n${c.bold}Tips${c.reset}`);
|
|
344
|
+
for (const tip of tips) {
|
|
345
|
+
console.log(` ${tip}`);
|
|
346
|
+
}
|
|
347
|
+
}
|
|
311
348
|
closeDb();
|
|
312
349
|
}
|
|
313
350
|
async function updateCollections() {
|
|
@@ -533,49 +570,6 @@ function contextRemove(pathArg) {
|
|
|
533
570
|
}
|
|
534
571
|
console.log(`${c.green}✓${c.reset} Removed context for: qmd://${detected.collectionName}/${detected.relativePath}`);
|
|
535
572
|
}
|
|
536
|
-
function contextCheck() {
|
|
537
|
-
const db = getDb();
|
|
538
|
-
// Get collections without any context
|
|
539
|
-
const collectionsWithoutContext = getCollectionsWithoutContext(db);
|
|
540
|
-
// Get all collections to check for missing path contexts
|
|
541
|
-
const allCollections = listCollections(db);
|
|
542
|
-
if (collectionsWithoutContext.length === 0 && allCollections.length > 0) {
|
|
543
|
-
// Check if all collections have contexts
|
|
544
|
-
console.log(`\n${c.green}✓${c.reset} ${c.bold}All collections have context configured${c.reset}\n`);
|
|
545
|
-
}
|
|
546
|
-
if (collectionsWithoutContext.length > 0) {
|
|
547
|
-
console.log(`\n${c.yellow}Collections without any context:${c.reset}\n`);
|
|
548
|
-
for (const coll of collectionsWithoutContext) {
|
|
549
|
-
console.log(`${c.cyan}${coll.name}${c.reset} ${c.dim}(${coll.doc_count} documents)${c.reset}`);
|
|
550
|
-
console.log(` ${c.dim}Suggestion: qmd context add qmd://${coll.name}/ "Description of ${coll.name}"${c.reset}\n`);
|
|
551
|
-
}
|
|
552
|
-
}
|
|
553
|
-
// Check for top-level paths without context within collections that DO have context
|
|
554
|
-
const collectionsWithContext = allCollections.filter(c => c && !collectionsWithoutContext.some(cwc => cwc.name === c.name));
|
|
555
|
-
let hasPathSuggestions = false;
|
|
556
|
-
for (const coll of collectionsWithContext) {
|
|
557
|
-
if (!coll)
|
|
558
|
-
continue;
|
|
559
|
-
const missingPaths = getTopLevelPathsWithoutContext(db, coll.name);
|
|
560
|
-
if (missingPaths.length > 0) {
|
|
561
|
-
if (!hasPathSuggestions) {
|
|
562
|
-
console.log(`${c.yellow}Top-level directories without context:${c.reset}\n`);
|
|
563
|
-
hasPathSuggestions = true;
|
|
564
|
-
}
|
|
565
|
-
console.log(`${c.cyan}${coll.name}${c.reset}`);
|
|
566
|
-
for (const path of missingPaths) {
|
|
567
|
-
console.log(` ${path}`);
|
|
568
|
-
console.log(` ${c.dim}Suggestion: qmd context add qmd://${coll.name}/${path} "Description of ${path}"${c.reset}`);
|
|
569
|
-
}
|
|
570
|
-
console.log('');
|
|
571
|
-
}
|
|
572
|
-
}
|
|
573
|
-
if (collectionsWithoutContext.length === 0 && !hasPathSuggestions) {
|
|
574
|
-
console.log(`${c.dim}All collections and major paths have context configured.${c.reset}`);
|
|
575
|
-
console.log(`${c.dim}Use 'qmd context list' to see all configured contexts.${c.reset}\n`);
|
|
576
|
-
}
|
|
577
|
-
closeDb();
|
|
578
|
-
}
|
|
579
573
|
function getDocument(filename, fromLine, maxLines, lineNumbers) {
|
|
580
574
|
const db = getDb();
|
|
581
575
|
// Parse :linenum suffix from filename (e.g., "file.md:100")
|
|
@@ -1103,7 +1097,11 @@ function collectionList() {
|
|
|
1103
1097
|
for (const coll of collections) {
|
|
1104
1098
|
const updatedAt = coll.last_modified ? new Date(coll.last_modified) : new Date();
|
|
1105
1099
|
const timeAgo = formatTimeAgo(updatedAt);
|
|
1106
|
-
|
|
1100
|
+
// Get YAML config to check includeByDefault
|
|
1101
|
+
const yamlColl = getCollectionFromYaml(coll.name);
|
|
1102
|
+
const excluded = yamlColl?.includeByDefault === false;
|
|
1103
|
+
const excludeTag = excluded ? ` ${c.yellow}[excluded]${c.reset}` : '';
|
|
1104
|
+
console.log(`${c.cyan}${coll.name}${c.reset} ${c.dim}(qmd://${coll.name}/)${c.reset}${excludeTag}`);
|
|
1107
1105
|
console.log(` ${c.dim}Pattern:${c.reset} ${coll.glob_pattern}`);
|
|
1108
1106
|
console.log(` ${c.dim}Files:${c.reset} ${coll.active_count}`);
|
|
1109
1107
|
console.log(` ${c.dim}Updated:${c.reset} ${timeAgo}`);
|
|
@@ -1627,7 +1625,11 @@ function outputResults(results, query, opts) {
|
|
|
1627
1625
|
}
|
|
1628
1626
|
// Resolve -c collection filter: supports single string, array, or undefined.
|
|
1629
1627
|
// Returns validated collection names (exits on unknown collection).
|
|
1630
|
-
function resolveCollectionFilter(raw) {
|
|
1628
|
+
function resolveCollectionFilter(raw, useDefaults = false) {
|
|
1629
|
+
// If no filter specified and useDefaults is true, use default collections
|
|
1630
|
+
if (!raw && useDefaults) {
|
|
1631
|
+
return getDefaultCollectionNames();
|
|
1632
|
+
}
|
|
1631
1633
|
if (!raw)
|
|
1632
1634
|
return [];
|
|
1633
1635
|
const names = Array.isArray(raw) ? raw : [raw];
|
|
@@ -1653,10 +1655,69 @@ function filterByCollections(results, collectionNames) {
|
|
|
1653
1655
|
return prefixes.some(p => path.startsWith(p));
|
|
1654
1656
|
});
|
|
1655
1657
|
}
|
|
1658
|
+
/**
 * Parse structured search query syntax.
 *
 * The query is split on "\n"; blank lines are ignored, but reported line
 * numbers still refer to positions in the original text.
 *
 * Lines starting with lex:, vec:, or hyde: (case-insensitive) become typed
 * sub-searches. A single line without a prefix, or a single "expand:" line,
 * is treated as a plain query and is left to the caller's expansion path.
 *
 * Returns null for a plain query (empty input, one unprefixed line, or one
 * "expand:" line with text).
 * Returns an array of { type, query, line } objects when every non-blank
 * line carries a lex:/vec:/hyde: prefix.
 *
 * Throws an Error when:
 *   - an "expand:" line appears in a multi-line document,
 *   - an "expand:" or typed line has no text after the prefix,
 *   - a typed line's text contains a stray carriage return / newline,
 *   - a multi-line document contains any line without a lex:/vec:/hyde: prefix.
 *
 * Examples:
 *   "CAP theorem"                   -> null (plain query, use expansion)
 *   "expand: CAP theorem"           -> null (explicit standalone expand query)
 *   "lex: CAP theorem"              -> [{ type: 'lex', query: 'CAP theorem', line: 1 }]
 *   "lex: CAP\nvec: consistency"    -> [{ type: 'lex', ... }, { type: 'vec', ... }]
 *   "CAP\nconsistency"              -> throws (unprefixed line in a multi-line document)
 *
 * @param {string} query - Raw query text as given on the command line.
 * @returns {Array<{type: string, query: string, line: number}> | null}
 * @throws {Error} On malformed query documents (see above).
 */
function parseStructuredQuery(query) {
    // Number lines before dropping blanks so error messages point at the
    // line the user actually typed.
    const lines = query
        .split('\n')
        .map((line, idx) => ({ trimmed: line.trim(), number: idx + 1 }))
        .filter(line => line.trimmed.length > 0);
    if (lines.length === 0)
        return null;
    const prefixRe = /^(lex|vec|hyde):\s*/i;
    const expandRe = /^expand:\s*/i;
    const typed = [];
    for (const line of lines) {
        if (expandRe.test(line.trimmed)) {
            // expand: is only valid as the sole line of the query.
            if (lines.length > 1) {
                throw new Error(`Line ${line.number} starts with expand:, but query documents cannot mix expand with typed lines. Submit a single expand query instead.`);
            }
            const text = line.trimmed.replace(expandRe, '').trim();
            if (!text) {
                throw new Error('expand: query must include text.');
            }
            return null; // treat as standalone expand query
        }
        const match = line.trimmed.match(prefixRe);
        if (match) {
            const type = match[1].toLowerCase();
            const text = line.trimmed.slice(match[0].length).trim();
            if (!text) {
                throw new Error(`Line ${line.number} (${type}:) must include text.`);
            }
            // Splitting on "\n" leaves any interior "\r" in place; reject it
            // so each sub-query is guaranteed to be single-line text.
            if (/\r|\n/.test(text)) {
                throw new Error(`Line ${line.number} (${type}:) contains a newline. Keep each query on a single line.`);
            }
            typed.push({ type, query: text, line: line.number });
            continue;
        }
        if (lines.length === 1) {
            // Single plain line -> implicit expand
            return null;
        }
        throw new Error(`Line ${line.number} is missing a lex:/vec:/hyde: prefix. Each line in a query document must start with one.`);
    }
    return typed.length > 0 ? typed : null;
}
|
|
1656
1716
|
function search(query, opts) {
|
|
1657
1717
|
const db = getDb();
|
|
1658
1718
|
// Validate collection filter (supports multiple -c flags)
|
|
1659
|
-
|
|
1719
|
+
// Use default collections if none specified
|
|
1720
|
+
const collectionNames = resolveCollectionFilter(opts.collection, true);
|
|
1660
1721
|
const singleCollection = collectionNames.length === 1 ? collectionNames[0] : undefined;
|
|
1661
1722
|
// Use large limit for --all, otherwise fetch more than needed and let outputResults filter
|
|
1662
1723
|
const fetchLimit = opts.all ? 100000 : Math.max(50, opts.limit * 2);
|
|
@@ -1703,7 +1764,8 @@ function logExpansionTree(originalQuery, expanded) {
|
|
|
1703
1764
|
async function vectorSearch(query, opts, _model = DEFAULT_EMBED_MODEL) {
|
|
1704
1765
|
const store = getStore();
|
|
1705
1766
|
// Validate collection filter (supports multiple -c flags)
|
|
1706
|
-
|
|
1767
|
+
// Use default collections if none specified
|
|
1768
|
+
const collectionNames = resolveCollectionFilter(opts.collection, true);
|
|
1707
1769
|
const singleCollection = collectionNames.length === 1 ? collectionNames[0] : undefined;
|
|
1708
1770
|
checkIndexHealth(store.db);
|
|
1709
1771
|
await withLLMSession(async () => {
|
|
@@ -1749,31 +1811,83 @@ async function vectorSearch(query, opts, _model = DEFAULT_EMBED_MODEL) {
|
|
|
1749
1811
|
async function querySearch(query, opts, _embedModel = DEFAULT_EMBED_MODEL, _rerankModel = DEFAULT_RERANK_MODEL) {
|
|
1750
1812
|
const store = getStore();
|
|
1751
1813
|
// Validate collection filter (supports multiple -c flags)
|
|
1752
|
-
|
|
1814
|
+
// Use default collections if none specified
|
|
1815
|
+
const collectionNames = resolveCollectionFilter(opts.collection, true);
|
|
1753
1816
|
const singleCollection = collectionNames.length === 1 ? collectionNames[0] : undefined;
|
|
1754
1817
|
checkIndexHealth(store.db);
|
|
1818
|
+
// Check for structured query syntax (lex:/vec:/hyde: prefixes)
|
|
1819
|
+
const structuredQueries = parseStructuredQuery(query);
|
|
1755
1820
|
await withLLMSession(async () => {
|
|
1756
|
-
let results
|
|
1757
|
-
|
|
1758
|
-
|
|
1759
|
-
|
|
1760
|
-
|
|
1761
|
-
|
|
1762
|
-
|
|
1763
|
-
|
|
1764
|
-
|
|
1765
|
-
|
|
1766
|
-
|
|
1767
|
-
|
|
1768
|
-
|
|
1769
|
-
|
|
1770
|
-
|
|
1821
|
+
let results;
|
|
1822
|
+
if (structuredQueries) {
|
|
1823
|
+
// Structured search — user provided their own query expansions
|
|
1824
|
+
const typeLabels = structuredQueries.map(s => s.type).join('+');
|
|
1825
|
+
process.stderr.write(`${c.dim}Structured search: ${structuredQueries.length} queries (${typeLabels})${c.reset}\n`);
|
|
1826
|
+
// Log each sub-query
|
|
1827
|
+
for (const s of structuredQueries) {
|
|
1828
|
+
let preview = s.query.replace(/\n/g, ' ');
|
|
1829
|
+
if (preview.length > 72)
|
|
1830
|
+
preview = preview.substring(0, 69) + '...';
|
|
1831
|
+
process.stderr.write(`${c.dim}├─ ${s.type}: ${preview}${c.reset}\n`);
|
|
1832
|
+
}
|
|
1833
|
+
process.stderr.write(`${c.dim}└─ Searching...${c.reset}\n`);
|
|
1834
|
+
results = await structuredSearch(store, structuredQueries, {
|
|
1835
|
+
collections: singleCollection ? [singleCollection] : undefined,
|
|
1836
|
+
limit: opts.all ? 500 : (opts.limit || 10),
|
|
1837
|
+
minScore: opts.minScore || 0,
|
|
1838
|
+
hooks: {
|
|
1839
|
+
onEmbedStart: (count) => {
|
|
1840
|
+
process.stderr.write(`${c.dim}Embedding ${count} ${count === 1 ? 'query' : 'queries'}...${c.reset}`);
|
|
1841
|
+
},
|
|
1842
|
+
onEmbedDone: (ms) => {
|
|
1843
|
+
process.stderr.write(`${c.dim} (${formatMs(ms)})${c.reset}\n`);
|
|
1844
|
+
},
|
|
1845
|
+
onRerankStart: (chunkCount) => {
|
|
1846
|
+
process.stderr.write(`${c.dim}Reranking ${chunkCount} chunks...${c.reset}`);
|
|
1847
|
+
progress.indeterminate();
|
|
1848
|
+
},
|
|
1849
|
+
onRerankDone: (ms) => {
|
|
1850
|
+
progress.clear();
|
|
1851
|
+
process.stderr.write(`${c.dim} (${formatMs(ms)})${c.reset}\n`);
|
|
1852
|
+
},
|
|
1771
1853
|
},
|
|
1772
|
-
|
|
1773
|
-
|
|
1854
|
+
});
|
|
1855
|
+
}
|
|
1856
|
+
else {
|
|
1857
|
+
// Standard hybrid query with automatic expansion
|
|
1858
|
+
results = await hybridQuery(store, query, {
|
|
1859
|
+
collection: singleCollection,
|
|
1860
|
+
limit: opts.all ? 500 : (opts.limit || 10),
|
|
1861
|
+
minScore: opts.minScore || 0,
|
|
1862
|
+
hooks: {
|
|
1863
|
+
onStrongSignal: (score) => {
|
|
1864
|
+
process.stderr.write(`${c.dim}Strong BM25 signal (${score.toFixed(2)}) — skipping expansion${c.reset}\n`);
|
|
1865
|
+
},
|
|
1866
|
+
onExpandStart: () => {
|
|
1867
|
+
process.stderr.write(`${c.dim}Expanding query...${c.reset}`);
|
|
1868
|
+
},
|
|
1869
|
+
onExpand: (original, expanded, ms) => {
|
|
1870
|
+
process.stderr.write(`${c.dim} (${formatMs(ms)})${c.reset}\n`);
|
|
1871
|
+
logExpansionTree(original, expanded);
|
|
1872
|
+
process.stderr.write(`${c.dim}Searching ${expanded.length + 1} queries...${c.reset}\n`);
|
|
1873
|
+
},
|
|
1874
|
+
onEmbedStart: (count) => {
|
|
1875
|
+
process.stderr.write(`${c.dim}Embedding ${count} ${count === 1 ? 'query' : 'queries'}...${c.reset}`);
|
|
1876
|
+
},
|
|
1877
|
+
onEmbedDone: (ms) => {
|
|
1878
|
+
process.stderr.write(`${c.dim} (${formatMs(ms)})${c.reset}\n`);
|
|
1879
|
+
},
|
|
1880
|
+
onRerankStart: (chunkCount) => {
|
|
1881
|
+
process.stderr.write(`${c.dim}Reranking ${chunkCount} chunks...${c.reset}`);
|
|
1882
|
+
progress.indeterminate();
|
|
1883
|
+
},
|
|
1884
|
+
onRerankDone: (ms) => {
|
|
1885
|
+
progress.clear();
|
|
1886
|
+
process.stderr.write(`${c.dim} (${formatMs(ms)})${c.reset}\n`);
|
|
1887
|
+
},
|
|
1774
1888
|
},
|
|
1775
|
-
}
|
|
1776
|
-
}
|
|
1889
|
+
});
|
|
1890
|
+
}
|
|
1777
1891
|
// Post-filter for multi-collection
|
|
1778
1892
|
if (collectionNames.length > 1) {
|
|
1779
1893
|
results = results.filter(r => {
|
|
@@ -1791,6 +1905,10 @@ async function querySearch(query, opts, _embedModel = DEFAULT_EMBED_MODEL, _rera
|
|
|
1791
1905
|
}
|
|
1792
1906
|
return;
|
|
1793
1907
|
}
|
|
1908
|
+
// Use first lex/vec query for output context, or original query
|
|
1909
|
+
const displayQuery = structuredQueries
|
|
1910
|
+
? (structuredQueries.find(s => s.type === 'lex')?.query || structuredQueries.find(s => s.type === 'vec')?.query || query)
|
|
1911
|
+
: query;
|
|
1794
1912
|
// Map to CLI output format — use bestChunk for snippet display
|
|
1795
1913
|
outputResults(results.map(r => ({
|
|
1796
1914
|
file: r.file,
|
|
@@ -1801,7 +1919,7 @@ async function querySearch(query, opts, _embedModel = DEFAULT_EMBED_MODEL, _rera
|
|
|
1801
1919
|
score: r.score,
|
|
1802
1920
|
context: r.context,
|
|
1803
1921
|
docid: r.docid,
|
|
1804
|
-
})),
|
|
1922
|
+
})), displayQuery, { ...opts, limit: results.length });
|
|
1805
1923
|
}, { maxDuration: 10 * 60 * 1000, name: 'querySearch' });
|
|
1806
1924
|
}
|
|
1807
1925
|
// Parse CLI arguments using util.parseArgs
|
|
@@ -1818,6 +1936,7 @@ function parseCLI() {
|
|
|
1818
1936
|
},
|
|
1819
1937
|
help: { type: "boolean", short: "h" },
|
|
1820
1938
|
version: { type: "boolean", short: "v" },
|
|
1939
|
+
skill: { type: "boolean" },
|
|
1821
1940
|
// Search options
|
|
1822
1941
|
n: { type: "string" },
|
|
1823
1942
|
"min-score": { type: "string" },
|
|
@@ -1889,50 +2008,100 @@ function parseCLI() {
|
|
|
1889
2008
|
values,
|
|
1890
2009
|
};
|
|
1891
2010
|
}
|
|
2011
|
+
/**
 * Print the packaged skill document to stdout.
 *
 * Writes a short header (the relative skill path and its resolved location,
 * followed by a blank line), then the contents of skills/qmd/SKILL.md,
 * resolved one directory above this script. A trailing newline is appended
 * when the file does not already end with one. If the file is missing, a
 * hint is written to stderr instead and nothing else is printed.
 */
function showSkill() {
    const here = dirname(fileURLToPath(import.meta.url));
    const skillRelPath = pathJoin("skills", "qmd", "SKILL.md");
    const skillFile = pathJoin(here, "..", skillRelPath);
    // The header is emitted before the existence check, so the expected
    // location is always shown even when the file is absent.
    const header = [`QMD Skill (${skillRelPath})`, `Location: ${skillFile}`, ""];
    for (const headerLine of header) {
        console.log(headerLine);
    }
    if (existsSync(skillFile)) {
        let body = readFileSync(skillFile, "utf-8");
        if (!body.endsWith("\n")) {
            body = body + "\n";
        }
        process.stdout.write(body);
        return;
    }
    console.error("SKILL.md not found. If you built from source, ensure skills/qmd/SKILL.md exists.");
}
|
|
1892
2025
|
function showHelp() {
|
|
2026
|
+
console.log("qmd — Quick Markdown Search");
|
|
2027
|
+
console.log("");
|
|
1893
2028
|
console.log("Usage:");
|
|
1894
|
-
console.log(" qmd
|
|
1895
|
-
console.log("
|
|
1896
|
-
console.log("
|
|
1897
|
-
console.log(" qmd
|
|
1898
|
-
console.log(" qmd
|
|
1899
|
-
console.log(" qmd
|
|
1900
|
-
console.log(" qmd
|
|
1901
|
-
console.log(" qmd
|
|
1902
|
-
console.log(" qmd get <
|
|
1903
|
-
console.log(" qmd
|
|
1904
|
-
console.log("
|
|
1905
|
-
console.log("
|
|
1906
|
-
console.log(" qmd
|
|
1907
|
-
console.log(" qmd
|
|
1908
|
-
console.log(" qmd
|
|
1909
|
-
console.log("
|
|
1910
|
-
console.log("
|
|
1911
|
-
console.log(" qmd
|
|
1912
|
-
console.log(" qmd
|
|
1913
|
-
console.log(" qmd
|
|
1914
|
-
console.log(" qmd
|
|
2029
|
+
console.log(" qmd <command> [options]");
|
|
2030
|
+
console.log("");
|
|
2031
|
+
console.log("Primary commands:");
|
|
2032
|
+
console.log(" qmd query <query> - Hybrid search with auto expansion + reranking (recommended)");
|
|
2033
|
+
console.log(" qmd query 'lex:..\\nvec:...' - Structured query document (you provide lex/vec/hyde lines)");
|
|
2034
|
+
console.log(" qmd search <query> - Full-text BM25 keywords (no LLM)");
|
|
2035
|
+
console.log(" qmd vsearch <query> - Vector similarity only");
|
|
2036
|
+
console.log(" qmd get <file>[:line] [-l N] - Show a single document, optional line slice");
|
|
2037
|
+
console.log(" qmd multi-get <pattern> - Batch fetch via glob or comma-separated list");
|
|
2038
|
+
console.log(" qmd mcp - Start the MCP server (stdio transport for AI agents)");
|
|
2039
|
+
console.log("");
|
|
2040
|
+
console.log("Collections & context:");
|
|
2041
|
+
console.log(" qmd collection add/list/remove/rename/show - Manage indexed folders");
|
|
2042
|
+
console.log(" qmd context add/list/rm - Attach human-written summaries");
|
|
2043
|
+
console.log(" qmd ls [collection[/path]] - Inspect indexed files");
|
|
2044
|
+
console.log("");
|
|
2045
|
+
console.log("Maintenance:");
|
|
2046
|
+
console.log(" qmd status - View index + collection health");
|
|
2047
|
+
console.log(" qmd update [--pull] - Re-index collections (optionally git pull first)");
|
|
2048
|
+
console.log(" qmd embed [-f] - Generate/refresh vector embeddings");
|
|
2049
|
+
console.log(" qmd cleanup - Clear caches, vacuum DB");
|
|
2050
|
+
console.log("");
|
|
2051
|
+
console.log("Query syntax (qmd query):");
|
|
2052
|
+
console.log(" QMD queries are either a single expand query (no prefix) or a multi-line");
|
|
2053
|
+
console.log(" document where every line is typed with lex:, vec:, or hyde:. This grammar");
|
|
2054
|
+
console.log(" matches the docs in docs/SYNTAX.md and is enforced in the CLI.");
|
|
2055
|
+
console.log("");
|
|
2056
|
+
const grammar = [
|
|
2057
|
+
`query = expand_query | query_document ;`,
|
|
2058
|
+
`expand_query = text | explicit_expand ;`,
|
|
2059
|
+
`explicit_expand= "expand:" text ;`,
|
|
2060
|
+
`query_document = { typed_line } ;`,
|
|
2061
|
+
`typed_line = type ":" text newline ;`,
|
|
2062
|
+
`type = "lex" | "vec" | "hyde" ;`,
|
|
2063
|
+
`text = quoted_phrase | plain_text ;`,
|
|
2064
|
+
`quoted_phrase = '"' { character } '"' ;`,
|
|
2065
|
+
`plain_text = { character } ;`,
|
|
2066
|
+
`newline = "\\n" ;`,
|
|
2067
|
+
];
|
|
2068
|
+
console.log(" Grammar:");
|
|
2069
|
+
for (const line of grammar) {
|
|
2070
|
+
console.log(` ${line}`);
|
|
2071
|
+
}
|
|
2072
|
+
console.log("");
|
|
2073
|
+
console.log(" Examples:");
|
|
2074
|
+
console.log(" qmd query \"how does auth work\" # single-line → implicit expand");
|
|
2075
|
+
console.log(" qmd query $'lex: CAP theorem\\nvec: consistency' # typed query document");
|
|
2076
|
+
console.log(" qmd query $'lex: \"exact matches\" sports -baseball' # phrase + negation lex search");
|
|
2077
|
+
console.log(" qmd query $'hyde: Hypothetical answer text' # hyde-only document");
|
|
2078
|
+
console.log("");
|
|
2079
|
+
console.log(" Constraints:");
|
|
2080
|
+
console.log(" - Standalone expand queries cannot mix with typed lines.");
|
|
2081
|
+
console.log(" - Query documents allow only lex:, vec:, or hyde: prefixes.");
|
|
2082
|
+
console.log(" - Each typed line must be single-line text with balanced quotes.");
|
|
2083
|
+
console.log("");
|
|
2084
|
+
console.log("AI agents & integrations:");
|
|
2085
|
+
console.log(" - Run `qmd mcp` to expose the MCP server (stdio) to agents/IDEs.");
|
|
2086
|
+
console.log(" - `qmd --skill` prints the packaged skills/qmd/SKILL.md (path + contents).");
|
|
2087
|
+
console.log(" - Advanced: `qmd mcp --http ...` and `qmd mcp --http --daemon` are optional for custom transports.");
|
|
1915
2088
|
console.log("");
|
|
1916
2089
|
console.log("Global options:");
|
|
1917
|
-
console.log(" --index <name> - Use
|
|
2090
|
+
console.log(" --index <name> - Use a named index (default: index)");
|
|
1918
2091
|
console.log("");
|
|
1919
2092
|
console.log("Search options:");
|
|
1920
|
-
console.log(" -n <num> -
|
|
1921
|
-
console.log(" --all - Return all matches (
|
|
2093
|
+
console.log(" -n <num> - Max results (default 5, or 20 for --files/--json)");
|
|
2094
|
+
console.log(" --all - Return all matches (pair with --min-score)");
|
|
1922
2095
|
console.log(" --min-score <num> - Minimum similarity score");
|
|
1923
2096
|
console.log(" --full - Output full document instead of snippet");
|
|
1924
|
-
console.log(" --line-numbers -
|
|
1925
|
-
console.log(" --files
|
|
1926
|
-
console.log(" --
|
|
1927
|
-
console.log(" --csv - CSV output with snippets");
|
|
1928
|
-
console.log(" --md - Markdown output");
|
|
1929
|
-
console.log(" --xml - XML output");
|
|
1930
|
-
console.log(" -c, --collection <name> - Filter results to a specific collection");
|
|
2097
|
+
console.log(" --line-numbers - Include line numbers in output");
|
|
2098
|
+
console.log(" --files | --json | --csv | --md | --xml - Output format");
|
|
2099
|
+
console.log(" -c, --collection <name> - Filter by one or more collections");
|
|
1931
2100
|
console.log("");
|
|
1932
2101
|
console.log("Multi-get options:");
|
|
1933
2102
|
console.log(" -l <num> - Maximum lines per file");
|
|
1934
|
-
console.log(" --max-bytes <num> - Skip files larger than N bytes (default
|
|
1935
|
-
console.log(" --json/--csv/--md/--xml/--files -
|
|
2103
|
+
console.log(" --max-bytes <num> - Skip files larger than N bytes (default 10240)");
|
|
2104
|
+
console.log(" --json/--csv/--md/--xml/--files - Same formats as search");
|
|
1936
2105
|
console.log("");
|
|
1937
2106
|
console.log(`Index: ${getDbPath()}`);
|
|
1938
2107
|
}
|
|
@@ -1951,12 +2120,22 @@ async function showVersion() {
|
|
|
1951
2120
|
console.log(`qmd ${versionStr}`);
|
|
1952
2121
|
}
|
|
1953
2122
|
// Main CLI - only run if this is the main module
|
|
1954
|
-
|
|
2123
|
+
const __filename = fileURLToPath(import.meta.url);
|
|
2124
|
+
const argv1 = process.argv[1];
|
|
2125
|
+
const isMain = argv1 === __filename
|
|
2126
|
+
|| argv1?.endsWith("/qmd.ts")
|
|
2127
|
+
|| argv1?.endsWith("/qmd.js")
|
|
2128
|
+
|| (argv1 != null && realpathSync(argv1) === __filename);
|
|
2129
|
+
if (isMain) {
|
|
1955
2130
|
const cli = parseCLI();
|
|
1956
2131
|
if (cli.values.version) {
|
|
1957
2132
|
await showVersion();
|
|
1958
2133
|
process.exit(0);
|
|
1959
2134
|
}
|
|
2135
|
+
if (cli.values.skill) {
|
|
2136
|
+
showSkill();
|
|
2137
|
+
process.exit(0);
|
|
2138
|
+
}
|
|
1960
2139
|
if (!cli.command || cli.values.help) {
|
|
1961
2140
|
showHelp();
|
|
1962
2141
|
process.exit(cli.values.help ? 0 : 1);
|
|
@@ -1965,13 +2144,12 @@ if (fileURLToPath(import.meta.url) === process.argv[1] || process.argv[1]?.endsW
|
|
|
1965
2144
|
case "context": {
|
|
1966
2145
|
const subcommand = cli.args[0];
|
|
1967
2146
|
if (!subcommand) {
|
|
1968
|
-
console.error("Usage: qmd context <add|list|
|
|
2147
|
+
console.error("Usage: qmd context <add|list|rm>");
|
|
1969
2148
|
console.error("");
|
|
1970
2149
|
console.error("Commands:");
|
|
1971
2150
|
console.error(" qmd context add [path] \"text\" - Add context (defaults to current dir)");
|
|
1972
2151
|
console.error(" qmd context add / \"text\" - Add global context to all collections");
|
|
1973
2152
|
console.error(" qmd context list - List all contexts");
|
|
1974
|
-
console.error(" qmd context check - Check for missing contexts");
|
|
1975
2153
|
console.error(" qmd context rm <path> - Remove context");
|
|
1976
2154
|
process.exit(1);
|
|
1977
2155
|
}
|
|
@@ -2013,10 +2191,6 @@ if (fileURLToPath(import.meta.url) === process.argv[1] || process.argv[1]?.endsW
|
|
|
2013
2191
|
contextList();
|
|
2014
2192
|
break;
|
|
2015
2193
|
}
|
|
2016
|
-
case "check": {
|
|
2017
|
-
contextCheck();
|
|
2018
|
-
break;
|
|
2019
|
-
}
|
|
2020
2194
|
case "rm":
|
|
2021
2195
|
case "remove": {
|
|
2022
2196
|
if (cli.args.length < 2 || !cli.args[1]) {
|
|
@@ -2031,7 +2205,7 @@ if (fileURLToPath(import.meta.url) === process.argv[1] || process.argv[1]?.endsW
|
|
|
2031
2205
|
}
|
|
2032
2206
|
default:
|
|
2033
2207
|
console.error(`Unknown subcommand: ${subcommand}`);
|
|
2034
|
-
console.error("Available: add, list,
|
|
2208
|
+
console.error("Available: add, list, rm");
|
|
2035
2209
|
process.exit(1);
|
|
2036
2210
|
}
|
|
2037
2211
|
break;
|
|
@@ -2096,9 +2270,99 @@ if (fileURLToPath(import.meta.url) === process.argv[1] || process.argv[1]?.endsW
|
|
|
2096
2270
|
collectionRename(cli.args[1], cli.args[2]);
|
|
2097
2271
|
break;
|
|
2098
2272
|
}
|
|
2273
|
+
case "set-update":
|
|
2274
|
+
case "update-cmd": {
|
|
2275
|
+
const name = cli.args[1];
|
|
2276
|
+
const cmd = cli.args.slice(2).join(' ') || null;
|
|
2277
|
+
if (!name) {
|
|
2278
|
+
console.error("Usage: qmd collection update-cmd <name> [command]");
|
|
2279
|
+
console.error(" Set the command to run before indexing (e.g., 'git pull')");
|
|
2280
|
+
console.error(" Omit command to clear it");
|
|
2281
|
+
process.exit(1);
|
|
2282
|
+
}
|
|
2283
|
+
const { updateCollectionSettings, getCollection } = await import("./collections.js");
|
|
2284
|
+
const col = getCollection(name);
|
|
2285
|
+
if (!col) {
|
|
2286
|
+
console.error(`Collection not found: ${name}`);
|
|
2287
|
+
process.exit(1);
|
|
2288
|
+
}
|
|
2289
|
+
updateCollectionSettings(name, { update: cmd });
|
|
2290
|
+
if (cmd) {
|
|
2291
|
+
console.log(`✓ Set update command for '${name}': ${cmd}`);
|
|
2292
|
+
}
|
|
2293
|
+
else {
|
|
2294
|
+
console.log(`✓ Cleared update command for '${name}'`);
|
|
2295
|
+
}
|
|
2296
|
+
break;
|
|
2297
|
+
}
|
|
2298
|
+
case "include":
|
|
2299
|
+
case "exclude": {
|
|
2300
|
+
const name = cli.args[1];
|
|
2301
|
+
if (!name) {
|
|
2302
|
+
console.error(`Usage: qmd collection ${subcommand} <name>`);
|
|
2303
|
+
console.error(` ${subcommand === 'include' ? 'Include' : 'Exclude'} collection in default queries`);
|
|
2304
|
+
process.exit(1);
|
|
2305
|
+
}
|
|
2306
|
+
const { updateCollectionSettings, getCollection } = await import("./collections.js");
|
|
2307
|
+
const col = getCollection(name);
|
|
2308
|
+
if (!col) {
|
|
2309
|
+
console.error(`Collection not found: ${name}`);
|
|
2310
|
+
process.exit(1);
|
|
2311
|
+
}
|
|
2312
|
+
const include = subcommand === 'include';
|
|
2313
|
+
updateCollectionSettings(name, { includeByDefault: include });
|
|
2314
|
+
console.log(`✓ Collection '${name}' ${include ? 'included in' : 'excluded from'} default queries`);
|
|
2315
|
+
break;
|
|
2316
|
+
}
|
|
2317
|
+
case "show":
|
|
2318
|
+
case "info": {
|
|
2319
|
+
const name = cli.args[1];
|
|
2320
|
+
if (!name) {
|
|
2321
|
+
console.error("Usage: qmd collection show <name>");
|
|
2322
|
+
process.exit(1);
|
|
2323
|
+
}
|
|
2324
|
+
const { getCollection } = await import("./collections.js");
|
|
2325
|
+
const col = getCollection(name);
|
|
2326
|
+
if (!col) {
|
|
2327
|
+
console.error(`Collection not found: ${name}`);
|
|
2328
|
+
process.exit(1);
|
|
2329
|
+
}
|
|
2330
|
+
console.log(`Collection: ${name}`);
|
|
2331
|
+
console.log(` Path: ${col.path}`);
|
|
2332
|
+
console.log(` Pattern: ${col.pattern}`);
|
|
2333
|
+
console.log(` Include: ${col.includeByDefault !== false ? 'yes (default)' : 'no'}`);
|
|
2334
|
+
if (col.update) {
|
|
2335
|
+
console.log(` Update: ${col.update}`);
|
|
2336
|
+
}
|
|
2337
|
+
if (col.context) {
|
|
2338
|
+
const ctxCount = Object.keys(col.context).length;
|
|
2339
|
+
console.log(` Contexts: ${ctxCount}`);
|
|
2340
|
+
}
|
|
2341
|
+
break;
|
|
2342
|
+
}
|
|
2343
|
+
case "help":
|
|
2344
|
+
case undefined: {
|
|
2345
|
+
console.log("Usage: qmd collection <command> [options]");
|
|
2346
|
+
console.log("");
|
|
2347
|
+
console.log("Commands:");
|
|
2348
|
+
console.log(" list List all collections");
|
|
2349
|
+
console.log(" add <path> [--name NAME] Add a collection");
|
|
2350
|
+
console.log(" remove <name> Remove a collection");
|
|
2351
|
+
console.log(" rename <old> <new> Rename a collection");
|
|
2352
|
+
console.log(" show <name> Show collection details");
|
|
2353
|
+
console.log(" update-cmd <name> [cmd] Set pre-update command (e.g., 'git pull')");
|
|
2354
|
+
console.log(" include <name> Include in default queries");
|
|
2355
|
+
console.log(" exclude <name> Exclude from default queries");
|
|
2356
|
+
console.log("");
|
|
2357
|
+
console.log("Examples:");
|
|
2358
|
+
console.log(" qmd collection add ~/notes --name notes");
|
|
2359
|
+
console.log(" qmd collection update-cmd brain 'git pull'");
|
|
2360
|
+
console.log(" qmd collection exclude archive");
|
|
2361
|
+
process.exit(0);
|
|
2362
|
+
}
|
|
2099
2363
|
default:
|
|
2100
2364
|
console.error(`Unknown subcommand: ${subcommand}`);
|
|
2101
|
-
console.error("
|
|
2365
|
+
console.error("Run 'qmd collection help' for usage");
|
|
2102
2366
|
process.exit(1);
|
|
2103
2367
|
}
|
|
2104
2368
|
break;
|