@tobilu/qmd 1.0.6 → 1.1.1

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/dist/qmd.js CHANGED
@@ -1,14 +1,15 @@
1
+ #!/usr/bin/env node
1
2
  import { openDatabase } from "./db.js";
2
3
  import fastGlob from "fast-glob";
3
4
  import { execSync, spawn as nodeSpawn } from "child_process";
4
5
  import { fileURLToPath } from "url";
5
6
  import { dirname, join as pathJoin } from "path";
6
7
  import { parseArgs } from "util";
7
- import { readFileSync, statSync, existsSync, unlinkSync, writeFileSync, openSync, closeSync, mkdirSync } from "fs";
8
- import { getPwd, getRealPath, homedir, resolve, enableProductionMode, searchFTS, extractSnippet, getContextForFile, getContextForPath, listCollections, removeCollection, renameCollection, findSimilarFiles, findDocumentByDocid, isDocid, matchFilesByGlob, getHashesNeedingEmbedding, getHashesForEmbedding, clearAllEmbeddings, insertEmbedding, getStatus, hashContent, extractTitle, formatDocForEmbedding, chunkDocumentByTokens, clearCache, getCacheKey, getCachedResult, setCachedResult, getIndexHealth, parseVirtualPath, buildVirtualPath, isVirtualPath, resolveVirtualPath, toVirtualPath, insertContent, insertDocument, findActiveDocument, updateDocumentTitle, updateDocument, deactivateDocument, getActiveDocumentPaths, cleanupOrphanedContent, deleteLLMCache, deleteInactiveDocuments, cleanupOrphanedVectors, vacuumDatabase, getCollectionsWithoutContext, getTopLevelPathsWithoutContext, handelize, hybridQuery, vectorSearchQuery, addLineNumbers, DEFAULT_EMBED_MODEL, DEFAULT_RERANK_MODEL, DEFAULT_GLOB, DEFAULT_MULTI_GET_MAX_BYTES, createStore, getDefaultDbPath, } from "./store.js";
8
+ import { readFileSync, realpathSync, statSync, existsSync, unlinkSync, writeFileSync, openSync, closeSync, mkdirSync } from "fs";
9
+ import { getPwd, getRealPath, homedir, resolve, enableProductionMode, searchFTS, extractSnippet, getContextForFile, getContextForPath, listCollections, removeCollection, renameCollection, findSimilarFiles, findDocumentByDocid, isDocid, matchFilesByGlob, getHashesNeedingEmbedding, getHashesForEmbedding, clearAllEmbeddings, insertEmbedding, getStatus, hashContent, extractTitle, formatDocForEmbedding, chunkDocumentByTokens, clearCache, getCacheKey, getCachedResult, setCachedResult, getIndexHealth, parseVirtualPath, buildVirtualPath, isVirtualPath, resolveVirtualPath, toVirtualPath, insertContent, insertDocument, findActiveDocument, updateDocumentTitle, updateDocument, deactivateDocument, getActiveDocumentPaths, cleanupOrphanedContent, deleteLLMCache, deleteInactiveDocuments, cleanupOrphanedVectors, vacuumDatabase, getCollectionsWithoutContext, getTopLevelPathsWithoutContext, handelize, hybridQuery, vectorSearchQuery, structuredSearch, addLineNumbers, DEFAULT_EMBED_MODEL, DEFAULT_RERANK_MODEL, DEFAULT_GLOB, DEFAULT_MULTI_GET_MAX_BYTES, createStore, getDefaultDbPath, } from "./store.js";
9
10
  import { disposeDefaultLlamaCpp, getDefaultLlamaCpp, withLLMSession, pullModels, DEFAULT_EMBED_MODEL_URI, DEFAULT_GENERATE_MODEL_URI, DEFAULT_RERANK_MODEL_URI, DEFAULT_MODEL_CACHE_DIR } from "./llm.js";
10
11
  import { formatSearchResults, formatDocuments, escapeXml, escapeCSV, } from "./formatter.js";
11
- import { getCollection as getCollectionFromYaml, listCollections as yamlListCollections, addContext as yamlAddContext, removeContext as yamlRemoveContext, setGlobalContext, listAllContexts, setConfigIndexName, } from "./collections.js";
12
+ import { getCollection as getCollectionFromYaml, listCollections as yamlListCollections, getDefaultCollectionNames, addContext as yamlAddContext, removeContext as yamlRemoveContext, setGlobalContext, listAllContexts, setConfigIndexName, } from "./collections.js";
12
13
  // Enable production mode - allows using default database path
13
14
  // Tests must set INDEX_PATH or use createStore() with explicit path
14
15
  enableProductionMode();
@@ -36,7 +37,16 @@ function getDbPath() {
36
37
  return store?.dbPath ?? storeDbPathOverride ?? getDefaultDbPath();
37
38
  }
38
39
  function setIndexName(name) {
39
- storeDbPathOverride = name ? getDefaultDbPath(name) : undefined;
40
+ let normalizedName = name;
41
+ // Normalize relative paths to prevent malformed database paths
42
+ if (name && name.includes('/')) {
43
+ const { resolve } = require('path');
44
+ const { cwd } = require('process');
45
+ const absolutePath = resolve(cwd(), name);
46
+ // Replace path separators with underscores to create a valid filename
47
+ normalizedName = absolutePath.replace(/\//g, '_').replace(/^_/, '');
48
+ }
49
+ storeDbPathOverride = normalizedName ? getDefaultDbPath(normalizedName) : undefined;
40
50
  // Reset open handle so next use opens the new index
41
51
  closeDb();
42
52
  }
@@ -146,6 +156,11 @@ function formatTimeAgo(date) {
146
156
  const days = Math.floor(hours / 24);
147
157
  return `${days}d ago`;
148
158
  }
159
+ function formatMs(ms) {
160
+ if (ms < 1000)
161
+ return `${ms}ms`;
162
+ return `${(ms / 1000).toFixed(1)}s`;
163
+ }
149
164
  function formatBytes(bytes) {
150
165
  if (bytes < 1024)
151
166
  return `${bytes} B`;
@@ -299,6 +314,37 @@ async function showStatus() {
299
314
  catch {
300
315
  // Don't fail status if LLM init fails
301
316
  }
317
+ // Tips section
318
+ const tips = [];
319
+ // Check for collections without context
320
+ const collectionsWithoutContext = collections.filter(col => {
321
+ const contexts = contextsByCollection.get(col.name) || [];
322
+ return contexts.length === 0;
323
+ });
324
+ if (collectionsWithoutContext.length > 0) {
325
+ const names = collectionsWithoutContext.map(c => c.name).slice(0, 3).join(', ');
326
+ const more = collectionsWithoutContext.length > 3 ? ` +${collectionsWithoutContext.length - 3} more` : '';
327
+ tips.push(`Add context to collections for better search results: ${names}${more}`);
328
+ tips.push(` ${c.dim}qmd context add qmd://<name>/ "What this collection contains"${c.reset}`);
329
+ tips.push(` ${c.dim}qmd context add qmd://<name>/meeting-notes "Weekly team meeting notes"${c.reset}`);
330
+ }
331
+ // Check for collections without update commands
332
+ const collectionsWithoutUpdate = collections.filter(col => {
333
+ const yamlCol = getCollectionFromYaml(col.name);
334
+ return !yamlCol?.update;
335
+ });
336
+ if (collectionsWithoutUpdate.length > 0 && collections.length > 1) {
337
+ const names = collectionsWithoutUpdate.map(c => c.name).slice(0, 3).join(', ');
338
+ const more = collectionsWithoutUpdate.length > 3 ? ` +${collectionsWithoutUpdate.length - 3} more` : '';
339
+ tips.push(`Add update commands to keep collections fresh: ${names}${more}`);
340
+ tips.push(` ${c.dim}qmd collection update-cmd <name> 'git stash && git pull --rebase --ff-only && git stash pop'${c.reset}`);
341
+ }
342
+ if (tips.length > 0) {
343
+ console.log(`\n${c.bold}Tips${c.reset}`);
344
+ for (const tip of tips) {
345
+ console.log(` ${tip}`);
346
+ }
347
+ }
302
348
  closeDb();
303
349
  }
304
350
  async function updateCollections() {
@@ -524,49 +570,6 @@ function contextRemove(pathArg) {
524
570
  }
525
571
  console.log(`${c.green}✓${c.reset} Removed context for: qmd://${detected.collectionName}/${detected.relativePath}`);
526
572
  }
527
- function contextCheck() {
528
- const db = getDb();
529
- // Get collections without any context
530
- const collectionsWithoutContext = getCollectionsWithoutContext(db);
531
- // Get all collections to check for missing path contexts
532
- const allCollections = listCollections(db);
533
- if (collectionsWithoutContext.length === 0 && allCollections.length > 0) {
534
- // Check if all collections have contexts
535
- console.log(`\n${c.green}✓${c.reset} ${c.bold}All collections have context configured${c.reset}\n`);
536
- }
537
- if (collectionsWithoutContext.length > 0) {
538
- console.log(`\n${c.yellow}Collections without any context:${c.reset}\n`);
539
- for (const coll of collectionsWithoutContext) {
540
- console.log(`${c.cyan}${coll.name}${c.reset} ${c.dim}(${coll.doc_count} documents)${c.reset}`);
541
- console.log(` ${c.dim}Suggestion: qmd context add qmd://${coll.name}/ "Description of ${coll.name}"${c.reset}\n`);
542
- }
543
- }
544
- // Check for top-level paths without context within collections that DO have context
545
- const collectionsWithContext = allCollections.filter(c => c && !collectionsWithoutContext.some(cwc => cwc.name === c.name));
546
- let hasPathSuggestions = false;
547
- for (const coll of collectionsWithContext) {
548
- if (!coll)
549
- continue;
550
- const missingPaths = getTopLevelPathsWithoutContext(db, coll.name);
551
- if (missingPaths.length > 0) {
552
- if (!hasPathSuggestions) {
553
- console.log(`${c.yellow}Top-level directories without context:${c.reset}\n`);
554
- hasPathSuggestions = true;
555
- }
556
- console.log(`${c.cyan}${coll.name}${c.reset}`);
557
- for (const path of missingPaths) {
558
- console.log(` ${path}`);
559
- console.log(` ${c.dim}Suggestion: qmd context add qmd://${coll.name}/${path} "Description of ${path}"${c.reset}`);
560
- }
561
- console.log('');
562
- }
563
- }
564
- if (collectionsWithoutContext.length === 0 && !hasPathSuggestions) {
565
- console.log(`${c.dim}All collections and major paths have context configured.${c.reset}`);
566
- console.log(`${c.dim}Use 'qmd context list' to see all configured contexts.${c.reset}\n`);
567
- }
568
- closeDb();
569
- }
570
573
  function getDocument(filename, fromLine, maxLines, lineNumbers) {
571
574
  const db = getDb();
572
575
  // Parse :linenum suffix from filename (e.g., "file.md:100")
@@ -962,7 +965,7 @@ function listFiles(pathArg) {
962
965
  // No argument - list all collections
963
966
  const yamlCollections = yamlListCollections();
964
967
  if (yamlCollections.length === 0) {
965
- console.log("No collections found. Run 'qmd add .' to index files.");
968
+ console.log("No collections found. Run 'qmd collection add .' to index files.");
966
969
  closeDb();
967
970
  return;
968
971
  }
@@ -1086,7 +1089,7 @@ function collectionList() {
1086
1089
  const db = getDb();
1087
1090
  const collections = listCollections(db);
1088
1091
  if (collections.length === 0) {
1089
- console.log("No collections found. Run 'qmd add .' to create one.");
1092
+ console.log("No collections found. Run 'qmd collection add .' to create one.");
1090
1093
  closeDb();
1091
1094
  return;
1092
1095
  }
@@ -1094,7 +1097,11 @@ function collectionList() {
1094
1097
  for (const coll of collections) {
1095
1098
  const updatedAt = coll.last_modified ? new Date(coll.last_modified) : new Date();
1096
1099
  const timeAgo = formatTimeAgo(updatedAt);
1097
- console.log(`${c.cyan}${coll.name}${c.reset} ${c.dim}(qmd://${coll.name}/)${c.reset}`);
1100
+ // Get YAML config to check includeByDefault
1101
+ const yamlColl = getCollectionFromYaml(coll.name);
1102
+ const excluded = yamlColl?.includeByDefault === false;
1103
+ const excludeTag = excluded ? ` ${c.yellow}[excluded]${c.reset}` : '';
1104
+ console.log(`${c.cyan}${coll.name}${c.reset} ${c.dim}(qmd://${coll.name}/)${c.reset}${excludeTag}`);
1098
1105
  console.log(` ${c.dim}Pattern:${c.reset} ${coll.glob_pattern}`);
1099
1106
  console.log(` ${c.dim}Files:${c.reset} ${coll.active_count}`);
1100
1107
  console.log(` ${c.dim}Updated:${c.reset} ${timeAgo}`);
@@ -1616,22 +1623,105 @@ function outputResults(results, query, opts) {
1616
1623
  }
1617
1624
  }
1618
1625
  }
1619
- function search(query, opts) {
1620
- const db = getDb();
1621
- // Validate collection filter if specified
1622
- let collectionName;
1623
- if (opts.collection) {
1624
- const coll = getCollectionFromYaml(opts.collection);
1626
+ // Resolve -c collection filter: supports single string, array, or undefined.
1627
+ // Returns validated collection names (exits on unknown collection).
1628
+ function resolveCollectionFilter(raw, useDefaults = false) {
1629
+ // If no filter specified and useDefaults is true, use default collections
1630
+ if (!raw && useDefaults) {
1631
+ return getDefaultCollectionNames();
1632
+ }
1633
+ if (!raw)
1634
+ return [];
1635
+ const names = Array.isArray(raw) ? raw : [raw];
1636
+ const validated = [];
1637
+ for (const name of names) {
1638
+ const coll = getCollectionFromYaml(name);
1625
1639
  if (!coll) {
1626
- console.error(`Collection not found: ${opts.collection}`);
1640
+ console.error(`Collection not found: ${name}`);
1627
1641
  closeDb();
1628
1642
  process.exit(1);
1629
1643
  }
1630
- collectionName = opts.collection;
1644
+ validated.push(name);
1645
+ }
1646
+ return validated;
1647
+ }
1648
+ // Post-filter results to only include files from specified collections.
1649
+ function filterByCollections(results, collectionNames) {
1650
+ if (collectionNames.length <= 1)
1651
+ return results;
1652
+ const prefixes = collectionNames.map(n => `qmd://${n}/`);
1653
+ return results.filter(r => {
1654
+ const path = r.filepath || r.file || '';
1655
+ return prefixes.some(p => path.startsWith(p));
1656
+ });
1657
+ }
1658
+ /**
1659
+ * Parse structured search query syntax.
1660
+ * Lines starting with lex:, vec:, or hyde: are routed directly.
1661
+ * Plain lines without prefix go through query expansion.
1662
+ *
1663
+ * Returns null if this is a plain query (single line, no prefix).
1664
+ * Returns StructuredSubSearch[] if structured syntax detected.
1665
+ * Throws if multiple plain lines (ambiguous).
1666
+ *
1667
+ * Examples:
1668
+ * "CAP theorem" -> null (plain query, use expansion)
1669
+ * "lex: CAP theorem" -> [{ type: 'lex', query: 'CAP theorem' }]
1670
+ * "lex: CAP\nvec: consistency" -> [{ type: 'lex', ... }, { type: 'vec', ... }]
1671
+ * "CAP\nconsistency" -> throws (multiple plain lines)
1672
+ */
1673
+ function parseStructuredQuery(query) {
1674
+ const rawLines = query.split('\n').map((line, idx) => ({
1675
+ raw: line,
1676
+ trimmed: line.trim(),
1677
+ number: idx + 1,
1678
+ })).filter(line => line.trimmed.length > 0);
1679
+ if (rawLines.length === 0)
1680
+ return null;
1681
+ const prefixRe = /^(lex|vec|hyde):\s*/i;
1682
+ const expandRe = /^expand:\s*/i;
1683
+ const typed = [];
1684
+ for (const line of rawLines) {
1685
+ if (expandRe.test(line.trimmed)) {
1686
+ if (rawLines.length > 1) {
1687
+ throw new Error(`Line ${line.number} starts with expand:, but query documents cannot mix expand with typed lines. Submit a single expand query instead.`);
1688
+ }
1689
+ const text = line.trimmed.replace(expandRe, '').trim();
1690
+ if (!text) {
1691
+ throw new Error('expand: query must include text.');
1692
+ }
1693
+ return null; // treat as standalone expand query
1694
+ }
1695
+ const match = line.trimmed.match(prefixRe);
1696
+ if (match) {
1697
+ const type = match[1].toLowerCase();
1698
+ const text = line.trimmed.slice(match[0].length).trim();
1699
+ if (!text) {
1700
+ throw new Error(`Line ${line.number} (${type}:) must include text.`);
1701
+ }
1702
+ if (/\r|\n/.test(text)) {
1703
+ throw new Error(`Line ${line.number} (${type}:) contains a newline. Keep each query on a single line.`);
1704
+ }
1705
+ typed.push({ type, query: text, line: line.number });
1706
+ continue;
1707
+ }
1708
+ if (rawLines.length === 1) {
1709
+ // Single plain line -> implicit expand
1710
+ return null;
1711
+ }
1712
+ throw new Error(`Line ${line.number} is missing a lex:/vec:/hyde: prefix. Each line in a query document must start with one.`);
1631
1713
  }
1714
+ return typed.length > 0 ? typed : null;
1715
+ }
1716
+ function search(query, opts) {
1717
+ const db = getDb();
1718
+ // Validate collection filter (supports multiple -c flags)
1719
+ // Use default collections if none specified
1720
+ const collectionNames = resolveCollectionFilter(opts.collection, true);
1721
+ const singleCollection = collectionNames.length === 1 ? collectionNames[0] : undefined;
1632
1722
  // Use large limit for --all, otherwise fetch more than needed and let outputResults filter
1633
1723
  const fetchLimit = opts.all ? 100000 : Math.max(50, opts.limit * 2);
1634
- const results = searchFTS(db, query, fetchLimit, collectionName);
1724
+ const results = filterByCollections(searchFTS(db, query, fetchLimit, singleCollection), collectionNames);
1635
1725
  // Add context to results
1636
1726
  const resultsWithContext = results.map(r => ({
1637
1727
  file: r.filepath,
@@ -1645,7 +1735,12 @@ function search(query, opts) {
1645
1735
  }));
1646
1736
  closeDb();
1647
1737
  if (resultsWithContext.length === 0) {
1648
- console.log("No results found.");
1738
+ if (opts.format === "json") {
1739
+ console.log("[]");
1740
+ }
1741
+ else {
1742
+ console.log("No results found.");
1743
+ }
1649
1744
  return;
1650
1745
  }
1651
1746
  outputResults(resultsWithContext, query, opts);
@@ -1668,18 +1763,14 @@ function logExpansionTree(originalQuery, expanded) {
1668
1763
  }
1669
1764
  async function vectorSearch(query, opts, _model = DEFAULT_EMBED_MODEL) {
1670
1765
  const store = getStore();
1671
- if (opts.collection) {
1672
- const coll = getCollectionFromYaml(opts.collection);
1673
- if (!coll) {
1674
- console.error(`Collection not found: ${opts.collection}`);
1675
- closeDb();
1676
- process.exit(1);
1677
- }
1678
- }
1766
+ // Validate collection filter (supports multiple -c flags)
1767
+ // Use default collections if none specified
1768
+ const collectionNames = resolveCollectionFilter(opts.collection, true);
1769
+ const singleCollection = collectionNames.length === 1 ? collectionNames[0] : undefined;
1679
1770
  checkIndexHealth(store.db);
1680
1771
  await withLLMSession(async () => {
1681
- const results = await vectorSearchQuery(store, query, {
1682
- collection: opts.collection,
1772
+ let results = await vectorSearchQuery(store, query, {
1773
+ collection: singleCollection,
1683
1774
  limit: opts.all ? 500 : (opts.limit || 10),
1684
1775
  minScore: opts.minScore || 0.3,
1685
1776
  hooks: {
@@ -1689,9 +1780,21 @@ async function vectorSearch(query, opts, _model = DEFAULT_EMBED_MODEL) {
1689
1780
  },
1690
1781
  },
1691
1782
  });
1783
+ // Post-filter for multi-collection
1784
+ if (collectionNames.length > 1) {
1785
+ results = results.filter(r => {
1786
+ const prefixes = collectionNames.map(n => `qmd://${n}/`);
1787
+ return prefixes.some(p => r.file.startsWith(p));
1788
+ });
1789
+ }
1692
1790
  closeDb();
1693
1791
  if (results.length === 0) {
1694
- console.log("No results found.");
1792
+ if (opts.format === "json") {
1793
+ console.log("[]");
1794
+ }
1795
+ else {
1796
+ console.log("No results found.");
1797
+ }
1695
1798
  return;
1696
1799
  }
1697
1800
  outputResults(results.map(r => ({
@@ -1707,42 +1810,105 @@ async function vectorSearch(query, opts, _model = DEFAULT_EMBED_MODEL) {
1707
1810
  }
1708
1811
  async function querySearch(query, opts, _embedModel = DEFAULT_EMBED_MODEL, _rerankModel = DEFAULT_RERANK_MODEL) {
1709
1812
  const store = getStore();
1710
- if (opts.collection) {
1711
- const coll = getCollectionFromYaml(opts.collection);
1712
- if (!coll) {
1713
- console.error(`Collection not found: ${opts.collection}`);
1714
- closeDb();
1715
- process.exit(1);
1716
- }
1717
- }
1813
+ // Validate collection filter (supports multiple -c flags)
1814
+ // Use default collections if none specified
1815
+ const collectionNames = resolveCollectionFilter(opts.collection, true);
1816
+ const singleCollection = collectionNames.length === 1 ? collectionNames[0] : undefined;
1718
1817
  checkIndexHealth(store.db);
1818
+ // Check for structured query syntax (lex:/vec:/hyde: prefixes)
1819
+ const structuredQueries = parseStructuredQuery(query);
1719
1820
  await withLLMSession(async () => {
1720
- const results = await hybridQuery(store, query, {
1721
- collection: opts.collection,
1722
- limit: opts.all ? 500 : (opts.limit || 10),
1723
- minScore: opts.minScore || 0,
1724
- hooks: {
1725
- onStrongSignal: (score) => {
1726
- process.stderr.write(`${c.dim}Strong BM25 signal (${score.toFixed(2)}) — skipping expansion${c.reset}\n`);
1727
- },
1728
- onExpand: (original, expanded) => {
1729
- logExpansionTree(original, expanded);
1730
- process.stderr.write(`${c.dim}Searching ${expanded.length + 1} queries...${c.reset}\n`);
1731
- },
1732
- onRerankStart: (chunkCount) => {
1733
- process.stderr.write(`${c.dim}Reranking ${chunkCount} chunks...${c.reset}\n`);
1734
- progress.indeterminate();
1821
+ let results;
1822
+ if (structuredQueries) {
1823
+ // Structured search — user provided their own query expansions
1824
+ const typeLabels = structuredQueries.map(s => s.type).join('+');
1825
+ process.stderr.write(`${c.dim}Structured search: ${structuredQueries.length} queries (${typeLabels})${c.reset}\n`);
1826
+ // Log each sub-query
1827
+ for (const s of structuredQueries) {
1828
+ let preview = s.query.replace(/\n/g, ' ');
1829
+ if (preview.length > 72)
1830
+ preview = preview.substring(0, 69) + '...';
1831
+ process.stderr.write(`${c.dim}├─ ${s.type}: ${preview}${c.reset}\n`);
1832
+ }
1833
+ process.stderr.write(`${c.dim}└─ Searching...${c.reset}\n`);
1834
+ results = await structuredSearch(store, structuredQueries, {
1835
+ collections: singleCollection ? [singleCollection] : undefined,
1836
+ limit: opts.all ? 500 : (opts.limit || 10),
1837
+ minScore: opts.minScore || 0,
1838
+ hooks: {
1839
+ onEmbedStart: (count) => {
1840
+ process.stderr.write(`${c.dim}Embedding ${count} ${count === 1 ? 'query' : 'queries'}...${c.reset}`);
1841
+ },
1842
+ onEmbedDone: (ms) => {
1843
+ process.stderr.write(`${c.dim} (${formatMs(ms)})${c.reset}\n`);
1844
+ },
1845
+ onRerankStart: (chunkCount) => {
1846
+ process.stderr.write(`${c.dim}Reranking ${chunkCount} chunks...${c.reset}`);
1847
+ progress.indeterminate();
1848
+ },
1849
+ onRerankDone: (ms) => {
1850
+ progress.clear();
1851
+ process.stderr.write(`${c.dim} (${formatMs(ms)})${c.reset}\n`);
1852
+ },
1735
1853
  },
1736
- onRerankDone: () => {
1737
- progress.clear();
1854
+ });
1855
+ }
1856
+ else {
1857
+ // Standard hybrid query with automatic expansion
1858
+ results = await hybridQuery(store, query, {
1859
+ collection: singleCollection,
1860
+ limit: opts.all ? 500 : (opts.limit || 10),
1861
+ minScore: opts.minScore || 0,
1862
+ hooks: {
1863
+ onStrongSignal: (score) => {
1864
+ process.stderr.write(`${c.dim}Strong BM25 signal (${score.toFixed(2)}) — skipping expansion${c.reset}\n`);
1865
+ },
1866
+ onExpandStart: () => {
1867
+ process.stderr.write(`${c.dim}Expanding query...${c.reset}`);
1868
+ },
1869
+ onExpand: (original, expanded, ms) => {
1870
+ process.stderr.write(`${c.dim} (${formatMs(ms)})${c.reset}\n`);
1871
+ logExpansionTree(original, expanded);
1872
+ process.stderr.write(`${c.dim}Searching ${expanded.length + 1} queries...${c.reset}\n`);
1873
+ },
1874
+ onEmbedStart: (count) => {
1875
+ process.stderr.write(`${c.dim}Embedding ${count} ${count === 1 ? 'query' : 'queries'}...${c.reset}`);
1876
+ },
1877
+ onEmbedDone: (ms) => {
1878
+ process.stderr.write(`${c.dim} (${formatMs(ms)})${c.reset}\n`);
1879
+ },
1880
+ onRerankStart: (chunkCount) => {
1881
+ process.stderr.write(`${c.dim}Reranking ${chunkCount} chunks...${c.reset}`);
1882
+ progress.indeterminate();
1883
+ },
1884
+ onRerankDone: (ms) => {
1885
+ progress.clear();
1886
+ process.stderr.write(`${c.dim} (${formatMs(ms)})${c.reset}\n`);
1887
+ },
1738
1888
  },
1739
- },
1740
- });
1889
+ });
1890
+ }
1891
+ // Post-filter for multi-collection
1892
+ if (collectionNames.length > 1) {
1893
+ results = results.filter(r => {
1894
+ const prefixes = collectionNames.map(n => `qmd://${n}/`);
1895
+ return prefixes.some(p => r.file.startsWith(p));
1896
+ });
1897
+ }
1741
1898
  closeDb();
1742
1899
  if (results.length === 0) {
1743
- console.log("No results found.");
1900
+ if (opts.format === "json") {
1901
+ console.log("[]");
1902
+ }
1903
+ else {
1904
+ console.log("No results found.");
1905
+ }
1744
1906
  return;
1745
1907
  }
1908
+ // Use first lex/vec query for output context, or original query
1909
+ const displayQuery = structuredQueries
1910
+ ? (structuredQueries.find(s => s.type === 'lex')?.query || structuredQueries.find(s => s.type === 'vec')?.query || query)
1911
+ : query;
1746
1912
  // Map to CLI output format — use bestChunk for snippet display
1747
1913
  outputResults(results.map(r => ({
1748
1914
  file: r.file,
@@ -1753,7 +1919,7 @@ async function querySearch(query, opts, _embedModel = DEFAULT_EMBED_MODEL, _rera
1753
1919
  score: r.score,
1754
1920
  context: r.context,
1755
1921
  docid: r.docid,
1756
- })), query, { ...opts, limit: results.length });
1922
+ })), displayQuery, { ...opts, limit: results.length });
1757
1923
  }, { maxDuration: 10 * 60 * 1000, name: 'querySearch' });
1758
1924
  }
1759
1925
  // Parse CLI arguments using util.parseArgs
@@ -1768,11 +1934,9 @@ function parseCLI() {
1768
1934
  context: {
1769
1935
  type: "string",
1770
1936
  },
1771
- "no-lex": {
1772
- type: "boolean",
1773
- },
1774
1937
  help: { type: "boolean", short: "h" },
1775
1938
  version: { type: "boolean", short: "v" },
1939
+ skill: { type: "boolean" },
1776
1940
  // Search options
1777
1941
  n: { type: "string" },
1778
1942
  "min-score": { type: "string" },
@@ -1783,7 +1947,7 @@ function parseCLI() {
1783
1947
  xml: { type: "boolean" },
1784
1948
  files: { type: "boolean" },
1785
1949
  json: { type: "boolean" },
1786
- collection: { type: "string", short: "c" }, // Filter by collection
1950
+ collection: { type: "string", short: "c", multiple: true }, // Filter by collection(s)
1787
1951
  // Collection options
1788
1952
  name: { type: "string" }, // collection name
1789
1953
  mask: { type: "string" }, // glob pattern
@@ -1844,50 +2008,100 @@ function parseCLI() {
1844
2008
  values,
1845
2009
  };
1846
2010
  }
2011
+ function showSkill() {
2012
+ const scriptDir = dirname(fileURLToPath(import.meta.url));
2013
+ const relativePath = pathJoin("skills", "qmd", "SKILL.md");
2014
+ const skillPath = pathJoin(scriptDir, "..", relativePath);
2015
+ console.log(`QMD Skill (${relativePath})`);
2016
+ console.log(`Location: ${skillPath}`);
2017
+ console.log("");
2018
+ if (!existsSync(skillPath)) {
2019
+ console.error("SKILL.md not found. If you built from source, ensure skills/qmd/SKILL.md exists.");
2020
+ return;
2021
+ }
2022
+ const content = readFileSync(skillPath, "utf-8");
2023
+ process.stdout.write(content.endsWith("\n") ? content : content + "\n");
2024
+ }
1847
2025
  function showHelp() {
2026
+ console.log("qmd — Quick Markdown Search");
2027
+ console.log("");
1848
2028
  console.log("Usage:");
1849
- console.log(" qmd collection add [path] --name <name> --mask <pattern> - Create/index collection");
1850
- console.log(" qmd collection list - List all collections with details");
1851
- console.log(" qmd collection remove <name> - Remove a collection by name");
1852
- console.log(" qmd collection rename <old> <new> - Rename a collection");
1853
- console.log(" qmd ls [collection[/path]] - List collections or files in a collection");
1854
- console.log(" qmd context add [path] \"text\" - Add context for path (defaults to current dir)");
1855
- console.log(" qmd context list - List all contexts");
1856
- console.log(" qmd context rm <path> - Remove context");
1857
- console.log(" qmd get <file>[:line] [-l N] [--from N] - Get document (optionally from line, max N lines)");
1858
- console.log(" qmd multi-get <pattern> [-l N] [--max-bytes N] - Get multiple docs by glob or comma-separated list");
1859
- console.log(" qmd status - Show index status and collections");
1860
- console.log(" qmd update [--pull] - Re-index all collections (--pull: git pull first)");
1861
- console.log(" qmd embed [-f] - Create vector embeddings (900 tokens/chunk, 15% overlap)");
1862
- console.log(" qmd cleanup - Remove cache and orphaned data, vacuum DB");
1863
- console.log(" qmd query <query> - Search with query expansion + reranking (recommended)");
1864
- console.log(" qmd search <query> - Full-text keyword search (BM25, no LLM)");
1865
- console.log(" qmd vsearch <query> - Vector similarity search (no reranking)");
1866
- console.log(" qmd mcp - Start MCP server (stdio transport)");
1867
- console.log(" qmd mcp --http [--port N] - Start MCP server (HTTP transport, default port 8181)");
1868
- console.log(" qmd mcp --http --daemon - Start MCP server as background daemon");
1869
- console.log(" qmd mcp stop - Stop background MCP daemon");
2029
+ console.log(" qmd <command> [options]");
2030
+ console.log("");
2031
+ console.log("Primary commands:");
2032
+ console.log(" qmd query <query> - Hybrid search with auto expansion + reranking (recommended)");
2033
+ console.log(" qmd query 'lex:..\\nvec:...' - Structured query document (you provide lex/vec/hyde lines)");
2034
+ console.log(" qmd search <query> - Full-text BM25 keywords (no LLM)");
2035
+ console.log(" qmd vsearch <query> - Vector similarity only");
2036
+ console.log(" qmd get <file>[:line] [-l N] - Show a single document, optional line slice");
2037
+ console.log(" qmd multi-get <pattern> - Batch fetch via glob or comma-separated list");
2038
+ console.log(" qmd mcp - Start the MCP server (stdio transport for AI agents)");
2039
+ console.log("");
2040
+ console.log("Collections & context:");
2041
+ console.log(" qmd collection add/list/remove/rename/show - Manage indexed folders");
2042
+ console.log(" qmd context add/list/rm - Attach human-written summaries");
2043
+ console.log(" qmd ls [collection[/path]] - Inspect indexed files");
2044
+ console.log("");
2045
+ console.log("Maintenance:");
2046
+ console.log(" qmd status - View index + collection health");
2047
+ console.log(" qmd update [--pull] - Re-index collections (optionally git pull first)");
2048
+ console.log(" qmd embed [-f] - Generate/refresh vector embeddings");
2049
+ console.log(" qmd cleanup - Clear caches, vacuum DB");
2050
+ console.log("");
2051
+ console.log("Query syntax (qmd query):");
2052
+ console.log(" QMD queries are either a single expand query (no prefix) or a multi-line");
2053
+ console.log(" document where every line is typed with lex:, vec:, or hyde:. This grammar");
2054
+ console.log(" matches the docs in docs/SYNTAX.md and is enforced in the CLI.");
2055
+ console.log("");
2056
+ const grammar = [
2057
+ `query = expand_query | query_document ;`,
2058
+ `expand_query = text | explicit_expand ;`,
2059
+ `explicit_expand= "expand:" text ;`,
2060
+ `query_document = { typed_line } ;`,
2061
+ `typed_line = type ":" text newline ;`,
2062
+ `type = "lex" | "vec" | "hyde" ;`,
2063
+ `text = quoted_phrase | plain_text ;`,
2064
+ `quoted_phrase = '"' { character } '"' ;`,
2065
+ `plain_text = { character } ;`,
2066
+ `newline = "\\n" ;`,
2067
+ ];
2068
+ console.log(" Grammar:");
2069
+ for (const line of grammar) {
2070
+ console.log(` ${line}`);
2071
+ }
2072
+ console.log("");
2073
+ console.log(" Examples:");
2074
+ console.log(" qmd query \"how does auth work\" # single-line → implicit expand");
2075
+ console.log(" qmd query $'lex: CAP theorem\\nvec: consistency' # typed query document");
2076
+ console.log(" qmd query $'lex: \"exact matches\" sports -baseball' # phrase + negation lex search");
2077
+ console.log(" qmd query $'hyde: Hypothetical answer text' # hyde-only document");
2078
+ console.log("");
2079
+ console.log(" Constraints:");
2080
+ console.log(" - Standalone expand queries cannot mix with typed lines.");
2081
+ console.log(" - Query documents allow only lex:, vec:, or hyde: prefixes.");
2082
+ console.log(" - Each typed line must be single-line text with balanced quotes.");
2083
+ console.log("");
2084
+ console.log("AI agents & integrations:");
2085
+ console.log(" - Run `qmd mcp` to expose the MCP server (stdio) to agents/IDEs.");
2086
+ console.log(" - `qmd --skill` prints the packaged skills/qmd/SKILL.md (path + contents).");
2087
+ console.log(" - Advanced: `qmd mcp --http ...` and `qmd mcp --http --daemon` are optional for custom transports.");
1870
2088
  console.log("");
1871
2089
  console.log("Global options:");
1872
- console.log(" --index <name> - Use custom index name (default: index)");
2090
+ console.log(" --index <name> - Use a named index (default: index)");
1873
2091
  console.log("");
1874
2092
  console.log("Search options:");
1875
- console.log(" -n <num> - Number of results (default: 5, or 20 for --files)");
1876
- console.log(" --all - Return all matches (use with --min-score to filter)");
2093
+ console.log(" -n <num> - Max results (default 5, or 20 for --files/--json)");
2094
+ console.log(" --all - Return all matches (pair with --min-score)");
1877
2095
  console.log(" --min-score <num> - Minimum similarity score");
1878
2096
  console.log(" --full - Output full document instead of snippet");
1879
- console.log(" --line-numbers - Add line numbers to output");
1880
- console.log(" --files - Output docid,score,filepath,context (default: 20 results)");
1881
- console.log(" --json - JSON output with snippets (default: 20 results)");
1882
- console.log(" --csv - CSV output with snippets");
1883
- console.log(" --md - Markdown output");
1884
- console.log(" --xml - XML output");
1885
- console.log(" -c, --collection <name> - Filter results to a specific collection");
2097
+ console.log(" --line-numbers - Include line numbers in output");
2098
+ console.log(" --files | --json | --csv | --md | --xml - Output format");
2099
+ console.log(" -c, --collection <name> - Filter by one or more collections");
1886
2100
  console.log("");
1887
2101
  console.log("Multi-get options:");
1888
2102
  console.log(" -l <num> - Maximum lines per file");
1889
- console.log(" --max-bytes <num> - Skip files larger than N bytes (default: 10240)");
1890
- console.log(" --json/--csv/--md/--xml/--files - Output format (same as search)");
2103
+ console.log(" --max-bytes <num> - Skip files larger than N bytes (default 10240)");
2104
+ console.log(" --json/--csv/--md/--xml/--files - Same formats as search");
1891
2105
  console.log("");
1892
2106
  console.log(`Index: ${getDbPath()}`);
1893
2107
  }
@@ -1906,12 +2120,22 @@ async function showVersion() {
1906
2120
  console.log(`qmd ${versionStr}`);
1907
2121
  }
1908
2122
  // Main CLI - only run if this is the main module
1909
- if (fileURLToPath(import.meta.url) === process.argv[1] || process.argv[1]?.endsWith("/qmd.ts") || process.argv[1]?.endsWith("/qmd.js")) {
2123
/**
 * Report whether `candidate` resolves, after following symlinks, to `target`.
 *
 * Used for main-module detection when the CLI is launched through a symlink
 * (for example a linked bin entry), where the raw argv path differs from the
 * module's own path.
 *
 * @param {string | undefined | null} candidate - Path to resolve, typically `process.argv[1]`.
 * @param {string} target - Path to compare against.
 * @returns {boolean} `true` only when `candidate` could be resolved and equals `target`.
 */
function resolvesToThisFile(candidate, target) {
    if (candidate == null) {
        return false;
    }
    try {
        return realpathSync(candidate) === target;
    }
    catch {
        // realpathSync throws when the path cannot be resolved (missing file,
        // permission error, ...). This check runs at module top level, so an
        // uncaught throw would break every import of this module; an
        // unresolvable argv[1] simply means "we are not the entry point".
        return false;
    }
}
const __filename = fileURLToPath(import.meta.url);
const argv1 = process.argv[1];
// Cheap string comparisons first; the filesystem lookup is only a fallback.
const isMain = argv1 === __filename
    || argv1?.endsWith("/qmd.ts")
    || argv1?.endsWith("/qmd.js")
    || resolvesToThisFile(argv1, __filename);
2129
+ if (isMain) {
1910
2130
  const cli = parseCLI();
1911
2131
  if (cli.values.version) {
1912
2132
  await showVersion();
1913
2133
  process.exit(0);
1914
2134
  }
2135
+ if (cli.values.skill) {
2136
+ showSkill();
2137
+ process.exit(0);
2138
+ }
1915
2139
  if (!cli.command || cli.values.help) {
1916
2140
  showHelp();
1917
2141
  process.exit(cli.values.help ? 0 : 1);
@@ -1920,13 +2144,12 @@ if (fileURLToPath(import.meta.url) === process.argv[1] || process.argv[1]?.endsW
1920
2144
  case "context": {
1921
2145
  const subcommand = cli.args[0];
1922
2146
  if (!subcommand) {
1923
- console.error("Usage: qmd context <add|list|check|rm>");
2147
+ console.error("Usage: qmd context <add|list|rm>");
1924
2148
  console.error("");
1925
2149
  console.error("Commands:");
1926
2150
  console.error(" qmd context add [path] \"text\" - Add context (defaults to current dir)");
1927
2151
  console.error(" qmd context add / \"text\" - Add global context to all collections");
1928
2152
  console.error(" qmd context list - List all contexts");
1929
- console.error(" qmd context check - Check for missing contexts");
1930
2153
  console.error(" qmd context rm <path> - Remove context");
1931
2154
  process.exit(1);
1932
2155
  }
@@ -1968,10 +2191,6 @@ if (fileURLToPath(import.meta.url) === process.argv[1] || process.argv[1]?.endsW
1968
2191
  contextList();
1969
2192
  break;
1970
2193
  }
1971
- case "check": {
1972
- contextCheck();
1973
- break;
1974
- }
1975
2194
  case "rm":
1976
2195
  case "remove": {
1977
2196
  if (cli.args.length < 2 || !cli.args[1]) {
@@ -1986,7 +2205,7 @@ if (fileURLToPath(import.meta.url) === process.argv[1] || process.argv[1]?.endsW
1986
2205
  }
1987
2206
  default:
1988
2207
  console.error(`Unknown subcommand: ${subcommand}`);
1989
- console.error("Available: add, list, check, rm");
2208
+ console.error("Available: add, list, rm");
1990
2209
  process.exit(1);
1991
2210
  }
1992
2211
  break;
@@ -2051,9 +2270,99 @@ if (fileURLToPath(import.meta.url) === process.argv[1] || process.argv[1]?.endsW
2051
2270
  collectionRename(cli.args[1], cli.args[2]);
2052
2271
  break;
2053
2272
  }
2273
+ case "set-update":
2274
+ case "update-cmd": {
2275
+ const name = cli.args[1];
2276
+ const cmd = cli.args.slice(2).join(' ') || null;
2277
+ if (!name) {
2278
+ console.error("Usage: qmd collection update-cmd <name> [command]");
2279
+ console.error(" Set the command to run before indexing (e.g., 'git pull')");
2280
+ console.error(" Omit command to clear it");
2281
+ process.exit(1);
2282
+ }
2283
+ const { updateCollectionSettings, getCollection } = await import("./collections.js");
2284
+ const col = getCollection(name);
2285
+ if (!col) {
2286
+ console.error(`Collection not found: ${name}`);
2287
+ process.exit(1);
2288
+ }
2289
+ updateCollectionSettings(name, { update: cmd });
2290
+ if (cmd) {
2291
+ console.log(`✓ Set update command for '${name}': ${cmd}`);
2292
+ }
2293
+ else {
2294
+ console.log(`✓ Cleared update command for '${name}'`);
2295
+ }
2296
+ break;
2297
+ }
2298
+ case "include":
2299
+ case "exclude": {
2300
+ const name = cli.args[1];
2301
+ if (!name) {
2302
+ console.error(`Usage: qmd collection ${subcommand} <name>`);
2303
+ console.error(` ${subcommand === 'include' ? 'Include' : 'Exclude'} collection in default queries`);
2304
+ process.exit(1);
2305
+ }
2306
+ const { updateCollectionSettings, getCollection } = await import("./collections.js");
2307
+ const col = getCollection(name);
2308
+ if (!col) {
2309
+ console.error(`Collection not found: ${name}`);
2310
+ process.exit(1);
2311
+ }
2312
+ const include = subcommand === 'include';
2313
+ updateCollectionSettings(name, { includeByDefault: include });
2314
+ console.log(`✓ Collection '${name}' ${include ? 'included in' : 'excluded from'} default queries`);
2315
+ break;
2316
+ }
2317
+ case "show":
2318
+ case "info": {
2319
+ const name = cli.args[1];
2320
+ if (!name) {
2321
+ console.error("Usage: qmd collection show <name>");
2322
+ process.exit(1);
2323
+ }
2324
+ const { getCollection } = await import("./collections.js");
2325
+ const col = getCollection(name);
2326
+ if (!col) {
2327
+ console.error(`Collection not found: ${name}`);
2328
+ process.exit(1);
2329
+ }
2330
+ console.log(`Collection: ${name}`);
2331
+ console.log(` Path: ${col.path}`);
2332
+ console.log(` Pattern: ${col.pattern}`);
2333
+ console.log(` Include: ${col.includeByDefault !== false ? 'yes (default)' : 'no'}`);
2334
+ if (col.update) {
2335
+ console.log(` Update: ${col.update}`);
2336
+ }
2337
+ if (col.context) {
2338
+ const ctxCount = Object.keys(col.context).length;
2339
+ console.log(` Contexts: ${ctxCount}`);
2340
+ }
2341
+ break;
2342
+ }
2343
+ case "help":
2344
+ case undefined: {
2345
+ console.log("Usage: qmd collection <command> [options]");
2346
+ console.log("");
2347
+ console.log("Commands:");
2348
+ console.log(" list List all collections");
2349
+ console.log(" add <path> [--name NAME] Add a collection");
2350
+ console.log(" remove <name> Remove a collection");
2351
+ console.log(" rename <old> <new> Rename a collection");
2352
+ console.log(" show <name> Show collection details");
2353
+ console.log(" update-cmd <name> [cmd] Set pre-update command (e.g., 'git pull')");
2354
+ console.log(" include <name> Include in default queries");
2355
+ console.log(" exclude <name> Exclude from default queries");
2356
+ console.log("");
2357
+ console.log("Examples:");
2358
+ console.log(" qmd collection add ~/notes --name notes");
2359
+ console.log(" qmd collection update-cmd brain 'git pull'");
2360
+ console.log(" qmd collection exclude archive");
2361
+ process.exit(0);
2362
+ }
2054
2363
  default:
2055
2364
  console.error(`Unknown subcommand: ${subcommand}`);
2056
- console.error("Available: list, add, remove, rename");
2365
+ console.error("Run 'qmd collection help' for usage");
2057
2366
  process.exit(1);
2058
2367
  }
2059
2368
  break;