@tobilu/qmd 1.0.7 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/qmd.js CHANGED
@@ -1,14 +1,15 @@
1
+ #!/usr/bin/env node
1
2
  import { openDatabase } from "./db.js";
2
3
  import fastGlob from "fast-glob";
3
4
  import { execSync, spawn as nodeSpawn } from "child_process";
4
5
  import { fileURLToPath } from "url";
5
6
  import { dirname, join as pathJoin } from "path";
6
7
  import { parseArgs } from "util";
7
- import { readFileSync, statSync, existsSync, unlinkSync, writeFileSync, openSync, closeSync, mkdirSync } from "fs";
8
- import { getPwd, getRealPath, homedir, resolve, enableProductionMode, searchFTS, extractSnippet, getContextForFile, getContextForPath, listCollections, removeCollection, renameCollection, findSimilarFiles, findDocumentByDocid, isDocid, matchFilesByGlob, getHashesNeedingEmbedding, getHashesForEmbedding, clearAllEmbeddings, insertEmbedding, getStatus, hashContent, extractTitle, formatDocForEmbedding, chunkDocumentByTokens, clearCache, getCacheKey, getCachedResult, setCachedResult, getIndexHealth, parseVirtualPath, buildVirtualPath, isVirtualPath, resolveVirtualPath, toVirtualPath, insertContent, insertDocument, findActiveDocument, updateDocumentTitle, updateDocument, deactivateDocument, getActiveDocumentPaths, cleanupOrphanedContent, deleteLLMCache, deleteInactiveDocuments, cleanupOrphanedVectors, vacuumDatabase, getCollectionsWithoutContext, getTopLevelPathsWithoutContext, handelize, hybridQuery, vectorSearchQuery, addLineNumbers, DEFAULT_EMBED_MODEL, DEFAULT_RERANK_MODEL, DEFAULT_GLOB, DEFAULT_MULTI_GET_MAX_BYTES, createStore, getDefaultDbPath, } from "./store.js";
8
+ import { readFileSync, realpathSync, statSync, existsSync, unlinkSync, writeFileSync, openSync, closeSync, mkdirSync } from "fs";
9
+ import { getPwd, getRealPath, homedir, resolve, enableProductionMode, searchFTS, extractSnippet, getContextForFile, getContextForPath, listCollections, removeCollection, renameCollection, findSimilarFiles, findDocumentByDocid, isDocid, matchFilesByGlob, getHashesNeedingEmbedding, getHashesForEmbedding, clearAllEmbeddings, insertEmbedding, getStatus, hashContent, extractTitle, formatDocForEmbedding, chunkDocumentByTokens, clearCache, getCacheKey, getCachedResult, setCachedResult, getIndexHealth, parseVirtualPath, buildVirtualPath, isVirtualPath, resolveVirtualPath, toVirtualPath, insertContent, insertDocument, findActiveDocument, updateDocumentTitle, updateDocument, deactivateDocument, getActiveDocumentPaths, cleanupOrphanedContent, deleteLLMCache, deleteInactiveDocuments, cleanupOrphanedVectors, vacuumDatabase, getCollectionsWithoutContext, getTopLevelPathsWithoutContext, handelize, hybridQuery, vectorSearchQuery, structuredSearch, addLineNumbers, DEFAULT_EMBED_MODEL, DEFAULT_RERANK_MODEL, DEFAULT_GLOB, DEFAULT_MULTI_GET_MAX_BYTES, createStore, getDefaultDbPath, } from "./store.js";
9
10
  import { disposeDefaultLlamaCpp, getDefaultLlamaCpp, withLLMSession, pullModels, DEFAULT_EMBED_MODEL_URI, DEFAULT_GENERATE_MODEL_URI, DEFAULT_RERANK_MODEL_URI, DEFAULT_MODEL_CACHE_DIR } from "./llm.js";
10
11
  import { formatSearchResults, formatDocuments, escapeXml, escapeCSV, } from "./formatter.js";
11
- import { getCollection as getCollectionFromYaml, listCollections as yamlListCollections, addContext as yamlAddContext, removeContext as yamlRemoveContext, setGlobalContext, listAllContexts, setConfigIndexName, } from "./collections.js";
12
+ import { getCollection as getCollectionFromYaml, listCollections as yamlListCollections, getDefaultCollectionNames, addContext as yamlAddContext, removeContext as yamlRemoveContext, setGlobalContext, listAllContexts, setConfigIndexName, } from "./collections.js";
12
13
  // Enable production mode - allows using default database path
13
14
  // Tests must set INDEX_PATH or use createStore() with explicit path
14
15
  enableProductionMode();
@@ -155,6 +156,11 @@ function formatTimeAgo(date) {
155
156
  const days = Math.floor(hours / 24);
156
157
  return `${days}d ago`;
157
158
  }
159
/**
 * Format a duration in milliseconds for progress output.
 *
 * Durations that round to less than one second are shown as whole
 * milliseconds ("250ms"); anything longer is shown in seconds with one
 * decimal place ("1.5s").
 *
 * @param {number} ms - Elapsed time in milliseconds (may be fractional).
 * @returns {string} Human-readable duration.
 */
function formatMs(ms) {
    // Round before comparing so a fractional input (e.g. 12.3456 or 999.6)
    // never prints as "12.3456ms" or "1000ms".
    const wholeMs = Math.round(ms);
    if (wholeMs < 1000) {
        return `${wholeMs}ms`;
    }
    return `${(ms / 1000).toFixed(1)}s`;
}
158
164
  function formatBytes(bytes) {
159
165
  if (bytes < 1024)
160
166
  return `${bytes} B`;
@@ -308,6 +314,37 @@ async function showStatus() {
308
314
  catch {
309
315
  // Don't fail status if LLM init fails
310
316
  }
317
+ // Tips section
318
+ const tips = [];
319
+ // Check for collections without context
320
+ const collectionsWithoutContext = collections.filter(col => {
321
+ const contexts = contextsByCollection.get(col.name) || [];
322
+ return contexts.length === 0;
323
+ });
324
+ if (collectionsWithoutContext.length > 0) {
325
+ const names = collectionsWithoutContext.map(c => c.name).slice(0, 3).join(', ');
326
+ const more = collectionsWithoutContext.length > 3 ? ` +${collectionsWithoutContext.length - 3} more` : '';
327
+ tips.push(`Add context to collections for better search results: ${names}${more}`);
328
+ tips.push(` ${c.dim}qmd context add qmd://<name>/ "What this collection contains"${c.reset}`);
329
+ tips.push(` ${c.dim}qmd context add qmd://<name>/meeting-notes "Weekly team meeting notes"${c.reset}`);
330
+ }
331
+ // Check for collections without update commands
332
+ const collectionsWithoutUpdate = collections.filter(col => {
333
+ const yamlCol = getCollectionFromYaml(col.name);
334
+ return !yamlCol?.update;
335
+ });
336
+ if (collectionsWithoutUpdate.length > 0 && collections.length > 1) {
337
+ const names = collectionsWithoutUpdate.map(c => c.name).slice(0, 3).join(', ');
338
+ const more = collectionsWithoutUpdate.length > 3 ? ` +${collectionsWithoutUpdate.length - 3} more` : '';
339
+ tips.push(`Add update commands to keep collections fresh: ${names}${more}`);
340
+ tips.push(` ${c.dim}qmd collection update-cmd <name> 'git stash && git pull --rebase --ff-only && git stash pop'${c.reset}`);
341
+ }
342
+ if (tips.length > 0) {
343
+ console.log(`\n${c.bold}Tips${c.reset}`);
344
+ for (const tip of tips) {
345
+ console.log(` ${tip}`);
346
+ }
347
+ }
311
348
  closeDb();
312
349
  }
313
350
  async function updateCollections() {
@@ -533,49 +570,6 @@ function contextRemove(pathArg) {
533
570
  }
534
571
  console.log(`${c.green}✓${c.reset} Removed context for: qmd://${detected.collectionName}/${detected.relativePath}`);
535
572
  }
536
- function contextCheck() {
537
- const db = getDb();
538
- // Get collections without any context
539
- const collectionsWithoutContext = getCollectionsWithoutContext(db);
540
- // Get all collections to check for missing path contexts
541
- const allCollections = listCollections(db);
542
- if (collectionsWithoutContext.length === 0 && allCollections.length > 0) {
543
- // Check if all collections have contexts
544
- console.log(`\n${c.green}✓${c.reset} ${c.bold}All collections have context configured${c.reset}\n`);
545
- }
546
- if (collectionsWithoutContext.length > 0) {
547
- console.log(`\n${c.yellow}Collections without any context:${c.reset}\n`);
548
- for (const coll of collectionsWithoutContext) {
549
- console.log(`${c.cyan}${coll.name}${c.reset} ${c.dim}(${coll.doc_count} documents)${c.reset}`);
550
- console.log(` ${c.dim}Suggestion: qmd context add qmd://${coll.name}/ "Description of ${coll.name}"${c.reset}\n`);
551
- }
552
- }
553
- // Check for top-level paths without context within collections that DO have context
554
- const collectionsWithContext = allCollections.filter(c => c && !collectionsWithoutContext.some(cwc => cwc.name === c.name));
555
- let hasPathSuggestions = false;
556
- for (const coll of collectionsWithContext) {
557
- if (!coll)
558
- continue;
559
- const missingPaths = getTopLevelPathsWithoutContext(db, coll.name);
560
- if (missingPaths.length > 0) {
561
- if (!hasPathSuggestions) {
562
- console.log(`${c.yellow}Top-level directories without context:${c.reset}\n`);
563
- hasPathSuggestions = true;
564
- }
565
- console.log(`${c.cyan}${coll.name}${c.reset}`);
566
- for (const path of missingPaths) {
567
- console.log(` ${path}`);
568
- console.log(` ${c.dim}Suggestion: qmd context add qmd://${coll.name}/${path} "Description of ${path}"${c.reset}`);
569
- }
570
- console.log('');
571
- }
572
- }
573
- if (collectionsWithoutContext.length === 0 && !hasPathSuggestions) {
574
- console.log(`${c.dim}All collections and major paths have context configured.${c.reset}`);
575
- console.log(`${c.dim}Use 'qmd context list' to see all configured contexts.${c.reset}\n`);
576
- }
577
- closeDb();
578
- }
579
573
  function getDocument(filename, fromLine, maxLines, lineNumbers) {
580
574
  const db = getDb();
581
575
  // Parse :linenum suffix from filename (e.g., "file.md:100")
@@ -1103,7 +1097,11 @@ function collectionList() {
1103
1097
  for (const coll of collections) {
1104
1098
  const updatedAt = coll.last_modified ? new Date(coll.last_modified) : new Date();
1105
1099
  const timeAgo = formatTimeAgo(updatedAt);
1106
- console.log(`${c.cyan}${coll.name}${c.reset} ${c.dim}(qmd://${coll.name}/)${c.reset}`);
1100
+ // Get YAML config to check includeByDefault
1101
+ const yamlColl = getCollectionFromYaml(coll.name);
1102
+ const excluded = yamlColl?.includeByDefault === false;
1103
+ const excludeTag = excluded ? ` ${c.yellow}[excluded]${c.reset}` : '';
1104
+ console.log(`${c.cyan}${coll.name}${c.reset} ${c.dim}(qmd://${coll.name}/)${c.reset}${excludeTag}`);
1107
1105
  console.log(` ${c.dim}Pattern:${c.reset} ${coll.glob_pattern}`);
1108
1106
  console.log(` ${c.dim}Files:${c.reset} ${coll.active_count}`);
1109
1107
  console.log(` ${c.dim}Updated:${c.reset} ${timeAgo}`);
@@ -1627,7 +1625,11 @@ function outputResults(results, query, opts) {
1627
1625
  }
1628
1626
  // Resolve -c collection filter: supports single string, array, or undefined.
1629
1627
  // Returns validated collection names (exits on unknown collection).
1630
- function resolveCollectionFilter(raw) {
1628
+ function resolveCollectionFilter(raw, useDefaults = false) {
1629
+ // If no filter specified and useDefaults is true, use default collections
1630
+ if (!raw && useDefaults) {
1631
+ return getDefaultCollectionNames();
1632
+ }
1631
1633
  if (!raw)
1632
1634
  return [];
1633
1635
  const names = Array.isArray(raw) ? raw : [raw];
@@ -1653,10 +1655,69 @@ function filterByCollections(results, collectionNames) {
1653
1655
  return prefixes.some(p => path.startsWith(p));
1654
1656
  });
1655
1657
  }
1658
/**
 * Parse structured search query syntax.
 *
 * The query is split on "\n"; blank lines are ignored, and each remaining
 * line is trimmed (so a trailing "\r" from CRLF input is dropped). Line
 * numbers in results and error messages are 1-based positions in the
 * original, unfiltered input.
 *
 * Returns null when the query should be handled as a plain (expand) query:
 *   - the query has no non-blank lines,
 *   - it is a single line without a lex:/vec:/hyde: prefix, or
 *   - it is a single line starting with "expand:" followed by text.
 * Otherwise returns one `{ type, query, line }` entry per typed line, where
 * `type` is the lower-cased prefix ('lex' | 'vec' | 'hyde').
 *
 * Prefixes are matched case-insensitively.
 *
 * Examples:
 *   "CAP theorem"                  -> null (plain query, use expansion)
 *   "expand: CAP theorem"          -> null (explicit expand query)
 *   "lex: CAP theorem"             -> [{ type: 'lex', query: 'CAP theorem', line: 1 }]
 *   "lex: CAP\nvec: consistency"   -> [{ type: 'lex', ... }, { type: 'vec', ... }]
 *   "CAP\nconsistency"             -> throws (unprefixed line in a multi-line document)
 *
 * @param {string} query - Raw query text as supplied by the user.
 * @returns {Array<{type: string, query: string, line: number}> | null}
 * @throws {Error} If an expand: line is combined with other lines or has no
 *   text, if a typed line has no text or contains a carriage return, or if a
 *   multi-line document contains a line without a lex:/vec:/hyde: prefix.
 */
function parseStructuredQuery(query) {
    const lines = query
        .split('\n')
        .map((line, idx) => ({ trimmed: line.trim(), number: idx + 1 }))
        .filter(line => line.trimmed.length > 0);
    if (lines.length === 0)
        return null;
    const prefixRe = /^(lex|vec|hyde):\s*/i;
    const expandRe = /^expand:\s*/i;
    const typed = [];
    for (const line of lines) {
        if (expandRe.test(line.trimmed)) {
            // expand: is only valid as the sole line of the query.
            if (lines.length > 1) {
                throw new Error(`Line ${line.number} starts with expand:, but query documents cannot mix expand with typed lines. Submit a single expand query instead.`);
            }
            const text = line.trimmed.replace(expandRe, '').trim();
            if (!text) {
                throw new Error('expand: query must include text.');
            }
            return null; // treat as standalone expand query
        }
        const match = line.trimmed.match(prefixRe);
        if (match) {
            const type = match[1].toLowerCase();
            const text = line.trimmed.slice(match[0].length).trim();
            if (!text) {
                throw new Error(`Line ${line.number} (${type}:) must include text.`);
            }
            // The input was split on "\n" only, so a bare "\r" in the middle
            // of a line can still reach this point.
            if (/\r|\n/.test(text)) {
                throw new Error(`Line ${line.number} (${type}:) contains a newline. Keep each query on a single line.`);
            }
            typed.push({ type, query: text, line: line.number });
            continue;
        }
        if (lines.length === 1) {
            // Single plain line -> implicit expand
            return null;
        }
        throw new Error(`Line ${line.number} is missing a lex:/vec:/hyde: prefix. Each line in a query document must start with one.`);
    }
    return typed.length > 0 ? typed : null;
}
1656
1716
  function search(query, opts) {
1657
1717
  const db = getDb();
1658
1718
  // Validate collection filter (supports multiple -c flags)
1659
- const collectionNames = resolveCollectionFilter(opts.collection);
1719
+ // Use default collections if none specified
1720
+ const collectionNames = resolveCollectionFilter(opts.collection, true);
1660
1721
  const singleCollection = collectionNames.length === 1 ? collectionNames[0] : undefined;
1661
1722
  // Use large limit for --all, otherwise fetch more than needed and let outputResults filter
1662
1723
  const fetchLimit = opts.all ? 100000 : Math.max(50, opts.limit * 2);
@@ -1703,7 +1764,8 @@ function logExpansionTree(originalQuery, expanded) {
1703
1764
  async function vectorSearch(query, opts, _model = DEFAULT_EMBED_MODEL) {
1704
1765
  const store = getStore();
1705
1766
  // Validate collection filter (supports multiple -c flags)
1706
- const collectionNames = resolveCollectionFilter(opts.collection);
1767
+ // Use default collections if none specified
1768
+ const collectionNames = resolveCollectionFilter(opts.collection, true);
1707
1769
  const singleCollection = collectionNames.length === 1 ? collectionNames[0] : undefined;
1708
1770
  checkIndexHealth(store.db);
1709
1771
  await withLLMSession(async () => {
@@ -1749,31 +1811,83 @@ async function vectorSearch(query, opts, _model = DEFAULT_EMBED_MODEL) {
1749
1811
  async function querySearch(query, opts, _embedModel = DEFAULT_EMBED_MODEL, _rerankModel = DEFAULT_RERANK_MODEL) {
1750
1812
  const store = getStore();
1751
1813
  // Validate collection filter (supports multiple -c flags)
1752
- const collectionNames = resolveCollectionFilter(opts.collection);
1814
+ // Use default collections if none specified
1815
+ const collectionNames = resolveCollectionFilter(opts.collection, true);
1753
1816
  const singleCollection = collectionNames.length === 1 ? collectionNames[0] : undefined;
1754
1817
  checkIndexHealth(store.db);
1818
+ // Check for structured query syntax (lex:/vec:/hyde: prefixes)
1819
+ const structuredQueries = parseStructuredQuery(query);
1755
1820
  await withLLMSession(async () => {
1756
- let results = await hybridQuery(store, query, {
1757
- collection: singleCollection,
1758
- limit: opts.all ? 500 : (opts.limit || 10),
1759
- minScore: opts.minScore || 0,
1760
- hooks: {
1761
- onStrongSignal: (score) => {
1762
- process.stderr.write(`${c.dim}Strong BM25 signal (${score.toFixed(2)}) — skipping expansion${c.reset}\n`);
1763
- },
1764
- onExpand: (original, expanded) => {
1765
- logExpansionTree(original, expanded);
1766
- process.stderr.write(`${c.dim}Searching ${expanded.length + 1} queries...${c.reset}\n`);
1767
- },
1768
- onRerankStart: (chunkCount) => {
1769
- process.stderr.write(`${c.dim}Reranking ${chunkCount} chunks...${c.reset}\n`);
1770
- progress.indeterminate();
1821
+ let results;
1822
+ if (structuredQueries) {
1823
+ // Structured search: the user provided their own query expansions
1824
+ const typeLabels = structuredQueries.map(s => s.type).join('+');
1825
+ process.stderr.write(`${c.dim}Structured search: ${structuredQueries.length} queries (${typeLabels})${c.reset}\n`);
1826
+ // Log each sub-query
1827
+ for (const s of structuredQueries) {
1828
+ let preview = s.query.replace(/\n/g, ' ');
1829
+ if (preview.length > 72)
1830
+ preview = preview.substring(0, 69) + '...';
1831
+ process.stderr.write(`${c.dim}├─ ${s.type}: ${preview}${c.reset}\n`);
1832
+ }
1833
+ process.stderr.write(`${c.dim}└─ Searching...${c.reset}\n`);
1834
+ results = await structuredSearch(store, structuredQueries, {
1835
+ collections: singleCollection ? [singleCollection] : undefined,
1836
+ limit: opts.all ? 500 : (opts.limit || 10),
1837
+ minScore: opts.minScore || 0,
1838
+ hooks: {
1839
+ onEmbedStart: (count) => {
1840
+ process.stderr.write(`${c.dim}Embedding ${count} ${count === 1 ? 'query' : 'queries'}...${c.reset}`);
1841
+ },
1842
+ onEmbedDone: (ms) => {
1843
+ process.stderr.write(`${c.dim} (${formatMs(ms)})${c.reset}\n`);
1844
+ },
1845
+ onRerankStart: (chunkCount) => {
1846
+ process.stderr.write(`${c.dim}Reranking ${chunkCount} chunks...${c.reset}`);
1847
+ progress.indeterminate();
1848
+ },
1849
+ onRerankDone: (ms) => {
1850
+ progress.clear();
1851
+ process.stderr.write(`${c.dim} (${formatMs(ms)})${c.reset}\n`);
1852
+ },
1771
1853
  },
1772
- onRerankDone: () => {
1773
- progress.clear();
1854
+ });
1855
+ }
1856
+ else {
1857
+ // Standard hybrid query with automatic expansion
1858
+ results = await hybridQuery(store, query, {
1859
+ collection: singleCollection,
1860
+ limit: opts.all ? 500 : (opts.limit || 10),
1861
+ minScore: opts.minScore || 0,
1862
+ hooks: {
1863
+ onStrongSignal: (score) => {
1864
+ process.stderr.write(`${c.dim}Strong BM25 signal (${score.toFixed(2)}) — skipping expansion${c.reset}\n`);
1865
+ },
1866
+ onExpandStart: () => {
1867
+ process.stderr.write(`${c.dim}Expanding query...${c.reset}`);
1868
+ },
1869
+ onExpand: (original, expanded, ms) => {
1870
+ process.stderr.write(`${c.dim} (${formatMs(ms)})${c.reset}\n`);
1871
+ logExpansionTree(original, expanded);
1872
+ process.stderr.write(`${c.dim}Searching ${expanded.length + 1} queries...${c.reset}\n`);
1873
+ },
1874
+ onEmbedStart: (count) => {
1875
+ process.stderr.write(`${c.dim}Embedding ${count} ${count === 1 ? 'query' : 'queries'}...${c.reset}`);
1876
+ },
1877
+ onEmbedDone: (ms) => {
1878
+ process.stderr.write(`${c.dim} (${formatMs(ms)})${c.reset}\n`);
1879
+ },
1880
+ onRerankStart: (chunkCount) => {
1881
+ process.stderr.write(`${c.dim}Reranking ${chunkCount} chunks...${c.reset}`);
1882
+ progress.indeterminate();
1883
+ },
1884
+ onRerankDone: (ms) => {
1885
+ progress.clear();
1886
+ process.stderr.write(`${c.dim} (${formatMs(ms)})${c.reset}\n`);
1887
+ },
1774
1888
  },
1775
- },
1776
- });
1889
+ });
1890
+ }
1777
1891
  // Post-filter for multi-collection
1778
1892
  if (collectionNames.length > 1) {
1779
1893
  results = results.filter(r => {
@@ -1791,6 +1905,10 @@ async function querySearch(query, opts, _embedModel = DEFAULT_EMBED_MODEL, _rera
1791
1905
  }
1792
1906
  return;
1793
1907
  }
1908
+ // Use first lex/vec query for output context, or original query
1909
+ const displayQuery = structuredQueries
1910
+ ? (structuredQueries.find(s => s.type === 'lex')?.query || structuredQueries.find(s => s.type === 'vec')?.query || query)
1911
+ : query;
1794
1912
  // Map to CLI output format — use bestChunk for snippet display
1795
1913
  outputResults(results.map(r => ({
1796
1914
  file: r.file,
@@ -1801,7 +1919,7 @@ async function querySearch(query, opts, _embedModel = DEFAULT_EMBED_MODEL, _rera
1801
1919
  score: r.score,
1802
1920
  context: r.context,
1803
1921
  docid: r.docid,
1804
- })), query, { ...opts, limit: results.length });
1922
+ })), displayQuery, { ...opts, limit: results.length });
1805
1923
  }, { maxDuration: 10 * 60 * 1000, name: 'querySearch' });
1806
1924
  }
1807
1925
  // Parse CLI arguments using util.parseArgs
@@ -1818,6 +1936,7 @@ function parseCLI() {
1818
1936
  },
1819
1937
  help: { type: "boolean", short: "h" },
1820
1938
  version: { type: "boolean", short: "v" },
1939
+ skill: { type: "boolean" },
1821
1940
  // Search options
1822
1941
  n: { type: "string" },
1823
1942
  "min-score": { type: "string" },
@@ -1889,50 +2008,100 @@ function parseCLI() {
1889
2008
  values,
1890
2009
  };
1891
2010
  }
2011
/**
 * Print the packaged SKILL.md to stdout.
 *
 * Writes a short header (relative path and resolved location) first. The
 * file is looked up at skills/qmd/SKILL.md relative to the parent of the
 * directory containing this script. If it does not exist, a message is
 * written to stderr and nothing else is printed. The file contents are
 * always emitted with a trailing newline.
 */
function showSkill() {
    const here = dirname(fileURLToPath(import.meta.url));
    const skillRelPath = pathJoin("skills", "qmd", "SKILL.md");
    const skillAbsPath = pathJoin(here, "..", skillRelPath);
    const header = [
        `QMD Skill (${skillRelPath})`,
        `Location: ${skillAbsPath}`,
        "",
    ];
    for (const headerLine of header) {
        console.log(headerLine);
    }
    if (existsSync(skillAbsPath)) {
        const text = readFileSync(skillAbsPath, "utf-8");
        const needsNewline = !text.endsWith("\n");
        process.stdout.write(needsNewline ? text + "\n" : text);
        return;
    }
    console.error("SKILL.md not found. If you built from source, ensure skills/qmd/SKILL.md exists.");
}
1892
2025
  function showHelp() {
2026
+ console.log("qmd — Quick Markdown Search");
2027
+ console.log("");
1893
2028
  console.log("Usage:");
1894
- console.log(" qmd collection add [path] --name <name> --mask <pattern> - Create/index collection");
1895
- console.log(" qmd collection list - List all collections with details");
1896
- console.log(" qmd collection remove <name> - Remove a collection by name");
1897
- console.log(" qmd collection rename <old> <new> - Rename a collection");
1898
- console.log(" qmd ls [collection[/path]] - List collections or files in a collection");
1899
- console.log(" qmd context add [path] \"text\" - Add context for path (defaults to current dir)");
1900
- console.log(" qmd context list - List all contexts");
1901
- console.log(" qmd context rm <path> - Remove context");
1902
- console.log(" qmd get <file>[:line] [-l N] [--from N] - Get document (optionally from line, max N lines)");
1903
- console.log(" qmd multi-get <pattern> [-l N] [--max-bytes N] - Get multiple docs by glob or comma-separated list");
1904
- console.log(" qmd status - Show index status and collections");
1905
- console.log(" qmd update [--pull] - Re-index all collections (--pull: git pull first)");
1906
- console.log(" qmd embed [-f] - Create vector embeddings (900 tokens/chunk, 15% overlap)");
1907
- console.log(" qmd cleanup - Remove cache and orphaned data, vacuum DB");
1908
- console.log(" qmd query <query> - Search with query expansion + reranking (recommended)");
1909
- console.log(" qmd search <query> - Full-text keyword search (BM25, no LLM)");
1910
- console.log(" qmd vsearch <query> - Vector similarity search (no reranking)");
1911
- console.log(" qmd mcp - Start MCP server (stdio transport)");
1912
- console.log(" qmd mcp --http [--port N] - Start MCP server (HTTP transport, default port 8181)");
1913
- console.log(" qmd mcp --http --daemon - Start MCP server as background daemon");
1914
- console.log(" qmd mcp stop - Stop background MCP daemon");
2029
+ console.log(" qmd <command> [options]");
2030
+ console.log("");
2031
+ console.log("Primary commands:");
2032
+ console.log(" qmd query <query> - Hybrid search with auto expansion + reranking (recommended)");
2033
+ console.log(" qmd query 'lex:..\\nvec:...' - Structured query document (you provide lex/vec/hyde lines)");
2034
+ console.log(" qmd search <query> - Full-text BM25 keywords (no LLM)");
2035
+ console.log(" qmd vsearch <query> - Vector similarity only");
2036
+ console.log(" qmd get <file>[:line] [-l N] - Show a single document, optional line slice");
2037
+ console.log(" qmd multi-get <pattern> - Batch fetch via glob or comma-separated list");
2038
+ console.log(" qmd mcp - Start the MCP server (stdio transport for AI agents)");
2039
+ console.log("");
2040
+ console.log("Collections & context:");
2041
+ console.log(" qmd collection add/list/remove/rename/show - Manage indexed folders");
2042
+ console.log(" qmd context add/list/rm - Attach human-written summaries");
2043
+ console.log(" qmd ls [collection[/path]] - Inspect indexed files");
2044
+ console.log("");
2045
+ console.log("Maintenance:");
2046
+ console.log(" qmd status - View index + collection health");
2047
+ console.log(" qmd update [--pull] - Re-index collections (optionally git pull first)");
2048
+ console.log(" qmd embed [-f] - Generate/refresh vector embeddings");
2049
+ console.log(" qmd cleanup - Clear caches, vacuum DB");
2050
+ console.log("");
2051
+ console.log("Query syntax (qmd query):");
2052
+ console.log(" QMD queries are either a single expand query (no prefix) or a multi-line");
2053
+ console.log(" document where every line is typed with lex:, vec:, or hyde:. This grammar");
2054
+ console.log(" matches the docs in docs/SYNTAX.md and is enforced in the CLI.");
2055
+ console.log("");
2056
+ const grammar = [
2057
+ `query = expand_query | query_document ;`,
2058
+ `expand_query = text | explicit_expand ;`,
2059
+ `explicit_expand= "expand:" text ;`,
2060
+ `query_document = { typed_line } ;`,
2061
+ `typed_line = type ":" text newline ;`,
2062
+ `type = "lex" | "vec" | "hyde" ;`,
2063
+ `text = quoted_phrase | plain_text ;`,
2064
+ `quoted_phrase = '"' { character } '"' ;`,
2065
+ `plain_text = { character } ;`,
2066
+ `newline = "\\n" ;`,
2067
+ ];
2068
+ console.log(" Grammar:");
2069
+ for (const line of grammar) {
2070
+ console.log(` ${line}`);
2071
+ }
2072
+ console.log("");
2073
+ console.log(" Examples:");
2074
+ console.log(" qmd query \"how does auth work\" # single-line → implicit expand");
2075
+ console.log(" qmd query $'lex: CAP theorem\\nvec: consistency' # typed query document");
2076
+ console.log(" qmd query $'lex: \"exact matches\" sports -baseball' # phrase + negation lex search");
2077
+ console.log(" qmd query $'hyde: Hypothetical answer text' # hyde-only document");
2078
+ console.log("");
2079
+ console.log(" Constraints:");
2080
+ console.log(" - Standalone expand queries cannot mix with typed lines.");
2081
+ console.log(" - Query documents allow only lex:, vec:, or hyde: prefixes.");
2082
+ console.log(" - Each typed line must be single-line text with balanced quotes.");
2083
+ console.log("");
2084
+ console.log("AI agents & integrations:");
2085
+ console.log(" - Run `qmd mcp` to expose the MCP server (stdio) to agents/IDEs.");
2086
+ console.log(" - `qmd --skill` prints the packaged skills/qmd/SKILL.md (path + contents).");
2087
+ console.log(" - Advanced: `qmd mcp --http ...` and `qmd mcp --http --daemon` are optional for custom transports.");
1915
2088
  console.log("");
1916
2089
  console.log("Global options:");
1917
- console.log(" --index <name> - Use custom index name (default: index)");
2090
+ console.log(" --index <name> - Use a named index (default: index)");
1918
2091
  console.log("");
1919
2092
  console.log("Search options:");
1920
- console.log(" -n <num> - Number of results (default: 5, or 20 for --files)");
1921
- console.log(" --all - Return all matches (use with --min-score to filter)");
2093
+ console.log(" -n <num> - Max results (default 5, or 20 for --files/--json)");
2094
+ console.log(" --all - Return all matches (pair with --min-score)");
1922
2095
  console.log(" --min-score <num> - Minimum similarity score");
1923
2096
  console.log(" --full - Output full document instead of snippet");
1924
- console.log(" --line-numbers - Add line numbers to output");
1925
- console.log(" --files - Output docid,score,filepath,context (default: 20 results)");
1926
- console.log(" --json - JSON output with snippets (default: 20 results)");
1927
- console.log(" --csv - CSV output with snippets");
1928
- console.log(" --md - Markdown output");
1929
- console.log(" --xml - XML output");
1930
- console.log(" -c, --collection <name> - Filter results to a specific collection");
2097
+ console.log(" --line-numbers - Include line numbers in output");
2098
+ console.log(" --files | --json | --csv | --md | --xml - Output format");
2099
+ console.log(" -c, --collection <name> - Filter by one or more collections");
1931
2100
  console.log("");
1932
2101
  console.log("Multi-get options:");
1933
2102
  console.log(" -l <num> - Maximum lines per file");
1934
- console.log(" --max-bytes <num> - Skip files larger than N bytes (default: 10240)");
1935
- console.log(" --json/--csv/--md/--xml/--files - Output format (same as search)");
2103
+ console.log(" --max-bytes <num> - Skip files larger than N bytes (default 10240)");
2104
+ console.log(" --json/--csv/--md/--xml/--files - Same formats as search");
1936
2105
  console.log("");
1937
2106
  console.log(`Index: ${getDbPath()}`);
1938
2107
  }
@@ -1951,12 +2120,22 @@ async function showVersion() {
1951
2120
  console.log(`qmd ${versionStr}`);
1952
2121
  }
1953
2122
  // Main CLI - only run if this is the main module
1954
- if (fileURLToPath(import.meta.url) === process.argv[1] || process.argv[1]?.endsWith("/qmd.ts") || process.argv[1]?.endsWith("/qmd.js")) {
2123
+ const __filename = fileURLToPath(import.meta.url);
2124
+ const argv1 = process.argv[1];
2125
+ const isMain = argv1 === __filename
2126
+ || argv1?.endsWith("/qmd.ts")
2127
+ || argv1?.endsWith("/qmd.js")
2128
+ || (argv1 != null && realpathSync(argv1) === __filename);
2129
+ if (isMain) {
1955
2130
  const cli = parseCLI();
1956
2131
  if (cli.values.version) {
1957
2132
  await showVersion();
1958
2133
  process.exit(0);
1959
2134
  }
2135
+ if (cli.values.skill) {
2136
+ showSkill();
2137
+ process.exit(0);
2138
+ }
1960
2139
  if (!cli.command || cli.values.help) {
1961
2140
  showHelp();
1962
2141
  process.exit(cli.values.help ? 0 : 1);
@@ -1965,13 +2144,12 @@ if (fileURLToPath(import.meta.url) === process.argv[1] || process.argv[1]?.endsW
1965
2144
  case "context": {
1966
2145
  const subcommand = cli.args[0];
1967
2146
  if (!subcommand) {
1968
- console.error("Usage: qmd context <add|list|check|rm>");
2147
+ console.error("Usage: qmd context <add|list|rm>");
1969
2148
  console.error("");
1970
2149
  console.error("Commands:");
1971
2150
  console.error(" qmd context add [path] \"text\" - Add context (defaults to current dir)");
1972
2151
  console.error(" qmd context add / \"text\" - Add global context to all collections");
1973
2152
  console.error(" qmd context list - List all contexts");
1974
- console.error(" qmd context check - Check for missing contexts");
1975
2153
  console.error(" qmd context rm <path> - Remove context");
1976
2154
  process.exit(1);
1977
2155
  }
@@ -2013,10 +2191,6 @@ if (fileURLToPath(import.meta.url) === process.argv[1] || process.argv[1]?.endsW
2013
2191
  contextList();
2014
2192
  break;
2015
2193
  }
2016
- case "check": {
2017
- contextCheck();
2018
- break;
2019
- }
2020
2194
  case "rm":
2021
2195
  case "remove": {
2022
2196
  if (cli.args.length < 2 || !cli.args[1]) {
@@ -2031,7 +2205,7 @@ if (fileURLToPath(import.meta.url) === process.argv[1] || process.argv[1]?.endsW
2031
2205
  }
2032
2206
  default:
2033
2207
  console.error(`Unknown subcommand: ${subcommand}`);
2034
- console.error("Available: add, list, check, rm");
2208
+ console.error("Available: add, list, rm");
2035
2209
  process.exit(1);
2036
2210
  }
2037
2211
  break;
@@ -2096,9 +2270,99 @@ if (fileURLToPath(import.meta.url) === process.argv[1] || process.argv[1]?.endsW
2096
2270
  collectionRename(cli.args[1], cli.args[2]);
2097
2271
  break;
2098
2272
  }
2273
+ case "set-update":
2274
+ case "update-cmd": {
2275
+ const name = cli.args[1];
2276
+ const cmd = cli.args.slice(2).join(' ') || null;
2277
+ if (!name) {
2278
+ console.error("Usage: qmd collection update-cmd <name> [command]");
2279
+ console.error(" Set the command to run before indexing (e.g., 'git pull')");
2280
+ console.error(" Omit command to clear it");
2281
+ process.exit(1);
2282
+ }
2283
+ const { updateCollectionSettings, getCollection } = await import("./collections.js");
2284
+ const col = getCollection(name);
2285
+ if (!col) {
2286
+ console.error(`Collection not found: ${name}`);
2287
+ process.exit(1);
2288
+ }
2289
+ updateCollectionSettings(name, { update: cmd });
2290
+ if (cmd) {
2291
+ console.log(`✓ Set update command for '${name}': ${cmd}`);
2292
+ }
2293
+ else {
2294
+ console.log(`✓ Cleared update command for '${name}'`);
2295
+ }
2296
+ break;
2297
+ }
2298
+ case "include":
2299
+ case "exclude": {
2300
+ const name = cli.args[1];
2301
+ if (!name) {
2302
+ console.error(`Usage: qmd collection ${subcommand} <name>`);
2303
+ console.error(` ${subcommand === 'include' ? 'Include' : 'Exclude'} collection in default queries`);
2304
+ process.exit(1);
2305
+ }
2306
+ const { updateCollectionSettings, getCollection } = await import("./collections.js");
2307
+ const col = getCollection(name);
2308
+ if (!col) {
2309
+ console.error(`Collection not found: ${name}`);
2310
+ process.exit(1);
2311
+ }
2312
+ const include = subcommand === 'include';
2313
+ updateCollectionSettings(name, { includeByDefault: include });
2314
+ console.log(`✓ Collection '${name}' ${include ? 'included in' : 'excluded from'} default queries`);
2315
+ break;
2316
+ }
2317
+ case "show":
2318
+ case "info": {
2319
+ const name = cli.args[1];
2320
+ if (!name) {
2321
+ console.error("Usage: qmd collection show <name>");
2322
+ process.exit(1);
2323
+ }
2324
+ const { getCollection } = await import("./collections.js");
2325
+ const col = getCollection(name);
2326
+ if (!col) {
2327
+ console.error(`Collection not found: ${name}`);
2328
+ process.exit(1);
2329
+ }
2330
+ console.log(`Collection: ${name}`);
2331
+ console.log(` Path: ${col.path}`);
2332
+ console.log(` Pattern: ${col.pattern}`);
2333
+ console.log(` Include: ${col.includeByDefault !== false ? 'yes (default)' : 'no'}`);
2334
+ if (col.update) {
2335
+ console.log(` Update: ${col.update}`);
2336
+ }
2337
+ if (col.context) {
2338
+ const ctxCount = Object.keys(col.context).length;
2339
+ console.log(` Contexts: ${ctxCount}`);
2340
+ }
2341
+ break;
2342
+ }
2343
+ case "help":
2344
+ case undefined: {
2345
+ console.log("Usage: qmd collection <command> [options]");
2346
+ console.log("");
2347
+ console.log("Commands:");
2348
+ console.log(" list List all collections");
2349
+ console.log(" add <path> [--name NAME] Add a collection");
2350
+ console.log(" remove <name> Remove a collection");
2351
+ console.log(" rename <old> <new> Rename a collection");
2352
+ console.log(" show <name> Show collection details");
2353
+ console.log(" update-cmd <name> [cmd] Set pre-update command (e.g., 'git pull')");
2354
+ console.log(" include <name> Include in default queries");
2355
+ console.log(" exclude <name> Exclude from default queries");
2356
+ console.log("");
2357
+ console.log("Examples:");
2358
+ console.log(" qmd collection add ~/notes --name notes");
2359
+ console.log(" qmd collection update-cmd brain 'git pull'");
2360
+ console.log(" qmd collection exclude archive");
2361
+ process.exit(0);
2362
+ }
2099
2363
  default:
2100
2364
  console.error(`Unknown subcommand: ${subcommand}`);
2101
- console.error("Available: list, add, remove, rename");
2365
+ console.error("Run 'qmd collection help' for usage");
2102
2366
  process.exit(1);
2103
2367
  }
2104
2368
  break;