org-qmd 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,226 @@
1
+ /**
2
+ * QMD SDK - Library mode for programmatic access to QMD search and indexing.
3
+ *
4
+ * Usage:
5
+ * import { createStore } from '@tobilu/qmd'
6
+ *
7
+ * const store = await createStore({
8
+ * dbPath: './my-index.sqlite',
9
+ * config: {
10
+ * collections: {
11
+ * docs: { path: '/path/to/docs', pattern: '**\/*.md' }
12
+ * }
13
+ * }
14
+ * })
15
+ *
16
+ * const results = await store.search({ query: "how does auth work?" })
17
+ * await store.close()
18
+ */
19
+ import { extractSnippet, addLineNumbers, DEFAULT_MULTI_GET_MAX_BYTES, type Store as InternalStore, type DocumentResult, type DocumentNotFound, type SearchResult, type HybridQueryResult, type HybridQueryOptions, type HybridQueryExplain, type ExpandedQuery, type StructuredSearchOptions, type MultiGetResult, type IndexStatus, type IndexHealthInfo, type SearchHooks, type ReindexProgress, type ReindexResult, type EmbedProgress, type EmbedResult, type ChunkStrategy } from "./store.js";
20
+ import { type Collection, type CollectionConfig, type NamedCollection, type ContextMap } from "./collections.js";
21
+ export type { DocumentResult, DocumentNotFound, SearchResult, HybridQueryResult, HybridQueryOptions, HybridQueryExplain, ExpandedQuery, StructuredSearchOptions, MultiGetResult, IndexStatus, IndexHealthInfo, SearchHooks, ReindexProgress, ReindexResult, EmbedProgress, EmbedResult, Collection, CollectionConfig, NamedCollection, ContextMap, };
22
+ export type { InternalStore };
23
+ export { extractSnippet, addLineNumbers, DEFAULT_MULTI_GET_MAX_BYTES };
24
+ export type { ChunkStrategy } from "./store.js";
25
+ export { getDefaultDbPath } from "./store.js";
26
+ export { Maintenance } from "./maintenance.js";
27
/**
 * Per-file progress event delivered to the `onProgress` callback of `update()`.
 */
export type UpdateProgress = {
    /** Name of the collection currently being re-indexed. */
    collection: string;
    /** File the event refers to. */
    file: string;
    /** Progress counter for the current collection (presumably the position of `file` — confirm against the indexer). */
    current: number;
    /** Total reported alongside `current` (presumably the number of files in the collection — confirm against the indexer). */
    total: number;
};
36
/**
 * Totals returned by `update()`, summed over every collection that was re-indexed.
 */
export type UpdateResult = {
    /** Number of collections that were processed. */
    collections: number;
    /** Sum of the per-collection `indexed` counts. */
    indexed: number;
    /** Sum of the per-collection `updated` counts. */
    updated: number;
    /** Sum of the per-collection `unchanged` counts. */
    unchanged: number;
    /** Sum of the per-collection `removed` counts. */
    removed: number;
    /** Number of content hashes that still need embeddings after the update. */
    needsEmbedding: number;
};
47
/**
 * Options accepted by the unified `search()` method.
 *
 * At least one of `query` or `queries` must be supplied; when both are present,
 * `queries` takes precedence.
 */
export interface SearchOptions {
    /** Simple query string — will be auto-expanded via LLM */
    query?: string;
    /** Pre-expanded queries (from expandQuery) — skips auto-expansion */
    queries?: ExpandedQuery[];
    /** Domain intent hint — steers expansion and reranking */
    intent?: string;
    /** Rerank results using LLM (default: true; only an explicit `false` disables it) */
    rerank?: boolean;
    /** Filter to a specific collection (merged with `collections`, listed first) */
    collection?: string;
    /**
     * Filter to specific collections.
     * NOTE: with a plain `query`, only the first entry of the merged
     * collection list is forwarded to the search.
     */
    collections?: string[];
    /** Max results (default: 10) */
    limit?: number;
    /** Minimum score threshold */
    minScore?: number;
    /** Include explain traces */
    explain?: boolean;
    /** Chunk strategy: "auto" (default, uses AST for code files) or "regex" (legacy) */
    chunkStrategy?: ChunkStrategy;
}
72
/**
 * Optional settings for `searchLex()` (BM25 keyword search).
 */
export interface LexSearchOptions {
    /** Maximum number of results to return. */
    limit?: number;
    /** Restrict the search to this collection. */
    collection?: string;
}
79
/**
 * Optional settings for `searchVector()` (vector similarity search).
 */
export interface VectorSearchOptions {
    /** Maximum number of results to return. */
    limit?: number;
    /** Restrict the search to this collection. */
    collection?: string;
}
86
/**
 * Optional settings for `expandQuery()` (manual query expansion).
 */
export interface ExpandQueryOptions {
    /** Intent hint forwarded to the query expander. */
    intent?: string;
}
92
/**
 * Options for creating a QMD store.
 *
 * `dbPath` is mandatory. A collection config may be supplied either as a YAML
 * file (`configPath`) or inline (`config`), but not both — `createStore()`
 * throws when both are set. With neither, the store works from whatever
 * collection state already exists in the database, which is useful for
 * reopening a previously-configured store.
 */
export interface StoreOptions {
    /** Path to the SQLite database file */
    dbPath: string;
    /** Path to a YAML config file (mutually exclusive with `config`) */
    configPath?: string;
    /** Inline collection config (mutually exclusive with `configPath`) */
    config?: CollectionConfig;
}
107
/**
 * The QMD SDK store — the public facade for search, document retrieval,
 * collection management, context management, and indexing.
 *
 * Every method returns a Promise. Each store owns its own LlamaCpp instance
 * (lazy-loaded, auto-unloaded after inactivity).
 */
export interface QMDStore {
    /** The underlying internal store (for advanced use) */
    readonly internal: InternalStore;
    /** Path to the SQLite database */
    readonly dbPath: string;

    // ── Search ──────────────────────────────────────────────────────────

    /** Full search: query expansion + multi-signal retrieval + LLM reranking */
    search(options: SearchOptions): Promise<HybridQueryResult[]>;
    /** BM25 keyword search (fast, no LLM) */
    searchLex(query: string, options?: LexSearchOptions): Promise<SearchResult[]>;
    /** Vector similarity search (embedding model, no reranking) */
    searchVector(query: string, options?: VectorSearchOptions): Promise<SearchResult[]>;
    /** Expand a query into typed sub-searches (lex/vec/hyde) for manual control */
    expandQuery(query: string, options?: ExpandQueryOptions): Promise<ExpandedQuery[]>;

    // ── Retrieval ───────────────────────────────────────────────────────

    /** Get a single document by path or docid */
    get(pathOrDocid: string, options?: {
        includeBody?: boolean;
    }): Promise<DocumentResult | DocumentNotFound>;
    /**
     * Get the body content of a document, optionally sliced by line range.
     * Resolves to `null` when the document cannot be found.
     */
    getDocumentBody(pathOrDocid: string, opts?: {
        fromLine?: number;
        maxLines?: number;
    }): Promise<string | null>;
    /** Get multiple documents by glob pattern or comma-separated list */
    multiGet(pattern: string, options?: {
        includeBody?: boolean;
        maxBytes?: number;
    }): Promise<{
        docs: MultiGetResult[];
        errors: string[];
    }>;

    // ── Collection management ───────────────────────────────────────────

    /** Add or update a collection */
    addCollection(name: string, opts: {
        path: string;
        pattern?: string;
        ignore?: string[];
    }): Promise<void>;
    /** Remove a collection */
    removeCollection(name: string): Promise<boolean>;
    /** Rename a collection */
    renameCollection(oldName: string, newName: string): Promise<boolean>;
    /** List all collections with document stats */
    listCollections(): Promise<Array<{
        name: string;
        pwd: string;
        glob_pattern: string;
        doc_count: number;
        active_count: number;
        last_modified: string | null;
        includeByDefault: boolean;
    }>>;
    /** Get names of collections included by default in queries */
    getDefaultCollectionNames(): Promise<string[]>;

    // ── Context management ──────────────────────────────────────────────

    /** Add context for a path within a collection */
    addContext(collectionName: string, pathPrefix: string, contextText: string): Promise<boolean>;
    /** Remove context from a collection path */
    removeContext(collectionName: string, pathPrefix: string): Promise<boolean>;
    /** Set global context (applies to all collections) */
    setGlobalContext(context: string | undefined): Promise<void>;
    /** Get global context */
    getGlobalContext(): Promise<string | undefined>;
    /** List all contexts across all collections */
    listContexts(): Promise<{
        collection: string;
        path: string;
        context: string;
    }[]>;

    // ── Indexing ────────────────────────────────────────────────────────

    /**
     * Re-index collections by scanning the filesystem.
     * `collections` limits the run to the named collections; omit it to
     * process every collection.
     */
    update(options?: {
        collections?: string[];
        onProgress?: (info: UpdateProgress) => void;
    }): Promise<UpdateResult>;
    /** Generate vector embeddings for documents that need them */
    embed(options?: {
        force?: boolean;
        model?: string;
        maxDocsPerBatch?: number;
        maxBatchBytes?: number;
        chunkStrategy?: ChunkStrategy;
        onProgress?: (info: EmbedProgress) => void;
    }): Promise<EmbedResult>;

    // ── Health & lifecycle ──────────────────────────────────────────────

    /** Get index status (document counts, collections, embedding state) */
    getStatus(): Promise<IndexStatus>;
    /** Get index health info (stale embeddings, etc.) */
    getIndexHealth(): Promise<IndexHealthInfo>;
    /** Close the store and release all resources (LLM models, DB connection) */
    close(): Promise<void>;
}
201
/**
 * Open a QMD store for programmatic search and indexing.
 *
 * @param options - Database path plus an optional YAML or inline collection config.
 * @returns A promise resolving to the store; call `close()` on it when finished.
 * @throws Error when `dbPath` is missing, or when both `configPath` and `config` are given.
 *
 * @example
 * ```typescript
 * // Backed by a YAML config file
 * const store = await createStore({
 *   dbPath: './index.sqlite',
 *   configPath: './qmd.yml',
 * })
 *
 * // Backed by an inline config (no files needed besides the DB)
 * const store = await createStore({
 *   dbPath: './index.sqlite',
 *   config: {
 *     collections: {
 *       docs: { path: '/path/to/docs', pattern: '**\/*.md' }
 *     }
 *   }
 * })
 *
 * const results = await store.search({ query: "authentication flow" })
 * await store.close()
 * ```
 */
export declare function createStore(options: StoreOptions): Promise<QMDStore>;
package/dist/index.js ADDED
@@ -0,0 +1,234 @@
1
+ /**
2
+ * QMD SDK - Library mode for programmatic access to QMD search and indexing.
3
+ *
4
+ * Usage:
5
+ * import { createStore } from '@tobilu/qmd'
6
+ *
7
+ * const store = await createStore({
8
+ * dbPath: './my-index.sqlite',
9
+ * config: {
10
+ * collections: {
11
+ * docs: { path: '/path/to/docs', pattern: '**\/*.md' }
12
+ * }
13
+ * }
14
+ * })
15
+ *
16
+ * const results = await store.search({ query: "how does auth work?" })
17
+ * await store.close()
18
+ */
19
+ import { createStore as createStoreInternal, hybridQuery, structuredSearch, extractSnippet, addLineNumbers, DEFAULT_EMBED_MODEL, DEFAULT_MULTI_GET_MAX_BYTES, reindexCollection, generateEmbeddings, listCollections as storeListCollections, syncConfigToDb, getStoreCollections, getStoreCollection, getStoreGlobalContext, getStoreContexts, upsertStoreCollection, deleteStoreCollection, renameStoreCollection, updateStoreContext, removeStoreContext, setStoreGlobalContext, vacuumDatabase, cleanupOrphanedContent, cleanupOrphanedVectors, deleteLLMCache, deleteInactiveDocuments, clearAllEmbeddings, } from "./store.js";
20
+ import { LlamaCpp, } from "./llm.js";
21
+ import { setConfigSource, loadConfig, addCollection as collectionsAddCollection, removeCollection as collectionsRemoveCollection, renameCollection as collectionsRenameCollection, addContext as collectionsAddContext, removeContext as collectionsRemoveContext, setGlobalContext as collectionsSetGlobalContext, } from "./collections.js";
22
+ // Re-export utility functions and types used by frontends
23
+ export { extractSnippet, addLineNumbers, DEFAULT_MULTI_GET_MAX_BYTES };
24
+ // Re-export getDefaultDbPath for CLI/MCP that need the default database location
25
+ export { getDefaultDbPath } from "./store.js";
26
+ // Re-export Maintenance class for CLI housekeeping operations
27
+ export { Maintenance } from "./maintenance.js";
28
+ /**
29
+ * Create a QMD store for programmatic access to search and indexing.
30
+ *
31
+ * @example
32
+ * ```typescript
33
+ * // With a YAML config file
34
+ * const store = await createStore({
35
+ * dbPath: './index.sqlite',
36
+ * configPath: './qmd.yml',
37
+ * })
38
+ *
39
+ * // With inline config (no files needed besides the DB)
40
+ * const store = await createStore({
41
+ * dbPath: './index.sqlite',
42
+ * config: {
43
+ * collections: {
44
+ * docs: { path: '/path/to/docs', pattern: '**\/*.md' }
45
+ * }
46
+ * }
47
+ * })
48
+ *
49
+ * const results = await store.search({ query: "authentication flow" })
50
+ * await store.close()
51
+ * ```
52
+ */
53
+ export async function createStore(options) {
54
+ if (!options.dbPath) {
55
+ throw new Error("dbPath is required");
56
+ }
57
+ if (options.configPath && options.config) {
58
+ throw new Error("Provide either configPath or config, not both");
59
+ }
60
+ // Create the internal store (opens DB, creates tables)
61
+ const internal = createStoreInternal(options.dbPath);
62
+ const db = internal.db;
63
+ // Track whether we have a YAML config path for write-through
64
+ const hasYamlConfig = !!options.configPath;
65
+ // Sync config into SQLite store_collections
66
+ if (options.configPath) {
67
+ // YAML mode: inject config source for write-through, sync to DB
68
+ setConfigSource({ configPath: options.configPath });
69
+ const config = loadConfig();
70
+ syncConfigToDb(db, config);
71
+ }
72
+ else if (options.config) {
73
+ // Inline config mode: inject config source for mutations, sync to DB
74
+ setConfigSource({ config: options.config });
75
+ syncConfigToDb(db, options.config);
76
+ }
77
+ // else: DB-only mode — no external config, use existing store_collections
78
+ // Create a per-store LlamaCpp instance — lazy-loads models on first use,
79
+ // auto-unloads after 5 min inactivity to free VRAM.
80
+ const llm = new LlamaCpp({
81
+ inactivityTimeoutMs: 5 * 60 * 1000,
82
+ disposeModelsOnInactivity: true,
83
+ });
84
+ internal.llm = llm;
85
+ const store = {
86
+ internal,
87
+ dbPath: internal.dbPath,
88
+ // Search
89
+ search: async (opts) => {
90
+ if (!opts.query && !opts.queries) {
91
+ throw new Error("search() requires either 'query' or 'queries'");
92
+ }
93
+ // Normalize collection/collections
94
+ const collections = [
95
+ ...(opts.collection ? [opts.collection] : []),
96
+ ...(opts.collections ?? []),
97
+ ];
98
+ const skipRerank = opts.rerank === false;
99
+ if (opts.queries) {
100
+ // Pre-expanded queries — use structuredSearch
101
+ return structuredSearch(internal, opts.queries, {
102
+ collections: collections.length > 0 ? collections : undefined,
103
+ limit: opts.limit,
104
+ minScore: opts.minScore,
105
+ explain: opts.explain,
106
+ intent: opts.intent,
107
+ skipRerank,
108
+ chunkStrategy: opts.chunkStrategy,
109
+ });
110
+ }
111
+ // Simple query string — use hybridQuery (expand + search + rerank)
112
+ return hybridQuery(internal, opts.query, {
113
+ collection: collections[0],
114
+ limit: opts.limit,
115
+ minScore: opts.minScore,
116
+ explain: opts.explain,
117
+ intent: opts.intent,
118
+ skipRerank,
119
+ chunkStrategy: opts.chunkStrategy,
120
+ });
121
+ },
122
+ searchLex: async (q, opts) => internal.searchFTS(q, opts?.limit, opts?.collection),
123
+ searchVector: async (q, opts) => internal.searchVec(q, DEFAULT_EMBED_MODEL, opts?.limit, opts?.collection),
124
+ expandQuery: async (q, opts) => internal.expandQuery(q, undefined, opts?.intent),
125
+ get: async (pathOrDocid, opts) => internal.findDocument(pathOrDocid, opts),
126
+ getDocumentBody: async (pathOrDocid, opts) => {
127
+ const result = internal.findDocument(pathOrDocid, { includeBody: false });
128
+ if ("error" in result)
129
+ return null;
130
+ return internal.getDocumentBody(result, opts?.fromLine, opts?.maxLines);
131
+ },
132
+ multiGet: async (pattern, opts) => internal.findDocuments(pattern, opts),
133
+ // Collection Management — write to SQLite + write-through to YAML/inline if configured
134
+ addCollection: async (name, opts) => {
135
+ upsertStoreCollection(db, name, { path: opts.path, pattern: opts.pattern, ignore: opts.ignore });
136
+ if (hasYamlConfig || options.config) {
137
+ collectionsAddCollection(name, opts.path, opts.pattern);
138
+ }
139
+ },
140
+ removeCollection: async (name) => {
141
+ const result = deleteStoreCollection(db, name);
142
+ if (hasYamlConfig || options.config) {
143
+ collectionsRemoveCollection(name);
144
+ }
145
+ return result;
146
+ },
147
+ renameCollection: async (oldName, newName) => {
148
+ const result = renameStoreCollection(db, oldName, newName);
149
+ if (hasYamlConfig || options.config) {
150
+ collectionsRenameCollection(oldName, newName);
151
+ }
152
+ return result;
153
+ },
154
+ listCollections: async () => storeListCollections(db),
155
+ getDefaultCollectionNames: async () => {
156
+ const collections = storeListCollections(db);
157
+ return collections.filter(c => c.includeByDefault).map(c => c.name);
158
+ },
159
+ // Context Management — write to SQLite + write-through to YAML/inline if configured
160
+ addContext: async (collectionName, pathPrefix, contextText) => {
161
+ const result = updateStoreContext(db, collectionName, pathPrefix, contextText);
162
+ if (hasYamlConfig || options.config) {
163
+ collectionsAddContext(collectionName, pathPrefix, contextText);
164
+ }
165
+ return result;
166
+ },
167
+ removeContext: async (collectionName, pathPrefix) => {
168
+ const result = removeStoreContext(db, collectionName, pathPrefix);
169
+ if (hasYamlConfig || options.config) {
170
+ collectionsRemoveContext(collectionName, pathPrefix);
171
+ }
172
+ return result;
173
+ },
174
+ setGlobalContext: async (context) => {
175
+ setStoreGlobalContext(db, context);
176
+ if (hasYamlConfig || options.config) {
177
+ collectionsSetGlobalContext(context);
178
+ }
179
+ },
180
+ getGlobalContext: async () => getStoreGlobalContext(db),
181
+ listContexts: async () => getStoreContexts(db),
182
+ // Indexing — reads collections from SQLite
183
+ update: async (updateOpts) => {
184
+ const collections = getStoreCollections(db);
185
+ const filtered = updateOpts?.collections
186
+ ? collections.filter(c => updateOpts.collections.includes(c.name))
187
+ : collections;
188
+ internal.clearCache();
189
+ let totalIndexed = 0, totalUpdated = 0, totalUnchanged = 0, totalRemoved = 0;
190
+ for (const col of filtered) {
191
+ const result = await reindexCollection(internal, col.path, col.pattern || "**/*.md", col.name, {
192
+ ignorePatterns: col.ignore,
193
+ onProgress: updateOpts?.onProgress
194
+ ? (info) => updateOpts.onProgress({ collection: col.name, ...info })
195
+ : undefined,
196
+ });
197
+ totalIndexed += result.indexed;
198
+ totalUpdated += result.updated;
199
+ totalUnchanged += result.unchanged;
200
+ totalRemoved += result.removed;
201
+ }
202
+ return {
203
+ collections: filtered.length,
204
+ indexed: totalIndexed,
205
+ updated: totalUpdated,
206
+ unchanged: totalUnchanged,
207
+ removed: totalRemoved,
208
+ needsEmbedding: internal.getHashesNeedingEmbedding(),
209
+ };
210
+ },
211
+ embed: async (embedOpts) => {
212
+ return generateEmbeddings(internal, {
213
+ force: embedOpts?.force,
214
+ model: embedOpts?.model,
215
+ maxDocsPerBatch: embedOpts?.maxDocsPerBatch,
216
+ maxBatchBytes: embedOpts?.maxBatchBytes,
217
+ chunkStrategy: embedOpts?.chunkStrategy,
218
+ onProgress: embedOpts?.onProgress,
219
+ });
220
+ },
221
+ // Index Health
222
+ getStatus: async () => internal.getStatus(),
223
+ getIndexHealth: async () => internal.getIndexHealth(),
224
+ // Lifecycle
225
+ close: async () => {
226
+ await llm.dispose();
227
+ internal.close();
228
+ if (hasYamlConfig || options.config) {
229
+ setConfigSource(undefined); // Reset config source
230
+ }
231
+ },
232
+ };
233
+ return store;
234
+ }