@tryformation/querylight-cli 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. package/Dockerfile +7 -0
  2. package/LICENSE +21 -0
  3. package/README.md +391 -0
  4. package/dist/chunk/chunk-store.d.ts +4 -0
  5. package/dist/chunk/chunker.d.ts +9 -0
  6. package/dist/cli/format.d.ts +4 -0
  7. package/dist/cli/main.d.ts +2 -0
  8. package/dist/cli/main.js +3523 -0
  9. package/dist/cli/run-cli.d.ts +5 -0
  10. package/dist/core/config.d.ts +4 -0
  11. package/dist/core/constants.d.ts +3 -0
  12. package/dist/core/errors.d.ts +17 -0
  13. package/dist/core/files.d.ts +1 -0
  14. package/dist/core/hashing.d.ts +1 -0
  15. package/dist/core/ids.d.ts +1 -0
  16. package/dist/core/jsonl.d.ts +2 -0
  17. package/dist/core/runs.d.ts +3 -0
  18. package/dist/core/workspace.d.ts +7 -0
  19. package/dist/index/index-store.d.ts +11 -0
  20. package/dist/index/querylight-indexer.d.ts +14 -0
  21. package/dist/index.d.ts +11 -0
  22. package/dist/index.js +2794 -0
  23. package/dist/ingest/adapters/crawl4ai-adapter.d.ts +1 -0
  24. package/dist/ingest/adapters/directory-adapter.d.ts +2 -0
  25. package/dist/ingest/adapters/file-adapter.d.ts +16 -0
  26. package/dist/ingest/adapters/rss-adapter.d.ts +7 -0
  27. package/dist/ingest/adapters/url-adapter.d.ts +11 -0
  28. package/dist/ingest/adapters/website-adapter.d.ts +2 -0
  29. package/dist/ingest/document-utils.d.ts +24 -0
  30. package/dist/ingest/extractors/docx-extractor.d.ts +1 -0
  31. package/dist/ingest/extractors/html-extractor.d.ts +5 -0
  32. package/dist/ingest/extractors/markdown-extractor.d.ts +1 -0
  33. package/dist/ingest/extractors/pdf-extractor.d.ts +1 -0
  34. package/dist/ingest/extractors/text-extractor.d.ts +1 -0
  35. package/dist/ingest/ingest-service.d.ts +23 -0
  36. package/dist/normalize/boilerplate.d.ts +1 -0
  37. package/dist/normalize/normalize-markdown.d.ts +2 -0
  38. package/dist/query/context-builder.d.ts +8 -0
  39. package/dist/query/related-service.d.ts +6 -0
  40. package/dist/query/search-service.d.ts +31 -0
  41. package/dist/report/diff-service.d.ts +23 -0
  42. package/dist/sources/source-model.d.ts +1 -0
  43. package/dist/sources/source-store.d.ts +7 -0
  44. package/dist/types/models.d.ts +309 -0
  45. package/dist/vector/dense.d.ts +13 -0
  46. package/dist/vector/runtime.d.ts +18 -0
  47. package/dist/vector/service.d.ts +26 -0
  48. package/dist/vector/sparse.d.ts +19 -0
  49. package/dist/vector/store.d.ts +20 -0
  50. package/dist/vector/text.d.ts +3 -0
  51. package/package.json +66 -0
  52. package/scripts/sparse-encode.py +104 -0
@@ -0,0 +1 @@
1
+ export declare function crawlWithCrawl4Ai(): Promise<never>;
@@ -0,0 +1,2 @@
1
+ import type { Source } from "../../types/models.js";
2
+ export declare function listDirectoryFiles(source: Source): Promise<string[]>;
@@ -0,0 +1,16 @@
1
+ import type { DocumentRecord, Source } from "../../types/models.js";
2
+ export declare function ingestFile({ workspacePath, source, filePath, previous }: {
3
+ workspacePath: string;
4
+ source: Source;
5
+ filePath: string;
6
+ previous?: DocumentRecord;
7
+ }): Promise<DocumentRecord>;
8
+ export declare function ingestInlineContent({ workspacePath, source, content, title, uri, previous }: {
9
+ workspacePath: string;
10
+ source: Source;
11
+ content: string;
12
+ title: string;
13
+ uri: string;
14
+ previous?: DocumentRecord;
15
+ }): Promise<DocumentRecord>;
16
+ export declare function reprocessStoredDocument(document: DocumentRecord, source: Source): Promise<DocumentRecord | null>;
@@ -0,0 +1,7 @@
1
+ import type { Source } from "../../types/models.js";
2
+ export type ParsedFeedItem = {
3
+ url: string;
4
+ title: string;
5
+ publicationDate: string | null;
6
+ };
7
+ export declare function parseRssFeedDocument(xml: string, source: Source): Promise<ParsedFeedItem[]>;
@@ -0,0 +1,11 @@
1
+ import type { DocumentRecord, Source } from "../../types/models.js";
2
+ export type FetchRemoteDocumentOptions = {
3
+ workspacePath: string;
4
+ source: Source;
5
+ url: string;
6
+ previous?: DocumentRecord;
7
+ sourceUri?: string;
8
+ publicationDate?: string | null;
9
+ };
10
+ export declare function fetchUrlDocument({ workspacePath, source, url, previous, sourceUri, publicationDate }: FetchRemoteDocumentOptions): Promise<DocumentRecord>;
11
+ export declare function reprocessRemoteDocument(document: DocumentRecord, source: Source): Promise<DocumentRecord | null>;
@@ -0,0 +1,2 @@
1
+ import type { Source } from "../../types/models.js";
2
+ export declare function crawlWebsite(source: Source): Promise<string[]>;
@@ -0,0 +1,24 @@
1
+ import type { DocumentRecord, Metadata, PrimitiveMetadata, Source } from "../types/models.js";
2
+ export declare function buildDocumentMetadata({ source, sourceUri, publicationDate, crawledAt, indexedAt, extra }: {
3
+ source: Source;
4
+ sourceUri: string;
5
+ publicationDate?: string | null;
6
+ crawledAt?: string;
7
+ indexedAt?: string;
8
+ extra?: Record<string, PrimitiveMetadata | undefined>;
9
+ }): Metadata;
10
+ export declare function writeNormalizedDocument({ documentId, sourceId, title, uri, sourceUri, publicationDate, crawledAt, indexedAt, contentHash, lastChangedAt, normalizedPath, markdown }: {
11
+ documentId: string;
12
+ sourceId: string;
13
+ title: string;
14
+ uri: string;
15
+ sourceUri: string;
16
+ publicationDate?: string | null;
17
+ crawledAt?: string;
18
+ indexedAt?: string;
19
+ contentHash: string;
20
+ lastChangedAt: string;
21
+ normalizedPath: string;
22
+ markdown: string;
23
+ }): Promise<void>;
24
+ export declare function deleteDocumentArtifacts(document: DocumentRecord): Promise<void>;
@@ -0,0 +1 @@
1
+ export declare function extractDocx(filePath: string): Promise<string>;
@@ -0,0 +1,5 @@
1
+ export declare function extractHtmlToMarkdown(html: string): {
2
+ markdown: string;
3
+ title: string;
4
+ };
5
+ export declare function extractPublicationDateFromHtml(html: string): string | null;
@@ -0,0 +1 @@
1
+ export declare function extractMarkdown(filePath: string): Promise<string>;
@@ -0,0 +1 @@
1
+ export declare function extractPdf(filePath: string): Promise<string>;
@@ -0,0 +1 @@
1
+ export declare function extractText(filePath: string): Promise<string>;
@@ -0,0 +1,23 @@
1
+ export declare function ingestSources({ workspacePath, sourceIds, changedOnly }: {
2
+ workspacePath: string;
3
+ sourceIds?: string[];
4
+ changedOnly?: boolean;
5
+ }): Promise<{
6
+ runId: string;
7
+ documents: {
8
+ added: number;
9
+ changed: number;
10
+ unchanged: number;
11
+ failed: number;
12
+ };
13
+ processedSources: number;
14
+ }>;
15
+ export declare function reprocessDocuments({ workspacePath, sourceId, documentId }: {
16
+ workspacePath: string;
17
+ sourceId?: string;
18
+ documentId?: string;
19
+ }): Promise<{
20
+ runId: string;
21
+ documentsReprocessed: number;
22
+ documentsSkipped: number;
23
+ }>;
@@ -0,0 +1 @@
1
+ export declare function stripBoilerplate(html: string): string;
@@ -0,0 +1,2 @@
1
+ export declare function normalizeWhitespace(text: string): string;
2
+ export declare function withFrontmatter(metadata: Record<string, unknown>, body: string): string;
@@ -0,0 +1,8 @@
1
+ import type { ContextResponseData } from "../types/models.js";
2
+ export declare function createContext({ workspacePath, query, topK, maxChars, retrievalMode }: {
3
+ workspacePath: string;
4
+ query: string;
5
+ topK: number;
6
+ maxChars: number;
7
+ retrievalMode?: import("../types/models.js").RetrievalMode;
8
+ }): Promise<ContextResponseData>;
@@ -0,0 +1,6 @@
1
+ import type { RelatedDocumentsResponseData } from "../types/models.js";
2
+ export declare function findRelatedDocuments({ workspacePath, document, topK }: {
3
+ workspacePath: string;
4
+ document: string;
5
+ topK: number;
6
+ }): Promise<RelatedDocumentsResponseData>;
@@ -0,0 +1,31 @@
1
+ import type { RetrievalMode, SearchResponseData } from "../types/models.js";
2
+ type SearchDateField = "publicationDate" | "firstSeenAt" | "lastSeenAt" | "lastChangedAt" | "crawledAt";
3
+ type SearchDateRange = {
4
+ field: SearchDateField;
5
+ from?: string;
6
+ to?: string;
7
+ };
8
+ export declare function searchIndex({ workspacePath, query, topK, sourceId, sourceIds, sourceName, sourceNames, sourceType, sourceTypes, uriPrefix, uriPrefixes, hasPublicationDate, tag, tags, metadata, dateRanges, retrievalMode, showChunks }: {
9
+ workspacePath: string;
10
+ query: string;
11
+ topK: number;
12
+ sourceId?: string;
13
+ sourceIds?: string[];
14
+ sourceName?: string;
15
+ sourceNames?: string[];
16
+ sourceType?: string;
17
+ sourceTypes?: string[];
18
+ uriPrefix?: string;
19
+ uriPrefixes?: string[];
20
+ hasPublicationDate?: boolean;
21
+ tag?: string;
22
+ tags?: string[];
23
+ metadata?: Array<{
24
+ key: string;
25
+ value: string;
26
+ }>;
27
+ dateRanges?: SearchDateRange[];
28
+ retrievalMode?: RetrievalMode;
29
+ showChunks?: boolean;
30
+ }): Promise<SearchResponseData>;
31
+ export {};
@@ -0,0 +1,23 @@
1
+ export declare function diffWorkspace({ workspacePath, sourceId, documentId, since }: {
2
+ workspacePath: string;
3
+ sourceId?: string;
4
+ documentId?: string;
5
+ since?: string;
6
+ }): Promise<{
7
+ changedDocuments: Array<{
8
+ id: string;
9
+ title: string;
10
+ uri: string;
11
+ sourceId: string;
12
+ previousHash?: string;
13
+ currentHash: string;
14
+ }>;
15
+ }>;
16
+ export declare function renderChangeReport(diff: {
17
+ changedDocuments: Array<{
18
+ id: string;
19
+ title: string;
20
+ uri: string;
21
+ sourceId: string;
22
+ }>;
23
+ }): string;
@@ -0,0 +1 @@
1
+ export type { Source, SourceType, CrawlConfig } from "../types/models.js";
@@ -0,0 +1,7 @@
1
+ import type { Source } from "../types/models.js";
2
+ export declare function listSources(workspacePath: string): Promise<Source[]>;
3
+ export declare function addSource(workspacePath: string, source: Omit<Source, "id"> & {
4
+ id?: string;
5
+ }): Promise<Source>;
6
+ export declare function updateSource(workspacePath: string, sourceId: string, patch: Partial<Source>): Promise<Source>;
7
+ export declare function removeSource(workspacePath: string, sourceId: string): Promise<void>;
@@ -0,0 +1,309 @@
1
+ export type SourceType = "url" | "website" | "rss" | "file" | "directory" | "markdown" | "text";
2
+ export type PrimitiveMetadata = string | number | boolean | string[] | null;
3
+ export type Metadata = Record<string, PrimitiveMetadata>;
4
+ export type RetrievalMode = "lexical" | "dense" | "sparse" | "hybrid";
5
+ export type CrawlConfig = {
6
+ maxDepth?: number;
7
+ maxPages?: number;
8
+ includePatterns?: string[];
9
+ excludePatterns?: string[];
10
+ obeyRobotsTxt?: boolean;
11
+ userAgent?: string;
12
+ rateLimitMs?: number;
13
+ useSitemap?: boolean;
14
+ renderJs?: boolean;
15
+ retentionDays?: number;
16
+ fetchArticles?: boolean;
17
+ };
18
+ export type HttpCacheMetadata = {
19
+ etag?: string;
20
+ lastModified?: string;
21
+ cacheControl?: string;
22
+ expires?: string | null;
23
+ lastValidatedAt?: string;
24
+ lastStatus?: number;
25
+ };
26
+ export type Source = {
27
+ id: string;
28
+ type: SourceType;
29
+ name: string;
30
+ uri: string;
31
+ enabled: boolean;
32
+ tags: string[];
33
+ metadata: Metadata;
34
+ crawl?: CrawlConfig;
35
+ createdAt: string;
36
+ updatedAt: string;
37
+ };
38
+ export type DocumentRecord = {
39
+ id: string;
40
+ sourceId: string;
41
+ sourceType: SourceType;
42
+ title: string;
43
+ uri: string;
44
+ sourceUri: string;
45
+ canonicalUri?: string;
46
+ mimeType: string;
47
+ language?: string;
48
+ rawPath?: string;
49
+ normalizedPath: string;
50
+ contentHash: string;
51
+ metadata: Metadata;
52
+ publicationDate?: string | null;
53
+ crawledAt?: string;
54
+ firstSeenAt: string;
55
+ lastSeenAt: string;
56
+ lastChangedAt: string;
57
+ indexedAt?: string;
58
+ httpCache?: HttpCacheMetadata;
59
+ };
60
+ export type ChunkRecord = {
61
+ id: string;
62
+ documentId: string;
63
+ sourceId: string;
64
+ title: string;
65
+ uri: string;
66
+ headingPath: string[];
67
+ text: string;
68
+ tokenEstimate?: number;
69
+ contentHash: string;
70
+ metadata: Metadata;
71
+ firstSeenAt: string;
72
+ lastSeenAt: string;
73
+ lastChangedAt: string;
74
+ };
75
+ export type IndexMetadata = {
76
+ id: string;
77
+ createdAt: string;
78
+ querylightVersion: string;
79
+ kbVersion: string;
80
+ documentCount: number;
81
+ chunkCount: number;
82
+ sourceCount: number;
83
+ fields: string[];
84
+ embeddingModel?: string;
85
+ sparseVectorModel?: string;
86
+ indexHash: string;
87
+ };
88
+ export type DenseVectorModelConfig = {
89
+ enabled: boolean;
90
+ modelId: string;
91
+ cacheDir: string;
92
+ indexHashTables: number;
93
+ indexRandomSeed: number;
94
+ chunkTextMode: "title-heading-text";
95
+ };
96
+ export type SparseVectorModelConfig = {
97
+ enabled: boolean;
98
+ modelId: string;
99
+ cacheDir: string;
100
+ documentTopTokens: number;
101
+ queryEncoding: "tokenizer-token-weights";
102
+ documentEncoding: "masked-lm-max-log1p-relu";
103
+ chunkTextMode: "title-heading-text";
104
+ };
105
+ export type DenseVectorMetadata = {
106
+ createdAt: string;
107
+ modelId: string;
108
+ dimensions: number;
109
+ hashTables: number;
110
+ randomSeed: number;
111
+ chunkCount: number;
112
+ indexHash: string;
113
+ };
114
+ export type SparseVectorMetadata = {
115
+ createdAt: string;
116
+ modelId: string;
117
+ vocabularySize: number;
118
+ documentTopTokens: number;
119
+ queryEncoding: "tokenizer-token-weights";
120
+ documentEncoding: "masked-lm-max-log1p-relu";
121
+ chunkCount: number;
122
+ indexHash: string;
123
+ };
124
+ export type DenseVectorRecord = {
125
+ chunkId: string;
126
+ documentId: string;
127
+ sourceId: string;
128
+ title: string;
129
+ uri: string;
130
+ headingPath: string[];
131
+ text: string;
132
+ embedding: number[];
133
+ };
134
+ export type SparseVectorRecord = {
135
+ chunkId: string;
136
+ documentId: string;
137
+ sourceId: string;
138
+ title: string;
139
+ uri: string;
140
+ headingPath: string[];
141
+ text: string;
142
+ vector: Record<string, number>;
143
+ };
144
+ export type DenseVectorPayload = {
145
+ metadata: DenseVectorMetadata;
146
+ indexState: object;
147
+ chunks: DenseVectorRecord[];
148
+ };
149
+ export type SparseVectorPayload = {
150
+ metadata: SparseVectorMetadata;
151
+ indexState: object;
152
+ chunks: SparseVectorRecord[];
153
+ queryTokenWeights: number[];
154
+ };
155
+ export type WorkspaceConfig = {
156
+ workspaceVersion: number;
157
+ index: {
158
+ name: string;
159
+ fields: Record<string, {
160
+ type: string;
161
+ weight?: number;
162
+ }>;
163
+ chunking: {
164
+ maxChars: number;
165
+ overlapChars: number;
166
+ minChars: number;
167
+ splitOnHeadings: boolean;
168
+ };
169
+ };
170
+ rag: {
171
+ defaultTopK: number;
172
+ maxContextChars: number;
173
+ citationStyle: "markdown";
174
+ };
175
+ retrieval: {
176
+ defaultMode: RetrievalMode;
177
+ dense: DenseVectorModelConfig;
178
+ sparse: SparseVectorModelConfig;
179
+ };
180
+ crawler: {
181
+ defaultUserAgent: string;
182
+ obeyRobotsTxt: boolean;
183
+ rateLimitMs: number;
184
+ renderJs: boolean;
185
+ retentionDays: number;
186
+ fetchArticles: boolean;
187
+ };
188
+ limits: {
189
+ maxFileSizeMb: number;
190
+ maxPagesPerSource: number;
191
+ maxTotalChunks: number;
192
+ };
193
+ };
194
+ export type CommandError = {
195
+ code: string;
196
+ message: string;
197
+ details?: unknown;
198
+ };
199
+ export type CommandResponse<T> = {
200
+ ok: boolean;
201
+ command: string;
202
+ workspace: string;
203
+ version: string;
204
+ data?: T;
205
+ error?: CommandError;
206
+ };
207
+ export type SearchResult = {
208
+ chunkId: string;
209
+ documentId: string;
210
+ sourceId: string;
211
+ sourceType: SourceType;
212
+ score: number;
213
+ title: string;
214
+ uri: string;
215
+ headingPath: string[];
216
+ snippet: string;
217
+ text?: string;
218
+ publicationDate?: string | null;
219
+ firstSeenAt: string;
220
+ lastSeenAt: string;
221
+ lastChangedAt: string;
222
+ metadata: Record<string, unknown>;
223
+ };
224
+ export type SearchResponseData = {
225
+ retrievalMode?: RetrievalMode;
226
+ results: SearchResult[];
227
+ };
228
+ export type RelatedDocumentResult = {
229
+ documentId: string;
230
+ sourceId: string;
231
+ score: number;
232
+ title: string;
233
+ uri: string;
234
+ metadata: Record<string, unknown>;
235
+ };
236
+ export type RelatedDocumentsResponseData = {
237
+ sourceDocument: {
238
+ documentId: string;
239
+ sourceId: string;
240
+ title: string;
241
+ uri: string;
242
+ };
243
+ retrievalMode: "dense";
244
+ results: RelatedDocumentResult[];
245
+ };
246
+ export type ContextSource = {
247
+ chunkId: string;
248
+ documentId: string;
249
+ sourceId: string;
250
+ title: string;
251
+ uri: string;
252
+ headingPath: string[];
253
+ text: string;
254
+ metadata: Record<string, unknown>;
255
+ };
256
+ export type ContextResponseData = {
257
+ retrievalMode?: RetrievalMode;
258
+ markdown: string;
259
+ sources: ContextSource[];
260
+ };
261
+ export type ModelPullResponse = {
262
+ dense?: {
263
+ pulled: boolean;
264
+ modelId: string;
265
+ cacheDir: string;
266
+ };
267
+ sparse?: {
268
+ pulled: boolean;
269
+ modelId: string;
270
+ cacheDir: string;
271
+ };
272
+ };
273
+ export type ModelStatusResponse = {
274
+ dense: {
275
+ configured: boolean;
276
+ modelId: string;
277
+ cacheDir: string;
278
+ available: boolean;
279
+ artifactExists: boolean;
280
+ };
281
+ sparse: {
282
+ configured: boolean;
283
+ modelId: string;
284
+ cacheDir: string;
285
+ uvAvailable: boolean;
286
+ available: boolean;
287
+ artifactExists: boolean;
288
+ };
289
+ };
290
+ export type RunRecord = {
291
+ id: string;
292
+ kind: string;
293
+ createdAt: string;
294
+ success: boolean;
295
+ summary: Record<string, unknown>;
296
+ failures?: Array<{
297
+ sourceId: string;
298
+ uri: string;
299
+ message: string;
300
+ }>;
301
+ documentsSnapshot?: Array<{
302
+ id: string;
303
+ title: string;
304
+ uri: string;
305
+ contentHash: string;
306
+ lastChangedAt: string;
307
+ sourceId: string;
308
+ }>;
309
+ };
@@ -0,0 +1,13 @@
1
+ import type { DenseVectorPayload, WorkspaceConfig } from "../types/models.js";
2
+ export declare function setDenseEmbedderFactoryForTests(factory: ((cacheDir: string, modelId: string) => Promise<(text: string) => Promise<number[]>>) | null): void;
3
+ export declare function pullDenseModel(workspacePath: string, config: WorkspaceConfig["retrieval"]["dense"]): Promise<void>;
4
+ export declare function buildDenseVectors({ workspacePath, config }: {
5
+ workspacePath: string;
6
+ config: WorkspaceConfig["retrieval"]["dense"];
7
+ }): Promise<DenseVectorPayload>;
8
+ export declare function denseQuery({ workspacePath, config, query, topK }: {
9
+ workspacePath: string;
10
+ config: WorkspaceConfig["retrieval"]["dense"];
11
+ query: string;
12
+ topK: number;
13
+ }): Promise<Array<[string, number]>>;
@@ -0,0 +1,18 @@
1
+ import type { SparseVectorModelConfig } from "../types/models.js";
2
+ export declare function resolveCacheDir(workspacePath: string, configuredPath: string): string;
3
+ export declare function packageRootFromImportMeta(importMetaUrl: string): string;
4
+ export declare function sparseScriptPath(importMetaUrl: string): Promise<string>;
5
+ export declare function ensureUvAvailable(): Promise<void>;
6
+ export declare function runSparsePython({ workspacePath, config, payload, importMetaUrl }: {
7
+ workspacePath: string;
8
+ config: SparseVectorModelConfig;
9
+ payload: object;
10
+ importMetaUrl: string;
11
+ }): Promise<string>;
12
+ export declare function getDenseTransformersRuntime(cacheDir: string): Promise<{
13
+ env: {
14
+ cacheDir: string;
15
+ allowLocalModels: boolean;
16
+ };
17
+ pipeline: typeof import("@huggingface/transformers").pipeline;
18
+ }>;
@@ -0,0 +1,26 @@
1
+ import type { DenseVectorPayload, SparseVectorPayload, WorkspaceConfig } from "../types/models.js";
2
+ export declare function resolveModelPullPlan({ pullDenseFlag, pullSparseFlag, uvAvailable }: {
3
+ pullDenseFlag: boolean;
4
+ pullSparseFlag: boolean;
5
+ uvAvailable: boolean;
6
+ }): {
7
+ pullDense: boolean;
8
+ pullSparse: boolean;
9
+ };
10
+ export declare function buildVectorArtifacts({ workspacePath, config, denseOverride, sparseOverride, buildAvailableModels }: {
11
+ workspacePath: string;
12
+ config: WorkspaceConfig;
13
+ denseOverride?: boolean;
14
+ sparseOverride?: boolean;
15
+ buildAvailableModels?: boolean;
16
+ }): Promise<{
17
+ dense?: DenseVectorPayload;
18
+ sparse?: SparseVectorPayload;
19
+ }>;
20
+ export declare function pullModels({ workspacePath, config, pullDense, pullSparse }: {
21
+ workspacePath: string;
22
+ config: WorkspaceConfig;
23
+ pullDense: boolean;
24
+ pullSparse: boolean;
25
+ }): Promise<void>;
26
+ export declare function getModelStatus(workspacePath: string, config: WorkspaceConfig): Promise<import("../index.js").ModelStatusResponse>;
@@ -0,0 +1,19 @@
1
+ import { type SparseVector } from "@tryformation/querylight-ts";
2
+ import type { ChunkRecord, SparseVectorPayload, SparseVectorRecord, WorkspaceConfig } from "../types/models.js";
3
+ export declare function setSparseQueryEncoderFactoryForTests(factory: ((cacheDir: string, modelId: string, queryTokenWeights: number[]) => Promise<(text: string) => Promise<SparseVector>>) | null): void;
4
+ export declare function setSparseDocumentBuilderFactoryForTests(factory: ((workspacePath: string, config: WorkspaceConfig["retrieval"]["sparse"], chunks: ChunkRecord[]) => Promise<{
5
+ queryTokenWeights: number[];
6
+ vocabularySize: number;
7
+ chunks: SparseVectorRecord[];
8
+ }>) | null): void;
9
+ export declare function pullSparseModel(workspacePath: string, config: WorkspaceConfig["retrieval"]["sparse"]): Promise<void>;
10
+ export declare function buildSparseVectors({ workspacePath, config }: {
11
+ workspacePath: string;
12
+ config: WorkspaceConfig["retrieval"]["sparse"];
13
+ }): Promise<SparseVectorPayload>;
14
+ export declare function sparseQuery({ workspacePath, config, query, topK }: {
15
+ workspacePath: string;
16
+ config: WorkspaceConfig["retrieval"]["sparse"];
17
+ query: string;
18
+ topK: number;
19
+ }): Promise<Array<[string, number]>>;
@@ -0,0 +1,20 @@
1
+ import type { DenseVectorPayload, ModelStatusResponse, SparseVectorPayload } from "../types/models.js";
2
+ export declare function denseVectorPath(workspacePath: string): string;
3
+ export declare function denseMetaPath(workspacePath: string): string;
4
+ export declare function sparseVectorPath(workspacePath: string): string;
5
+ export declare function sparseMetaPath(workspacePath: string): string;
6
+ export declare function writeDensePayload(workspacePath: string, payload: DenseVectorPayload): Promise<void>;
7
+ export declare function readDensePayload(workspacePath: string): Promise<DenseVectorPayload>;
8
+ export declare function writeSparsePayload(workspacePath: string, payload: SparseVectorPayload): Promise<void>;
9
+ export declare function readSparsePayload(workspacePath: string): Promise<SparseVectorPayload>;
10
+ export declare function writeDensePullMarker(workspacePath: string, value: object): Promise<void>;
11
+ export declare function writeSparsePullMarker(workspacePath: string, value: object): Promise<void>;
12
+ export declare function buildModelStatus(workspacePath: string, dense: {
13
+ enabled: boolean;
14
+ modelId: string;
15
+ cacheDir: string;
16
+ }, sparse: {
17
+ enabled: boolean;
18
+ modelId: string;
19
+ cacheDir: string;
20
+ }, uvAvailable: boolean): Promise<ModelStatusResponse>;
@@ -0,0 +1,3 @@
1
+ import type { ChunkRecord } from "../types/models.js";
2
+ export declare function createDenseChunkText(chunk: ChunkRecord): string;
3
+ export declare function createSparseChunkText(chunk: ChunkRecord): string;