@tryformation/querylight-cli 0.1.1 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +64 -11
- package/dist/chunk/chunker.d.ts +3 -1
- package/dist/cli/main.js +1163 -285
- package/dist/cli/run-cli.d.ts +4 -1
- package/dist/core/concurrency.d.ts +1 -0
- package/dist/core/constants.d.ts +3 -1
- package/dist/core/gzip-json.d.ts +3 -0
- package/dist/core/progress.d.ts +4 -0
- package/dist/core/urls.d.ts +1 -0
- package/dist/index/index-store.d.ts +3 -0
- package/dist/index/querylight-indexer.d.ts +3 -1
- package/dist/index.js +540 -141
- package/dist/ingest/adapters/website-adapter.d.ts +6 -1
- package/dist/ingest/adapters/website-feed-discovery.d.ts +6 -0
- package/dist/ingest/extractors/html-extractor.d.ts +1 -0
- package/dist/ingest/ingest-service.d.ts +5 -2
- package/dist/types/models.d.ts +2 -2
- package/dist/vector/dense.d.ts +3 -1
- package/dist/vector/runtime.d.ts +2 -0
- package/dist/vector/service.d.ts +20 -2
- package/dist/vector/sparse.d.ts +3 -1
- package/dist/vector/store.d.ts +8 -2
- package/package.json +1 -1
package/dist/ingest/adapters/website-adapter.d.ts
CHANGED
|
@@ -1,2 +1,7 @@
|
|
|
1
|
+
import { type ProgressHandler } from "../../core/progress.js";
|
|
1
2
|
import type { Source } from "../../types/models.js";
|
|
2
|
-
export declare function crawlWebsite(source: Source
|
|
3
|
+
export declare function crawlWebsite(source: Source, defaults: {
|
|
4
|
+
userAgent: string;
|
|
5
|
+
rateLimitMs: number;
|
|
6
|
+
maxConcurrentRequests: number;
|
|
7
|
+
}, progress?: ProgressHandler): Promise<string[]>;
|
package/dist/ingest/extractors/html-extractor.d.ts
CHANGED
|
@@ -2,4 +2,5 @@ export declare function extractHtmlToMarkdown(html: string): {
|
|
|
2
2
|
markdown: string;
|
|
3
3
|
title: string;
|
|
4
4
|
};
|
|
5
|
+
export declare function extractCanonicalUriFromHtml(html: string, baseUrl: string): string | null;
|
|
5
6
|
export declare function extractPublicationDateFromHtml(html: string): string | null;
|
package/dist/ingest/ingest-service.d.ts
CHANGED
|
@@ -1,7 +1,9 @@
|
|
|
1
|
-
export declare function ingestSources({ workspacePath, sourceIds, changedOnly }: {
|
|
1
|
+
import { type ProgressHandler } from "../core/progress.js";
|
|
2
|
+
export declare function ingestSources({ workspacePath, sourceIds, changedOnly, progress }: {
|
|
2
3
|
workspacePath: string;
|
|
3
4
|
sourceIds?: string[];
|
|
4
5
|
changedOnly?: boolean;
|
|
6
|
+
progress?: ProgressHandler;
|
|
5
7
|
}): Promise<{
|
|
6
8
|
runId: string;
|
|
7
9
|
documents: {
|
|
@@ -12,10 +14,11 @@ export declare function ingestSources({ workspacePath, sourceIds, changedOnly }:
|
|
|
12
14
|
};
|
|
13
15
|
processedSources: number;
|
|
14
16
|
}>;
|
|
15
|
-
export declare function reprocessDocuments({ workspacePath, sourceId, documentId }: {
|
|
17
|
+
export declare function reprocessDocuments({ workspacePath, sourceId, documentId, progress }: {
|
|
16
18
|
workspacePath: string;
|
|
17
19
|
sourceId?: string;
|
|
18
20
|
documentId?: string;
|
|
21
|
+
progress?: ProgressHandler;
|
|
19
22
|
}): Promise<{
|
|
20
23
|
runId: string;
|
|
21
24
|
documentsReprocessed: number;
|
package/dist/types/models.d.ts
CHANGED
|
@@ -5,6 +5,7 @@ export type RetrievalMode = "lexical" | "dense" | "sparse" | "hybrid";
|
|
|
5
5
|
export type CrawlConfig = {
|
|
6
6
|
maxDepth?: number;
|
|
7
7
|
maxPages?: number;
|
|
8
|
+
maxConcurrentRequests?: number;
|
|
8
9
|
includePatterns?: string[];
|
|
9
10
|
excludePatterns?: string[];
|
|
10
11
|
obeyRobotsTxt?: boolean;
|
|
@@ -181,6 +182,7 @@ export type WorkspaceConfig = {
|
|
|
181
182
|
defaultUserAgent: string;
|
|
182
183
|
obeyRobotsTxt: boolean;
|
|
183
184
|
rateLimitMs: number;
|
|
185
|
+
maxConcurrentRequests: number;
|
|
184
186
|
renderJs: boolean;
|
|
185
187
|
retentionDays: number;
|
|
186
188
|
fetchArticles: boolean;
|
|
@@ -212,7 +214,6 @@ export type SearchResult = {
|
|
|
212
214
|
score: number;
|
|
213
215
|
title: string;
|
|
214
216
|
uri: string;
|
|
215
|
-
headingPath: string[];
|
|
216
217
|
snippet: string;
|
|
217
218
|
text?: string;
|
|
218
219
|
publicationDate?: string | null;
|
|
@@ -249,7 +250,6 @@ export type ContextSource = {
|
|
|
249
250
|
sourceId: string;
|
|
250
251
|
title: string;
|
|
251
252
|
uri: string;
|
|
252
|
-
headingPath: string[];
|
|
253
253
|
text: string;
|
|
254
254
|
metadata: Record<string, unknown>;
|
|
255
255
|
};
|
package/dist/vector/dense.d.ts
CHANGED
|
@@ -1,9 +1,11 @@
|
|
|
1
|
+
import { type ProgressHandler } from "../core/progress.js";
|
|
1
2
|
import type { DenseVectorPayload, WorkspaceConfig } from "../types/models.js";
|
|
2
3
|
export declare function setDenseEmbedderFactoryForTests(factory: ((cacheDir: string, modelId: string) => Promise<(text: string) => Promise<number[]>>) | null): void;
|
|
3
4
|
export declare function pullDenseModel(workspacePath: string, config: WorkspaceConfig["retrieval"]["dense"]): Promise<void>;
|
|
4
|
-
export declare function buildDenseVectors({ workspacePath, config }: {
|
|
5
|
+
export declare function buildDenseVectors({ workspacePath, config, progress }: {
|
|
5
6
|
workspacePath: string;
|
|
6
7
|
config: WorkspaceConfig["retrieval"]["dense"];
|
|
8
|
+
progress?: ProgressHandler;
|
|
7
9
|
}): Promise<DenseVectorPayload>;
|
|
8
10
|
export declare function denseQuery({ workspacePath, config, query, topK }: {
|
|
9
11
|
workspacePath: string;
|
package/dist/vector/runtime.d.ts
CHANGED
|
@@ -1,8 +1,10 @@
|
|
|
1
1
|
import type { SparseVectorModelConfig } from "../types/models.js";
|
|
2
|
+
export declare function resolveQliHomeDir(): string;
|
|
2
3
|
export declare function resolveCacheDir(workspacePath: string, configuredPath: string): string;
|
|
3
4
|
export declare function packageRootFromImportMeta(importMetaUrl: string): string;
|
|
4
5
|
export declare function sparseScriptPath(importMetaUrl: string): Promise<string>;
|
|
5
6
|
export declare function ensureUvAvailable(): Promise<void>;
|
|
7
|
+
export declare function isUvAvailable(): Promise<boolean>;
|
|
6
8
|
export declare function runSparsePython({ workspacePath, config, payload, importMetaUrl }: {
|
|
7
9
|
workspacePath: string;
|
|
8
10
|
config: SparseVectorModelConfig;
|
package/dist/vector/service.d.ts
CHANGED
|
@@ -1,4 +1,13 @@
|
|
|
1
1
|
import type { DenseVectorPayload, SparseVectorPayload, WorkspaceConfig } from "../types/models.js";
|
|
2
|
+
import { type ProgressHandler } from "../core/progress.js";
|
|
3
|
+
import { buildModelStatus } from "./store.js";
|
|
4
|
+
export declare function setPullModelsForTests(override: ((args: {
|
|
5
|
+
workspacePath: string;
|
|
6
|
+
config: WorkspaceConfig;
|
|
7
|
+
pullDense: boolean;
|
|
8
|
+
pullSparse: boolean;
|
|
9
|
+
progress?: ProgressHandler;
|
|
10
|
+
}) => Promise<void>) | null): void;
|
|
2
11
|
export declare function resolveModelPullPlan({ pullDenseFlag, pullSparseFlag, uvAvailable }: {
|
|
3
12
|
pullDenseFlag: boolean;
|
|
4
13
|
pullSparseFlag: boolean;
|
|
@@ -7,20 +16,29 @@ export declare function resolveModelPullPlan({ pullDenseFlag, pullSparseFlag, uv
|
|
|
7
16
|
pullDense: boolean;
|
|
8
17
|
pullSparse: boolean;
|
|
9
18
|
};
|
|
10
|
-
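export declare function buildVectorArtifacts({ workspacePath, config, denseOverride, sparseOverride, buildAvailableModels }: {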
|
|
19
|
+
export declare function resolveMissingConfiguredModelPullPlan({ config, status }: {
|
|
20
|
+
config: WorkspaceConfig;
|
|
21
|
+
status: Awaited<ReturnType<typeof buildModelStatus>>;
|
|
22
|
+
}): {
|
|
23
|
+
pullDense: boolean;
|
|
24
|
+
pullSparse: boolean;
|
|
25
|
+
};
|
|
26
|
+
export declare function buildVectorArtifacts({ workspacePath, config, denseOverride, sparseOverride, buildAvailableModels, progress }: {
|
|
11
27
|
workspacePath: string;
|
|
12
28
|
config: WorkspaceConfig;
|
|
13
29
|
denseOverride?: boolean;
|
|
14
30
|
sparseOverride?: boolean;
|
|
15
31
|
buildAvailableModels?: boolean;
|
|
32
|
+
progress?: ProgressHandler;
|
|
16
33
|
}): Promise<{
|
|
17
34
|
dense?: DenseVectorPayload;
|
|
18
35
|
sparse?: SparseVectorPayload;
|
|
19
36
|
}>;
|
|
20
|
-
export declare function pullModels({ workspacePath, config, pullDense, pullSparse }: {
|
|
37
|
+
export declare function pullModels({ workspacePath, config, pullDense, pullSparse, progress }: {
|
|
21
38
|
workspacePath: string;
|
|
22
39
|
config: WorkspaceConfig;
|
|
23
40
|
pullDense: boolean;
|
|
24
41
|
pullSparse: boolean;
|
|
42
|
+
progress?: ProgressHandler;
|
|
25
43
|
}): Promise<void>;
|
|
26
44
|
export declare function getModelStatus(workspacePath: string, config: WorkspaceConfig): Promise<import("../index.js").ModelStatusResponse>;
|
package/dist/vector/sparse.d.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { type SparseVector } from "@tryformation/querylight-ts";
|
|
2
|
+
import { type ProgressHandler } from "../core/progress.js";
|
|
2
3
|
import type { ChunkRecord, SparseVectorPayload, SparseVectorRecord, WorkspaceConfig } from "../types/models.js";
|
|
3
4
|
export declare function setSparseQueryEncoderFactoryForTests(factory: ((cacheDir: string, modelId: string, queryTokenWeights: number[]) => Promise<(text: string) => Promise<SparseVector>>) | null): void;
|
|
4
5
|
export declare function setSparseDocumentBuilderFactoryForTests(factory: ((workspacePath: string, config: WorkspaceConfig["retrieval"]["sparse"], chunks: ChunkRecord[]) => Promise<{
|
|
@@ -7,9 +8,10 @@ export declare function setSparseDocumentBuilderFactoryForTests(factory: ((works
|
|
|
7
8
|
chunks: SparseVectorRecord[];
|
|
8
9
|
}>) | null): void;
|
|
9
10
|
export declare function pullSparseModel(workspacePath: string, config: WorkspaceConfig["retrieval"]["sparse"]): Promise<void>;
|
|
10
|
-
export declare function buildSparseVectors({ workspacePath, config }: {
|
|
11
|
+
export declare function buildSparseVectors({ workspacePath, config, progress }: {
|
|
11
12
|
workspacePath: string;
|
|
12
13
|
config: WorkspaceConfig["retrieval"]["sparse"];
|
|
14
|
+
progress?: ProgressHandler;
|
|
13
15
|
}): Promise<SparseVectorPayload>;
|
|
14
16
|
export declare function sparseQuery({ workspacePath, config, query, topK }: {
|
|
15
17
|
workspacePath: string;
|
package/dist/vector/store.d.ts
CHANGED
|
@@ -7,8 +7,14 @@ export declare function writeDensePayload(workspacePath: string, payload: DenseV
|
|
|
7
7
|
export declare function readDensePayload(workspacePath: string): Promise<DenseVectorPayload>;
|
|
8
8
|
export declare function writeSparsePayload(workspacePath: string, payload: SparseVectorPayload): Promise<void>;
|
|
9
9
|
export declare function readSparsePayload(workspacePath: string): Promise<SparseVectorPayload>;
|
|
10
|
-
export declare function writeDensePullMarker(workspacePath: string,
|
|
11
|
-
|
|
10
|
+
export declare function writeDensePullMarker(workspacePath: string, model: {
|
|
11
|
+
modelId: string;
|
|
12
|
+
cacheDir: string;
|
|
13
|
+
}, value: object): Promise<void>;
|
|
14
|
+
export declare function writeSparsePullMarker(workspacePath: string, model: {
|
|
15
|
+
modelId: string;
|
|
16
|
+
cacheDir: string;
|
|
17
|
+
}, value: object): Promise<void>;
|
|
12
18
|
export declare function buildModelStatus(workspacePath: string, dense: {
|
|
13
19
|
enabled: boolean;
|
|
14
20
|
modelId: string;
|
package/package.json
CHANGED