npm - @tryformation/querylight-cli - Versions diffs - 0.2.2 → 0.2.4 - Mend

@tryformation/querylight-cli 0.2.2 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13)

package/README.md +33 -3
package/dist/cli/format.d.ts +2 -2
package/dist/cli/main.js +694 -135
package/dist/core/constants.d.ts +1 -1
package/dist/index/querylight-indexer.d.ts +2 -2
package/dist/index.d.ts +1 -0
package/dist/index.js +592 -123
package/dist/query/search-service.d.ts +14 -1
package/dist/server/search-api.d.ts +15 -0
package/dist/types/models.d.ts +36 -1
package/dist/vector/dense.d.ts +6 -1
package/package.json +2 -2
package/scripts/sparse-encode.py +29 -8

package/dist/query/search-service.d.ts CHANGED

@@ -1,10 +1,23 @@
-import type { RetrievalMode, SearchResponseData } from "../types/models.js";
+import { type DocumentIndex, type JsonDslRequest, type JsonDslResponse } from "@tryformation/querylight-ts";
+import type { RetrievalMode, SearchResponseData, SearchResult } from "../types/models.js";
+export declare function loadHydratedIndex(workspacePath: string): Promise<DocumentIndex>;
 type SearchDateField = "publicationDate" | "firstSeenAt" | "lastSeenAt" | "lastChangedAt" | "crawledAt";
 type SearchDateRange = {
     field: SearchDateField;
     from?: string;
     to?: string;
 };
+export declare function searchResultsFromResponse(response: SearchResponseData, showChunks?: boolean): SearchResult[];
+export declare function searchJsonRequest({ index, request, indexName }: {
+    index: DocumentIndex;
+    request: JsonDslRequest;
+    indexName?: string;
+}): Promise<JsonDslResponse>;
+export declare function searchJsonIndex({ workspacePath, request, indexName }: {
+    workspacePath: string;
+    request: JsonDslRequest;
+    indexName?: string;
+}): Promise<JsonDslResponse>;
 export declare function searchIndex({ workspacePath, query, topK, sourceId, sourceIds, sourceName, sourceNames, sourceType, sourceTypes, uriPrefix, uriPrefixes, hasPublicationDate, tag, tags, metadata, dateRanges, retrievalMode, showChunks }: {
     workspacePath: string;
     query: string;

package/dist/server/search-api.d.ts ADDED

@@ -0,0 +1,15 @@
+export type SearchApiServerInfo = {
+    mode: "single" | "multi";
+    url: string;
+    knowledgeBases: Array<{
+        name: string;
+        workspacePath: string;
+        route: string;
+    }>;
+    close: () => Promise<void>;
+};
+export declare function startSearchApiServer({ workspacePath, host, port }: {
+    workspacePath: string;
+    host?: string;
+    port?: number;
+}): Promise<SearchApiServerInfo>;

package/dist/types/models.d.ts CHANGED

@@ -222,9 +222,44 @@ export type SearchResult = {
     lastChangedAt: string;
     metadata: Record<string, unknown>;
 };
+export type SearchHitSource = {
+    chunkId: string;
+    documentId: string;
+    sourceId: string;
+    sourceType: SourceType;
+    sourceName?: string;
+    title: string;
+    uri: string;
+    headingPath: string[];
+    text: string;
+    snippet?: string;
+    normalizedPath?: string;
+    publicationDate?: string | null;
+    crawledAt?: string;
+    firstSeenAt: string;
+    lastSeenAt: string;
+    lastChangedAt: string;
+    metadata: Record<string, unknown>;
+};
+export type SearchHit = {
+    _index: string;
+    _id: string;
+    _score: number;
+    _source: SearchHitSource;
+    highlight?: Record<string, string[]>;
+};
 export type SearchResponseData = {
     retrievalMode?: RetrievalMode;
-    results: SearchResult[];
+    took: number;
+    hits: {
+        total: {
+            value: number;
+            relation: "eq";
+        };
+        max_score: number | null;
+        hits: SearchHit[];
+    };
+    aggregations?: Record<string, unknown>;
 };
 export type RelatedDocumentResult = {
     documentId: string;

package/dist/vector/dense.d.ts CHANGED

@@ -1,6 +1,10 @@
 import { type ProgressHandler } from "../core/progress.js";
 import type { DenseVectorPayload, WorkspaceConfig } from "../types/models.js";
-export declare function setDenseEmbedderFactoryForTests(factory: ((cacheDir: string, modelId: string) => Promise<(text: string) => Promise<number[]>>) | null): void;
+type DenseEmbedder = {
+    embed(text: string): Promise<number[]>;
+    dispose?: () => Promise<void>;
+};
+export declare function setDenseEmbedderFactoryForTests(factory: ((cacheDir: string, modelId: string) => Promise<DenseEmbedder | ((text: string) => Promise<number[]>)>) | null): void;
 export declare function pullDenseModel(workspacePath: string, config: WorkspaceConfig["retrieval"]["dense"]): Promise<void>;
 export declare function buildDenseVectors({ workspacePath, config, progress }: {
     workspacePath: string;
@@ -13,3 +17,4 @@ export declare function denseQuery({ workspacePath, config, query, topK }: {
     query: string;
     topK: number;
 }): Promise<Array<[string, number]>>;
+export {};

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@tryformation/querylight-cli",
-  "version": "0.2.2",
+  "version": "0.2.4",
   "description": "Querylight CLI for building and querying local knowledge bases.",
   "license": "MIT",
   "homepage": "https://github.com/formation-res/querylight-cli#readme",
@@ -40,7 +40,7 @@
   },
   "dependencies": {
     "@huggingface/transformers": "^3.8.1",
-    "@tryformation/querylight-ts": "^0.10.0",
+    "@tryformation/querylight-ts": "^0.11.0",
     "cheerio": "^1.2.0",
     "cli-table3": "^0.6.5",
     "commander": "^14.0.3",

package/scripts/sparse-encode.py CHANGED

@@ -7,19 +7,40 @@ from huggingface_hub import hf_hub_download
 from transformers import AutoModelForMaskedLM, AutoTokenizer
+def _load_query_weights_file(model_id: str, filename: str):
+    try:
+        return hf_hub_download(repo_id=model_id, filename=filename)
+    except Exception:
+        return None
 def build_query_token_weight_vector(tokenizer, model_id: str):
-    local_cached_path = hf_hub_download(repo_id=model_id, filename="query_token_weights.txt")
     vector = [0.0] * tokenizer.vocab_size
-    with open(local_cached_path, encoding="utf-8") as handle:
-        for line in handle:
-            line = line.rstrip("\n")
-            if not line:
-                continue
-            token, weight = line.split("\t", 1)
+    local_cached_path = _load_query_weights_file(model_id, "query_token_weights.txt")
+    if local_cached_path is not None:
+        with open(local_cached_path, encoding="utf-8") as handle:
+            for line in handle:
+                line = line.rstrip("\n")
+                if not line:
+                    continue
+                token, weight = line.split("\t", 1)
+                token_id = tokenizer._convert_token_to_id_with_added_voc(token)
+                if token_id is not None and token_id >= 0:
+                    vector[token_id] = float(weight)
+        return vector
+    local_cached_path = _load_query_weights_file(model_id, "idf.json")
+    if local_cached_path is not None:
+        with open(local_cached_path, encoding="utf-8") as handle:
+            idf = json.load(handle)
+        for token, weight in idf.items():
             token_id = tokenizer._convert_token_to_id_with_added_voc(token)
             if token_id is not None and token_id >= 0:
                 vector[token_id] = float(weight)
+        return vector
+    raise FileNotFoundError(f"missing query token weights for {model_id}: expected query_token_weights.txt or idf.json")
     return vector