npm - @tryformation/querylight-cli - Versions diffs - 0.2.1 → 0.2.3 - Mend

@tryformation/querylight-cli 0.2.1 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11)

package/README.md +7 -3
package/dist/cli/format.d.ts +2 -2
package/dist/cli/main.js +391 -103
package/dist/core/constants.d.ts +1 -1
package/dist/index/querylight-indexer.d.ts +2 -2
package/dist/index.js +344 -88
package/dist/query/search-service.d.ts +8 -1
package/dist/types/models.d.ts +36 -1
package/dist/vector/runtime.d.ts +8 -0
package/package.json +2 -2
package/scripts/sparse-encode.py +5 -1

package/dist/cli/main.js CHANGED

@@ -2,7 +2,7 @@
 // src/cli/run-cli.ts
 import { Command, Option } from "commander";
-import { stat as stat4 } from "fs/promises";
+import { readFile as readFile11, stat as stat4 } from "fs/promises";
 import path21 from "path";
 // src/chunk/chunker.ts
@@ -16,7 +16,7 @@ import path from "path";
 import YAML from "yaml";
 // src/core/constants.ts
-var PACKAGE_VERSION = "0.2.1";
+var PACKAGE_VERSION = "0.2.3";
 var DEFAULT_WORKSPACE = ".kb";
 var DEFAULT_SHARED_MODEL_CACHE_DIR = "~/.qli/models/huggingface";
 var LEGACY_WORKSPACE_MODEL_CACHE_DIR = ".kb/models/huggingface";
@@ -374,7 +374,7 @@ async function assertWorkspaceExists(workspacePath) {
 }
 // src/index/querylight-indexer.ts
-import { Analyzer, DocumentIndex, KeywordTokenizer, LowerCaseTextFilter, RankingAlgorithm, TextFieldIndex } from "@tryformation/querylight-ts";
+import { Analyzer, DateFieldIndex, DocumentIndex, KeywordTokenizer, LowerCaseTextFilter, RankingAlgorithm, StoredSourceIndex, TextFieldIndex } from "@tryformation/querylight-ts";
 import path11 from "path";
 // src/vector/dense.ts
@@ -387,6 +387,7 @@ import os from "os";
 import path6 from "path";
 import { fileURLToPath } from "url";
 import { execFile, execFileSync } from "child_process";
+import { mkdtemp, rm, writeFile as writeFile3 } from "fs/promises";
 // src/core/files.ts
 import { stat as stat2 } from "fs/promises";
@@ -400,6 +401,7 @@ async function fileExists(filePath) {
 }
 // src/vector/runtime.ts
+var sparseExecFileSync = execFileSync;
 function resolveQliHomeDir() {
   return path6.resolve(process.env.QLI_HOME ?? path6.join(os.homedir(), ".qli"));
 }
@@ -455,29 +457,36 @@ async function runSparsePython({
 }) {
   const cacheDir = resolveCacheDir(workspacePath, config.cacheDir);
   const scriptPath = await sparseScriptPath(importMetaUrl);
-  return execFileSync(
-    "uv",
-    [
-      "run",
-      "--with",
-      "torch",
-      "--with",
-      "transformers",
-      "--with",
-      "huggingface_hub",
-      "python",
-      scriptPath
-    ],
-    {
-      encoding: "utf8",
-      maxBuffer: 1024 * 1024 * 1024,
-      input: JSON.stringify(payload),
-      env: {
-        ...process.env,
-        HF_HOME: cacheDir
+  const payloadDir = await mkdtemp(path6.join(os.tmpdir(), "qli-sparse-"));
+  const payloadPath = path6.join(payloadDir, "payload.json");
+  await writeFile3(payloadPath, JSON.stringify(payload), "utf8");
+  try {
+    return sparseExecFileSync(
+      "uv",
+      [
+        "run",
+        "--with",
+        "torch",
+        "--with",
+        "transformers",
+        "--with",
+        "huggingface_hub",
+        "python",
+        scriptPath,
+        payloadPath
+      ],
+      {
+        encoding: "utf8",
+        maxBuffer: 1024 * 1024 * 1024,
+        env: {
+          ...process.env,
+          HF_HOME: cacheDir
+        }
       }
-    }
-  );
+    );
+  } finally {
+    await rm(payloadDir, { recursive: true, force: true });
+  }
 }
 async function getDenseTransformersRuntime(cacheDir) {
   const transformers = await import("@huggingface/transformers");
@@ -490,18 +499,18 @@ async function getDenseTransformersRuntime(cacheDir) {
 }
 // src/vector/store.ts
-import { mkdir as mkdir3, rm, writeFile as writeFile4 } from "fs/promises";
+import { mkdir as mkdir3, rm as rm2, writeFile as writeFile5 } from "fs/promises";
 import path7 from "path";
 // src/core/gzip-json.ts
-import { readFile as readFile4, writeFile as writeFile3 } from "fs/promises";
+import { readFile as readFile4, writeFile as writeFile4 } from "fs/promises";
 import { promisify } from "util";
 import { gunzip, gzip } from "zlib";
 var gzipAsync = promisify(gzip);
 var gunzipAsync = promisify(gunzip);
 async function writeGzipJson(filePath, value) {
   const payload = JSON.stringify(value, null, 2);
-  await writeFile3(filePath, await gzipAsync(Buffer.from(payload, "utf8")));
+  await writeFile4(filePath, await gzipAsync(Buffer.from(payload, "utf8")));
 }
 async function readJsonFromGzipOrFile(gzipPath, legacyPath) {
   if (await fileExists(gzipPath)) {
@@ -570,8 +579,8 @@ async function writeDensePayload(workspacePath, payload) {
   await writeGzipJson(denseVectorPath(workspacePath), payload);
   await writeGzipJson(denseMetaPath(workspacePath), payload.metadata);
   await Promise.all([
-    rm(legacyDenseVectorPath(workspacePath), { force: true }),
-    rm(legacyDenseMetaPath(workspacePath), { force: true })
+    rm2(legacyDenseVectorPath(workspacePath), { force: true }),
+    rm2(legacyDenseMetaPath(workspacePath), { force: true })
   ]);
 }
 async function readDensePayload(workspacePath) {
@@ -582,8 +591,8 @@ async function writeSparsePayload(workspacePath, payload) {
   await writeGzipJson(sparseVectorPath(workspacePath), payload);
   await writeGzipJson(sparseMetaPath(workspacePath), payload.metadata);
   await Promise.all([
-    rm(legacySparseVectorPath(workspacePath), { force: true }),
-    rm(legacySparseMetaPath(workspacePath), { force: true })
+    rm2(legacySparseVectorPath(workspacePath), { force: true }),
+    rm2(legacySparseMetaPath(workspacePath), { force: true })
   ]);
 }
 async function readSparsePayload(workspacePath) {
@@ -592,12 +601,12 @@ async function readSparsePayload(workspacePath) {
 async function writeDensePullMarker(workspacePath, model, value) {
   const markerPath = densePullMarker(workspacePath, model.modelId, model.cacheDir);
   await mkdir3(path7.dirname(markerPath), { recursive: true });
-  await writeFile4(markerPath, JSON.stringify(value, null, 2), "utf8");
+  await writeFile5(markerPath, JSON.stringify(value, null, 2), "utf8");
 }
 async function writeSparsePullMarker(workspacePath, model, value) {
   const markerPath = sparsePullMarker(workspacePath, model.modelId, model.cacheDir);
   await mkdir3(path7.dirname(markerPath), { recursive: true });
-  await writeFile4(markerPath, JSON.stringify(value, null, 2), "utf8");
+  await writeFile5(markerPath, JSON.stringify(value, null, 2), "utf8");
 }
 async function buildModelStatus(workspacePath, dense, sparse, uvAvailable) {
   const denseCacheDir = resolveCacheDir(workspacePath, dense.cacheDir);
@@ -1015,7 +1024,7 @@ async function getModelStatus(workspacePath, config) {
 }
 // src/index/index-store.ts
-import { mkdir as mkdir6, rm as rm2 } from "fs/promises";
+import { mkdir as mkdir6, rm as rm3 } from "fs/promises";
 import path10 from "path";
 function versionedIndexPath(workspacePath, stamp) {
   return path10.join(workspacePath, "indexes", `${stamp}.json.gz`);
@@ -1057,10 +1066,10 @@ async function writeIndexArtifacts({
   await writeGzipJson(latestIndexArtifactPath, indexState);
   await writeGzipJson(latestMetadataArtifactPath, metadata);
   await Promise.all([
-    rm2(legacyLatestIndexPath(workspacePath), { force: true }),
-    rm2(legacyLatestMetaPath(workspacePath), { force: true }),
-    rm2(versionedLegacyIndexPath(workspacePath, stamp), { force: true }),
-    rm2(versionedLegacyMetaPath(workspacePath, stamp), { force: true })
+    rm3(legacyLatestIndexPath(workspacePath), { force: true }),
+    rm3(legacyLatestMetaPath(workspacePath), { force: true }),
+    rm3(versionedLegacyIndexPath(workspacePath, stamp), { force: true }),
+    rm3(versionedLegacyMetaPath(workspacePath, stamp), { force: true })
   ]);
   return { indexPath: latestIndexArtifactPath, metadataPath: latestMetadataArtifactPath };
 }
@@ -1082,12 +1091,19 @@ function keywordFieldIndex() {
 function createIndexMapping(extraFields = []) {
   const lexical = new TextFieldIndex(void 0, void 0, RankingAlgorithm.BM25);
   const mapping = {
+    _source: new StoredSourceIndex(),
     text: lexical,
     title: new TextFieldIndex(void 0, void 0, RankingAlgorithm.BM25),
     uri: keywordFieldIndex(),
     sourceId: keywordFieldIndex(),
+    sourceName: keywordFieldIndex(),
     tags: keywordFieldIndex(),
-    sourceType: keywordFieldIndex()
+    sourceType: keywordFieldIndex(),
+    publicationDate: new DateFieldIndex(),
+    firstSeenAt: new DateFieldIndex(),
+    lastSeenAt: new DateFieldIndex(),
+    lastChangedAt: new DateFieldIndex(),
+    crawledAt: new DateFieldIndex()
   };
   for (const field of extraFields) {
     mapping[field] = keywordFieldIndex();
@@ -1123,8 +1139,12 @@ async function buildIndex({
   const sources = await readJsonl(path11.join(workspacePath, "sources", "sources.jsonl"));
   const metadataFields = [...new Set(chunks.flatMap((chunk) => Object.keys(chunk.metadata).map((key) => `metadata.${key}`)))];
   const index = new DocumentIndex(createIndexMapping(metadataFields));
+  const documentsById = new Map(documents.map((document) => [document.id, document]));
+  const sourcesById = new Map(sources.map((source) => [source.id, source]));
   reportProgress(progress, `Building lexical index from ${chunks.length} chunk${chunks.length === 1 ? "" : "s"}`);
   for (const chunk of chunks) {
+    const document = documentsById.get(chunk.documentId);
+    const source = sourcesById.get(chunk.sourceId);
     index.index({
       id: chunk.id,
       fields: {
@@ -1132,9 +1152,33 @@ async function buildIndex({
         title: [chunk.title],
         uri: [chunk.uri.toLowerCase()],
         sourceId: [chunk.sourceId.toLowerCase()],
+        sourceName: source ? [source.name.toLowerCase()] : [],
         tags: Array.isArray(chunk.metadata.tags) ? chunk.metadata.tags.map((tag) => String(tag).toLowerCase()) : [],
         sourceType: [String(chunk.metadata.sourceType ?? "").toLowerCase()],
+        publicationDate: document?.publicationDate ? [document.publicationDate] : [],
+        firstSeenAt: [document?.firstSeenAt ?? chunk.firstSeenAt],
+        lastSeenAt: [document?.lastSeenAt ?? chunk.lastSeenAt],
+        lastChangedAt: [document?.lastChangedAt ?? chunk.lastChangedAt],
+        crawledAt: document?.crawledAt ? [document.crawledAt] : [],
         ...flattenMetadata(chunk.metadata)
+      },
+      source: {
+        chunkId: chunk.id,
+        documentId: chunk.documentId,
+        sourceId: chunk.sourceId,
+        sourceType: document?.sourceType ?? "text",
+        sourceName: source?.name,
+        title: chunk.title,
+        uri: chunk.uri,
+        headingPath: chunk.headingPath,
+        text: chunk.text,
+        normalizedPath: document?.normalizedPath,
+        publicationDate: document?.publicationDate ?? null,
+        crawledAt: document?.crawledAt,
+        firstSeenAt: document?.firstSeenAt ?? chunk.firstSeenAt,
+        lastSeenAt: document?.lastSeenAt ?? chunk.lastSeenAt,
+        lastChangedAt: document?.lastChangedAt ?? chunk.lastChangedAt,
+        metadata: chunk.metadata
       }
     });
   }
@@ -1143,7 +1187,7 @@ async function buildIndex({
   const metadata = {
     id: `index_${createdAt.replace(/[:.]/g, "-")}`,
     createdAt,
-    querylightVersion: "0.10.0",
+    querylightVersion: "0.11.0",
     kbVersion: "0.1.0",
     documentCount: documents.length,
     chunkCount: chunks.length,
@@ -1262,7 +1306,7 @@ async function removeSource(workspacePath, sourceId) {
 }
 // src/ingest/document-utils.ts
-import { mkdir as mkdir7, rm as rm3, writeFile as writeFile5 } from "fs/promises";
+import { mkdir as mkdir7, rm as rm4, writeFile as writeFile6 } from "fs/promises";
 import path14 from "path";
 // src/normalize/normalize-markdown.ts
@@ -1316,7 +1360,7 @@ async function writeNormalizedDocument({
   markdown
 }) {
   await mkdir7(path14.dirname(normalizedPath), { recursive: true });
-  await writeFile5(
+  await writeFile6(
     normalizedPath,
     withFrontmatter(
       {
@@ -1338,8 +1382,8 @@ async function writeNormalizedDocument({
 }
 async function deleteDocumentArtifacts(document) {
   await Promise.all([
-    document.rawPath ? rm3(document.rawPath, { force: true }) : Promise.resolve(),
-    rm3(document.normalizedPath, { force: true })
+    document.rawPath ? rm4(document.rawPath, { force: true }) : Promise.resolve(),
+    rm4(document.normalizedPath, { force: true })
   ]);
 }
@@ -1363,7 +1407,7 @@ async function listDirectoryFiles(source) {
 // src/ingest/adapters/file-adapter.ts
 import { basename, extname, resolve } from "path";
-import { mkdir as mkdir8, readFile as readFile8, stat as stat3, writeFile as writeFile6 } from "fs/promises";
+import { mkdir as mkdir8, readFile as readFile8, stat as stat3, writeFile as writeFile7 } from "fs/promises";
 // src/ingest/extractors/docx-extractor.ts
 import mammoth from "mammoth";
@@ -1653,7 +1697,7 @@ async function ingestFile({
   await mkdir8(resolve(workspacePath, "normalized"), { recursive: true });
   await mkdir8(resolve(workspacePath, "raw", source.id), { recursive: true });
   if (extracted.raw) {
-    await writeFile6(rawPath, extracted.raw, "utf8");
+    await writeFile7(rawPath, extracted.raw, "utf8");
   }
   await writeNormalizedDocument({
     documentId,
@@ -1877,7 +1921,7 @@ async function parseRssFeedDocument(xml, source) {
 }
 // src/ingest/adapters/url-adapter.ts
-import { mkdir as mkdir9, readFile as readFile9, writeFile as writeFile7 } from "fs/promises";
+import { mkdir as mkdir9, readFile as readFile9, writeFile as writeFile8 } from "fs/promises";
 import path16 from "path";
 // src/core/urls.ts
@@ -1930,7 +1974,7 @@ ${extracted.markdown}`;
   const crawledAt = now;
   const resolvedPublicationDate = choosePublicationDate(publicationDate, extractPublicationDateFromHtml(body), previous?.publicationDate);
   await mkdir9(path16.resolve(workspacePath, "raw", source.id), { recursive: true });
-  await writeFile7(rawPath, body, "utf8");
+  await writeFile8(rawPath, body, "utf8");
   await writeNormalizedDocument({
     documentId,
     sourceId: source.id,
@@ -2769,7 +2813,7 @@ async function discoverWebsiteFeed(websiteUrl, userAgent) {
 // src/query/search-service.ts
 import { readFile as readFile10 } from "fs/promises";
-import { BoolQuery, MatchQuery, OP, TermQuery, reciprocalRankFusion } from "@tryformation/querylight-ts";
+import { reciprocalRankFusion, searchJsonDsl } from "@tryformation/querylight-ts";
 import path18 from "path";
 async function loadHydratedIndex(workspacePath) {
   let state;
@@ -2797,24 +2841,6 @@ function matchesPrefix(value, prefixes) {
   const lower = value.toLowerCase();
   return prefixes.some((prefix) => lower.startsWith(prefix));
 }
-function buildSearchQuery(query, filters) {
-  const sourceIds = normalizeFilterValues([filters.sourceId, ...filters.sourceIds ?? []].filter((value) => Boolean(value)));
-  const sourceTypes = normalizeFilterValues([filters.sourceType, ...filters.sourceTypes ?? []].filter((value) => Boolean(value)));
-  const tags = normalizeFilterValues([filters.tag, ...filters.tags ?? []].filter((value) => Boolean(value)));
-  return new BoolQuery({
-    should: [
-      new MatchQuery({ field: "title", text: query, operation: OP.AND, boost: 6 }),
-      new MatchQuery({ field: "text", text: query, operation: OP.AND, boost: 4 }),
-      new MatchQuery({ field: "text", text: query, operation: OP.OR, boost: 2 })
-    ],
-    filter: [
-      ...sourceIds.length === 1 ? [new TermQuery({ field: "sourceId", text: sourceIds[0] })] : [],
-      ...sourceTypes.length === 1 ? [new TermQuery({ field: "sourceType", text: sourceTypes[0] })] : [],
-      ...tags.length === 1 ? [new TermQuery({ field: "tags", text: tags[0] })] : [],
-      ...(filters.metadata ?? []).map(({ key, value }) => new TermQuery({ field: `metadata.${key}`, text: value.toLowerCase() }))
-    ]
-  });
-}
 function isValidDate(value) {
   return typeof value === "string" && !Number.isNaN(new Date(value).getTime());
 }
@@ -3013,6 +3039,178 @@ async function buildSnippetWithAdjacentChunks(chunk, query, {
   }
   return buildExpandedParagraphSnippet(paragraphs, currentIndex, query);
 }
+function buildSearchDslRequest({
+  query,
+  topK,
+  filters,
+  dateRanges
+}) {
+  const filterClauses = [];
+  const sourceIds = normalizeFilterValues([filters.sourceId, ...filters.sourceIds ?? []].filter((value) => Boolean(value)));
+  const sourceNames = normalizeFilterValues([filters.sourceName, ...filters.sourceNames ?? []].filter((value) => Boolean(value)));
+  const sourceTypes = normalizeFilterValues([filters.sourceType, ...filters.sourceTypes ?? []].filter((value) => Boolean(value)));
+  const uriPrefixes = normalizeFilterValues([filters.uriPrefix, ...filters.uriPrefixes ?? []].filter((value) => Boolean(value)));
+  const tags = normalizeFilterValues([filters.tag, ...filters.tags ?? []].filter((value) => Boolean(value)));
+  if (sourceIds.length > 0) {
+    filterClauses.push({ terms: { sourceId: sourceIds } });
+  }
+  if (sourceNames.length > 0) {
+    filterClauses.push({ terms: { sourceName: sourceNames } });
+  }
+  if (sourceTypes.length > 0) {
+    filterClauses.push({ terms: { sourceType: sourceTypes } });
+  }
+  if (uriPrefixes.length > 0) {
+    filterClauses.push({
+      bool: {
+        should: uriPrefixes.map((prefix) => ({ prefix: { uri: prefix } })),
+        minimum_should_match: 1
+      }
+    });
+  }
+  if (tags.length > 0) {
+    filterClauses.push({ terms: { tags } });
+  }
+  if (filters.hasPublicationDate) {
+    filterClauses.push({ exists: { field: "publicationDate" } });
+  }
+  for (const { key, value } of filters.metadata ?? []) {
+    filterClauses.push({ term: { [`metadata.${key}`]: value.toLowerCase() } });
+  }
+  for (const { field, from, to } of dateRanges) {
+    filterClauses.push({
+      range: {
+        [field]: {
+          ...from ? { gte: from } : {},
+          ...to ? { lte: to } : {}
+        }
+      }
+    });
+  }
+  return {
+    size: topK,
+    query: {
+      bool: {
+        should: [
+          { match: { title: { query, operator: "and", boost: 6 } } },
+          { match: { text: { query, operator: "and", boost: 4 } } },
+          { match: { text: { query, operator: "or", boost: 2 } } }
+        ],
+        filter: filterClauses,
+        minimum_should_match: 1
+      }
+    }
+  };
+}
+function sourceToChunkRecord(source) {
+  return {
+    id: source.chunkId,
+    documentId: source.documentId,
+    sourceId: source.sourceId,
+    title: source.title,
+    uri: source.uri,
+    headingPath: source.headingPath,
+    text: source.text,
+    contentHash: "",
+    metadata: source.metadata,
+    firstSeenAt: source.firstSeenAt,
+    lastSeenAt: source.lastSeenAt,
+    lastChangedAt: source.lastChangedAt
+  };
+}
+function sourceToDocumentRecord(source) {
+  return {
+    id: source.documentId,
+    sourceId: source.sourceId,
+    sourceType: source.sourceType,
+    title: source.title,
+    uri: source.uri,
+    sourceUri: source.uri,
+    mimeType: "text/plain",
+    normalizedPath: source.normalizedPath ?? "",
+    contentHash: "",
+    metadata: source.metadata,
+    publicationDate: source.publicationDate ?? null,
+    crawledAt: source.crawledAt,
+    firstSeenAt: source.firstSeenAt,
+    lastSeenAt: source.lastSeenAt,
+    lastChangedAt: source.lastChangedAt
+  };
+}
+async function materializeSearchHit(hit, query, config, orderedChunkCache, showChunks) {
+  const source = hit._source;
+  const chunk = sourceToChunkRecord(source);
+  const document = sourceToDocumentRecord(source);
+  const snippet = await buildSnippetWithAdjacentChunks(chunk, query, { document, config, orderedChunkCache });
+  const enrichedSource = {
+    ...source,
+    snippet
+  };
+  const result = {
+    chunkId: source.chunkId,
+    documentId: source.documentId,
+    sourceId: source.sourceId,
+    sourceType: source.sourceType,
+    score: hit._score,
+    title: chooseResultTitle(chunk),
+    uri: source.uri,
+    snippet,
+    text: showChunks ? source.text : void 0,
+    publicationDate: source.publicationDate ?? null,
+    firstSeenAt: source.firstSeenAt,
+    lastSeenAt: source.lastSeenAt,
+    lastChangedAt: source.lastChangedAt,
+    metadata: source.metadata
+  };
+  return {
+    hit: {
+      ...hit,
+      _source: enrichedSource
+    },
+    result
+  };
+}
+function createSearchResponse(retrievalMode, hits, took, aggregations) {
+  return {
+    retrievalMode,
+    took,
+    hits: {
+      total: {
+        value: hits.length,
+        relation: "eq"
+      },
+      max_score: hits.length > 0 ? Math.max(...hits.map((hit) => hit._score)) : null,
+      hits
+    },
+    aggregations
+  };
+}
+function searchResultsFromResponse(response2, showChunks = false) {
+  return response2.hits.hits.map((hit) => ({
+    chunkId: hit._source.chunkId,
+    documentId: hit._source.documentId,
+    sourceId: hit._source.sourceId,
+    sourceType: hit._source.sourceType,
+    score: hit._score,
+    title: chooseResultTitle(sourceToChunkRecord(hit._source)),
+    uri: hit._source.uri,
+    snippet: hit._source.snippet ?? hit.highlight?.text?.join("\n\n") ?? buildSnippet(hit._source.text, hit._source.title),
+    text: showChunks ? hit._source.text : void 0,
+    publicationDate: hit._source.publicationDate ?? null,
+    firstSeenAt: hit._source.firstSeenAt,
+    lastSeenAt: hit._source.lastSeenAt,
+    lastChangedAt: hit._source.lastChangedAt,
+    metadata: hit._source.metadata
+  }));
+}
+async function searchJsonIndex({
+  workspacePath,
+  request,
+  indexName = "querylight"
+}) {
+  const index = await loadHydratedIndex(workspacePath);
+  return searchJsonDsl({ index, request, indexName });
+}
 function normalizeDisplayTitle(title) {
   return title.replace(/\s*\|\s*Querylight TS Demo\s*$/i, "").replace(/\s+/g, " ").trim();
 }
@@ -3150,6 +3348,7 @@ async function searchIndex({
   retrievalMode,
   showChunks = false
 }) {
+  const startedAt = Date.now();
   const config = await loadConfig(workspacePath);
   const mode = retrievalMode ?? config.retrieval.defaultMode;
   const candidateLimit = Math.max(topK * 5, 50);
@@ -3206,12 +3405,48 @@ async function searchIndex({
         };
       })
     );
-    return { retrievalMode: "lexical", results: latestResults.filter((result) => result != null) };
+    const hits2 = latestResults.filter((result) => result != null).map((result) => {
+      const chunk = chunks.get(result.chunkId);
+      const document = documents.get(result.documentId);
+      const source = sources.get(result.sourceId);
+      return {
+        _index: "querylight",
+        _id: result.chunkId,
+        _score: result.score,
+        _source: {
+          chunkId: result.chunkId,
+          documentId: result.documentId,
+          sourceId: result.sourceId,
+          sourceType: result.sourceType,
+          sourceName: source?.name,
+          title: chunk.title,
+          uri: result.uri,
+          headingPath: chunk.headingPath,
+          text: chunk.text,
+          snippet: result.snippet,
+          normalizedPath: document.normalizedPath,
+          publicationDate: result.publicationDate ?? null,
+          crawledAt: document.crawledAt,
+          firstSeenAt: result.firstSeenAt,
+          lastSeenAt: result.lastSeenAt,
+          lastChangedAt: result.lastChangedAt,
+          metadata: result.metadata
+        }
+      };
+    });
+    return createSearchResponse("lexical", hits2, Date.now() - startedAt);
   }
   const lexicalHits = async () => {
-    const index = await loadHydratedIndex(workspacePath);
-    const all = await index.searchRequest({ query: buildSearchQuery(normalizedQuery, { sourceId, sourceIds, sourceType, sourceTypes, tag, tags, metadata }), limit: candidateLimit });
-    return all.filter(([chunkId]) => filterIds.includes(chunkId)).slice(0, candidateLimit);
+    const response2 = await searchJsonIndex({
+      workspacePath,
+      request: buildSearchDslRequest({
+        query: normalizedQuery,
+        topK: candidateLimit,
+        filters: { sourceId, sourceIds, sourceName, sourceNames, sourceType, sourceTypes, uriPrefix, uriPrefixes, hasPublicationDate, tag, tags, metadata },
+        dateRanges
+      })
+    });
+    return response2.hits.hits;
   };
   const denseHits = async () => {
     if (!await fileExists(denseVectorPath(workspacePath))) {
@@ -3225,15 +3460,18 @@ async function searchIndex({
     }
     return sparseQuery({ workspacePath, config: config.retrieval.sparse, query: normalizedQuery, topK: candidateLimit }).then((hits2) => hits2.filter(([chunkId]) => filterIds.includes(chunkId)).slice(0, candidateLimit));
   };
+  let lexicalResponseHits = [];
   let hits;
   if (mode === "lexical") {
-    hits = await lexicalHits();
+    lexicalResponseHits = await lexicalHits();
+    hits = lexicalResponseHits.map((hit) => [hit._id, hit._score]);
   } else if (mode === "dense") {
     hits = await denseHits();
   } else if (mode === "sparse") {
     hits = await sparseHits();
   } else {
-    const rankings = [await lexicalHits()];
+    lexicalResponseHits = await lexicalHits();
+    const rankings = [lexicalResponseHits.map((hit) => [hit._id, hit._score])];
     if (await fileExists(denseVectorPath(workspacePath))) {
       rankings.push(await denseQuery({ workspacePath, config: config.retrieval.dense, query: normalizedQuery, topK: candidateLimit }).then((dense) => dense.filter(([chunkId]) => filterIds.includes(chunkId)).slice(0, candidateLimit)));
     }
@@ -3242,34 +3480,49 @@ async function searchIndex({
     }
     hits = reciprocalRankFusion(rankings, { rankConstant: 20, weights: rankings.map((_, index) => index === 0 ? 3 : 1) }).slice(0, candidateLimit);
   }
-  const rawResults = await Promise.all(hits.map(async ([chunkId, score]) => {
+  const baseHits = mode === "lexical" ? lexicalResponseHits : hits.flatMap(([chunkId, score]) => {
     const chunk = chunks.get(chunkId);
     if (!chunk) {
-      return null;
+      return [];
     }
-    return {
-      chunkId,
-      documentId: chunk.documentId,
-      sourceId: chunk.sourceId,
-      sourceType: documents.get(chunk.documentId)?.sourceType ?? "text",
-      score,
-      title: chooseResultTitle(chunk),
-      uri: chunk.uri,
-      snippet: await buildSnippetWithAdjacentChunks(chunk, normalizedQuery, {
-        document: documents.get(chunk.documentId),
-        config,
-        orderedChunkCache
-      }),
-      text: showChunks ? chunk.text : void 0,
-      publicationDate: documents.get(chunk.documentId)?.publicationDate ?? null,
-      firstSeenAt: documents.get(chunk.documentId)?.firstSeenAt ?? chunk.firstSeenAt,
-      lastSeenAt: documents.get(chunk.documentId)?.lastSeenAt ?? chunk.lastSeenAt,
-      lastChangedAt: documents.get(chunk.documentId)?.lastChangedAt ?? chunk.lastChangedAt,
-      metadata: chunk.metadata
-    };
-  }));
-  const results = rawResults.filter((result) => result != null);
-  return { retrievalMode: mode, results: rerankResultsByDocument(results, topK) };
+    const document = documents.get(chunk.documentId);
+    const source = sources.get(chunk.sourceId);
+    return [{
+      _index: "querylight",
+      _id: chunkId,
+      _score: score,
+      _source: {
+        chunkId,
+        documentId: chunk.documentId,
+        sourceId: chunk.sourceId,
+        sourceType: document?.sourceType ?? "text",
+        sourceName: source?.name,
+        title: chunk.title,
+        uri: chunk.uri,
+        headingPath: chunk.headingPath,
+        text: chunk.text,
+        normalizedPath: document?.normalizedPath,
+        publicationDate: document?.publicationDate ?? null,
+        crawledAt: document?.crawledAt,
+        firstSeenAt: document?.firstSeenAt ?? chunk.firstSeenAt,
+        lastSeenAt: document?.lastSeenAt ?? chunk.lastSeenAt,
+        lastChangedAt: document?.lastChangedAt ?? chunk.lastChangedAt,
+        metadata: chunk.metadata
+      }
+    }];
+  });
+  const materialized = await Promise.all(baseHits.map((hit) => materializeSearchHit(hit, normalizedQuery, config, orderedChunkCache, showChunks)));
+  if (showChunks) {
+    const topHits = materialized.sort((left, right) => right.result.score - left.result.score).slice(0, topK).map(({ hit, result }) => ({ ...hit, _score: result.score }));
+    return createSearchResponse(mode, topHits, Date.now() - startedAt);
+  }
+  const reranked = rerankResultsByDocument(materialized.map(({ result }) => result), topK);
+  const byChunkId = new Map(materialized.map(({ hit }) => [hit._id, hit]));
+  const finalHits = reranked.map((result) => {
+    const hit = byChunkId.get(result.chunkId);
+    return hit ? { ...hit, _score: result.score, _source: { ...hit._source, snippet: result.snippet } } : null;
+  }).filter((hit) => hit != null);
+  return createSearchResponse(mode, finalHits, Date.now() - startedAt);
 }
 // src/query/related-service.ts
@@ -3386,9 +3639,10 @@ async function createContext({
   retrievalMode
 }) {
   const search = await searchIndex({ workspacePath, query, topK, showChunks: true, retrievalMode });
+  const results = searchResultsFromResponse(search, true);
   const sources = [];
   let total = 0;
-  for (const result of search.results) {
+  for (const result of results) {
     const text = result.text ?? "";
     if (total + text.length > maxChars && sources.length > 0) {
       break;
@@ -3489,7 +3743,8 @@ function formatSourcesTable(sources) {
   }
   return table.toString();
 }
-function formatSearchResults(results) {
+function formatSearchResults(response2) {
+  const results = searchResultsFromResponse(response2);
   return results.map((result, index) => [
     `${index + 1}. ${colors.bold(result.title)}`,
     `   URL: ${result.uri}`,
@@ -3794,6 +4049,19 @@ function parseDateValue(input, optionName) {
   }
   return parsed.toISOString();
 }
+async function parseJsonArgument(input) {
+  const raw = input.startsWith("@") ? await readFile11(path21.resolve(input.slice(1)), "utf8") : input;
+  try {
+    const parsed = JSON.parse(raw);
+    if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) {
+      throw new Error("expected a JSON object");
+    }
+    return parsed;
+  } catch (error) {
+    const message = error instanceof Error ? error.message : String(error);
+    throw new CliError(`invalid JSON request: ${message}`, "INVALID_ARGUMENT", 2 /* InvalidArguments */);
+  }
+}
 function searchDateRanges(options) {
   const entries = [];
   if (options.since || options.until) {
@@ -4138,7 +4406,7 @@ Examples:
     progress?.("info", "Rebuild complete");
     emit(global.json, capture, response("rebuild", workspace, data), `Processed ${ingest.processedSources} sources, wrote ${chunk.chunksWritten} chunks`);
   });
-  program.command("search").description("Search the built index and return ranked matching documents or chunks.").argument("[query]", "Text query. Omit it to list the latest matching documents.").option("--top-k <n>", "Maximum number of results to return.", "12").option("--source <sourceIds>", "Restrict results to one or more source ids. Use comma-separated values.").option("--source-name <names>", "Restrict results to one or more source names. Use comma-separated values.").option("--source-type <types>", `Restrict results to one or more source types. Use comma-separated values: ${SOURCE_TYPE_LIST.join(", ")}`).option("--uri-prefix <prefixes>", "Restrict results to one or more URI prefixes. Use comma-separated values.").option("--tag <tags>", "Restrict results to one or more source tags. Use comma-separated values.").option("--metadata <key=value...>", "Restrict results to sources with matching metadata.").option("--since <date>", "Shortcut for --publication-date-from.").option("--until <date>", "Shortcut for --publication-date-to.").option("--changed-since <date>", "Only include documents changed on or after this date.").option("--has-publication-date", "Only include documents with a publication date.").option("--publication-date-from <date>", "Only include documents published on or after this date.").option("--publication-date-to <date>", "Only include documents published on or before this date.").option("--first-seen-at-from <date>", "Only include documents first seen on or after this date.").option("--first-seen-at-to <date>", "Only include documents first seen on or before this date.").option("--last-seen-at-from <date>", "Only include documents last seen on or after this date.").option("--last-seen-at-to <date>", "Only include documents last seen on or before this date.").option("--last-changed-at-from <date>", "Only include documents changed on or after this date.").option("--last-changed-at-to <date>", "Only include documents changed on or before this 
date.").option("--crawled-at-from <date>", "Only include documents crawled on or after this date.").option("--crawled-at-to <date>", "Only include documents crawled on or before this date.").option("--retrieval <mode>", `Retrieval mode: ${RETRIEVAL_MODE_LIST.join(", ")}`).option("--show-chunks", "Return chunk-level matches when available.").addHelpText("after", `
+  program.command("search").description("Search the built index and return ranked matching documents or chunks. Use search-json for raw JSON DSL queries.").argument("[query]", "Text query. Omit it to list the latest matching documents.").option("--top-k <n>", "Maximum number of results to return.", "12").option("--source <sourceIds>", "Restrict results to one or more source ids. Use comma-separated values.").option("--source-name <names>", "Restrict results to one or more source names. Use comma-separated values.").option("--source-type <types>", `Restrict results to one or more source types. Use comma-separated values: ${SOURCE_TYPE_LIST.join(", ")}`).option("--uri-prefix <prefixes>", "Restrict results to one or more URI prefixes. Use comma-separated values.").option("--tag <tags>", "Restrict results to one or more source tags. Use comma-separated values.").option("--metadata <key=value...>", "Restrict results to sources with matching metadata.").option("--since <date>", "Shortcut for --publication-date-from.").option("--until <date>", "Shortcut for --publication-date-to.").option("--changed-since <date>", "Only include documents changed on or after this date.").option("--has-publication-date", "Only include documents with a publication date.").option("--publication-date-from <date>", "Only include documents published on or after this date.").option("--publication-date-to <date>", "Only include documents published on or before this date.").option("--first-seen-at-from <date>", "Only include documents first seen on or after this date.").option("--first-seen-at-to <date>", "Only include documents first seen on or before this date.").option("--last-seen-at-from <date>", "Only include documents last seen on or after this date.").option("--last-seen-at-to <date>", "Only include documents last seen on or before this date.").option("--last-changed-at-from <date>", "Only include documents changed on or after this date.").option("--last-changed-at-to <date>", "Only 
include documents changed on or before this date.").option("--crawled-at-from <date>", "Only include documents crawled on or after this date.").option("--crawled-at-to <date>", "Only include documents crawled on or before this date.").option("--retrieval <mode>", `Retrieval mode: ${RETRIEVAL_MODE_LIST.join(", ")}`).option("--show-chunks", "Return chunk-level matches when available.").addHelpText("after", `
 Examples:
   qli search "pricing api limits"
   qli search "authentication" --top-k 20 --tag docs
@@ -4151,6 +4419,7 @@ Examples:
 Notes:
   lexical works without vector models.
   dense, sparse, and hybrid require the relevant index artifacts to exist.
+  Use search-json when you want the raw Querylight 0.11 JSON DSL and hit format.
   When you omit the query, qli returns the latest matching documents sorted by publication date.`).action(async function command(query, options) {
     const global = this.optsWithGlobals();
     const workspace = await resolveWorkspace({ workspace: global.workspace });
@@ -4169,7 +4438,26 @@ Notes:
       retrievalMode: parseRetrievalMode(options.retrieval),
       showChunks: Boolean(options.showChunks)
     });
-    emit(global.json, capture, response("search", workspace, result), formatSearchResults(result.results));
+    emit(global.json, capture, response("search", workspace, result), formatSearchResults(result));
+  });
+  // Register the `search-json` subcommand: it takes one required <request>
+  // argument (inline JSON or @file), parses it, runs it through searchJsonIndex,
+  // and hands the result to emit.
+  program.command("search-json").description("Run a raw Querylight 0.11 JSON DSL search request against the lexical index.").argument("<request>", "Inline JSON request or @path/to/request.json.").addHelpText("after", `
+Examples:
+  qli search-json '{"query":{"match":{"text":"authentication"}},"size":5}'
+  qli search-json @./search-request.json
+  qli search-json '{"query":{"bool":{"filter":[{"term":{"sourceType":"rss"}}]}},"aggs":{"types":{"terms":{"field":"sourceType","size":5}}}}' --json
+Notes:
+  search-json uses the lexical index and Querylight 0.11 JSON DSL fields.
+  Stored hit payloads are returned under _source.
+  Use --json when another tool needs the full response envelope.`).action(async function command(requestInput) {
+    // A `function` expression (not an arrow) is required so `this` is the
+    // command object that optsWithGlobals() is called on.
+    const global = this.optsWithGlobals();
+    const workspace = await resolveWorkspace({ workspace: global.workspace });
+    // parseJsonArgument rejects anything that is not a JSON object with a CliError.
+    const request = await parseJsonArgument(requestInput);
+    const result = await searchJsonIndex({
+      workspacePath: workspace,
+      request
+    });
+    // emit receives both the response envelope and the pretty-printed raw
+    // result; presumably global.json selects between them — confirm in emit.
+    emit(global.json, capture, response("search-json", workspace, result), JSON.stringify(result, null, 2));
   });
   program.command("related").description("Find documents similar to an existing document by id or URI.").argument("<document>", "Document id, uri, or canonical uri").option("--top-k <n>", "Maximum number of related documents to return.", "12").addHelpText("after", `
 Examples: