skilld 0.13.4 → 0.13.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/_chunks/{embedding-cache.mjs → embedding-cache2.mjs} +10 -6
- package/dist/_chunks/embedding-cache2.mjs.map +1 -0
- package/dist/cli.mjs +1 -1
- package/dist/index.mjs +0 -2
- package/dist/retriv/index.d.mts +5 -2
- package/dist/retriv/index.d.mts.map +1 -1
- package/dist/retriv/index.mjs +11 -11
- package/dist/retriv/index.mjs.map +1 -1
- package/dist/retriv/worker.mjs +4 -3
- package/dist/retriv/worker.mjs.map +1 -1
- package/package.json +1 -1
- package/dist/_chunks/embedding-cache.mjs.map +0 -1
|
@@ -1,8 +1,11 @@
|
|
|
1
|
-
import { n as __require } from "./chunk.mjs";
|
|
1
|
+
import { n as __require, t as __exportAll } from "./chunk.mjs";
|
|
2
2
|
import { t as CACHE_DIR } from "./config.mjs";
|
|
3
3
|
import { join } from "pathe";
|
|
4
4
|
import { rmSync } from "node:fs";
|
|
5
|
-
|
|
5
|
+
var embedding_cache_exports = /* @__PURE__ */ __exportAll({
|
|
6
|
+
cachedEmbeddings: () => cachedEmbeddings,
|
|
7
|
+
clearEmbeddingCache: () => clearEmbeddingCache
|
|
8
|
+
});
|
|
6
9
|
const EMBEDDINGS_DB_PATH = join(CACHE_DIR, "embeddings.db");
|
|
7
10
|
function openDb() {
|
|
8
11
|
const { DatabaseSync: DB } = __require("node:sqlite");
|
|
@@ -28,11 +31,12 @@ function createSqliteStorage(db) {
|
|
|
28
31
|
}
|
|
29
32
|
};
|
|
30
33
|
}
|
|
31
|
-
function cachedEmbeddings
|
|
34
|
+
async function cachedEmbeddings(config) {
|
|
35
|
+
const { cachedEmbeddings: retrivCached } = await import("retriv/embeddings/cached");
|
|
32
36
|
const db = openDb();
|
|
33
37
|
const storage = createSqliteStorage(db);
|
|
34
38
|
const originalResolve = config.resolve;
|
|
35
|
-
return
|
|
39
|
+
return retrivCached({ async resolve() {
|
|
36
40
|
const resolved = await originalResolve();
|
|
37
41
|
const getMetaStmt = db.prepare("SELECT value FROM meta WHERE key = ?");
|
|
38
42
|
const setMetaStmt = db.prepare("INSERT OR REPLACE INTO meta (key, value) VALUES (?, ?)");
|
|
@@ -45,6 +49,6 @@ function cachedEmbeddings$1(config) {
|
|
|
45
49
|
function clearEmbeddingCache() {
|
|
46
50
|
rmSync(EMBEDDINGS_DB_PATH, { force: true });
|
|
47
51
|
}
|
|
48
|
-
export { clearEmbeddingCache as n, cachedEmbeddings
|
|
52
|
+
export { clearEmbeddingCache as n, embedding_cache_exports as r, cachedEmbeddings as t };
|
|
49
53
|
|
|
50
|
-
//# sourceMappingURL=embedding-
|
|
54
|
+
//# sourceMappingURL=embedding-cache2.mjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"embedding-cache2.mjs","names":[],"sources":["../../src/retriv/embedding-cache.ts"],"sourcesContent":["import type { DatabaseSync } from 'node:sqlite'\nimport type { Embedding } from 'retriv'\nimport { rmSync } from 'node:fs'\nimport { join } from 'pathe'\nimport { CACHE_DIR } from '../cache/index.ts'\n\ninterface EmbeddingConfig {\n resolve: () => Promise<{ embedder: (texts: string[]) => Promise<Embedding[]>, dimensions: number, maxTokens?: number }>\n}\n\nconst EMBEDDINGS_DB_PATH = join(CACHE_DIR, 'embeddings.db')\n\nfunction openDb(): DatabaseSync {\n // eslint-disable-next-line ts/no-require-imports\n const { DatabaseSync: DB } = require('node:sqlite') as typeof import('node:sqlite')\n const db = new DB(EMBEDDINGS_DB_PATH)\n db.exec('PRAGMA journal_mode=WAL')\n db.exec('PRAGMA busy_timeout=5000')\n db.exec(`CREATE TABLE IF NOT EXISTS embeddings (text_hash TEXT PRIMARY KEY, embedding BLOB NOT NULL)`)\n db.exec(`CREATE TABLE IF NOT EXISTS meta (key TEXT PRIMARY KEY, value TEXT NOT NULL)`)\n return db\n}\n\nfunction createSqliteStorage(db: DatabaseSync) {\n const getStmt = db.prepare('SELECT embedding FROM embeddings WHERE text_hash = ?')\n const setStmt = db.prepare('INSERT OR IGNORE INTO embeddings (text_hash, embedding) VALUES (?, ?)')\n\n return {\n get: (hash: string): Embedding | null => {\n const row = getStmt.get(hash) as { embedding: Buffer } | undefined\n if (!row)\n return null\n return new Float32Array(row.embedding.buffer, row.embedding.byteOffset, row.embedding.byteLength / 4)\n },\n set: (hash: string, embedding: Embedding): void => {\n const arr = embedding instanceof Float32Array ? embedding : new Float32Array(embedding)\n setStmt.run(hash, Buffer.from(arr.buffer, arr.byteOffset, arr.byteLength))\n },\n }\n}\n\nexport async function cachedEmbeddings(config: EmbeddingConfig): Promise<EmbeddingConfig> {\n const { cachedEmbeddings: retrivCached } = await import('retriv/embeddings/cached')\n const db = openDb()\n const storage = createSqliteStorage(db)\n\n const originalResolve = config.resolve\n const validatedConfig: EmbeddingConfig = {\n async resolve() {\n const resolved = await originalResolve()\n const getMetaStmt = db.prepare('SELECT value FROM meta WHERE key = ?')\n const setMetaStmt = db.prepare('INSERT OR REPLACE INTO meta (key, value) VALUES (?, ?)')\n\n const storedDims = getMetaStmt.get('dimensions') as { value: string } | undefined\n if (storedDims && Number(storedDims.value) !== resolved.dimensions) {\n db.exec('DELETE FROM embeddings')\n }\n setMetaStmt.run('dimensions', String(resolved.dimensions))\n\n return resolved\n },\n }\n\n return retrivCached(validatedConfig, { storage })\n}\n\nexport function clearEmbeddingCache(): void {\n rmSync(EMBEDDINGS_DB_PATH, { force: true })\n}\n"],"mappings":";;;;;;;;AAUA,MAAM,qBAAqB,KAAK,WAAW,gBAAgB;AAE3D,SAAS,SAAuB;CAE9B,MAAM,EAAE,cAAc,OAAA,UAAe,cAAc;CACnD,MAAM,KAAK,IAAI,GAAG,mBAAmB;AACrC,IAAG,KAAK,0BAA0B;AAClC,IAAG,KAAK,2BAA2B;AACnC,IAAG,KAAK,8FAA8F;AACtG,IAAG,KAAK,8EAA8E;AACtF,QAAO;;AAGT,SAAS,oBAAoB,IAAkB;CAC7C,MAAM,UAAU,GAAG,QAAQ,uDAAuD;CAClF,MAAM,UAAU,GAAG,QAAQ,wEAAwE;AAEnG,QAAO;EACL,MAAM,SAAmC;GACvC,MAAM,MAAM,QAAQ,IAAI,KAAK;AAC7B,OAAI,CAAC,IACH,QAAO;AACT,UAAO,IAAI,aAAa,IAAI,UAAU,QAAQ,IAAI,UAAU,YAAY,IAAI,UAAU,aAAa,EAAE;;EAEvG,MAAM,MAAc,cAA+B;GACjD,MAAM,MAAM,qBAAqB,eAAe,YAAY,IAAI,aAAa,UAAU;AACvF,WAAQ,IAAI,MAAM,OAAO,KAAK,IAAI,QAAQ,IAAI,YAAY,IAAI,WAAW,CAAC;;EAE7E;;AAGH,eAAsB,iBAAiB,QAAmD;CACxF,MAAM,EAAE,kBAAkB,iBAAiB,MAAM,OAAO;CACxD,MAAM,KAAK,QAAQ;CACnB,MAAM,UAAU,oBAAoB,GAAG;CAEvC,MAAM,kBAAkB,OAAO;AAiB/B,QAAO,aAhBkC,EACvC,MAAM,UAAU;EACd,MAAM,WAAW,MAAM,iBAAiB;EACxC,MAAM,cAAc,GAAG,QAAQ,uCAAuC;EACtE,MAAM,cAAc,GAAG,QAAQ,yDAAyD;EAExF,MAAM,aAAa,YAAY,IAAI,aAAa;AAChD,MAAI,cAAc,OAAO,WAAW,MAAM,KAAK,SAAS,WACtD,IAAG,KAAK,yBAAyB;AAEnC,cAAY,IAAI,cAAc,OAAO,SAAS,WAAW,CAAC;AAE1D,SAAO;IAEV,EAEoC,EAAE,SAAS,CAAC;;AAGnD,SAAgB,sBAA4B;AAC1C,QAAO,oBAAoB,EAAE,OAAO,MAAM,CAAC"}
|
package/dist/cli.mjs
CHANGED
|
@@ -6,7 +6,6 @@ import { _ as resolvePkgDir, a as getShippedSkills, b as writeToRepoCache, c as
|
|
|
6
6
|
import "./cache/index.mjs";
|
|
7
7
|
import { n as yamlParseKV, r as yamlUnescape, t as yamlEscape } from "./_chunks/yaml.mjs";
|
|
8
8
|
import { i as parseFrontmatter } from "./_chunks/markdown.mjs";
|
|
9
|
-
import { n as clearEmbeddingCache } from "./_chunks/embedding-cache.mjs";
|
|
10
9
|
import { closePool, createIndex, openPool, searchPooled, searchSnippets } from "./retriv/index.mjs";
|
|
11
10
|
import { o as getPrereleaseChangelogRef, t as getBlogPreset } from "./_chunks/package-registry.mjs";
|
|
12
11
|
import { $ as fetchGitHubIssues, A as parseGitSkillInput, C as downloadLlmsDocs, D as normalizeLlmsLinks, F as formatDiscussionAsMarkdown, G as $fetch, H as generateReleaseIndex, I as generateDiscussionIndex, L as fetchCrawledDocs, M as resolveEntryFiles, N as generateDocsIndex, P as fetchGitHubDiscussions, R as toCrawlPattern, T as fetchLlmsTxt, U as isPrerelease, V as fetchReleaseNotes, X as parseGitHubUrl, Z as parsePackageSpec, at as mapInsert, b as isShallowGitDocs, d as resolvePackageDocs, et as formatIssueAsMarkdown, f as resolvePackageDocsWithAttempts, h as fetchGitDocs, i as fetchPkgDist, it as getSharedSkillsDir, k as fetchGitSkills, n as fetchNpmPackage, nt as isGhAvailable, ot as semverGt, p as searchNpmPackages, r as fetchNpmRegistryMeta, rt as SHARED_SKILLS_DIR, s as readLocalDependencies, t as fetchLatestVersion, tt as generateIssueIndex, u as resolveLocalPackageDocs, v as fetchReadmeContent, x as resolveGitHubRepo, y as filterFrameworkDocs, z as fetchBlogReleases } from "./_chunks/npm.mjs";
|
|
@@ -14,6 +13,7 @@ import "./sources/index.mjs";
|
|
|
14
13
|
import { _ as targets, a as sanitizeName, f as maxItems, g as getAgentVersion, h as detectTargetAgent, i as linkSkillToAgents, m as detectInstalledAgents, n as computeSkillDirName, o as unlinkSkillFromAgents, p as maxLines, t as generateSkillMd } from "./_chunks/prompts.mjs";
|
|
15
14
|
import { a as getModelName, i as getModelLabel, n as createToolProgress, o as optimizeDocs, r as getAvailableModels, t as detectImportedPackages } from "./_chunks/detect-imports.mjs";
|
|
16
15
|
import "./agent/index.mjs";
|
|
16
|
+
import { n as clearEmbeddingCache } from "./_chunks/embedding-cache2.mjs";
|
|
17
17
|
import { n as shutdownWorker } from "./_chunks/pool2.mjs";
|
|
18
18
|
import { createRequire } from "node:module";
|
|
19
19
|
import { homedir } from "node:os";
|
package/dist/index.mjs
CHANGED
|
@@ -1,9 +1,7 @@
|
|
|
1
|
-
import "./_chunks/chunk.mjs";
|
|
2
1
|
import { c as getVersionKey, n as REFERENCES_DIR, o as getCacheDir, s as getCacheKey, t as CACHE_DIR } from "./_chunks/config.mjs";
|
|
3
2
|
import { h as readCachedDocs, n as clearCache, p as listCached, r as ensureCacheDir, s as isCached, t as clearAllCache, y as writeToCache } from "./_chunks/storage.mjs";
|
|
4
3
|
import "./cache/index.mjs";
|
|
5
4
|
import "./_chunks/markdown.mjs";
|
|
6
|
-
import "./_chunks/embedding-cache.mjs";
|
|
7
5
|
import { createIndex, search, searchSnippets } from "./retriv/index.mjs";
|
|
8
6
|
import "./_chunks/package-registry.mjs";
|
|
9
7
|
import { C as downloadLlmsDocs, D as normalizeLlmsLinks, O as parseMarkdownLinks, T as fetchLlmsTxt, d as resolvePackageDocs, n as fetchNpmPackage, s as readLocalDependencies, v as fetchReadmeContent } from "./_chunks/npm.mjs";
|
package/dist/retriv/index.d.mts
CHANGED
|
@@ -1,8 +1,11 @@
|
|
|
1
1
|
import { a as IndexProgress, c as SearchResult, i as IndexPhase, l as SearchSnippet, n as Document, o as SearchFilter, r as IndexConfig, s as SearchOptions, t as ChunkEntity } from "../_chunks/types.mjs";
|
|
2
|
-
import
|
|
2
|
+
import * as retriv from "retriv";
|
|
3
3
|
|
|
4
4
|
//#region src/retriv/index.d.ts
|
|
5
|
-
type RetrivInstance = Awaited<ReturnType<typeof
|
|
5
|
+
type RetrivInstance = Awaited<ReturnType<typeof getDb>>;
|
|
6
|
+
declare function getDb(config: Pick<IndexConfig, 'dbPath'>): Promise<retriv.SearchProvider & {
|
|
7
|
+
_testSetCategories?: (cats: string[]) => void;
|
|
8
|
+
}>;
|
|
6
9
|
/**
|
|
7
10
|
* Index documents in-process (no worker thread).
|
|
8
11
|
* Preferred for tests and environments where worker_threads is unreliable.
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.mts","names":[],"sources":["../../src/retriv/index.ts"],"mappings":";;;;
|
|
1
|
+
{"version":3,"file":"index.d.mts","names":[],"sources":["../../src/retriv/index.ts"],"mappings":";;;;KAKK,cAAA,GAAiB,OAAA,CAAQ,UAAA,QAAkB,KAAA;AAAA,iBAGjC,KAAA,CAAM,MAAA,EAAQ,IAAA,CAAK,WAAA,cAAsB,OAAA,CAAvB,MAAA,CAAuB,cAAA;;;;;;;iBA4BlC,iBAAA,CACpB,SAAA,EAAW,QAAA,IACX,MAAA,EAAQ,WAAA,GACP,OAAA;;;;;iBAUmB,WAAA,CACpB,SAAA,EAAW,QAAA,IACX,MAAA,EAAQ,WAAA,GACP,OAAA;AAAA,iBAMmB,MAAA,CACpB,KAAA,UACA,MAAA,EAAQ,WAAA,EACR,OAAA,GAAS,aAAA,GACR,OAAA,CAAQ,YAAA;AAzD0C;;;AAAA,iBA8E/B,cAAA,CACpB,KAAA,UACA,MAAA,EAAQ,WAAA,EACR,OAAA,GAAS,aAAA,GACR,OAAA,CAAQ,aAAA;AAAA,iBA2BW,QAAA,CAAS,OAAA,aAAoB,OAAA,CAAQ,GAAA,SAAY,cAAA;AAAA,iBASjD,YAAA,CACpB,KAAA,UACA,IAAA,EAAM,GAAA,SAAY,cAAA,GAClB,OAAA,GAAS,aAAA,GACR,OAAA,CAAQ,aAAA;AAAA,iBAqBW,SAAA,CAAU,IAAA,EAAM,GAAA,SAAY,cAAA,IAAkB,OAAA"}
|
package/dist/retriv/index.mjs
CHANGED
|
@@ -1,17 +1,17 @@
|
|
|
1
|
-
import "../_chunks/chunk.mjs";
|
|
2
|
-
import "../_chunks/config.mjs";
|
|
3
|
-
import "../_chunks/storage.mjs";
|
|
4
1
|
import { a as stripFrontmatter } from "../_chunks/markdown.mjs";
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
import
|
|
8
|
-
import
|
|
9
|
-
import
|
|
10
|
-
|
|
2
|
+
async function getDb(config) {
|
|
3
|
+
const [{ createRetriv }, { autoChunker }, sqliteMod, { transformersJs }, { cachedEmbeddings }] = await Promise.all([
|
|
4
|
+
import("retriv"),
|
|
5
|
+
import("retriv/chunkers/auto"),
|
|
6
|
+
import("retriv/db/sqlite"),
|
|
7
|
+
import("retriv/embeddings/transformers-js"),
|
|
8
|
+
import("../_chunks/embedding-cache2.mjs").then((n) => n.r)
|
|
9
|
+
]);
|
|
10
|
+
const embeddings = await cachedEmbeddings(transformersJs());
|
|
11
11
|
return createRetriv({
|
|
12
|
-
driver:
|
|
12
|
+
driver: sqliteMod.default({
|
|
13
13
|
path: config.dbPath,
|
|
14
|
-
embeddings
|
|
14
|
+
embeddings
|
|
15
15
|
}),
|
|
16
16
|
chunking: autoChunker()
|
|
17
17
|
});
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.mjs","names":[],"sources":["../../src/retriv/index.ts"],"sourcesContent":["import type { ChunkEntity, Document, IndexConfig, IndexPhase, IndexProgress, SearchFilter, SearchOptions, SearchResult, SearchSnippet } from './types.ts'\nimport {
|
|
1
|
+
{"version":3,"file":"index.mjs","names":[],"sources":["../../src/retriv/index.ts"],"sourcesContent":["import type { ChunkEntity, Document, IndexConfig, IndexPhase, IndexProgress, SearchFilter, SearchOptions, SearchResult, SearchSnippet } from './types.ts'\nimport { stripFrontmatter } from '../core/markdown.ts'\n\nexport type { ChunkEntity, Document, IndexConfig, IndexPhase, IndexProgress, SearchFilter, SearchOptions, SearchResult, SearchSnippet }\n\ntype RetrivInstance = Awaited<ReturnType<typeof getDb>>\n\n// Dynamic imports: retriv/chunkers/auto eagerly loads typescript which may not be installed (e.g. npx)\nasync function getDb(config: Pick<IndexConfig, 'dbPath'>) {\n const [\n { createRetriv },\n { autoChunker },\n sqliteMod,\n { transformersJs },\n { cachedEmbeddings },\n ] = await Promise.all([\n import('retriv'),\n import('retriv/chunkers/auto'),\n import('retriv/db/sqlite'),\n import('retriv/embeddings/transformers-js'),\n import('./embedding-cache.ts'),\n ])\n const embeddings = await cachedEmbeddings(transformersJs())\n return createRetriv({\n driver: sqliteMod.default({\n path: config.dbPath,\n embeddings,\n }),\n chunking: autoChunker(),\n })\n}\n\n/**\n * Index documents in-process (no worker thread).\n * Preferred for tests and environments where worker_threads is unreliable.\n */\nexport async function createIndexDirect(\n documents: Document[],\n config: IndexConfig,\n): Promise<void> {\n const db = await getDb(config)\n await db.index(documents, { onProgress: config.onProgress })\n await db.close?.()\n}\n\n/**\n * Index documents in a background worker thread.\n * Falls back to direct indexing if worker fails to spawn.\n */\nexport async function createIndex(\n documents: Document[],\n config: IndexConfig,\n): Promise<void> {\n // Dynamic import justified: search/searchSnippets shouldn't pull in worker_threads\n const { createIndexInWorker } = await import('./pool.ts')\n return createIndexInWorker(documents, config)\n}\n\nexport async function search(\n query: string,\n config: IndexConfig,\n options: SearchOptions = {},\n): Promise<SearchResult[]> {\n const { limit = 10, filter } = options\n const db = await getDb(config)\n const results = await db.search(query, { limit, filter, returnContent: true, returnMetadata: true, returnMeta: true })\n await db.close?.()\n\n return results.map(r => ({\n id: r.id,\n content: r.content ?? '',\n score: r.score,\n metadata: r.metadata ?? {},\n highlights: r._meta?.highlights ?? [],\n lineRange: r._chunk?.lineRange,\n entities: r._chunk?.entities,\n scope: r._chunk?.scope,\n }))\n}\n\n/**\n * Search and return formatted snippets\n */\nexport async function searchSnippets(\n query: string,\n config: IndexConfig,\n options: SearchOptions = {},\n): Promise<SearchSnippet[]> {\n const results = await search(query, config, options)\n return toSnippets(results)\n}\n\nfunction toSnippets(results: SearchResult[]): SearchSnippet[] {\n return results.map((r) => {\n const content = stripFrontmatter(r.content)\n const source = r.metadata.source || r.id\n const lines = content.split('\\n').length\n\n return {\n package: r.metadata.package || 'unknown',\n source,\n lineStart: r.lineRange?.[0] ?? 1,\n lineEnd: r.lineRange?.[1] ?? lines,\n content,\n score: r.score,\n highlights: r.highlights,\n entities: r.entities,\n scope: r.scope,\n }\n })\n}\n\n// ── Pooled DB access for interactive search ──\n\nexport async function openPool(dbPaths: string[]): Promise<Map<string, RetrivInstance>> {\n const pool = new Map<string, RetrivInstance>()\n await Promise.all(dbPaths.map(async (dbPath) => {\n const db = await getDb({ dbPath })\n pool.set(dbPath, db)\n }))\n return pool\n}\n\nexport async function searchPooled(\n query: string,\n pool: Map<string, RetrivInstance>,\n options: SearchOptions = {},\n): Promise<SearchSnippet[]> {\n const { limit = 10, filter } = options\n const allResults = await Promise.all(\n [...pool.values()].map(async (db) => {\n const results = await db.search(query, { limit, filter, returnContent: true, returnMetadata: true, returnMeta: true })\n return results.map(r => ({\n id: r.id,\n content: r.content ?? '',\n score: r.score,\n metadata: r.metadata ?? {},\n highlights: r._meta?.highlights ?? [],\n lineRange: r._chunk?.lineRange as [number, number] | undefined,\n entities: r._chunk?.entities,\n scope: r._chunk?.scope,\n }))\n }),\n )\n const merged = allResults.flat().sort((a, b) => b.score - a.score).slice(0, limit)\n return toSnippets(merged)\n}\n\nexport async function closePool(pool: Map<string, RetrivInstance>): Promise<void> {\n await Promise.all([...pool.values()].map(db => db.close?.()))\n pool.clear()\n}\n"],"mappings":";AAQA,eAAe,MAAM,QAAqC;CACxD,MAAM,CACJ,EAAE,gBACF,EAAE,eACF,WACA,EAAE,kBACF,EAAE,sBACA,MAAM,QAAQ,IAAI;EACpB,OAAO;EACP,OAAO;EACP,OAAO;EACP,OAAO;EACP,OAAO,mCAAA,MAAA,MAAA,EAAA,EAAA;EACR,CAAC;CACF,MAAM,aAAa,MAAM,iBAAiB,gBAAgB,CAAC;AAC3D,QAAO,aAAa;EAClB,QAAQ,UAAU,QAAQ;GACxB,MAAM,OAAO;GACb;GACD,CAAC;EACF,UAAU,aAAA;EACX,CAAC;;AAOJ,eAAsB,kBACpB,WACA,QACe;CACf,MAAM,KAAK,MAAM,MAAM,OAAO;AAC9B,OAAM,GAAG,MAAM,WAAW,EAAE,YAAY,OAAO,YAAY,CAAC;AAC5D,OAAM,GAAG,SAAS;;AAOpB,eAAsB,YACpB,WACA,QACe;CAEf,MAAM,EAAE,wBAAwB,MAAM,OAAO,wBAAA,MAAA,MAAA,EAAA,EAAA;AAC7C,QAAO,oBAAoB,WAAW,OAAO;;AAG/C,eAAsB,OACpB,OACA,QACA,UAAyB,EAAE,EACF;CACzB,MAAM,EAAE,QAAQ,IAAI,WAAW;CAC/B,MAAM,KAAK,MAAM,MAAM,OAAO;CAC9B,MAAM,UAAU,MAAM,GAAG,OAAO,OAAO;EAAE;EAAO;EAAQ,eAAe;EAAM,gBAAgB;EAAM,YAAY;EAAM,CAAC;AACtH,OAAM,GAAG,SAAS;AAElB,QAAO,QAAQ,KAAI,OAAM;EACvB,IAAI,EAAE;EACN,SAAS,EAAE,WAAW;EACtB,OAAO,EAAE;EACT,UAAU,EAAE,YAAY,EAAE;EAC1B,YAAY,EAAE,OAAO,cAAc,EAAE;EACrC,WAAW,EAAE,QAAQ;EACrB,UAAU,EAAE,QAAQ;EACpB,OAAO,EAAE,QAAQ;EAClB,EAAE;;AAML,eAAsB,eACpB,OACA,QACA,UAAyB,EAAE,EACD;AAE1B,QAAO,WADS,MAAM,OAAO,OAAO,QAAQ,QAAQ,CAC1B;;AAG5B,SAAS,WAAW,SAA0C;AAC5D,QAAO,QAAQ,KAAK,MAAM;EACxB,MAAM,UAAU,iBAAiB,EAAE,QAAQ;EAC3C,MAAM,SAAS,EAAE,SAAS,UAAU,EAAE;EACtC,MAAM,QAAQ,QAAQ,MAAM,KAAK,CAAC;AAElC,SAAO;GACL,SAAS,EAAE,SAAS,WAAW;GAC/B;GACA,WAAW,EAAE,YAAY,MAAM;GAC/B,SAAS,EAAE,YAAY,MAAM;GAC7B;GACA,OAAO,EAAE;GACT,YAAY,EAAE;GACd,UAAU,EAAE;GACZ,OAAO,EAAE;GACV;GACD;;AAKJ,eAAsB,SAAS,SAAyD;CACtF,MAAM,uBAAO,IAAI,KAA6B;AAC9C,OAAM,QAAQ,IAAI,QAAQ,IAAI,OAAO,WAAW;EAC9C,MAAM,KAAK,MAAM,MAAM,EAAE,QAAQ,CAAC;AAClC,OAAK,IAAI,QAAQ,GAAG;GACpB,CAAC;AACH,QAAO;;AAGT,eAAsB,aACpB,OACA,MACA,UAAyB,EAAE,EACD;CAC1B,MAAM,EAAE,QAAQ,IAAI,WAAW;AAiB/B,QAAO,YAhBY,MAAM,QAAQ,IAC/B,CAAC,GAAG,KAAK,QAAQ,CAAC,CAAC,IAAI,OAAO,OAAO;AAEnC,UADgB,MAAM,GAAG,OAAO,OAAO;GAAE;GAAO;GAAQ,eAAe;GAAM,gBAAgB;GAAM,YAAY;GAAM,CAAC,EACvG,KAAI,OAAM;GACvB,IAAI,EAAE;GACN,SAAS,EAAE,WAAW;GACtB,OAAO,EAAE;GACT,UAAU,EAAE,YAAY,EAAE;GAC1B,YAAY,EAAE,OAAO,cAAc,EAAE;GACrC,WAAW,EAAE,QAAQ;GACrB,UAAU,EAAE,QAAQ;GACpB,OAAO,EAAE,QAAQ;GAClB,EAAE;GACH,CACH,EACyB,MAAM,CAAC,MAAM,GAAG,MAAM,EAAE,QAAQ,EAAE,MAAM,CAAC,MAAM,GAAG,MAAM,CACzD;;AAG3B,eAAsB,UAAU,MAAkD;AAChF,OAAM,QAAQ,IAAI,CAAC,GAAG,KAAK,QAAQ,CAAC,CAAC,KAAI,OAAM,GAAG,SAAS,CAAC,CAAC;AAC7D,MAAK,OAAO"}
|
package/dist/retriv/worker.mjs
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
import "../_chunks/chunk.mjs";
|
|
2
2
|
import "../_chunks/config.mjs";
|
|
3
3
|
import "../_chunks/storage.mjs";
|
|
4
|
-
import { t as cachedEmbeddings } from "../_chunks/embedding-
|
|
4
|
+
import { t as cachedEmbeddings } from "../_chunks/embedding-cache2.mjs";
|
|
5
|
+
import { parentPort } from "node:worker_threads";
|
|
5
6
|
import { createRetriv } from "retriv";
|
|
6
7
|
import { autoChunker } from "retriv/chunkers/auto";
|
|
7
8
|
import sqlite from "retriv/db/sqlite";
|
|
8
9
|
import { transformersJs } from "retriv/embeddings/transformers-js";
|
|
9
|
-
import { parentPort } from "node:worker_threads";
|
|
10
10
|
if (parentPort) parentPort.on("message", async (msg) => {
|
|
11
11
|
if (msg.type === "shutdown") process.exit(0);
|
|
12
12
|
if (msg.type === "index") {
|
|
@@ -24,10 +24,11 @@ if (parentPort) parentPort.on("message", async (msg) => {
|
|
|
24
24
|
});
|
|
25
25
|
}
|
|
26
26
|
};
|
|
27
|
+
const embeddings = await cachedEmbeddings(transformersJs());
|
|
27
28
|
const db = await createRetriv({
|
|
28
29
|
driver: sqlite({
|
|
29
30
|
path: config.dbPath,
|
|
30
|
-
embeddings
|
|
31
|
+
embeddings
|
|
31
32
|
}),
|
|
32
33
|
chunking: autoChunker()
|
|
33
34
|
});
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"worker.mjs","names":[],"sources":["../../src/retriv/worker.ts"],"sourcesContent":["import type { IndexConfig, Document as RetrivDocument } from './types.ts'\nimport { parentPort } from 'node:worker_threads'\nimport { createRetriv } from 'retriv'\nimport { autoChunker } from 'retriv/chunkers/auto'\nimport sqlite from 'retriv/db/sqlite'\nimport { transformersJs } from 'retriv/embeddings/transformers-js'\nimport { cachedEmbeddings } from './embedding-cache.ts'\n\nexport interface WorkerIndexMessage {\n type: 'index'\n id: number\n documents: RetrivDocument[]\n dbPath: string\n}\n\nexport interface WorkerShutdownMessage {\n type: 'shutdown'\n}\n\nexport type WorkerMessage = WorkerIndexMessage | WorkerShutdownMessage\n\nexport interface WorkerProgressResponse {\n type: 'progress'\n id: number\n phase: string\n current: number\n total: number\n}\n\nexport interface WorkerDoneResponse {\n type: 'done'\n id: number\n}\n\nexport interface WorkerErrorResponse {\n type: 'error'\n id: number\n message: string\n}\n\nexport type WorkerResponse = WorkerProgressResponse | WorkerDoneResponse | WorkerErrorResponse\n\nif (parentPort) {\n parentPort.on('message', async (msg: WorkerMessage) => {\n if (msg.type === 'shutdown') {\n process.exit(0)\n }\n\n if (msg.type === 'index') {\n const { id, documents, dbPath } = msg\n\n try {\n const config: IndexConfig = {\n dbPath,\n onProgress: ({ phase, current, total }) => {\n parentPort!.postMessage({ type: 'progress', id, phase, current, total } satisfies WorkerProgressResponse)\n },\n }\n\n const db = await createRetriv({\n driver: sqlite({\n path: config.dbPath,\n embeddings
|
|
1
|
+
{"version":3,"file":"worker.mjs","names":[],"sources":["../../src/retriv/worker.ts"],"sourcesContent":["import type { IndexConfig, Document as RetrivDocument } from './types.ts'\nimport { parentPort } from 'node:worker_threads'\nimport { createRetriv } from 'retriv'\nimport { autoChunker } from 'retriv/chunkers/auto'\nimport sqlite from 'retriv/db/sqlite'\nimport { transformersJs } from 'retriv/embeddings/transformers-js'\nimport { cachedEmbeddings } from './embedding-cache.ts'\n\nexport interface WorkerIndexMessage {\n type: 'index'\n id: number\n documents: RetrivDocument[]\n dbPath: string\n}\n\nexport interface WorkerShutdownMessage {\n type: 'shutdown'\n}\n\nexport type WorkerMessage = WorkerIndexMessage | WorkerShutdownMessage\n\nexport interface WorkerProgressResponse {\n type: 'progress'\n id: number\n phase: string\n current: number\n total: number\n}\n\nexport interface WorkerDoneResponse {\n type: 'done'\n id: number\n}\n\nexport interface WorkerErrorResponse {\n type: 'error'\n id: number\n message: string\n}\n\nexport type WorkerResponse = WorkerProgressResponse | WorkerDoneResponse | WorkerErrorResponse\n\nif (parentPort) {\n parentPort.on('message', async (msg: WorkerMessage) => {\n if (msg.type === 'shutdown') {\n process.exit(0)\n }\n\n if (msg.type === 'index') {\n const { id, documents, dbPath } = msg\n\n try {\n const config: IndexConfig = {\n dbPath,\n onProgress: ({ phase, current, total }) => {\n parentPort!.postMessage({ type: 'progress', id, phase, current, total } satisfies WorkerProgressResponse)\n },\n }\n\n const embeddings = await cachedEmbeddings(transformersJs())\n const db = await createRetriv({\n driver: sqlite({\n path: config.dbPath,\n embeddings,\n }),\n chunking: autoChunker(),\n })\n\n await db.index(documents, { onProgress: config.onProgress })\n await db.close?.()\n\n parentPort!.postMessage({ type: 'done', id } satisfies WorkerDoneResponse)\n }\n catch (err) {\n parentPort!.postMessage({\n type: 'error',\n id,\n message: err instanceof Error ? err.message : String(err),\n } satisfies WorkerErrorResponse)\n }\n }\n })\n}\n"],"mappings":";;;;;;;;;AA0CA,IAAI,WACF,YAAW,GAAG,WAAW,OAAO,QAAuB;AACrD,KAAI,IAAI,SAAS,WACf,SAAQ,KAAK,EAAE;AAGjB,KAAI,IAAI,SAAS,SAAS;EACxB,MAAM,EAAE,IAAI,WAAW,WAAW;AAElC,MAAI;GACF,MAAM,SAAsB;IAC1B;IACA,aAAa,EAAE,OAAO,SAAS,YAAY;AACzC,gBAAY,YAAY;MAAE,MAAM;MAAY;MAAI;MAAO;MAAS;MAAO,CAAkC;;IAE5G;GAED,MAAM,aAAa,MAAM,iBAAiB,gBAAgB,CAAC;GAC3D,MAAM,KAAK,MAAM,aAAa;IAC5B,QAAQ,OAAO;KACb,MAAM,OAAO;KACb;KACD,CAAC;IACF,UAAU,aAAA;IACX,CAAC;AAEF,SAAM,GAAG,MAAM,WAAW,EAAE,YAAY,OAAO,YAAY,CAAC;AAC5D,SAAM,GAAG,SAAS;AAElB,cAAY,YAAY;IAAE,MAAM;IAAQ;IAAI,CAA8B;WAErE,KAAK;AACV,cAAY,YAAY;IACtB,MAAM;IACN;IACA,SAAS,eAAe,QAAQ,IAAI,UAAU,OAAO,IAAA;IACtD,CAA+B;;;EAGpC"}
|
package/package.json
CHANGED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"embedding-cache.mjs","names":["cachedEmbeddings","retrivCached"],"sources":["../../src/retriv/embedding-cache.ts"],"sourcesContent":["import type { DatabaseSync } from 'node:sqlite'\nimport type { Embedding } from 'retriv'\nimport { rmSync } from 'node:fs'\nimport { join } from 'pathe'\nimport { cachedEmbeddings as retrivCached } from 'retriv/embeddings/cached'\nimport { CACHE_DIR } from '../cache/index.ts'\n\ninterface EmbeddingConfig {\n resolve: () => Promise<{ embedder: (texts: string[]) => Promise<Embedding[]>, dimensions: number, maxTokens?: number }>\n}\n\nconst EMBEDDINGS_DB_PATH = join(CACHE_DIR, 'embeddings.db')\n\nfunction openDb(): DatabaseSync {\n // eslint-disable-next-line ts/no-require-imports\n const { DatabaseSync: DB } = require('node:sqlite') as typeof import('node:sqlite')\n const db = new DB(EMBEDDINGS_DB_PATH)\n db.exec('PRAGMA journal_mode=WAL')\n db.exec('PRAGMA busy_timeout=5000')\n db.exec(`CREATE TABLE IF NOT EXISTS embeddings (text_hash TEXT PRIMARY KEY, embedding BLOB NOT NULL)`)\n db.exec(`CREATE TABLE IF NOT EXISTS meta (key TEXT PRIMARY KEY, value TEXT NOT NULL)`)\n return db\n}\n\nfunction createSqliteStorage(db: DatabaseSync) {\n const getStmt = db.prepare('SELECT embedding FROM embeddings WHERE text_hash = ?')\n const setStmt = db.prepare('INSERT OR IGNORE INTO embeddings (text_hash, embedding) VALUES (?, ?)')\n\n return {\n get: (hash: string): Embedding | null => {\n const row = getStmt.get(hash) as { embedding: Buffer } | undefined\n if (!row)\n return null\n return new Float32Array(row.embedding.buffer, row.embedding.byteOffset, row.embedding.byteLength / 4)\n },\n set: (hash: string, embedding: Embedding): void => {\n const arr = embedding instanceof Float32Array ? embedding : new Float32Array(embedding)\n setStmt.run(hash, Buffer.from(arr.buffer, arr.byteOffset, arr.byteLength))\n },\n }\n}\n\nexport function cachedEmbeddings(config: EmbeddingConfig): EmbeddingConfig {\n const db = openDb()\n const storage = createSqliteStorage(db)\n\n // Validate dimensions on first resolve\n const originalResolve = config.resolve\n const validatedConfig: EmbeddingConfig = {\n async resolve() {\n const resolved = await originalResolve()\n const getMetaStmt = db.prepare('SELECT value FROM meta WHERE key = ?')\n const setMetaStmt = db.prepare('INSERT OR REPLACE INTO meta (key, value) VALUES (?, ?)')\n\n const storedDims = getMetaStmt.get('dimensions') as { value: string } | undefined\n if (storedDims && Number(storedDims.value) !== resolved.dimensions) {\n db.exec('DELETE FROM embeddings')\n }\n setMetaStmt.run('dimensions', String(resolved.dimensions))\n\n return resolved\n },\n }\n\n return retrivCached(validatedConfig, { storage })\n}\n\nexport function clearEmbeddingCache(): void {\n rmSync(EMBEDDINGS_DB_PATH, { force: true })\n}\n"],"mappings":";;;;;AAWA,MAAM,qBAAqB,KAAK,WAAW,gBAAgB;AAE3D,SAAS,SAAuB;CAE9B,MAAM,EAAE,cAAc,OAAA,UAAe,cAAc;CACnD,MAAM,KAAK,IAAI,GAAG,mBAAmB;AACrC,IAAG,KAAK,0BAA0B;AAClC,IAAG,KAAK,2BAA2B;AACnC,IAAG,KAAK,8FAA8F;AACtG,IAAG,KAAK,8EAA8E;AACtF,QAAO;;AAGT,SAAS,oBAAoB,IAAkB;CAC7C,MAAM,UAAU,GAAG,QAAQ,uDAAuD;CAClF,MAAM,UAAU,GAAG,QAAQ,wEAAwE;AAEnG,QAAO;EACL,MAAM,SAAmC;GACvC,MAAM,MAAM,QAAQ,IAAI,KAAK;AAC7B,OAAI,CAAC,IACH,QAAO;AACT,UAAO,IAAI,aAAa,IAAI,UAAU,QAAQ,IAAI,UAAU,YAAY,IAAI,UAAU,aAAa,EAAE;;EAEvG,MAAM,MAAc,cAA+B;GACjD,MAAM,MAAM,qBAAqB,eAAe,YAAY,IAAI,aAAa,UAAU;AACvF,WAAQ,IAAI,MAAM,OAAO,KAAK,IAAI,QAAQ,IAAI,YAAY,IAAI,WAAW,CAAC;;EAE7E;;AAGH,SAAgBA,mBAAiB,QAA0C;CACzE,MAAM,KAAK,QAAQ;CACnB,MAAM,UAAU,oBAAoB,GAAG;CAGvC,MAAM,kBAAkB,OAAO;AAiB/B,QAAOC,iBAhBkC,EACvC,MAAM,UAAU;EACd,MAAM,WAAW,MAAM,iBAAiB;EACxC,MAAM,cAAc,GAAG,QAAQ,uCAAuC;EACtE,MAAM,cAAc,GAAG,QAAQ,yDAAyD;EAExF,MAAM,aAAa,YAAY,IAAI,aAAa;AAChD,MAAI,cAAc,OAAO,WAAW,MAAM,KAAK,SAAS,WACtD,IAAG,KAAK,yBAAyB;AAEnC,cAAY,IAAI,cAAc,OAAO,SAAS,WAAW,CAAC;AAE1D,SAAO;IAEV,EAEoC,EAAE,SAAS,CAAC;;AAGnD,SAAgB,sBAA4B;AAC1C,QAAO,oBAAoB,EAAE,OAAO,MAAM,CAAC"}
|