@vpxa/aikit 0.1.128 → 0.1.130

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@vpxa/aikit",
3
- "version": "0.1.128",
3
+ "version": "0.1.130",
4
4
  "type": "module",
5
5
  "description": "Local-first AI developer toolkit — knowledge base, code analysis, context management, and developer tools for LLM agents",
6
6
  "license": "MIT",
@@ -1 +1 @@
1
- import{basename as e,extname as t,join as n,resolve as r}from"node:path";import{createHash as i}from"node:crypto";import{appendFileSync as a,closeSync as o,constants as s,existsSync as c,mkdirSync as l,openSync as ee,readFileSync as u,readdirSync as d,renameSync as f,statSync as te,unlinkSync as p,writeFileSync as m}from"node:fs";import{homedir as h}from"node:os";const g={ai:`.ai`,aiContext:`.ai/context`,aiCurated:`.ai/curated`,restorePoints:`.ai/restore-points`,data:`.aikit-data`,state:`.aikit-state`,logs:`.aikit-state/logs`,brainstorm:`.brainstorm`,handoffs:`.handoffs`},_={root:`.aikit-data`,registry:`registry.json`},v={markdown:{max:1500,min:100},code:{max:2e3,min:50},config:{max:3e3,min:50},default:{max:1500,min:100,overlap:200}},y={model:`mixedbread-ai/mxbai-embed-large-v1`,dimensions:512},b={backend:`sqlite-vec`,path:g.data,tableName:`knowledge`},x={maxFileSizeBytes:1e6,maxCuratedFileSizeBytes:5e4},S={maxResults:10,minScore:.25},C=/^[a-z][a-z0-9-]*$/,w=[`decisions`,`patterns`,`troubleshooting`,`conventions`,`architecture`],T={".ts":`code-typescript`,".tsx":`code-typescript`,".mts":`code-typescript`,".cts":`code-typescript`,".js":`code-javascript`,".jsx":`code-javascript`,".mjs":`code-javascript`,".cjs":`code-javascript`,".py":`code-python`,".json":`config-json`,".yaml":`config-yaml`,".yml":`config-yaml`,".toml":`config-toml`,".env":`config-env`,".md":`markdown`,".mdx":`markdown`},E=[/\.test\.[jt]sx?$/,/\.spec\.[jt]sx?$/,/(^|\/)__tests__\//,/(^|\/)test\//,/(^|\/)tests\//,/(^|\/)spec\//,/(^|\/)fixtures\//],ne=[/\.stack\.[jt]s$/,/(^|\/)stacks\//,/(^|\/)constructs\//,/cdk\.json$/];function re(n){let r=t(n).toLowerCase(),i=e(n).toLowerCase();return n.includes(`${g.aiContext}/`)?`produced-knowledge`:n.includes(`${g.aiCurated}/`)?`curated-knowledge`:E.some(e=>e.test(n))?`test-code`:ne.some(e=>e.test(n))?`cdk-stack`:r in T?T[r]:i.startsWith(`.env`)?`config-env`:[`.go`,`.rs`,`.java`,`.rb`,`.php`,`.sh`,`.ps1`,`.sql`,`.graphql`,`.proto`,`.css`,`.scss`,`.less`,`.html`,`.htm`,`.vue`,`.svelte`,`.astro`,`.hbs`,`.ejs`,`.svg`].includes(r)?`code-other`:`unknown`}const D={"code-typescript":`source`,"code-javascript":`source`,"code-python":`source`,"code-other":`source`,"cdk-stack":`source`,"test-code":`test`,markdown:`documentation`,documentation:`documentation`,"curated-knowledge":`documentation`,"produced-knowledge":`documentation`,"config-json":`config`,"config-yaml":`config`,"config-toml":`config`,"config-env":`config`,unknown:`source`};function ie(e){return D[e]??`source`}function O(e){return Object.entries(D).filter(([,t])=>t===e).map(([e])=>e)}var k=class extends Error{code;constructor(e,t,n){super(e,n===void 0?void 0:{cause:n}),this.code=t,this.name=`AikitError`}},A=class extends k{constructor(e,t){super(e,`EMBEDDING_ERROR`,t),this.name=`EmbeddingError`}},j=class extends k{constructor(e,t){super(e,`STORE_ERROR`,t),this.name=`StoreError`}},M=class extends k{constructor(e,t){super(e,`INDEX_ERROR`,t),this.name=`IndexError`}},N=class extends k{constructor(e,t){super(e,`CONFIG_ERROR`,t),this.name=`ConfigError`}};function P(){return process.env.AIKIT_GLOBAL_DATA_DIR??r(h(),_.root)}function F(t){let n=r(t);return`${e(n).toLowerCase().replace(/[^a-z0-9-]/g,`-`)||`workspace`}-${i(`sha256`).update(n).digest(`hex`).slice(0,8)}`}function I(){let e=r(P(),_.registry);if(!c(e))return{version:1,workspaces:{}};let t=u(e,`utf-8`);try{return JSON.parse(t)}catch{return{version:1,workspaces:{}}}}function L(e,t=5e3){let n=`${e}.lock`,r=Date.now()+t,i=10;for(;Date.now()<r;)try{let e=ee(n,s.O_CREAT|s.O_EXCL|s.O_WRONLY);return m(e,`${process.pid}\n`),o(e),n}catch(e){if(e.code!==`EEXIST`)throw e;try{let{mtimeMs:e}=te(n);if(Date.now()-e>3e4){p(n);continue}}catch{}let t=new SharedArrayBuffer(4);Atomics.wait(new Int32Array(t),0,0,i),i=Math.min(i*2,200)}throw Error(`Failed to acquire registry lock after ${t}ms`)}function R(e){try{p(e)}catch{}}function z(e){let t=P();l(t,{recursive:!0});let n=r(t,_.registry),i=L(n);try{let t=`${n}.tmp`;m(t,JSON.stringify(e,null,2),`utf-8`),f(t,n)}finally{R(i)}}function B(e){let t=I(),n=F(e),i=new Date().toISOString();return t.workspaces[n]?t.workspaces[n].lastAccessedAt=i:t.workspaces[n]={partition:n,workspacePath:r(e),registeredAt:i,lastAccessedAt:i},l(U(n),{recursive:!0}),z(t),t.workspaces[n]}function V(e){let t=I(),n=F(e);return t.workspaces[n]}function H(){let e=I();return Object.values(e.workspaces)}function U(e){return r(P(),e)}function W(){return c(r(P(),_.registry))}function G(e){return W()?r(U(B(e).partition),`state`):r(e,g.state)}const K={debug:0,info:1,warn:2,error:3},q=[];let J=process.env.AIKIT_LOG_LEVEL??`info`,Y=process.env.AIKIT_LOG_FILE_SINK===`true`||process.env.AIKIT_LOG_FILE_SINK!==`false`&&!process.env.VITEST&&process.env.NODE_ENV!==`test`;function ae(){return Y?process.env.VITEST||process.env.NODE_ENV===`test`?process.env.AIKIT_LOG_FILE_SINK===`true`:!0:!1}let X;function Z(){return X||=n(G(process.cwd()),`logs`),X}function oe(e){let t=e.toISOString().slice(0,10);return n(Z(),`${t}.jsonl`)}let Q=0;function se(){let e=Date.now();if(!(e-Q<36e5)){Q=e;try{let t=Z(),r=new Date(e-30*864e5).toISOString().slice(0,10);for(let e of d(t))if(e.endsWith(`.jsonl`)&&e.slice(0,10)<r)try{p(n(t,e))}catch{}}catch{}}}function ce(e,t){try{l(Z(),{recursive:!0}),a(oe(t),`${e}\n`),se()}catch{}}function le(e){J=e}function ue(){return J}function de(e){Y=e}function fe(){X=void 0}function pe(e){if(e instanceof Error){let t={error:e.message};return e.stack&&(t.stack=e.stack),e.cause!==void 0&&(t.cause=e.cause instanceof Error?e.cause.message:String(e.cause)),t}return{error:String(e)}}function $(e){return q.push(e),()=>{let t=q.indexOf(e);t>=0&&q.splice(t,1)}}function me(e){function t(t,n,r){if(K[t]<K[J])return;let i=new Date,a={ts:i.toISOString(),level:t,component:e,msg:n,...r},o=JSON.stringify(a);console.error(o);for(let i of q)try{i({level:t,component:e,message:n,data:r})}catch{}ae()&&(t===`warn`||t===`error`)&&ce(o,i)}return{debug:(e,n)=>t(`debug`,e,n),info:(e,n)=>t(`info`,e,n),warn:(e,n)=>t(`warn`,e,n),error:(e,n)=>t(`error`,e,n)}}const he=[`indexed`,`curated`,`produced`],ge=[`source`,`documentation`,`test`,`config`,`generated`],_e=[`auto`,`manual`,`smart`],ve=[`efficient`,`normal`,`full`],ye=[`documentation`,`code-typescript`,`code-javascript`,`code-python`,`code-other`,`config-json`,`config-yaml`,`config-toml`,`config-env`,`test-code`,`cdk-stack`,`markdown`,`curated-knowledge`,`produced-knowledge`,`unknown`];export{_ as AIKIT_GLOBAL_PATHS,g as AIKIT_PATHS,k as AikitError,C as CATEGORY_PATTERN,v as CHUNK_SIZES,ye as CONTENT_TYPES,N as ConfigError,w as DEFAULT_CATEGORIES,y as EMBEDDING_DEFAULTS,A as EmbeddingError,x as FILE_LIMITS,_e as INDEX_MODES,M as IndexError,he as KNOWLEDGE_ORIGINS,S as SEARCH_DEFAULTS,ge as SOURCE_TYPES,b as STORE_DEFAULTS,j as StoreError,ve as TOKEN_BUDGETS,$ as addLogListener,F as computePartitionKey,ie as contentTypeToSourceType,me as createLogger,re as detectContentType,P as getGlobalDataDir,ue as getLogLevel,U as getPartitionDir,W as isUserInstalled,H as listWorkspaces,I as loadRegistry,V as lookupWorkspace,B as registerWorkspace,fe as resetLogDir,G as resolveStateDir,z as saveRegistry,pe as serializeError,de as setFileSinkEnabled,le as setLogLevel,O as sourceTypeContentTypes};
1
+ import{basename as e,extname as t,join as n,resolve as r}from"node:path";import{createHash as i}from"node:crypto";import{appendFileSync as a,closeSync as o,constants as s,existsSync as c,mkdirSync as l,openSync as ee,readFileSync as u,readdirSync as d,renameSync as f,statSync as te,unlinkSync as p,writeFileSync as m}from"node:fs";import{homedir as h}from"node:os";const g={ai:`.ai`,aiContext:`.ai/context`,aiCurated:`.ai/curated`,restorePoints:`.ai/restore-points`,data:`.aikit-data`,state:`.aikit-state`,logs:`.aikit-state/logs`,brainstorm:`.brainstorm`,handoffs:`.handoffs`},_={root:`.aikit-data`,registry:`registry.json`},v={markdown:{max:1500,min:100},code:{max:2e3,min:50},config:{max:3e3,min:50},default:{max:1500,min:100,overlap:200}},y={model:`mixedbread-ai/mxbai-embed-large-v1`,dimensions:512},b={backend:`sqlite-vec`,path:g.data,tableName:`knowledge`},x={maxFileSizeBytes:1e6,maxCuratedFileSizeBytes:5e4},S={maxResults:10,minScore:.25},C=/^[a-z][a-z0-9-]*$/,w=[`decisions`,`patterns`,`troubleshooting`,`conventions`,`architecture`],T={".ts":`code-typescript`,".tsx":`code-typescript`,".mts":`code-typescript`,".cts":`code-typescript`,".js":`code-javascript`,".jsx":`code-javascript`,".mjs":`code-javascript`,".cjs":`code-javascript`,".py":`code-python`,".json":`config-json`,".yaml":`config-yaml`,".yml":`config-yaml`,".toml":`config-toml`,".env":`config-env`,".md":`markdown`,".mdx":`markdown`},E=[/\.test\.[jt]sx?$/,/\.spec\.[jt]sx?$/,/(^|\/)__tests__\//,/(^|\/)test\//,/(^|\/)tests\//,/(^|\/)spec\//,/(^|\/)fixtures\//],ne=[/\.stack\.[jt]s$/,/(^|\/)stacks\//,/(^|\/)constructs\//,/cdk\.json$/];function re(n){let r=t(n).toLowerCase(),i=e(n).toLowerCase();return n.includes(`${g.aiContext}/`)?`produced-knowledge`:n.includes(`${g.aiCurated}/`)?`curated-knowledge`:E.some(e=>e.test(n))?`test-code`:ne.some(e=>e.test(n))?`cdk-stack`:r in T?T[r]:i.startsWith(`.env`)?`config-env`:[`.go`,`.rs`,`.java`,`.rb`,`.php`,`.sh`,`.ps1`,`.sql`,`.graphql`,`.proto`,`.css`,`.scss`,`.less`,`.html`,`.htm`,`.vue`,`.svelte`,`.astro`,`.hbs`,`.ejs`,`.svg`].includes(r)?`code-other`:`unknown`}const D={"code-typescript":`source`,"code-javascript":`source`,"code-python":`source`,"code-other":`source`,"cdk-stack":`source`,"test-code":`test`,markdown:`documentation`,documentation:`documentation`,"curated-knowledge":`documentation`,"produced-knowledge":`documentation`,"config-json":`config`,"config-yaml":`config`,"config-toml":`config`,"config-env":`config`,unknown:`source`};function ie(e){return D[e]??`source`}function O(e){return Object.entries(D).filter(([,t])=>t===e).map(([e])=>e)}var k=class extends Error{code;constructor(e,t,n){super(e,n===void 0?void 0:{cause:n}),this.code=t,this.name=`AikitError`}},A=class extends k{constructor(e,t){super(e,`EMBEDDING_ERROR`,t),this.name=`EmbeddingError`}},j=class extends k{constructor(e,t){super(e,`STORE_ERROR`,t),this.name=`StoreError`}},M=class extends k{constructor(e,t){super(e,`INDEX_ERROR`,t),this.name=`IndexError`}},N=class extends k{constructor(e,t){super(e,`CONFIG_ERROR`,t),this.name=`ConfigError`}};function P(){return process.env.AIKIT_GLOBAL_DATA_DIR??r(h(),_.root)}function F(t){let n=r(t);return`${e(n).toLowerCase().replace(/[^a-z0-9-]/g,`-`)||`workspace`}-${i(`sha256`).update(n).digest(`hex`).slice(0,8)}`}function I(){let e=r(P(),_.registry);if(!c(e))return{version:1,workspaces:{}};let t=u(e,`utf-8`);try{return JSON.parse(t)}catch{return{version:1,workspaces:{}}}}function L(e,t=5e3){let n=`${e}.lock`,r=Date.now()+t,i=10;for(;Date.now()<r;)try{let e=ee(n,s.O_CREAT|s.O_EXCL|s.O_WRONLY);return m(e,`${process.pid}\n`),o(e),n}catch(e){if(e.code!==`EEXIST`)throw e;try{let{mtimeMs:e}=te(n);if(Date.now()-e>3e4){p(n);continue}}catch{}let t=new SharedArrayBuffer(4);Atomics.wait(new Int32Array(t),0,0,i),i=Math.min(i*2,200)}throw Error(`Failed to acquire registry lock after ${t}ms`)}function R(e){try{p(e)}catch{}}function z(e){let t=P();l(t,{recursive:!0});let n=r(t,_.registry),i=L(n);try{let t=`${n}.tmp`;m(t,JSON.stringify(e,null,2),`utf-8`),f(t,n)}finally{R(i)}}function B(e){let t=I(),n=F(e),i=new Date().toISOString();return t.workspaces[n]?t.workspaces[n].lastAccessedAt=i:t.workspaces[n]={partition:n,workspacePath:r(e),registeredAt:i,lastAccessedAt:i},l(U(n),{recursive:!0}),z(t),t.workspaces[n]}function V(e){let t=I(),n=F(e);return t.workspaces[n]}function H(){let e=I();return Object.values(e.workspaces)}function U(e){return r(P(),e)}function W(){return c(r(P(),_.registry))}function G(e){return W()?r(U(B(e).partition),`state`):r(e,g.state)}const K={debug:0,info:1,warn:2,error:3},q=[];let J=process.env.AIKIT_LOG_LEVEL??`info`,Y=process.env.AIKIT_LOG_FILE_SINK===`true`||process.env.AIKIT_LOG_FILE_SINK!==`false`&&!process.env.VITEST&&process.env.NODE_ENV!==`test`;function ae(){return Y?process.env.VITEST||process.env.NODE_ENV===`test`?process.env.AIKIT_LOG_FILE_SINK===`true`:!0:!1}let X;function Z(){return X||=n(G(process.cwd()),`logs`),X}function oe(e){let t=e.toISOString().slice(0,10);return n(Z(),`${t}.jsonl`)}let Q=0;function se(){let e=Date.now();if(!(e-Q<36e5)){Q=e;try{let t=Z(),r=new Date(e-30*864e5).toISOString().slice(0,10);for(let e of d(t))if(e.endsWith(`.jsonl`)&&e.slice(0,10)<r)try{p(n(t,e))}catch{}}catch{}}}function ce(e,t){try{l(Z(),{recursive:!0}),a(oe(t),`${e}\n`),se()}catch{}}function le(e){J=e}function ue(){return J}function de(e){Y=e}function fe(){X=void 0}function pe(e){if(e instanceof Error){let t={error:e.message};return e.stack&&(t.stack=e.stack),e.cause!==void 0&&(t.cause=e.cause instanceof Error?e.cause.message:String(e.cause)),t}return{error:String(e)}}function $(e){return q.push(e),()=>{let t=q.indexOf(e);t>=0&&q.splice(t,1)}}function me(e){function t(t,n,r){if(K[t]<K[J])return;let i=new Date,a={ts:i.toISOString(),level:t,component:e,msg:n,...r},o=JSON.stringify(a);(t===`warn`||t===`error`)&&console.error(o);for(let i of q)try{i({level:t,component:e,message:n,data:r})}catch{}ae()&&(t===`warn`||t===`error`)&&ce(o,i)}return{debug:(e,n)=>t(`debug`,e,n),info:(e,n)=>t(`info`,e,n),warn:(e,n)=>t(`warn`,e,n),error:(e,n)=>t(`error`,e,n)}}const he=[`indexed`,`curated`,`produced`],ge=[`source`,`documentation`,`test`,`config`,`generated`],_e=[`auto`,`manual`,`smart`],ve=[`efficient`,`normal`,`full`],ye=[`documentation`,`code-typescript`,`code-javascript`,`code-python`,`code-other`,`config-json`,`config-yaml`,`config-toml`,`config-env`,`test-code`,`cdk-stack`,`markdown`,`curated-knowledge`,`produced-knowledge`,`unknown`];export{_ as AIKIT_GLOBAL_PATHS,g as AIKIT_PATHS,k as AikitError,C as CATEGORY_PATTERN,v as CHUNK_SIZES,ye as CONTENT_TYPES,N as ConfigError,w as DEFAULT_CATEGORIES,y as EMBEDDING_DEFAULTS,A as EmbeddingError,x as FILE_LIMITS,_e as INDEX_MODES,M as IndexError,he as KNOWLEDGE_ORIGINS,S as SEARCH_DEFAULTS,ge as SOURCE_TYPES,b as STORE_DEFAULTS,j as StoreError,ve as TOKEN_BUDGETS,$ as addLogListener,F as computePartitionKey,ie as contentTypeToSourceType,me as createLogger,re as detectContentType,P as getGlobalDataDir,ue as getLogLevel,U as getPartitionDir,W as isUserInstalled,H as listWorkspaces,I as loadRegistry,V as lookupWorkspace,B as registerWorkspace,fe as resetLogDir,G as resolveStateDir,z as saveRegistry,pe as serializeError,de as setFileSinkEnabled,le as setLogLevel,O as sourceTypeContentTypes};
@@ -24,6 +24,9 @@ interface IEmbedder {
24
24
  }
25
25
  //#endregion
26
26
  //#region packages/embeddings/src/embedder-proxy.d.ts
27
+ type EmbedderProxyLogger = {
28
+ warn?: (message: string, data?: Record<string, unknown>) => void;
29
+ };
27
30
  interface EmbedderProxyOptions {
28
31
  model?: string;
29
32
  dimensions?: number;
@@ -32,9 +35,15 @@ interface EmbedderProxyOptions {
32
35
  interOpNumThreads?: number;
33
36
  intraOpNumThreads?: number;
34
37
  idleTimeoutMs?: number;
38
+ logger?: EmbedderProxyLogger;
39
+ maxRetries?: number;
40
+ retryBaseDelayMs?: number;
35
41
  }
36
42
  declare class EmbedderProxy implements IEmbedder {
37
43
  private readonly options;
44
+ private readonly logger?;
45
+ private readonly maxRetries;
46
+ private readonly retryBaseDelayMs;
38
47
  private readonly workerPath;
39
48
  private readonly pendingRequests;
40
49
  private readonly childState;
@@ -53,13 +62,18 @@ declare class EmbedderProxy implements IEmbedder {
53
62
  embed(text: string): Promise<Float32Array>;
54
63
  embedQuery(text: string): Promise<Float32Array>;
55
64
  embedBatch(texts: string[], batchSize?: number): Promise<Float32Array[]>;
65
+ private sendBatchRequest;
56
66
  shutdown(): Promise<void>;
57
67
  private startWorker;
58
68
  private spawnChild;
59
69
  private handleChildMessage;
60
70
  private handleChildExit;
61
71
  private handleChildFailure;
72
+ private sendVectorRequestWithRetry;
62
73
  private sendVectorRequest;
74
+ private withWorkerExitRetry;
75
+ private isWorkerExitError;
76
+ private wait;
63
77
  private requireReadyChild;
64
78
  private buildInitConfig;
65
79
  private buildChildEnv;
@@ -1 +1 @@
1
- import{fork as e}from"node:child_process";import{randomUUID as t}from"node:crypto";import{dirname as n,join as r}from"node:path";import{fileURLToPath as i}from"node:url";import{EMBEDDING_DEFAULTS as a}from"../../core/dist/index.js";import{rm as o}from"node:fs/promises";import{homedir as s}from"node:os";var c=class{options;workerPath=r(n(i(import.meta.url)),`embedder-worker.js`);pendingRequests=new Map;childState=new WeakMap;child=null;readyChild=null;pendingInit=null;pendingShutdown=null;initializePromise=null;shutdownPromise=null;currentDimensions;currentModelId;constructor(e={}){this.options=e,this.currentDimensions=e.dimensions??a.dimensions,this.currentModelId=e.model??a.model}get dimensions(){return this.currentDimensions}get modelId(){return this.currentModelId}async initialize(){if(!(this.readyChild&&this.child===this.readyChild)){if(this.initializePromise)return this.initializePromise;if(this.shutdownPromise){try{await this.shutdownPromise}catch{}if(this.readyChild&&this.child===this.readyChild)return}return this.initializePromise=this.startWorker().finally(()=>{this.initializePromise=null}),this.initializePromise}}async embed(e){return this.sendVectorRequest({type:`embed`,text:e})}async embedQuery(e){return this.sendVectorRequest({type:`embedQuery`,text:e})}async embedBatch(e,n){if(e.length===0)return[];await this.initialize();let r=this.requireReadyChild(),i=t(),a=new Promise((e,t)=>{this.pendingRequests.set(i,{child:r,resolve:t=>e(t),reject:t})});try{r.send({type:`embedBatch`,id:i,texts:e,batchSize:n})}catch(e){throw this.pendingRequests.delete(i),this.toError(e,`Failed to send embedBatch request to worker`)}return a}async shutdown(){if(this.shutdownPromise)return this.shutdownPromise;let e=this.child;if(!e)return;let t=this.requireChildState(e);t.shutdownRequested=!0,this.readyChild===e&&(this.readyChild=null),this.shutdownPromise=new Promise((t,n)=>{this.pendingShutdown={child:e,resolve:t,reject:n}}).finally(()=>{this.shutdownPromise=null});try{e.send({type:`shutdown`})}catch(t){let n=this.toError(t,`Failed to send shutdown request to worker`);throw this.clearChildReference(e),this.rejectPendingForChild(e,n),this.rejectLifecycleIfOwned(`shutdown`,e,n),n}return this.shutdownPromise}async startWorker(){this.child&&this.readyChild!==this.child&&(this.child=null);let e=this.spawnChild();this.child=e,this.readyChild=null;let t=new Promise((t,n)=>{this.pendingInit={child:e,resolve:t,reject:n}});try{e.send({type:`init`,config:this.buildInitConfig()})}catch(t){let n=this.toError(t,`Failed to send init request to worker`);throw this.pendingInit=null,this.clearChildReference(e),n}return t}spawnChild(){let t=e(this.workerPath,[],{env:this.buildChildEnv()});return this.childState.set(t,{idleExitNotified:!1,shutdownRequested:!1,terminated:!1}),t.on(`message`,e=>{this.handleChildMessage(t,e)}),t.once(`error`,e=>{this.handleChildFailure(t,this.toError(e,`Embedder worker failed`))}),t.once(`exit`,(e,n)=>{this.handleChildExit(t,e,n)}),t}handleChildMessage(e,t){switch(t.type){case`ready`:{this.currentDimensions=t.dimensions,this.currentModelId=t.modelId,this.child===e&&(this.readyChild=e);let n=this.pendingInit;n?.child===e&&(this.pendingInit=null,n.resolve());return}case`result`:{let n=this.pendingRequests.get(t.id);if(!n||n.child!==e)return;this.pendingRequests.delete(t.id),n.resolve(new Float32Array(t.data));return}case`batchResult`:{let n=this.pendingRequests.get(t.id);if(!n||n.child!==e)return;this.pendingRequests.delete(t.id),n.resolve(t.data.map(e=>new Float32Array(e)));return}case`error`:{let n=Error(t.message);if(t.id===`init`){this.clearChildReference(e),this.rejectLifecycleIfOwned(`init`,e,n);return}if(t.id===`shutdown`){this.rejectLifecycleIfOwned(`shutdown`,e,n);return}let r=this.pendingRequests.get(t.id);if(!r||r.child!==e)return;this.pendingRequests.delete(t.id),r.reject(n);return}case`idle-exit`:{let t=this.requireChildState(e);t.idleExitNotified=!0,this.clearChildReference(e);return}}}handleChildExit(e,t,n){let r=this.requireChildState(e);if(r.terminated)return;r.terminated=!0,this.clearChildReference(e);let i=r.shutdownRequested||r.idleExitNotified,a=Error(i?`Embedder worker exited before completing request`:`Embedder worker exited unexpectedly (code ${t??`null`}${n?`, signal ${n}`:``})`);this.rejectLifecycleIfOwned(`init`,e,a),i?this.resolveLifecycleIfOwned(`shutdown`,e):this.rejectLifecycleIfOwned(`shutdown`,e,a),this.rejectPendingForChild(e,a)}handleChildFailure(e,t){let n=this.requireChildState(e);n.terminated||(n.terminated=!0,this.clearChildReference(e),this.rejectLifecycleIfOwned(`init`,e,t),this.rejectLifecycleIfOwned(`shutdown`,e,t),this.rejectPendingForChild(e,t))}async sendVectorRequest(e){await this.initialize();let n=this.requireReadyChild(),r=t(),i=new Promise((e,t)=>{this.pendingRequests.set(r,{child:n,resolve:t=>e(t),reject:t})});try{n.send({...e,id:r})}catch(t){throw this.pendingRequests.delete(r),this.toError(t,`Failed to send ${e.type} request to worker`)}return i}requireReadyChild(){if(!this.child||this.readyChild!==this.child)throw Error(`Embedder worker is not initialized`);return this.child}buildInitConfig(){return{model:this.options.model,dimensions:this.options.dimensions,nativeDim:this.options.nativeDim,queryPrefix:this.options.queryPrefix,interOpNumThreads:this.options.interOpNumThreads,intraOpNumThreads:this.options.intraOpNumThreads}}buildChildEnv(){return this.options.idleTimeoutMs===void 0?process.env:{...process.env,AIKIT_EMBED_IDLE_MS:String(this.options.idleTimeoutMs)}}requireChildState(e){let t=this.childState.get(e);if(!t)throw Error(`Embedder worker state not found`);return t}clearChildReference(e){this.child===e&&(this.child=null),this.readyChild===e&&(this.readyChild=null)}rejectPendingForChild(e,t){for(let[n,r]of this.pendingRequests)r.child===e&&(this.pendingRequests.delete(n),r.reject(t))}resolveLifecycleIfOwned(e,t){let n=this.pendingShutdown;!n||n.child!==t||(this.pendingShutdown=null,n.resolve())}rejectLifecycleIfOwned(e,t,n){let r=e===`init`?this.pendingInit:this.pendingShutdown;!r||r.child!==t||(e===`init`?this.pendingInit=null:this.pendingShutdown=null,r.reject(n))}toError(e,t){return e instanceof Error?e:Error(`${t}: ${String(e)}`)}};let l=null;async function u(){return l||(l=await import(`@huggingface/transformers`),l.env.cacheDir=r(s(),`.cache`,`huggingface`,`transformers-js`)),l}var d=class{pipe=null;shutdownPromise=null;dimensions;modelId;nativeDim;queryPrefix;threadConfig;constructor(e){if(this.modelId=e?.model??a.model,this.nativeDim=e?.nativeDim??1024,this.dimensions=e?.dimensions??a.dimensions,this.dimensions>this.nativeDim)throw Error(`Configured dimensions (${this.dimensions}) exceeds model native output (${this.nativeDim}). Matryoshka truncation cannot upscale — dimensions must be <= nativeDim.`);this.queryPrefix=e?.queryPrefix??this.detectQueryPrefix(this.modelId),this.threadConfig={interOp:e?.interOpNumThreads??1,intraOp:e?.intraOpNumThreads??4}}getPipelineOptions(e){let t=e.backends.onnx;t.wasm||={};let n=t.wasm;return n.numThreads=this.threadConfig.intraOp,{dtype:`q8`,session_options:{interOpNumThreads:this.threadConfig.interOp,intraOpNumThreads:this.threadConfig.intraOp}}}truncateAndRenorm(e){if(this.dimensions>=this.nativeDim)return e;let t=e.subarray(0,this.dimensions),n=0;for(let e=0;e<t.length;e++)n+=t[e]*t[e];if(n=Math.sqrt(n),n===0)return new Float32Array(this.dimensions);let r=new Float32Array(this.dimensions);for(let e=0;e<this.dimensions;e++)r[e]=t[e]/n;return r}detectQueryPrefix(e){let t=e.toLowerCase();return t.includes(`bge`)||t.includes(`mxbai-embed`)?`Represent this sentence for searching relevant passages: `:t.includes(`/e5-`)||t.includes(`multilingual-e5`)?`query: `:``}async initialize(){if(this.pipe)return;this.shutdownPromise=null;let{pipeline:e,env:t}=await u();try{this.pipe=await e(`feature-extraction`,this.modelId,this.getPipelineOptions(t))}catch(n){let i=n.message?.toLowerCase()??``;if(this.isCorruptionError(i)){let n=r(t.cacheDir??r(s(),`.cache`,`huggingface`,`transformers-js`),this.modelId);console.error(`[aikit:auto-heal] Detected corrupted model cache for "${this.modelId}". Clearing cache at ${n} and retrying download...`);try{await o(n,{recursive:!0,force:!0})}catch{}try{this.pipe=await e(`feature-extraction`,this.modelId,this.getPipelineOptions(t)),console.error(`[aikit:auto-heal] Model "${this.modelId}" re-downloaded successfully.`);return}catch(e){throw Error(`Failed to initialize embedding model "${this.modelId}" after auto-heal: ${e.message}`)}}throw Error(`Failed to initialize embedding model "${this.modelId}": ${n.message}`)}}isCorruptionError(e){return[`protobuf`,`invalid model`,`invalid onnx`,`unexpected end`,`unexpected token`,`failed to load`,`checksum`,`corrupt`,`could not load`,`onnx`,`malformed`].some(t=>e.includes(t))}async shutdown(){return this.shutdownPromise||=this._doShutdown(),this.shutdownPromise}async _doShutdown(){let e=this.pipe;if(e)try{let t=e;typeof t.dispose==`function`?await t.dispose():typeof t.model?.dispose==`function`&&await t.model.dispose()}catch{}finally{this.pipe=null}}async embed(e){this.pipe||await this.initialize();let t=await this.pipe?.(e,{pooling:`mean`,normalize:!0});if(!t?.data)throw Error(`Embedding pipeline returned no output`);try{let e=new Float32Array(t.data);return this.truncateAndRenorm(e)}finally{t.dispose?.()}}async embedQuery(e){return this.embed(this.queryPrefix+e)}async embedBatch(e,t=64){if(e.length===0)return[];this.pipe||await this.initialize();let n=[];for(let r=0;r<e.length;r+=t){let i=e.slice(r,r+t),a=await this.pipe?.(i,{pooling:`mean`,normalize:!0});if(!a?.data)throw Error(`Embedding pipeline returned no output`);try{if(i.length===1){let e=new Float32Array(a.data);n.push(this.truncateAndRenorm(e))}else for(let e=0;e<i.length;e++){let t=e*this.nativeDim,r=a.data.slice(t,t+this.nativeDim);n.push(this.truncateAndRenorm(new Float32Array(r)))}}finally{a.dispose?.()}}return n}};export{c as EmbedderProxy,d as OnnxEmbedder};
1
+ import{fork as e}from"node:child_process";import{randomUUID as t}from"node:crypto";import{dirname as n,join as r}from"node:path";import{fileURLToPath as i}from"node:url";import{EMBEDDING_DEFAULTS as a}from"../../core/dist/index.js";import{rm as o}from"node:fs/promises";import{homedir as s}from"node:os";var c=class{options;logger;maxRetries;retryBaseDelayMs;workerPath=r(n(i(import.meta.url)),`embedder-worker.js`);pendingRequests=new Map;childState=new WeakMap;child=null;readyChild=null;pendingInit=null;pendingShutdown=null;initializePromise=null;shutdownPromise=null;currentDimensions;currentModelId;constructor(e={}){this.options=e,this.logger=e.logger,this.maxRetries=Math.max(0,e.maxRetries??3),this.retryBaseDelayMs=Math.max(0,e.retryBaseDelayMs??500),this.currentDimensions=e.dimensions??a.dimensions,this.currentModelId=e.model??a.model}get dimensions(){return this.currentDimensions}get modelId(){return this.currentModelId}async initialize(){if(!(this.readyChild&&this.child===this.readyChild)){if(this.initializePromise)return this.initializePromise;if(this.shutdownPromise){try{await this.shutdownPromise}catch{}if(this.readyChild&&this.child===this.readyChild)return}return this.initializePromise=this.startWorker().finally(()=>{this.initializePromise=null}),this.initializePromise}}async embed(e){return this.sendVectorRequestWithRetry({type:`embed`,text:e})}async embedQuery(e){return this.sendVectorRequestWithRetry({type:`embedQuery`,text:e})}async embedBatch(e,t){return e.length===0?[]:this.withWorkerExitRetry(`embedBatch`,()=>this.sendBatchRequest(e,t))}async sendBatchRequest(e,n){await this.initialize();let r=this.requireReadyChild(),i=t(),a=new Promise((e,t)=>{this.pendingRequests.set(i,{child:r,resolve:t=>e(t),reject:t})});try{r.send({type:`embedBatch`,id:i,texts:e,batchSize:n})}catch(e){throw this.pendingRequests.delete(i),this.toError(e,`Failed to send embedBatch request to worker`)}return a}async shutdown(){if(this.shutdownPromise)return this.shutdownPromise;let e=this.child;if(!e)return;let t=this.requireChildState(e);t.shutdownRequested=!0,this.readyChild===e&&(this.readyChild=null),this.shutdownPromise=new Promise((t,n)=>{this.pendingShutdown={child:e,resolve:t,reject:n}}).finally(()=>{this.shutdownPromise=null});try{e.send({type:`shutdown`})}catch(t){let n=this.toError(t,`Failed to send shutdown request to worker`);throw this.clearChildReference(e),this.rejectPendingForChild(e,n),this.rejectLifecycleIfOwned(`shutdown`,e,n),n}return this.shutdownPromise}async startWorker(){this.child&&this.readyChild!==this.child&&(this.child=null);let e=this.spawnChild();this.child=e,this.readyChild=null;let t=new Promise((t,n)=>{this.pendingInit={child:e,resolve:t,reject:n}});try{e.send({type:`init`,config:this.buildInitConfig()})}catch(t){let n=this.toError(t,`Failed to send init request to worker`);throw this.pendingInit=null,this.clearChildReference(e),n}return t}spawnChild(){let t=e(this.workerPath,[],{env:this.buildChildEnv()});return this.childState.set(t,{idleExitNotified:!1,shutdownRequested:!1,terminated:!1}),t.on(`message`,e=>{this.handleChildMessage(t,e)}),t.once(`error`,e=>{this.handleChildFailure(t,this.toError(e,`Embedder worker failed`))}),t.once(`exit`,(e,n)=>{this.handleChildExit(t,e,n)}),t}handleChildMessage(e,t){switch(t.type){case`ready`:{this.currentDimensions=t.dimensions,this.currentModelId=t.modelId,this.child===e&&(this.readyChild=e);let n=this.pendingInit;n?.child===e&&(this.pendingInit=null,n.resolve());return}case`result`:{let n=this.pendingRequests.get(t.id);if(!n||n.child!==e)return;this.pendingRequests.delete(t.id),n.resolve(new Float32Array(t.data));return}case`batchResult`:{let n=this.pendingRequests.get(t.id);if(!n||n.child!==e)return;this.pendingRequests.delete(t.id),n.resolve(t.data.map(e=>new Float32Array(e)));return}case`error`:{let n=Error(t.message);if(t.id===`init`){this.clearChildReference(e),this.rejectLifecycleIfOwned(`init`,e,n);return}if(t.id===`shutdown`){this.rejectLifecycleIfOwned(`shutdown`,e,n);return}let r=this.pendingRequests.get(t.id);if(!r||r.child!==e)return;this.pendingRequests.delete(t.id),r.reject(n);return}case`idle-exit`:{let t=this.requireChildState(e);t.idleExitNotified=!0,this.clearChildReference(e);return}}}handleChildExit(e,t,n){let r=this.requireChildState(e);if(r.terminated)return;r.terminated=!0,this.clearChildReference(e);let i=r.shutdownRequested||r.idleExitNotified,a=Error(i?`Embedder worker exited before completing request`:`Embedder worker exited unexpectedly (code ${t??`null`}${n?`, signal ${n}`:``})`);this.rejectLifecycleIfOwned(`init`,e,a),i?this.resolveLifecycleIfOwned(`shutdown`,e):this.rejectLifecycleIfOwned(`shutdown`,e,a),this.rejectPendingForChild(e,a)}handleChildFailure(e,t){let n=this.requireChildState(e);n.terminated||(n.terminated=!0,this.clearChildReference(e),this.rejectLifecycleIfOwned(`init`,e,t),this.rejectLifecycleIfOwned(`shutdown`,e,t),this.rejectPendingForChild(e,t))}async sendVectorRequestWithRetry(e){return this.withWorkerExitRetry(e.type,()=>this.sendVectorRequest(e))}async sendVectorRequest(e){await this.initialize();let n=this.requireReadyChild(),r=t(),i=new Promise((e,t)=>{this.pendingRequests.set(r,{child:n,resolve:t=>e(t),reject:t})});try{n.send({...e,id:r})}catch(t){throw this.pendingRequests.delete(r),this.toError(t,`Failed to send ${e.type} request to worker`)}return i}async withWorkerExitRetry(e,t){let n=null;for(let r=0;r<=this.maxRetries;r++)try{return await t()}catch(t){let i=this.toError(t,`Failed to process ${e} request`);if(!this.isWorkerExitError(i))throw i;if(n??=i,r===this.maxRetries)throw n;let a=r+1,o=this.retryBaseDelayMs*2**r;this.logger?.warn?.(`Embedder retry ${a}/${this.maxRetries} after ${o}ms`,{requestType:e,delayMs:o,error:i.message}),this.child=null,this.readyChild=null,await this.wait(o)}throw n??Error(`Failed to process ${e} request`)}isWorkerExitError(e){return/embedder worker exited/i.test(e.message)}wait(e){return new Promise(t=>{setTimeout(t,e)})}requireReadyChild(){if(!this.child||this.readyChild!==this.child)throw Error(`Embedder worker is not initialized`);return this.child}buildInitConfig(){return{model:this.options.model,dimensions:this.options.dimensions,nativeDim:this.options.nativeDim,queryPrefix:this.options.queryPrefix,interOpNumThreads:this.options.interOpNumThreads,intraOpNumThreads:this.options.intraOpNumThreads}}buildChildEnv(){return this.options.idleTimeoutMs===void 0?process.env:{...process.env,AIKIT_EMBED_IDLE_MS:String(this.options.idleTimeoutMs)}}requireChildState(e){let t=this.childState.get(e);if(!t)throw Error(`Embedder worker state not found`);return t}clearChildReference(e){this.child===e&&(this.child=null),this.readyChild===e&&(this.readyChild=null)}rejectPendingForChild(e,t){for(let[n,r]of this.pendingRequests)r.child===e&&(this.pendingRequests.delete(n),r.reject(t))}resolveLifecycleIfOwned(e,t){let n=this.pendingShutdown;!n||n.child!==t||(this.pendingShutdown=null,n.resolve())}rejectLifecycleIfOwned(e,t,n){let r=e===`init`?this.pendingInit:this.pendingShutdown;!r||r.child!==t||(e===`init`?this.pendingInit=null:this.pendingShutdown=null,r.reject(n))}toError(e,t){return e instanceof Error?e:Error(`${t}: ${String(e)}`)}};let l=null;async function u(){return l||(l=await import(`@huggingface/transformers`),l.env.cacheDir=r(s(),`.cache`,`huggingface`,`transformers-js`)),l}var d=class{pipe=null;shutdownPromise=null;dimensions;modelId;nativeDim;queryPrefix;threadConfig;constructor(e){if(this.modelId=e?.model??a.model,this.nativeDim=e?.nativeDim??1024,this.dimensions=e?.dimensions??a.dimensions,this.dimensions>this.nativeDim)throw Error(`Configured dimensions (${this.dimensions}) exceeds model native output (${this.nativeDim}). Matryoshka truncation cannot upscale — dimensions must be <= nativeDim.`);this.queryPrefix=e?.queryPrefix??this.detectQueryPrefix(this.modelId),this.threadConfig={interOp:e?.interOpNumThreads??1,intraOp:e?.intraOpNumThreads??4}}getPipelineOptions(e){let t=e.backends.onnx;t.wasm||={};let n=t.wasm;return n.numThreads=this.threadConfig.intraOp,{dtype:`q8`,session_options:{interOpNumThreads:this.threadConfig.interOp,intraOpNumThreads:this.threadConfig.intraOp}}}truncateAndRenorm(e){if(this.dimensions>=this.nativeDim)return e;let t=e.subarray(0,this.dimensions),n=0;for(let e=0;e<t.length;e++)n+=t[e]*t[e];if(n=Math.sqrt(n),n===0)return new Float32Array(this.dimensions);let r=new Float32Array(this.dimensions);for(let e=0;e<this.dimensions;e++)r[e]=t[e]/n;return r}detectQueryPrefix(e){let t=e.toLowerCase();return t.includes(`bge`)||t.includes(`mxbai-embed`)?`Represent this sentence for searching relevant passages: `:t.includes(`/e5-`)||t.includes(`multilingual-e5`)?`query: `:``}async initialize(){if(this.pipe)return;this.shutdownPromise=null;let{pipeline:e,env:t}=await u();try{this.pipe=await e(`feature-extraction`,this.modelId,this.getPipelineOptions(t))}catch(n){let i=n.message?.toLowerCase()??``;if(this.isCorruptionError(i)){let n=r(t.cacheDir??r(s(),`.cache`,`huggingface`,`transformers-js`),this.modelId);console.error(`[aikit:auto-heal] Detected corrupted model cache for "${this.modelId}". Clearing cache at ${n} and retrying download...`);try{await o(n,{recursive:!0,force:!0})}catch{}try{this.pipe=await e(`feature-extraction`,this.modelId,this.getPipelineOptions(t)),console.error(`[aikit:auto-heal] Model "${this.modelId}" re-downloaded successfully.`);return}catch(e){throw Error(`Failed to initialize embedding model "${this.modelId}" after auto-heal: ${e.message}`)}}throw Error(`Failed to initialize embedding model "${this.modelId}": ${n.message}`)}}isCorruptionError(e){return[`protobuf`,`invalid model`,`invalid onnx`,`unexpected end`,`unexpected token`,`failed to load`,`checksum`,`corrupt`,`could not load`,`onnx`,`malformed`].some(t=>e.includes(t))}async shutdown(){return this.shutdownPromise||=this._doShutdown(),this.shutdownPromise}async _doShutdown(){let e=this.pipe;if(e)try{let t=e;typeof t.dispose==`function`?await t.dispose():typeof t.model?.dispose==`function`&&await t.model.dispose()}catch{}finally{this.pipe=null}}async embed(e){this.pipe||await this.initialize();let t=await this.pipe?.(e,{pooling:`mean`,normalize:!0});if(!t?.data)throw Error(`Embedding pipeline returned no output`);try{let e=new Float32Array(t.data);return this.truncateAndRenorm(e)}finally{t.dispose?.()}}async embedQuery(e){return this.embed(this.queryPrefix+e)}async embedBatch(e,t=64){if(e.length===0)return[];this.pipe||await this.initialize();let n=[];for(let r=0;r<e.length;r+=t){let i=e.slice(r,r+t),a=await this.pipe?.(i,{pooling:`mean`,normalize:!0});if(!a?.data)throw Error(`Embedding pipeline returned no output`);try{if(i.length===1){let e=new Float32Array(a.data);n.push(this.truncateAndRenorm(e))}else for(let e=0;e<i.length;e++){let t=e*this.nativeDim,r=a.data.slice(t,t+this.nativeDim);n.push(this.truncateAndRenorm(new Float32Array(r)))}}finally{a.dispose?.()}}return n}};export{c as EmbedderProxy,d as OnnxEmbedder};