@kybernesis/brain-embed-openai 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,43 @@
1
+ # @kybernesis/brain-embed-openai
2
+
3
+ Portable OpenAI embedding provider for [`@kybernesis/brain-core`](../brain-core)'s
4
+ `EmbeddingProvider` seam. It **produces** vectors (`text-embedding-3-small`, 1536-dim);
5
+ the vector **store** lives separately in
6
+ [`@kybernesis/brain-storage-vec`](../brain-storage-vec). See the
7
+ [0.8.0 kickoff](../../docs/specs/embedding-provider-0.8.0-kickoff.md) §3.5 for why the
8
+ embedder sits in the inference plane, not the storage plane.
9
+
10
+ ## Usage
11
+
12
+ ```ts
13
+ import { createOpenAIEmbedder } from '@kybernesis/brain-embed-openai';
14
+ import { setEmbeddingProvider } from '@kybernesis/brain-core';
15
+
16
+ setEmbeddingProvider(createOpenAIEmbedder());
17
+ ```
18
+
19
+ Once wired, `indexChunk` populates `vectors.db` and `semanticSearch` returns real
20
+ results. Without it, brain-core's vector path no-ops (the seam's default).
21
+
22
+ ## Key resolution (tenant-aware)
23
+
24
+ The provider resolves `OPENAI_API_KEY` **per tenant**:
25
+
26
+ 1. `<tenant.paths.home>/.env` — where the agent's per-brain key is written;
27
+ 2. `process.env.OPENAI_API_KEY` — daemon-global fallback.
28
+
29
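+ One OpenAI client is cached per `tenant.slug`. A tenant with no key degrades to `null`
30
+ (no embedding) rather than throwing — so a mixed fleet (some agents keyed, some not)
31
+ behaves per-agent. The outcome of the first lookup (a client, or "no key") is cached for the lifetime of the embedder, so a key added or rotated later is only picked up by a new `createOpenAIEmbedder()` instance.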
32
+
33
+ ## `openai` is an optional peer dependency
34
+
35
+ The host application provides the `openai` SDK (`>=4`). Tests inject a fake client via
36
+ `createOpenAIEmbedder({ clientFactory })`, so the package builds and tests without it.
37
+
38
+ ## Constraint: 1536 dimensions
39
+
40
+ The vector store schema is built for `EMBEDDING_DIM = 1536`. A different embedding model
41
+ must emit 1536-dim vectors, or `EMBEDDING_DIM` + the `vectors.db` schema move together
42
+ and you re-index. Don't mix models within one populated `vectors.db` — distances across
43
+ models aren't comparable.
@@ -0,0 +1,54 @@
1
+ /**
2
+ * @kybernesis/brain-embed-openai
3
+ *
4
+ * Portable OpenAI embedding provider for brain-core's `EmbeddingProvider`
5
+ * seam. Lives in the INFERENCE plane (next to brain-llm-claude) — it produces
6
+ * vectors; the vector STORE stays in brain-storage-vec (see the 0.8.0 kickoff
7
+ * §3.5).
8
+ *
9
+ * Absorbs the hard-won behaviour from KAD commit df80ea6:
10
+ * - resolve OPENAI_API_KEY per tenant from `<home>/.env` (where the Settings
11
+ * UI writes per-agent keys), falling back to `process.env`;
12
+ * - cache one OpenAI client per tenant (the multi-agent daemon doesn't
13
+ * inherit any one agent's `.env`) — keyed on `t.slug`.
14
+ * The name-vs-path normalization KAD needed is GONE here: TenantContext is the
15
+ * clean identity, so `t.paths.home` + `t.slug` replace the registry lookups.
16
+ *
17
+ * Usage:
18
+ * import { createOpenAIEmbedder } from '@kybernesis/brain-embed-openai';
19
+ * import { setEmbeddingProvider } from '@kybernesis/brain-core';
20
+ * setEmbeddingProvider(createOpenAIEmbedder());
21
+ */
22
+ import type { TenantContext } from '@kybernesis/brain-contracts';
23
+ import type { EmbeddingProvider } from '@kybernesis/brain-core';
24
+ export declare const EMBEDDING_MODEL = "text-embedding-3-small";
25
+ /** The slice of the OpenAI SDK surface we use — keeps us decoupled from its types. */
26
+ export interface OpenAILike {
27
+ embeddings: {
28
+ create(args: {
29
+ model: string;
30
+ input: string;
31
+ }): Promise<{
32
+ data: Array<{
33
+ embedding: number[];
34
+ }>;
35
+ }>;
36
+ };
37
+ }
38
+ /** Build an OpenAI-like client from a resolved key. May be async (the default dynamic-imports `openai`). */
39
+ export type OpenAIClientFactory = (apiKey: string) => OpenAILike | Promise<OpenAILike>;
40
+ export interface OpenAIEmbedderOptions {
41
+ /** Inject a client factory (tests pass a fake; default dynamically imports `openai`). */
42
+ clientFactory?: OpenAIClientFactory;
43
+ /** Override the embedding model (default `text-embedding-3-small`). */
44
+ model?: string;
45
+ }
46
+ /** Resolve the OpenAI key for a tenant: `<home>/.env` first, then `process.env`. */
47
+ export declare function resolveOpenAIKey(t: TenantContext): string | null;
48
+ /**
49
+ * Create a tenant-aware OpenAI `EmbeddingProvider`. One client is resolved and
50
+ * cached per `t.slug`; tenants without a key degrade to `null` (brain-core
51
+ * treats that as "no embedding" — indexChunk no-ops, semanticSearch returns []).
52
+ */
53
+ export declare function createOpenAIEmbedder(opts?: OpenAIEmbedderOptions): EmbeddingProvider;
54
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAIH,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,6BAA6B,CAAC;AACjE,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAC;AAEhE,eAAO,MAAM,eAAe,2BAA2B,CAAC;AAIxD,sFAAsF;AACtF,MAAM,WAAW,UAAU;IACzB,UAAU,EAAE;QACV,MAAM,CAAC,IAAI,EAAE;YAAE,KAAK,EAAE,MAAM,CAAC;YAAC,KAAK,EAAE,MAAM,CAAA;SAAE,GAAG,OAAO,CAAC;YAAE,IAAI,EAAE,KAAK,CAAC;gBAAE,SAAS,EAAE,MAAM,EAAE,CAAA;aAAE,CAAC,CAAA;SAAE,CAAC,CAAC;KACnG,CAAC;CACH;AAED,4GAA4G;AAC5G,MAAM,MAAM,mBAAmB,GAAG,CAAC,MAAM,EAAE,MAAM,KAAK,UAAU,GAAG,OAAO,CAAC,UAAU,CAAC,CAAC;AAEvF,MAAM,WAAW,qBAAqB;IACpC,yFAAyF;IACzF,aAAa,CAAC,EAAE,mBAAmB,CAAC;IACpC,uEAAuE;IACvE,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAiCD,oFAAoF;AACpF,wBAAgB,gBAAgB,CAAC,CAAC,EAAE,aAAa,GAAG,MAAM,GAAG,IAAI,CAKhE;AAcD;;;;GAIG;AACH,wBAAgB,oBAAoB,CAAC,IAAI,GAAE,qBAA0B,GAAG,iBAAiB,CAoCxF"}
package/dist/index.js ADDED
@@ -0,0 +1,125 @@
1
+ /**
2
+ * @kybernesis/brain-embed-openai
3
+ *
4
+ * Portable OpenAI embedding provider for brain-core's `EmbeddingProvider`
5
+ * seam. Lives in the INFERENCE plane (next to brain-llm-claude) — it produces
6
+ * vectors; the vector STORE stays in brain-storage-vec (see the 0.8.0 kickoff
7
+ * §3.5).
8
+ *
9
+ * Absorbs the hard-won behaviour from KAD commit df80ea6:
10
+ * - resolve OPENAI_API_KEY per tenant from `<home>/.env` (where the Settings
11
+ * UI writes per-agent keys), falling back to `process.env`;
12
+ * - cache one OpenAI client per tenant (the multi-agent daemon doesn't
13
+ * inherit any one agent's `.env`) — keyed on `t.slug`.
14
+ * The name-vs-path normalization KAD needed is GONE here: TenantContext is the
15
+ * clean identity, so `t.paths.home` + `t.slug` replace the registry lookups.
16
+ *
17
+ * Usage:
18
+ * import { createOpenAIEmbedder } from '@kybernesis/brain-embed-openai';
19
+ * import { setEmbeddingProvider } from '@kybernesis/brain-core';
20
+ * setEmbeddingProvider(createOpenAIEmbedder());
21
+ */
22
+ import { existsSync, readFileSync } from 'node:fs';
23
+ import { join } from 'node:path';
24
+ export const EMBEDDING_MODEL = 'text-embedding-3-small';
25
+ const EMBEDDING_DIM = 1536; // text-embedding-3-small; must match the vectors.db schema
26
+ const INPUT_CHAR_CAP = 8192; // hard cap, mirrors KAD df80ea6
27
+ /**
28
+ * Parse `<home>/.env` into a key→value map. Trivial KEY=VALUE parser (strips
29
+ * `#` comments and surrounding quotes) — we only need OPENAI_API_KEY, so a full
30
+ * dotenv dep is overkill. Returns {} if the file is missing or unreadable.
31
+ */
32
+ function readEnvFile(home) {
33
+ const path = join(home, '.env');
34
+ if (!existsSync(path))
35
+ return {};
36
+ try {
37
+ const out = {};
38
+ for (const raw of readFileSync(path, 'utf-8').split('\n')) {
39
+ const line = raw.trim();
40
+ if (!line || line.startsWith('#'))
41
+ continue;
42
+ const eq = line.indexOf('=');
43
+ if (eq <= 0)
44
+ continue;
45
+ const key = line.slice(0, eq).trim();
46
+ let value = line.slice(eq + 1).trim();
47
+ if ((value.startsWith('"') && value.endsWith('"')) ||
48
+ (value.startsWith("'") && value.endsWith("'"))) {
49
+ value = value.slice(1, -1);
50
+ }
51
+ if (key)
52
+ out[key] = value;
53
+ }
54
+ return out;
55
+ }
56
+ catch {
57
+ return {};
58
+ }
59
+ }
60
+ /** Resolve the OpenAI key for a tenant: `<home>/.env` first, then `process.env`. */
61
+ export function resolveOpenAIKey(t) {
62
+ const fromEnv = readEnvFile(t.paths.home)['OPENAI_API_KEY'];
63
+ if (fromEnv && fromEnv.trim().length > 0)
64
+ return fromEnv.trim();
65
+ const fromProc = process.env['OPENAI_API_KEY'];
66
+ return fromProc && fromProc.trim().length > 0 ? fromProc.trim() : null;
67
+ }
68
// Memoized `openai` module namespace; stays null until the first default-factory call.
let _openaiModule = null;
/**
 * Default client factory: loads the `openai` package on first use and builds a
 * client for the given key.
 *
 * @param apiKey Resolved OpenAI API key.
 * @returns A new client constructed from the SDK's default (or named `OpenAI`) export.
 * @throws If the package cannot be imported, or exposes neither export.
 */
async function defaultClientFactory(apiKey) {
    // Dynamic import keeps `openai` an OPTIONAL peer — the host provides it.
    const mod = _openaiModule || (_openaiModule = await import('openai'));
    const OpenAI = mod.default ?? mod.OpenAI;
    if (OpenAI)
        return new OpenAI({ apiKey });
    throw new Error('openai SDK has no default/OpenAI export');
}
80
+ /**
81
+ * Create a tenant-aware OpenAI `EmbeddingProvider`. One client is resolved and
82
+ * cached per `t.slug`; tenants without a key degrade to `null` (brain-core
83
+ * treats that as "no embedding" — indexChunk no-ops, semanticSearch returns []).
84
+ */
85
+ export function createOpenAIEmbedder(opts = {}) {
86
+ const model = opts.model ?? EMBEDDING_MODEL;
87
+ const makeClient = opts.clientFactory ?? defaultClientFactory;
88
+ const clients = new Map(); // by t.slug; null = resolved-no-key
89
+ async function getClient(t) {
90
+ if (clients.has(t.slug))
91
+ return clients.get(t.slug) ?? null;
92
+ const key = resolveOpenAIKey(t);
93
+ if (!key) {
94
+ clients.set(t.slug, null);
95
+ return null;
96
+ }
97
+ try {
98
+ const client = await makeClient(key);
99
+ clients.set(t.slug, client);
100
+ return client;
101
+ }
102
+ catch {
103
+ clients.set(t.slug, null);
104
+ return null;
105
+ }
106
+ }
107
+ return {
108
+ async embed(t, text) {
109
+ const client = await getClient(t);
110
+ if (!client)
111
+ return null;
112
+ try {
113
+ const res = await client.embeddings.create({ model, input: text.slice(0, INPUT_CHAR_CAP) });
114
+ const vec = res.data?.[0]?.embedding ?? null;
115
+ if (!vec || vec.length !== EMBEDDING_DIM)
116
+ return null;
117
+ return vec;
118
+ }
119
+ catch {
120
+ return null;
121
+ }
122
+ },
123
+ };
124
+ }
125
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAEH,OAAO,EAAE,UAAU,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACnD,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AAIjC,MAAM,CAAC,MAAM,eAAe,GAAG,wBAAwB,CAAC;AACxD,MAAM,aAAa,GAAG,IAAI,CAAC,CAAC,2DAA2D;AACvF,MAAM,cAAc,GAAG,IAAI,CAAC,CAAC,gCAAgC;AAmB7D;;;;GAIG;AACH,SAAS,WAAW,CAAC,IAAY;IAC/B,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;IAChC,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC;QAAE,OAAO,EAAE,CAAC;IACjC,IAAI,CAAC;QACH,MAAM,GAAG,GAA2B,EAAE,CAAC;QACvC,KAAK,MAAM,GAAG,IAAI,YAAY,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC;YAC1D,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,EAAE,CAAC;YACxB,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC;gBAAE,SAAS;YAC5C,MAAM,EAAE,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;YAC7B,IAAI,EAAE,IAAI,CAAC;gBAAE,SAAS;YACtB,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;YACrC,IAAI,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;YACtC,IACE,CAAC,KAAK,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC;gBAC9C,CAAC,KAAK,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,EAC9C,CAAC;gBACD,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;YAC7B,CAAC;YACD,IAAI,GAAG;gBAAE,GAAG,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC;QAC5B,CAAC;QACD,OAAO,GAAG,CAAC;IACb,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,EAAE,CAAC;IACZ,CAAC;AACH,CAAC;AAED,oFAAoF;AACpF,MAAM,UAAU,gBAAgB,CAAC,CAAgB;IAC/C,MAAM,OAAO,GAAG,WAAW,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,gBAAgB,CAAC,CAAC;IAC5D,IAAI,OAAO,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC;QAAE,OAAO,OAAO,CAAC,IAAI,EAAE,CAAC;IAChE,MAAM,QAAQ,GAAG,OAAO,CAAC,GAAG,CAAC,gBAAgB,CAAC,CAAC;IAC/C,OAAO,QAAQ,IAAI,QAAQ,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;AACzE,CAAC;AAED,IAAI,aAAa,GAAY,IAAI,CAAC;AAClC,KAAK,UAAU,oBAAoB,CAAC,MAAc;IAChD,IAAI,CAAC,aAAa,EAAE,CAAC;QACnB,yEAAyE;QACzE,aAAa,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC,CAAC;IACzC,CAAC;IAC
D,MAAM,GAAG,GAAG,aAA4H,CAAC;IACzI,MAAM,MAAM,GAAG,GAAG,CAAC,OAAO,IAAI,GAAG,CAAC,MAAM,CAAC;IACzC,IAAI,CAAC,MAAM;QAAE,MAAM,IAAI,KAAK,CAAC,yCAAyC,CAAC,CAAC;IACxE,OAAO,IAAI,MAAM,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC;AAChC,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,oBAAoB,CAAC,OAA8B,EAAE;IACnE,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,IAAI,eAAe,CAAC;IAC5C,MAAM,UAAU,GAAG,IAAI,CAAC,aAAa,IAAI,oBAAoB,CAAC;IAC9D,MAAM,OAAO,GAAG,IAAI,GAAG,EAA6B,CAAC,CAAC,oCAAoC;IAE1F,KAAK,UAAU,SAAS,CAAC,CAAgB;QACvC,IAAI,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC;YAAE,OAAO,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,IAAI,CAAC;QAC5D,MAAM,GAAG,GAAG,gBAAgB,CAAC,CAAC,CAAC,CAAC;QAChC,IAAI,CAAC,GAAG,EAAE,CAAC;YACT,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC;YAC1B,OAAO,IAAI,CAAC;QACd,CAAC;QACD,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,GAAG,CAAC,CAAC;YACrC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;YAC5B,OAAO,MAAM,CAAC;QAChB,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC;YAC1B,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAED,OAAO;QACL,KAAK,CAAC,KAAK,CAAC,CAAgB,EAAE,IAAY;YACxC,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,CAAC,CAAC,CAAC;YAClC,IAAI,CAAC,MAAM;gBAAE,OAAO,IAAI,CAAC;YACzB,IAAI,CAAC;gBACH,MAAM,GAAG,GAAG,MAAM,MAAM,CAAC,UAAU,CAAC,MAAM,CAAC,EAAE,KAAK,EAAE,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,cAAc,CAAC,EAAE,CAAC,CAAC;gBAC5F,MAAM,GAAG,GAAG,GAAG,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,SAAS,IAAI,IAAI,CAAC;gBAC7C,IAAI,CAAC,GAAG,IAAI,GAAG,CAAC,MAAM,KAAK,aAAa;oBAAE,OAAO,IAAI,CAAC;gBACtD,OAAO,GAAG,CAAC;YACb,CAAC;YAAC,MAAM,CAAC;gBACP,OAAO,IAAI,CAAC;YACd,CAAC;QACH,CAAC;KACF,CAAC;AACJ,CAAC"}
package/package.json ADDED
@@ -0,0 +1,52 @@
1
+ {
2
+ "name": "@kybernesis/brain-embed-openai",
3
+ "version": "0.7.0",
4
+ "description": "OpenAI text-embedding provider for brain-core (inference-plane EmbeddingProvider)",
5
+ "license": "MIT",
6
+ "author": "David Cruwys (AppyDave)",
7
+ "repository": {
8
+ "type": "git",
9
+ "url": "git+https://github.com/KybernesisAI/cortex.git",
10
+ "directory": "packages/brain-embed-openai"
11
+ },
12
+ "homepage": "https://github.com/KybernesisAI/cortex/tree/main/packages/brain-embed-openai#readme",
13
+ "bugs": {
14
+ "url": "https://github.com/KybernesisAI/cortex/issues"
15
+ },
16
+ "type": "module",
17
+ "main": "./dist/index.js",
18
+ "types": "./dist/index.d.ts",
19
+ "exports": {
20
+ ".": {
21
+ "types": "./dist/index.d.ts",
22
+ "default": "./dist/index.js"
23
+ }
24
+ },
25
+ "files": [
26
+ "dist",
27
+ "README.md"
28
+ ],
29
+ "peerDependencies": {
30
+ "openai": ">=4",
31
+ "@kybernesis/brain-contracts": "0.8.0",
32
+ "@kybernesis/brain-core": "0.8.0"
33
+ },
34
+ "peerDependenciesMeta": {
35
+ "openai": {
36
+ "optional": true
37
+ }
38
+ },
39
+ "devDependencies": {
40
+ "@kybernesis/brain-contracts": "0.8.0",
41
+ "@kybernesis/brain-testkit": "0.8.0",
42
+ "@kybernesis/brain-core": "0.8.0"
43
+ },
44
+ "publishConfig": {
45
+ "access": "public"
46
+ },
47
+ "scripts": {
48
+ "build": "tsc -b",
49
+ "clean": "tsc -b --clean",
50
+ "typecheck": "tsc -b"
51
+ }
52
+ }