@wopr-network/platform-core 1.12.2 → 1.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,7 +11,7 @@
11
11
  * - text-generation (DeepSeek, Gemini, MiniMax, Kimi, OpenRouter)
12
12
  * - tts (Chatterbox GPU, ElevenLabs)
13
13
  * - transcription (Deepgram)
14
- * - embeddings (OpenRouter)
14
+ * - embeddings (Ollama GPU, OpenRouter)
15
15
  * - image-generation (Replicate, Nano Banana)
16
16
  */
17
17
  import { type EmbeddingsFactoryConfig } from "./embeddings-factory.js";
@@ -74,7 +74,7 @@ export declare function bootstrapAdapters(config: BootstrapConfig): BootstrapRes
74
74
  * - DEEPSEEK_API_KEY, GEMINI_API_KEY, MINIMAX_API_KEY, KIMI_API_KEY, OPENROUTER_API_KEY (text-gen)
75
75
  * - CHATTERBOX_BASE_URL, ELEVENLABS_API_KEY (TTS)
76
76
  * - DEEPGRAM_API_KEY (transcription)
77
- * - OPENROUTER_API_KEY (embeddings)
77
+ * - OLLAMA_BASE_URL, OPENROUTER_API_KEY (embeddings)
78
78
  * - REPLICATE_API_TOKEN, NANO_BANANA_API_KEY (image-gen)
79
79
  *
80
80
  * Accepts optional per-capability config overrides.
@@ -11,7 +11,7 @@
11
11
  * - text-generation (DeepSeek, Gemini, MiniMax, Kimi, OpenRouter)
12
12
  * - tts (Chatterbox GPU, ElevenLabs)
13
13
  * - transcription (Deepgram)
14
- * - embeddings (OpenRouter)
14
+ * - embeddings (Ollama GPU, OpenRouter)
15
15
  * - image-generation (Replicate, Nano Banana)
16
16
  */
17
17
  import { createEmbeddingsAdapters } from "./embeddings-factory.js";
@@ -76,7 +76,7 @@ export function bootstrapAdapters(config) {
76
76
  * - DEEPSEEK_API_KEY, GEMINI_API_KEY, MINIMAX_API_KEY, KIMI_API_KEY, OPENROUTER_API_KEY (text-gen)
77
77
  * - CHATTERBOX_BASE_URL, ELEVENLABS_API_KEY (TTS)
78
78
  * - DEEPGRAM_API_KEY (transcription)
79
- * - OPENROUTER_API_KEY (embeddings)
79
+ * - OLLAMA_BASE_URL, OPENROUTER_API_KEY (embeddings)
80
80
  * - REPLICATE_API_TOKEN, NANO_BANANA_API_KEY (image-gen)
81
81
  *
82
82
  * Accepts optional per-capability config overrides.
@@ -101,6 +101,7 @@ export function bootstrapAdaptersFromEnv(overrides) {
101
101
  ...overrides?.transcription,
102
102
  },
103
103
  embeddings: {
104
+ ollamaBaseUrl: process.env.OLLAMA_BASE_URL,
104
105
  openrouterApiKey: process.env.OPENROUTER_API_KEY,
105
106
  ...overrides?.embeddings,
106
107
  },
@@ -18,6 +18,7 @@ describe("bootstrapAdapters", () => {
18
18
  deepgramApiKey: "sk-dg",
19
19
  },
20
20
  embeddings: {
21
+ ollamaBaseUrl: "http://ollama:11434",
21
22
  openrouterApiKey: "sk-or",
22
23
  },
23
24
  imageGen: {
@@ -25,9 +26,9 @@ describe("bootstrapAdapters", () => {
25
26
  geminiApiKey: "sk-gem",
26
27
  },
27
28
  });
28
- // 5 text-gen + 2 TTS + 1 transcription + 1 embeddings + 2 image-gen = 11
29
- expect(result.adapters).toHaveLength(11);
30
- expect(result.summary.total).toBe(11);
29
+ // 5 text-gen + 2 TTS + 1 transcription + 2 embeddings + 2 image-gen = 12
30
+ expect(result.adapters).toHaveLength(12);
31
+ expect(result.summary.total).toBe(12);
31
32
  expect(result.summary.skipped).toBe(0);
32
33
  });
33
34
  it("allows duplicate provider names across capabilities", () => {
@@ -64,7 +65,7 @@ describe("bootstrapAdapters", () => {
64
65
  });
65
66
  expect(result.skipped.tts).toEqual(["chatterbox-tts", "elevenlabs"]);
66
67
  expect(result.skipped.transcription).toEqual(["deepgram"]);
67
- expect(result.skipped.embeddings).toEqual(["openrouter"]);
68
+ expect(result.skipped.embeddings).toEqual(["ollama-embeddings", "openrouter"]);
68
69
  expect(result.skipped["text-generation"]).toEqual(["gemini", "minimax", "kimi", "openrouter"]);
69
70
  expect(result.skipped["image-generation"]).toEqual(["replicate", "nano-banana"]);
70
71
  });
@@ -117,12 +118,13 @@ describe("bootstrapAdaptersFromEnv", () => {
117
118
  vi.stubEnv("CHATTERBOX_BASE_URL", "http://chatterbox:8000");
118
119
  vi.stubEnv("ELEVENLABS_API_KEY", "env-el");
119
120
  vi.stubEnv("DEEPGRAM_API_KEY", "env-dg");
121
+ vi.stubEnv("OLLAMA_BASE_URL", "http://ollama:11434");
120
122
  vi.stubEnv("REPLICATE_API_TOKEN", "r8-rep");
121
123
  vi.stubEnv("NANO_BANANA_API_KEY", "env-nb");
122
124
  const result = bootstrapAdaptersFromEnv();
123
- // 5 text-gen + 2 TTS + 1 transcription + 1 embeddings + 2 image-gen = 11
124
- expect(result.adapters).toHaveLength(11);
125
- expect(result.summary.total).toBe(11);
125
+ // 5 text-gen + 2 TTS + 1 transcription + 2 embeddings + 2 image-gen = 12
126
+ expect(result.adapters).toHaveLength(12);
127
+ expect(result.summary.total).toBe(12);
126
128
  });
127
129
  it("returns empty when no env vars set", () => {
128
130
  vi.stubEnv("DEEPSEEK_API_KEY", "");
@@ -133,6 +135,7 @@ describe("bootstrapAdaptersFromEnv", () => {
133
135
  vi.stubEnv("CHATTERBOX_BASE_URL", "");
134
136
  vi.stubEnv("ELEVENLABS_API_KEY", "");
135
137
  vi.stubEnv("DEEPGRAM_API_KEY", "");
138
+ vi.stubEnv("OLLAMA_BASE_URL", "");
136
139
  vi.stubEnv("REPLICATE_API_TOKEN", "");
137
140
  vi.stubEnv("NANO_BANANA_API_KEY", "");
138
141
  const result = bootstrapAdaptersFromEnv();
@@ -148,6 +151,7 @@ describe("bootstrapAdaptersFromEnv", () => {
148
151
  vi.stubEnv("CHATTERBOX_BASE_URL", "");
149
152
  vi.stubEnv("ELEVENLABS_API_KEY", "");
150
153
  vi.stubEnv("DEEPGRAM_API_KEY", "");
154
+ vi.stubEnv("OLLAMA_BASE_URL", "");
151
155
  vi.stubEnv("REPLICATE_API_TOKEN", "");
152
156
  vi.stubEnv("NANO_BANANA_API_KEY", "");
153
157
  const result = bootstrapAdaptersFromEnv({
@@ -7,16 +7,20 @@
7
7
  * registers with an ArbitrageRouter or AdapterSocket.
8
8
  *
9
9
  * Priority order (cheapest first, when all adapters available):
10
- * self-hosted-embeddings (GPU, cheapest — not yet implemented)
10
+ * Ollama (GPU, cheapest — $0.005/1M tokens amortized)
11
11
  * → OpenRouter ($0.02/1M tokens via text-embedding-3-small)
12
12
  */
13
+ import { type OllamaEmbeddingsAdapterConfig } from "./ollama-embeddings.js";
13
14
  import { type OpenRouterAdapterConfig } from "./openrouter.js";
14
15
  import type { ProviderAdapter } from "./types.js";
15
- /** Top-level factory config. Only providers with an API key are instantiated. */
16
+ /** Top-level factory config. Only providers with a key/URL are instantiated. */
16
17
  export interface EmbeddingsFactoryConfig {
18
+ /** Ollama base URL (e.g., "http://ollama:11434"). Omit or empty string to skip. */
19
+ ollamaBaseUrl?: string;
17
20
  /** OpenRouter API key. Omit or empty string to skip. */
18
21
  openrouterApiKey?: string;
19
22
  /** Per-adapter config overrides */
23
+ ollama?: Omit<Partial<OllamaEmbeddingsAdapterConfig>, "baseUrl">;
20
24
  openrouter?: Omit<Partial<OpenRouterAdapterConfig>, "apiKey">;
21
25
  }
22
26
  /** Result of the factory — adapters + metadata for observability. */
@@ -31,16 +35,17 @@ export interface EmbeddingsFactoryResult {
31
35
  /**
32
36
  * Create embeddings adapters from the provided config.
33
37
  *
34
- * Returns only adapters whose API key is present and non-empty.
38
+ * Returns only adapters whose key/URL is present and non-empty.
35
39
  * Order matches arbitrage priority: cheapest first.
36
40
  */
37
41
  export declare function createEmbeddingsAdapters(config: EmbeddingsFactoryConfig): EmbeddingsFactoryResult;
38
42
  /**
39
43
  * Create embeddings adapters from environment variables.
40
44
  *
41
- * Reads API keys from:
45
+ * Reads config from:
46
+ * - OLLAMA_BASE_URL (for self-hosted Ollama embeddings)
42
47
  * - OPENROUTER_API_KEY
43
48
  *
44
49
  * Accepts optional per-adapter overrides.
45
50
  */
46
- export declare function createEmbeddingsAdaptersFromEnv(overrides?: Omit<EmbeddingsFactoryConfig, "openrouterApiKey">): EmbeddingsFactoryResult;
51
+ export declare function createEmbeddingsAdaptersFromEnv(overrides?: Omit<EmbeddingsFactoryConfig, "ollamaBaseUrl" | "openrouterApiKey">): EmbeddingsFactoryResult;
@@ -7,19 +7,31 @@
7
7
  * registers with an ArbitrageRouter or AdapterSocket.
8
8
  *
9
9
  * Priority order (cheapest first, when all adapters available):
10
- * self-hosted-embeddings (GPU, cheapest — not yet implemented)
10
+ * Ollama (GPU, cheapest — $0.005/1M tokens amortized)
11
11
  * → OpenRouter ($0.02/1M tokens via text-embedding-3-small)
12
12
  */
13
+ import { createOllamaEmbeddingsAdapter } from "./ollama-embeddings.js";
13
14
  import { createOpenRouterAdapter } from "./openrouter.js";
14
15
  /**
15
16
  * Create embeddings adapters from the provided config.
16
17
  *
17
- * Returns only adapters whose API key is present and non-empty.
18
+ * Returns only adapters whose key/URL is present and non-empty.
18
19
  * Order matches arbitrage priority: cheapest first.
19
20
  */
20
21
  export function createEmbeddingsAdapters(config) {
21
22
  const adapters = [];
22
23
  const skipped = [];
24
+ // Ollama — $0.005/1M tokens (self-hosted GPU, cheapest)
25
+ if (config.ollamaBaseUrl) {
26
+ adapters.push(createOllamaEmbeddingsAdapter({
27
+ baseUrl: config.ollamaBaseUrl,
28
+ costPerUnit: 0.000000005,
29
+ ...config.ollama,
30
+ }));
31
+ }
32
+ else {
33
+ skipped.push("ollama-embeddings");
34
+ }
23
35
  // OpenRouter — $0.02/1M tokens (text-embedding-3-small via OpenAI)
24
36
  if (config.openrouterApiKey) {
25
37
  adapters.push(createOpenRouterAdapter({ ...config.openrouter, apiKey: config.openrouterApiKey }));
@@ -27,7 +39,6 @@ export function createEmbeddingsAdapters(config) {
27
39
  else {
28
40
  skipped.push("openrouter");
29
41
  }
30
- // Future: self-hosted-embeddings will go BEFORE openrouter (GPU tier, cheapest)
31
42
  const adapterMap = new Map();
32
43
  for (const adapter of adapters) {
33
44
  adapterMap.set(adapter.name, adapter);
@@ -37,13 +48,15 @@ export function createEmbeddingsAdapters(config) {
37
48
  /**
38
49
  * Create embeddings adapters from environment variables.
39
50
  *
40
- * Reads API keys from:
51
+ * Reads config from:
52
+ * - OLLAMA_BASE_URL (for self-hosted Ollama embeddings)
41
53
  * - OPENROUTER_API_KEY
42
54
  *
43
55
  * Accepts optional per-adapter overrides.
44
56
  */
45
57
  export function createEmbeddingsAdaptersFromEnv(overrides) {
46
58
  return createEmbeddingsAdapters({
59
+ ollamaBaseUrl: process.env.OLLAMA_BASE_URL,
47
60
  openrouterApiKey: process.env.OPENROUTER_API_KEY,
48
61
  ...overrides,
49
62
  });
@@ -1,54 +1,106 @@
1
1
  import { afterAll, beforeEach, describe, expect, it, vi } from "vitest";
2
2
  import { createEmbeddingsAdapters, createEmbeddingsAdaptersFromEnv } from "./embeddings-factory.js";
3
+ import * as ollamaModule from "./ollama-embeddings.js";
3
4
  import * as openrouterModule from "./openrouter.js";
4
5
  describe("createEmbeddingsAdapters", () => {
5
- it("creates adapter when API key provided", () => {
6
+ it("creates all adapters when all config provided", () => {
6
7
  const result = createEmbeddingsAdapters({
8
+ ollamaBaseUrl: "http://ollama:11434",
7
9
  openrouterApiKey: "sk-or",
8
10
  });
9
- expect(result.adapters).toHaveLength(1);
10
- expect(result.adapterMap.size).toBe(1);
11
+ expect(result.adapters).toHaveLength(2);
12
+ expect(result.adapterMap.size).toBe(2);
11
13
  expect(result.skipped).toHaveLength(0);
12
14
  });
13
- it("adapter is openrouter", () => {
15
+ it("orders adapters cheapest first (ollama before openrouter)", () => {
14
16
  const result = createEmbeddingsAdapters({
17
+ ollamaBaseUrl: "http://ollama:11434",
15
18
  openrouterApiKey: "sk-or",
16
19
  });
20
+ expect(result.adapters[0].name).toBe("ollama-embeddings");
21
+ expect(result.adapters[1].name).toBe("openrouter");
22
+ });
23
+ it("ollama adapter is self-hosted", () => {
24
+ const result = createEmbeddingsAdapters({
25
+ ollamaBaseUrl: "http://ollama:11434",
26
+ });
27
+ expect(result.adapters[0].selfHosted).toBe(true);
28
+ });
29
+ it("creates only openrouter when no ollama URL", () => {
30
+ const result = createEmbeddingsAdapters({
31
+ openrouterApiKey: "sk-or",
32
+ });
33
+ expect(result.adapters).toHaveLength(1);
17
34
  expect(result.adapters[0].name).toBe("openrouter");
35
+ expect(result.skipped).toEqual(["ollama-embeddings"]);
18
36
  });
19
- it("skips openrouter when no API key", () => {
37
+ it("creates only ollama when no openrouter key", () => {
38
+ const result = createEmbeddingsAdapters({
39
+ ollamaBaseUrl: "http://ollama:11434",
40
+ });
41
+ expect(result.adapters).toHaveLength(1);
42
+ expect(result.adapters[0].name).toBe("ollama-embeddings");
43
+ expect(result.skipped).toEqual(["openrouter"]);
44
+ });
45
+ it("skips both when no config", () => {
20
46
  const result = createEmbeddingsAdapters({});
21
47
  expect(result.adapters).toHaveLength(0);
22
- expect(result.skipped).toEqual(["openrouter"]);
48
+ expect(result.skipped).toEqual(["ollama-embeddings", "openrouter"]);
49
+ });
50
+ it("skips ollama with empty string URL", () => {
51
+ const result = createEmbeddingsAdapters({
52
+ ollamaBaseUrl: "",
53
+ });
54
+ expect(result.adapters).toHaveLength(0);
55
+ expect(result.skipped).toContain("ollama-embeddings");
23
56
  });
24
- it("skips adapter with empty string key", () => {
57
+ it("skips openrouter with empty string key", () => {
25
58
  const result = createEmbeddingsAdapters({
26
59
  openrouterApiKey: "",
27
60
  });
28
61
  expect(result.adapters).toHaveLength(0);
29
62
  expect(result.skipped).toContain("openrouter");
30
63
  });
31
- it("adapter supports embeddings capability", () => {
64
+ it("both adapters support embeddings capability", () => {
32
65
  const result = createEmbeddingsAdapters({
66
+ ollamaBaseUrl: "http://ollama:11434",
33
67
  openrouterApiKey: "sk-or",
34
68
  });
35
- expect(result.adapters[0].capabilities).toContain("embeddings");
69
+ for (const adapter of result.adapters) {
70
+ expect(adapter.capabilities).toContain("embeddings");
71
+ }
36
72
  });
37
- it("adapter implements embed", () => {
73
+ it("both adapters implement embed", () => {
38
74
  const result = createEmbeddingsAdapters({
75
+ ollamaBaseUrl: "http://ollama:11434",
39
76
  openrouterApiKey: "sk-or",
40
77
  });
41
- expect(typeof result.adapters[0].embed).toBe("function");
78
+ for (const adapter of result.adapters) {
79
+ expect(typeof adapter.embed).toBe("function");
80
+ }
42
81
  });
43
82
  it("adapterMap keys match adapter names", () => {
44
83
  const result = createEmbeddingsAdapters({
84
+ ollamaBaseUrl: "http://ollama:11434",
45
85
  openrouterApiKey: "sk-or",
46
86
  });
47
87
  for (const [key, adapter] of result.adapterMap) {
48
88
  expect(key).toBe(adapter.name);
49
89
  }
50
90
  });
51
- it("passes per-adapter config overrides to adapter constructor", () => {
91
+ it("passes per-adapter config overrides to ollama constructor", () => {
92
+ const spy = vi.spyOn(ollamaModule, "createOllamaEmbeddingsAdapter");
93
+ createEmbeddingsAdapters({
94
+ ollamaBaseUrl: "http://ollama:11434",
95
+ ollama: { marginMultiplier: 1.5 },
96
+ });
97
+ expect(spy).toHaveBeenCalledWith(expect.objectContaining({
98
+ baseUrl: "http://ollama:11434",
99
+ marginMultiplier: 1.5,
100
+ }));
101
+ spy.mockRestore();
102
+ });
103
+ it("passes per-adapter config overrides to openrouter constructor", () => {
52
104
  const spy = vi.spyOn(openrouterModule, "createOpenRouterAdapter");
53
105
  createEmbeddingsAdapters({
54
106
  openrouterApiKey: "sk-or",
@@ -60,15 +112,6 @@ describe("createEmbeddingsAdapters", () => {
60
112
  }));
61
113
  spy.mockRestore();
62
114
  });
63
- it("apiKey cannot be overridden via openrouter config", () => {
64
- // Ensure apiKey always comes from openrouterApiKey, not from spread
65
- const result = createEmbeddingsAdapters({
66
- openrouterApiKey: "sk-real",
67
- openrouter: { apiKey: "sk-evil" },
68
- });
69
- expect(result.adapters).toHaveLength(1);
70
- expect(result.adapters[0].name).toBe("openrouter");
71
- });
72
115
  });
73
116
  describe("createEmbeddingsAdaptersFromEnv", () => {
74
117
  beforeEach(() => {
@@ -77,28 +120,39 @@ describe("createEmbeddingsAdaptersFromEnv", () => {
77
120
  afterAll(() => {
78
121
  vi.unstubAllEnvs();
79
122
  });
80
- it("reads key from environment variable", () => {
123
+ it("reads keys from environment variables", () => {
124
+ vi.stubEnv("OLLAMA_BASE_URL", "http://ollama:11434");
81
125
  vi.stubEnv("OPENROUTER_API_KEY", "env-or");
82
126
  const result = createEmbeddingsAdaptersFromEnv();
83
- expect(result.adapters).toHaveLength(1);
84
- expect(result.adapters[0].name).toBe("openrouter");
127
+ expect(result.adapters).toHaveLength(2);
128
+ expect(result.adapters[0].name).toBe("ollama-embeddings");
129
+ expect(result.adapters[1].name).toBe("openrouter");
85
130
  expect(result.skipped).toHaveLength(0);
86
131
  });
87
- it("returns empty when no env var set", () => {
132
+ it("returns empty when no env vars set", () => {
133
+ vi.stubEnv("OLLAMA_BASE_URL", "");
88
134
  vi.stubEnv("OPENROUTER_API_KEY", "");
89
135
  const result = createEmbeddingsAdaptersFromEnv();
90
136
  expect(result.adapters).toHaveLength(0);
91
- expect(result.skipped).toEqual(["openrouter"]);
137
+ expect(result.skipped).toEqual(["ollama-embeddings", "openrouter"]);
92
138
  });
93
- it("passes per-adapter overrides alongside env key to adapter constructor", () => {
139
+ it("creates only ollama when only OLLAMA_BASE_URL set", () => {
140
+ vi.stubEnv("OLLAMA_BASE_URL", "http://ollama:11434");
141
+ vi.stubEnv("OPENROUTER_API_KEY", "");
142
+ const result = createEmbeddingsAdaptersFromEnv();
143
+ expect(result.adapters).toHaveLength(1);
144
+ expect(result.adapters[0].name).toBe("ollama-embeddings");
145
+ });
146
+ it("passes per-adapter overrides alongside env vars", () => {
147
+ vi.stubEnv("OLLAMA_BASE_URL", "http://ollama:11434");
94
148
  vi.stubEnv("OPENROUTER_API_KEY", "env-or");
95
- const spy = vi.spyOn(openrouterModule, "createOpenRouterAdapter");
149
+ const spy = vi.spyOn(ollamaModule, "createOllamaEmbeddingsAdapter");
96
150
  createEmbeddingsAdaptersFromEnv({
97
- openrouter: { marginMultiplier: 1.2 },
151
+ ollama: { marginMultiplier: 1.1 },
98
152
  });
99
153
  expect(spy).toHaveBeenCalledWith(expect.objectContaining({
100
- apiKey: "env-or",
101
- marginMultiplier: 1.2,
154
+ baseUrl: "http://ollama:11434",
155
+ marginMultiplier: 1.1,
102
156
  }));
103
157
  spy.mockRestore();
104
158
  });
@@ -0,0 +1,40 @@
1
+ /**
2
+ * Ollama self-hosted embeddings adapter — embeddings on our own GPU infrastructure.
3
+ *
4
+ * Points at a self-hosted Ollama container running on our internal network.
5
+ * Same ProviderAdapter interface as OpenRouter embeddings, but with:
6
+ * - No API key required (internal container-to-container)
7
+ * - Amortized GPU cost instead of third-party API invoicing
8
+ * - Lower margin (cheaper for users = the standard pricing tier)
9
+ *
10
+ * Uses Ollama's OpenAI-compatible /v1/embeddings endpoint, so it works with
11
+ * any Ollama-hosted embedding model (nomic-embed-text, mxbai-embed-large, etc.).
12
+ *
13
+ * Cost model:
14
+ * Base cost = total_tokens * costPerToken
15
+ * Default costPerToken = $0.000000005 (GPU depreciation + electricity)
16
+ * Charge = base_cost * marginMultiplier (e.g., 1.2 = 20% margin vs 30% for third-party)
17
+ */
18
+ import type { FetchFn, SelfHostedAdapterConfig } from "./self-hosted-base.js";
19
+ import type { ProviderAdapter } from "./types.js";
20
+ export type { FetchFn };
21
+ /**
22
+ * Configuration for the Ollama embeddings adapter.
23
+ *
24
+ * Cost precedence: `costPerToken` (if set) > `costPerUnit` (from SelfHostedAdapterConfig).
25
+ * Use `costPerToken` for adapter-specific overrides; `costPerUnit` is the base config
26
+ * shared across all self-hosted adapters.
27
+ */
28
+ export interface OllamaEmbeddingsAdapterConfig extends SelfHostedAdapterConfig {
29
+ /** Cost per token in USD (amortized GPU time, default: $0.000000005). Takes precedence over costPerUnit. */
30
+ costPerToken?: number;
31
+ /** Default embedding model (default: "nomic-embed-text") */
32
+ defaultModel?: string;
33
+ }
34
+ /**
35
+ * Create an Ollama self-hosted embeddings adapter.
36
+ *
37
+ * Uses factory function pattern (not class) for minimal API surface and easy
38
+ * dependency injection of fetch for testing.
39
+ */
40
+ export declare function createOllamaEmbeddingsAdapter(config: OllamaEmbeddingsAdapterConfig, fetchFn?: FetchFn): ProviderAdapter & Required<Pick<ProviderAdapter, "embed">>;
@@ -0,0 +1,76 @@
1
+ /**
2
+ * Ollama self-hosted embeddings adapter — embeddings on our own GPU infrastructure.
3
+ *
4
+ * Points at a self-hosted Ollama container running on our internal network.
5
+ * Same ProviderAdapter interface as OpenRouter embeddings, but with:
6
+ * - No API key required (internal container-to-container)
7
+ * - Amortized GPU cost instead of third-party API invoicing
8
+ * - Lower margin (cheaper for users = the standard pricing tier)
9
+ *
10
+ * Uses Ollama's OpenAI-compatible /v1/embeddings endpoint, so it works with
11
+ * any Ollama-hosted embedding model (nomic-embed-text, mxbai-embed-large, etc.).
12
+ *
13
+ * Cost model:
14
+ * Base cost = total_tokens * costPerToken
15
+ * Default costPerToken = $0.000000005 (GPU depreciation + electricity)
16
+ * Charge = base_cost * marginMultiplier (e.g., 1.2 = 20% margin vs 30% for third-party)
17
+ */
18
+ import { Credit } from "@wopr-network/platform-core/credits";
19
+ import { withMargin } from "./types.js";
20
+ // ~4x cheaper than OpenRouter's text-embedding-3-small ($0.02/1M tokens)
21
+ const DEFAULT_COST_PER_TOKEN = 0.000000005; // $0.005 per 1M tokens
22
+ const DEFAULT_MARGIN = 1.2; // 20% vs 30% for third-party
23
+ const DEFAULT_MODEL = "nomic-embed-text";
24
+ /**
25
+ * Create an Ollama self-hosted embeddings adapter.
26
+ *
27
+ * Uses factory function pattern (not class) for minimal API surface and easy
28
+ * dependency injection of fetch for testing.
29
+ */
30
+ export function createOllamaEmbeddingsAdapter(config, fetchFn = fetch) {
31
+ const costPerToken = config.costPerToken ?? config.costPerUnit ?? DEFAULT_COST_PER_TOKEN;
32
+ const marginMultiplier = config.marginMultiplier ?? DEFAULT_MARGIN;
33
+ const defaultModel = config.defaultModel ?? DEFAULT_MODEL;
34
+ const timeoutMs = config.timeoutMs ?? 30000;
35
+ return {
36
+ name: "ollama-embeddings",
37
+ capabilities: ["embeddings"],
38
+ selfHosted: true,
39
+ async embed(input) {
40
+ const model = input.model ?? defaultModel;
41
+ const body = {
42
+ input: input.input,
43
+ model,
44
+ };
45
+ if (input.dimensions !== undefined) {
46
+ body.dimensions = input.dimensions;
47
+ }
48
+ const base = config.baseUrl.replace(/\/+$/, "");
49
+ const res = await fetchFn(`${base}/v1/embeddings`, {
50
+ method: "POST",
51
+ headers: {
52
+ "Content-Type": "application/json",
53
+ },
54
+ body: JSON.stringify(body),
55
+ signal: AbortSignal.timeout(timeoutMs),
56
+ });
57
+ if (!res.ok) {
58
+ const text = await res.text();
59
+ throw new Error(`Ollama embeddings error (${res.status}): ${text}`);
60
+ }
61
+ const data = (await res.json());
62
+ const totalTokens = data.usage?.total_tokens ?? 0;
63
+ const cost = Credit.fromDollars(totalTokens * costPerToken);
64
+ const charge = withMargin(cost, marginMultiplier);
65
+ return {
66
+ result: {
67
+ embeddings: data.data.map((d) => d.embedding),
68
+ model: data.model,
69
+ totalTokens,
70
+ },
71
+ cost,
72
+ charge,
73
+ };
74
+ },
75
+ };
76
+ }