@gmickel/gno 0.14.1 → 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -6,6 +6,7 @@
6
6
  [![MIT License](./assets/badges/license.svg)](./LICENSE)
7
7
  [![Website](./assets/badges/website.svg)](https://gno.sh)
8
8
  [![Twitter](./assets/badges/twitter.svg)](https://twitter.com/gmickel)
9
+ [![Discord](./assets/badges/discord.svg)](https://discord.gg/nHEmyJB5tg)
9
10
 
10
11
  > **ClawdHub**: GNO skills bundled for Clawdbot — [clawdhub.com/gmickel/gno](https://clawdhub.com/gmickel/gno)
11
12
 
@@ -0,0 +1,22 @@
1
+ <svg xmlns="http://www.w3.org/2000/svg" width="110" height="28" viewBox="0 0 110 28">
2
+ <defs>
3
+ <linearGradient id="bg3" x1="0%" y1="0%" x2="100%" y2="100%">
4
+ <stop offset="0%" style="stop-color:#1a1a2e"/>
5
+ <stop offset="100%" style="stop-color:#0f0f1a"/>
6
+ </linearGradient>
7
+ <filter id="glow3">
8
+ <feGaussianBlur stdDeviation="0.5" result="coloredBlur"/>
9
+ <feMerge>
10
+ <feMergeNode in="coloredBlur"/>
11
+ <feMergeNode in="SourceGraphic"/>
12
+ </feMerge>
13
+ </filter>
14
+ </defs>
15
+ <rect width="110" height="28" rx="6" fill="url(#bg3)"/>
16
+ <rect x="1" y="1" width="108" height="26" rx="5" fill="none" stroke="#5865F2" stroke-opacity="0.4"/>
17
+ <!-- Discord logo -->
18
+ <g transform="translate(10, 6)" fill="#5865F2">
19
+ <path d="M13.5 4.5C12.5 4 11.4 3.6 10.3 3.4C10.2 3.6 10 3.9 9.9 4.1C8.7 3.9 7.5 3.9 6.4 4.1C6.3 3.9 6.1 3.6 6 3.4C4.9 3.6 3.8 4 2.8 4.5C0.7 7.6 0.1 10.6 0.4 13.6C1.7 14.5 2.9 15.1 4.1 15.5C4.4 15.1 4.7 14.6 4.9 14.1C4.4 13.9 4 13.7 3.6 13.4C3.7 13.3 3.8 13.2 3.9 13.1C6.5 14.3 9.3 14.3 11.9 13.1C12 13.2 12.1 13.3 12.2 13.4C11.8 13.7 11.4 13.9 10.9 14.1C11.1 14.6 11.4 15.1 11.7 15.5C12.9 15.1 14.1 14.5 15.4 13.6C15.7 10.1 14.8 7.1 13.5 4.5ZM5.3 11.6C4.6 11.6 4 10.9 4 10.1C4 9.3 4.6 8.6 5.3 8.6C6 8.6 6.6 9.3 6.6 10.1C6.6 10.9 6 11.6 5.3 11.6ZM10.5 11.6C9.8 11.6 9.2 10.9 9.2 10.1C9.2 9.3 9.8 8.6 10.5 8.6C11.2 8.6 11.8 9.3 11.8 10.1C11.8 10.9 11.2 11.6 10.5 11.6Z" transform="scale(1)"/>
20
+ </g>
21
+ <text x="32" y="18" font-family="ui-monospace,SFMono-Regular,Menlo,Monaco,monospace" font-size="12" font-weight="500" fill="#e2e8f0" filter="url(#glow3)">Discord</text>
22
+ </svg>
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@gmickel/gno",
3
- "version": "0.14.1",
3
+ "version": "0.15.0",
4
4
  "description": "Local semantic search for your documents. Index Markdown, PDF, and Office files with hybrid BM25 + vector search.",
5
5
  "keywords": [
6
6
  "embeddings",
@@ -43,6 +43,7 @@
43
43
  "start": "bun run src/index.ts",
44
44
  "lint": "oxlint --fix --type-aware --type-check && oxfmt .",
45
45
  "lint:check": "oxlint --type-aware --type-check && oxfmt --check .",
46
+ "typecheck": "bunx tsc --noEmit",
46
47
  "test": "bun test",
47
48
  "test:watch": "bun test --watch",
48
49
  "test:coverage": "bun test --coverage",
@@ -109,7 +110,7 @@
109
110
  "remark-gfm": "^4.0.1",
110
111
  "shiki": "^3.20.0",
111
112
  "sqlite-vec": "^0.1.7-alpha.2",
112
- "streamdown": "^1.6.10",
113
+ "streamdown": "^2.0.1",
113
114
  "tailwind-merge": "^3.4.0",
114
115
  "tailwindcss": "^4.1.18",
115
116
  "use-stick-to-bottom": "^1.1.1",
@@ -154,8 +154,20 @@ async function processBatches(ctx: BatchContext): Promise<BatchResult> {
154
154
  );
155
155
  if (!batchEmbedResult.ok) {
156
156
  if (ctx.verbose) {
157
+ const err = batchEmbedResult.error;
158
+ const cause = err.cause;
159
+ const causeMsg =
160
+ cause && typeof cause === "object" && "message" in cause
161
+ ? (cause as { message: string }).message
162
+ : typeof cause === "string"
163
+ ? cause
164
+ : "";
165
+ const titles = batch
166
+ .slice(0, 3)
167
+ .map((b) => b.title ?? b.mirrorHash.slice(0, 8))
168
+ .join(", ");
157
169
  process.stderr.write(
158
- `\n[embed] Batch failed: ${batchEmbedResult.error.message}\n`
170
+ `\n[embed] Batch failed (${batch.length} chunks: ${titles}${batch.length > 3 ? "..." : ""}): ${err.message}${causeMsg ? ` - ${causeMsg}` : ""}\n`
159
171
  );
160
172
  }
161
173
  errors += batch.length;
@@ -0,0 +1,197 @@
1
+ /**
2
+ * HTTP-based embedding port implementation.
3
+ * Calls OpenAI-compatible embedding endpoints.
4
+ *
5
+ * @module src/llm/httpEmbedding
6
+ */
7
+
8
+ import type { EmbeddingPort, LlmResult } from "./types";
9
+
10
+ import { inferenceFailedError } from "./errors";
11
+
12
+ // ─────────────────────────────────────────────────────────────────────────────
13
+ // Types
14
+ // ─────────────────────────────────────────────────────────────────────────────
15
+
16
/**
 * Shape of the JSON body this module reads from an OpenAI-compatible
 * embeddings endpoint.
 */
interface OpenAIEmbeddingResponse {
  /** One entry per input; `index` is used to restore input order. */
  data: {
    embedding: number[];
    index: number;
    object: string;
  }[];
  model: string;
  object: string;
  usage: {
    prompt_tokens: number;
    total_tokens: number;
  };
}
29
+
30
+ // ─────────────────────────────────────────────────────────────────────────────
31
+ // Implementation
32
+ // ─────────────────────────────────────────────────────────────────────────────
33
+
34
/**
 * Embedding port backed by an OpenAI-compatible HTTP embeddings endpoint.
 *
 * The model URI has the form `http://host:port/v1/embeddings#modelname`:
 * everything before `#` is the endpoint URL and everything after it is sent
 * as the `model` field. Without a fragment the whole URI is the endpoint and
 * the last path segment is used as the model name (falling back to
 * "embedding-model" when that segment is empty).
 */
export class HttpEmbedding implements EmbeddingPort {
  private readonly apiUrl: string;
  private readonly modelName: string;
  /** Vector length seen in the first successful response; null until then. */
  private dims: number | null = null;
  readonly modelUri: string;

  /**
   * @param modelUri Endpoint URI, optionally suffixed with `#modelname`.
   * @throws TypeError from `new URL` when no fragment is given and the URI
   *   cannot be parsed.
   */
  constructor(modelUri: string) {
    this.modelUri = modelUri;
    const hashIndex = modelUri.indexOf("#");
    if (hashIndex > 0) {
      this.apiUrl = modelUri.slice(0, hashIndex);
      this.modelName = modelUri.slice(hashIndex + 1);
    } else {
      this.apiUrl = modelUri;
      // No explicit model name: derive one from the URL path or use a default.
      const url = new URL(modelUri);
      const pathParts = url.pathname.split("/");
      this.modelName = pathParts[pathParts.length - 1] || "embedding-model";
    }
  }

  /**
   * Verifies the endpoint by embedding the string "test"; a successful probe
   * also records the vector dimensions.
   */
  async init(): Promise<LlmResult<void>> {
    const probe = await this.embed("test");
    if (!probe.ok) {
      return { ok: false, error: probe.error };
    }
    return { ok: true, value: undefined };
  }

  /**
   * Embeds a single text.
   *
   * @returns The first embedding of the response, or an inference error when
   *   the request fails or the response carries no embedding array.
   */
  async embed(text: string): Promise<LlmResult<number[]>> {
    const response = await this.requestEmbeddings(text);
    if (!response.ok) {
      return { ok: false, error: response.error };
    }

    const vector = response.value[0]?.embedding;
    if (!Array.isArray(vector)) {
      return this.fail(new Error("Invalid response format: missing embedding"));
    }

    this.rememberDimensions(vector);
    return { ok: true, value: vector };
  }

  /**
   * Embeds several texts with one request.
   *
   * @returns Vectors in the same order as `texts`. An empty input yields an
   *   empty result without contacting the endpoint. A response whose number
   *   of embeddings differs from `texts.length` is reported as an error so
   *   vectors can never be paired with the wrong text.
   */
  async embedBatch(texts: string[]): Promise<LlmResult<number[][]>> {
    if (texts.length === 0) {
      return { ok: true, value: [] };
    }

    const response = await this.requestEmbeddings(texts);
    if (!response.ok) {
      return { ok: false, error: response.error };
    }

    const items = response.value;
    if (items.length !== texts.length) {
      return this.fail(
        new Error(
          `Invalid response format: expected ${texts.length} embeddings, received ${items.length}`
        )
      );
    }

    // Restore input order on a copy (the parsed response is left untouched).
    // Entries without a numeric index keep their response position.
    const ordered = items.every((item) => Number.isInteger(item?.index))
      ? [...items].sort((a, b) => a.index - b.index)
      : items;
    const vectors = ordered.map((item) => item?.embedding);

    for (let i = 0; i < vectors.length; i++) {
      if (!Array.isArray(vectors[i])) {
        return this.fail(new Error(`Invalid embedding at index ${i}`));
      }
    }

    const first = vectors[0];
    if (first) {
      this.rememberDimensions(first);
    }

    return { ok: true, value: vectors };
  }

  /**
   * @returns The vector length recorded by an earlier successful call.
   * @throws Error when no embedding has been produced yet.
   */
  dimensions(): number {
    if (this.dims === null) {
      throw new Error("Call init() or embed() first to initialize dimensions");
    }
    return this.dims;
  }

  async dispose(): Promise<void> {
    // Nothing to dispose for HTTP client
  }

  /** POSTs `input` to the endpoint and returns the response's `data` array. */
  private async requestEmbeddings(
    input: string | string[]
  ): Promise<LlmResult<OpenAIEmbeddingResponse["data"]>> {
    try {
      const response = await fetch(this.apiUrl, {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
        },
        body: JSON.stringify({
          input,
          model: this.modelName,
        }),
      });

      if (!response.ok) {
        const errorText = await response.text();
        return this.fail(new Error(`HTTP ${response.status}: ${errorText}`));
      }

      const data = (await response.json()) as OpenAIEmbeddingResponse;
      if (!Array.isArray(data?.data)) {
        return this.fail(
          new Error("Invalid response format: missing data array")
        );
      }
      return { ok: true, value: data.data };
    } catch (e) {
      // Network failures and JSON parse errors end up here.
      return this.fail(e);
    }
  }

  /** Records the vector length the first time a vector is seen. */
  private rememberDimensions(vector: number[]): void {
    if (this.dims === null) {
      this.dims = vector.length;
    }
  }

  /** Wraps any thrown value or Error into a failed result for this model. */
  private fail(cause: unknown): LlmResult<never> {
    return {
      ok: false,
      error: inferenceFailedError(
        this.modelUri,
        cause instanceof Error ? cause : new Error(String(cause))
      ),
    };
  }
}
190
+
191
+ // ─────────────────────────────────────────────────────────────────────────────
192
+ // URI Detection
193
+ // ─────────────────────────────────────────────────────────────────────────────
194
+
195
/** Returns true when `uri` begins with the literal `http://` or `https://`. */
export function isHttpModelUri(uri: string): boolean {
  return /^https?:\/\//.test(uri);
}
@@ -0,0 +1,119 @@
1
+ /**
2
+ * HTTP-based generation port implementation.
3
+ * Calls OpenAI-compatible chat completion endpoints.
4
+ *
5
+ * @module src/llm/httpGeneration
6
+ */
7
+
8
+ import type { GenerationPort, GenParams, LlmResult } from "./types";
9
+
10
+ import { inferenceFailedError } from "./errors";
11
+
12
+ // ─────────────────────────────────────────────────────────────────────────────
13
+ // Types
14
+ // ─────────────────────────────────────────────────────────────────────────────
15
+
16
/**
 * Shape of the JSON body this module reads from an OpenAI-compatible
 * chat completions endpoint.
 */
interface OpenAIChatResponse {
  id: string;
  object: string;
  created: number;
  model: string;
  choices: {
    index: number;
    message: {
      role: string;
      content: string;
      /** Qwen3 thinking mode */
      reasoning_content?: string;
    };
    finish_reason: string;
  }[];
  usage: {
    prompt_tokens: number;
    completion_tokens: number;
    total_tokens: number;
  };
}
36
+
37
+ // ─────────────────────────────────────────────────────────────────────────────
38
+ // Implementation
39
+ // ─────────────────────────────────────────────────────────────────────────────
40
+
41
/**
 * Generation port backed by an OpenAI-compatible chat completions endpoint.
 *
 * The model URI has the form `http://host:port/v1/chat/completions#modelname`:
 * everything before `#` is the endpoint URL and everything after it is sent
 * as the `model` field. Without a fragment the whole URI is the endpoint and
 * the last path segment is used as the model name (falling back to "llama"
 * when that segment is empty).
 */
export class HttpGeneration implements GenerationPort {
  private readonly apiUrl: string;
  private readonly modelName: string;
  readonly modelUri: string;

  /**
   * @param modelUri Endpoint URI, optionally suffixed with `#modelname`.
   * @throws TypeError from `new URL` when no fragment is given and the URI
   *   cannot be parsed.
   */
  constructor(modelUri: string) {
    this.modelUri = modelUri;
    const hashIndex = modelUri.indexOf("#");
    if (hashIndex > 0) {
      this.apiUrl = modelUri.slice(0, hashIndex);
      this.modelName = modelUri.slice(hashIndex + 1);
    } else {
      this.apiUrl = modelUri;
      // No explicit model name: derive one from the URL path or use a default.
      const url = new URL(modelUri);
      const pathParts = url.pathname.split("/");
      this.modelName = pathParts[pathParts.length - 1] || "llama";
    }
  }

  /**
   * Sends `prompt` as a single user message and returns the first choice's
   * content.
   *
   * @param prompt Text of the user message.
   * @param params Optional sampling settings; temperature defaults to 0 and
   *   max tokens to 256. `stop` and `seed` are omitted from the request body
   *   when undefined (JSON.stringify drops undefined properties).
   * @returns The generated text, or an inference error when the request
   *   fails or the response has no `choices[0].message`. A message whose
   *   content is null/undefined yields an empty string.
   */
  async generate(
    prompt: string,
    params?: GenParams
  ): Promise<LlmResult<string>> {
    try {
      const response = await fetch(this.apiUrl, {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
        },
        body: JSON.stringify({
          model: this.modelName,
          messages: [{ role: "user", content: prompt }],
          temperature: params?.temperature ?? 0,
          max_tokens: params?.maxTokens ?? 256,
          stop: params?.stop,
          seed: params?.seed,
        }),
      });

      if (!response.ok) {
        const errorText = await response.text();
        return this.fail(new Error(`HTTP ${response.status}: ${errorText}`));
      }

      const data = (await response.json()) as OpenAIChatResponse;
      const message = data?.choices?.[0]?.message;
      if (!message) {
        // Surface a malformed/empty response instead of pretending the model
        // answered with an empty string.
        return this.fail(
          new Error("Invalid response format: missing choices[0].message")
        );
      }

      return { ok: true, value: message.content ?? "" };
    } catch (e) {
      // Network failures and JSON parse errors end up here.
      return this.fail(e);
    }
  }

  async dispose(): Promise<void> {
    // Nothing to dispose for HTTP client
  }

  /** Wraps any thrown value or Error into a failed result for this model. */
  private fail(cause: unknown): LlmResult<never> {
    return {
      ok: false,
      error: inferenceFailedError(
        this.modelUri,
        cause instanceof Error ? cause : new Error(String(cause))
      ),
    };
  }
}
112
+
113
+ // ─────────────────────────────────────────────────────────────────────────────
114
+ // URI Detection
115
+ // ─────────────────────────────────────────────────────────────────────────────
116
+
117
/** Returns true when `uri` begins with the literal `http://` or `https://`. */
export function isHttpGenUri(uri: string): boolean {
  return /^https?:\/\//.test(uri);
}
@@ -0,0 +1,191 @@
1
+ /**
2
+ * HTTP-based rerank port implementation.
3
+ * Calls OpenAI-compatible completions endpoints for reranking.
4
+ *
5
+ * @module src/llm/httpRerank
6
+ */
7
+
8
+ import type { LlmResult, RerankPort, RerankScore } from "./types";
9
+
10
+ import { inferenceFailedError } from "./errors";
11
+
12
+ // ─────────────────────────────────────────────────────────────────────────────
13
+ // Types
14
+ // ─────────────────────────────────────────────────────────────────────────────
15
+
16
/**
 * Shape of the JSON body this module reads from an OpenAI-compatible
 * text completions endpoint.
 */
interface OpenAICompletionResponse {
  choices: {
    text: string;
    index: number;
    logprobs?: unknown;
    finish_reason: string;
  }[];
  model: string;
  usage: {
    prompt_tokens: number;
    completion_tokens: number;
    total_tokens: number;
  };
}
30
+
31
+ // ─────────────────────────────────────────────────────────────────────────────
32
+ // Implementation
33
+ // ─────────────────────────────────────────────────────────────────────────────
34
+
35
/**
 * Rerank port backed by an OpenAI-compatible text completions endpoint.
 *
 * Each document is turned into an instruct/query/document prompt ending in
 * `<Score>:`; all prompts are sent in one request and the number the model
 * writes after the prompt is parsed as that document's relevance score.
 *
 * The model URI has the form `http://host:port/v1/completions#modelname`:
 * everything before `#` is the endpoint URL and everything after it is sent
 * as the `model` field. Without a fragment the whole URI is the endpoint and
 * the last path segment is used as the model name (falling back to
 * "reranker-model" when that segment is empty).
 */
export class HttpRerank implements RerankPort {
  /** First signed integer or decimal number in the completion text. */
  private static readonly SCORE_PATTERN = /[-+]?[0-9]*\.?[0-9]+/;

  private readonly apiUrl: string;
  private readonly modelName: string;
  private readonly instruction: string;
  readonly modelUri: string;

  /**
   * @param modelUri Endpoint URI, optionally suffixed with `#modelname`.
   * @throws TypeError from `new URL` when no fragment is given and the URI
   *   cannot be parsed.
   */
  constructor(modelUri: string) {
    this.modelUri = modelUri;
    const hashIndex = modelUri.indexOf("#");
    if (hashIndex > 0) {
      this.apiUrl = modelUri.slice(0, hashIndex);
      this.modelName = modelUri.slice(hashIndex + 1);
    } else {
      this.apiUrl = modelUri;
      const url = new URL(modelUri);
      const pathParts = url.pathname.split("/");
      this.modelName = pathParts[pathParts.length - 1] || "reranker-model";
    }
    // Default instruction for retrieval tasks
    this.instruction =
      "Given a web search query, retrieve relevant passages that answer the query";
  }

  /**
   * Scores every document against `query` and ranks them.
   *
   * @returns One entry per document, sorted by score descending, where
   *   `index` is the document's position in `documents` and `rank` starts at
   *   1. An empty `documents` array yields an empty result without a request.
   */
  async rerank(
    query: string,
    documents: string[]
  ): Promise<LlmResult<RerankScore[]>> {
    if (documents.length === 0) {
      return { ok: true, value: [] };
    }

    try {
      const prompts = documents.map((doc) => this.buildPrompt(query, doc));
      const scoresResult = await this.scoreBatch(prompts);
      if (!scoresResult.ok) {
        return { ok: false, error: scoresResult.error };
      }

      const rankedScores: RerankScore[] = scoresResult.value
        .map((score, index) => ({ index, score }))
        .sort((a, b) => b.score - a.score)
        .map((item, position) => ({
          index: item.index,
          score: item.score,
          rank: position + 1,
        }));

      return { ok: true, value: rankedScores };
    } catch (e) {
      return this.fail(e);
    }
  }

  /** Builds the scoring prompt for one query/document pair. */
  private buildPrompt(query: string, document: string): string {
    return `<Instruct>: ${this.instruction}\n<Query>: ${query}\n<Document>: ${document}\n<Score>:`;
  }

  /**
   * Sends all prompts in one completion request and returns one normalized
   * score per prompt, in prompt order.
   *
   * The response must contain exactly one choice per prompt; anything else
   * is reported as an error so documents are never dropped or mis-scored.
   * Choices are matched to prompts by their `index` field when every choice
   * carries a distinct in-range index, otherwise by response position.
   */
  private async scoreBatch(prompts: string[]): Promise<LlmResult<number[]>> {
    try {
      const response = await fetch(this.apiUrl, {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
        },
        body: JSON.stringify({
          model: this.modelName,
          prompt: prompts, // Array of prompts for batching
          max_tokens: 10, // Just need the score
          temperature: 0, // Deterministic
          stop: ["\n", "<"],
        }),
      });

      if (!response.ok) {
        const errorText = await response.text();
        return this.fail(new Error(`HTTP ${response.status}: ${errorText}`));
      }

      const data = (await response.json()) as OpenAICompletionResponse;
      const choices = data?.choices;
      if (!Array.isArray(choices) || choices.length !== prompts.length) {
        const received = Array.isArray(choices) ? choices.length : 0;
        return this.fail(
          new Error(
            `Invalid response format: expected ${prompts.length} choices, received ${received}`
          )
        );
      }

      const indexUsable =
        choices.every(
          (choice) =>
            Number.isInteger(choice?.index) &&
            choice.index >= 0 &&
            choice.index < prompts.length
        ) && new Set(choices.map((choice) => choice.index)).size === choices.length;

      const scores = new Array<number>(prompts.length).fill(0);
      choices.forEach((choice, position) => {
        const slot = indexUsable ? choice.index : position;
        scores[slot] = this.parseScore(choice?.text);
      });

      return { ok: true, value: scores };
    } catch (e) {
      // Network failures and JSON parse errors end up here.
      return this.fail(e);
    }
  }

  /** Extracts and normalizes the score from one completion text; 0 if no number is present. */
  private parseScore(raw: unknown): number {
    const text = typeof raw === "string" ? raw.trim() : "";
    const scoreMatch = text.match(HttpRerank.SCORE_PATTERN);
    if (!scoreMatch) {
      return 0; // Default low score if no number found
    }
    return this.normalizeScore(parseFloat(scoreMatch[0]));
  }

  /**
   * Maps a raw model score into [0, 1] using a range heuristic:
   * outside [-10, 10] a sigmoid is applied; within [0, 1] the value is kept;
   * everything else is treated as a -5..5 scale and clamped.
   *
   * NOTE(review): the mapping is not monotonic across the range boundaries
   * (e.g. 1.0 maps to 1.0 but 1.5 maps to 0.65), so a model that mixes
   * scales within one batch can be ranked inconsistently. Kept as-is to
   * preserve existing scores.
   */
  private normalizeScore(score: number): number {
    if (score < -10 || score > 10) {
      return 1 / (1 + Math.exp(-score));
    }
    if (score >= 0 && score <= 1) {
      return score;
    }
    const normalized = (score + 5) / 10;
    return Math.max(0, Math.min(1, normalized));
  }

  async dispose(): Promise<void> {
    // Nothing to dispose for HTTP client
  }

  /** Wraps any thrown value or Error into a failed result for this model. */
  private fail(cause: unknown): LlmResult<never> {
    return {
      ok: false,
      error: inferenceFailedError(
        this.modelUri,
        cause instanceof Error ? cause : new Error(String(cause))
      ),
    };
  }
}
184
+
185
+ // ─────────────────────────────────────────────────────────────────────────────
186
+ // URI Detection
187
+ // ─────────────────────────────────────────────────────────────────────────────
188
+
189
/** Returns true when `uri` begins with the literal `http://` or `https://`. */
export function isHttpRerankUri(uri: string): boolean {
  return /^https?:\/\//.test(uri);
}
package/src/llm/index.ts CHANGED
@@ -26,6 +26,12 @@ export {
26
26
  } from "./errors";
27
27
  // Adapter
28
28
  export { createLlmAdapter, LlmAdapter } from "./nodeLlamaCpp/adapter";
29
+ // HTTP Embedding
30
+ export { HttpEmbedding, isHttpModelUri } from "./httpEmbedding";
31
+ // HTTP Generation
32
+ export { HttpGeneration, isHttpGenUri } from "./httpGeneration";
33
+ // HTTP Rerank
34
+ export { HttpRerank, isHttpRerankUri } from "./httpRerank";
29
35
  // Lifecycle
30
36
  export {
31
37
  getModelManager,
@@ -16,6 +16,9 @@ import type {
16
16
  } from "../types";
17
17
 
18
18
  import { ModelCache } from "../cache";
19
+ import { HttpEmbedding, isHttpModelUri } from "../httpEmbedding";
20
+ import { HttpGeneration, isHttpGenUri } from "../httpGeneration";
21
+ import { HttpRerank, isHttpRerankUri } from "../httpRerank";
19
22
  import { getActivePreset, getModelConfig } from "../registry";
20
23
  import { NodeLlamaCppEmbedding } from "./embedding";
21
24
  import { NodeLlamaCppGeneration } from "./generation";
@@ -54,6 +57,7 @@ export class LlmAdapter {
54
57
 
55
58
  /**
56
59
  * Create an embedding port.
60
+ * Supports HTTP endpoints for remote embedding models.
57
61
  * With options.policy.allowDownload=true, auto-downloads if not cached.
58
62
  */
59
63
  async createEmbeddingPort(
@@ -64,6 +68,17 @@ export class LlmAdapter {
64
68
  const uri = modelUri ?? preset.embed;
65
69
  const policy = options?.policy ?? DEFAULT_POLICY;
66
70
 
71
+ // Use HTTP embedding for remote endpoints
72
+ if (isHttpModelUri(uri)) {
73
+ const httpEmbed = new HttpEmbedding(uri);
74
+ // Initialize to verify connection and get dimensions
75
+ const initResult = await httpEmbed.init();
76
+ if (!initResult.ok) {
77
+ return { ok: false, error: initResult.error };
78
+ }
79
+ return { ok: true, value: httpEmbed };
80
+ }
81
+
67
82
  // Ensure model is available (downloads if policy allows)
68
83
  const resolved = await this.cache.ensureModel(
69
84
  uri,
@@ -83,6 +98,7 @@ export class LlmAdapter {
83
98
 
84
99
  /**
85
100
  * Create a generation port.
101
+ * Supports HTTP endpoints for remote generation models.
86
102
  * With options.policy.allowDownload=true, auto-downloads if not cached.
87
103
  */
88
104
  async createGenerationPort(
@@ -93,6 +109,12 @@ export class LlmAdapter {
93
109
  const uri = modelUri ?? preset.gen;
94
110
  const policy = options?.policy ?? DEFAULT_POLICY;
95
111
 
112
+ // Use HTTP generation for remote endpoints
113
+ if (isHttpGenUri(uri)) {
114
+ const httpGen = new HttpGeneration(uri);
115
+ return { ok: true, value: httpGen };
116
+ }
117
+
96
118
  // Ensure model is available (downloads if policy allows)
97
119
  const resolved = await this.cache.ensureModel(
98
120
  uri,
@@ -112,6 +134,7 @@ export class LlmAdapter {
112
134
 
113
135
  /**
114
136
  * Create a rerank port.
137
+ * Supports HTTP endpoints for remote reranking models.
115
138
  * With options.policy.allowDownload=true, auto-downloads if not cached.
116
139
  */
117
140
  async createRerankPort(
@@ -122,6 +145,12 @@ export class LlmAdapter {
122
145
  const uri = modelUri ?? preset.rerank;
123
146
  const policy = options?.policy ?? DEFAULT_POLICY;
124
147
 
148
+ // Use HTTP rerank for remote endpoints
149
+ if (isHttpRerankUri(uri)) {
150
+ const httpRerank = new HttpRerank(uri);
151
+ return { ok: true, value: httpRerank };
152
+ }
153
+
125
154
  // Ensure model is available (downloads if policy allows)
126
155
  const resolved = await this.cache.ensureModel(
127
156
  uri,