saeeol 1.0.9 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/npm/bin/saeeol +42 -0
- package/npm/package.json +39 -0
- package/npm/postinstall.js +162 -0
- package/package.json +2 -2
- package/src/cli/cmd/mcp-refresh.ts +47 -0
- package/src/cli/cmd/mcp.ts +3 -1
- package/src/cli/cmd/tui/app-commands-core.tsx +11 -0
- package/src/cli/cmd/tui/app-commands-system.tsx +20 -0
- package/src/cli/cmd/tui/app-events.ts +43 -0
- package/src/cli/cmd/tui/app.tsx +4 -0
- package/src/cli/cmd/tui/component/dialog-model.tsx +2 -2
- package/src/cli/cmd/tui/component/prompt/use-prompt-memos.ts +1 -1
- package/src/cli/cmd/tui/component/use-connected.tsx +1 -1
- package/src/cli/cmd/tui/context/local.tsx +10 -3
- package/src/cli/cmd/tui/context/route.tsx +5 -1
- package/src/cli/cmd/tui/feature-plugins/sidebar/context.tsx +1 -1
- package/src/cli/cmd/tui/plugin/api.tsx +7 -3
- package/src/cli/cmd/tui/routes/local-models.tsx +151 -0
- package/src/cli/cmd/tui/routes/session/subagent-footer.tsx +1 -1
- package/src/cli/cmd/tui/util/model.ts +1 -1
- package/src/config/config-schema.ts +44 -0
- package/src/ltm/config.ts +124 -0
- package/src/ltm/events.ts +50 -0
- package/src/ltm/index.ts +12 -0
- package/src/ltm/memory/episodic.ts +83 -0
- package/src/ltm/memory/procedural.ts +102 -0
- package/src/ltm/memory/semantic.ts +80 -0
- package/src/ltm/pipeline.ts +155 -0
- package/src/ltm/retrieval.ts +62 -0
- package/src/ltm/scheduler.ts +55 -0
- package/src/ltm/store.ts +150 -0
- package/src/ltm/types.ts +108 -0
- package/src/mcp/index.ts +32 -1
- package/src/provider/custom-loaders.ts +12 -0
- package/src/provider/loader-local.ts +185 -0
- package/src/provider/local/embedder.ts +220 -0
- package/src/provider/local/events.ts +74 -0
- package/src/provider/local/gpu.ts +93 -0
- package/src/provider/local/hub.ts +174 -0
- package/src/provider/local/index.ts +10 -0
- package/src/provider/local/model-manager.ts +113 -0
- package/src/provider/local/orchestrator.ts +301 -0
- package/src/provider/local/rag.ts +112 -0
- package/src/provider/local/types.ts +142 -0
- package/src/provider/provider-conversion.ts +2 -0
- package/src/provider/provider-schema.ts +17 -2
- package/src/provider/provider-schemas.ts +10 -3
- package/src/provider/provider-state.ts +10 -2
- package/src/provider/provider.ts +2 -1
- package/src/saeeol/plugins/sidebar-usage.tsx +1 -1
- package/src/server/routes/instance/config.ts +1 -1
- package/src/server/routes/instance/httpapi/api.ts +2 -0
- package/src/server/routes/instance/httpapi/groups/local.ts +87 -0
- package/src/server/routes/instance/httpapi/groups/mcp.ts +10 -0
- package/src/server/routes/instance/httpapi/handlers/local.ts +95 -0
- package/src/server/routes/instance/httpapi/handlers/mcp.ts +5 -0
- package/src/server/routes/instance/httpapi/handlers/provider.ts +1 -1
- package/src/server/routes/instance/httpapi/server.ts +2 -0
- package/src/server/routes/instance/provider.ts +2 -2
- package/src/session/prompt-reminders.ts +29 -0
- package/test/fake/provider.ts +1 -0
- package/test/provider/local.test.ts +208 -0
- package/test/provider/provider-category.test.ts +190 -0
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
/** Local provider loaders — Ollama, LM Studio, vLLM, llama.cpp, text-generation-webui */
|
|
2
|
+
|
|
3
|
+
import os from "os"
|
|
4
|
+
import { Effect } from "effect"
|
|
5
|
+
import { InstallationVersion } from "@saeeol/core/installation/version"
|
|
6
|
+
import type { CustomDep, CustomLoader } from "./provider-types"
|
|
7
|
+
import type { Info } from "./provider-schema"
|
|
8
|
+
import { useLanguageModel } from "./bundled-providers"
|
|
9
|
+
import { iife } from "@/util/iife"
|
|
10
|
+
|
|
11
|
+
/**
 * Builds the User-Agent header value sent to a local inference backend.
 *
 * @param name - Backend identifier placed after the saeeol version.
 * @returns `saeeol/<version> <name> (<platform> <release>; <arch>)`.
 */
function ua(name: string): string {
  const product = `saeeol/${InstallationVersion}`
  const host = `${os.platform()} ${os.release()}; ${os.arch()}`
  return `${product} ${name} (${host})`
}
|
|
14
|
+
|
|
15
|
+
/** Matches a leading URL scheme such as `http://`. */
const URL_SCHEME = /^[a-z][a-z0-9+.-]*:\/\//i

/**
 * Returns the first candidate that is a non-blank string, trimmed and with
 * trailing slashes removed, or `fallback` when no candidate qualifies.
 */
function resolveEndpoint(candidates: ReadonlyArray<unknown>, fallback: string): string {
  for (const candidate of candidates) {
    if (typeof candidate !== "string") continue
    const cleaned = candidate.trim().replace(/\/+$/, "")
    if (cleaned !== "") return cleaned
  }
  return fallback
}

/**
 * Prefixes `http://` onto a scheme-less `host[:port]` string (the usual form
 * of `OLLAMA_HOST`) so it can be passed to `fetch`. Non-strings and blank
 * strings are returned untouched. No default port is added.
 */
function withHttpScheme(value: unknown): unknown {
  if (typeof value !== "string") return value
  const trimmed = value.trim()
  if (trimmed === "" || URL_SCHEME.test(trimmed)) return value
  return `http://${trimmed}`
}

/** Resolves a model handle from an SDK instance: `languageModel` when the SDK calls for it, `chat` otherwise. */
async function chatModel(sdk: any, modelID: string, _options?: Record<string, any>) {
  if (useLanguageModel(sdk)) return sdk.languageModel(modelID)
  return sdk.chat(modelID)
}

/**
 * Builds the model record registered for a discovered model. Capabilities,
 * cost and limits are fixed placeholders; nothing here is read from the server.
 */
function discoveredModel(providerID: string, id: string, url: string): Record<string, any> {
  return {
    id,
    name: id,
    providerID,
    api: { id, url, npm: "@ai-sdk/openai-compatible" },
    capabilities: {
      temperature: true,
      reasoning: false,
      attachment: false,
      toolcall: true,
      input: { text: true, image: false, audio: false, video: false, pdf: false },
      output: { text: true, image: false, audio: false, video: false, pdf: false },
      interleaved: false,
    },
    cost: { input: 0, output: 0, cache: { read: 0, write: 0 } },
    limit: { context: 32768, output: 4096 },
    status: "active" as const,
    options: {},
    headers: {},
    release_date: "",
  }
}

/** Extracts the non-empty string values of `idKey` from the array stored under `listKey` in a JSON payload. */
function listedIds(payload: unknown, listKey: string, idKey: string): string[] {
  const list = (payload as Record<string, unknown> | null | undefined)?.[listKey]
  if (!Array.isArray(list)) return []
  return list
    .map((entry) => entry?.[idKey] as unknown)
    .filter((id): id is string => typeof id === "string" && id !== "")
}

/**
 * Fetches a model listing (5 s timeout) and converts it into model records.
 * Discovery is best-effort: any HTTP, network or parse failure yields `{}`.
 */
async function discover(
  listURL: string,
  providerID: string,
  apiURL: string,
  idsOf: (payload: unknown) => string[],
): Promise<Record<string, any>> {
  try {
    const res = await fetch(listURL, { signal: AbortSignal.timeout(5000) })
    if (!res.ok) return {}
    const payload: unknown = await res.json()
    const models: Record<string, any> = {}
    for (const id of idsOf(payload)) models[id] = discoveredModel(providerID, id, apiURL)
    return models
  } catch {
    return {}
  }
}

/** Parts of a loader result shared by every local backend. */
function baseLoader(name: string, envKey: string, endpoint: string) {
  return {
    autoload: true,
    options: {
      baseURL: endpoint,
      headers: { "User-Agent": ua(name) },
    },
    vars(): Record<string, string> {
      return { [envKey]: endpoint }
    },
    getModel: chatModel,
  }
}

/**
 * Custom loaders for locally hosted inference backends.
 *
 * Each loader resolves its endpoint from `options.baseURL`, then the backend's
 * environment variable(s), then a localhost default. Ollama and LM Studio also
 * expose `discoverModels()`, which lists the models the server reports.
 *
 * NOTE(review): the llama.cpp and text-generation-webui defaults carry no
 * `/v1` suffix while vLLM and LM Studio do — confirm against how `baseURL` is
 * consumed by the SDK.
 *
 * @param dep - Loader dependencies; only `dep.env()` is used here.
 * @returns Loaders keyed by provider id.
 */
export function localLoaders(dep: CustomDep): Record<string, CustomLoader> {
  return {
    ollama: Effect.fnUntraced(function* (_provider: Info) {
      const env = yield* dep.env()
      const endpoint = resolveEndpoint(
        // OLLAMA_HOST is conventionally a bare host:port, so give it a scheme.
        [_provider.options?.baseURL, env["OLLAMA_BASE_URL"], withHttpScheme(env["OLLAMA_HOST"])],
        "http://localhost:11434",
      )
      // Tolerate a base URL that already points at the OpenAI-compatible path.
      const root = endpoint.replace(/\/v1$/, "")

      return {
        ...baseLoader("ollama", "OLLAMA_BASE_URL", endpoint),
        discoverModels() {
          return discover(`${root}/api/tags`, "ollama", `${root}/v1`, (payload) =>
            listedIds(payload, "models", "name"),
          )
        },
      }
    }),

    lmstudio: Effect.fnUntraced(function* (_provider: Info) {
      const env = yield* dep.env()
      const endpoint = resolveEndpoint(
        [_provider.options?.baseURL, env["LMSTUDIO_BASE_URL"]],
        "http://localhost:1234/v1",
      )

      return {
        ...baseLoader("lmstudio", "LMSTUDIO_BASE_URL", endpoint),
        discoverModels() {
          return discover(`${endpoint}/models`, "lmstudio", endpoint, (payload) =>
            listedIds(payload, "data", "id"),
          )
        },
      }
    }),

    vllm: Effect.fnUntraced(function* (_provider: Info) {
      const env = yield* dep.env()
      const endpoint = resolveEndpoint(
        [_provider.options?.baseURL, env["VLLM_BASE_URL"]],
        "http://localhost:8000/v1",
      )
      return baseLoader("vllm", "VLLM_BASE_URL", endpoint)
    }),

    "text-generation-webui": Effect.fnUntraced(function* (_provider: Info) {
      const env = yield* dep.env()
      const endpoint = resolveEndpoint(
        [_provider.options?.baseURL, env["TEXT_GEN_WEBUI_BASE_URL"]],
        "http://localhost:5000",
      )
      return baseLoader("text-generation-webui", "TEXT_GEN_WEBUI_BASE_URL", endpoint)
    }),

    "llama.cpp": Effect.fnUntraced(function* (_provider: Info) {
      const env = yield* dep.env()
      const endpoint = resolveEndpoint(
        [_provider.options?.baseURL, env["LLAMA_CPP_BASE_URL"]],
        "http://localhost:8080",
      )
      return baseLoader("llama.cpp", "LLAMA_CPP_BASE_URL", endpoint)
    }),
  }
}
|
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
/** 로컬 임베딩 서버 — Ollama 기반 임베딩 모델 실행 관리 */
|
|
2
|
+
|
|
3
|
+
import { Effect } from "effect"
|
|
4
|
+
import * as Log from "@saeeol/core/util/log"
|
|
5
|
+
import { Process } from "@/util/process"
|
|
6
|
+
import type { EmbeddingServer, EmbedderStatus, HardwareProfile, LLMBakeParams } from "@/ltm/types"
|
|
7
|
+
import type { BackendType } from "@/provider/local/types"
|
|
8
|
+
import * as GPU from "@/provider/local/gpu"
|
|
9
|
+
import * as RAG from "@/provider/local/rag"
|
|
10
|
+
import * as Hub from "@/provider/local/hub"
|
|
11
|
+
import * as Manager from "@/provider/local/model-manager"
|
|
12
|
+
import { iife } from "@/util/iife"
|
|
13
|
+
import * as Bus from "@/bus"
|
|
14
|
+
import { LTMEvent } from "@/ltm/events"
|
|
15
|
+
|
|
16
|
+
const log = Log.create({ service: "local/embedder" })
|
|
17
|
+
|
|
18
|
+
let server: EmbeddingServer | undefined
|
|
19
|
+
|
|
20
|
+
// ── Ollama 상태 확인 ──
|
|
21
|
+
|
|
22
|
+
/**
 * Probes the Ollama server by requesting `/api/tags` with a 3 s timeout.
 *
 * @param endpoint - Base URL of the Ollama server.
 * @returns `true` when the request completes with an OK status; `false` on a
 *   non-OK status or any thrown error (including timeout).
 */
async function isOllamaRunning(endpoint: string): Promise<boolean> {
  let reachable: boolean
  try {
    const { ok } = await fetch(`${endpoint}/api/tags`, { signal: AbortSignal.timeout(3000) })
    reachable = ok
  } catch {
    reachable = false
  }
  return reachable
}
|
|
30
|
+
|
|
31
|
+
/**
 * Reports whether `model` is currently loaded in memory on the Ollama server.
 *
 * Uses `GET /api/ps`, which lists running models. `/api/tags` lists installed
 * models, so checking it here would always report an installed model as
 * loaded and the warm-up step would never run.
 *
 * @param endpoint - Base URL of the Ollama server.
 * @param model - Ollama model name, with or without a `:latest` tag.
 * @returns `true` when a running model matches; `false` otherwise, including
 *   on a non-OK response or any thrown error (3 s timeout).
 */
async function isModelLoaded(endpoint: string, model: string): Promise<boolean> {
  try {
    const res = await fetch(`${endpoint}/api/ps`, { signal: AbortSignal.timeout(3000) })
    if (!res.ok) return false
    const data = (await res.json()) as { models?: Array<{ name?: string; model?: string }> }
    const wanted = new Set([model, `${model}:latest`])
    return (data.models ?? []).some((m) =>
      [m.name, m.model].some((id) => id !== undefined && wanted.has(id)),
    )
  } catch {
    return false
  }
}
|
|
41
|
+
|
|
42
|
+
/**
 * Checks whether an embedding model appears in the Ollama server's
 * `/api/tags` listing.
 *
 * A listed name matches when it equals `model`, equals `model` plus
 * `:latest`, or equals `model` once its `:latest` substring is removed.
 *
 * @param endpoint - Base URL of the Ollama server.
 * @param model - Ollama model name to look for.
 * @returns `true` on a match; `false` otherwise, including on a non-OK
 *   response or any thrown error (5 s timeout).
 */
async function isModelInstalled(endpoint: string, model: string): Promise<boolean> {
  const matches = (name: string): boolean => {
    if (name === model) return true
    if (name === `${model}:latest`) return true
    return name.replace(":latest", "") === model
  }

  try {
    const response = await fetch(`${endpoint}/api/tags`, { signal: AbortSignal.timeout(5000) })
    if (!response.ok) return false
    const listing = (await response.json()) as { models?: Array<{ name: string }> }
    for (const entry of listing.models ?? []) {
      if (matches(entry.name)) return true
    }
    return false
  } catch {
    return false
  }
}
|
|
55
|
+
|
|
56
|
+
// ── Ollama에 모델 pull ──
|
|
57
|
+
|
|
58
|
+
/**
 * Asks the Ollama server to pull a model via a non-streaming `POST /api/pull`.
 *
 * @param endpoint - Base URL of the Ollama server.
 * @param model - Internal model id or Ollama model name; it is passed through
 *   `getOllamaModelName` before the request is made.
 * @throws Error when the server answers with a non-OK status; any fetch
 *   failure (including the 5 minute timeout) is logged and rethrown.
 */
async function pullModel(endpoint: string, model: string): Promise<void> {
  log.info("pulling embedding model", { model })
  const ollamaModel = getOllamaModelName(model)

  try {
    const response = await fetch(`${endpoint}/api/pull`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ name: ollamaModel, stream: false }),
      signal: AbortSignal.timeout(300_000), // 5 minute timeout
    })
    if (response.ok) {
      log.info("model pulled", { model: ollamaModel })
      return
    }
    // The body is only read for the error message; fall back if unreadable.
    const detail = await response.text().catch(() => "unknown error")
    throw new Error(`Ollama pull failed: ${response.status} ${detail}`)
  } catch (cause) {
    log.error("failed to pull model", { model: ollamaModel, error: cause })
    throw cause
  }
}
|
|
79
|
+
|
|
80
|
+
/**
 * Internal embedding model id → Ollama model name.
 *
 * NOTE(review): several distinct ids map to `nomic-embed-text`; confirm the
 * configured embedding dimensions still match the model that is actually run.
 */
const OLLAMA_MODEL_NAMES: ReadonlyMap<string, string> = new Map([
  ["bge-small-en", "nomic-embed-text"],
  ["bge-base-en", "nomic-embed-text"],
  ["bge-large-en", "nomic-embed-text"],
  ["bge-m3", "bge-m3"],
  ["nomic-embed", "nomic-embed-text"],
  ["all-minilm-l6", "all-minilm"],
  ["gte-small", "nomic-embed-text"],
])

/**
 * Maps an internal model id to the name Ollama knows it by.
 *
 * A `Map` is used so that ids colliding with `Object.prototype` members
 * (e.g. `"constructor"`) are not resolved to inherited values.
 *
 * @param modelId - Internal model id, or an Ollama model name.
 * @returns The mapped Ollama name, or `modelId` unchanged when it has no entry.
 */
function getOllamaModelName(modelId: string): string {
  return OLLAMA_MODEL_NAMES.get(modelId) ?? modelId
}
|
|
93
|
+
|
|
94
|
+
// ── 임베딩 API 호출 ──
|
|
95
|
+
|
|
96
|
+
/**
 * Requests embeddings for a batch of texts from Ollama's `POST /api/embed`.
 *
 * @param endpoint - Base URL of the Ollama server.
 * @param model - Internal model id or Ollama model name; mapped through
 *   `getOllamaModelName` before the request.
 * @param texts - Inputs to embed. An empty array resolves to `[]` without
 *   contacting the server.
 * @returns One vector per input, in the order returned by the server.
 * @throws Error on a non-OK response, when the payload has no `embeddings`
 *   array, or when the number of vectors differs from the number of inputs.
 *   Fetch failures (including the 30 s timeout) propagate unchanged.
 */
async function embedViaOllama(endpoint: string, model: string, texts: string[]): Promise<number[][]> {
  if (texts.length === 0) return []

  const ollamaModel = getOllamaModelName(model)
  const res = await fetch(`${endpoint}/api/embed`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ model: ollamaModel, input: texts }),
    signal: AbortSignal.timeout(30_000),
  })

  if (!res.ok) {
    const err = await res.text().catch(() => "unknown error")
    throw new Error(`Embedding failed: ${res.status} ${err}`)
  }

  const data = (await res.json()) as { embeddings?: unknown } | null
  const embeddings = data?.embeddings
  if (!Array.isArray(embeddings)) {
    throw new Error("Invalid embedding response")
  }
  // A short response would otherwise surface later as an undefined vector.
  if (embeddings.length !== texts.length) {
    throw new Error(
      `Invalid embedding response: expected ${texts.length} vectors, got ${embeddings.length}`,
    )
  }
  return embeddings as number[][]
}
|
|
116
|
+
|
|
117
|
+
// ── VRAM 추정 ──
|
|
118
|
+
|
|
119
|
+
/**
 * Estimates the VRAM an embedding model occupies, in megabytes.
 *
 * Looks the model up in `RAG.EMBEDDING_MODELS` and returns its `sizeBytes`
 * scaled by 1.2, converted to MB and rounded up.
 *
 * @param modelId - Internal embedding model id.
 * @returns Estimated MB, or 200 when the id is not in the catalogue.
 */
function estimateEmbeddingVRAM(modelId: string): number {
  for (const candidate of RAG.EMBEDDING_MODELS) {
    if (candidate.id !== modelId) continue
    return Math.ceil(candidate.sizeBytes * 1.2 / (1024 * 1024))
  }
  return 200
}
|
|
124
|
+
|
|
125
|
+
// ── 공개 API ──
|
|
126
|
+
|
|
127
|
+
/**
 * Resolves the Ollama endpoint from `OLLAMA_BASE_URL`, then `OLLAMA_HOST`,
 * ignoring blank values. A value without a URL scheme (the usual
 * `host:port` form of `OLLAMA_HOST`) is prefixed with `http://`.
 * Falls back to `http://localhost:11434`.
 */
function resolveOllamaEndpoint(): string {
  for (const raw of [process.env.OLLAMA_BASE_URL, process.env.OLLAMA_HOST]) {
    const value = raw?.trim()
    if (!value) continue
    return /^[a-z][a-z0-9+.-]*:\/\//i.test(value) ? value : `http://${value}`
  }
  return "http://localhost:11434"
}

/**
 * Starts the embedding server backed by a local Ollama instance.
 *
 * Steps: return the existing server if it is already running; otherwise
 * register a new server record in `"starting"` state, check that Ollama is
 * reachable, pull the model when it is not installed, issue a warm-up
 * embedding when the model is not loaded, then mark the server `"running"`.
 * Every status transition is published as `LTMEvent.EmbedderStatusChanged`.
 *
 * @param bake - Supplies `embeddingModel` and `embeddingDimensions`.
 * @returns The server record. Its status is `"error"` when Ollama is
 *   unreachable or the warm-up call fails.
 * @throws Whatever `pullModel` throws; the record is marked `"error"` and the
 *   event published before the error is rethrown, so the status never stays
 *   at `"starting"`.
 */
export async function start(bake: LLMBakeParams): Promise<EmbeddingServer> {
  if (server && server.status === "running") {
    log.info("embedder already running", { model: server.model })
    return server
  }

  const endpoint = resolveOllamaEndpoint()

  // Keep a local handle: `stop()` may clear the module-level reference while
  // this function is suspended on an await.
  const current: EmbeddingServer = {
    id: "local-embedding",
    model: bake.embeddingModel,
    status: "starting",
    endpoint,
    dimensions: bake.embeddingDimensions,
    vramMB: estimateEmbeddingVRAM(bake.embeddingModel),
  }
  server = current

  const fail = (): EmbeddingServer => {
    current.status = "error"
    void Bus.publish(LTMEvent.EmbedderStatusChanged, { status: "error", model: bake.embeddingModel })
    return current
  }

  void Bus.publish(LTMEvent.EmbedderStatusChanged, { status: "starting", model: bake.embeddingModel })

  // Is Ollama reachable?
  const running = await isOllamaRunning(endpoint)
  if (!running) {
    log.warn("Ollama not running, embedding server unavailable", { endpoint })
    return fail()
  }

  // Is the model installed? Pull it when it is not.
  const ollamaModel = getOllamaModelName(bake.embeddingModel)
  const installed = await isModelInstalled(endpoint, ollamaModel)
  if (!installed) {
    log.info("model not installed, pulling", { model: ollamaModel })
    try {
      await pullModel(endpoint, ollamaModel)
    } catch (e) {
      fail()
      throw e
    }
  }

  // Is the model loaded? (Ollama loads it automatically on first use.)
  const loaded = await isModelLoaded(endpoint, ollamaModel)
  if (!loaded) {
    // Warm-up call so the model is brought into memory before first real use.
    try {
      await embedViaOllama(endpoint, bake.embeddingModel, ["warmup"])
      log.info("model warmed up", { model: ollamaModel })
    } catch (e) {
      log.error("warmup failed", { model: ollamaModel, error: e })
      return fail()
    }
  }

  current.status = "running"
  void Bus.publish(LTMEvent.EmbedderStatusChanged, { status: "running", model: bake.embeddingModel })
  log.info("embedding server started", { model: bake.embeddingModel, dimensions: bake.embeddingDimensions })

  return current
}
|
|
188
|
+
|
|
189
|
+
/**
 * Stops the embedding server.
 *
 * Marks the current server record `"stopped"`, publishes
 * `LTMEvent.EmbedderStatusChanged`, logs, and clears the module-level
 * reference. Does nothing when no server is registered.
 */
export async function stop(): Promise<void> {
  const active = server
  if (!active) return

  active.status = "stopped"
  void Bus.publish(LTMEvent.EmbedderStatusChanged, { status: "stopped" })
  log.info("embedding server stopped")
  server = undefined
}
|
|
197
|
+
|
|
198
|
+
/**
 * Current embedding server record.
 *
 * @returns The module-level server object itself (not a copy), or
 *   `undefined` when none is registered.
 */
export function status(): EmbeddingServer | undefined {
  const current = server
  return current
}
|
|
202
|
+
|
|
203
|
+
/**
 * Converts an array of texts into embedding vectors using the running server.
 *
 * @param texts - Inputs to embed.
 * @returns The vectors returned by `embedViaOllama` for the server's
 *   endpoint and model.
 * @throws Error("Embedding server not running") when no server is registered
 *   or its status is not `"running"`.
 */
export async function embed(texts: string[]): Promise<number[][]> {
  const active = server
  if (active === undefined || active.status !== "running") {
    throw new Error("Embedding server not running")
  }
  const { endpoint, model } = active
  return embedViaOllama(endpoint, model, texts)
}
|
|
210
|
+
|
|
211
|
+
/**
 * Converts a single text into its embedding vector.
 *
 * @param text - Input to embed.
 * @returns The first vector returned by `embed([text])`.
 * @throws Error when `embed` returns no vector, instead of handing back
 *   `undefined` typed as `number[]`; errors from `embed` propagate unchanged.
 */
export async function embedOne(text: string): Promise<number[]> {
  const [vector] = await embed([text])
  if (vector === undefined) {
    throw new Error("Embedding response contained no vector")
  }
  return vector
}
|
|
216
|
+
|
|
217
|
+
/**
 * VRAM attributed to the embedding server, in MB.
 *
 * @returns The registered server's `vramMB`, or 0 when no server is
 *   registered or the value is nullish.
 */
export function vramUsage(): number {
  const reserved = server?.vramMB
  return reserved ?? 0
}
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
/** Local model events — TUI/webview subscription for progress/status updates */
|
|
2
|
+
|
|
3
|
+
import { BusEvent } from "@/bus/bus-event"
|
|
4
|
+
import { Schema } from "effect"
|
|
5
|
+
|
|
6
|
+
/** `id` and `repo` string fields carried by most local-model events. */
const modelRef = {
  id: Schema.String,
  repo: Schema.String,
}

/** `modelRef` plus an `error` message string, used by the failure events. */
const modelFailure = {
  ...modelRef,
  error: Schema.String,
}

/**
 * Bus event definitions for local model downloads, model lifecycle and GPU
 * profiling. Each entry pairs a topic name with the schema of its payload.
 */
export const LocalModelEvent = {
  /** `local.download.started` — id, repo, filename and total size. */
  DownloadStarted: BusEvent.define(
    "local.download.started",
    Schema.Struct({ ...modelRef, filename: Schema.String, totalBytes: Schema.Number }),
  ),

  /** `local.download.progress` — id, downloaded/total counters and a finite speed in MB/s. */
  DownloadProgress: BusEvent.define(
    "local.download.progress",
    Schema.Struct({
      id: Schema.String,
      downloaded: Schema.Number,
      total: Schema.Number,
      speedMBps: Schema.Finite,
    }),
  ),

  /** `local.download.completed` — id, repo, filename and resulting path. */
  DownloadCompleted: BusEvent.define(
    "local.download.completed",
    Schema.Struct({ ...modelRef, filename: Schema.String, path: Schema.String }),
  ),

  /** `local.download.failed` — id, repo and error message. */
  DownloadFailed: BusEvent.define("local.download.failed", Schema.Struct({ ...modelFailure })),

  /** `local.model.started` — id, repo, endpoint and an optional GPU index. */
  ModelStarted: BusEvent.define(
    "local.model.started",
    Schema.Struct({
      ...modelRef,
      endpoint: Schema.String,
      gpuIndex: Schema.optional(Schema.Number),
    }),
  ),

  /** `local.model.stopped` — id and repo. */
  ModelStopped: BusEvent.define("local.model.stopped", Schema.Struct({ ...modelRef })),

  /** `local.model.error` — id, repo and error message. */
  ModelError: BusEvent.define("local.model.error", Schema.Struct({ ...modelFailure })),

  /** `local.gpu.profiled` — GPU count plus total and available VRAM in MB. */
  GPUProfiled: BusEvent.define(
    "local.gpu.profiled",
    Schema.Struct({
      gpuCount: Schema.Number,
      totalVRAMMB: Schema.Number,
      availableVRAMMB: Schema.Number,
    }),
  ),
}
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
/** GPU profiler — detect hardware capabilities for model scheduling */
|
|
2
|
+
|
|
3
|
+
import { Effect } from "effect"
|
|
4
|
+
import * as Log from "@saeeol/core/util/log"
|
|
5
|
+
import { Process } from "@/util/process"
|
|
6
|
+
import type { GPUInfo, GPUProfile } from "./types"
|
|
7
|
+
|
|
8
|
+
const log = Log.create({ service: "local/gpu" })
|
|
9
|
+
|
|
10
|
+
/**
 * Parses `nvidia-smi` CSV output into structured GPU info.
 *
 * Expected columns, as produced by
 * `--query-gpu=index,name,memory.total,memory.used,memory.free,compute_cap,driver_version,cuda_version --format=csv,noheader,nounits`.
 *
 * Lines with fewer than five columns, or whose first column is not a
 * non-negative integer (diagnostic text, blank lines), are skipped. Memory
 * columns that are not numeric (e.g. `[N/A]`) become 0 so that `NaN` never
 * reaches the VRAM totals. Optional columns that are missing, empty, or a
 * bracketed placeholder such as `[N/A]` become `undefined`.
 *
 * @param stdout - Raw standard output of the `nvidia-smi` query.
 * @returns One entry per parsed GPU row, in output order.
 */
function parseNvidiaSmi(stdout: string): GPUInfo[] {
  const numeric = (field: string | undefined): number =>
    field === undefined || field === "" ? Number.NaN : Number(field)

  const megabytes = (field: string | undefined): number => {
    const value = numeric(field)
    return Number.isFinite(value) ? value : 0
  }

  const optional = (field: string | undefined): string | undefined =>
    field === undefined || field === "" || field.startsWith("[") ? undefined : field

  const gpus: GPUInfo[] = []
  for (const line of stdout.split("\n")) {
    const parts = line.split(",").map((s) => s.trim())
    if (parts.length < 5) continue

    const [rawIndex, name, total, used, free, computeCap, driver, cuda] = parts
    const index = numeric(rawIndex)
    if (!Number.isInteger(index) || index < 0) continue

    gpus.push({
      index,
      name: name ?? "",
      vramTotalMB: megabytes(total),
      vramUsedMB: megabytes(used),
      vramFreeMB: megabytes(free),
      computeCapability: optional(computeCap),
      driverVersion: optional(driver),
      cudaVersion: optional(cuda),
    })
  }
  return gpus
}
|
|
30
|
+
|
|
31
|
+
/**
 * Profiles the GPUs visible to `nvidia-smi`.
 *
 * Runs the `nvidia-smi` query; if the process cannot be run or exits with a
 * non-zero code, yields an empty profile with `cudaAvailable: false`.
 * Otherwise parses the output and sums total and free VRAM across GPUs.
 */
export const profile: Effect.Effect<GPUProfile> = Effect.gen(function* () {
  const command = [
    "nvidia-smi",
    "--query-gpu=index,name,memory.total,memory.used,memory.free,compute_cap,driver_version,cuda_version",
    "--format=csv,noheader,nounits",
  ]
  // A failure to spawn is folded into a synthetic non-zero result.
  const unavailable = () =>
    Effect.succeed({ code: 1, stdout: Buffer.alloc(0), stderr: Buffer.alloc(0) } as Process.Result)

  const outcome = yield* Effect.tryPromise(() => Process.run(command)).pipe(Effect.catch(unavailable))

  if (outcome.code !== 0) {
    log.info("no nvidia-smi found, GPU profiling unavailable")
    return { gpus: [], totalVRAMMB: 0, availableVRAMMB: 0, cudaAvailable: false }
  }

  const gpus = parseNvidiaSmi(outcome.stdout.toString())
  let totalVRAMMB = 0
  let availableVRAMMB = 0
  for (const gpu of gpus) {
    totalVRAMMB = totalVRAMMB + gpu.vramTotalMB
    availableVRAMMB = availableVRAMMB + gpu.vramFreeMB
  }
  return { gpus, totalVRAMMB, availableVRAMMB, cudaAvailable: gpus.length > 0 }
})
|
|
59
|
+
|
|
60
|
+
/**
 * Estimates the VRAM a model needs, in megabytes.
 *
 * The model's file size is taken as the baseline and 20% is added on top for
 * runtime overhead; the result is converted from bytes to MB and rounded up.
 *
 * @param modelBytes - Size of the model file in bytes.
 * @param quantization - Quantization label; accepted but not used in the estimate.
 * @returns Estimated VRAM in MB.
 */
export function estimateVRAM(modelBytes: number, quantization: string): number {
  const bytesPerMB = 1024 * 1024
  const runtimeExtra = modelBytes * 0.2
  const requiredBytes = modelBytes + runtimeExtra
  return Math.ceil(requiredBytes / bytesPerMB)
}
|
|
67
|
+
|
|
68
|
+
/**
 * Picks the GPU with the most free VRAM among those that can hold `vramMB`.
 *
 * @param profile - GPU profile to search.
 * @param vramMB - Required free VRAM in MB.
 * @returns The qualifying GPU with the largest `vramFreeMB` (the earliest one
 *   on ties), or `undefined` when none has enough free memory.
 */
export function findBestGPU(profile: GPUProfile, vramMB: number): GPUInfo | undefined {
  let best: GPUInfo | undefined
  for (const gpu of profile.gpus) {
    if (!(gpu.vramFreeMB >= vramMB)) continue
    // Strict comparison keeps the first GPU when several tie.
    if (best === undefined || gpu.vramFreeMB > best.vramFreeMB) best = gpu
  }
  return best
}
|
|
75
|
+
|
|
76
|
+
/**
 * Tells whether a model fits in the profile's available GPU memory.
 *
 * The comparison is against `availableVRAMMB`, the figure stored on the
 * profile, not against any single GPU.
 *
 * @param profile - GPU profile to check.
 * @param vramMB - Required VRAM in MB.
 * @returns `true` when `availableVRAMMB` is at least `vramMB`.
 */
export function canFit(profile: GPUProfile, vramMB: number): boolean {
  const { availableVRAMMB } = profile
  return availableVRAMMB >= vramMB
}
|
|
80
|
+
|
|
81
|
+
/**
 * Suggests a quantization level that fits in the given amount of VRAM.
 *
 * Levels are tried in a fixed order from highest quality (`fp16`) to lowest
 * (`q2_k`); levels with no entry in `modelSizeBytes` are skipped.
 *
 * @param availableMB - VRAM budget in MB.
 * @param modelSizeBytes - Model file size in bytes, keyed by quantization label.
 * @returns The first level whose `estimateVRAM` result is within the budget,
 *   or `undefined` when none fits.
 */
export function suggestQuantization(
  availableMB: number,
  modelSizeBytes: Record<string, number>,
): string | undefined {
  const byQuality = ["fp16", "q8_0", "q6_k", "q5_k_m", "q5_k_s", "q4_k_m", "q4_k_s", "q3_k_m", "q3_k_s", "q2_k"]
  return byQuality.find((level) => {
    const bytes = modelSizeBytes[level]
    if (bytes === undefined) return false
    return estimateVRAM(bytes, level) <= availableMB
  })
}
|