@kernel.chat/kbot 3.99.34 → 3.99.35
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/auth.d.ts +1 -1
- package/dist/auth.js +20 -2
- package/dist/providers/llada.d.ts +98 -0
- package/dist/providers/llada.js +197 -0
- package/dist/tools/image-thoughtful.d.ts +2 -2
- package/dist/tools/llada-image.d.ts +38 -0
- package/dist/tools/llada-image.js +171 -0
- package/dist/tools/swarm-2026-04.js +2 -0
- package/package.json +1 -1
package/dist/auth.d.ts
CHANGED

@@ -1,6 +1,6 @@
 declare const KBOT_DIR: string;
 declare const CONFIG_PATH: string;
-export type ByokProvider = 'anthropic' | 'openai' | 'google' | 'mistral' | 'xai' | 'deepseek' | 'groq' | 'together' | 'fireworks' | 'perplexity' | 'cohere' | 'nvidia' | 'sambanova' | 'cerebras' | 'openrouter' | 'lmstudio' | 'jan' | 'ollama' | 'kbot-local' | 'embedded';
+export type ByokProvider = 'anthropic' | 'openai' | 'google' | 'mistral' | 'xai' | 'deepseek' | 'groq' | 'together' | 'fireworks' | 'perplexity' | 'cohere' | 'nvidia' | 'sambanova' | 'cerebras' | 'openrouter' | 'lmstudio' | 'jan' | 'ollama' | 'kbot-local' | 'llada' | 'embedded';
 export interface ProviderConfig {
     name: string;
     apiUrl: string;

package/dist/auth.js
CHANGED

@@ -18,6 +18,7 @@ const OLLAMA_HOST = process.env.OLLAMA_HOST || 'http://localhost:11434';
 const LMSTUDIO_HOST = process.env.LMSTUDIO_HOST || 'http://localhost:1234';
 const JAN_HOST = process.env.JAN_HOST || 'http://localhost:1337';
 const KBOT_LOCAL_HOST = process.env.KBOT_LOCAL_HOST || 'http://127.0.0.1:18789';
+const LLADA_HOST = process.env.KBOT_LLADA_URL || 'http://localhost:8000';
 export const PROVIDERS = {
     anthropic: {
         name: 'Anthropic (Claude)',
@@ -241,6 +242,23 @@ export const PROVIDERS = {
         outputCost: 0,
         authHeader: 'bearer',
     },
+    llada: {
+        // LLaDA2.0-Uni — Inclusion AI unified discrete-diffusion multimodal LLM.
+        // Local, $0 path to image generation + multimodal understanding.
+        // SPEC: refine when LLaDA's API stabilizes — currently assumes an
+        // OpenAI-compatible server at $KBOT_LLADA_URL (default http://localhost:8000).
+        // The upstream repo (github.com/inclusionAI/LLaDA2.0-Uni) ships Python
+        // inference scripts today; SGLang serving is on their TODO list.
+        name: 'LLaDA2.0-Uni (Local)',
+        apiUrl: `${LLADA_HOST}/v1/chat/completions`,
+        apiStyle: 'openai',
+        defaultModel: 'llada2.0-uni',
+        fastModel: 'llada2.0-uni',
+        inputCost: 0,
+        outputCost: 0,
+        authHeader: 'bearer', // Auth is ignored when no key is set; local servers usually don't require one.
+        models: ['llada2.0-uni'],
+    },
     embedded: {
         name: 'Embedded (llama.cpp)',
         apiUrl: 'embedded://local', // Not a real URL — inference runs in-process
@@ -381,11 +399,11 @@ const ENV_KEYS = [
 ];
 /** Check if a provider is local (runs on this machine, may still need a token) */
 export function isLocalProvider(provider) {
-    return provider === 'ollama' || provider === 'kbot-local' || provider === 'lmstudio' || provider === 'jan' || provider === 'embedded';
+    return provider === 'ollama' || provider === 'kbot-local' || provider === 'lmstudio' || provider === 'jan' || provider === 'embedded' || provider === 'llada';
 }
 /** Check if a provider needs no API key at all */
 export function isKeylessProvider(provider) {
-    return provider === 'ollama' || provider === 'lmstudio' || provider === 'jan' || provider === 'embedded';
+    return provider === 'ollama' || provider === 'lmstudio' || provider === 'jan' || provider === 'embedded' || provider === 'llada';
 }
 /** Check if BYOK mode is enabled (via env var or config) */
 export function isByokEnabled() {

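A minimal sketch of what the new entry means in practice, assuming the compiled `dist/auth.js` module is importable at this deep path (an illustration, not a documented entry point) and that `KBOT_LLADA_URL` is exported before the module first loads, since the default host is captured at import time:

```ts
// Hypothetical usage — the deep dist import path is an assumption.
import { PROVIDERS, isLocalProvider, isKeylessProvider } from '@kernel.chat/kbot/dist/auth.js';

console.log(PROVIDERS.llada.apiUrl);     // "http://localhost:8000/v1/chat/completions" unless KBOT_LLADA_URL overrides it
console.log(PROVIDERS.llada.inputCost);  // 0 (local inference, no metered cost)
console.log(isLocalProvider('llada'));   // true (new in 3.99.35)
console.log(isKeylessProvider('llada')); // true (new in 3.99.35)
```
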
package/dist/providers/llada.d.ts
ADDED

@@ -0,0 +1,98 @@
+export interface LLaDAClientOptions {
+    baseUrl?: string;
+    apiKey?: string;
+    /** Default request timeout in ms. */
+    timeoutMs?: number;
+    /** Optional fetch override (used by tests). */
+    fetchImpl?: typeof fetch;
+}
+export interface LLaDAChatMessage {
+    role: 'system' | 'user' | 'assistant';
+    content: string | Array<{
+        type: 'text';
+        text: string;
+    } | {
+        type: 'image_url';
+        image_url: {
+            url: string;
+        };
+    }>;
+}
+export interface LLaDAChatRequest {
+    messages: LLaDAChatMessage[];
+    model?: string;
+    temperature?: number;
+    maxTokens?: number;
+    /** Optional thinking budget — LLaDA supports `mode: "thinking"` with `thinking_steps`. */
+    thinkingSteps?: number;
+}
+export interface LLaDAChatResponse {
+    text: string;
+    /** When `thinkingSteps` is set, LLaDA returns the reasoning trace. */
+    thinking?: string;
+    raw?: unknown;
+}
+export interface LLaDAImageRequest {
+    prompt: string;
+    /** A "WxH" size string such as "1024x1024" — converted to image_h/image_w server-side. */
+    size?: string;
+    /** Optional reference image (URL or base64 data URL) for image editing. */
+    refImage?: string;
+    /** Diffusion sampling steps (LLaDA defaults to 8 with the turbo decoder). */
+    steps?: number;
+    cfgScale?: number;
+    /** Enable LLaDA's interleaved thinking-then-generate mode. */
+    thinking?: boolean;
+}
+export interface LLaDAImageResponse {
+    /** A URL or `data:image/png;base64,...` payload. */
+    url: string;
+    /** Reasoning trace, only present when `thinking: true`. */
+    thinking?: string;
+    raw?: unknown;
+}
+export interface LLaDAUnderstandRequest {
+    prompt: string;
+    /** Pass exactly one of imageUrl or imageData (base64). */
+    imageUrl?: string;
+    imageData?: string;
+    model?: string;
+    maxTokens?: number;
+}
+export interface LLaDAUnderstandResponse {
+    text: string;
+    raw?: unknown;
+}
+/** Typed client for a LLaDA2.0-Uni HTTP server (OpenAI-compatible shape). */
+export declare class LLaDAClient {
+    readonly baseUrl: string;
+    readonly apiKey: string | undefined;
+    private readonly timeoutMs;
+    private readonly fetchImpl;
+    constructor(opts?: LLaDAClientOptions);
+    /** Build standard headers — Authorization only attached when an apiKey is set. */
+    private headers;
+    private post;
+    /** Quick health probe. Resolves true when the server responds with 2xx. */
+    isReachable(): Promise<boolean>;
+    /**
+     * Text chat — OpenAI-compatible POST /v1/chat/completions.
+     * SPEC: refine when LLaDA's API stabilizes — currently assumes OpenAI-compatible shape.
+     */
+    chat(req: LLaDAChatRequest): Promise<LLaDAChatResponse>;
+    /**
+     * Image generation. The native LLaDA call is `model.generate_image(...)`;
+     * we expose it via a POST /v1/images/generations shim that accepts the
+     * extra LLaDA-specific knobs (`steps`, `cfg_scale`, `thinking`).
+     * SPEC: refine when LLaDA's API stabilizes — assumes OpenAI-compatible shape.
+     */
+    generateImage(req: LLaDAImageRequest): Promise<LLaDAImageResponse>;
+    /**
+     * Multimodal understanding: chat with an image attached.
+     * SPEC: refine when LLaDA's API stabilizes — uses OpenAI-vision content blocks.
+     */
+    understand(req: LLaDAUnderstandRequest): Promise<LLaDAUnderstandResponse>;
+}
+/** Convenience factory mirroring the rest of kbot's local-provider style. */
+export declare function createLLaDAClient(opts?: LLaDAClientOptions): LLaDAClient;
+//# sourceMappingURL=llada.d.ts.map

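Read against these declarations, a multimodal chat request uses OpenAI-vision content blocks. A small illustrative sketch; the deep import path and the data URL are placeholders, not documented entry points:

```ts
import type { LLaDAChatRequest } from '@kernel.chat/kbot/dist/providers/llada.js';

const req: LLaDAChatRequest = {
    model: 'llada2.0-uni',
    messages: [
        {
            role: 'user',
            content: [
                { type: 'text', text: 'What is unusual about this chart?' },
                { type: 'image_url', image_url: { url: 'data:image/png;base64,...' } }, // placeholder payload
            ],
        },
    ],
    maxTokens: 512,
    thinkingSteps: 16, // the client translates this to mode: "thinking" + thinking_steps
};
```
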
package/dist/providers/llada.js
ADDED

@@ -0,0 +1,197 @@
+// LLaDA2.0-Uni provider client
+//
+// LLaDA2.0-Uni (Inclusion AI, ArXiv:2604.20796, 2026-04-22) is a unified
+// discrete-diffusion LLM that does multimodal *understanding* AND text-to-image
+// generation in a single MoE model. It pairs a SigLIP-VQ semantic tokenizer
+// with a diffusion decoder for high-fidelity 8-step image inference.
+//
+// Local serving status (as of 2026-04-25):
+// The official repo (https://github.com/inclusionAI/LLaDA2.0-Uni) currently
+// ships only Python inference scripts (`scripts/t2i_generate.py`,
+// `scripts/mmu_understand.py`, `scripts/image_edit.py`) using the HF
+// transformers loader. SGLang serving is on the README's TODO list but not
+// yet released. Most users will wrap the Python entrypoints behind a small
+// OpenAI-compatible FastAPI shim, or wait for the SGLang adapter.
+//
+// SPEC: refine when LLaDA's API stabilizes — currently assumes OpenAI-compatible
+// shape on http://localhost:8000 (the conventional vllm / TGI / SGLang layout).
+// All endpoints below are speculative until the upstream serving surface lands.
+const DEFAULT_BASE_URL = process.env.KBOT_LLADA_URL || 'http://localhost:8000';
+const DEFAULT_MODEL = 'llada2.0-uni';
+/** Typed client for a LLaDA2.0-Uni HTTP server (OpenAI-compatible shape). */
+export class LLaDAClient {
+    baseUrl;
+    apiKey;
+    timeoutMs;
+    fetchImpl;
+    constructor(opts = {}) {
+        this.baseUrl = (opts.baseUrl || DEFAULT_BASE_URL).replace(/\/+$/, '');
+        this.apiKey = opts.apiKey;
+        this.timeoutMs = opts.timeoutMs ?? 60_000;
+        // Wrap so `this` in node's global fetch stays correct.
+        this.fetchImpl = opts.fetchImpl ?? ((...a) => fetch(...a));
+    }
+    /** Build standard headers — Authorization only attached when an apiKey is set. */
+    headers() {
+        const h = { 'Content-Type': 'application/json' };
+        if (this.apiKey)
+            h.Authorization = `Bearer ${this.apiKey}`;
+        return h;
+    }
+    async post(path, body) {
+        const url = `${this.baseUrl}${path}`;
+        const ctrl = new AbortController();
+        const timer = setTimeout(() => ctrl.abort(), this.timeoutMs);
+        try {
+            const res = await this.fetchImpl(url, {
+                method: 'POST',
+                headers: this.headers(),
+                body: JSON.stringify(body),
+                signal: ctrl.signal,
+            });
+            if (!res.ok) {
+                const txt = await res.text().catch(() => '');
+                throw new Error(`LLaDA ${path} failed: ${res.status} ${res.statusText} ${txt}`.trim());
+            }
+            return (await res.json());
+        }
+        finally {
+            clearTimeout(timer);
+        }
+    }
+    /** Quick health probe. Resolves true when the server responds with 2xx. */
+    async isReachable() {
+        try {
+            const ctrl = new AbortController();
+            const timer = setTimeout(() => ctrl.abort(), 2000);
+            try {
+                // SPEC: refine when LLaDA's API stabilizes — try /v1/models, fall back to /health.
+                const res = await this.fetchImpl(`${this.baseUrl}/v1/models`, {
+                    method: 'GET',
+                    signal: ctrl.signal,
+                });
+                if (res.ok)
+                    return true;
+                const res2 = await this.fetchImpl(`${this.baseUrl}/health`, {
+                    method: 'GET',
+                    signal: ctrl.signal,
+                });
+                return res2.ok;
+            }
+            finally {
+                clearTimeout(timer);
+            }
+        }
+        catch {
+            return false;
+        }
+    }
+    /**
+     * Text chat — OpenAI-compatible POST /v1/chat/completions.
+     * SPEC: refine when LLaDA's API stabilizes — currently assumes OpenAI-compatible shape.
+     */
+    async chat(req) {
+        const body = {
+            model: req.model || DEFAULT_MODEL,
+            messages: req.messages,
+            temperature: req.temperature ?? 0.7,
+        };
+        if (req.maxTokens !== undefined)
+            body.max_tokens = req.maxTokens;
+        if (req.thinkingSteps !== undefined) {
+            // LLaDA exposes `mode: "thinking"` + `thinking_steps` in its native API.
+            // We pass them as extra fields; servers that don't recognize them ignore them.
+            body.mode = 'thinking';
+            body.thinking_steps = req.thinkingSteps;
+        }
+        const data = await this.post('/v1/chat/completions', body);
+        const text = data.choices?.[0]?.message?.content ?? '';
+        if (!text)
+            throw new Error('LLaDA chat returned no content');
+        const thinking = data.choices?.[0]?.message?.thinking ?? data.thinking;
+        return { text, thinking, raw: data };
+    }
+    /**
+     * Image generation. The native LLaDA call is `model.generate_image(...)`;
+     * we expose it via a POST /v1/images/generations shim that accepts the
+     * extra LLaDA-specific knobs (`steps`, `cfg_scale`, `thinking`).
+     * SPEC: refine when LLaDA's API stabilizes — assumes OpenAI-compatible shape.
+     */
+    async generateImage(req) {
+        const size = req.size || '1024x1024';
+        const [w, h] = parseSize(size);
+        const body = {
+            model: DEFAULT_MODEL,
+            prompt: req.prompt,
+            size,
+            n: 1,
+            // LLaDA-native fields (ignored by stricter OpenAI servers):
+            image_w: w,
+            image_h: h,
+            steps: req.steps ?? 8,
+            cfg_scale: req.cfgScale ?? 2.0,
+        };
+        if (req.thinking) {
+            body.mode = 'thinking';
+            body.thinking_steps = 32;
+        }
+        if (req.refImage) {
+            // SPEC: LLaDA uses `image_tokens` for editing. The likely server shim accepts
+            // either `input_image` (URL/data URL) or `image` and tokenizes server-side.
+            body.input_image = req.refImage;
+            body.image = req.refImage;
+        }
+        const data = await this.post('/v1/images/generations', body);
+        const item = data.data?.[0];
+        if (!item)
+            throw new Error('LLaDA image returned no data');
+        let url = item.url;
+        if (!url && item.b64_json)
+            url = `data:image/png;base64,${item.b64_json}`;
+        if (!url)
+            throw new Error('LLaDA image returned neither url nor b64_json');
+        const thinking = item.thinking ?? data.thinking;
+        return { url, thinking, raw: data };
+    }
+    /**
+     * Multimodal understanding: chat with an image attached.
+     * SPEC: refine when LLaDA's API stabilizes — uses OpenAI-vision content blocks.
+     */
+    async understand(req) {
+        if (!req.imageUrl && !req.imageData) {
+            throw new Error('LLaDA.understand requires imageUrl or imageData');
+        }
+        const imageUrl = req.imageUrl
+            ? req.imageUrl
+            : req.imageData.startsWith('data:')
+                ? req.imageData
+                : `data:image/png;base64,${req.imageData}`;
+        const messages = [
+            {
+                role: 'user',
+                content: [
+                    { type: 'text', text: req.prompt },
+                    { type: 'image_url', image_url: { url: imageUrl } },
+                ],
+            },
+        ];
+        const out = await this.chat({
+            model: req.model,
+            messages,
+            maxTokens: req.maxTokens ?? 1024,
+            temperature: 0.2,
+        });
+        return { text: out.text, raw: out.raw };
+    }
+}
+function parseSize(size) {
+    const m = /^(\d+)\s*x\s*(\d+)$/i.exec(size.trim());
+    if (!m)
+        return [1024, 1024];
+    return [Number(m[1]), Number(m[2])];
+}
+/** Convenience factory mirroring the rest of kbot's local-provider style. */
+export function createLLaDAClient(opts = {}) {
+    return new LLaDAClient(opts);
+}
+//# sourceMappingURL=llada.js.map

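A short usage sketch of the client above, hedged the same way the module hedges itself: it only works once something OpenAI-compatible is actually listening on `KBOT_LLADA_URL`, and the deep import path is an illustration:

```ts
import { createLLaDAClient } from '@kernel.chat/kbot/dist/providers/llada.js';

const llada = createLLaDAClient(); // honors KBOT_LLADA_URL, else http://localhost:8000

if (await llada.isReachable()) {
    // Plain text chat via the assumed /v1/chat/completions endpoint.
    const chat = await llada.chat({
        messages: [{ role: 'user', content: 'Describe discrete diffusion in one sentence.' }],
    });
    console.log(chat.text);

    // 8-step diffusion image via the assumed /v1/images/generations shim.
    const img = await llada.generateImage({ prompt: 'a lighthouse at dusk, oil painting', steps: 8 });
    console.log(img.url.slice(0, 48)); // URL or data:image/png;base64,... payload
}
```
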
package/dist/tools/image-thoughtful.d.ts
CHANGED

@@ -8,14 +8,14 @@ export declare const imageThoughtfulInputSchema: z.ZodObject<{
     reference_image_url: z.ZodOptional<z.ZodString>;
 }, "strip", z.ZodTypeAny, {
     prompt: string;
-    aspect_ratio: "1:1" | "16:9" | "9:16" | "4:3" | "3:4";
     thinking_steps: number;
+    aspect_ratio: "1:1" | "16:9" | "9:16" | "4:3" | "3:4";
     style_hints?: string | undefined;
     reference_image_url?: string | undefined;
 }, {
     prompt: string;
-    aspect_ratio?: "1:1" | "16:9" | "9:16" | "4:3" | "3:4" | undefined;
     thinking_steps?: number | undefined;
+    aspect_ratio?: "1:1" | "16:9" | "9:16" | "4:3" | "3:4" | undefined;
     style_hints?: string | undefined;
     reference_image_url?: string | undefined;
 }>;

package/dist/tools/llada-image.d.ts
ADDED

@@ -0,0 +1,38 @@
+import { z } from 'zod';
+import type { ToolDefinition } from './index.js';
+import { LLaDAClient } from '../providers/llada.js';
+export declare const lladaImageInputSchema: z.ZodObject<{
+    prompt: z.ZodString;
+    aspect_ratio: z.ZodDefault<z.ZodOptional<z.ZodEnum<["1:1", "16:9", "9:16", "4:3", "3:4"]>>>;
+    thinking_steps: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
+    style_hints: z.ZodOptional<z.ZodString>;
+    reference_image_url: z.ZodOptional<z.ZodString>;
+}, "strip", z.ZodTypeAny, {
+    prompt: string;
+    thinking_steps: number;
+    aspect_ratio: "1:1" | "16:9" | "9:16" | "4:3" | "3:4";
+    style_hints?: string | undefined;
+    reference_image_url?: string | undefined;
+}, {
+    prompt: string;
+    thinking_steps?: number | undefined;
+    aspect_ratio?: "1:1" | "16:9" | "9:16" | "4:3" | "3:4" | undefined;
+    style_hints?: string | undefined;
+    reference_image_url?: string | undefined;
+}>;
+export type LLaDAImageInput = z.infer<typeof lladaImageInputSchema>;
+export interface LLaDAImageThoughtfulOutput {
+    url: string;
+    plan: string;
+    refinements: string[];
+    final_prompt: string;
+    /** Optional reasoning trace surfaced by LLaDA's `thinking` mode. */
+    thinking?: string;
+}
+export interface RunLLaDAImageOptions {
+    /** Inject a client (for tests). Defaults to a fresh LLaDAClient(). */
+    client?: LLaDAClient;
+}
+export declare function runLLaDAImageThoughtful(rawInput: unknown, opts?: RunLLaDAImageOptions): Promise<LLaDAImageThoughtfulOutput>;
+export declare const lladaImageTool: ToolDefinition;
+//# sourceMappingURL=llada-image.d.ts.map

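The zod defaults declared above are easy to see in action; a tiny sketch, again with an assumed deep import path:

```ts
import { lladaImageInputSchema } from '@kernel.chat/kbot/dist/tools/llada-image.js';

// "strip" mode drops unknown keys; the .default() wrappers fill in the rest.
const parsed = lladaImageInputSchema.parse({ prompt: 'red bicycle', extra: 'dropped' });
// parsed => { prompt: 'red bicycle', aspect_ratio: '1:1', thinking_steps: 3 }
```
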
package/dist/tools/llada-image.js
ADDED

@@ -0,0 +1,171 @@
+// kbot Local Image Thoughtful Tool — LLaDA2.0-Uni route
+//
+// Mirrors `image-thoughtful.ts` (which uses OpenAI gpt-image-2) but routes
+// every call — plan, refine, generate — through a local LLaDA2.0-Uni server.
+// LLaDA is a *unified* discrete-diffusion model: the same model that does
+// chat reasoning also does the final image generation, so this tool is the
+// $0 / no-API-key path to thoughtful image gen.
+//
+// SPEC: refine when LLaDA's API stabilizes — currently assumes OpenAI-compatible
+// shape on http://localhost:8000. See `src/providers/llada.ts` for details.
+import { z } from 'zod';
+import { LLaDAClient } from '../providers/llada.js';
+// ─────────────────────────────────────────────────────────────────────────
+// Schema (kept in lockstep with image-thoughtful.ts)
+// ─────────────────────────────────────────────────────────────────────────
+const AspectRatio = z.enum(['1:1', '16:9', '9:16', '4:3', '3:4']);
+export const lladaImageInputSchema = z.object({
+    prompt: z.string().min(1, 'prompt is required'),
+    aspect_ratio: AspectRatio.optional().default('1:1'),
+    thinking_steps: z.number().int().min(1).max(5).optional().default(3),
+    style_hints: z.string().optional(),
+    reference_image_url: z.string().url().optional(),
+});
+// LLaDA's image_h/image_w map cleanly off these aspect-ratio sizes.
+const SIZE_MAP = {
+    '1:1': '1024x1024',
+    '16:9': '1792x1024',
+    '9:16': '1024x1792',
+    '4:3': '1408x1056',
+    '3:4': '1056x1408',
+};
+// ─────────────────────────────────────────────────────────────────────────
+// Prompts (intentionally identical wording to image-thoughtful for parity)
+// ─────────────────────────────────────────────────────────────────────────
+function planSystemPrompt() {
+    return [
+        'You are an expert art director planning an image before generation.',
+        'Return a JSON object with keys: composition, palette, key_elements, mood, lighting, style.',
+        'Keep each value to one or two sentences. Be concrete and visual.',
+    ].join(' ');
+}
+function critiqueSystemPrompt() {
+    return [
+        'You are a critic refining an image plan. Read the prompt and the current plan,',
+        'identify the single weakest element, and return an improved JSON plan with the',
+        'same keys (composition, palette, key_elements, mood, lighting, style).',
+        'Do not restate the brief — produce the next iteration of the plan only.',
+    ].join(' ');
+}
+function finalPromptSystemPrompt() {
+    return [
+        'You compose the final image-generation prompt. Combine the brief, plan, and any',
+        'style hints into a single cohesive paragraph (no JSON, no headings, no lists).',
+        'Lead with subject and composition, then palette, lighting, mood, and style.',
+    ].join(' ');
+}
+function buildPlanUserText(input) {
+    return [
+        `Brief: ${input.prompt}`,
+        `Aspect ratio: ${input.aspect_ratio}`,
+        input.style_hints ? `Style hints: ${input.style_hints}` : null,
+        input.reference_image_url ? `Reference image URL: ${input.reference_image_url}` : null,
+    ]
+        .filter(Boolean)
+        .join('\n');
+}
+export async function runLLaDAImageThoughtful(rawInput, opts = {}) {
+    const input = lladaImageInputSchema.parse(rawInput);
+    const client = opts.client ?? new LLaDAClient();
+    // 1. Plan
+    const planResp = await client.chat({
+        messages: [
+            { role: 'system', content: planSystemPrompt() },
+            { role: 'user', content: buildPlanUserText(input) },
+        ],
+        temperature: 0.5,
+    });
+    const plan = planResp.text;
+    // 2. Refine — thinking_steps - 1 critique passes (1 = no refinement)
+    const refinements = [];
+    let currentPlan = plan;
+    for (let i = 1; i < input.thinking_steps; i++) {
+        const next = await client.chat({
+            messages: [
+                { role: 'system', content: critiqueSystemPrompt() },
+                {
+                    role: 'user',
+                    content: `Brief: ${input.prompt}\nCurrent plan: ${currentPlan}`,
+                },
+            ],
+            temperature: 0.5,
+        });
+        refinements.push(next.text);
+        currentPlan = next.text;
+    }
+    // 3. Compose final prompt
+    const finalResp = await client.chat({
+        messages: [
+            { role: 'system', content: finalPromptSystemPrompt() },
+            {
+                role: 'user',
+                content: [
+                    `Brief: ${input.prompt}`,
+                    input.style_hints ? `Style hints: ${input.style_hints}` : '',
+                    `Plan: ${currentPlan}`,
+                ]
+                    .filter(Boolean)
+                    .join('\n\n'),
+            },
+        ],
+        temperature: 0.4,
+    });
+    const finalPromptText = finalResp.text;
+    // 4. Generate (LLaDA-native call — same model, diffusion decoder).
+    const img = await client.generateImage({
+        prompt: finalPromptText,
+        size: SIZE_MAP[input.aspect_ratio],
+        refImage: input.reference_image_url,
+    });
+    return {
+        url: img.url,
+        plan,
+        refinements,
+        final_prompt: finalPromptText,
+        thinking: img.thinking,
+    };
+}
+// ─────────────────────────────────────────────────────────────────────────
+// Tool definition
+// ─────────────────────────────────────────────────────────────────────────
+export const lladaImageTool = {
+    name: 'local_image_thoughtful',
+    description: 'Local plan/refine/generate image tool routed through a LLaDA2.0-Uni server (default http://localhost:8000). The same unified diffusion LLM does both the planning and the final image generation, so no OpenAI key is required. Returns the image URL plus the full reasoning trail.',
+    tier: 'free',
+    parameters: {
+        prompt: {
+            type: 'string',
+            description: 'What to draw — the brief.',
+            required: true,
+        },
+        aspect_ratio: {
+            type: 'string',
+            description: 'One of "1:1", "16:9", "9:16", "4:3", "3:4". Default "1:1".',
+            default: '1:1',
+        },
+        thinking_steps: {
+            type: 'number',
+            description: 'How many plan iterations to run (1..5). Default 3. 1 skips refinement.',
+            default: 3,
+        },
+        style_hints: {
+            type: 'string',
+            description: 'Optional style guidance ("oil painting", "ukiyo-e", "isometric vector").',
+        },
+        reference_image_url: {
+            type: 'string',
+            description: 'Optional URL of a reference image. Forwarded to LLaDA as the editing source (input_image).',
+        },
+    },
+    async execute(args) {
+        try {
+            const out = await runLLaDAImageThoughtful(args);
+            return JSON.stringify(out, null, 2);
+        }
+        catch (err) {
+            const message = err instanceof Error ? err.message : String(err);
+            return `Error: ${message}`;
+        }
+    },
+};
+//# sourceMappingURL=llada-image.js.map

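End to end, the tool runs plan, critique, compose, then generate against the local server. A usage sketch under the same assumptions as above:

```ts
import { runLLaDAImageThoughtful } from '@kernel.chat/kbot/dist/tools/llada-image.js';

// thinking_steps: 3 means one plan pass plus two critique passes before generating.
const out = await runLLaDAImageThoughtful({
    prompt: 'a tidal pool at golden hour, macro shot',
    aspect_ratio: '16:9', // mapped to 1792x1024 via SIZE_MAP
    thinking_steps: 3,
    style_hints: 'ukiyo-e',
});

console.log(out.plan);         // initial art-director plan
console.log(out.refinements);  // two critique iterations
console.log(out.final_prompt); // single-paragraph prompt sent to the diffusion decoder
console.log(out.url);          // image URL or data: payload
```
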
package/dist/tools/swarm-2026-04.js
CHANGED

@@ -3,6 +3,7 @@
 // lazy registry picks them up alongside everything else.
 import { registerTool } from './index.js';
 import { imageThoughtfulTool } from './image-thoughtful.js';
+import { lladaImageTool } from './llada-image.js';
 import { channelSendTool, channelReceiveTool } from './channel-tools.js';
 import { fileLibraryAddTool, fileLibraryListTool, fileLibrarySearchTool, fileLibraryGetTool, } from './file-library-tools.js';
 import { workspaceAgentTools } from './workspace-agent-tools.js';
@@ -72,6 +73,7 @@ function adaptSecurityTool(t) {
 }
 export function registerSwarm2026Tools() {
     registerTool(imageThoughtfulTool);
+    registerTool(lladaImageTool);
     registerTool(channelSendTool);
     registerTool(channelReceiveTool);
     registerTool(fileLibraryAddTool);

package/package.json
CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@kernel.chat/kbot",
-  "version": "3.99.34",
+  "version": "3.99.35",
   "description": "Open-source terminal AI agent. 787+ tools, 35 agents, 20 providers. Dreams, learns, watches your system. Controls your phone. Fully local, fully sovereign. MIT.",
   "type": "module",
   "repository": {