discoclaw 1.2.3 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.context/voice.md +30 -2
- package/.env.example +6 -0
- package/dist/cli/dashboard.js +7 -1
- package/dist/config.js +7 -0
- package/dist/cron/executor.js +72 -1
- package/dist/dashboard/api/metrics.js +7 -0
- package/dist/dashboard/api/metrics.test.js +16 -0
- package/dist/dashboard/api/traces.js +14 -0
- package/dist/dashboard/api/traces.test.js +40 -0
- package/dist/dashboard/page.js +187 -8
- package/dist/dashboard/server.js +81 -14
- package/dist/dashboard/server.test.js +120 -4
- package/dist/discord/deferred-runner.js +306 -219
- package/dist/discord/message-coordinator.js +1 -28
- package/dist/discord/reaction-handler.js +81 -3
- package/dist/index.js +15 -1
- package/dist/observability/trace-store.js +56 -0
- package/dist/observability/trace-utils.js +31 -0
- package/dist/runtime/codex-cli.js +3 -2
- package/dist/runtime/codex-cli.test.js +33 -0
- package/dist/runtime/model-tiers.js +1 -1
- package/dist/runtime/model-tiers.test.js +9 -0
- package/dist/runtime/openai-tool-schemas.js +17 -0
- package/dist/voice/audio-pipeline.js +246 -6
- package/dist/voice/audio-pipeline.test.js +481 -0
- package/dist/voice/audio-receiver.js +8 -0
- package/dist/voice/audio-receiver.test.js +16 -0
- package/dist/voice/conversation-buffer.js +16 -6
- package/dist/voice/providers/gemini-live-provider.js +481 -0
- package/dist/voice/providers/gemini-live-provider.test.js +834 -0
- package/dist/voice/providers/gemini-live-responder.js +267 -0
- package/dist/voice/providers/gemini-live-responder.test.js +615 -0
- package/dist/voice/providers/gemini-live-token-estimator.js +100 -0
- package/dist/voice/providers/gemini-live-token-estimator.test.js +160 -0
- package/dist/voice/providers/gemini-live-types.js +32 -0
- package/dist/voice/providers/gemini-tool-mapper.js +91 -0
- package/dist/voice/providers/gemini-tool-mapper.test.js +253 -0
- package/dist/voice/providers/index.js +3 -0
- package/dist/voice/types.test.js +6 -0
- package/dist/voice/voice-prompt-builder.js +26 -17
- package/dist/voice/voice-prompt-builder.test.js +16 -1
- package/package.json +1 -1
- package/templates/instructions/SYSTEM_DEFAULTS.md +8 -0
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Token estimator for Gemini Live sessions.
|
|
3
|
+
*
|
|
4
|
+
* Tracks approximate token usage across text, audio, and tool payloads
|
|
5
|
+
* to warn before the context window fills and server-side sliding window
|
|
6
|
+
* compression silently drops older context.
|
|
7
|
+
*
|
|
8
|
+
* Estimation is deliberately coarse (chars/4 for text, duration-based for
|
|
9
|
+
* audio) — the goal is order-of-magnitude awareness, not exact counts.
|
|
10
|
+
*/
|
|
11
|
+
// ---------------------------------------------------------------------------
|
|
12
|
+
// Constants
|
|
13
|
+
// ---------------------------------------------------------------------------
|
|
14
|
+
/** Rough chars-per-token for mixed English text. */
const CHARS_PER_TOKEN = 4;
/** Approximate tokens per second of 16 kHz mono speech audio. */
const AUDIO_TOKENS_PER_SECOND = 25;
/** Input audio: 16 kHz mono PCM s16le → 2 bytes per sample. */
const INPUT_BYTES_PER_SECOND = 16_000 * 2;
/** Output audio: 24 kHz mono PCM s16le → 2 bytes per sample. */
const OUTPUT_BYTES_PER_SECOND = 24_000 * 2;
/** Thresholds used for any budget field the caller leaves unset. */
const DEFAULT_BUDGET = {
    warnAt: 200_000,
    compressAt: 500_000,
};
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
/**
 * Serialise a value for size estimation without throwing.
 * `JSON.stringify` returns `undefined` for `undefined`/functions and throws on
 * circular structures or BigInt; both cases contribute an empty string so a
 * bad payload can never break or poison the running estimate.
 */
function stringifyForEstimate(value) {
    try {
        return JSON.stringify(value) ?? '';
    }
    catch {
        return '';
    }
}
/**
 * Number of characters a value contributes to the estimate.
 * Strings count as-is, nullish values count as zero, anything else is
 * measured by its JSON serialisation.
 */
function countChars(value) {
    if (typeof value === 'string') {
        return value.length;
    }
    if (value == null) {
        return 0;
    }
    return stringifyForEstimate(value).length;
}
/** Convert a character count to a (rounded-up) token count. */
function charsToTokens(chars) {
    return Math.ceil(chars / CHARS_PER_TOKEN);
}
/**
 * Convert a PCM byte count to a (rounded-up) token count.
 * Non-finite or non-positive byte counts contribute nothing, so a single bad
 * chunk size cannot turn the counter into NaN or make it go backwards.
 */
function pcmBytesToTokens(pcmBytes, bytesPerSecond) {
    if (!Number.isFinite(pcmBytes) || pcmBytes <= 0) {
        return 0;
    }
    const seconds = pcmBytes / bytesPerSecond;
    return Math.ceil(seconds * AUDIO_TOKENS_PER_SECOND);
}
// ---------------------------------------------------------------------------
// Estimator
// ---------------------------------------------------------------------------
/**
 * Running, deliberately coarse token estimate for one session, split into
 * text, audio and tool buckets, with one-shot `warn` / `compress` threshold
 * notifications.
 */
export class GeminiLiveTokenEstimator {
    /** Tokens attributed to text. */
    textTokens = 0;
    /** Tokens attributed to input and output audio combined. */
    audioTokens = 0;
    /** Tokens attributed to tool calls and tool responses. */
    toolTokens = 0;
    /** True once `checkThreshold()` has reported (or skipped) `warn`. */
    warnEmitted = false;
    /** True once `checkThreshold()` has reported `compress`. */
    compressEmitted = false;
    /** Effective budget: `{ warnAt, compressAt }`. */
    budget;
    /**
     * @param {{ warnAt?: number, compressAt?: number }} [budget] Partial
     *   override of the default thresholds. Fields that are missing, `null`
     *   or `undefined` fall back to the defaults; a plain spread would let an
     *   explicit `undefined` erase the default and disable that threshold.
     */
    constructor(budget) {
        this.budget = {
            ...DEFAULT_BUDGET,
            ...budget,
            warnAt: budget?.warnAt ?? DEFAULT_BUDGET.warnAt,
            compressAt: budget?.compressAt ?? DEFAULT_BUDGET.compressAt,
        };
    }
    /** Record text tokens (sent or received). Nullish text counts as zero. */
    addText(text) {
        this.textTokens += charsToTokens(countChars(text));
    }
    /** Record input audio tokens from PCM byte count (16 kHz mono). */
    addInputAudio(pcmBytes) {
        this.audioTokens += pcmBytesToTokens(pcmBytes, INPUT_BYTES_PER_SECOND);
    }
    /** Record output audio tokens from PCM byte count (24 kHz mono). */
    addOutputAudio(pcmBytes) {
        this.audioTokens += pcmBytesToTokens(pcmBytes, OUTPUT_BYTES_PER_SECOND);
    }
    /**
     * Record a tool call (function name + serialised args).
     * Missing or unserialisable args contribute only the name's length.
     */
    addToolCall(name, args) {
        const payloadLength = countChars(name) + stringifyForEstimate(args).length;
        this.toolTokens += charsToTokens(payloadLength);
    }
    /**
     * Record a tool response. Strings are measured directly; other values are
     * measured by their JSON form so the counter never becomes NaN.
     */
    addToolResponse(output) {
        this.toolTokens += charsToTokens(countChars(output));
    }
    /** Current estimate snapshot: per-bucket counts plus their sum. */
    get estimate() {
        const { textTokens, audioTokens, toolTokens } = this;
        return { textTokens, audioTokens, toolTokens, total: textTokens + audioTokens + toolTokens };
    }
    /** Whether the warn threshold has been crossed. */
    get shouldWarn() {
        return this.estimate.total >= this.budget.warnAt;
    }
    /** Whether the compress/rotate threshold has been crossed. */
    get shouldCompress() {
        return this.estimate.total >= this.budget.compressAt;
    }
    /**
     * Check thresholds and return which (if any) was newly crossed.
     * Returns the threshold name once per crossing — subsequent calls
     * return `null` until `reset()` is called.
     *
     * @returns {'compress' | 'warn' | null}
     */
    checkThreshold() {
        if (this.shouldCompress && !this.compressEmitted) {
            this.compressEmitted = true;
            this.warnEmitted = true; // skip warn if compress fires first
            return 'compress';
        }
        if (this.shouldWarn && !this.warnEmitted) {
            this.warnEmitted = true;
            return 'warn';
        }
        return null;
    }
    /** Reset all counters and re-arm both notifications (e.g. after session rotation). */
    reset() {
        this.textTokens = 0;
        this.audioTokens = 0;
        this.toolTokens = 0;
        this.warnEmitted = false;
        this.compressEmitted = false;
    }
}
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
import { describe, it, expect } from 'vitest';
|
|
2
|
+
import { GeminiLiveTokenEstimator } from './gemini-live-token-estimator.js';
|
|
3
|
+
describe('GeminiLiveTokenEstimator', () => {
    // One second of mono PCM s16le at the input (16 kHz) and output (24 kHz) rates.
    const ONE_SECOND_INPUT_BYTES = 32_000;
    const ONE_SECOND_OUTPUT_BYTES = 48_000;
    // Eight characters: exactly two tokens at 4 chars per token.
    const TWO_TOKEN_TEXT = '12345678';
    /** Fresh estimator per test so counters never leak between cases. */
    const createEstimator = (budget) => new GeminiLiveTokenEstimator(budget);

    // ----- addText ----------------------------------------------------------
    describe('addText', () => {
        it('estimates tokens at ~4 chars per token', () => {
            const estimator = createEstimator();
            // 12 chars -> ceil(12 / 4) = 3 tokens
            estimator.addText('Hello world!');
            const { textTokens, total } = estimator.estimate;
            expect(textTokens).toBe(3);
            expect(total).toBe(3);
        });
        it('rounds up partial tokens', () => {
            const estimator = createEstimator();
            // 2 chars -> ceil(2 / 4) = 1 token
            estimator.addText('Hi');
            expect(estimator.estimate.textTokens).toBe(1);
        });
        it('accumulates across multiple calls', () => {
            const estimator = createEstimator();
            // 4 chars (1 token) followed by 8 chars (2 tokens)
            for (const chunk of ['aaaa', 'bbbbbbbb']) {
                estimator.addText(chunk);
            }
            expect(estimator.estimate.textTokens).toBe(3);
        });
    });

    // ----- addInputAudio ----------------------------------------------------
    describe('addInputAudio', () => {
        it('estimates tokens from 16kHz mono PCM bytes', () => {
            const estimator = createEstimator();
            // One second of input audio -> 25 tokens
            estimator.addInputAudio(ONE_SECOND_INPUT_BYTES);
            expect(estimator.estimate.audioTokens).toBe(25);
        });
        it('rounds up partial seconds', () => {
            const estimator = createEstimator();
            // 100 bytes is ~0.003 s -> ceil(0.003 * 25) = 1 token
            estimator.addInputAudio(100);
            expect(estimator.estimate.audioTokens).toBe(1);
        });
    });

    // ----- addOutputAudio ---------------------------------------------------
    describe('addOutputAudio', () => {
        it('estimates tokens from 24kHz mono PCM bytes', () => {
            const estimator = createEstimator();
            // One second of output audio -> 25 tokens
            estimator.addOutputAudio(ONE_SECOND_OUTPUT_BYTES);
            expect(estimator.estimate.audioTokens).toBe(25);
        });
    });

    // ----- addToolCall / addToolResponse ------------------------------------
    describe('addToolCall', () => {
        it('estimates tokens from function name + serialised args', () => {
            const estimator = createEstimator();
            // 'web_search' + '{"query":"hello"}' is 27 chars -> ceil(27 / 4) = 7
            estimator.addToolCall('web_search', { query: 'hello' });
            expect(estimator.estimate.toolTokens).toBe(7);
        });
    });
    describe('addToolResponse', () => {
        it('estimates tokens from response output', () => {
            const estimator = createEstimator();
            // 16 chars -> ceil(16 / 4) = 4
            estimator.addToolResponse('some result text');
            expect(estimator.estimate.toolTokens).toBe(4);
        });
    });

    // ----- estimate ---------------------------------------------------------
    describe('estimate', () => {
        it('returns combined total of text, audio, and tool tokens', () => {
            const estimator = createEstimator();
            estimator.addText('test'); // 1 token
            estimator.addInputAudio(ONE_SECOND_INPUT_BYTES); // 25 tokens
            estimator.addToolCall('x', {}); // 'x{}' -> ceil(3 / 4) = 1 token
            const expectedSnapshot = {
                textTokens: 1,
                audioTokens: 25,
                toolTokens: 1,
                total: 27,
            };
            expect(estimator.estimate).toEqual(expectedSnapshot);
        });
    });

    // ----- threshold checking -----------------------------------------------
    describe('threshold checking', () => {
        it('returns null when below warn threshold', () => {
            const estimator = createEstimator({ warnAt: 100 });
            estimator.addText('hi'); // 1 token, far below the threshold
            expect(estimator.checkThreshold()).toBeNull();
        });
        it('returns "warn" when crossing the warn threshold', () => {
            const estimator = createEstimator({ warnAt: 2, compressAt: 100 });
            estimator.addText(TWO_TOKEN_TEXT);
            expect(estimator.checkThreshold()).toBe('warn');
        });
        it('returns "warn" only once per threshold crossing', () => {
            const estimator = createEstimator({ warnAt: 2, compressAt: 100 });
            estimator.addText(TWO_TOKEN_TEXT); // crosses warn
            const firstCheck = estimator.checkThreshold();
            expect(firstCheck).toBe('warn');
            estimator.addText('more');
            const secondCheck = estimator.checkThreshold();
            expect(secondCheck).toBeNull();
        });
        it('returns "compress" when crossing the compress threshold', () => {
            const estimator = createEstimator({ warnAt: 2, compressAt: 5 });
            // 20 chars -> ceil(20 / 4) = 5 tokens
            estimator.addText('12345678901234567890');
            expect(estimator.checkThreshold()).toBe('compress');
        });
        it('skips "warn" if compress fires first', () => {
            const estimator = createEstimator({ warnAt: 2, compressAt: 3 });
            // 12 chars -> 3 tokens, which crosses both thresholds at once
            estimator.addText('123456789012');
            expect(estimator.checkThreshold()).toBe('compress');
            expect(estimator.checkThreshold()).toBeNull();
        });
        it('shouldWarn reflects threshold state', () => {
            const estimator = createEstimator({ warnAt: 2 });
            expect(estimator.shouldWarn).toBe(false);
            estimator.addText(TWO_TOKEN_TEXT);
            expect(estimator.shouldWarn).toBe(true);
        });
        it('shouldCompress reflects threshold state', () => {
            const estimator = createEstimator({ compressAt: 2 });
            expect(estimator.shouldCompress).toBe(false);
            estimator.addText(TWO_TOKEN_TEXT);
            expect(estimator.shouldCompress).toBe(true);
        });
    });

    // ----- reset ------------------------------------------------------------
    describe('reset', () => {
        it('clears all token counters', () => {
            const estimator = createEstimator();
            estimator.addText('hello');
            estimator.addInputAudio(ONE_SECOND_INPUT_BYTES);
            estimator.addToolCall('fn', {});
            estimator.reset();
            const zeroedSnapshot = {
                textTokens: 0,
                audioTokens: 0,
                toolTokens: 0,
                total: 0,
            };
            expect(estimator.estimate).toEqual(zeroedSnapshot);
        });
        it('re-arms threshold notifications after reset', () => {
            const estimator = createEstimator({ warnAt: 2, compressAt: 100 });
            estimator.addText(TWO_TOKEN_TEXT); // crosses warn
            expect(estimator.checkThreshold()).toBe('warn');
            estimator.reset();
            estimator.addText(TWO_TOKEN_TEXT); // crosses warn again after the reset
            expect(estimator.checkThreshold()).toBe('warn');
        });
    });
});
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Types for the Gemini Multimodal Live API provider.
|
|
3
|
+
*
|
|
4
|
+
* Extracted so that both GeminiLiveProvider (Phase 1.1) and the future
|
|
5
|
+
* GeminiLiveResponder (Phase 1.2) can share them without circular deps.
|
|
6
|
+
*/
|
|
7
|
+
/**
 * Default Gemini Live model ID.
 * NOTE(review): presumably the fallback used when no valid live model is
 * configured — confirm against the provider that imports it.
 */
export const DEFAULT_GEMINI_LIVE_MODEL = 'gemini-3.1-flash-live-preview';
|
|
8
|
+
/**
 * Validate a caller-supplied Gemini Live model ID.
 *
 * The value is trimmed and accepted only when it starts with `gemini-` and
 * contains `live`. Anything else (missing, blank, or a non-live model ID)
 * yields `undefined`, so voice mode can fall back to the provider default
 * instead of sending an invalid model to the Live API.
 *
 * @param {string | null | undefined} model Raw model ID from configuration.
 * @returns {string | undefined} The trimmed model ID, or `undefined` when rejected.
 */
export function normalizeGeminiLiveModel(model) {
    const candidate = model?.trim();
    const looksLiveCapable = Boolean(candidate)
        && candidate.startsWith('gemini-')
        && candidate.includes('live');
    return looksLiveCapable ? candidate : undefined;
}
|
|
19
|
+
/**
 * Whether the given model supports asynchronous (NON_BLOCKING) function calling.
 *
 * Gemini 3.1 Flash Live currently supports synchronous function calling only.
 * Gemini 2.5 Flash Live supports NON_BLOCKING declarations and response scheduling.
 *
 * @param {string | null | undefined} model Model ID; surrounding whitespace is ignored.
 * @returns {boolean} `true` only for IDs matching `gemini-2.5-…live` (case-insensitive).
 *   A missing or non-string model returns `false` instead of throwing.
 */
export function supportsGeminiLiveAsyncFunctionCalling(model) {
    if (typeof model !== 'string') {
        return false;
    }
    return /^gemini-2\.5-.*live/i.test(model.trim());
}
|
|
26
|
+
/**
 * Whether the given model accepts incremental `clientContent` text turns.
 *
 * Gemini 3.1 Flash Live only supports clientContent for initial history seeding.
 * Regular conversational text turns must use realtimeInput.text.
 *
 * @param {string | null | undefined} model Model ID; surrounding whitespace is ignored.
 * @returns {boolean} `true` only for IDs matching `gemini-2.5-…live` (case-insensitive).
 *   A missing or non-string model returns `false` instead of throwing.
 */
export function supportsGeminiLiveIncrementalClientContent(model) {
    if (typeof model !== 'string') {
        return false;
    }
    return /^gemini-2\.5-.*live/i.test(model.trim());
}
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Gemini tool mapper — converts OpenAI function-calling tool schemas into
|
|
3
|
+
* Gemini-compatible function declarations for the Multimodal Live API.
|
|
4
|
+
*
|
|
5
|
+
* Gemini expects:
|
|
6
|
+
* - Uppercase type names (STRING, NUMBER, OBJECT, BOOLEAN, ARRAY, INTEGER)
|
|
7
|
+
* - No `additionalProperties` field
|
|
8
|
+
* - Function declarations as `{ name, description, parameters }` (no `type: 'function'` wrapper)
|
|
9
|
+
*
|
|
10
|
+
* The mapper is intentionally stateless and pure — it transforms schemas at
|
|
11
|
+
* session setup time and has no runtime side effects.
|
|
12
|
+
*/
|
|
13
|
+
// ---------------------------------------------------------------------------
|
|
14
|
+
// Type mapping
|
|
15
|
+
// ---------------------------------------------------------------------------
|
|
16
|
+
/**
 * JSON Schema type name → Gemini type name.
 * A Map (rather than a plain object) so lookups never resolve inherited keys
 * such as `constructor` or `toString`.
 */
const TYPE_MAP = new Map([
    ['string', 'STRING'],
    ['number', 'NUMBER'],
    ['integer', 'INTEGER'],
    ['boolean', 'BOOLEAN'],
    ['object', 'OBJECT'],
    ['array', 'ARRAY'],
]);
/**
 * Convert a JSON Schema type string to Gemini's uppercase equivalent.
 * Returns the input uppercased if not in the known map (defensive).
 */
function mapType(jsonSchemaType) {
    return TYPE_MAP.get(jsonSchemaType) ?? jsonSchemaType.toUpperCase();
}
// ---------------------------------------------------------------------------
// Schema conversion
// ---------------------------------------------------------------------------
/**
 * Recursively convert a JSON Schema object to a GeminiSchema.
 *
 * - Type names are converted to uppercase; a missing type defaults to OBJECT.
 * - A union type array (e.g. `['string', 'null']`) is reduced to its first
 *   non-null member, and `nullable: true` is set when `'null'` was present.
 *   Stringifying the array would otherwise produce an invalid type such as
 *   `'STRING,NULL'`.
 * - `description`, `properties`, `items`, non-empty `required` and non-empty
 *   `enum` are carried over; `required` / `enum` are copied so the result
 *   never aliases arrays owned by the input schema.
 * - `additionalProperties` (and any other keyword) is dropped.
 *
 * @param {object} schema JSON Schema node. A nullish node is treated as `{}`.
 * @returns {object} Newly built Gemini schema node.
 */
export function convertSchema(schema) {
    const source = schema ?? {};
    const declaredType = source.type;
    const isUnion = Array.isArray(declaredType);
    const concreteTypes = isUnion
        ? declaredType.filter((typeName) => typeName !== 'null')
        : [declaredType];
    const result = {
        type: mapType(String(concreteTypes[0] ?? 'object')),
    };
    if (isUnion && concreteTypes.length < declaredType.length) {
        result.nullable = true;
    }
    if (typeof source.description === 'string') {
        result.description = source.description;
    }
    if (source.properties != null && typeof source.properties === 'object') {
        result.properties = {};
        for (const [key, value] of Object.entries(source.properties)) {
            result.properties[key] = convertSchema(value);
        }
    }
    if (source.items != null && typeof source.items === 'object') {
        result.items = convertSchema(source.items);
    }
    if (Array.isArray(source.required) && source.required.length > 0) {
        result.required = [...source.required];
    }
    if (Array.isArray(source.enum) && source.enum.length > 0) {
        result.enum = [...source.enum];
    }
    // additionalProperties is intentionally omitted — Gemini doesn't support it.
    return result;
}
|
|
64
|
+
// ---------------------------------------------------------------------------
|
|
65
|
+
// Public API
|
|
66
|
+
// ---------------------------------------------------------------------------
|
|
67
|
+
/**
 * Convert an array of OpenAI function tool schemas to a Gemini tools config.
 *
 * @param {Array<{ function: { name: string, description?: string, parameters?: object } }>} openaiTools
 *   OpenAI-style tool entries; only the `function` member is read.
 * @param {{ nonBlockingFunctionNames?: Iterable<string> }} [opts]
 *   Names of functions whose declaration should carry `behavior: 'NON_BLOCKING'`.
 * @returns {{ functionDeclarations: object[] } | undefined}
 *   `undefined` when the input is empty or nullish (no tools block needed).
 */
export function toGeminiTools(openaiTools, opts = {}) {
    // Treat a missing tool list like an empty one rather than throwing on `.length`.
    if (openaiTools == null || openaiTools.length === 0) {
        return undefined;
    }
    // `opts?.` also tolerates an explicit `null`, which the default parameter does not cover.
    const nonBlockingFunctionNames = new Set(opts?.nonBlockingFunctionNames ?? []);
    const declarations = openaiTools.map((tool) => {
        const { name, description, parameters } = tool.function;
        const decl = { name, description };
        // Only include parameters when the schema is a non-empty object.
        if (parameters && typeof parameters === 'object' && Object.keys(parameters).length > 0) {
            decl.parameters = convertSchema(parameters);
        }
        if (nonBlockingFunctionNames.has(name)) {
            decl.behavior = 'NON_BLOCKING';
        }
        return decl;
    });
    return { functionDeclarations: declarations };
}
|
|
@@ -0,0 +1,253 @@
|
|
|
1
|
+
import { describe, expect, it } from 'vitest';
|
|
2
|
+
import { toGeminiTools, convertSchema } from './gemini-tool-mapper.js';
|
|
3
|
+
import { buildToolSchemas } from '../../runtime/openai-tool-schemas.js';
|
|
4
|
+
// ---------------------------------------------------------------------------
|
|
5
|
+
// convertSchema
|
|
6
|
+
// ---------------------------------------------------------------------------
|
|
7
|
+
describe('convertSchema', () => {
    /** Convert `input` and require the result to deep-equal `expected`. */
    const expectConversion = (input, expected) => {
        expect(convertSchema(input)).toEqual(expected);
    };

    it('converts lowercase types to uppercase', () => {
        expectConversion(
            { type: 'string', description: 'A name' },
            { type: 'STRING', description: 'A name' },
        );
    });
    it('converts object with properties recursively', () => {
        const input = {
            type: 'object',
            properties: {
                file_path: { type: 'string', description: 'Path' },
                count: { type: 'number', description: 'How many' },
            },
            required: ['file_path'],
        };
        const expected = {
            type: 'OBJECT',
            properties: {
                file_path: { type: 'STRING', description: 'Path' },
                count: { type: 'NUMBER', description: 'How many' },
            },
            required: ['file_path'],
        };
        expectConversion(input, expected);
    });
    it('strips additionalProperties', () => {
        const converted = convertSchema({
            type: 'object',
            properties: { x: { type: 'string' } },
            additionalProperties: false,
        });
        expect(converted).not.toHaveProperty('additionalProperties');
    });
    it('converts array types with items', () => {
        // The nested item schema carries additionalProperties, which must not survive.
        const itemSchema = {
            type: 'object',
            properties: {
                tool: { type: 'string', description: 'Tool name' },
            },
            required: ['tool'],
            additionalProperties: true,
        };
        expectConversion(
            { type: 'array', items: itemSchema },
            {
                type: 'ARRAY',
                items: {
                    type: 'OBJECT',
                    properties: {
                        tool: { type: 'STRING', description: 'Tool name' },
                    },
                    required: ['tool'],
                },
            },
        );
    });
    it('handles boolean type', () => {
        expectConversion(
            { type: 'boolean', description: 'Flag' },
            { type: 'BOOLEAN', description: 'Flag' },
        );
    });
    it('handles integer type', () => {
        expectConversion({ type: 'integer' }, { type: 'INTEGER' });
    });
    it('preserves enum values', () => {
        expectConversion(
            { type: 'string', enum: ['a', 'b', 'c'] },
            { type: 'STRING', enum: ['a', 'b', 'c'] },
        );
    });
    it('defaults to OBJECT when type is missing', () => {
        const { type } = convertSchema({ properties: { x: { type: 'string' } } });
        expect(type).toBe('OBJECT');
    });
    it('omits description when not present', () => {
        const converted = convertSchema({ type: 'string' });
        expect(converted).toEqual({ type: 'STRING' });
        // No stray keys (e.g. `description: undefined`) may be present.
        expect(Object.keys(converted)).toEqual(['type']);
    });
    it('omits required when empty', () => {
        const converted = convertSchema({ type: 'object', required: [] });
        expect(converted).not.toHaveProperty('required');
    });
});
|
|
87
|
+
// ---------------------------------------------------------------------------
|
|
88
|
+
// toGeminiTools
|
|
89
|
+
// ---------------------------------------------------------------------------
|
|
90
|
+
describe('toGeminiTools', () => {
    /** Build an OpenAI-style function tool entry. */
    const functionTool = (name, description, parameters) => ({
        type: 'function',
        function: { name, description, parameters },
    });
    /** Build an object parameter schema; `extras` adds keywords such as additionalProperties. */
    const objectParams = (properties, required, extras = {}) => ({
        type: 'object',
        properties,
        required,
        ...extras,
    });
    /** Visit a schema node, then its properties, then its items — depth first. */
    function visitSchema(node, path, visit) {
        visit(node, path);
        if (node.properties && typeof node.properties === 'object') {
            Object.entries(node.properties).forEach(([key, child]) => {
                visitSchema(child, `${path}.${key}`, visit);
            });
        }
        if (node.items && typeof node.items === 'object') {
            visitSchema(node.items, `${path}.items`, visit);
        }
    }
    /** Run `visit` over every node of every declaration that has parameters. */
    function visitDeclarations(toolsConfig, visit) {
        for (const { name, parameters } of toolsConfig.functionDeclarations) {
            if (parameters) {
                visitSchema(parameters, name, visit);
            }
        }
    }
    const FILE_TOOL_NAMES = ['Read', 'Write', 'Edit', 'Glob', 'Grep', 'Bash'];
    const FULL_TOOL_NAMES = [...FILE_TOOL_NAMES, 'WebSearch', 'WebFetch'];

    it('returns undefined for empty input', () => {
        expect(toGeminiTools([])).toBeUndefined();
    });
    it('converts a single OpenAI tool to Gemini function declaration', () => {
        const webSearch = functionTool(
            'web_search',
            'Search the web.',
            objectParams(
                { query: { type: 'string', description: 'Search query.' } },
                ['query'],
                { additionalProperties: false },
            ),
        );
        const converted = toGeminiTools([webSearch]);
        expect(converted).toEqual({
            functionDeclarations: [{
                    name: 'web_search',
                    description: 'Search the web.',
                    parameters: {
                        type: 'OBJECT',
                        properties: {
                            query: { type: 'STRING', description: 'Search query.' },
                        },
                        required: ['query'],
                    },
                }],
        });
    });
    it('converts multiple tools', () => {
        const readFile = functionTool(
            'read_file',
            'Read a file.',
            objectParams(
                { file_path: { type: 'string', description: 'Path.' } },
                ['file_path'],
                { additionalProperties: false },
            ),
        );
        const bash = functionTool(
            'bash',
            'Run a command.',
            objectParams(
                { command: { type: 'string', description: 'Command.' } },
                ['command'],
                { additionalProperties: false },
            ),
        );
        const converted = toGeminiTools([readFile, bash]);
        expect(converted?.functionDeclarations).toHaveLength(2);
        expect(converted?.functionDeclarations[0].name).toBe('read_file');
        expect(converted?.functionDeclarations[1].name).toBe('bash');
    });
    it('marks configured Gemini declarations as NON_BLOCKING', () => {
        const readFile = functionTool(
            'read_file',
            'Read a file.',
            objectParams({ file_path: { type: 'string' } }, ['file_path']),
        );
        const converted = toGeminiTools([readFile], { nonBlockingFunctionNames: ['read_file'] });
        expect(converted?.functionDeclarations[0]).toMatchObject({
            name: 'read_file',
            behavior: 'NON_BLOCKING',
        });
    });
    it('none of the declarations have additionalProperties', () => {
        const converted = toGeminiTools(buildToolSchemas([...FILE_TOOL_NAMES]));
        visitDeclarations(converted, (node) => {
            expect(node).not.toHaveProperty('additionalProperties');
        });
    });
    it('all type names are uppercase in converted schemas', () => {
        const converted = toGeminiTools(buildToolSchemas([...FULL_TOOL_NAMES]));
        visitDeclarations(converted, (node, path) => {
            if (typeof node.type === 'string') {
                expect(node.type, `type at ${path}`).toMatch(/^[A-Z]+$/);
            }
        });
    });
    it('integrates with buildToolSchemas for full tool set', () => {
        const converted = toGeminiTools(buildToolSchemas([...FULL_TOOL_NAMES]));
        expect(converted).toBeDefined();
        const declarations = converted.functionDeclarations;
        expect(declarations).toHaveLength(8);
        expect(declarations.map((decl) => decl.name)).toEqual([
            'read_file', 'write_file', 'edit_file', 'list_files',
            'search_content', 'bash', 'web_search', 'web_fetch',
        ]);
        // Every declaration must expose a name, a description and OBJECT parameters.
        declarations.forEach((decl) => {
            expect(typeof decl.name).toBe('string');
            expect(typeof decl.description).toBe('string');
            expect(decl.parameters).toBeDefined();
            expect(decl.parameters.type).toBe('OBJECT');
        });
    });
    it('handles Pipeline expansion through buildToolSchemas', () => {
        const converted = toGeminiTools(buildToolSchemas(['Pipeline']));
        expect(converted).toBeDefined();
        const declaredNames = converted.functionDeclarations.map((decl) => decl.name);
        expect(declaredNames).toEqual([
            'pipeline.start', 'pipeline.status', 'pipeline.resume', 'pipeline.cancel',
        ]);
    });
});
|
package/dist/voice/types.test.js
CHANGED
|
@@ -13,6 +13,12 @@ describe('Voice types', () => {
|
|
|
13
13
|
const cfg2 = cfg;
|
|
14
14
|
expect(cfg2).toBe(cfg);
|
|
15
15
|
});
|
|
16
|
+
it('VoicePipelineMode accepts valid values', () => {
|
|
17
|
+
const pipeline = 'pipeline';
|
|
18
|
+
const gemini = 'gemini-live';
|
|
19
|
+
expect(pipeline).toBe('pipeline');
|
|
20
|
+
expect(gemini).toBe('gemini-live');
|
|
21
|
+
});
|
|
16
22
|
it('VoiceConfig accepts minimal shape (optional fields omitted)', () => {
|
|
17
23
|
const cfg = {
|
|
18
24
|
enabled: false,
|