tuna-agent 0.1.123 → 0.1.124
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
|
@@ -82,11 +82,18 @@ async function visionDescribe(frameB64, voiceoverText) {
|
|
|
82
82
|
headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${OPENAI_KEY}` },
|
|
83
83
|
body: JSON.stringify({
|
|
84
84
|
model: 'gpt-4o-mini',
|
|
85
|
-
max_tokens:
|
|
85
|
+
max_tokens: 300,
|
|
86
86
|
messages: [{
|
|
87
87
|
role: 'user',
|
|
88
88
|
content: [
|
|
89
|
-
{ type: 'text', text: `
|
|
89
|
+
{ type: 'text', text: `Describe this frame in detail (3-5 sentences, English). Include:
|
|
90
|
+
- Characters: appearance (shape, color, size), facial expression, what they're doing
|
|
91
|
+
- Spatial relationships: where each character is positioned relative to others, are they connected/attached/overlapping/standing apart?
|
|
92
|
+
- Environment: setting, lighting, color palette, atmosphere
|
|
93
|
+
- Camera: angle, framing (close-up, wide, etc.)
|
|
94
|
+
- Action: what is happening in this moment
|
|
95
|
+
|
|
96
|
+
Voiceover at this moment: "${voiceoverText || '(none)'}"` },
|
|
90
97
|
{ type: 'image_url', image_url: { url: `data:image/jpeg;base64,${frameB64}` } },
|
|
91
98
|
],
|
|
92
99
|
}],
|