@aj-archipelago/cortex 1.3.54 → 1.3.56
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config/default.example.json +2 -2
- package/config.js +18 -0
- package/lib/entityConstants.js +2 -0
- package/lib/util.js +56 -1
- package/package.json +1 -1
- package/pathways/image_flux.js +1 -0
- package/pathways/system/entity/sys_entity_agent.js +5 -2
- package/pathways/system/entity/tools/sys_tool_image.js +13 -3
- package/pathways/system/sys_test_response_reasonableness.js +20 -0
- package/pathways/video_seedance.js +17 -0
- package/pathways/video_veo.js +31 -0
- package/server/modelExecutor.js +4 -0
- package/server/plugins/replicateApiPlugin.js +50 -0
- package/server/plugins/veoVideoPlugin.js +218 -0
- package/helper-apps/cortex-file-handler/src/hashUtils.js +0 -91
package/config/default.example.json
CHANGED

@@ -37,7 +37,7 @@
     },
     "gemini-pro-15-vision": {
         "type": "GEMINI-VISION",
-        "url": "https://us-central1-aiplatform.googleapis.com/v1/projects/project-id/locations/us-central1/publishers/google/models/gemini-1.5-pro
+        "url": "https://us-central1-aiplatform.googleapis.com/v1/projects/project-id/locations/us-central1/publishers/google/models/gemini-1.5-pro:streamGenerateContent",
         "headers": {
             "Content-Type": "application/json"
         },
@@ -48,7 +48,7 @@
     },
     "gemini-pro-25-vision": {
         "type": "GEMINI-VISION",
-        "url": "https://us-central1-aiplatform.googleapis.com/v1/projects/project-id/locations/us-central1/publishers/google/models/gemini-2.5-pro
+        "url": "https://us-central1-aiplatform.googleapis.com/v1/projects/project-id/locations/us-central1/publishers/google/models/gemini-2.5-pro:streamGenerateContent",
         "headers": {
             "Content-Type": "application/json"
         },
package/config.js
CHANGED
@@ -290,6 +290,15 @@ var config = convict({
             "Content-Type": "application/json"
         },
     },
+    "replicate-seedance-1-pro": {
+        "type": "REPLICATE-API",
+        "url": "https://api.replicate.com/v1/models/bytedance/seedance-1-pro/predictions",
+        "headers": {
+            "Prefer": "wait",
+            "Authorization": "Token {{REPLICATE_API_KEY}}",
+            "Content-Type": "application/json"
+        },
+    },
     "replicate-flux-11-pro": {
         "type": "REPLICATE-API",
         "url": "https://api.replicate.com/v1/models/black-forest-labs/flux-1.1-pro/predictions",
@@ -344,6 +353,15 @@ var config = convict({
             "Content-Type": "application/json"
        },
     },
+    "replicate-multi-image-kontext-max": {
+        "type": "REPLICATE-API",
+        "url": "https://api.replicate.com/v1/models/flux-kontext-apps/multi-image-kontext-max/predictions",
+        "headers": {
+            "Prefer": "wait",
+            "Authorization": "Token {{REPLICATE_API_KEY}}",
+            "Content-Type": "application/json"
+        },
+    },
     "azure-video-translate": {
         "type": "AZURE-VIDEO-TRANSLATE",
         "url": "https://eastus.api.cognitive.microsoft.com/videotranslation",
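
The Authorization headers for both new model entries follow the same `{{REPLICATE_API_KEY}}` placeholder convention as the existing Replicate entries. A minimal sketch of how such a placeholder can be resolved against environment-style values; the resolveHeaders helper here is illustrative, not part of the package:

    // Hypothetical helper: substitute {{NAME}} placeholders in header values
    // from an environment-style map before a request is sent.
    const resolveHeaders = (headers, env) =>
        Object.fromEntries(
            Object.entries(headers).map(([key, value]) => [
                key,
                value.replace(/\{\{(\w+)\}\}/g, (_, name) => env[name] ?? ""),
            ])
        );

    // resolveHeaders({ Authorization: "Token {{REPLICATE_API_KEY}}" }, process.env)
    // → { Authorization: "Token r8_..." }
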
package/lib/entityConstants.js
CHANGED
@@ -120,6 +120,8 @@ term~N (Match terms similar to "term", edit distance N)
 
     AI_GROUNDING_INSTRUCTIONS: "# Grounding Responses\n\nIf you base part or all of your response on one or more search results, you MUST cite the source using a custom markdown directive of the form :cd_source[searchResultId]. There is NO other valid way to cite a source and a good UX depends on you using this directive correctly. Do not include other clickable links to the sourcewhen using the :cd_source[searchResultId] directive. Every search result has a unique searchResultId. You must include it verbatim, copied directly from the search results. Place the directives at the end of the phrase, sentence or paragraph that is grounded in that particular search result. If you are citing multiple search results, use multiple individual :cd_source[searchResultId] directives (e.g. :cd_source[searchResultId1] :cd_source[searchResultId2] :cd_source[searchResultId3] etc.)",
 
+    AI_AVAILABLE_FILES: "# Available Files\n\nThe following files are available for you to use in your tool calls or responses:\n{{{availableFiles}}}\n",
+
     AI_MEMORY_INSTRUCTIONS: `# Memory Instructions
 
 You have a memory system that contains important details, instructions, and context. Consult your memories when formulating a response to ensure your answers reflect previous learnings and context.
package/lib/util.js
CHANGED
@@ -310,6 +310,60 @@ function removeImageAndFileFromMessage(message) {
     return modifiedMessage;
 }
 
+// Helper function to extract file URLs from a content object
+function extractFileUrlsFromContent(contentObj) {
+    const urls = [];
+    if (contentObj.type === 'image_url' && contentObj.image_url?.url) {
+        urls.push(contentObj.image_url.url);
+    } else if (contentObj.type === 'file' && contentObj.file) {
+        urls.push(contentObj.file);
+    }
+    return urls;
+}
+
+function getAvailableFiles(chatHistory) {
+    const availableFiles = [];
+
+    if (!chatHistory || !Array.isArray(chatHistory)) {
+        return availableFiles;
+    }
+
+    for (const message of chatHistory) {
+        if (!message || !message.content) {
+            continue;
+        }
+
+        // Handle array content
+        if (Array.isArray(message.content)) {
+            for (const content of message.content) {
+                try {
+                    const contentObj = typeof content === 'string' ? JSON.parse(content) : content;
+                    availableFiles.push(...extractFileUrlsFromContent(contentObj));
+                } catch (e) {
+                    // Not JSON or couldn't be parsed, continue
+                    continue;
+                }
+            }
+        }
+        // Handle string content
+        else if (typeof message.content === 'string') {
+            try {
+                const contentObj = JSON.parse(message.content);
+                availableFiles.push(...extractFileUrlsFromContent(contentObj));
+            } catch (e) {
+                // Not JSON or couldn't be parsed, continue
+                continue;
+            }
+        }
+        // Handle object content
+        else if (typeof message.content === 'object') {
+            availableFiles.push(...extractFileUrlsFromContent(message.content));
+        }
+    }
+
+    return availableFiles;
+}
+
 export {
     getUniqueId,
     getSearchResultId,
@@ -322,5 +376,6 @@ export {
     alignSubtitles,
     getMediaChunks,
     markCompletedForCleanUp,
-    removeOldImageAndFileContent
+    removeOldImageAndFileContent,
+    getAvailableFiles
 };
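
A minimal usage sketch of the new helper, covering the three content shapes handled above (array content, JSON-string content, and object content); the chat history values are illustrative:

    const chatHistory = [
        { role: "user", content: [
            { type: "text", text: "What's in this image?" },
            { type: "image_url", image_url: { url: "https://example.com/cat.png" } },
        ]},
        { role: "user", content: JSON.stringify({ type: "file", file: "https://example.com/report.pdf" }) },
    ];

    getAvailableFiles(chatHistory);
    // → ["https://example.com/cat.png", "https://example.com/report.pdf"]
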
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@aj-archipelago/cortex",
-  "version": "1.3.54",
+  "version": "1.3.56",
   "description": "Cortex is a GraphQL API for AI. It provides a simple, extensible interface for using AI services from OpenAI, Azure and others.",
   "private": false,
   "repository": {
package/pathways/system/entity/sys_entity_agent.js
CHANGED

@@ -5,7 +5,7 @@ const MAX_TOOL_CALLS = 50;
 import { callPathway, callTool, say } from '../../../lib/pathwayTools.js';
 import logger from '../../../lib/logger.js';
 import { config } from '../../../config.js';
-import { chatArgsHasImageUrl, removeOldImageAndFileContent } from '../../../lib/util.js';
+import { chatArgsHasImageUrl, removeOldImageAndFileContent, getAvailableFiles } from '../../../lib/util.js';
 import { Prompt } from '../../../server/prompt.js';
 import { getToolsForEntity, loadEntityConfig } from './tools/shared/sys_entity_tools.js';
 
@@ -275,7 +275,7 @@ export default {
     const instructionTemplates = entityInstructions ? (entityInstructions + '\n\n') : `{{renderTemplate AI_COMMON_INSTRUCTIONS}}\n\n{{renderTemplate AI_EXPERTISE}}\n\n`;
 
     const promptMessages = [
-        {"role": "system", "content": `${promptPrefix}${instructionTemplates}{{renderTemplate AI_TOOLS}}\n\n{{renderTemplate AI_SEARCH_RULES}}\n\n{{renderTemplate AI_SEARCH_SYNTAX}}\n\n{{renderTemplate AI_GROUNDING_INSTRUCTIONS}}\n\n${memoryTemplates}{{renderTemplate AI_DATETIME}}`},
+        {"role": "system", "content": `${promptPrefix}${instructionTemplates}{{renderTemplate AI_TOOLS}}\n\n{{renderTemplate AI_SEARCH_RULES}}\n\n{{renderTemplate AI_SEARCH_SYNTAX}}\n\n{{renderTemplate AI_GROUNDING_INSTRUCTIONS}}\n\n${memoryTemplates}{{renderTemplate AI_AVAILABLE_FILES}}\n\n{{renderTemplate AI_DATETIME}}`},
         "{{chatHistory}}",
     ];
 
@@ -294,6 +294,8 @@ export default {
         args.chatHistory = args.chatHistory.slice(-20);
     }
 
+    const availableFiles = getAvailableFiles(args.chatHistory);
+
     // remove old image and file content
     const visionContentPresent = chatArgsHasImageUrl(args);
     visionContentPresent && (args.chatHistory = removeOldImageAndFileContent(args.chatHistory));
@@ -327,6 +329,7 @@
     let response = await runAllPrompts({
         ...args,
         chatHistory: currentMessages,
+        availableFiles,
         tools: entityToolsOpenAiFormat,
         tool_choice: memoryLookupRequired ? "required" : "auto"
     });
package/pathways/system/entity/tools/sys_tool_image.js
CHANGED

@@ -41,13 +41,17 @@
     icon: "🔄",
     function: {
         name: "ModifyImage",
-        description: "Use when asked to modify, transform, or edit an existing image. This tool can apply various transformations like style changes, artistic effects, or specific modifications to an image that has been previously uploaded or generated.",
+        description: "Use when asked to modify, transform, or edit an existing image. This tool can apply various transformations like style changes, artistic effects, or specific modifications to an image that has been previously uploaded or generated. It takes up to two input images as a reference and outputs a new image based on the instructions.",
         parameters: {
             type: "object",
             properties: {
                 inputImage: {
                     type: "string",
-                    description: "The
+                    description: "The first image URL copied exactly from an image_url field in your chat context."
+                },
+                inputImage2: {
+                    type: "string",
+                    description: "The second input image URL copied exactly from an image_url field in your chat context if there is one."
                 },
                 detailedInstructions: {
                     type: "string",
@@ -77,6 +81,11 @@
         model = "replicate-flux-kontext-max";
     }
 
+    // If we have two input images, use the multi-image-kontext-max model
+    if (args.inputImage2) {
+        model = "replicate-multi-image-kontext-max";
+    }
+
     pathwayResolver.tool = JSON.stringify({ toolUsed: "image" });
     return await callPathway('image_flux', {
         ...args,
@@ -85,7 +94,8 @@
         numberResults,
         model,
         stream: false,
-        input_image: args.inputImage
+        input_image: args.inputImage,
+        input_image_2: args.inputImage2,
     });
 
 } catch (e) {
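
A sketch of the tool-call arguments an agent might emit against the updated schema; when inputImage2 is present, the handler above switches to the replicate-multi-image-kontext-max model. URLs and instructions are illustrative:

    const toolCallArgs = {
        inputImage: "https://example.com/portrait.png",    // first reference image
        inputImage2: "https://example.com/background.png", // presence triggers the multi-image model
        detailedInstructions: "Place the subject of the first image into the scene from the second.",
    };
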
package/pathways/system/sys_test_response_reasonableness.js
ADDED

@@ -0,0 +1,20 @@
+import { Prompt } from '../../server/prompt.js';
+
+export default {
+    prompt: [
+        new Prompt({
+            messages: [
+                { "role": "system", "content": "Assistant is a response quality evaluator. When given a chat history and a response to evaluate, assistant will carefully analyze whether the response is reasonable given the context of the conversation. Primarily the assistant is trying to detect errors and hallucinations in the underlying models. As long as the response looks basically correct and relevant, even if it's not ideal or complete or contradicts some rules, it should be isReasonable: true.\n\nAssistant will return a JSON object with the following structure:\n{\n \"isReasonable\": boolean,\n \"score\": number (0-10),\n \"reasoning\": string\n}\n\nWhere:\n- isReasonable: Overall judgment of whether the response is reasonable\n- score: Numerical score from 0-10 (10 being excellent)\n- reasoning: Brief explanation of the judgment" },
+                "{{chatHistory}}",
+                { "role": "user", "content": `The response to the above conversation from the model was: '{{{modelResponse}}}'. Is that response acceptable?`},
+            ]
+        })
+    ],
+    inputParameters: {
+        chatHistory: [{role: '', content: []}],
+        modelResponse: '',
+    },
+    model: 'oai-gpt4o-mini',
+    enableDuplicateRequests: false,
+    json: true
+}
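
Since the pathway sets json: true, callers can expect the structure described in the system prompt; an illustrative result:

    {
        "isReasonable": true,
        "score": 8,
        "reasoning": "The response directly addresses the question and shows no sign of hallucination."
    }
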
package/pathways/video_seedance.js
ADDED

@@ -0,0 +1,17 @@
+export default {
+    prompt: ["{{text}}"],
+
+    enableDuplicateRequests: false,
+    inputParameters: {
+        model: "replicate-seedance-1-pro",
+        resolution: "1080p",
+        aspectRatio: "16:9",
+        fps: 24,
+        duration: 5,
+        image: "",
+        camera_fixed: false,
+        seed: -1,
+    },
+
+    timeout: 60 * 30, // 30 minutes
+};
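
A minimal sketch of invoking the new pathway, assuming the internal callPathway helper imported in sys_tool_image.js above is available in the calling context; parameter names mirror inputParameters and the values are illustrative:

    const videoResult = await callPathway('video_seedance', {
        text: 'A drone shot over a coastline at sunrise',
        resolution: '1080p',
        aspectRatio: '16:9',
        duration: 5,
    });
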
package/pathways/video_veo.js
ADDED

@@ -0,0 +1,31 @@
+// video_veo.js
+// Pathway for generating videos using Google's Veo model via Vertex AI
+//
+// Model-specific constraints:
+// - Veo 2.0: durationSeconds 5-8, no generateAudio, supports lastFrame/video
+// - Veo 3.0: durationSeconds always 8, generateAudio required, no lastFrame/video
+
+export default {
+    prompt: ["Generate a video based on the following description: {{text}}"],
+
+    enableDuplicateRequests: false,
+    inputParameters: {
+        text: "",
+        image: "",
+        video: "",
+        lastFrame: "",
+        model: "veo-2.0-generate",
+        aspectRatio: "16:9",
+        durationSeconds: 8, // 5-8 for 2.0, always 8 for 3.0
+        enhancePrompt: true,
+        generateAudio: false, // not supported in 2.0, required in 3.0
+        negativePrompt: "",
+        personGeneration: "allow_all",
+        sampleCount: 1,
+        storageUri: "",
+        location: "us-central1",
+        seed: -1,
+    },
+
+    timeout: 60 * 30, // 30 minutes
+};
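
A companion sketch for the Veo pathway under the same callPathway assumption; per the constraints in the file header above, a veo-3.0-generate call must use durationSeconds: 8 and supply generateAudio (values illustrative):

    const veoResult = await callPathway('video_veo', {
        text: 'Timelapse of clouds rolling over a mountain ridge',
        model: 'veo-3.0-generate',
        durationSeconds: 8,   // always 8 for 3.0
        generateAudio: true,  // required in 3.0
    });
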
package/server/modelExecutor.js
CHANGED
@@ -29,6 +29,7 @@ import ReplicateApiPlugin from './plugins/replicateApiPlugin.js';
 import AzureVideoTranslatePlugin from './plugins/azureVideoTranslatePlugin.js';
 import OllamaChatPlugin from './plugins/ollamaChatPlugin.js';
 import OllamaCompletionPlugin from './plugins/ollamaCompletionPlugin.js';
+import VeoVideoPlugin from './plugins/veoVideoPlugin.js';
 
 class ModelExecutor {
     constructor(pathway, model) {
@@ -117,6 +118,9 @@ class ModelExecutor {
             case 'OLLAMA-COMPLETION':
                 plugin = new OllamaCompletionPlugin(pathway, model);
                 break;
+            case 'VEO-VIDEO':
+                plugin = new VeoVideoPlugin(pathway, model);
+                break;
             default:
                 throw new Error(`Unsupported model type: ${model.type}`);
         }
package/server/plugins/replicateApiPlugin.js
CHANGED

@@ -35,6 +35,7 @@ class ReplicateApiPlugin extends ModelPlugin {
                     height: combinedParameters.height,
                     size: combinedParameters.size || "1024x1024",
                     style: combinedParameters.style || "realistic_image",
+                    ...(combinedParameters.seed && Number.isInteger(combinedParameters.seed) ? { seed: combinedParameters.seed } : {}),
                 },
             };
             break;
@@ -98,11 +99,60 @@ class ReplicateApiPlugin extends ModelPlugin {
                     '5:4', '3:4', '4:3', '9:16', '9:21', 'match_input_image'
                 ];
 
+                let safetyTolerance = combinedParameters.safety_tolerance || 3;
+                if(combinedParameters.input_image){
+                    safetyTolerance = Math.min(safetyTolerance, 2);
+                }
+
                 requestParameters = {
                     input: {
                         prompt: modelPromptText,
                         input_image: combinedParameters.input_image,
                         aspect_ratio: validRatios.includes(combinedParameters.aspectRatio) ? combinedParameters.aspectRatio : "1:1",
+                        safety_tolerance: safetyTolerance,
+                        ...(combinedParameters.seed && Number.isInteger(combinedParameters.seed && combinedParameters.seed > 0) ? { seed: combinedParameters.seed } : {}),
+                    },
+                };
+                break;
+            }
+            case "replicate-multi-image-kontext-max": {
+                const validRatios = [
+                    '1:1', '16:9', '21:9', '3:2', '2:3', '4:5',
+                    '5:4', '3:4', '4:3', '9:16', '9:21', 'match_input_image'
+                ];
+
+                let safetyTolerance = combinedParameters.safety_tolerance || 3;
+                if(combinedParameters.input_image_1 || combinedParameters.input_image) {
+                    safetyTolerance = Math.min(safetyTolerance, 2);
+                }
+
+                requestParameters = {
+                    input: {
+                        prompt: modelPromptText,
+                        input_image_1: combinedParameters.input_image_1 || combinedParameters.input_image,
+                        input_image_2: combinedParameters.input_image_2,
+                        aspect_ratio: validRatios.includes(combinedParameters.aspectRatio) ? combinedParameters.aspectRatio : "1:1",
+                        safety_tolerance: safetyTolerance,
+                        ...(combinedParameters.seed && Number.isInteger(combinedParameters.seed && combinedParameters.seed > 0) ? { seed: combinedParameters.seed } : {}),
+                    },
+                };
+                break;
+            }
+            case "replicate-seedance-1-pro": {
+                const validResolutions = ["480p", "1080p"];
+                const validRatios = ["16:9", "4:3", "9:16", "1:1", "3:4", "21:9", "9:21"];
+                const validFps = [24];
+
+                requestParameters = {
+                    input: {
+                        prompt: modelPromptText,
+                        resolution: validResolutions.includes(combinedParameters.resolution) ? combinedParameters.resolution : "1080p",
+                        aspect_ratio: validRatios.includes(combinedParameters.aspectRatio) ? combinedParameters.aspectRatio : "16:9",
+                        ...(combinedParameters.seed && Number.isInteger(combinedParameters.seed && combinedParameters.seed > 0) ? { seed: combinedParameters.seed } : {}),
+                        fps: validFps.includes(combinedParameters.fps) ? combinedParameters.fps : 24,
+                        camera_fixed: combinedParameters.camera_fixed || false,
+                        duration: combinedParameters.duration || 5,
+                        ...(combinedParameters.image ? { image: combinedParameters.image } : {}),
                     },
                 };
                 break;
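
Note that the seed guards in the kontext and seedance cases (and in veoVideoPlugin.js below) read Number.isInteger(combinedParameters.seed && combinedParameters.seed > 0), which passes a boolean to Number.isInteger and therefore always evaluates to false, so the seed is never forwarded; only the first hunk's form is correct. A sketch of the presumably intended check:

    // Presumably intended: forward seed only when it is a positive integer.
    const seedParam = (seed) =>
        Number.isInteger(seed) && seed > 0 ? { seed } : {};

    // seedParam(42) → { seed: 42 }
    // seedParam(-1) → {}   (the new pathways default seed to -1)
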
package/server/plugins/veoVideoPlugin.js
ADDED

@@ -0,0 +1,218 @@
+import ModelPlugin from "./modelPlugin.js";
+import logger from "../../lib/logger.js";
+import axios from "axios";
+
+class VeoVideoPlugin extends ModelPlugin {
+    constructor(pathway, model) {
+        super(pathway, model);
+    }
+
+    // Set up parameters specific to the Veo API
+    getRequestParameters(text, parameters, prompt) {
+        const combinedParameters = { ...this.promptParameters, ...parameters };
+        const { modelPromptText } = this.getCompiledPrompt(
+            text,
+            parameters,
+            prompt,
+        );
+
+        // Available Veo models
+        const availableModels = {
+            'veo-2.0-generate': 'GA',
+            'veo-3.0-generate': 'Preview'
+        };
+
+        // Get the model ID from the pathway or use default
+        const model = combinedParameters.model || 'veo-2.0-generate';
+
+        if (!availableModels[model]) {
+            throw new Error(`Invalid Veo model ID: ${model}. Available models: ${Object.keys(availableModels).join(', ')}`);
+        }
+
+        // Validate model-specific parameter constraints
+        this.validateModelSpecificParameters(combinedParameters, model);
+
+        // Build the request parameters based on Veo API documentation
+        const requestParameters = {
+            instances: [
+                {
+                    prompt: modelPromptText,
+                    // Optional input media fields
+                    ...(combinedParameters.image && { image: JSON.parse(combinedParameters.image) }),
+                    // lastFrame and video are only supported in 2.0
+                    ...(model === 'veo-2.0-generate' && combinedParameters.lastFrame && { lastFrame: JSON.parse(combinedParameters.lastFrame) }),
+                    ...(model === 'veo-2.0-generate' && combinedParameters.video && { video: JSON.parse(combinedParameters.video) }),
+                }
+            ],
+            parameters: {
+                // Generation parameters
+                ...(combinedParameters.aspectRatio && { aspectRatio: combinedParameters.aspectRatio }),
+                ...(combinedParameters.durationSeconds && { durationSeconds: combinedParameters.durationSeconds }),
+                ...(combinedParameters.enhancePrompt !== undefined && { enhancePrompt: combinedParameters.enhancePrompt }),
+                // generateAudio is required for 3.0 and not supported by 2.0
+                ...(model === 'veo-3.0-generate' && { generateAudio: combinedParameters.generateAudio !== undefined ? combinedParameters.generateAudio : true }),
+                ...(combinedParameters.negativePrompt && { negativePrompt: combinedParameters.negativePrompt }),
+                ...(combinedParameters.personGeneration && { personGeneration: combinedParameters.personGeneration }),
+                ...(combinedParameters.sampleCount && { sampleCount: combinedParameters.sampleCount }),
+                ...(combinedParameters.seed && Number.isInteger(combinedParameters.seed && combinedParameters.seed > 0) ? { seed: combinedParameters.seed } : {}),
+                ...(combinedParameters.storageUri && { storageUri: combinedParameters.storageUri }),
+            }
+        };
+
+        return requestParameters;
+    }
+
+    // Validate model-specific parameter constraints
+    validateModelSpecificParameters(parameters, model) {
+        // Duration constraints
+        if (parameters.durationSeconds !== undefined) {
+            if (model === 'veo-3.0-generate' && parameters.durationSeconds !== 8) {
+                throw new Error(`Veo 3.0 only supports durationSeconds: 8, got: ${parameters.durationSeconds}`);
+            }
+            if (model === 'veo-2.0-generate' && (parameters.durationSeconds < 5 || parameters.durationSeconds > 8)) {
+                throw new Error(`Veo 2.0 supports durationSeconds between 5-8, got: ${parameters.durationSeconds}`);
+            }
+        }
+
+        // lastFrame and video constraints
+        if (model === 'veo-3.0-generate') {
+            if (parameters.lastFrame) {
+                throw new Error('lastFrame parameter is not supported in Veo 3.0');
+            }
+            if (parameters.video) {
+                throw new Error('video parameter is not supported in Veo 3.0');
+            }
+        }
+
+        // generateAudio constraints
+        if (model === 'veo-2.0-generate' && parameters.generateAudio) {
+            throw new Error('generateAudio parameter is not supported in Veo 2.0');
+        }
+        if (model === 'veo-3.0-generate' && parameters.generateAudio === undefined) {
+            logger.warn('generateAudio is required for Veo 3.0, defaulting to true');
+        }
+    }
+
+    // Execute the request to the Veo API
+    async execute(text, parameters, prompt, cortexRequest) {
+        const requestParameters = this.getRequestParameters(
+            text,
+            parameters,
+            prompt,
+        );
+
+        cortexRequest.data = requestParameters;
+        cortexRequest.params = requestParameters.params;
+
+        // Get the model ID for the URL
+        const model = parameters.model || 'veo-2.0-generate';
+
+        // Use the URL from the model configuration (cortexRequest.url is set by Cortex)
+        const baseUrl = cortexRequest.url;
+        const predictUrl = `${baseUrl}:predictLongRunning`;
+
+        // Set up the request
+        const requestConfig = {
+            method: 'POST',
+            url: predictUrl,
+            headers: {
+                'Content-Type': 'application/json',
+                ...cortexRequest.headers
+            },
+            data: requestParameters
+        };
+
+        // Get authentication token
+        const gcpAuthTokenHelper = this.config.get('gcpAuthTokenHelper');
+        const authToken = await gcpAuthTokenHelper.getAccessToken();
+        requestConfig.headers.Authorization = `Bearer ${authToken}`;
+
+        logger.info(`Starting Veo video generation with model: ${model}`);
+
+        try {
+            // Make initial request to start video generation
+            const response = await axios(requestConfig);
+            const operationName = response.data.name;
+
+            if (!operationName) {
+                throw new Error("No operation name returned from Veo API");
+            }
+
+            logger.info(`Veo video generation started. Operation: ${operationName}`);
+
+            // Poll for results
+            const maxAttempts = 120; // 10 minutes with 5 second intervals
+            const pollInterval = 5000;
+
+            for (let attempt = 0; attempt < maxAttempts; attempt++) {
+                try {
+                    // Poll the operation status
+                    const pollResponse = await axios.post(
+                        `${baseUrl}:fetchPredictOperation`,
+                        { operationName },
+                        {
+                            headers: {
+                                'Content-Type': 'application/json',
+                                'Authorization': `Bearer ${authToken}`
+                            }
+                        }
+                    );
+
+                    const operationData = pollResponse.data;
+                    logger.info(`Polling Veo operation ${operationName} - attempt ${attempt + 1}, done: ${operationData.done || false}`);
+
+                    if (operationData.done) {
+                        if (operationData.response && operationData.response.videos) {
+                            logger.info(`Veo video generation completed successfully`);
+                            return JSON.stringify(operationData);
+                        } else {
+                            throw new Error(`Veo operation completed but no videos returned: ${JSON.stringify(operationData)}`);
+                        }
+                    }
+
+                    // Wait before next poll
+                    await new Promise(resolve => setTimeout(resolve, pollInterval));
+                } catch (error) {
+                    logger.error(`Error polling Veo operation: ${error.message}`);
+                    throw error;
+                }
+            }
+
+            throw new Error(`Veo video generation timed out after ${maxAttempts * pollInterval / 1000} seconds`);
+        } catch (error) {
+            logger.error(`Veo video generation failed: ${error.message}`);
+            throw error;
+        }
+    }
+
+    // Parse the response from the Veo API
+    parseResponse(data) {
+        if (data.response && data.response.videos) {
+            // Return the videos array with GCS URIs
+            return JSON.stringify({
+                videos: data.response.videos,
+                operationName: data.name,
+                status: 'completed'
+            });
+        }
+        return JSON.stringify(data);
+    }
+
+    // Override the logging function to display the request and response
+    logRequestData(data, responseData, prompt) {
+        const modelInput = data?.instances?.[0]?.prompt;
+        const model = this.model || 'veo-2.0-generate';
+        const parameters = data?.parameters || {};
+
+        logger.verbose(`Veo Model: ${model}`);
+        logger.verbose(`Prompt: ${modelInput}`);
+        logger.verbose(`Parameters: ${JSON.stringify(parameters)}`);
+        logger.verbose(`Response: ${this.parseResponse(responseData)}`);
+
+        prompt &&
+            prompt.debugInfo &&
+            (prompt.debugInfo += `\n${JSON.stringify(data)}`);
+    }
+}
+
+export default VeoVideoPlugin;
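
The plugin derives both endpoints from cortexRequest.url, suffixing it with :predictLongRunning and :fetchPredictOperation, so a matching model entry would point at a Vertex AI model base URL in the style of the Gemini entries in default.example.json. A hypothetical config entry; no VEO-VIDEO entry appears in this diff, so the model path segment and URL are assumptions:

    "veo-2.0-generate": {
        "type": "VEO-VIDEO",
        "url": "https://us-central1-aiplatform.googleapis.com/v1/projects/project-id/locations/us-central1/publishers/google/models/veo-2.0-generate-001",
        "headers": {
            "Content-Type": "application/json"
        },
    },
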
package/helper-apps/cortex-file-handler/src/hashUtils.js
REMOVED

@@ -1,91 +0,0 @@
-import fs from 'fs';
-import os from 'os';
-import path from 'path';
-import { v4 as uuidv4 } from 'uuid';
-
-import {
-    getFileStoreMap,
-    removeFromFileStoreMap,
-    setFileStoreMap,
-} from './redis.js';
-
-/**
- * Retrieve a hash entry from Redis and ensure that the referenced files
- * still exist in at least one configured storage provider. If one copy is
- * missing it will try to restore it from the other provider (when possible).
- *
- * If the entry is completely invalid (no files found) it is removed from
- * the store and `null` is returned.
- *
- * The function also updates the timestamp of the entry so that active hashes
- * stay fresh in Redis.
- *
- * @param {object} context – Azure Function context for logging
- * @param {string} hash – The hash / key in the FileStoreMap
- * @param {StorageService} storageService – An initialised StorageService instance
- * @returns {object|null} The (possibly refreshed) entry or null when invalid
- */
-export async function getValidHashEntry(context, hash, storageService) {
-    if (!hash) return null;
-
-    let entry = await getFileStoreMap(hash);
-    if (!entry) return null;
-
-    try {
-        const primaryExists = entry?.url ? await storageService.fileExists(entry.url) : false;
-        const gcsExists = entry?.gcs ? await storageService.fileExists(entry.gcs) : false;
-
-        // If neither storage has the file, remove the entry and abort
-        if (!primaryExists && !gcsExists) {
-            await removeFromFileStoreMap(hash);
-            return null;
-        }
-
-        // Restore missing GCS copy when primary exists
-        if (primaryExists && !gcsExists) {
-            try {
-                entry = await storageService.ensureGCSUpload(context, entry);
-            } catch (err) {
-                context.log(`getValidHashEntry: failed to restore GCS copy – ${err}`);
-            }
-        }
-
-        // Restore missing primary copy when GCS exists and a primary provider is configured
-        if (!primaryExists && gcsExists && storageService.backupProvider?.isConfigured()) {
-            let tempDir;
-            let downloadedFile;
-            try {
-                tempDir = path.join(os.tmpdir(), `${uuidv4()}`);
-                fs.mkdirSync(tempDir);
-                downloadedFile = path.join(tempDir, path.basename(entry.gcs));
-
-                // Download from GCS, then upload to primary storage
-                await storageService.downloadFile(entry.gcs, downloadedFile);
-                const res = await storageService.uploadFile(context, downloadedFile, hash);
-                entry.url = res.url;
-            } catch (err) {
-                context.log(`getValidHashEntry: failed to restore primary copy – ${err}`);
-            } finally {
-                // Clean temp artefacts
-                try {
-                    if (downloadedFile && fs.existsSync(downloadedFile)) {
-                        fs.unlinkSync(downloadedFile);
-                    }
-                    if (tempDir && fs.existsSync(tempDir)) {
-                        fs.rmSync(tempDir, { recursive: true, force: true });
-                    }
-                } catch (_) {
-                    /* noop */
-                }
-            }
-        }
-
-        // Update timestamp so the entry stays fresh
-        await setFileStoreMap(hash, entry);
-        return entry;
-    } catch (err) {
-        context.log(`getValidHashEntry: error during validation – ${err}`);
-        await removeFromFileStoreMap(hash);
-        return null;
-    }
-}