@aj-archipelago/cortex 1.3.22 → 1.3.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +64 -0
- package/config.js +26 -1
- package/helper-apps/cortex-realtime-voice-server/src/realtime/client.ts +9 -4
- package/helper-apps/cortex-realtime-voice-server/src/realtime/realtimeTypes.ts +1 -0
- package/lib/util.js +4 -24
- package/package.json +5 -2
- package/pathways/system/entity/sys_generator_memory.js +3 -3
- package/pathways/system/rest_streaming/sys_ollama_chat.js +21 -0
- package/pathways/system/rest_streaming/sys_ollama_completion.js +14 -0
- package/pathways/system/rest_streaming/sys_openai_chat.js +2 -2
- package/pathways/transcribe_gemini.js +181 -53
- package/server/modelExecutor.js +8 -0
- package/server/pathwayResolver.js +15 -6
- package/server/plugins/claude3VertexPlugin.js +51 -16
- package/server/plugins/gemini15ChatPlugin.js +94 -1
- package/server/plugins/gemini15VisionPlugin.js +9 -3
- package/server/plugins/modelPlugin.js +11 -8
- package/server/plugins/ollamaChatPlugin.js +158 -0
- package/server/plugins/ollamaCompletionPlugin.js +147 -0
- package/server/rest.js +46 -5
- package/tests/multimodal_conversion.test.js +169 -0
- package/tests/openai_api.test.js +43 -23
- package/tests/streaming.test.js +197 -0
- package/tests/transcribe_gemini.test.js +217 -0
package/README.md
CHANGED

````diff
@@ -561,6 +561,70 @@ Each model configuration can include:
 }
 ```
 
+### API Compatibility
+
+Cortex provides OpenAI-compatible REST endpoints that allow you to use various models through a standardized interface. When `enableRestEndpoints` is set to `true`, Cortex exposes the following endpoints:
+
+- `/v1/models`: List available models
+- `/v1/chat/completions`: Chat completion endpoint
+- `/v1/completions`: Text completion endpoint
+
+This means you can use Cortex with any client library or tool that supports the OpenAI API format. For example:
+
+```python
+from openai import OpenAI
+
+client = OpenAI(
+    base_url="http://localhost:4000/v1", # Point to your Cortex server
+    api_key="your-key" # If you have configured cortexApiKeys
+)
+
+response = client.chat.completions.create(
+    model="gpt-4", # Or any model configured in Cortex
+    messages=[{"role": "user", "content": "Hello!"}]
+)
+```
+
+#### Ollama Integration
+
+Cortex includes built-in support for Ollama models through its OpenAI-compatible REST interface. When `ollamaUrl` is configured in your settings, Cortex will:
+1. Automatically discover and expose all available Ollama models through the `/v1/models` endpoint with an "ollama-" prefix
+2. Route any requests using an "ollama-" prefixed model to the appropriate Ollama endpoint
+
+To enable Ollama support, add the following to your configuration:
+
+```json
+{
+  "enableRestEndpoints": true,
+  "ollamaUrl": "http://localhost:11434" // or your Ollama server URL
+}
+```
+
+You can then use any Ollama model through the standard OpenAI-compatible endpoints:
+
+```bash
+# List available models (will include Ollama models with "ollama-" prefix)
+curl http://localhost:4000/v1/models
+
+# Use an Ollama model for chat
+curl http://localhost:4000/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "ollama-llama2",
+    "messages": [{"role": "user", "content": "Hello!"}]
+  }'
+
+# Use an Ollama model for completions
+curl http://localhost:4000/v1/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "ollama-codellama",
+    "prompt": "Write a function that"
+  }'
+```
+
+This integration allows you to seamlessly use local Ollama models alongside cloud-based models through a single, consistent interface.
+
 ### Other Configuration Properties
 
 The following properties can be configured through environment variables or the configuration file:
````
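A Node.js counterpart to the Python example in the README hunk above may be useful for reference. This is a minimal sketch using the documented endpoints and `ollama-` model naming; the `Authorization` header is only needed when `cortexApiKeys` is configured, and the response shape assumes the standard OpenAI chat completion format these endpoints emulate:

```javascript
// Minimal sketch: call Cortex's OpenAI-compatible chat endpoint from Node 18+.
const response = await fetch('http://localhost:4000/v1/chat/completions', {
  method: 'POST',
  headers: {
    'Content-Type': 'application/json',
    'Authorization': 'Bearer your-key', // only if cortexApiKeys is configured
  },
  body: JSON.stringify({
    model: 'ollama-llama2', // any model configured in Cortex
    messages: [{ role: 'user', content: 'Hello!' }],
  }),
});
const data = await response.json();
console.log(data.choices[0].message.content); // assumes OpenAI-style response shape
```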
package/config.js
CHANGED

```diff
@@ -85,6 +85,11 @@ var config = convict({
     default: false,
     env: 'CORTEX_ENABLE_REST'
   },
+  ollamaUrl: {
+    format: String,
+    default: 'http://127.0.0.1:11434',
+    env: 'OLLAMA_URL'
+  },
   entityConstants: {
     format: Object,
     default: {
@@ -281,7 +286,27 @@ var config = convict({
       "headers": {
         "Content-Type": "application/json"
       },
-    }
+    },
+    "ollama-chat": {
+      "type": "OLLAMA-CHAT",
+      "url": "{{ollamaUrl}}/api/chat",
+      "headers": {
+        "Content-Type": "application/json"
+      },
+      "requestsPerSecond": 10,
+      "maxTokenLength": 131072,
+      "supportsStreaming": true
+    },
+    "ollama-completion": {
+      "type": "OLLAMA-COMPLETION",
+      "url": "{{ollamaUrl}}/api/generate",
+      "headers": {
+        "Content-Type": "application/json"
+      },
+      "requestsPerSecond": 10,
+      "maxTokenLength": 131072,
+      "supportsStreaming": true
+    },
   },
   env: 'CORTEX_MODELS'
 },
```
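The two new model entries point their `url` fields at `{{ollamaUrl}}/api/chat` and `{{ollamaUrl}}/api/generate`, so the single `ollamaUrl` setting (default `http://127.0.0.1:11434`, overridable via `OLLAMA_URL`) drives both. A minimal sketch of that placeholder expansion, as an assumption for illustration; Cortex's actual template resolution is not shown in this diff:

```javascript
// Illustrative only: expand {{name}} placeholders from a settings object.
const interpolate = (template, settings) =>
  template.replace(/{{(\w+)}}/g, (_, key) => settings[key] ?? '');

console.log(interpolate('{{ollamaUrl}}/api/chat', { ollamaUrl: 'http://127.0.0.1:11434' }));
// -> http://127.0.0.1:11434/api/chat
```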
package/helper-apps/cortex-realtime-voice-server/src/realtime/client.ts
CHANGED

```diff
@@ -355,14 +355,19 @@ export class RealtimeVoiceClient extends EventEmitter implements TypedEmitter {
     if (!this.isConnected) {
       throw new Error('Not connected');
     }
+
+    // Create a new config object without custom_voice_id
+    const { custom_voice_id, ...filteredConfig } = {
+      ...this.sessionConfig,
+      ...sessionConfig
+    };
+
     const message = JSON.stringify({
       event_id: createId(),
       type: 'session.update',
-      session:
-        ...this.sessionConfig,
-        ...sessionConfig,
-      },
+      session: filteredConfig,
     });
+
     // No need to log session update messages as they can be noisy
     logger.log('Sending session update message:', message);
     this.ws?.send(message);
```
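The fix merges the stored and incoming session configs and strips `custom_voice_id` before the `session.update` message is serialized, using object rest destructuring. The pattern in isolation, with illustrative values:

```javascript
// Rest destructuring drops one key while keeping everything else.
const merged = { voice: 'alloy', custom_voice_id: 'abc123', modalities: ['text'] };
const { custom_voice_id, ...filteredConfig } = merged;
console.log(filteredConfig); // { voice: 'alloy', modalities: ['text'] }
```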
package/helper-apps/cortex-realtime-voice-server/src/realtime/realtimeTypes.ts
CHANGED

```diff
@@ -46,6 +46,7 @@ export type RealtimeSessionConfig = {
   modalities: Array<Modality>,
   instructions: string,
   voice: Voice,
+  custom_voice_id?: string | null,
   input_audio_format: AudioFormat,
   output_audio_format: AudioFormat,
   input_audio_transcription: null | { model: 'whisper-1' | (string & {}) },
```
package/lib/util.js
CHANGED

```diff
@@ -1,6 +1,6 @@
 import logger from "./logger.js";
 import stream from 'stream';
-import
+import subvibe from '@aj-archipelago/subvibe';
 import os from 'os';
 import http from 'http';
 import https from 'https';
@@ -126,19 +126,9 @@ function convertSrtToText(str) {
 function alignSubtitles(subtitles, format, offsets) {
   const result = [];
 
-  function preprocessStr(str) {
-    try{
-      if(!str) return '';
-      return str.trim().replace(/(\n\n)(?!\n)/g, '\n\n\n');
-    }catch(e){
-      logger.error(`An error occurred in content text preprocessing: ${e}`);
-      return '';
-    }
-  }
-
   function shiftSubtitles(subtitle, shiftOffset) {
-    const captions =
-    const resynced =
+    const captions = subvibe.parse(subtitle);
+    const resynced = subvibe.resync(captions.cues, { offset: shiftOffset });
     return resynced;
   }
 
@@ -146,18 +136,8 @@ function alignSubtitles(subtitles, format, offsets) {
     result.push(...shiftSubtitles(subtitles[i], offsets[i]*1000)); // convert to milliseconds
   }
 
-  try {
-    //if content has needed html style tags, keep them
-    for(const obj of result) {
-      if(obj && obj.content){
-        obj.text = obj.content;
-      }
-    }
-  } catch (error) {
-    logger.error(`An error occurred in content text parsing: ${error}`);
-  }
 
-  return
+  return subvibe.build(result, format || 'srt');
 }
 
 
```
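`alignSubtitles` now delegates subtitle parsing, time-shifting, and serialization to `@aj-archipelago/subvibe` (added to `package.json` below). A short sketch of that pipeline using only the `parse`/`resync`/`build` calls visible in the diff; the sample cue and offset are illustrative:

```javascript
import subvibe from '@aj-archipelago/subvibe';

// Parse one SRT cue, shift it by 5 seconds, and rebuild subtitle text.
const srt = '1\n00:00:01,000 --> 00:00:03,000\nHello!';
const parsed = subvibe.parse(srt);                             // -> { cues: [...] }
const shifted = subvibe.resync(parsed.cues, { offset: 5000 }); // offset in milliseconds
console.log(subvibe.build(shifted, 'srt'));
```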
package/package.json
CHANGED

```diff
@@ -1,6 +1,6 @@
 {
   "name": "@aj-archipelago/cortex",
-  "version": "1.3.
+  "version": "1.3.24",
   "description": "Cortex is a GraphQL API for AI. It provides a simple, extensible interface for using AI services from OpenAI, Azure and others.",
   "private": false,
   "repository": {
@@ -33,6 +33,7 @@
   "type": "module",
   "homepage": "https://github.com/aj-archipelago/cortex#readme",
   "dependencies": {
+    "@aj-archipelago/subvibe": "^1.0.5",
     "@apollo/server": "^4.7.3",
     "@apollo/server-plugin-response-cache": "^4.1.2",
     "@apollo/utils.keyvadapter": "^3.0.0",
@@ -63,7 +64,6 @@
     "ioredis": "^5.3.1",
     "keyv": "^4.5.2",
     "mime-types": "^2.1.35",
-    "subsrt": "^1.1.1",
     "uuid": "^9.0.0",
     "winston": "^3.11.0",
     "ws": "^8.12.0"
@@ -86,5 +86,8 @@
       "dotenv/config"
     ],
     "concurrency": 1
+  },
+  "overrides": {
+    "whatwg-url": "^12.0.0"
   }
 }
```
package/pathways/system/entity/sys_generator_memory.js
CHANGED

```diff
@@ -18,7 +18,7 @@ export default {
     const { aiStyle, AI_STYLE_ANTHROPIC, AI_STYLE_OPENAI } = args;
     const styleModel = aiStyle === "Anthropic" ? AI_STYLE_ANTHROPIC : AI_STYLE_OPENAI;
 
-    const memoryContext = await callPathway('sys_search_memory', { ...args, section: 'memoryAll', updateContext: true });
+    const memoryContext = await callPathway('sys_search_memory', { ...args, stream: false, section: 'memoryAll', updateContext: true });
     if (memoryContext) {
       const {toolCallId} = addToolCalls(args.chatHistory, "search memory for relevant information", "memory_lookup");
       addToolResults(args.chatHistory, memoryContext, toolCallId);
@@ -26,9 +26,9 @@ export default {
 
     let result;
     if (args.voiceResponse) {
-      result = await callPathway('sys_generator_quick', { ...args, model: styleModel, stream: false });
+      result = await callPathway('sys_generator_quick', { ...args, model: styleModel, stream: false }, resolver);
     } else {
-      result = await callPathway('sys_generator_quick', { ...args, model: styleModel });
+      result = await callPathway('sys_generator_quick', { ...args, model: styleModel }, resolver);
     }
 
     resolver.tool = JSON.stringify({ toolUsed: "memory" });
```
package/pathways/system/rest_streaming/sys_ollama_chat.js
ADDED

```diff
@@ -0,0 +1,21 @@
+// sys_ollama_chat.js
+// override handler for ollama chat model
+
+import { Prompt } from '../../../server/prompt.js';
+
+export default {
+    prompt:
+    [
+        new Prompt({ messages: [
+            "{{messages}}",
+        ]}),
+    ],
+    inputParameters: {
+        messages: [{ role: '', content: '' }],
+        ollamaModel: '',
+    },
+    model: 'ollama-chat',
+    useInputChunking: false,
+    emulateOpenAIChatModel: 'ollama-chat',
+    timeout: 300,
+}
```
package/pathways/system/rest_streaming/sys_ollama_completion.js
ADDED

```diff
@@ -0,0 +1,14 @@
+// sys_ollama_completion.js
+// default handler for ollama completion endpoints when REST endpoints are enabled
+
+export default {
+    prompt: `{{text}}`,
+    inputParameters: {
+        text: '',
+        ollamaModel: '',
+    },
+    model: 'ollama-completion',
+    useInputChunking: false,
+    emulateOpenAICompletionModel: 'ollama-completion',
+    timeout: 300,
+}
```
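Both new pathways declare an `ollamaModel` input and an `emulateOpenAI*Model` name, which is how the REST layer ties OpenAI-style requests to them. A hypothetical sketch of the request-to-pathway mapping; the prefix stripping is an assumption based on the README's description of `ollama-` routing, not code from this diff:

```javascript
// Hypothetical routing sketch (not from the diff): pick a pathway for an
// OpenAI-style body and strip the "ollama-" prefix for the ollamaModel input.
function routeOllamaRequest(body) {
  const ollamaModel = body.model.replace(/^ollama-/, '');
  return body.messages
    ? { pathway: 'sys_ollama_chat', args: { messages: body.messages, ollamaModel } }
    : { pathway: 'sys_ollama_completion', args: { text: body.prompt, ollamaModel } };
}

console.log(routeOllamaRequest({ model: 'ollama-codellama', prompt: 'Write a function that' }));
// -> { pathway: 'sys_ollama_completion',
//      args: { text: 'Write a function that', ollamaModel: 'codellama' } }
```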
package/pathways/transcribe_gemini.js
CHANGED

```diff
@@ -5,36 +5,124 @@ import { Prompt } from "../server/prompt.js";
 
 const OFFSET_CHUNK = 500; //seconds of each chunk offset, only used if helper does not provide
 
-
-
 export function convertSrtToVtt(data) {
   if (!data || !data.trim()) {
     return "WEBVTT\n\n";
   }
-
+
+  // If it's already VTT format and has header
+  if (data.trim().startsWith("WEBVTT")) {
+    const lines = data.split("\n");
+    const result = ["WEBVTT", ""]; // Start with header and blank line
+    let currentCue = [];
+
+    for (let i = 0; i < lines.length; i++) {
+      const line = lines[i].trim();
+
+      // Skip empty lines and the WEBVTT header
+      if (!line || line === "WEBVTT") {
+        continue;
+      }
+
+      // If it's a number by itself, it's a cue identifier
+      if (/^\d+$/.test(line)) {
+        // If we have a previous cue, add it with proper spacing
+        if (currentCue.length > 0) {
+          result.push(currentCue.join("\n"));
+          result.push(""); // Add blank line between cues
+          currentCue = [];
+        }
+        currentCue.push(line);
+        continue;
+      }
+
+      // Check for and convert timestamps
+      const fullTimeRegex = /^(\d{2}):(\d{2}):(\d{2})[,.](\d{3})\s*-->\s*(\d{2}):(\d{2}):(\d{2})[,.](\d{3})$/;
+      const shortTimeRegex = /^(\d{2}):(\d{2})[,.](\d{3})\s*-->\s*(\d{2}):(\d{2})[,.](\d{3})$/;
+      const ultraShortTimeRegex = /^(\d{1,2})[.](\d{3})\s*-->\s*(\d{1,2})[.](\d{3})$/;
+
+      const fullMatch = line.match(fullTimeRegex);
+      const shortMatch = line.match(shortTimeRegex);
+      const ultraShortMatch = line.match(ultraShortTimeRegex);
+
+      if (fullMatch) {
+        // Already in correct format, just convert comma to dot
+        const convertedTime = line.replace(/,/g, '.');
+        currentCue.push(convertedTime);
+      } else if (shortMatch) {
+        // Convert MM:SS to HH:MM:SS
+        const convertedTime = `00:${shortMatch[1]}:${shortMatch[2]}.${shortMatch[3]} --> 00:${shortMatch[4]}:${shortMatch[5]}.${shortMatch[6]}`;
+        currentCue.push(convertedTime);
+      } else if (ultraShortMatch) {
+        // Convert SS to HH:MM:SS
+        const convertedTime = `00:00:${ultraShortMatch[1].padStart(2, '0')}.${ultraShortMatch[2]} --> 00:00:${ultraShortMatch[3].padStart(2, '0')}.${ultraShortMatch[4]}`;
+        currentCue.push(convertedTime);
+      } else if (!line.includes('-->')) {
+        // Must be subtitle text
+        currentCue.push(line);
+      }
+    }
+
+    // Add the last cue if there is one
+    if (currentCue.length > 0) {
+      result.push(currentCue.join("\n"));
+      result.push(""); // Add final blank line
+    }
+
+    // Join with newlines and ensure proper ending
+    return result.join("\n") + "\n";
+  }
+
+  // remove dos newlines and trim
   var srt = data.replace(/\r+/g, "");
-  // trim white space start and end
   srt = srt.replace(/^\s+|\s+$/g, "");
 
-  //
-
-
-  //
-
-
-  //
-
-
-
-
-
-
-
-
-
+  // Split into cues and filter out empty ones
+  var cuelist = srt.split("\n\n").filter(cue => cue.trim());
+
+  // Always add WEBVTT header
+  var result = "WEBVTT\n\n";
+
+  // Convert each cue to VTT format
+  for (const cue of cuelist) {
+    const lines = cue.split("\n").map(line => line.trim()).filter(line => line);
+    if (lines.length < 2) continue;
+
+    let output = [];
+
+    // Handle cue identifier
+    if (/^\d+$/.test(lines[0])) {
+      output.push(lines[0]);
+      lines.shift();
+    }
+
+    // Handle timestamp line
+    const timeLine = lines[0];
+    const fullTimeRegex = /^(\d{2}):(\d{2}):(\d{2})[,.](\d{3})\s*-->\s*(\d{2}):(\d{2}):(\d{2})[,.](\d{3})$/;
+    const shortTimeRegex = /^(\d{2}):(\d{2})[,.](\d{3})\s*-->\s*(\d{2}):(\d{2})[,.](\d{3})$/;
+    const ultraShortTimeRegex = /^(\d{1,2})[.](\d{3})\s*-->\s*(\d{1,2})[.](\d{3})$/;
+
+    const fullMatch = timeLine.match(fullTimeRegex);
+    const shortMatch = timeLine.match(shortTimeRegex);
+    const ultraShortMatch = timeLine.match(ultraShortTimeRegex);
+
+    if (fullMatch) {
+      output.push(timeLine.replace(/,/g, '.'));
+    } else if (shortMatch) {
+      output.push(`00:${shortMatch[1]}:${shortMatch[2]}.${shortMatch[3]} --> 00:${shortMatch[4]}:${shortMatch[5]}.${shortMatch[6]}`);
+    } else if (ultraShortMatch) {
+      output.push(`00:00:${ultraShortMatch[1].padStart(2, '0')}.${ultraShortMatch[2]} --> 00:00:${ultraShortMatch[3].padStart(2, '0')}.${ultraShortMatch[4]}`);
+    } else {
+      continue; // Invalid timestamp format
     }
+
+    // Add remaining lines as subtitle text
+    output.push(...lines.slice(1));
+
+    // Add the cue to result
+    result += output.join("\n") + "\n\n";
   }
+
   return result;
 }
 
@@ -42,18 +130,20 @@ function convertSrtCue(caption) {
   if (!caption || !caption.trim()) {
     return "";
   }
-
-  //srt = srt.replace(/<[a-zA-Z\/][^>]*>/g, '');
+
   var cue = "";
   var s = caption.split(/\n/);
-
+
+  // concatenate multi-line string separated in array into one
   while (s.length > 3) {
     for (var i = 3; i < s.length; i++) {
       s[2] += "\n" + s[i];
     }
     s.splice(3, s.length - 3);
   }
+
   var line = 0;
+
   // detect identifier
   if (
     s[0] &&
@@ -67,10 +157,11 @@ function convertSrtCue(caption) {
       line += 1;
     }
   }
+
   // get time strings
   if (s[line] && s[line].match(/\d+:\d+:\d+/)) {
     // convert time string
-    var m = s[
+    var m = s[line].match(
       /(\d{2}):(\d{2}):(\d{2})[,.](\d{3})\s*--?>\s*(\d{2}):(\d{2}):(\d{2})[,.](\d{3})/,
     );
     if (m) {
@@ -93,17 +184,43 @@ function convertSrtCue(caption) {
         "\n";
       line += 1;
     } else {
-      //
-
+      // Try alternate timestamp format
+      m = s[line].match(
+        /(\d{2}):(\d{2})\.(\d{3})\s*--?>\s*(\d{2}):(\d{2})\.(\d{3})/,
+      );
+      if (m) {
+        // Convert to full timestamp format
+        cue +=
+          "00:" +
+          m[1] +
+          ":" +
+          m[2] +
+          "." +
+          m[3] +
+          " --> " +
+          "00:" +
+          m[4] +
+          ":" +
+          m[5] +
+          "." +
+          m[6] +
+          "\n";
+        line += 1;
+      } else {
+        // Unrecognized timestring
+        return "";
+      }
     }
   } else {
     // file format error or comment lines
     return "";
   }
+
   // get cue text
   if (s[line]) {
     cue += s[line] + "\n\n";
   }
+
   return cue;
 }
 
@@ -112,48 +229,59 @@ export function detectSubtitleFormat(text) {
   const cleanText = text.replace(/\r+/g, "").trim();
   const lines = cleanText.split("\n");
 
-  // Check if it's VTT format
+  // Check if it's VTT format - be more lenient with the header
   if (lines[0]?.trim() === "WEBVTT") {
     return "vtt";
   }
 
-  //
-
-
-
-
-  // 4. Blank line
-  const timeRegex =
-    /(\d{2}:\d{2}:\d{2})[,.](\d{3})\s*-->\s*(\d{2}:\d{2}:\d{2})[,.](\d{3})/;
+  // Define regex patterns for timestamp formats
+  const srtTimeRegex =
+    /(\d{2}:\d{2}:\d{2})[,.]\d{3}\s*-->\s*(\d{2}:\d{2}:\d{2})[,.]\d{3}/;
+  const vttTimeRegex =
+    /(?:\d{2}:)?(\d{1,2})[.]\d{3}\s*-->\s*(?:\d{2}:)?(\d{1,2})[.]\d{3}/;
 
-  let
-  let
+  let hasSrtTimestamps = false;
+  let hasVttTimestamps = false;
+  let hasSequentialNumbers = false;
+  let lastNumber = 0;
 
-  //
+  // Look through first few lines to detect patterns
   for (let i = 0; i < Math.min(lines.length, 12); i++) {
     const line = lines[i]?.trim();
     if (!line) continue;
 
-    // Check
-    if (
-
-
-
-
-
+    // Check for timestamps
+    if (srtTimeRegex.test(line)) {
+      hasSrtTimestamps = true;
+    }
+    if (vttTimeRegex.test(line)) {
+      hasVttTimestamps = true;
+    }
+
+    // Check for sequential numbers
+    const numberMatch = line.match(/^(\d+)$/);
+    if (numberMatch) {
+      const num = parseInt(numberMatch[1]);
+      if (lastNumber === 0 || num === lastNumber + 1) {
+        hasSequentialNumbers = true;
+        lastNumber = num;
       }
     }
   }
 
-
+  // If it has SRT-style timestamps (HH:MM:SS), it's SRT
+  if (hasSrtTimestamps && hasSequentialNumbers) {
     return "srt";
   }
 
+  // If it has VTT-style timestamps (MM:SS) or WEBVTT header, it's VTT
+  if (hasVttTimestamps) {
+    return "vtt";
+  }
+
   return null;
 }
 
-
 export default {
   prompt:
   [
@@ -203,7 +331,6 @@ export default {
     const progress = (completedCount + partialRatio) / totalCount;
     logger.info(`Progress for ${requestId}: ${progress}`);
 
-    console.log(`Progress for ${requestId}: ${progress}`);
     publishRequestProgress({
       requestId,
       progress,
@@ -290,7 +417,7 @@ WEBVTT
 Hello World2!
 
 2
-00:05.344 --> 00:00:08.809
+00:00:05.344 --> 00:00:08.809
 Being AI is also great!
 
 - If asked text format, e.g.:
@@ -327,6 +454,7 @@ Even a single newline or space can cause the response to be rejected. You must f
   return messages;
 }
 
+
 const processChunksParallel = async (chunks, args) => {
   try {
     const chunkPromises = chunks.map(async (chunk, index) => ({
@@ -338,8 +466,6 @@ Even a single newline or space can cause the response to be rejected. You must f
       })
     }));
 
-    // const results = await Promise.all(chunkPromises);
-
     const results = await Promise.all(
       chunkPromises.map(promise =>
        promise.then(result => {
@@ -374,6 +500,8 @@ Even a single newline or space can cause the response to be rejected. You must f
 
     if (['srt','vtt'].includes(responseFormat) || wordTimestamped) { // align subtitles for formats
 
+
+
     // convert as gemini output is unstable
     for(let i = 0; i < result.length; i++) {
       try{
```
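The rewritten `convertSrtToVtt` normalizes full (`HH:MM:SS`), short (`MM:SS`), and ultra-short (`SS`) timestamps into VTT form. A quick check traced against the function above; the import path is illustrative:

```javascript
import { convertSrtToVtt } from './pathways/transcribe_gemini.js'; // path illustrative

// A short-form SRT timestamp gets padded out to HH:MM:SS with dot separators.
const srt = '1\n00:05,344 --> 00:08,809\nBeing AI is also great!';
console.log(convertSrtToVtt(srt));
// WEBVTT
//
// 1
// 00:00:05.344 --> 00:00:08.809
// Being AI is also great!
```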
package/server/modelExecutor.js
CHANGED

```diff
@@ -28,6 +28,8 @@ import NeuralSpacePlugin from './plugins/neuralSpacePlugin.js';
 import RunwareAiPlugin from './plugins/runwareAiPlugin.js';
 import ReplicateApiPlugin from './plugins/replicateApiPlugin.js';
 import AzureVideoTranslatePlugin from './plugins/azureVideoTranslatePlugin.js';
+import OllamaChatPlugin from './plugins/ollamaChatPlugin.js';
+import OllamaCompletionPlugin from './plugins/ollamaCompletionPlugin.js';
 
 class ModelExecutor {
   constructor(pathway, model) {
@@ -116,6 +118,12 @@ class ModelExecutor {
       case 'AZURE-VIDEO-TRANSLATE':
         plugin = new AzureVideoTranslatePlugin(pathway, model);
         break;
+      case 'OLLAMA-CHAT':
+        plugin = new OllamaChatPlugin(pathway, model);
+        break;
+      case 'OLLAMA-COMPLETION':
+        plugin = new OllamaCompletionPlugin(pathway, model);
+        break;
       default:
         throw new Error(`Unsupported model type: ${model.type}`);
     }
```