@aj-archipelago/cortex 1.1.37 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config.js +60 -0
- package/package.json +1 -1
- package/pathways/flux_image.js +2 -1
- package/pathways/index.js +6 -1
- package/pathways/sys_parse_numbered_object_list.js +19 -0
- package/pathways/sys_repair_json.js +17 -0
- package/server/chunker.js +156 -113
- package/server/modelExecutor.js +9 -1
- package/server/parser.js +18 -36
- package/server/pathwayResolver.js +1 -1
- package/server/pathwayResponseParser.js +3 -3
- package/server/plugins/azureCognitivePlugin.js +1 -1
- package/server/plugins/azureVideoTranslatePlugin.js +163 -0
- package/server/plugins/openAiVisionPlugin.js +0 -3
- package/server/plugins/{runwareAIPlugin.js → runwareAiPlugin.js} +1 -1
- package/tests/chunkfunction.test.js +270 -4
- package/tests/main.test.js +0 -55
- package/tests/parser.test.js +255 -0
- package/tests/translate_srt.test.js +82 -0
package/config.js
CHANGED
|
@@ -155,6 +155,21 @@ var config = convict({
|
|
|
155
155
|
"maxReturnTokens": 4096,
|
|
156
156
|
"supportsStreaming": true
|
|
157
157
|
},
|
|
158
|
+
"oai-gpt4o-mini": {
|
|
159
|
+
"type": "OPENAI-VISION",
|
|
160
|
+
"url": "https://api.openai.com/v1/chat/completions",
|
|
161
|
+
"headers": {
|
|
162
|
+
"Authorization": "Bearer {{OPENAI_API_KEY}}",
|
|
163
|
+
"Content-Type": "application/json"
|
|
164
|
+
},
|
|
165
|
+
"params": {
|
|
166
|
+
"model": "gpt-4o-mini"
|
|
167
|
+
},
|
|
168
|
+
"requestsPerSecond": 50,
|
|
169
|
+
"maxTokenLength": 131072,
|
|
170
|
+
"maxReturnTokens": 4096,
|
|
171
|
+
"supportsStreaming": true
|
|
172
|
+
},
|
|
158
173
|
"oai-o1-mini": {
|
|
159
174
|
"type": "OPENAI-REASONING",
|
|
160
175
|
"url": "https://api.openai.com/v1/chat/completions",
|
|
@@ -202,9 +217,48 @@ var config = convict({
|
|
|
202
217
|
"Content-Type": "application/json"
|
|
203
218
|
},
|
|
204
219
|
},
|
|
220
|
+
"replicate-flux-11-pro": {
|
|
221
|
+
"type": "REPLICATE-API",
|
|
222
|
+
"url": "https://api.replicate.com/v1/models/black-forest-labs/flux-1.1-pro/predictions",
|
|
223
|
+
"headers": {
|
|
224
|
+
"Prefer": "wait",
|
|
225
|
+
"Authorization": "Token {{REPLICATE_API_KEY}}",
|
|
226
|
+
"Content-Type": "application/json"
|
|
227
|
+
},
|
|
228
|
+
},
|
|
229
|
+
"replicate-flux-1-schnell": {
|
|
230
|
+
"type": "REPLICATE-API",
|
|
231
|
+
"url": "https://api.replicate.com/v1/models/black-forest-labs/flux-schnell/predictions",
|
|
232
|
+
"headers": {
|
|
233
|
+
"Prefer": "wait",
|
|
234
|
+
"Authorization": "Token {{REPLICATE_API_KEY}}",
|
|
235
|
+
"Content-Type": "application/json"
|
|
236
|
+
},
|
|
237
|
+
},
|
|
238
|
+
"replicate-flux-1-dev": {
|
|
239
|
+
"type": "REPLICATE-API",
|
|
240
|
+
"url": "https://api.replicate.com/v1/models/black-forest-labs/flux-dev/predictions",
|
|
241
|
+
"headers": {
|
|
242
|
+
"Prefer": "wait",
|
|
243
|
+
"Authorization": "Token {{REPLICATE_API_KEY}}",
|
|
244
|
+
"Content-Type": "application/json"
|
|
245
|
+
},
|
|
246
|
+
},
|
|
247
|
+
"azure-video-translate": {
|
|
248
|
+
"type": "AZURE-VIDEO-TRANSLATE",
|
|
249
|
+
"headers": {
|
|
250
|
+
"Content-Type": "application/json"
|
|
251
|
+
},
|
|
252
|
+
"supportsStreaming": true,
|
|
253
|
+
}
|
|
205
254
|
},
|
|
206
255
|
env: 'CORTEX_MODELS'
|
|
207
256
|
},
|
|
257
|
+
azureVideoTranslationApiUrl: {
|
|
258
|
+
format: String,
|
|
259
|
+
default: 'http://127.0.0.1:5005',
|
|
260
|
+
env: 'AZURE_VIDEO_TRANSLATION_API_URL'
|
|
261
|
+
},
|
|
208
262
|
openaiApiKey: {
|
|
209
263
|
format: String,
|
|
210
264
|
default: null,
|
|
@@ -248,6 +302,12 @@ var config = convict({
|
|
|
248
302
|
env: 'REDIS_ENCRYPTION_KEY',
|
|
249
303
|
sensitive: true
|
|
250
304
|
},
|
|
305
|
+
replicateApiKey: {
|
|
306
|
+
format: String,
|
|
307
|
+
default: null,
|
|
308
|
+
env: 'REPLICATE_API_KEY',
|
|
309
|
+
sensitive: true
|
|
310
|
+
},
|
|
251
311
|
runwareAiApiKey: {
|
|
252
312
|
format: String,
|
|
253
313
|
default: null,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@aj-archipelago/cortex",
|
|
3
|
-
"version": "1.1.37",
|
|
3
|
+
"version": "1.2.0",
|
|
4
4
|
"description": "Cortex is a GraphQL API for AI. It provides a simple, extensible interface for using AI services from OpenAI, Azure and others.",
|
|
5
5
|
"private": false,
|
|
6
6
|
"repository": {
|
package/pathways/flux_image.js
CHANGED
package/pathways/index.js
CHANGED
|
@@ -65,6 +65,8 @@ import sys_openai_chat_gpt4 from './sys_openai_chat_gpt4.js';
|
|
|
65
65
|
import sys_openai_chat_gpt4_32 from './sys_openai_chat_gpt4_32.js';
|
|
66
66
|
import sys_openai_chat_gpt4_turbo from './sys_openai_chat_gpt4_turbo.js';
|
|
67
67
|
import sys_openai_completion from './sys_openai_completion.js';
|
|
68
|
+
import sys_parse_numbered_object_list from './sys_parse_numbered_object_list.js';
|
|
69
|
+
import sys_repair_json from './sys_repair_json.js';
|
|
68
70
|
import tags from './tags.js';
|
|
69
71
|
import taxonomy from './taxonomy.js';
|
|
70
72
|
import timeline from './timeline.js';
|
|
@@ -133,7 +135,10 @@ export {
|
|
|
133
135
|
sys_google_code_chat,
|
|
134
136
|
sys_google_gemini_chat, sys_openai_chat, sys_openai_chat_16, sys_openai_chat_gpt4, sys_openai_chat_gpt4_32,
|
|
135
137
|
sys_openai_completion,
|
|
136
|
-
sys_openai_chat_gpt4_turbo,
|
|
138
|
+
sys_openai_chat_gpt4_turbo,
|
|
139
|
+
sys_parse_numbered_object_list,
|
|
140
|
+
sys_repair_json,
|
|
141
|
+
tags,
|
|
137
142
|
taxonomy,
|
|
138
143
|
timeline, topics, topics_sentiment, transcribe,
|
|
139
144
|
transcribe_neuralspace,
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import { Prompt } from '../server/prompt.js';
|
|
2
|
+
|
|
3
|
+
export default {
|
|
4
|
+
prompt: [
|
|
5
|
+
new Prompt({
|
|
6
|
+
messages: [
|
|
7
|
+
{ "role": "system", "content": "Assistant is a list parsing AI. When user posts text including a numbered list and a desired set of fields, assistant will carefully read the list and attempt to convert the list into a JSON object with the given fields. If there are extra fields, assistant will ignore them. If there are some missing fields, assistant will just skip the missing fields and return the rest. If the conversion is not at all possible, assistant will return an empty JSON array. Assistant will generate only the repaired JSON object in a directly parseable format with no markdown surrounding it and no other response or commentary." },
|
|
8
|
+
{ "role": "user", "content": `Fields: {{{format}}}\nList: {{{text}}}`},
|
|
9
|
+
]
|
|
10
|
+
})
|
|
11
|
+
],
|
|
12
|
+
format: '',
|
|
13
|
+
model: 'oai-gpt4o',
|
|
14
|
+
temperature: 0.0,
|
|
15
|
+
enableCache: true,
|
|
16
|
+
enableDuplicateRequests: false,
|
|
17
|
+
json: true
|
|
18
|
+
}
|
|
19
|
+
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import { Prompt } from '../server/prompt.js';
|
|
2
|
+
|
|
3
|
+
export default {
|
|
4
|
+
prompt: [
|
|
5
|
+
new Prompt({
|
|
6
|
+
messages: [
|
|
7
|
+
{ "role": "system", "content": "Assistant is a JSON repair assistant. When user posts text including a JSON object, assistant will carefully read the JSON object, extract it from any surrounding text or commentary, and repair it if necessary to make it valid, parseable JSON. If there is no JSON in the response, assistant will return an empty JSON object. Assistant will generate only the repaired JSON object in a directly parseable format with no markdown surrounding it and no other response or commentary." },
|
|
8
|
+
{ "role": "user", "content": `{{{text}}}`},
|
|
9
|
+
]
|
|
10
|
+
})
|
|
11
|
+
],
|
|
12
|
+
model: 'oai-gpt4o-mini',
|
|
13
|
+
temperature: 0.0,
|
|
14
|
+
enableCache: true,
|
|
15
|
+
enableDuplicateRequests: false,
|
|
16
|
+
}
|
|
17
|
+
|
package/server/chunker.js
CHANGED
|
@@ -27,7 +27,6 @@ const getFirstNTokenSingle = (text, maxTokenLen) => {
|
|
|
27
27
|
return text;
|
|
28
28
|
}
|
|
29
29
|
|
|
30
|
-
|
|
31
30
|
function getFirstNTokenArray(content, tokensToKeep) {
|
|
32
31
|
let totalTokens = 0;
|
|
33
32
|
let result = [];
|
|
@@ -71,138 +70,182 @@ const determineTextFormat = (text) => {
|
|
|
71
70
|
}
|
|
72
71
|
|
|
73
72
|
const getSemanticChunks = (text, chunkSize, inputFormat = 'text') => {
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
while ((match = regex.exec(str)) !== null) {
|
|
79
|
-
const value = str.slice(0, match.index);
|
|
80
|
-
result.push(value);
|
|
81
|
-
|
|
82
|
-
if (preserveWhitespace || /\S/.test(match[0])) {
|
|
83
|
-
result.push(match[0]);
|
|
84
|
-
}
|
|
85
|
-
|
|
86
|
-
str = str.slice(match.index + match[0].length);
|
|
87
|
-
}
|
|
88
|
-
|
|
89
|
-
if (str) {
|
|
90
|
-
result.push(str);
|
|
91
|
-
}
|
|
92
|
-
|
|
93
|
-
return result.filter(Boolean);
|
|
94
|
-
};
|
|
95
|
-
|
|
96
|
-
const breakByParagraphs = (str) => breakByRegex(str, /[\r\n]+/, true);
|
|
97
|
-
const breakBySentences = (str) => breakByRegex(str, /(?<=[.。؟!?!\n])\s+/, true);
|
|
98
|
-
const breakByWords = (str) => breakByRegex(str, /(\s,;:.+)/);
|
|
99
|
-
|
|
100
|
-
const breakByHtmlElements = (str) => {
|
|
101
|
-
const $ = cheerio.load(str, null, true);
|
|
102
|
-
|
|
103
|
-
// the .filter() call is important to get the text nodes
|
|
104
|
-
// https://stackoverflow.com/questions/54878673/cheerio-get-normal-text-nodes
|
|
105
|
-
let rootNodes = $('body').contents();
|
|
106
|
-
|
|
107
|
-
// create an array with the outerHTML of each node
|
|
108
|
-
const nodes = rootNodes.map((i, el) => $(el).prop('outerHTML') || $(el).text()).get();
|
|
73
|
+
if (!Number.isInteger(chunkSize) || chunkSize <= 0) {
|
|
74
|
+
throw new Error('Invalid chunkSize: must be a positive integer');
|
|
75
|
+
}
|
|
109
76
|
|
|
110
|
-
|
|
111
|
-
|
|
77
|
+
if (inputFormat === 'html') {
|
|
78
|
+
return getHtmlChunks(text, chunkSize);
|
|
79
|
+
} else {
|
|
80
|
+
// Pre-calculate encoding ratio with a sample to avoid encoding entire text
|
|
81
|
+
const sampleSize = Math.min(500, text.length);
|
|
82
|
+
const sample = text.slice(0, sampleSize);
|
|
83
|
+
const sampleEncoded = encode(sample);
|
|
84
|
+
const avgCharsPerToken = sample.length / sampleEncoded.length;
|
|
85
|
+
const charChunkSize = Math.round(chunkSize * avgCharsPerToken);
|
|
86
|
+
return findChunks(text, charChunkSize, chunkSize);
|
|
87
|
+
}
|
|
88
|
+
}
|
|
112
89
|
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
90
|
+
const getHtmlChunks = (html, chunkSize) => {
|
|
91
|
+
const $ = cheerio.load(html, null, true);
|
|
92
|
+
const nodes = $('body').contents().map((_, el) => $.html(el)).get();
|
|
116
93
|
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
if (currentTokenLength <= chunkSize) {
|
|
120
|
-
currentChunk += token;
|
|
121
|
-
} else {
|
|
122
|
-
if (currentChunk) {
|
|
123
|
-
chunks.push(currentChunk);
|
|
124
|
-
}
|
|
125
|
-
currentChunk = token;
|
|
126
|
-
}
|
|
127
|
-
}
|
|
94
|
+
let chunks = [];
|
|
95
|
+
let currentChunk = '';
|
|
128
96
|
|
|
129
|
-
|
|
130
|
-
|
|
97
|
+
for (const node of nodes) {
|
|
98
|
+
if (encode(node).length > chunkSize && node.startsWith('<') && node.endsWith('>')) {
|
|
99
|
+
throw new Error('The HTML contains elements that are larger than the chunk size. Please try again with HTML that has smaller elements.');
|
|
131
100
|
}
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
if (
|
|
141
|
-
|
|
142
|
-
const
|
|
143
|
-
|
|
144
|
-
if (combinedLen <= chunkSize) {
|
|
145
|
-
optimizedChunks.push(combinedChunk);
|
|
146
|
-
i += 1;
|
|
147
|
-
} else {
|
|
148
|
-
optimizedChunks.push(chunks[i]);
|
|
149
|
-
}
|
|
101
|
+
|
|
102
|
+
if (encode(currentChunk + node).length <= chunkSize) {
|
|
103
|
+
currentChunk += node;
|
|
104
|
+
} else {
|
|
105
|
+
if (currentChunk) {
|
|
106
|
+
chunks.push(currentChunk);
|
|
107
|
+
currentChunk = '';
|
|
108
|
+
}
|
|
109
|
+
if (encode(node).length > chunkSize) {
|
|
110
|
+
// If the node is larger than chunkSize, split it
|
|
111
|
+
const textChunks = getSemanticChunks(node, chunkSize, 'text');
|
|
112
|
+
chunks.push(...textChunks);
|
|
150
113
|
} else {
|
|
151
|
-
|
|
114
|
+
currentChunk = node;
|
|
152
115
|
}
|
|
153
116
|
}
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
if (currentChunk) {
|
|
120
|
+
chunks.push(currentChunk);
|
|
121
|
+
}
|
|
154
122
|
|
|
155
|
-
|
|
156
|
-
|
|
123
|
+
return chunks;
|
|
124
|
+
};
|
|
125
|
+
|
|
126
|
+
const findChunks = (text, chunkSize, maxTokenLen) => {
|
|
127
|
+
const chunks = [];
|
|
128
|
+
let startIndex = 0;
|
|
157
129
|
|
|
158
|
-
|
|
159
|
-
|
|
130
|
+
while (startIndex < text.length) {
|
|
131
|
+
let endIndex = Math.min(startIndex + chunkSize, text.length);
|
|
160
132
|
|
|
161
|
-
if (
|
|
162
|
-
|
|
133
|
+
if (endIndex == text.length) {
|
|
134
|
+
chunks.push(text.slice(startIndex));
|
|
135
|
+
break;
|
|
163
136
|
}
|
|
164
137
|
|
|
165
|
-
const
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
138
|
+
const searchWindow = text.slice(startIndex, endIndex);
|
|
139
|
+
|
|
140
|
+
// Find semantic break point, minimum 1 character
|
|
141
|
+
let breakPoint = Math.max(findSemanticBreak(searchWindow), 1);
|
|
142
|
+
let chunk = searchWindow.slice(0, breakPoint);
|
|
143
|
+
|
|
144
|
+
// If chunk is too large, reduce size until it fits
|
|
145
|
+
while (encode(chunk).length > maxTokenLen && chunkSize > 1) {
|
|
146
|
+
// reduce chunk size by a proportional amount
|
|
147
|
+
const reductionFactor = maxTokenLen / encode(chunk).length;
|
|
148
|
+
chunkSize = Math.floor(chunkSize * reductionFactor);
|
|
149
|
+
endIndex = Math.min(chunkSize, searchWindow.length);
|
|
150
|
+
breakPoint = Math.max(findSemanticBreak(searchWindow.slice(0, endIndex)), 1);
|
|
151
|
+
chunk = searchWindow.slice(0, breakPoint);
|
|
177
152
|
}
|
|
178
153
|
|
|
179
|
-
|
|
180
|
-
|
|
154
|
+
// Force single character if still too large
|
|
155
|
+
if (encode(chunk).length > maxTokenLen) {
|
|
156
|
+
breakPoint = 1;
|
|
157
|
+
chunk = searchWindow.slice(0, 1);
|
|
158
|
+
}
|
|
181
159
|
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
chunks = combineChunks(chunks);
|
|
160
|
+
chunks.push(chunk);
|
|
161
|
+
startIndex += breakPoint;
|
|
162
|
+
}
|
|
186
163
|
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
return getSemanticChunks(chunk, chunkSize);
|
|
190
|
-
} else {
|
|
191
|
-
return chunk;
|
|
192
|
-
}
|
|
193
|
-
});
|
|
164
|
+
return chunks;
|
|
165
|
+
}
|
|
194
166
|
|
|
195
|
-
|
|
196
|
-
|
|
167
|
+
const findSemanticBreak = (text) => {
|
|
168
|
+
const findLastDelimiter = (text, delimiters) => {
|
|
169
|
+
let lastIndex = -1;
|
|
170
|
+
for (const delimiter of delimiters) {
|
|
171
|
+
const index = text.lastIndexOf(delimiter);
|
|
172
|
+
if (index > -1) {
|
|
173
|
+
const delimitedIndex = index + delimiter.length;
|
|
174
|
+
if (delimitedIndex > lastIndex) lastIndex = delimitedIndex;
|
|
175
|
+
}
|
|
197
176
|
}
|
|
198
|
-
|
|
199
|
-
return chunks;
|
|
177
|
+
return lastIndex;
|
|
200
178
|
}
|
|
201
|
-
else {
|
|
202
|
-
return breakText(text);
|
|
203
|
-
}
|
|
204
|
-
}
|
|
205
179
|
|
|
180
|
+
let breakIndex;
|
|
181
|
+
|
|
182
|
+
// Look for paragraph break (including different newline styles)
|
|
183
|
+
const paragraphDelimiters = ['\n\n', '\r\n\r\n', '\r\r', '\n'];
|
|
184
|
+
breakIndex = findLastDelimiter(text, paragraphDelimiters);
|
|
185
|
+
if (breakIndex !== -1) return breakIndex;
|
|
186
|
+
|
|
187
|
+
// Look for sentence break
|
|
188
|
+
const sentenceDelimiters = [
|
|
189
|
+
// Latin/European
|
|
190
|
+
'.', '!', '?',
|
|
191
|
+
// CJK
|
|
192
|
+
'。', '!', '?', '.', '…',
|
|
193
|
+
// Arabic/Persian/Urdu
|
|
194
|
+
'؟', '۔', '.',
|
|
195
|
+
// Devanagari/Hindi
|
|
196
|
+
'।',
|
|
197
|
+
// Thai
|
|
198
|
+
'๏', 'ฯ',
|
|
199
|
+
// Armenian
|
|
200
|
+
'։',
|
|
201
|
+
// Ethiopian
|
|
202
|
+
'።'
|
|
203
|
+
];
|
|
204
|
+
breakIndex = findLastDelimiter(text, sentenceDelimiters);
|
|
205
|
+
if (breakIndex !== -1) return breakIndex;
|
|
206
|
+
|
|
207
|
+
// Look for phrase break
|
|
208
|
+
const phraseDelimiters = [
|
|
209
|
+
// Latin/European
|
|
210
|
+
'-', ';', ':', ',',
|
|
211
|
+
// CJK
|
|
212
|
+
'、', ',', ';', ':', '─',
|
|
213
|
+
// Arabic/Persian/Urdu
|
|
214
|
+
'،', '؛', '٬',
|
|
215
|
+
// Devanagari/Hindi
|
|
216
|
+
'॥', ',',
|
|
217
|
+
// Thai
|
|
218
|
+
'๚', '、'
|
|
219
|
+
];
|
|
220
|
+
breakIndex = findLastDelimiter(text, phraseDelimiters);
|
|
221
|
+
if (breakIndex !== -1) return breakIndex;
|
|
222
|
+
|
|
223
|
+
// Look for word break (Unicode whitespace)
|
|
224
|
+
const whitespaceDelimiters = [
|
|
225
|
+
' ', // Space
|
|
226
|
+
'\t', // Tab
|
|
227
|
+
'\u00A0', // No-Break Space
|
|
228
|
+
'\u1680', // Ogham Space Mark
|
|
229
|
+
'\u2000', // En Quad
|
|
230
|
+
'\u2001', // Em Quad
|
|
231
|
+
'\u2002', // En Space
|
|
232
|
+
'\u2003', // Em Space
|
|
233
|
+
'\u2004', // Three-Per-Em Space
|
|
234
|
+
'\u2005', // Four-Per-Em Space
|
|
235
|
+
'\u2006', // Six-Per-Em Space
|
|
236
|
+
'\u2007', // Figure Space
|
|
237
|
+
'\u2008', // Punctuation Space
|
|
238
|
+
'\u2009', // Thin Space
|
|
239
|
+
'\u200A', // Hair Space
|
|
240
|
+
'\u202F', // Narrow No-Break Space
|
|
241
|
+
'\u205F', // Medium Mathematical Space
|
|
242
|
+
'\u3000' // Ideographic Space
|
|
243
|
+
];
|
|
244
|
+
breakIndex = findLastDelimiter(text, whitespaceDelimiters);
|
|
245
|
+
if (breakIndex !== -1) return breakIndex;
|
|
246
|
+
|
|
247
|
+
return text.length - 1;
|
|
248
|
+
};
|
|
206
249
|
|
|
207
250
|
const semanticTruncate = (text, maxLength) => {
|
|
208
251
|
if (text.length <= maxLength) {
|
|
@@ -224,4 +267,4 @@ const getSingleTokenChunks = (text) => {
|
|
|
224
267
|
|
|
225
268
|
export {
|
|
226
269
|
getSemanticChunks, semanticTruncate, getLastNToken, getFirstNToken, determineTextFormat, getSingleTokenChunks
|
|
227
|
-
};
|
|
270
|
+
};
|
package/server/modelExecutor.js
CHANGED
|
@@ -25,7 +25,9 @@ import Gemini15VisionPlugin from './plugins/gemini15VisionPlugin.js';
|
|
|
25
25
|
import AzureBingPlugin from './plugins/azureBingPlugin.js';
|
|
26
26
|
import Claude3VertexPlugin from './plugins/claude3VertexPlugin.js';
|
|
27
27
|
import NeuralSpacePlugin from './plugins/neuralSpacePlugin.js';
|
|
28
|
-
import RunwareAiPlugin from './plugins/runwareAIPlugin.js';
|
|
28
|
+
import RunwareAiPlugin from './plugins/runwareAiPlugin.js';
|
|
29
|
+
import ReplicateApiPlugin from './plugins/replicateApiPlugin.js';
|
|
30
|
+
import AzureVideoTranslatePlugin from './plugins/azureVideoTranslatePlugin.js';
|
|
29
31
|
|
|
30
32
|
class ModelExecutor {
|
|
31
33
|
constructor(pathway, model) {
|
|
@@ -108,6 +110,12 @@ class ModelExecutor {
|
|
|
108
110
|
case 'RUNWARE-AI':
|
|
109
111
|
plugin = new RunwareAiPlugin(pathway, model);
|
|
110
112
|
break;
|
|
113
|
+
case 'REPLICATE-API':
|
|
114
|
+
plugin = new ReplicateApiPlugin(pathway, model);
|
|
115
|
+
break;
|
|
116
|
+
case 'AZURE-VIDEO-TRANSLATE':
|
|
117
|
+
plugin = new AzureVideoTranslatePlugin(pathway, model);
|
|
118
|
+
break;
|
|
111
119
|
default:
|
|
112
120
|
throw new Error(`Unsupported model type: ${model.type}`);
|
|
113
121
|
}
|
package/server/parser.js
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import logger from '../lib/logger.js';
|
|
2
|
+
import { callPathway } from '../lib/pathwayTools.js';
|
|
2
3
|
|
|
3
4
|
//simply trim and parse with given regex
|
|
4
5
|
const regexParser = (text, regex) => {
|
|
@@ -12,26 +13,14 @@ const parseNumberedList = (str) => {
|
|
|
12
13
|
return regexParser(str, /^\s*[\[\{\(]*\d+[\s.=\-:,;\]\)\}]/gm);
|
|
13
14
|
}
|
|
14
15
|
|
|
15
|
-
|
|
16
|
-
const
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
try {
|
|
23
|
-
const splitted = regexParser(value, /[:-](.*)/);
|
|
24
|
-
const obj = {};
|
|
25
|
-
for (let i = 0; i < fields.length; i++) {
|
|
26
|
-
obj[fields[i]] = splitted[i];
|
|
27
|
-
}
|
|
28
|
-
result.push(obj);
|
|
29
|
-
} catch (e) {
|
|
30
|
-
logger.warn(`Failed to parse value in parseNumberedObjectList, value: ${value}, fields: ${fields}`);
|
|
31
|
-
}
|
|
16
|
+
async function parseNumberedObjectList(text, format) {
|
|
17
|
+
const parsedList = await callPathway('sys_parse_numbered_object_list', { text, format });
|
|
18
|
+
try {
|
|
19
|
+
return JSON.parse(parsedList);
|
|
20
|
+
} catch (error) {
|
|
21
|
+
logger.warn(`Failed to parse numbered object list: ${error.message}`);
|
|
22
|
+
return [];
|
|
32
23
|
}
|
|
33
|
-
|
|
34
|
-
return result;
|
|
35
24
|
}
|
|
36
25
|
|
|
37
26
|
// parse a comma-separated list text format into list
|
|
@@ -49,25 +38,18 @@ const isNumberedList = (data) => {
|
|
|
49
38
|
return numberedListPattern.test(data.trim());
|
|
50
39
|
}
|
|
51
40
|
|
|
52
|
-
function parseJson(str) {
|
|
41
|
+
async function parseJson(str) {
|
|
53
42
|
try {
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
str.indexOf('[') !== -1 ? str.indexOf('[') : Infinity
|
|
57
|
-
);
|
|
58
|
-
|
|
59
|
-
const end = Math.max(
|
|
60
|
-
str.lastIndexOf('}') !== -1 ? str.lastIndexOf('}') + 1 : 0,
|
|
61
|
-
str.lastIndexOf(']') !== -1 ? str.lastIndexOf(']') + 1 : 0
|
|
62
|
-
);
|
|
63
|
-
|
|
64
|
-
const jsonStr = str.slice(start, end);
|
|
65
|
-
// eslint-disable-next-line no-unused-vars
|
|
66
|
-
const json = JSON.parse(jsonStr);
|
|
67
|
-
return jsonStr;
|
|
43
|
+
JSON.parse(str); // Validate JSON
|
|
44
|
+
return str;
|
|
68
45
|
} catch (error) {
|
|
69
|
-
|
|
70
|
-
|
|
46
|
+
try {
|
|
47
|
+
const repairedJson = await callPathway('sys_repair_json', { text: str });
|
|
48
|
+
return JSON.parse(repairedJson) ? repairedJson : null;
|
|
49
|
+
} catch (repairError) {
|
|
50
|
+
logger.warn(`Failed to parse JSON: ${repairError.message}`);
|
|
51
|
+
return null;
|
|
52
|
+
}
|
|
71
53
|
}
|
|
72
54
|
}
|
|
73
55
|
|
|
@@ -5,7 +5,7 @@ class PathwayResponseParser {
|
|
|
5
5
|
this.pathway = pathway;
|
|
6
6
|
}
|
|
7
7
|
|
|
8
|
-
parse(data) {
|
|
8
|
+
async parse(data) {
|
|
9
9
|
if (this.pathway.parser) {
|
|
10
10
|
return this.pathway.parser(data);
|
|
11
11
|
}
|
|
@@ -13,7 +13,7 @@ class PathwayResponseParser {
|
|
|
13
13
|
if (this.pathway.list) {
|
|
14
14
|
if (isNumberedList(data)) {
|
|
15
15
|
if (this.pathway.format) {
|
|
16
|
-
return parseNumberedObjectList(data, this.pathway.format);
|
|
16
|
+
return await parseNumberedObjectList(data, this.pathway.format);
|
|
17
17
|
}
|
|
18
18
|
return parseNumberedList(data);
|
|
19
19
|
} else if (isCommaSeparatedList(data)) {
|
|
@@ -23,7 +23,7 @@ class PathwayResponseParser {
|
|
|
23
23
|
}
|
|
24
24
|
|
|
25
25
|
if (this.pathway.json) {
|
|
26
|
-
return parseJson(data);
|
|
26
|
+
return await parseJson(data);
|
|
27
27
|
}
|
|
28
28
|
|
|
29
29
|
return data;
|
|
@@ -123,7 +123,7 @@ class AzureCognitivePlugin extends ModelPlugin {
|
|
|
123
123
|
data.filter = `owner eq '${savedContextId}'`;
|
|
124
124
|
|
|
125
125
|
if(chatId){
|
|
126
|
-
data.filter += ` and chatId eq '${chatId}'`;
|
|
126
|
+
data.filter += ` and (chatId eq '${chatId}' or docId eq '${savedContextId}-indexmainpane')`;
|
|
127
127
|
}
|
|
128
128
|
}
|
|
129
129
|
|