@aj-archipelago/cortex 1.1.5 → 1.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/config.js CHANGED
@@ -118,7 +118,7 @@ var config = convict({
         "api-key": "{{AZURE_COGNITIVE_API_KEY}}",
         "Content-Type": "application/json"
       },
-      "requestsPerSecond": 6
+      "requestsPerSecond": 10
     },
     "oai-embeddings": {
       "type": "OPENAI-EMBEDDINGS",
@@ -146,6 +146,16 @@ var config = convict({
       "maxTokenLength": 128000,
       "supportsStreaming": true
     },
+    "azure-bing": {
+      "type": "AZURE-BING",
+      "url": "https://api.bing.microsoft.com/v7.0/search",
+      "headers": {
+        "Ocp-Apim-Subscription-Key": "{{AZURE_BING_KEY}}",
+        "Content-Type": "application/json"
+      },
+      "requestsPerSecond": 10,
+      "maxTokenLength": 200000
+    },
   },
   env: 'CORTEX_MODELS'
 },
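The `requestsPerSecond` values drive the per-endpoint rate limiter that appears later in this diff (`endpoint.limiter.schedule(...)`, `limiter.on('failed', ...)`). That API matches Bottleneck; assuming that is the library in use (an assumption, not confirmed by this diff), a minimal sketch of turning a model's `requestsPerSecond` into a limiter:

    import Bottleneck from 'bottleneck';

    // Hypothetical helper, not code from this package: Bottleneck's minTime is
    // the minimum gap in ms between job starts, so 10 rps => 100ms.
    const createLimiterForModel = (model) =>
        new Bottleneck({ minTime: Math.ceil(1000 / (model.requestsPerSecond || 1)) });

    const bingLimiter = createLimiterForModel({ requestsPerSecond: 10 });
    // auth header omitted; this only illustrates the scheduling
    await bingLimiter.schedule(() => fetch('https://api.bing.microsoft.com/v7.0/search?q=test'));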
@@ -38,9 +38,14 @@ def transcribe(params):
     if 'word_timestamps' in params: #parse as bool
         word_timestamps = False if params['word_timestamps'] == 'False' else True
 
+    decode_options = {}
+    if 'language' in params:
+        decode_options["language"] = params["language"]
+        print(f"Transcription language set as {decode_options['language']}")
+
     print(f"Transcribing file {fileurl} with word_timestamps={word_timestamps}")
     start_time = time.time()
-    result = model.transcribe(fileurl, word_timestamps=word_timestamps)
+    result = model.transcribe(fileurl, word_timestamps=word_timestamps, **decode_options)
     end_time = time.time()
     execution_time = end_time - start_time
     print("Transcribe execution time:", execution_time, "seconds")
@@ -11,6 +11,7 @@ class CortexRequest {
         this._pathwayResolver = pathwayResolver || {};
         this._selectedEndpoint = selectedEndpoint || {};
         this._stream = stream || false;
+        this._method = 'POST';
 
         if (this._pathwayResolver) {
             this._model = this._pathwayResolver.model;
@@ -41,6 +42,15 @@ class CortexRequest {
         this._url = value;
     }
 
+    // method getter and setter
+    get method() {
+        return this._method;
+    }
+
+    set method(value) {
+        this._method = value;
+    }
+
     // data getter and setter
     get data() {
         return this._data;
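`CortexRequest` now carries an HTTP method, defaulting to POST, so a plugin can switch a request to GET; the Bing search endpoint added in this release is the obvious consumer. A hedged sketch of plugin-side usage (the constructor argument shape beyond `pathwayResolver` is an assumption):

    // Sketch: everything here except the method/url setters is assumed.
    const request = new CortexRequest({ pathwayResolver });
    request.method = 'GET'; // requestWithMonitor (later in this diff) routes GETs to cortexAxios.get()
    request.url = 'https://api.bing.microsoft.com/v7.0/search';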
package/lib/encodeCache.js ADDED
@@ -0,0 +1,38 @@
+import { encode as gpt3Encode, decode as gpt3Decode } from 'gpt-3-encoder';
+import { FastLRUCache } from './fastLruCache.js';
+
+class EncodeCache {
+    constructor() {
+        this.encodeCache = new FastLRUCache(1000);
+        this.decodeCache = new FastLRUCache(100); // we don't use decode nearly as much
+    }
+
+    encode(value) {
+        if (this.encodeCache.get(value) !== -1) {
+            return this.encodeCache.get(value);
+        }
+        const encoded = gpt3Encode(value);
+        this.encodeCache.put(value, encoded);
+        return encoded;
+    }
+
+    decode(value) {
+        if (this.decodeCache.get(value) !== -1) {
+            return this.decodeCache.get(value);
+        }
+        const decoded = gpt3Decode(value);
+        this.decodeCache.put(value, decoded);
+        if (this.encodeCache.get(decoded) === -1) {
+            this.encodeCache.put(decoded, value);
+        }
+        return decoded;
+    }
+}
+
+// Create one instance of the cache
+const cache = new EncodeCache();
+
+// Make sure the instance is bound to the methods, so
+// references to 'this' are correct
+export const encode = cache.encode.bind(cache);
+export const decode = cache.decode.bind(cache);
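The new `encodeCache` module memoizes GPT-3 tokenization behind the same `encode`/`decode` signatures as `gpt-3-encoder`, so call sites swap imports and change nothing else. Note the LRU is keyed on the exact value passed in, so `decode` only hits its cache when given the same token-array reference. A quick usage sketch:

    import { encode, decode } from '../lib/encodeCache.js'; // path as used by server/chunker.js

    const tokens = encode('Hello, world!'); // tokenizes once; repeats are served from the LRU
    console.log(tokens.length);             // token count, as with gpt-3-encoder
    console.log(decode(tokens));            // 'Hello, world!' — also primes the encode cache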
package/lib/fastLruCache.js ADDED
@@ -0,0 +1,82 @@
+// This class implements a fast O(1) LRU cache using a Map and a doubly linked list.
+
+class Node {
+    constructor(key, value) {
+        this.key = key;
+        this.value = value;
+        this.next = null;
+        this.prev = null;
+    }
+}
+
+class FastLRUCache {
+    constructor(capacity) {
+        this.capacity = capacity;
+        this.cache = new Map();
+        this.head = null;
+        this.tail = null;
+    }
+
+    get(key) {
+        if (!this.cache.has(key)) {
+            return -1;
+        }
+        const node = this.cache.get(key);
+        this.moveToEnd(node);
+        return node.value;
+    }
+
+    put(key, value) {
+        if (this.cache.has(key)) {
+            const node = this.cache.get(key);
+            node.value = value;
+            this.moveToEnd(node);
+        } else {
+            const node = new Node(key, value);
+            if (this.cache.size >= this.capacity) {
+                this.cache.delete(this.head.key);
+                this.shiftHeadToNext();
+            }
+            this.cache.set(key, node);
+            this.addNodeToTail(node);
+        }
+    }
+
+    addNodeToTail(node) {
+        if (!this.tail) {
+            this.head = node;
+            this.tail = node;
+        } else {
+            node.prev = this.tail;
+            this.tail.next = node;
+            this.tail = node;
+        }
+    }
+
+    moveToEnd(node) {
+        if (node === this.tail) {
+            return;
+        }
+        if (node === this.head) {
+            this.shiftHeadToNext();
+        } else {
+            node.prev.next = node.next;
+            node.next.prev = node.prev;
+        }
+        node.prev = this.tail;
+        node.next = null;
+        this.tail.next = node;
+        this.tail = node;
+    }
+
+    shiftHeadToNext() {
+        this.head = this.head.next;
+        if (this.head) {
+            this.head.prev = null;
+        } else {
+            this.tail = null;
+        }
+    }
+}
+
+export { FastLRUCache };
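Cache semantics in brief: `get` returns the sentinel `-1` on a miss, and both `get` and `put` refresh recency, so the list head is always the eviction candidate. A small worked example:

    import { FastLRUCache } from './fastLruCache.js'; // path as imported from within package/lib

    const cache = new FastLRUCache(2);
    cache.put('a', 1);
    cache.put('b', 2);
    cache.get('a');              // touching 'a' leaves 'b' least recently used
    cache.put('c', 3);           // over capacity: evicts 'b'
    console.log(cache.get('b')); // -1 (miss sentinel)
    console.log(cache.get('a')); // 1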
@@ -1,5 +1,5 @@
 // pathwayTools.js
-import { encode , decode } from 'gpt-3-encoder';
+import { encode, decode } from '../lib/encodeCache.js';
 import { config } from '../config.js';
 
 // callPathway - call a pathway from another pathway
@@ -57,9 +57,10 @@ const createLimiter = (endpoint, name, index) => {
 
     endpoint.limiter.on('failed', (error, info) => {
         if (error.name === 'CanceledError') {
-            logger.debug(`Request cancelled for ${cortexId}-${name}-${index}: Id: ${info.options.id || 'none'}`);
+            logger.debug(`Limiter request cancelled for ${cortexId}-${name}-${index}: Id: ${info.options.id || 'none'}`);
+            endpoint.monitor.incrementErrorCount();
         } else {
-            logger.error(`Request failed for ${cortexId}-${name}-${index}: Id: ${info.options.id || 'none'}: ${error}`);
+            logger.error(`Limiter request failed for ${cortexId}-${name}-${index}: Id: ${info.options.id || 'none'}: ${error?.message || error}`);
         }
     });
 
@@ -154,6 +155,7 @@ if (config.get('enableCache')) {
     });
 }
 
+//log statistics about active endpoints
 setInterval(() => {
     // Iterate over each model
     for (const [name, model] of Object.entries(modelEndpoints)) {
@@ -179,100 +181,106 @@ setInterval(() => {
             endpointIndex++;
         });
     }
-}, 10000); // Log rates every 10 seconds (10000 ms).
+}, 30000); // Log rates every 30 seconds
 
-const postWithMonitor = async (endpoint, url, data, axiosConfigObj) => {
-    return cortexAxios.post(url, data, axiosConfigObj);
+const requestWithMonitor = async (endpoint, url, data, axiosConfigObj) => {
+    const callId = endpoint?.monitor?.startCall();
+    let response;
+    try {
+        if (axiosConfigObj?.method == 'GET'){
+            response = await cortexAxios.get(url, axiosConfigObj);
+        } else {
+            response = await cortexAxios.post(url, data, axiosConfigObj);
+        }
+    } catch (error) {
+        // throw new error with duration as part of the error data
+        throw { ...error, duration: endpoint?.monitor?.incrementErrorCount(callId, error?.response?.status || null) };
+    }
+    let duration;
+    if (response.status >= 200 && response.status < 300) {
+        duration = endpoint?.monitor?.endCall(callId);
+    } else {
+        duration = endpoint?.monitor?.incrementErrorCount(callId, response.status);
+    }
+
+    return { response, duration };
 }
 
 const MAX_RETRY = 10; // retries for error handling
 const MAX_DUPLICATE_REQUESTS = 3; // duplicate requests to manage latency spikes
 const DUPLICATE_REQUEST_AFTER = 10; // 10 seconds
 
-const postRequest = async (cortexRequest) => {
+const getDuplicateRequestDelay = (index, duplicateRequestAfter) => {
+    const duplicateRequestTime = duplicateRequestAfter * Math.pow(2, index) - duplicateRequestAfter;
+    const jitter = duplicateRequestTime * 0.2 * Math.random();
+    const duplicateRequestTimeout = Math.max(0, duplicateRequestTime + jitter);
+    return duplicateRequestTimeout;
+}
+
+const makeRequest = async (cortexRequest) => {
     let promises = [];
+    // retry certain errors up to MAX_RETRY times
     for (let i = 0; i < MAX_RETRY; i++) {
-        const { url, data, params, headers, cache, selectedEndpoint, requestId, pathway, model, stream} = cortexRequest;
+        const { url, data, params, headers, cache, selectedEndpoint, requestId, pathway, model, stream, method} = cortexRequest;
         const enableDuplicateRequests = pathway?.enableDuplicateRequests !== undefined ? pathway.enableDuplicateRequests : config.get('enableDuplicateRequests');
-        let maxDuplicateRequests = enableDuplicateRequests ? MAX_DUPLICATE_REQUESTS : 1;
-        let duplicateRequestAfter = (pathway?.duplicateRequestAfter || DUPLICATE_REQUEST_AFTER) * 1000;
-
-        if (enableDuplicateRequests) {
-            //logger.info(`>>> [${requestId}] Duplicate requests enabled after ${duplicateRequestAfter / 1000} seconds`);
-        }
+        const maxDuplicateRequests = enableDuplicateRequests ? MAX_DUPLICATE_REQUESTS : 1;
+        const duplicateRequestAfter = (pathway?.duplicateRequestAfter || DUPLICATE_REQUEST_AFTER) * 1000;
 
-        const axiosConfigObj = { params, headers, cache };
+        const axiosConfigObj = { params, headers, cache, method };
         const streamRequested = (stream || params?.stream || data?.stream);
+        // if we're using streaming, duplicate requests are
+        // not supported, so we just push one promise into the array
        if (streamRequested && model.supportsStreaming) {
            axiosConfigObj.responseType = 'stream';
-            promises.push(selectedEndpoint.limiter.schedule({expiration: pathway.timeout * 1000 + 1000, id: `${requestId}_${uuidv4()}`},() => postWithMonitor(selectedEndpoint, url, data, axiosConfigObj)));
+            promises.push(selectedEndpoint.limiter.schedule({expiration: pathway.timeout * 1000 + 1000, id: `${requestId}_${uuidv4()}`},() => requestWithMonitor(selectedEndpoint, url, data, axiosConfigObj)));
        } else {
            if (streamRequested) {
                logger.info(`>>> [${requestId}] ${model} does not support streaming - sending non-streaming request`);
                axiosConfigObj.params.stream = false;
                data.stream = false;
            }
+            // if we're not streaming, we push at least one promise
+            // into the array, but if we're supporting duplicate
+            // requests we push one for each potential duplicate,
+            // heading to a new endpoint (if available) and
+            // staggered by a jittered amount of time
            const controllers = Array.from({ length: maxDuplicateRequests }, () => new AbortController());
            promises = controllers.map((controller, index) =>
                new Promise((resolve, reject) => {
-                    const duplicateRequestTime = duplicateRequestAfter * Math.pow(2, index) - duplicateRequestAfter;
-                    const jitter = duplicateRequestTime * 0.2 * Math.random();
-                    const duplicateRequestTimeout = Math.max(0, duplicateRequestTime + jitter);
                    setTimeout(async () => {
                        try {
+                            if (index > 0) {
+                                cortexRequest.selectNewEndpoint();
+                            }
+                            const { url, data, params, headers, cache, selectedEndpoint, requestId, pathway, model } = cortexRequest;
                            const endpointName = selectedEndpoint.name || model;
                            if (!selectedEndpoint.limiter) {
                                throw new Error(`No limiter for endpoint ${endpointName}!`);
                            }
-                            const axiosConfigObj = { params, headers, cache };
+                            const axiosConfigObj = { params, headers, cache, method };
 
                            let response = null;
+                            let duration = null;
 
                            if (!controller.signal?.aborted) {
 
                                axiosConfigObj.signal = controller.signal;
                                axiosConfigObj.headers['X-Cortex-Request-Index'] = index;
 
-                                if (index === 0) {
-                                    //logger.info(`>>> [${requestId}] sending request to ${endpointName} API ${axiosConfigObj.responseType === 'stream' ? 'with streaming' : ''}`);
-                                } else {
-                                    if (model.supportsStreaming) {
-                                        axiosConfigObj.responseType = 'stream';
-                                        axiosConfigObj.cache = false;
-                                    }
-                                    const logMessage = `>>> [${requestId}] taking too long - sending duplicate request ${index} to ${endpointName} API ${axiosConfigObj.responseType === 'stream' ? 'with streaming' : ''}`;
+                                if (index > 0) {
+                                    const logMessage = `>>> [${requestId}] taking too long - sending duplicate request ${index} to ${endpointName} API`;
                                    const header = '>'.repeat(logMessage.length);
                                    logger.info(`\n${header}\n${logMessage}`);
                                }
 
-                                response = await selectedEndpoint.limiter.schedule({expiration: pathway.timeout * 1000 + 1000, id: `${requestId}_${uuidv4()}`}, () => postWithMonitor(selectedEndpoint, url, data, axiosConfigObj));
+                                ({ response, duration } = await selectedEndpoint.limiter.schedule({expiration: pathway.timeout * 1000 + 1000, id: `${requestId}_${uuidv4()}`}, () => requestWithMonitor(selectedEndpoint, url, data, axiosConfigObj)));
 
                                if (!controller.signal?.aborted) {
-
                                    logger.debug(`<<< [${requestId}] received response for request ${index}`);
-
-                                    if (axiosConfigObj.responseType === 'stream') {
-                                        // Buffering and collecting the stream data
-                                        logger.info(`<<< [${requestId}] buffering streaming response for request ${index}`);
-                                        response = await new Promise((resolve, reject) => {
-                                            let responseData = '';
-                                            response.data.on('data', (chunk) => {
-                                                responseData += chunk;
-                                                logger.debug(`<<< [${requestId}] received chunk for request ${index}`);
-                                            });
-                                            response.data.on('end', () => {
-                                                response.data = JSON.parse(responseData);
-                                                resolve(response);
-                                            });
-                                            response.data.on('error', (error) => {
-                                                reject(error);
-                                            });
-                                        });
-                                    }
                                }
                            }
 
-                            resolve(response);
+                            resolve({ response, duration });
 
                        } catch (error) {
                            if (error.name === 'AbortError' || error.name === 'CanceledError') {
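The extracted `getDuplicateRequestDelay` staggers duplicates on a doubling schedule with the primary request at zero delay. With the default `duplicateRequestAfter` of 10 seconds:

    // delay = duplicateRequestAfter * 2^index - duplicateRequestAfter, plus up to 20% jitter
    const base = (index) => 10000 * Math.pow(2, index) - 10000;
    console.log([0, 1, 2].map(base)); // [0, 10000, 30000] ms before jitter
    // the primary fires immediately; duplicates follow at ~10-12s and ~30-36s if nothing has resolved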
@@ -285,45 +293,48 @@ const postRequest = async (cortexRequest) => {
                        } finally {
                            controllers.forEach(controller => controller.abort());
                        }
-                    }, duplicateRequestTimeout);
+                    }, getDuplicateRequestDelay(index, duplicateRequestAfter));
                })
            );
        }
 
+        // no requests have been made yet, but the promises array
+        // is full, so now we execute them in parallel
        try {
-            const response = await Promise.race(promises);
+            const { response, duration } = await Promise.race(promises);
 
            // if response status is 2xx
            if (response.status >= 200 && response.status < 300) {
-                return response;
+                return { response, duration };
            } else {
                throw new Error(`Received error response: ${response.status}`);
            }
        } catch (error) {
-            if (error.response) {
-                selectedEndpoint.monitor.incrementErrorCount();
-                const status = error.response.status;
-
-                if (status === 429) {
-                    selectedEndpoint.monitor.incrementError429Count();
-                }
-
+            const { response, duration } = error;
+            if (response) {
+                const status = response.status;
+                // if there is only one endpoint, only retry select error codes
                if (cortexRequest.model.endpoints.length === 1) {
-                    if (status !== 429) {
-                        return error.response;
+                    if (status !== 429 &&
+                        status !== 408 &&
+                        status !== 502 &&
+                        status !== 503 &&
+                        status !== 504) {
+                        return { response, duration };
                    }
                } else {
-                    // if there are multiple endpoints, retry everything
+                    // if there are multiple endpoints, retry everything as it
+                    // could be going to a different host
                    cortexRequest.selectNewEndpoint();
                }
 
-                logger.info(`>>> [${requestId}] retrying request due to ${status} response. Retry count: ${i + 1}`);
+                logger.info(`>>> [${requestId}] retrying request (${duration}ms) due to ${status} response. Retry count: ${i + 1}`);
                if (i < MAX_RETRY - 1) {
                    const backoffTime = 200 * Math.pow(2, i);
                    const jitter = backoffTime * 0.2 * Math.random();
                    await new Promise(r => setTimeout(r, backoffTime + jitter));
                } else {
-                    return error.response;
+                    return { response, duration };
                }
            } else {
                throw error;
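Retries use a separate jittered exponential backoff, and the single-endpoint retry list now covers 408/429/502/503/504 rather than 429 alone. The base schedule across the 10 permitted attempts:

    // backoffTime = 200 * 2^i, plus up to 20% jitter
    const backoffs = Array.from({ length: 10 }, (_, i) => 200 * Math.pow(2, i));
    console.log(backoffs); // [200, 400, 800, 1600, 3200, 6400, 12800, 25600, 51200, 102400] ms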
@@ -334,10 +345,7 @@ const postRequest = async (cortexRequest) => {
 
 const executeRequest = async (cortexRequest) => {
     try {
-        const endpoint = cortexRequest.selectedEndpoint;
-        const callId = endpoint?.monitor?.startCall();
-        const response = await postRequest(cortexRequest);
-        endpoint?.monitor?.endCall(callId);
+        const { response, duration } = await makeRequest(cortexRequest);
         const requestId = cortexRequest.requestId;
         const { error, data, cached } = response;
         if (cached) {
@@ -347,8 +355,7 @@ const executeRequest = async (cortexRequest) => {
             const lastError = error[error.length - 1];
             return { error: lastError.toJSON() ?? lastError ?? error };
         }
-        //logger.info(`<<< [${requestId}] response: ${data.choices[0].delta || data.choices[0]}`)
-        return data;
+        return { data, duration };
     } catch (error) {
         logger.error(`Error in request: ${error.message || error}`);
         return { error: error };
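`executeRequest` now resolves to `{ data, duration }` (or `{ error }`) rather than bare data, so downstream callers destructure the result; a hedged sketch of caller-side handling (the caller code itself is assumed):

    const { data, duration, error } = await executeRequest(cortexRequest);
    if (error) {
        logger.error(`Request failed: ${error.message || error}`);
    } else {
        logger.info(`Model responded in ${duration}ms`); // duration is new in this release
    }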
@@ -1,5 +1,4 @@
 import { v4 as uuidv4 } from 'uuid';
-// eslint-disable-next-line import/no-extraneous-dependencies
 import { Deque } from '@datastructures-js/deque';
 
 class RequestMonitor {
@@ -20,6 +19,15 @@ class RequestMonitor {
         return this.healthy;
     }
 
+    removeOldCallStarts() {
+        const currentTime = new Date();
+        for (const [callId, startTime] of this.callStartTimes) {
+            if (currentTime - startTime > this.ageOutTime) {
+                this.callStartTimes.delete(callId);
+            }
+        }
+    }
+
     removeOldCallStats(dq, timeProperty) {
         const currentTime = new Date();
         while (!dq.isEmpty() && currentTime - (timeProperty ? dq.front()[timeProperty] : dq.front()) > this.ageOutTime) {
@@ -28,6 +36,7 @@ class RequestMonitor {
     }
 
     maintain() {
+        this.removeOldCallStarts();
         this.removeOldCallStats(this.callCount);
         if (this.callCount.size() === 0) {
             this.peakCallRate = 0;
@@ -36,7 +45,7 @@ class RequestMonitor {
         this.removeOldCallStats(this.error429Count);
         this.removeOldCallStats(this.errorCount);
 
-        if (this.getErrorRate() > 0.3) {
+        if (this.getErrorRate() > 0.1) {
             this.healthy = false;
         } else {
             this.healthy = true;
@@ -55,10 +64,11 @@ class RequestMonitor {
     endCall(callId) {
         const endTime = new Date();
         const startTime = this.callStartTimes.get(callId);
+        let callDuration = null;
 
         if (startTime) {
+            callDuration = (endTime - startTime);
             this.callStartTimes.delete(callId);
-            const callDuration = endTime - startTime;
             this.callDurations.pushBack({endTime, callDuration});
 
             // Keep the callDurations length to 5
@@ -73,6 +83,7 @@ class RequestMonitor {
         }
 
         this.maintain();
+        return callDuration;
     }
 
     getAverageCallDuration() {
@@ -84,14 +95,13 @@ class RequestMonitor {
         return sum / this.callDurations.size();
     }
 
-    incrementError429Count() {
-        this.error429Count.pushBack(new Date());
-        this.maintain();
-    }
-
-    incrementErrorCount() {
+    incrementErrorCount(callId, status) {
         this.errorCount.pushBack(new Date());
+        if (status === 429) {
+            this.error429Count.pushBack(new Date());
+        }
         this.maintain();
+        return callId ? this.endCall(callId) : null;
     }
 
     getCallRate() {
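The monitor's call lifecycle is now: `startCall` on dispatch, then `endCall(callId)` on success or the consolidated `incrementErrorCount(callId, status)` on failure, both returning the call duration in milliseconds; the health flag flips once the recent error rate exceeds 10% (previously 30%). A sketch of the protocol, assuming `monitor` is a `RequestMonitor` instance and `doRequest` is a placeholder:

    const callId = monitor.startCall();
    try {
        const response = await doRequest();        // hypothetical request function
        const duration = monitor.endCall(callId);  // success: records and returns ms
    } catch (e) {
        // failure: counts the error (429s also feed error429Count) and ends the call
        const duration = monitor.incrementErrorCount(callId, e?.response?.status || null);
    }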
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@aj-archipelago/cortex",
-  "version": "1.1.5",
+  "version": "1.1.7",
   "description": "Cortex is a GraphQL API for AI. It provides a simple, extensible interface for using AI services from OpenAI, Azure and others.",
   "private": false,
   "repository": {
@@ -52,7 +52,7 @@
     "handlebars": "^4.7.7",
     "ioredis": "^5.3.1",
     "keyv": "^4.5.2",
-    "langchain": "^0.0.47",
+    "langchain": "^0.1.28",
     "mime-types": "^2.1.35",
     "subsrt": "^1.1.1",
     "uuid": "^9.0.0",
@@ -60,6 +60,7 @@
     "ws": "^8.12.0"
   },
   "devDependencies": {
+    "@faker-js/faker": "^8.4.1",
     "ava": "^5.2.0",
     "dotenv": "^16.0.3",
     "eslint": "^8.38.0",
@@ -14,19 +14,21 @@ export default {
     typeDef,
     rootResolver,
     resolver,
-    inputFormat: 'text', // text or html - changes the behavior of the input chunking
+    inputFormat: 'text', // string - 'text' or 'html' - changes the behavior of the input chunking
     useInputChunking: true, // true or false - enables input to be split into multiple chunks to meet context window size
     useParallelChunkProcessing: false, // true or false - enables parallel processing of chunks
+    joinChunksWith: '\n\n', // string - the string to join result chunks with when useInputChunking is 'true'
     useInputSummarization: false, // true or false - instead of chunking, summarize the input and act on the summary
     truncateFromFront: false, // true or false - if true, truncate from the front of the input instead of the back
     timeout: 120, // seconds, cancels the pathway after this many seconds
+    enableDuplicateRequests: true, // true or false - if true, duplicate requests are sent if the request is not completed after duplicateRequestAfter seconds
     duplicateRequestAfter: 10, // seconds, if the request is not completed after this many seconds, a backup request is sent
     // override the default execution of the pathway
-    // callback signature: excuteOverride({args: object, runAllPrompts: function})
+    // callback signature: executeOverride({args: object, runAllPrompts: function})
     // args: the input arguments to the pathway
     // runAllPrompts: a function that runs all prompts in the pathway and returns the result
     executePathway: undefined,
     // Set the temperature to 0 to favor more deterministic output when generating entity extraction.
-    temperature: undefined,
+    temperature: 0.9,
 };
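These defaults apply to every pathway unless overridden, and two of the changes are behavioral: `temperature` now defaults to 0.9 and duplicate requests are on by default. A pathway that needs deterministic, single-shot behavior overrides them explicitly; a hedged sketch (the `prompt` field is illustrative):

    // Hypothetical pathway definition overriding the new defaults.
    export default {
        prompt: `Summarize the following text:\n\n{{{text}}}`, // illustrative prompt
        temperature: 0,                 // back to deterministic output
        enableDuplicateRequests: false, // never send backup requests
        joinChunksWith: '\n',           // custom joiner for chunked results
    };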
package/pathways/bing.js ADDED
@@ -0,0 +1,12 @@
+// bing.js
+// Web search tool
+
+export default {
+    inputParameters: {
+        text: ``,
+    },
+    timeout: 400,
+    enableDuplicateRequests: false,
+    model: 'azure-bing',
+};
+
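Cortex exposes pathways as GraphQL fields, so the new `bing` pathway should be queryable like any other; a hypothetical client call (the field's argument and result shape are assumed from the usual pathway pattern, not taken from this diff):

    const res = await fetch('http://localhost:4000/graphql', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
            query: `query Bing($text: String) { bing(text: $text) { result } }`, // shape assumed
            variables: { text: 'Cortex GraphQL API' },
        }),
    });
    console.log((await res.json()).data?.bing?.result);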
package/pathways/index.js CHANGED
@@ -19,8 +19,10 @@ import transcribe from './transcribe.js';
 import translate from './translate.js';
 import embeddings from './embeddings.js';
 import vision from './vision.js';
+import bing from './bing.js';
 
 export {
+    bing,
     edit,
     chat,
     bias,
package/server/chunker.js CHANGED
@@ -1,4 +1,4 @@
-import { encode, decode } from 'gpt-3-encoder';
+import { encode, decode } from '../lib/encodeCache.js';
 import cheerio from 'cheerio';
 
 const getLastNToken = (text, maxTokenLen) => {
package/server/graphql.js CHANGED
@@ -131,7 +131,7 @@ const build = async (config) => {
 
     const app = express();
 
-    app.use(express.json({ limit: '50mb' }));
+    app.use(express.json({ limit: '200mb' }));
 
     const httpServer = http.createServer(app);
 
@@ -19,6 +19,7 @@ import OpenAIDallE3Plugin from './plugins/openAiDallE3Plugin.js';
 import OpenAIVisionPlugin from './plugins/openAiVisionPlugin.js';
 import GeminiChatPlugin from './plugins/geminiChatPlugin.js';
 import GeminiVisionPlugin from './plugins/geminiVisionPlugin.js';
+import AzureBingPlugin from './plugins/azureBingPlugin.js';
 
 class ModelExecutor {
     constructor(pathway, model) {
@@ -80,6 +81,9 @@ class ModelExecutor {
         case 'GEMINI-VISION':
             plugin = new GeminiVisionPlugin(pathway, model);
             break;
+        case 'AZURE-BING':
+            plugin = new AzureBingPlugin(pathway, model);
+            break;
         default:
             throw new Error(`Unsupported model type: ${model.type}`);
     }
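Plugin selection stays a simple switch on `model.type`, which closes the loop on the new search feature: config declares `azure-bing` with type `AZURE-BING`, the `bing` pathway names that model, and the executor instantiates `AzureBingPlugin` (whose implementation is not part of this diff). A sketch of the dispatch, assuming the models map is reachable via `config.get('models')` and `pathway` is in scope:

    const model = config.get('models')['azure-bing']; // { type: 'AZURE-BING', ... }
    const executor = new ModelExecutor(pathway, model); // the switch above picks AzureBingPlugin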