@aj-archipelago/cortex 1.0.6 → 1.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -58,7 +58,8 @@
  "Content-Type": "application/json"
  },
  "requestsPerSecond": 10,
- "maxTokenLength": 2048
+ "maxTokenLength": 2048,
+ "maxReturnTokens": 1024
  },
  "palm-chat": {
  "type": "PALM-CHAT",
@@ -67,7 +68,8 @@
  "Content-Type": "application/json"
  },
  "requestsPerSecond": 10,
- "maxTokenLength": 2048
+ "maxTokenLength": 2048,
+ "maxReturnTokens": 1024
  },
  "local-llama13B": {
  "type": "LOCAL-CPP-MODEL",
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@aj-archipelago/cortex",
- "version": "1.0.6",
+ "version": "1.0.7",
  "description": "Cortex is a GraphQL API for AI. It provides a simple, extensible interface for using AI services from OpenAI, Azure and others.",
  "repository": {
  "type": "git",
@@ -36,6 +36,7 @@
  "axios": "^1.3.4",
  "axios-cache-interceptor": "^1.0.1",
  "bottleneck": "^2.19.5",
+ "cheerio": "^1.0.0-rc.12",
  "compromise": "^14.8.1",
  "compromise-paragraphs": "^0.1.0",
  "convict": "^6.2.3",
@@ -14,6 +14,7 @@ export default {
  typeDef,
  rootResolver,
  resolver,
+ inputFormat: 'text',
  useInputChunking: true,
  useParallelChunkProcessing: false,
  useInputSummarization: false,
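The new inputFormat pathway property feeds the HTML-aware chunking added in package/server/chunker.js below; pathways default to 'text'. A minimal sketch of a custom pathway opting into HTML chunking (the prompt text is illustrative; the other flags mirror the defaults shown in the hunk above):

    // sketch only: hypothetical pathway definition opting into HTML-aware chunking
    export default {
        prompt: `Summarize the following content:\n\n{{text}}`, // illustrative prompt
        inputFormat: 'html',            // 'text' is the default added in the hunk above
        useInputChunking: true,         // chunk long inputs before prompting
        useParallelChunkProcessing: false,
        useInputSummarization: false,
    };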
package/server/chunker.js CHANGED
@@ -1,4 +1,5 @@
  import { encode, decode } from 'gpt-3-encoder';
+ import cheerio from 'cheerio';
 
  const getLastNToken = (text, maxTokenLen) => {
  const encoded = encode(text);
@@ -18,8 +19,18 @@ const getFirstNToken = (text, maxTokenLen) => {
  return text;
  }
 
- const getSemanticChunks = (text, chunkSize) => {
+ const determineTextFormat = (text) => {
+ const htmlTagPattern = /<[^>]*>/g;
+
+ if (htmlTagPattern.test(text)) {
+ return 'html';
+ }
+ else {
+ return 'text';
+ }
+ }
 
+ const getSemanticChunks = (text, chunkSize, inputFormat = 'text') => {
  const breakByRegex = (str, regex, preserveWhitespace = false) => {
  const result = [];
  let match;
@@ -46,6 +57,19 @@ const getSemanticChunks = (text, chunkSize) => {
  const breakBySentences = (str) => breakByRegex(str, /(?<=[.。؟!?!\n])\s+/, true);
  const breakByWords = (str) => breakByRegex(str, /(\s,;:.+)/);
 
+ const breakByHtmlElements = (str) => {
+ const $ = cheerio.load(str, null, true);
+
+ // the .filter() call is important to get the text nodes
+ // https://stackoverflow.com/questions/54878673/cheerio-get-normal-text-nodes
+ let rootNodes = $('body').contents();
+
+ // create an array with the outerHTML of each node
+ const nodes = rootNodes.map((i, el) => $(el).prop('outerHTML') || $(el).text()).get();
+
+ return nodes;
+ };
+
  const createChunks = (tokens) => {
  let chunks = [];
  let currentChunk = '';
@@ -115,7 +139,28 @@ const getSemanticChunks = (text, chunkSize) => {
  return createChunks([...str]); // Split by characters
  };
 
- return breakText(text);
+ if (inputFormat === 'html') {
+ const tokens = breakByHtmlElements(text);
+ let chunks = createChunks(tokens);
+ chunks = combineChunks(chunks);
+
+ chunks = chunks.flatMap(chunk => {
+ if (determineTextFormat(chunk) === 'text') {
+ return getSemanticChunks(chunk, chunkSize);
+ } else {
+ return chunk;
+ }
+ });
+
+ if (chunks.some(chunk => encode(chunk).length > chunkSize)) {
+ throw new Error('The HTML contains elements that are larger than the chunk size. Please try again with HTML that has smaller elements.');
+ }
+
+ return chunks;
+ }
+ else {
+ return breakText(text);
+ }
  }
 
 
@@ -133,5 +178,5 @@ const semanticTruncate = (text, maxLength) => {
  };
 
  export {
- getSemanticChunks, semanticTruncate, getLastNToken, getFirstNToken
+ getSemanticChunks, semanticTruncate, getLastNToken, getFirstNToken, determineTextFormat
  };
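A rough usage sketch of the updated chunker exports (the token budget, markup, and import path are illustrative, not taken from the package):

    // sketch only: exercising the HTML-aware chunking added above
    import { encode } from 'gpt-3-encoder';
    import { getSemanticChunks, determineTextFormat } from './server/chunker.js';

    const html = '<p>First paragraph.</p><p>Second paragraph.</p>';

    determineTextFormat(html);                          // 'html' (any tag-like pattern qualifies)
    const chunks = getSemanticChunks(html, 50, 'html'); // elements stay whole, packed up to 50 tokens
    // plain-text runs between elements fall back to the regular sentence/word chunking;
    // an element that alone exceeds the budget now throws instead of being split
    console.log(chunks.every(chunk => encode(chunk).length <= 50)); // true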
package/server/graphql.js CHANGED
@@ -164,7 +164,13 @@ const build = async (config) => {
  const cortexApiKey = config.get('cortexApiKey');
  if (cortexApiKey) {
  app.use((req, res, next) => {
- if (cortexApiKey && req.headers['Cortex-Api-Key'] !== cortexApiKey && req.query['Cortex-Api-Key'] !== cortexApiKey) {
+ let providedApiKey = req.headers['cortex-api-key'] || req.query['cortex-api-key'];
+ if (!providedApiKey) {
+ providedApiKey = req.headers['authorization'];
+ providedApiKey = providedApiKey?.startsWith('Bearer ') ? providedApiKey.slice(7) : providedApiKey;
+ }
+
+ if (cortexApiKey && cortexApiKey !== providedApiKey) {
  if (req.baseUrl === '/graphql' || req.headers['content-type'] === 'application/graphql') {
  res.status(401)
  .set('WWW-Authenticate', 'Cortex-Api-Key')
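For callers, the check above now accepts the key as a lowercase cortex-api-key header, a cortex-api-key query parameter, or an Authorization bearer token. A minimal client sketch (host and port are illustrative):

    // sketch only: presenting the API key to the updated middleware
    const key = process.env.CORTEX_API_KEY;

    await fetch('http://localhost:4000/graphql', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
            'cortex-api-key': key,
            // or equivalently: 'Authorization': `Bearer ${key}`,
        },
        body: JSON.stringify({ query: '{ __typename }' }),
    });
    // the query-string form also works: POST /graphql?cortex-api-key=<key>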
@@ -6,40 +6,37 @@ import OpenAIWhisperPlugin from './plugins/openAiWhisperPlugin.js';
  import LocalModelPlugin from './plugins/localModelPlugin.js';
  import PalmChatPlugin from './plugins/palmChatPlugin.js';
  import PalmCompletionPlugin from './plugins/palmCompletionPlugin.js';
+ import PalmCodeCompletionPlugin from './plugins/palmCodeCompletionPlugin.js';
 
  class PathwayPrompter {
- constructor({ config, pathway }) {
-
- const modelName = pathway.model || config.get('defaultModelName');
- const model = config.get('models')[modelName];
-
- if (!model) {
- throw new Error(`Model ${modelName} not found in config`);
- }
-
+ constructor(config, pathway, modelName, model) {
+
  let plugin;
 
  switch (model.type) {
  case 'OPENAI-CHAT':
- plugin = new OpenAIChatPlugin(config, pathway);
+ plugin = new OpenAIChatPlugin(config, pathway, modelName, model);
  break;
  case 'AZURE-TRANSLATE':
- plugin = new AzureTranslatePlugin(config, pathway);
+ plugin = new AzureTranslatePlugin(config, pathway, modelName, model);
  break;
  case 'OPENAI-COMPLETION':
- plugin = new OpenAICompletionPlugin(config, pathway);
+ plugin = new OpenAICompletionPlugin(config, pathway, modelName, model);
  break;
  case 'OPENAI-WHISPER':
- plugin = new OpenAIWhisperPlugin(config, pathway);
+ plugin = new OpenAIWhisperPlugin(config, pathway, modelName, model);
  break;
  case 'LOCAL-CPP-MODEL':
- plugin = new LocalModelPlugin(config, pathway);
+ plugin = new LocalModelPlugin(config, pathway, modelName, model);
  break;
  case 'PALM-CHAT':
- plugin = new PalmChatPlugin(config, pathway);
+ plugin = new PalmChatPlugin(config, pathway, modelName, model);
  break;
  case 'PALM-COMPLETION':
- plugin = new PalmCompletionPlugin(config, pathway);
+ plugin = new PalmCompletionPlugin(config, pathway, modelName, model);
+ break;
+ case 'PALM-CODE-COMPLETION':
+ plugin = new PalmCodeCompletionPlugin(config, pathway, modelName, model);
  break;
  default:
  throw new Error(`Unsupported model type: ${model.type}`);
@@ -20,9 +20,31 @@ class PathwayResolver {
  this.warnings = [];
  this.requestId = uuidv4();
  this.responseParser = new PathwayResponseParser(pathway);
- this.pathwayPrompter = new PathwayPrompter({ config, pathway });
+ this.modelName = [
+ pathway.model,
+ args?.model,
+ pathway.inputParameters?.model,
+ config.get('defaultModelName')
+ ].find(modelName => modelName && config.get('models').hasOwnProperty(modelName));
+ this.model = config.get('models')[this.modelName];
+
+ if (!this.model) {
+ throw new Error(`Model ${this.modelName} not found in config`);
+ }
+
+ const specifiedModelName = pathway.model || args?.model || pathway.inputParameters?.model;
+
+ if (this.modelName !== (specifiedModelName)) {
+ if (specifiedModelName) {
+ this.logWarning(`Specified model ${specifiedModelName} not found in config, using ${this.modelName} instead.`);
+ } else {
+ this.logWarning(`No model specified in the pathway, using ${this.modelName}.`);
+ }
+ }
+
  this.previousResult = '';
  this.prompts = [];
+ this.pathwayPrompter = new PathwayPrompter(this.config, this.pathway, this.modelName, this.model);
 
  Object.defineProperty(this, 'pathwayPrompt', {
  get() {
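With the change above, PathwayResolver resolves the model itself instead of leaving it to PathwayPrompter: it tries pathway.model, the request's model argument, pathway.inputParameters.model, and finally defaultModelName, taking the first name that actually exists in the models config, and it now logs a warning and falls back rather than throwing when a named model is missing. A hedged sketch of the effect (both model names below are made up):

    // sketch only: model fallback order in 1.0.7
    // pathway.model -> args.model -> pathway.inputParameters.model -> defaultModelName
    const pathway = {
        prompt: `Translate the following: {{text}}`,
        model: 'retired-model-name',               // hypothetical; not present in config
        inputParameters: { model: 'palm-text' },   // hypothetical; used only if it exists in config
    };
    // constructing a resolver for this pathway would log:
    //   "Specified model retired-model-name not found in config, using <fallback> instead."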
@@ -56,37 +78,41 @@ class PathwayResolver {
  }
  });
  } else { // stream
- const incomingMessage = Array.isArray(responseData) && responseData.length > 0 ? responseData[0] : responseData;
- incomingMessage.on('data', data => {
- const events = data.toString().split('\n');
-
- events.forEach(event => {
- if (event.trim() === '') return; // Skip empty lines
-
- const message = event.replace(/^data: /, '');
-
- //console.log(`====================================`);
- //console.log(`STREAM EVENT: ${event}`);
- //console.log(`MESSAGE: ${message}`);
-
- const requestProgress = {
- requestId: this.requestId,
- data: message,
- }
-
- if (message.trim() === '[DONE]') {
- requestProgress.progress = 1;
- }
-
- try {
- pubsub.publish('REQUEST_PROGRESS', {
- requestProgress: requestProgress
- });
- } catch (error) {
- console.error('Could not JSON parse stream message', message, error);
- }
+ try {
+ const incomingMessage = Array.isArray(responseData) && responseData.length > 0 ? responseData[0] : responseData;
+ incomingMessage.on('data', data => {
+ const events = data.toString().split('\n');
+
+ events.forEach(event => {
+ if (event.trim() === '') return; // Skip empty lines
+
+ const message = event.replace(/^data: /, '');
+
+ //console.log(`====================================`);
+ //console.log(`STREAM EVENT: ${event}`);
+ //console.log(`MESSAGE: ${message}`);
+
+ const requestProgress = {
+ requestId: this.requestId,
+ data: message,
+ }
+
+ if (message.trim() === '[DONE]') {
+ requestProgress.progress = 1;
+ }
+
+ try {
+ pubsub.publish('REQUEST_PROGRESS', {
+ requestProgress: requestProgress
+ });
+ } catch (error) {
+ console.error('Could not JSON parse stream message', message, error);
+ }
+ });
  });
- });
+ } catch (error) {
+ console.error('Could not subscribe to stream', error);
+ }
  }
  }
 
@@ -152,7 +178,7 @@ class PathwayResolver {
  }
 
  // chunk the text and return the chunks with newline separators
- return getSemanticChunks(text, chunkTokenLength);
+ return getSemanticChunks(text, chunkTokenLength, this.pathway.inputFormat);
  }
 
  truncate(str, n) {
@@ -2,8 +2,8 @@
  import ModelPlugin from './modelPlugin.js';
 
  class AzureTranslatePlugin extends ModelPlugin {
- constructor(config, pathway) {
- super(config, pathway);
+ constructor(config, pathway, modelName, model) {
+ super(config, pathway, modelName, model);
  }
 
  // Set up parameters specific to the Azure Translate API
@@ -4,8 +4,8 @@ import { execFileSync } from 'child_process';
  import { encode } from 'gpt-3-encoder';
 
  class LocalModelPlugin extends ModelPlugin {
- constructor(config, pathway) {
- super(config, pathway);
+ constructor(config, pathway, modelName, model) {
+ super(config, pathway, modelName, model);
  }
 
  // if the input starts with a chatML response, just return that
@@ -6,19 +6,13 @@ import { encode } from 'gpt-3-encoder';
  import { getFirstNToken } from '../chunker.js';
 
  const DEFAULT_MAX_TOKENS = 4096;
+ const DEFAULT_MAX_RETURN_TOKENS = 256;
  const DEFAULT_PROMPT_TOKEN_RATIO = 0.5;
 
  class ModelPlugin {
- constructor(config, pathway) {
- // If the pathway specifies a model, use that, otherwise use the default
- this.modelName = pathway.model || config.get('defaultModelName');
- // Get the model from the config
- this.model = config.get('models')[this.modelName];
- // If the model doesn't exist, throw an exception
- if (!this.model) {
- throw new Error(`Model ${this.modelName} not found in config`);
- }
-
+ constructor(config, pathway, modelName, model) {
+ this.modelName = modelName;
+ this.model = model;
  this.config = config;
  this.environmentVariables = config.getEnv();
  this.temperature = pathway.temperature;
@@ -143,6 +137,10 @@ class ModelPlugin {
  return (this.promptParameters.maxTokenLength ?? this.model.maxTokenLength ?? DEFAULT_MAX_TOKENS);
  }
 
+ getModelMaxReturnTokens() {
+ return (this.promptParameters.maxReturnTokens ?? this.model.maxReturnTokens ?? DEFAULT_MAX_RETURN_TOKENS);
+ }
+
  getPromptTokenRatio() {
  // TODO: Is this the right order of precedence? inputParameters should maybe be second?
  return this.promptParameters.inputParameters?.tokenRatio ?? this.promptParameters.tokenRatio ?? DEFAULT_PROMPT_TOKEN_RATIO;
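getModelMaxReturnTokens mirrors the existing getModelMaxTokenLength lookup: a per-prompt maxReturnTokens wins, then the model entry's maxReturnTokens (as added to the palm entries in the first hunk), then the new 256 default. A small runnable sketch of that resolution (the values are illustrative):

    // sketch only: the ?? chain used by getModelMaxReturnTokens()
    const DEFAULT_MAX_RETURN_TOKENS = 256;
    const resolveMaxReturnTokens = (promptParameters, model) =>
        promptParameters.maxReturnTokens ?? model.maxReturnTokens ?? DEFAULT_MAX_RETURN_TOKENS;

    resolveMaxReturnTokens({}, { maxReturnTokens: 1024 }); // 1024, e.g. the palm config entries above
    resolveMaxReturnTokens({ maxReturnTokens: 512 }, {});  // 512, prompt-level override wins
    resolveMaxReturnTokens({}, {});                        // 256, the new default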
@@ -3,8 +3,8 @@ import ModelPlugin from './modelPlugin.js';
  import { encode } from 'gpt-3-encoder';
 
  class OpenAIChatPlugin extends ModelPlugin {
- constructor(config, pathway) {
- super(config, pathway);
+ constructor(config, pathway, modelName, model) {
+ super(config, pathway, modelName, model);
  }
 
  // convert to OpenAI messages array format if necessary
@@ -15,8 +15,8 @@ const truncatePromptIfNecessary = (text, textTokenCount, modelMaxTokenCount, tar
  }
 
  class OpenAICompletionPlugin extends ModelPlugin {
- constructor(config, pathway) {
- super(config, pathway);
+ constructor(config, pathway, modelName, model) {
+ super(config, pathway, modelName, model);
  }
 
  // Set up parameters specific to the OpenAI Completion API
@@ -75,14 +75,14 @@ const downloadFile = async (fileUrl) => {
  fs.unlink(localFilePath, () => {
  reject(error);
  });
- throw error;
+ //throw error;
  }
  });
  };
 
  class OpenAIWhisperPlugin extends ModelPlugin {
- constructor(config, pathway) {
- super(config, pathway);
+ constructor(config, pathway, modelName, model) {
+ super(config, pathway, modelName, model);
  }
 
  async getMediaChunks(file, requestId) {
@@ -4,8 +4,8 @@ import { encode } from 'gpt-3-encoder';
  import HandleBars from '../../lib/handleBars.js';
 
  class PalmChatPlugin extends ModelPlugin {
- constructor(config, pathway) {
- super(config, pathway);
+ constructor(config, pathway, modelName, model) {
+ super(config, pathway, modelName, model);
  }
 
  // Convert to PaLM messages array format if necessary
@@ -92,10 +92,8 @@ class PalmChatPlugin extends ModelPlugin {
  const context = this.getCompiledContext(text, parameters, prompt.context || palmMessages.context || '');
  const examples = this.getCompiledExamples(text, parameters, prompt.examples || []);
 
- // For PaLM right now, the max return tokens is 1024, regardless of the max context length
- // I can't think of a time you'd want to constrain it to fewer at the moment.
- const max_tokens = 1024//this.getModelMaxTokenLength() - tokenLength;
-
+ const max_tokens = this.getModelMaxReturnTokens();
+
  if (max_tokens < 0) {
  throw new Error(`Prompt is too long to successfully call the model at ${tokenLength} tokens. The model will not be called.`);
  }
@@ -0,0 +1,46 @@
+ // palmCodeCompletionPlugin.js
+
+ import PalmCompletionPlugin from './palmCompletionPlugin.js';
+
+ // PalmCodeCompletionPlugin class for handling requests and responses to the PaLM API Code Completion API
+ class PalmCodeCompletionPlugin extends PalmCompletionPlugin {
+ constructor(config, pathway, modelName, model) {
+ super(config, pathway, modelName, model);
+ }
+
+ // Set up parameters specific to the PaLM API Code Completion API
+ getRequestParameters(text, parameters, prompt, pathwayResolver) {
+ const { modelPromptText, tokenLength } = this.getCompiledPrompt(text, parameters, prompt);
+ const { stream } = parameters;
+ // Define the model's max token length
+ const modelTargetTokenLength = this.getModelMaxTokenLength() * this.getPromptTokenRatio();
+
+ const truncatedPrompt = this.truncatePromptIfNecessary(modelPromptText, tokenLength, this.getModelMaxTokenLength(), modelTargetTokenLength, pathwayResolver);
+
+ const max_tokens = this.getModelMaxReturnTokens();
+
+ if (max_tokens < 0) {
+ throw new Error(`Prompt is too long to successfully call the model at ${tokenLength} tokens. The model will not be called.`);
+ }
+
+ if (!truncatedPrompt) {
+ throw new Error(`Prompt is empty. The model will not be called.`);
+ }
+
+ const requestParameters = {
+ instances: [
+ { prefix: truncatedPrompt }
+ ],
+ parameters: {
+ temperature: this.temperature ?? 0.7,
+ maxOutputTokens: max_tokens,
+ topP: parameters.topP ?? 0.95,
+ topK: parameters.topK ?? 40,
+ }
+ };
+
+ return requestParameters;
+ }
+ }
+
+ export default PalmCodeCompletionPlugin;
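Routing a pathway through the new plugin would require a model entry of type PALM-CODE-COMPLETION in the models config, shaped like the palm entries in the first hunk; the model name, url placeholder, and limits below are illustrative rather than taken from the package:

    "palm-code-completion": {
        "type": "PALM-CODE-COMPLETION",
        "url": "{{PALM_CODE_COMPLETION_API_URL}}",
        "headers": {
            "Content-Type": "application/json"
        },
        "requestsPerSecond": 10,
        "maxTokenLength": 2048,
        "maxReturnTokens": 1024
    },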
@@ -2,23 +2,21 @@
 
  import ModelPlugin from './modelPlugin.js';
 
- // Helper function to truncate the prompt if it is too long
- const truncatePromptIfNecessary = (text, textTokenCount, modelMaxTokenCount, targetTextTokenCount, pathwayResolver) => {
- const maxAllowedTokens = textTokenCount + ((modelMaxTokenCount - targetTextTokenCount) * 0.5);
-
- if (textTokenCount > maxAllowedTokens) {
- pathwayResolver.logWarning(`Prompt is too long at ${textTokenCount} tokens (this target token length for this pathway is ${targetTextTokenCount} tokens because the response is expected to take up the rest of the model's max tokens (${modelMaxTokenCount}). Prompt will be truncated.`);
- return pathwayResolver.truncate(text, maxAllowedTokens);
- }
- return text;
- }
-
  // PalmCompletionPlugin class for handling requests and responses to the PaLM API Text Completion API
  class PalmCompletionPlugin extends ModelPlugin {
- constructor(config, pathway) {
- super(config, pathway);
+ constructor(config, pathway, modelName, model) {
+ super(config, pathway, modelName, model);
  }
 
+ truncatePromptIfNecessary (text, textTokenCount, modelMaxTokenCount, targetTextTokenCount, pathwayResolver) {
+ const maxAllowedTokens = textTokenCount + ((modelMaxTokenCount - targetTextTokenCount) * 0.5);
+
+ if (textTokenCount > maxAllowedTokens) {
+ pathwayResolver.logWarning(`Prompt is too long at ${textTokenCount} tokens (this target token length for this pathway is ${targetTextTokenCount} tokens because the response is expected to take up the rest of the model's max tokens (${modelMaxTokenCount}). Prompt will be truncated.`);
+ return pathwayResolver.truncate(text, maxAllowedTokens);
+ }
+ return text;
+ }
  // Set up parameters specific to the PaLM API Text Completion API
  getRequestParameters(text, parameters, prompt, pathwayResolver) {
  const { modelPromptText, tokenLength } = this.getCompiledPrompt(text, parameters, prompt);
@@ -26,9 +24,9 @@ class PalmCompletionPlugin extends ModelPlugin {
  // Define the model's max token length
  const modelTargetTokenLength = this.getModelMaxTokenLength() * this.getPromptTokenRatio();
 
- const truncatedPrompt = truncatePromptIfNecessary(modelPromptText, tokenLength, this.getModelMaxTokenLength(), modelTargetTokenLength, pathwayResolver);
+ const truncatedPrompt = this.truncatePromptIfNecessary(modelPromptText, tokenLength, this.getModelMaxTokenLength(), modelTargetTokenLength, pathwayResolver);
 
- const max_tokens = 1024//this.getModelMaxTokenLength() - tokenLength;
+ const max_tokens = this.getModelMaxReturnTokens();
 
  if (max_tokens < 0) {
  throw new Error(`Prompt is too long to successfully call the model at ${tokenLength} tokens. The model will not be called.`);
@@ -1,5 +1,6 @@
  import test from 'ava';
- import { getSemanticChunks } from '../server/chunker.js';
+ import { getSemanticChunks, determineTextFormat } from '../server/chunker.js';
+
  import { encode } from 'gpt-3-encoder';
 
  const testText = `Lorem ipsum dolor sit amet, consectetur adipiscing elit. In id erat sem. Phasellus ac dapibus purus, in fermentum nunc. Mauris quis rutrum magna. Quisque rutrum, augue vel blandit posuere, augue magna convallis turpis, nec elementum augue mauris sit amet nunc. Aenean sit amet leo est. Nunc ante ex, blandit et felis ut, iaculis lacinia est. Phasellus dictum orci id libero ullamcorper tempor.
@@ -69,34 +70,119 @@ test('should return identical text that chunker was passed, given tiny chunk siz
  t.is(recomposedText, testText); //check recomposition
  });
 
- /*
- it('should return identical text that chunker was passed, given tiny chunk size (1)', () => {
- const maxChunkToken = 1;
- const chunks = getSemanticChunks(testText, maxChunkToken);
- expect(chunks.length).toBeGreaterThan(1); //check chunking
- expect(chunks.every(chunk => encode(chunk).length <= maxChunkToken)).toBe(true); //check chunk size
- const recomposedText = chunks.reduce((acc, chunk) => acc + chunk, '');
- expect(recomposedText).toBe(testText); //check recomposition
+ const htmlChunkOne = `<p>Lorem ipsum dolor sit amet, consectetur adipiscing elit. <a href="https://www.google.com">Google</a></p> Vivamus id pharetra odio. Sed consectetur leo sed tortor dictum venenatis.Donec gravida libero non accumsan suscipit.Donec lectus turpis, ullamcorper eu pulvinar iaculis, ornare ut risus.Phasellus aliquam, turpis quis viverra condimentum, risus est pretium metus, in porta ipsum tortor vitae elit.Pellentesque id finibus erat. In suscipit, sapien non posuere dignissim, augue nisl ultrices tortor, sit amet eleifend nibh elit at risus.`
+ const htmlVoidElement = `<br>`
+ const htmlChunkTwo = `<p><img src="https://www.google.com/googlelogo_color_272x92dp.png"></p>`
+ const htmlSelfClosingElement = `<img src="https://www.google.com/images/branding/googlelogo/1x/googlelogo_color_272x92dp.png" />`
+ const plainTextChunk = 'Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia curae; Fusce at dignissim quam.'
+
+ test('should throw an error if html cannot be accommodated within the chunk size', async t => {
+ const chunkSize = encode(htmlChunkTwo).length;
+ const error = t.throws(() => getSemanticChunks(htmlChunkTwo, chunkSize - 1, 'html'));
+ t.is(error.message, 'The HTML contains elements that are larger than the chunk size. Please try again with HTML that has smaller elements.');
+ });
+
+ test('should chunk text between html elements if needed', async t => {
+ const chunkSize = encode(htmlChunkTwo).length;
+ const chunks = getSemanticChunks(htmlChunkTwo + plainTextChunk + htmlChunkTwo, chunkSize, 'html');
+
+ t.is(chunks.length, 4);
+ t.is(chunks[0], htmlChunkTwo);
+ t.is(chunks[1], 'Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia curae');
+ t.is(encode(chunks[1]).length, chunkSize);
+ t.is(chunks[2], '; Fusce at dignissim quam.');
+ t.is(chunks[3], htmlChunkTwo);
+ });
+
+ test('should chunk html element correctly when chunk size is exactly the same as the element length', async t => {
+ const chunkSize = encode(htmlChunkTwo).length;
+ const chunks = getSemanticChunks(htmlChunkTwo, chunkSize, 'html');
+
+ t.is(chunks.length, 1);
+ t.is(chunks[0], htmlChunkTwo);
+ });
+
+ test('should chunk html element correctly when chunk size is greater than the element length', async t => {
+ const chunkSize = encode(htmlChunkTwo).length;
+ const chunks = getSemanticChunks(htmlChunkTwo, chunkSize + 1, 'html');
+
+ t.is(chunks.length, 1);
+ t.is(chunks[0], htmlChunkTwo);
+ });
+
+ test('should not break up second html element correctly when chunk size is greater than the first element length', async t => {
+ const chunkSize = encode(htmlChunkTwo).length;
+ const chunks = getSemanticChunks(htmlChunkTwo + htmlChunkTwo, chunkSize + 10, 'html');
+
+ t.is(chunks.length, 2);
+ t.is(chunks[0], htmlChunkTwo);
+ t.is(chunks[1], htmlChunkTwo);
+ });
+
+ test('should treat text chunks as also unbreakable chunks', async t => {
+ const chunkSize = encode(htmlChunkTwo).length;
+ const chunks = getSemanticChunks(htmlChunkTwo + plainTextChunk + htmlChunkTwo, chunkSize + 20, 'html');
+
+ t.is(chunks.length, 3);
+ t.is(chunks[0], htmlChunkTwo);
+ t.is(chunks[1], plainTextChunk);
+ t.is(chunks[2], htmlChunkTwo);
+ });
+
+
+ test('should determine format correctly for text only', async t => {
+ const format = determineTextFormat(plainTextChunk);
+ t.is(format, 'text');
+ });
+
+ test('should determine format correctly for simple html element', async t => {
+ const format = determineTextFormat(htmlChunkTwo);
+ t.is(format, 'html');
+ });
+
+ test('should determine format correctly for simple html element embedded in text', async t => {
+ const format = determineTextFormat(plainTextChunk + htmlChunkTwo + plainTextChunk);
+ t.is(format, 'html');
+ });
+
+ test('should determine format correctly for self-closing html element', async t => {
+ const format = determineTextFormat(htmlSelfClosingElement);
+ t.is(format, 'html');
+ });
+
+ test('should determine format correctly for self-closing html element embedded in text', async t => {
+ const format = determineTextFormat(plainTextChunk + htmlSelfClosingElement + plainTextChunk);
+ t.is(format, 'html');
+ });
+
+ test('should determine format correctly for void element', async t => {
+ const format = determineTextFormat(htmlVoidElement);
+ t.is(format, 'html');
+ });
+
+ test('should determine format correctly for void element embedded in text', async t => {
+ const format = determineTextFormat(plainTextChunk + htmlVoidElement + plainTextChunk);
+ t.is(format, 'html');
  });
 
- it('should return identical text that chunker was passed, given huge chunk size (32000)', () => {
+ test('should return identical text that chunker was passed, given huge chunk size (32000)', t => {
  const maxChunkToken = 32000;
  const chunks = getSemanticChunks(testText, maxChunkToken);
- expect(chunks.length).toBe(1); //check chunking
- expect(chunks.every(chunk => encode(chunk).length <= maxChunkToken)).toBe(true); //check chunk size
+ t.assert(chunks.length === 1); //check chunking
+ t.assert(chunks.every(chunk => encode(chunk).length <= maxChunkToken)); //check chunk size
  const recomposedText = chunks.reduce((acc, chunk) => acc + chunk, '');
- expect(recomposedText).toBe(testText); //check recomposition
+ t.assert(recomposedText === testText); //check recomposition
  });
 
  const testTextNoSpaces = `Loremipsumdolorsitamet,consecteturadipiscingelit.Inideratsem.Phasellusacdapibuspurus,infermentumnunc.Maurisquisrutrummagna.Quisquerutrum,auguevelblanditposuere,auguemagnacon vallisturpis,necelementumauguemaurissitametnunc.Aeneansitametleoest.Nuncanteex,blanditetfelisut,iaculislaciniaest.Phasellusdictumorciidliberoullamcorpertempor.Vivamusidpharetraodioq.Sedconsecteturleosedtortordictumvenenatis.Donecgravidaliberononaccumsansuscipit.Doneclectusturpis,ullamcorpereupulvinariaculis,ornareutrisus.Phasellusaliquam,turpisquisviverracondimentum,risusestpretiummetus,inportaips umtortorvita elit.Pellentesqueidfinibuserat.Insuscipit,sapiennonposueredignissim,auguenisl ultricestortor,sitameteleifendnibhelitatrisus.`;
 
- it('should return identical text that chunker was passed, given no spaces and small chunks(5)', () => {
+ test('should return identical text that chunker was passed, given no spaces and small chunks(5)', t => {
  const maxChunkToken = 5;
  const chunks = getSemanticChunks(testTextNoSpaces, maxChunkToken);
- expect(chunks.length).toBeGreaterThan(0); //check chunking
- expect(chunks.every(chunk => encode(chunk).length <= maxChunkToken)).toBe(true); //check chunk size
+ t.assert(chunks.length > 0); //check chunking
+ t.assert(chunks.every(chunk => encode(chunk).length <= maxChunkToken)); //check chunk size
  const recomposedText = chunks.reduce((acc, chunk) => acc + chunk, '');
- expect(recomposedText).toBe(testTextNoSpaces); //check recomposition
+ t.assert(recomposedText === testTextNoSpaces); //check recomposition
  });
 
  const testTextShortWeirdSpaces=`Lorem ipsum dolor sit amet, consectetur adipiscing elit. In id erat sem. Phasellus ac dapibus purus, in fermentum nunc.............................. Mauris quis rutrum magna. Quisque rutrum, augue vel blandit posuere, augue magna convallis turpis, nec elementum augue mauris sit amet nunc. Aenean sit a;lksjdf 098098- -23 eln ;lkn l;kn09 oij[0u ,,,,,,,,,,,,,,,,,,,,, amet leo est. Nunc ante ex, blandit et felis ut, iaculis lacinia est. Phasellus dictum orci id libero ullamcorper tempor.
@@ -106,20 +192,20 @@ const testTextShortWeirdSpaces=`Lorem ipsum dolor sit amet, consectetur adipisci
 
  Vivamus id pharetra odio. Sed consectetur leo sed tortor dictum venenatis.Donec gravida libero non accumsan suscipit.Donec lectus turpis, ullamcorper eu pulvinar iaculis, ornare ut risus.Phasellus aliquam, turpis quis viverra condimentum, risus est pretium metus, in porta ipsum tortor vitae elit.Pellentesque id finibus erat. In suscipit, sapien non posuere dignissim, augue nisl ultrices tortor, sit amet eleifend nibh elit at risus.`;
 
- it('should return identical text that chunker was passed, given weird spaces and tiny chunks(1)', () => {
+ test('should return identical text that chunker was passed, given weird spaces and tiny chunks(1)', t => {
  const maxChunkToken = 1;
  const chunks = getSemanticChunks(testTextShortWeirdSpaces, maxChunkToken);
- expect(chunks.length).toBeGreaterThan(0); //check chunking
- expect(chunks.every(chunk => encode(chunk).length <= maxChunkToken)).toBe(true); //check chunk size
+ t.assert(chunks.length > 0); //check chunking
+ t.assert(chunks.every(chunk => encode(chunk).length <= maxChunkToken)); //check chunk size
  const recomposedText = chunks.reduce((acc, chunk) => acc + chunk, '');
- expect(recomposedText).toBe(testTextShortWeirdSpaces); //check recomposition
+ t.assert(recomposedText === testTextShortWeirdSpaces); //check recomposition
  });
 
- it('should return identical text that chunker was passed, given weird spaces and small chunks(10)', () => {
+ test('should return identical text that chunker was passed, given weird spaces and small chunks(10)', t => {
  const maxChunkToken = 1;
  const chunks = getSemanticChunks(testTextShortWeirdSpaces, maxChunkToken);
- expect(chunks.length).toBeGreaterThan(0); //check chunking
- expect(chunks.every(chunk => encode(chunk).length <= maxChunkToken)).toBe(true); //check chunk size
+ t.assert(chunks.length > 0); //check chunking
+ t.assert(chunks.every(chunk => encode(chunk).length <= maxChunkToken)); //check chunk size
  const recomposedText = chunks.reduce((acc, chunk) => acc + chunk, '');
- expect(recomposedText).toBe(testTextShortWeirdSpaces); //check recomposition
- });*/
+ t.assert(recomposedText === testTextShortWeirdSpaces); //check recomposition
+ });
package/tests/mocks.js CHANGED
@@ -36,4 +36,45 @@ export const mockConfig = {
  { role: 'assistant', content: 'Translating: {{{text}}}' },
  ],
  }),
- };
+ };
+
+ export const mockPathwayResolverString = {
+ model: {
+ url: 'https://api.example.com/testModel',
+ type: 'OPENAI-COMPLETION',
+ },
+ modelName: 'testModel',
+ pathway: mockPathwayString,
+ config: mockConfig,
+ prompt: new Prompt('User: {{text}}\nAssistant: Please help {{name}} who is {{age}} years old.'),
+ };
+
+ export const mockPathwayResolverFunction = {
+ model: {
+ url: 'https://api.example.com/testModel',
+ type: 'OPENAI-COMPLETION',
+ },
+ modelName: 'testModel',
+ pathway: mockPathwayFunction,
+ config: mockConfig,
+ prompt: () => {
+ return new Prompt('User: {{text}}\nAssistant: Please help {{name}} who is {{age}} years old.')
+ }
+ };
+
+ export const mockPathwayResolverMessages = {
+ model: {
+ url: 'https://api.example.com/testModel',
+ type: 'OPENAI-COMPLETION',
+ },
+ modelName: 'testModel',
+ pathway: mockPathwayMessages,
+ config: mockConfig,
+ prompt: new Prompt({
+ messages: [
+ { role: 'user', content: 'Translate this: {{{text}}}' },
+ { role: 'assistant', content: 'Translating: {{{text}}}' },
+ ],
+ }),
+ };
+
@@ -2,7 +2,7 @@
  import test from 'ava';
  import ModelPlugin from '../server/plugins/modelPlugin.js';
  import HandleBars from '../lib/handleBars.js';
- import { mockConfig, mockPathwayString, mockPathwayFunction, mockPathwayMessages } from './mocks.js';
+ import { mockConfig, mockPathwayString, mockPathwayFunction, mockPathwayMessages, mockPathwayResolverString } from './mocks.js';
 
  const DEFAULT_MAX_TOKENS = 4096;
  const DEFAULT_PROMPT_TOKEN_RATIO = 0.5;
@@ -12,7 +12,7 @@ const config = mockConfig;
  const pathway = mockPathwayString;
 
  test('ModelPlugin constructor', (t) => {
- const modelPlugin = new ModelPlugin(config, pathway);
+ const modelPlugin = new ModelPlugin(mockPathwayResolverString);
 
  t.is(modelPlugin.modelName, pathway.model, 'modelName should be set from pathway');
  t.deepEqual(modelPlugin.model, config.get('models')[pathway.model], 'model should be set from config');
@@ -21,7 +21,7 @@ test('ModelPlugin constructor', (t) => {
  });
 
  test.beforeEach((t) => {
- t.context.modelPlugin = new ModelPlugin(mockConfig, mockPathwayString);
+ t.context.modelPlugin = new ModelPlugin(mockPathwayResolverString);
  });
 
  test('getCompiledPrompt - text and parameters', (t) => {
@@ -1,17 +1,17 @@
  import test from 'ava';
  import OpenAIChatPlugin from '../server/plugins/openAiChatPlugin.js';
- import { mockConfig, mockPathwayString, mockPathwayFunction, mockPathwayMessages } from './mocks.js';
+ import { mockPathwayResolverMessages } from './mocks.js';
 
  // Test the constructor
  test('constructor', (t) => {
- const plugin = new OpenAIChatPlugin(mockConfig, mockPathwayString);
- t.is(plugin.config, mockConfig);
- t.is(plugin.pathwayPrompt, mockPathwayString.prompt);
+ const plugin = new OpenAIChatPlugin(mockPathwayResolverMessages);
+ t.is(plugin.config, mockPathwayResolverMessages.config);
+ t.is(plugin.pathwayPrompt, mockPathwayResolverMessages.pathway.prompt);
  });
 
  // Test the convertPalmToOpenAIMessages function
  test('convertPalmToOpenAIMessages', (t) => {
- const plugin = new OpenAIChatPlugin(mockConfig, mockPathwayString);
+ const plugin = new OpenAIChatPlugin(mockPathwayResolverMessages);
  const context = 'This is a test context.';
  const examples = [
  {
@@ -35,14 +35,21 @@ test('convertPalmToOpenAIMessages', (t) => {
 
  // Test the getRequestParameters function
  test('getRequestParameters', async (t) => {
- const plugin = new OpenAIChatPlugin(mockConfig, mockPathwayString);
+ const plugin = new OpenAIChatPlugin(mockPathwayResolverMessages);
  const text = 'Help me';
  const parameters = { name: 'John', age: 30 };
- const prompt = mockPathwayString.prompt;
+ const prompt = mockPathwayResolverMessages.pathway.prompt;
  const result = await plugin.getRequestParameters(text, parameters, prompt);
  t.deepEqual(result, {
  messages: [
- { role: 'user', content: 'User: Help me\nAssistant: Please help John who is 30 years old.' },
+ {
+ content: 'Translate this: Help me',
+ role: 'user',
+ },
+ {
+ content: 'Translating: Help me',
+ role: 'assistant',
+ },
  ],
  temperature: 0.7,
  });
@@ -50,10 +57,10 @@ test('getRequestParameters', async (t) => {
 
  // Test the execute function
  test('execute', async (t) => {
- const plugin = new OpenAIChatPlugin(mockConfig, mockPathwayString);
+ const plugin = new OpenAIChatPlugin(mockPathwayResolverMessages);
  const text = 'Help me';
  const parameters = { name: 'John', age: 30 };
- const prompt = mockPathwayString.prompt;
+ const prompt = mockPathwayResolverMessages.pathway.prompt;
 
  // Mock the executeRequest function
  plugin.executeRequest = () => {
@@ -82,7 +89,7 @@ test('execute', async (t) => {
 
  // Test the parseResponse function
  test('parseResponse', (t) => {
- const plugin = new OpenAIChatPlugin(mockConfig, mockPathwayString);
+ const plugin = new OpenAIChatPlugin(mockPathwayResolverMessages);
  const data = {
  choices: [
  {
@@ -98,7 +105,7 @@ test('parseResponse', (t) => {
 
  // Test the logRequestData function
  test('logRequestData', (t) => {
- const plugin = new OpenAIChatPlugin(mockConfig, mockPathwayString);
+ const plugin = new OpenAIChatPlugin(mockPathwayResolverMessages);
  const data = {
  messages: [
  { role: 'user', content: 'User: Help me\nAssistant: Please help John who is 30 years old.' },
@@ -113,7 +120,7 @@ test('logRequestData', (t) => {
  },
  ],
  };
- const prompt = mockPathwayString.prompt;
+ const prompt = mockPathwayResolverMessages.pathway.prompt;
 
  // Mock console.log function
  const originalConsoleLog = console.log;
@@ -1,11 +1,10 @@
  // test_palmChatPlugin.js
  import test from 'ava';
  import PalmChatPlugin from '../server/plugins/palmChatPlugin.js';
- import { mockConfig } from './mocks.js';
+ import { mockPathwayResolverMessages } from './mocks.js';
 
  test.beforeEach((t) => {
- const pathway = 'testPathway';
- const palmChatPlugin = new PalmChatPlugin(mockConfig, pathway);
+ const palmChatPlugin = new PalmChatPlugin(mockPathwayResolverMessages);
  t.context = { palmChatPlugin };
  });
 
@@ -2,11 +2,10 @@
 
  import test from 'ava';
  import PalmCompletionPlugin from '../server/plugins/palmCompletionPlugin.js';
- import { mockConfig } from './mocks.js';
+ import { mockPathwayResolverString } from './mocks.js';
 
  test.beforeEach((t) => {
- const pathway = 'testPathway';
- const palmCompletionPlugin = new PalmCompletionPlugin(mockConfig, pathway);
+ const palmCompletionPlugin = new PalmCompletionPlugin(mockPathwayResolverString);
  t.context = { palmCompletionPlugin };
  });
 
@@ -2,12 +2,11 @@
  import test from 'ava';
  import ModelPlugin from '../server/plugins/modelPlugin.js';
  import { encode } from 'gpt-3-encoder';
- import { mockConfig, mockPathwayString } from './mocks.js';
+ import { mockPathwayResolverString } from './mocks.js';
 
- const config = mockConfig;
- const pathway = mockPathwayString;
+ const { config, pathway } = mockPathwayResolverString;
 
- const modelPlugin = new ModelPlugin(config, pathway);
+ const modelPlugin = new ModelPlugin(mockPathwayResolverString);
 
  const generateMessage = (role, content) => ({ role, content });
 
package/tests/server.js DELETED
@@ -1,23 +0,0 @@
- import 'dotenv/config'
- import { ApolloServer } from 'apollo-server';
- import { config } from '../config.js';
- import typeDefsresolversFactory from '../index.js';
-
- let typeDefs;
- let resolvers;
-
- const initTypeDefsResolvers = async () => {
- const result = await typeDefsresolversFactory();
- typeDefs = result.typeDefs;
- resolvers = result.resolvers;
- };
-
- export const startTestServer = async () => {
- await initTypeDefsResolvers();
-
- return new ApolloServer({
- typeDefs,
- resolvers,
- context: () => ({ config, requestState: {} }),
- });
- };