npm - @aj-archipelago/cortex - Versions diffs - 0.0.6 → 0.0.7 - Mend

@aj-archipelago/cortex 0.0.6 → 0.0.7

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (28)

package/README.md +108 -72
package/config.js +20 -0
package/graphql/graphql.js +53 -9
package/graphql/pathwayPrompter.js +10 -6
package/graphql/pathwayResolver.js +27 -36
package/graphql/plugins/azureTranslatePlugin.js +16 -8
package/graphql/plugins/modelPlugin.js +54 -22
package/graphql/plugins/openAiChatPlugin.js +33 -9
package/graphql/plugins/openAiCompletionPlugin.js +53 -37
package/graphql/plugins/openAiWhisperPlugin.js +79 -0
package/graphql/prompt.js +1 -0
package/graphql/resolver.js +5 -5
package/graphql/typeDef.js +47 -38
package/lib/fileChunker.js +152 -0
package/package.json +5 -2
package/pathways/bias.js +6 -0
package/pathways/chat.js +4 -1
package/pathways/complete.js +4 -0
package/pathways/edit.js +6 -0
package/pathways/entities.js +12 -0
package/pathways/index.js +1 -1
package/pathways/paraphrase.js +4 -0
package/pathways/sentiment.js +5 -1
package/pathways/summary.js +25 -8
package/pathways/transcribe.js +8 -0
package/pathways/translate.js +10 -2
package/tests/main.test.js +0 -13
package/pathways/topics.js +0 -9

package/lib/fileChunker.js ADDED Viewed

@@ -0,0 +1,152 @@
+const fs = require('fs');
+const ffmpeg = require('fluent-ffmpeg');
+const path = require('path');
+const { v4: uuidv4 } = require('uuid');
+const os = require('os');
+const util = require('util');
+const ffmpegProbe = util.promisify(ffmpeg.ffprobe);
+const pipeline = util.promisify(require('stream').pipeline);
+const ytdl = require('ytdl-core');
+async function processChunk(inputPath, outputFileName, start, duration) {
+    return new Promise((resolve, reject) => {
+        ffmpeg(inputPath)
+            .seekInput(start)
+            .duration(duration)
+            .on('start', (cmd) => {
+                console.log(`Started FFmpeg with command: ${cmd}`);
+            })
+            .on('error', (err) => {
+                console.error(`Error occurred while processing chunk:`, err);
+                reject(err);
+            })
+            .on('end', () => {
+                console.log(`Finished processing chunk`);
+                resolve(outputFileName);
+            })
+            .save(outputFileName);
+    });
+}
+const generateUniqueFolderName = () => {
+    const uniqueFolderName = uuidv4();
+    const tempFolderPath = os.tmpdir(); // Get the system's temporary folder
+    const uniqueOutputPath = path.join(tempFolderPath, uniqueFolderName);
+    return uniqueOutputPath;
+}
+const generateUniqueTempFileName = () => {
+    return path.join(os.tmpdir(), uuidv4());
+}
+async function splitMediaFile(inputPath, chunkDurationInSeconds = 600) {
+    try {
+        const metadata = await ffmpegProbe(inputPath);
+        const duration = metadata.format.duration;
+        const numChunks = Math.ceil((duration - 1) / chunkDurationInSeconds);
+        const chunkPromises = [];
+        const uniqueOutputPath = generateUniqueFolderName();
+        // Create unique folder
+        fs.mkdirSync(uniqueOutputPath, { recursive: true });
+        for (let i = 0; i < numChunks; i++) {
+            const outputFileName = path.join(
+                uniqueOutputPath,
+                `chunk-${i + 1}-${path.basename(inputPath)}`
+            );
+            const chunkPromise = processChunk(
+                inputPath,
+                outputFileName,
+                i * chunkDurationInSeconds,
+                chunkDurationInSeconds
+            );
+            chunkPromises.push(chunkPromise);
+        }
+        const chunkedFiles = await Promise.all(chunkPromises);
+        console.log('All chunks processed. Chunked file names:', chunkedFiles);
+        return { chunks: chunkedFiles, folder: uniqueOutputPath }
+    } catch (err) {
+        console.error('Error occurred during the splitting process:', err);
+    }
+}
+async function deleteTempPath(path) {
+    try {
+        if (!path) return;
+        const stats = fs.statSync(path);
+        if (stats.isFile()) {
+            fs.unlinkSync(path);
+            console.log(`Temporary file ${path} deleted successfully.`);
+        } else if (stats.isDirectory()) {
+            fs.rmdirSync(path, { recursive: true });
+            console.log(`Temporary folder ${path} and its contents deleted successfully.`);
+        }
+    } catch (err) {
+        console.error('Error occurred while deleting the temporary path:', err);
+    }
+}
+function isValidYoutubeUrl(url) {
+    const regex = /^(https?:\/\/)?(www\.)?(youtube\.com|youtu\.?be)\/.+$/;
+    return regex.test(url);
+}
+function convertYoutubeToMp3Stream(video) {
+    // Configure ffmpeg to convert the video to mp3
+    const mp3Stream = ffmpeg(video)
+        .withAudioCodec('libmp3lame')
+        .toFormat('mp3')
+        .on('error', (err) => {
+            console.error(`An error occurred during conversion: ${err.message}`);
+        });
+    return mp3Stream;
+}
+async function pipeStreamToFile(stream, filePath) {
+    try {
+        await pipeline(stream, fs.createWriteStream(filePath));
+        console.log('Stream piped to file successfully.');
+    } catch (error) {
+        console.error(`Error piping stream to file: ${error.message}`);
+    }
+}
+const processYoutubeUrl = async (url) => {
+    const info = await ytdl.getInfo(url);
+    const audioFormat = ytdl.chooseFormat(info.formats, { quality: 'highestaudio' });
+    if (!audioFormat) {
+        throw new Error('No suitable audio format found');
+    }
+    const stream = ytdl.downloadFromInfo(info, { format: audioFormat });
+    const mp3Stream = convertYoutubeToMp3Stream(stream);
+    const outputFileName = path.join(os.tmpdir(), `${uuidv4()}.mp3`);
+    await pipeStreamToFile(mp3Stream, outputFileName); // You can also pipe the stream to a file
+    return outputFileName;
+}
+function deleteFile(filePath) {
+    try {
+        fs.unlinkSync(filePath);
+        console.log(`File ${filePath} cleaned successfully.`);
+    } catch (error) {
+        console.error(`Error deleting file ${filePath}:`, error);
+    }
+}
+module.exports = {
+    splitMediaFile, deleteTempPath, processYoutubeUrl, isValidYoutubeUrl
+};

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@aj-archipelago/cortex",
-  "version": "0.0.6",
+  "version": "0.0.7",
   "description": "Cortex is a GraphQL API for AI. It provides a simple, extensible interface for using AI services from OpenAI, Azure and others.",
   "repository": {
     "type": "git",
@@ -40,13 +40,16 @@
     "compromise": "^14.8.1",
     "compromise-paragraphs": "^0.1.0",
     "convict": "^6.2.3",
+    "fluent-ffmpeg": "^2.1.2",
+    "form-data": "^4.0.0",
     "gpt-3-encoder": "^1.1.4",
     "graphql": "^16.6.0",
     "graphql-subscriptions": "^2.0.0",
     "graphql-ws": "^5.11.2",
     "handlebars": "^4.7.7",
     "keyv": "^4.5.2",
-    "ws": "^8.12.0"
+    "ws": "^8.12.0",
+    "ytdl-core": "^4.11.2"
   },
   "devDependencies": {
     "dotenv": "^16.0.3",

package/pathways/bias.js CHANGED Viewed

@@ -1,4 +1,10 @@
+// bias.js
+// Objectivity analysis of text
+// This module exports a prompt that analyzes the given text and determines if it's written objectively. It also provides a detailed explanation of the decision.
 module.exports = {
+    // Uncomment the following line to enable caching for this prompt, if desired.
     // enableCache: true,
     prompt: `{{text}}\n\nIs the above text written objectively?  Why or why not, explain with details:\n`
 }

package/pathways/chat.js CHANGED Viewed

@@ -1,4 +1,7 @@
-// Description: Have a chat with a bot that uses context to understand the conversation
+// chat.js
+// Simple context-aware chat bot
+// This is a two prompt implementation of a context aware chat bot. The first prompt generates content that will be stored in the previousResult variable and will be returned to the client. In the optimum implementation, the client will then update their chatContext variable for the next call. The second prompt actually responds to the user. The second prompt *could* use previousResult instead of chatContext, but in this situation previousResult will also include the current turn of the conversation to which it is responding. That can get a little confusing as it tends to overemphasize the current turn in the response.
 module.exports = {
     prompt:
         [

package/pathways/complete.js CHANGED Viewed

@@ -1,3 +1,7 @@
+// complete.js
+// Text completion module
+// This module exports a prompt that takes an input text and completes it by generating a continuation of the given text.
 module.exports = {
     prompt: `Continue and complete the following:\n\n{{text}}`
 }

package/pathways/edit.js CHANGED Viewed

@@ -1,4 +1,10 @@
+// edit.js
+// Grammar and spelling correction module
+// This module exports a prompt that takes an input text and corrects all spelling and grammar errors found within the text.
 module.exports = {
+    // Set the temperature to 0 to favor more deterministic output when generating corrections.
     temperature: 0,
     prompt: `Correct all spelling and grammar errors in the input text.\n\nInput:\n{{text}}\n\nOutput:\n`
 }

package/pathways/entities.js CHANGED Viewed

@@ -1,9 +1,21 @@
+// entities.js
+// Entity extraction module
+// This module exports a prompt that takes an input text and extracts the top entities and their definitions as specified by the count parameter.
 module.exports = {
+    // Set the temperature to 0 to favor more deterministic output when generating entity extraction.
     temperature: 0,
     prompt: `{{text}}\n\nList the top {{count}} entities and their definitions for the above in the format {{format}}:`,
+    // Define the format for displaying the extracted entities and their definitions.
     format: `(name: definition)`,
+    // Define input parameters for the prompt, such as the number of entities to extract.
     inputParameters: {
         count: 5,
     },
+    // Set the list option to true as the prompt is expected to return a list of entities.
     list: true,
 }

package/pathways/index.js CHANGED Viewed

@@ -7,6 +7,6 @@ module.exports = {
     "paraphrase": require('./paraphrase'),
     "sentiment": require('./sentiment'),
     "summary": require('./summary'),
-    "topics": require('./topics'),
+    "transcribe": require('./transcribe'),
     "translate": require('./translate'),
 }

package/pathways/paraphrase.js CHANGED Viewed

@@ -1,3 +1,7 @@
+// paraphrase.js
+// Paraphrasing module
+// This module exports a prompt that takes an input text and rewrites it in a different way while maintaining the original meaning.
 module.exports = {
     prompt: `Rewrite the following:\n\n{{{text}}}`
 }

package/pathways/sentiment.js CHANGED Viewed

@@ -1,3 +1,7 @@
+// sentiment.js
+// Sentiment detection module
+// This module exports a prompt that takes an input text and asks how it makes the AI feel.
 module.exports = {
-    prompt: `How does this article make you feel?\n\n{{text}}`,
+    prompt: `How does the text below make you feel?\n\n{{text}}`,
 }

package/pathways/summary.js CHANGED Viewed

@@ -1,43 +1,60 @@
+// summary.js
+// Text summarization module with custom resolver
+// This module exports a prompt that takes an input text and generates a summary using a custom resolver.
+// Import required modules
 const { semanticTruncate } = require('../graphql/chunker');
 const { PathwayResolver } = require('../graphql/pathwayResolver');
 module.exports = {
+    // The main prompt function that takes the input text and asks to generate a summary.
     prompt: `{{{text}}}\n\nWrite a summary of the above text:\n\n`,
+    // Define input parameters for the prompt, such as the target length of the summary.
     inputParameters: {
-        targetLength: 500,
+        targetLength: 0,
     },
+    // Custom resolver to generate summaries by reprompting if they are too long or too short.
     resolver: async (parent, args, contextValue, info) => {
         const { config, pathway, requestState } = contextValue;
         const originalTargetLength = args.targetLength;
+        // If targetLength is not provided, execute the prompt once and return the result.
+        if (originalTargetLength === 0) {
+            let pathwayResolver = new PathwayResolver({ config, pathway, args, requestState });
+            return await pathwayResolver.resolve(args);
+        }
         const errorMargin = 0.2;
         const lowTargetLength = originalTargetLength * (1 - errorMargin);
         const targetWords = Math.round(originalTargetLength / 6.6);
-        // if the text is shorter than the summary length, just return the text
+        // If the text is shorter than the summary length, just return the text.
         if (args.text.length <= originalTargetLength) {
             return args.text;
         }
         const MAX_ITERATIONS = 5;
         let summary = '';
-        let bestSummary = '';
-        let pathwayResolver = new PathwayResolver({ config, pathway, requestState });
-        // modify the prompt to be words-based instead of characters-based
+        let pathwayResolver = new PathwayResolver({ config, pathway, args, requestState });
+        // Modify the prompt to be words-based instead of characters-based.
         pathwayResolver.pathwayPrompt = `{{{text}}}\n\nWrite a summary of the above text in exactly ${targetWords} words:\n\n`
         let i = 0;
-        // reprompt if summary is too long or too short
+        // Reprompt if summary is too long or too short.
         while (((summary.length > originalTargetLength) || (summary.length < lowTargetLength)) && i < MAX_ITERATIONS) {
             summary = await pathwayResolver.resolve(args);
             i++;
         }
-        // if the summary is still too long, truncate it
+        // If the summary is still too long, truncate it.
         if (summary.length > originalTargetLength) {
             return semanticTruncate(summary, originalTargetLength);
         } else {
             return summary;
         }
     }
-}
+}

package/pathways/transcribe.js ADDED Viewed

@@ -0,0 +1,8 @@
+module.exports = {
+    prompt: `{{text}}`,
+    model: `oai-whisper`,
+    inputParameters: {
+        file: ``,
+    },
+    timeout: 600, // in seconds
+}

package/pathways/translate.js CHANGED Viewed

@@ -1,10 +1,18 @@
-// Description: Translate a text from one language to another
+// translate.js
+// Translation module
+// This module exports a prompt that takes an input text and translates it from one language to another.
 module.exports = {
+    // Set the temperature to 0 to favor more deterministic output when generating translations.
     temperature: 0,
     prompt: `Translate the following text to {{to}}:\n\nOriginal Language:\n{{{text}}}\n\n{{to}}:\n`,
+    // Define input parameters for the prompt, such as the target language for translation.
     inputParameters: {
         to: `Arabic`,
     },
-    timeout: 300, // in seconds
+    // Set the timeout for the translation process, in seconds.
+    timeout: 300,
 }

package/tests/main.test.js CHANGED Viewed

@@ -93,19 +93,6 @@ it('validates summary endpoint', async () => {
     expect(response.data?.summary.result).toBeDefined();
 });
-it('validates topics endpoint with given num of count return', async () => {
-    const response = await testServer.executeOperation({
-        query: 'query topics($text: String!, $count: Int) { topics(text: $text, count: $count) { result } }',
-        variables: { text: 'hello there my dear world!', count: 3 },
-    });
-    expect(response.errors).toBeUndefined();
-    expect(response.data?.topics.result.length).toBe(3);
-    response.data?.topics.result.forEach((topic) => {
-        expect(topic).toBeDefined();
-    });
-});
 module.exports = {
     getTestServer,
 };

package/pathways/topics.js DELETED Viewed

@@ -1,9 +0,0 @@
-module.exports = {
-    prompt: [`{{text}}\n\nList the top {{count}} news categories for the above article (e.g. 1. Finance):`,
-            `{{previousResult}}\n\nPick the {{count}} most important news categories from the above:`
-    ],
-    inputParameters: {
-        count: 5,
-    },
-    list: true,
-}