npm - @aj-archipelago/cortex - Versions diffs - 1.0.13 → 1.0.15 - Mend

@aj-archipelago/cortex 1.0.13 → 1.0.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

package/helper_apps/MediaFileChunker/Dockerfile +3 -4
package/helper_apps/MediaFileChunker/docHelper.js +15 -0
package/helper_apps/MediaFileChunker/index.js +20 -4
package/helper_apps/MediaFileChunker/package-lock.json +11 -0
package/helper_apps/MediaFileChunker/package.json +1 -0
package/package.json +1 -1
package/pathways/index.js +2 -0
package/server/plugins/azureCognitivePlugin.js +46 -2

package/helper_apps/MediaFileChunker/Dockerfile CHANGED Viewed

@@ -6,10 +6,9 @@ COPY package*.json ./
 RUN npm install
-## following 3 lines are for installing ffmepg
-RUN apk update
-RUN apk add
-RUN apk add ffmpeg
+## installing ffmpeg
+RUN apk update && \
+    apk add ffmpeg=6.0-r15
 COPY . .

package/helper_apps/MediaFileChunker/docHelper.js CHANGED Viewed

@@ -2,6 +2,7 @@ import pdfjsLib from 'pdfjs-dist';
 import fs from 'fs/promises';
 import mammoth from 'mammoth';
 import XLSX from 'xlsx';
+import Papa from 'papaparse';
 export async function txtToText(filePath) {
     const text = await fs.readFile(filePath, 'utf-8');
@@ -43,6 +44,18 @@ export async function pdfToText(filePath) {
     return finalText;
 }
+export async function csvToText(filePath) {
+    const text = await fs.readFile(filePath, 'utf-8');
+    const results = Papa.parse(text);
+    let finalText = '';
+    results.data.forEach(row => {
+        finalText += row.join(' ') + '\n';
+    });
+    return finalText;
+}
 export async function documentToText(filePath) {
     const fileExtension = filePath.split('.').pop();
@@ -55,6 +68,8 @@ export async function documentToText(filePath) {
             return docxToText(filePath);
         case 'xlsx':
             return xlsxToText(filePath);
+        case 'csv':
+            return csvToText(filePath);
         default:
             throw new Error(`Unsupported file type: ${fileExtension}`);
     }

package/helper_apps/MediaFileChunker/index.js CHANGED Viewed

@@ -7,6 +7,7 @@ import { documentToText, easyChunker } from './docHelper.js';
 import path from 'path';
 import os from 'os';
 import { v4 as uuidv4 } from 'uuid';
+import fs from 'fs';
 const useAzure = process.env.AZURE_STORAGE_CONNECTION_STRING ? true : false;
 console.log(useAzure ? 'Using Azure Storage' : 'Using local file system');
@@ -38,7 +39,7 @@ async function main(context, req) {
         return
     }
-    const { uri, requestId } = req.body?.params || req.query;
+    const { uri, requestId, save } = req.body?.params || req.query;
     if (!uri || !requestId) {
         context.res = {
             status: 400,
@@ -63,14 +64,29 @@ async function main(context, req) {
         await publishRequestProgress({ requestId, progress, completedCount, totalCount, numberOfChunks, data });
     }
-    const isDocument = ['.pdf', '.txt', '.docx', '.xlsx'].some(ext => uri.toLowerCase().endsWith(ext));
+    const isDocument = ['.pdf', '.txt', '.docx', '.xlsx', '.csv'].some(ext => uri.toLowerCase().endsWith(ext));
     try {
         if (isDocument) {
             const extension = path.extname(uri).toLowerCase();
             const file = path.join(os.tmpdir(), `${uuidv4()}${extension}`);
-            await downloadFile(uri,file)
-            result.push(...easyChunker(await documentToText(file)));
+            await downloadFile(uri, file)
+            const text = await documentToText(file);
+            if (save) {
+                const fileName = `${uuidv4()}.txt`; // generate unique file name
+                const filePath = path.join(os.tmpdir(), fileName);
+                const tmpPath = filePath;
+                fs.writeFileSync(filePath, text); // write text to file
+                // save file to the cloud or local file system
+                const saveResult = useAzure ? await saveFileToBlob(filePath, requestId) : await moveFileToPublicFolder(filePath, requestId);
+                result.push(saveResult);
+                // delete temporary file
+                fs.unlinkSync(tmpPath);
+            } else {
+                result.push(...easyChunker(text));
+            }
         }else{
             if (isYoutubeUrl) {

package/helper_apps/MediaFileChunker/package-lock.json CHANGED Viewed

@@ -18,6 +18,7 @@
         "fluent-ffmpeg": "^2.1.2",
         "ioredis": "^5.3.1",
         "mammoth": "^1.6.0",
+        "papaparse": "^5.4.1",
         "pdfjs-dist": "^3.9.179",
         "public-ip": "^6.0.1",
         "uuid": "^9.0.0",
@@ -1989,6 +1990,11 @@
       "resolved": "https://registry.npmjs.org/pako/-/pako-1.0.11.tgz",
       "integrity": "sha512-4hLB8Py4zZce5s4yd9XzopqwVv/yGNhV1Bl8NTmCq1763HeK2+EwVTv+leGeL13Dnh2wfbqowVPXCIO0z4taYw=="
     },
+    "node_modules/papaparse": {
+      "version": "5.4.1",
+      "resolved": "https://registry.npmjs.org/papaparse/-/papaparse-5.4.1.tgz",
+      "integrity": "sha512-HipMsgJkZu8br23pW15uvo6sib6wne/4woLZPlFf3rpDyMe9ywEXUsuD7+6K9PRkJlVT51j/sCOYDKGGS3ZJrw=="
+    },
     "node_modules/parseurl": {
       "version": "1.3.3",
       "resolved": "https://registry.npmjs.org/parseurl/-/parseurl-1.3.3.tgz",
@@ -4098,6 +4104,11 @@
       "resolved": "https://registry.npmjs.org/pako/-/pako-1.0.11.tgz",
       "integrity": "sha512-4hLB8Py4zZce5s4yd9XzopqwVv/yGNhV1Bl8NTmCq1763HeK2+EwVTv+leGeL13Dnh2wfbqowVPXCIO0z4taYw=="
     },
+    "papaparse": {
+      "version": "5.4.1",
+      "resolved": "https://registry.npmjs.org/papaparse/-/papaparse-5.4.1.tgz",
+      "integrity": "sha512-HipMsgJkZu8br23pW15uvo6sib6wne/4woLZPlFf3rpDyMe9ywEXUsuD7+6K9PRkJlVT51j/sCOYDKGGS3ZJrw=="
+    },
     "parseurl": {
       "version": "1.3.3",
       "resolved": "https://registry.npmjs.org/parseurl/-/parseurl-1.3.3.tgz",

package/helper_apps/MediaFileChunker/package.json CHANGED Viewed

@@ -18,6 +18,7 @@
     "fluent-ffmpeg": "^2.1.2",
     "ioredis": "^5.3.1",
     "mammoth": "^1.6.0",
+    "papaparse": "^5.4.1",
     "pdfjs-dist": "^3.9.179",
     "public-ip": "^6.0.1",
     "uuid": "^9.0.0",

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@aj-archipelago/cortex",
-  "version": "1.0.13",
+  "version": "1.0.15",
   "description": "Cortex is a GraphQL API for AI. It provides a simple, extensible interface for using AI services from OpenAI, Azure and others.",
   "repository": {
     "type": "git",

package/pathways/index.js CHANGED Viewed

@@ -6,6 +6,7 @@ import cognitive_insert from './cognitive_insert.js';
 import cognitive_search from './cognitive_search.js';
 import complete from './complete.js';
 import entities from './entities.js';
+import language from './language.js';
 import paraphrase from './paraphrase.js';
 import sentiment from './sentiment.js';
 import summary from './summary.js';
@@ -28,6 +29,7 @@ export {
     complete,
     embeddings,
     entities,
+    language,
     paraphrase,
     sentiment,
     summary,

package/server/plugins/azureCognitivePlugin.js CHANGED Viewed

@@ -2,6 +2,11 @@
 import { callPathway } from '../../lib/pathwayTools.js';
 import ModelPlugin from './modelPlugin.js';
 import { v4 as uuidv4 } from 'uuid';
+import path from 'path';
+import { config } from '../../config.js';
+import { axios } from '../../lib/request.js';
+const API_URL = config.get('whisperMediaApiUrl');
 const TOP = 1000;
@@ -55,7 +60,8 @@ class AzureCognitivePlugin extends ModelPlugin {
                 content: text,
                 contentVector: inputVector || (await calculateInputVector()),
                 owner: savedContextId,
-                docId: docId || uuidv4()
+                docId: docId || uuidv4(),
+                createdAt: new Date().toISOString()
             }
             // if(!privateData){
             //     delete doc.owner;
@@ -108,17 +114,55 @@ class AzureCognitivePlugin extends ModelPlugin {
         }
     }
+    async markCompletedForCleanUp(requestId) {
+        try {
+            if (API_URL) {
+                //call helper api to mark processing as completed
+                const res = await axios.delete(API_URL, { params: { requestId } });
+                console.log(`Marked request ${requestId} as completed:`, res.data);
+                return res.data;
+            }
+        } catch (err) {
+            console.log(`Error marking request ${requestId} as completed:`, err);
+        }
+    }
     // Execute the request to the Azure Cognitive API
     async execute(text, parameters, prompt, pathwayResolver) {
-        const { requestId, pathway, savedContextId } = pathwayResolver;
+        const { requestId, pathway, savedContextId, savedContext } = pathwayResolver;
         const mode = this.promptParameters.mode || 'search';
         let url = this.ensureMode(this.requestUrl(text), mode == 'delete' ? 'index' : mode);
         const indexName = parameters.indexName || 'indexcortex';
         url = this.ensureIndex(url, indexName);
         const headers = this.model.headers;
+        const { file } = parameters;
+        if(file){
+            let url = file;
+            //if not txt file, use helper app to convert to txt
+            const extension = path.extname(file).toLowerCase();
+            if (extension !== '.txt') {
+                try {
+                    const {data} = await axios.get(API_URL, { params: { uri: file, requestId, save: true } });
+                    url = data[0]
+                } catch (error) {
+                    console.log(`Error converting file ${file} to txt:`, error);
+                    throw error;
+                }
+            }
+            const { data } = await axios.get(url);
+            await this.markCompletedForCleanUp(requestId);
+            //return await this.execute(data, {...parameters, file:null}, prompt, pathwayResolver);
+            return await callPathway(this.config, 'cognitive_insert', {...parameters, file:null, text:data });
+        }
         const { data, params } = await this.getRequestParameters(text, parameters, prompt, mode, indexName, savedContextId, {headers, requestId, pathway, url});
+        // update contextid last used
+        savedContext["lastUsed"] = new Date().toISOString();
         if (mode === 'delete' && data.value.length == 0){
             return; // nothing to delete
         }