@aj-archipelago/cortex 1.0.16 → 1.0.17

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -38,10 +38,10 @@ export async function pdfToText(filePath) {
38
38
  const page = await pdf.getPage(i);
39
39
  const textContent = await page.getTextContent();
40
40
  const strings = textContent.items.map(item => item.str);
41
- finalText += strings.join(' ');
41
+ finalText += strings.join(' ') + '\n';
42
42
  }
43
43
 
44
- return finalText;
44
+ return finalText.trim();
45
45
  }
46
46
 
47
47
  export async function csvToText(filePath) {
@@ -9,6 +9,8 @@ import os from 'os';
9
9
  import { v4 as uuidv4 } from 'uuid';
10
10
  import fs from 'fs';
11
11
 
12
+ const DOC_EXTENSIONS = [".txt", ".json", ".csv", ".md", ".xml", ".js", ".html", ".css", '.pdf', '.docx', '.xlsx', '.csv'];
13
+
12
14
  const useAzure = process.env.AZURE_STORAGE_CONNECTION_STRING ? true : false;
13
15
  console.log(useAzure ? 'Using Azure Storage' : 'Using local file system');
14
16
 
@@ -64,7 +66,7 @@ async function main(context, req) {
64
66
  await publishRequestProgress({ requestId, progress, completedCount, totalCount, numberOfChunks, data });
65
67
  }
66
68
 
67
- const isDocument = ['.pdf', '.txt', '.docx', '.xlsx', '.csv'].some(ext => uri.toLowerCase().endsWith(ext));
69
+ const isDocument = DOC_EXTENSIONS.some(ext => uri.toLowerCase().endsWith(ext));
68
70
 
69
71
  try {
70
72
  if (isDocument) {
@@ -72,20 +74,47 @@ async function main(context, req) {
72
74
  const file = path.join(os.tmpdir(), `${uuidv4()}${extension}`);
73
75
  await downloadFile(uri, file)
74
76
  const text = await documentToText(file);
75
- if (save) {
76
- const fileName = `${uuidv4()}.txt`; // generate unique file name
77
- const filePath = path.join(os.tmpdir(), fileName);
78
- const tmpPath = filePath;
79
- fs.writeFileSync(filePath, text); // write text to file
80
-
81
- // save file to the cloud or local file system
82
- const saveResult = useAzure ? await saveFileToBlob(filePath, requestId) : await moveFileToPublicFolder(filePath, requestId);
83
- result.push(saveResult);
84
-
85
- // delete temporary file
86
- fs.unlinkSync(tmpPath);
87
- } else {
88
- result.push(...easyChunker(text));
77
+ let tmpPath;
78
+
79
+ try{
80
+ if (save) {
81
+ const fileName = `${uuidv4()}.txt`; // generate unique file name
82
+ const filePath = path.join(os.tmpdir(), fileName);
83
+ tmpPath = filePath;
84
+ fs.writeFileSync(filePath, text); // write text to file
85
+
86
+ // save file to the cloud or local file system
87
+ const saveResult = useAzure ? await saveFileToBlob(filePath, requestId) : await moveFileToPublicFolder(filePath, requestId);
88
+ result.push(saveResult);
89
+
90
+ } else {
91
+ result.push(...easyChunker(text));
92
+ }
93
+ }catch(err){
94
+ console.log(`Error saving file ${uri} with request id ${requestId}:`, err);
95
+ }finally{
96
+ try{
97
+ // delete temporary files
98
+ tmpPath && fs.unlinkSync(tmpPath);
99
+ file && fs.unlinkSync(file);
100
+ console.log(`Cleaned temp files ${tmpPath}, ${file}`);
101
+ }catch(err){
102
+ console.log(`Error cleaning temp files ${tmpPath}, ${file}:`, err);
103
+ }
104
+
105
+ try{
106
+ //delete uploaded prev nontext file
107
+ //check cleanup for whisper temp uploaded files url
108
+ const regex = /whispertempfiles\/([a-z0-9-]+)/;
109
+ const match = uri.match(regex);
110
+ if (match && match[1]) {
111
+ const extractedValue = match[1];
112
+ useAzure ? await deleteBlob(extractedValue) : await deleteFolder(extractedValue);
113
+ console.log(`Cleaned temp file ${uri} with request id ${extractedValue}`);
114
+ }
115
+ }catch(err){
116
+ console.log(`Error cleaning temp file ${uri}:`, err);
117
+ }
89
118
  }
90
119
  }else{
91
120
 
@@ -123,7 +152,7 @@ async function main(context, req) {
123
152
  console.error("An error occurred:", error);
124
153
  } finally {
125
154
  try {
126
- (isYoutubeUrl||isDocument) && (await deleteTempPath(file));
155
+ (isYoutubeUrl) && (await deleteTempPath(file));
127
156
  folder && (await deleteTempPath(folder));
128
157
  } catch (error) {
129
158
  console.error("An error occurred while deleting:", error);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@aj-archipelago/cortex",
3
- "version": "1.0.16",
3
+ "version": "1.0.17",
4
4
  "description": "Cortex is a GraphQL API for AI. It provides a simple, extensible interface for using AI services from OpenAI, Azure and others.",
5
5
  "repository": {
6
6
  "type": "git",
@@ -10,6 +10,8 @@ const API_URL = config.get('whisperMediaApiUrl');
10
10
 
11
11
  const TOP = 1000;
12
12
 
13
+ let DIRECT_FILE_EXTENSIONS = [".txt", ".json", ".csv", ".md", ".xml", ".js", ".html", ".css"];
14
+
13
15
  class AzureCognitivePlugin extends ModelPlugin {
14
16
  constructor(config, pathway, modelName, model) {
15
17
  super(config, pathway, modelName, model);
@@ -52,7 +54,14 @@ class AzureCognitivePlugin extends ModelPlugin {
52
54
 
53
55
  if (mode == 'index') {
54
56
  const calculateInputVector = async () => {
55
- return JSON.parse(await callPathway(this.config, 'embeddings', { text }))[0];
57
+ try{
58
+ if(!text || !text.trim()){
59
+ return;
60
+ }
61
+ return JSON.parse(await callPathway(this.config, 'embeddings', { text }))[0];
62
+ }catch(err){
63
+ console.log(`Error in calculating input vector for text: ${text}, error: ${err}`);
64
+ }
56
65
  }
57
66
 
58
67
  const doc = {
@@ -141,7 +150,7 @@ class AzureCognitivePlugin extends ModelPlugin {
141
150
  let url = file;
142
151
  //if not txt file, use helper app to convert to txt
143
152
  const extension = path.extname(file).toLowerCase();
144
- if (extension !== '.txt') {
153
+ if (!DIRECT_FILE_EXTENSIONS.includes(extension)) {
145
154
  try {
146
155
  const {data} = await axios.get(API_URL, { params: { uri: file, requestId, save: true } });
147
156
  url = data[0]
@@ -154,10 +163,18 @@ class AzureCognitivePlugin extends ModelPlugin {
154
163
  const { data } = await axios.get(url);
155
164
  await this.markCompletedForCleanUp(requestId);
156
165
 
166
+ if(!data){
167
+ throw Error(`No data can be extracted out of file!`);
168
+ }
169
+
157
170
  //return await this.execute(data, {...parameters, file:null}, prompt, pathwayResolver);
158
171
  return await callPathway(this.config, 'cognitive_insert', {...parameters, file:null, text:data });
159
172
  }
160
173
 
174
+ if (mode === 'index' && (!text || !text.trim()) ){
175
+ return; // nothing to index
176
+ }
177
+
161
178
  const { data, params } = await this.getRequestParameters(text, parameters, prompt, mode, indexName, savedContextId, {headers, requestId, pathway, url});
162
179
 
163
180
  // update contextid last used