@aj-archipelago/cortex 1.0.14 → 1.0.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,6 +2,7 @@ import pdfjsLib from 'pdfjs-dist';
2
2
  import fs from 'fs/promises';
3
3
  import mammoth from 'mammoth';
4
4
  import XLSX from 'xlsx';
5
+ import Papa from 'papaparse';
5
6
 
6
7
  export async function txtToText(filePath) {
7
8
  const text = await fs.readFile(filePath, 'utf-8');
@@ -43,6 +44,18 @@ export async function pdfToText(filePath) {
43
44
  return finalText;
44
45
  }
45
46
 
47
+ // Reads the CSV file at filePath and returns its rows as plain text: one line
48
+ // per row, with that row's cells joined by single spaces. Rows that are
49
+ // completely empty are left out of the result.
50
+ export async function csvToText(filePath) {
51
+ const text = await fs.readFile(filePath, 'utf-8');
52
+ // skipEmptyLines drops the empty row Papa.parse otherwise reports for a trailing newline.
53
+ const { data } = Papa.parse(text, { skipEmptyLines: true });
54
+
55
+ // Join once instead of growing a string row by row.
56
+ return data.map(row => `${row.join(' ')}\n`).join('');
57
+ }
58
+
46
59
  export async function documentToText(filePath) {
47
60
  const fileExtension = filePath.split('.').pop();
48
61
 
@@ -55,6 +68,8 @@ export async function documentToText(filePath) {
55
68
  return docxToText(filePath);
56
69
  case 'xlsx':
57
70
  return xlsxToText(filePath);
71
+ case 'csv':
72
+ return csvToText(filePath);
58
73
  default:
59
74
  throw new Error(`Unsupported file type: ${fileExtension}`);
60
75
  }
@@ -7,6 +7,7 @@ import { documentToText, easyChunker } from './docHelper.js';
7
7
  import path from 'path';
8
8
  import os from 'os';
9
9
  import { v4 as uuidv4 } from 'uuid';
10
+ import fs from 'fs';
10
11
 
11
12
  const useAzure = process.env.AZURE_STORAGE_CONNECTION_STRING ? true : false;
12
13
  console.log(useAzure ? 'Using Azure Storage' : 'Using local file system');
@@ -38,7 +39,7 @@ async function main(context, req) {
38
39
  return
39
40
  }
40
41
 
41
- const { uri, requestId } = req.body?.params || req.query;
42
+ const { uri, requestId, save } = req.body?.params || req.query;
42
43
  if (!uri || !requestId) {
43
44
  context.res = {
44
45
  status: 400,
@@ -63,14 +64,29 @@ async function main(context, req) {
63
64
  await publishRequestProgress({ requestId, progress, completedCount, totalCount, numberOfChunks, data });
64
65
  }
65
66
 
66
- const isDocument = ['.pdf', '.txt', '.docx', '.xlsx'].some(ext => uri.toLowerCase().endsWith(ext));
67
+ const isDocument = ['.pdf', '.txt', '.docx', '.xlsx', '.csv'].some(ext => uri.toLowerCase().endsWith(ext));
67
68
 
68
69
  try {
69
70
  if (isDocument) {
70
71
  const extension = path.extname(uri).toLowerCase();
71
72
  const file = path.join(os.tmpdir(), `${uuidv4()}${extension}`);
72
- await downloadFile(uri,file)
73
- result.push(...easyChunker(await documentToText(file)));
73
+ await downloadFile(uri, file)
74
+ const text = await documentToText(file);
75
+ if (save) {
76
+ const fileName = `${uuidv4()}.txt`; // generate unique file name
77
+ const filePath = path.join(os.tmpdir(), fileName);
78
+ const tmpPath = filePath;
79
+ fs.writeFileSync(filePath, text); // write text to file
80
+
81
+ // save file to the cloud or local file system
82
+ const saveResult = useAzure ? await saveFileToBlob(filePath, requestId) : await moveFileToPublicFolder(filePath, requestId);
83
+ result.push(saveResult);
84
+
85
+ // delete temporary file
86
+ fs.unlinkSync(tmpPath);
87
+ } else {
88
+ result.push(...easyChunker(text));
89
+ }
74
90
  }else{
75
91
 
76
92
  if (isYoutubeUrl) {
@@ -18,6 +18,7 @@
18
18
  "fluent-ffmpeg": "^2.1.2",
19
19
  "ioredis": "^5.3.1",
20
20
  "mammoth": "^1.6.0",
21
+ "papaparse": "^5.4.1",
21
22
  "pdfjs-dist": "^3.9.179",
22
23
  "public-ip": "^6.0.1",
23
24
  "uuid": "^9.0.0",
@@ -1989,6 +1990,11 @@
1989
1990
  "resolved": "https://registry.npmjs.org/pako/-/pako-1.0.11.tgz",
1990
1991
  "integrity": "sha512-4hLB8Py4zZce5s4yd9XzopqwVv/yGNhV1Bl8NTmCq1763HeK2+EwVTv+leGeL13Dnh2wfbqowVPXCIO0z4taYw=="
1991
1992
  },
1993
+ "node_modules/papaparse": {
1994
+ "version": "5.4.1",
1995
+ "resolved": "https://registry.npmjs.org/papaparse/-/papaparse-5.4.1.tgz",
1996
+ "integrity": "sha512-HipMsgJkZu8br23pW15uvo6sib6wne/4woLZPlFf3rpDyMe9ywEXUsuD7+6K9PRkJlVT51j/sCOYDKGGS3ZJrw=="
1997
+ },
1992
1998
  "node_modules/parseurl": {
1993
1999
  "version": "1.3.3",
1994
2000
  "resolved": "https://registry.npmjs.org/parseurl/-/parseurl-1.3.3.tgz",
@@ -4098,6 +4104,11 @@
4098
4104
  "resolved": "https://registry.npmjs.org/pako/-/pako-1.0.11.tgz",
4099
4105
  "integrity": "sha512-4hLB8Py4zZce5s4yd9XzopqwVv/yGNhV1Bl8NTmCq1763HeK2+EwVTv+leGeL13Dnh2wfbqowVPXCIO0z4taYw=="
4100
4106
  },
4107
+ "papaparse": {
4108
+ "version": "5.4.1",
4109
+ "resolved": "https://registry.npmjs.org/papaparse/-/papaparse-5.4.1.tgz",
4110
+ "integrity": "sha512-HipMsgJkZu8br23pW15uvo6sib6wne/4woLZPlFf3rpDyMe9ywEXUsuD7+6K9PRkJlVT51j/sCOYDKGGS3ZJrw=="
4111
+ },
4101
4112
  "parseurl": {
4102
4113
  "version": "1.3.3",
4103
4114
  "resolved": "https://registry.npmjs.org/parseurl/-/parseurl-1.3.3.tgz",
@@ -18,6 +18,7 @@
18
18
  "fluent-ffmpeg": "^2.1.2",
19
19
  "ioredis": "^5.3.1",
20
20
  "mammoth": "^1.6.0",
21
+ "papaparse": "^5.4.1",
21
22
  "pdfjs-dist": "^3.9.179",
22
23
  "public-ip": "^6.0.1",
23
24
  "uuid": "^9.0.0",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@aj-archipelago/cortex",
3
- "version": "1.0.14",
3
+ "version": "1.0.16",
4
4
  "description": "Cortex is a GraphQL API for AI. It provides a simple, extensible interface for using AI services from OpenAI, Azure and others.",
5
5
  "repository": {
6
6
  "type": "git",
package/pathways/index.js CHANGED
@@ -6,6 +6,7 @@ import cognitive_insert from './cognitive_insert.js';
6
6
  import cognitive_search from './cognitive_search.js';
7
7
  import complete from './complete.js';
8
8
  import entities from './entities.js';
9
+ import language from './language.js';
9
10
  import paraphrase from './paraphrase.js';
10
11
  import sentiment from './sentiment.js';
11
12
  import summary from './summary.js';
@@ -28,6 +29,7 @@ export {
28
29
  complete,
29
30
  embeddings,
30
31
  entities,
32
+ language,
31
33
  paraphrase,
32
34
  sentiment,
33
35
  summary,
package/server/graphql.js CHANGED
@@ -144,6 +144,7 @@ const build = async (config) => {
144
144
 
145
145
  const server = new ApolloServer({
146
146
  schema,
147
+ introspection: process.env.NODE_ENV === 'development',
147
148
  csrfPrevention: true,
148
149
  plugins: plugins.concat([// Proper shutdown for the HTTP server.
149
150
  ApolloServerPluginDrainHttpServer({ httpServer }),
@@ -2,6 +2,11 @@
2
2
  import { callPathway } from '../../lib/pathwayTools.js';
3
3
  import ModelPlugin from './modelPlugin.js';
4
4
  import { v4 as uuidv4 } from 'uuid';
5
+ import path from 'path';
6
+ import { config } from '../../config.js';
7
+ import { axios } from '../../lib/request.js';
8
+
9
+ const API_URL = config.get('whisperMediaApiUrl');
5
10
 
6
11
  const TOP = 1000;
7
12
 
@@ -109,6 +114,19 @@ class AzureCognitivePlugin extends ModelPlugin {
109
114
  }
110
115
  }
111
116
 
117
+ async markCompletedForCleanUp(requestId) {
118
+ // Notifies the helper API that requestId is done; does nothing when no API URL is configured.
119
+ if (!API_URL) return;
120
+ try {
121
+ // A DELETE carrying the requestId marks the processing as completed; failures are only logged.
122
+ const { data } = await axios.delete(API_URL, { params: { requestId } });
123
+ console.log(`Marked request ${requestId} as completed:`, data);
124
+ return data;
125
+ } catch (err) {
126
+ console.log(`Error marking request ${requestId} as completed:`, err);
127
+ }
128
+ }
129
+
112
130
  // Execute the request to the Azure Cognitive API
113
131
  async execute(text, parameters, prompt, pathwayResolver) {
114
132
  const { requestId, pathway, savedContextId, savedContext } = pathwayResolver;
@@ -118,6 +136,28 @@ class AzureCognitivePlugin extends ModelPlugin {
118
136
  url = this.ensureIndex(url, indexName);
119
137
  const headers = this.model.headers;
120
138
 
139
+ const { file } = parameters;
140
+ if(file){
141
+ let url = file;
142
+ //if not txt file, use helper app to convert to txt
143
+ const extension = path.extname(file).toLowerCase();
144
+ if (extension !== '.txt') {
145
+ try {
146
+ const {data} = await axios.get(API_URL, { params: { uri: file, requestId, save: true } });
147
+ url = data[0]
148
+ } catch (error) {
149
+ console.log(`Error converting file ${file} to txt:`, error);
150
+ throw error;
151
+ }
152
+ }
153
+
154
+ const { data } = await axios.get(url);
155
+ await this.markCompletedForCleanUp(requestId);
156
+
157
+ //return await this.execute(data, {...parameters, file:null}, prompt, pathwayResolver);
158
+ return await callPathway(this.config, 'cognitive_insert', {...parameters, file:null, text:data });
159
+ }
160
+
121
161
  const { data, params } = await this.getRequestParameters(text, parameters, prompt, mode, indexName, savedContextId, {headers, requestId, pathway, url});
122
162
 
123
163
  // update contextid last used