utilitas 1995.2.6 → 1995.2.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/hal.mjs CHANGED
@@ -13,7 +13,10 @@ const li = (id, text, url) => `\n${id}. ` + (url ? link(text, url) : text);
13
13
  const cardReg = /^\[\d*\]:\ ([^\ ]*)\ "(.*)"$/ig;
14
14
  // https://help.openai.com/en/articles/4936856-what-are-tokens-and-how-to-count-them
15
15
  const countTokens = txt => Math.ceil(txt.split(/[^a-z0-9]/i).length * 100 / 75);
16
- const MAX_CONTEXT_TOKENS = 8192;
16
+ // Keep this for GPT4 {
17
+ // const MAX_CONTEXT_TOKENS = 8192;
18
+ // }
19
+ const MAX_CONTEXT_TOKENS = 4096;
17
20
  const MAX_PROMPT_TOKENS = Math.floor(MAX_CONTEXT_TOKENS * 0.6);
18
21
  const MAX_RESPONSE_TOKENS = MAX_CONTEXT_TOKENS - MAX_PROMPT_TOKENS;
19
22
 
@@ -35,10 +38,15 @@ const init = async options => {
35
38
  engine = (await import('@waylaidwanderer/chatgpt-api')).ChatGPTClient;
36
39
  client = new engine(options?.clientOptions?.apiKey, {
37
40
  keepNecessaryMessagesOnly: true,
38
- maxContextTokens: MAX_CONTEXT_TOKENS,
41
+ // Keep this for GPT4 {
42
+ // maxContextTokens: MAX_CONTEXT_TOKENS,
43
+ // }
39
44
  modelOptions: {
40
- model: options?.model || 'gpt-4',
41
- max_tokens: MAX_RESPONSE_TOKENS,
45
+ model: options?.model || 'gpt-3.5-turbo',
46
+ // Keep this for GPT4 {
47
+ // model: options?.model || 'gpt-4',
48
+ // max_tokens: MAX_RESPONSE_TOKENS,
49
+ // }
42
50
  ...options?.clientOptions?.modelOptions || {}
43
51
  }, ...options?.clientOptions || {},
44
52
  }, options?.cacheOptions);
package/lib/manifest.mjs CHANGED
@@ -1,7 +1,7 @@
1
1
  const manifest = {
2
2
  "name": "utilitas",
3
3
  "description": "Just another common utility for JavaScript.",
4
- "version": "1995.2.6",
4
+ "version": "1995.2.8",
5
5
  "private": false,
6
6
  "homepage": "https://github.com/Leask/utilitas",
7
7
  "main": "index.mjs",
@@ -28,7 +28,7 @@ const manifest = {
28
28
  "@google-cloud/text-to-speech": "^5.0.1",
29
29
  "@google-cloud/vision": "^4.0.2",
30
30
  "@mozilla/readability": "^0.4.4",
31
- "@sentry/node": "^7.80.1",
31
+ "@sentry/node": "^7.83.0",
32
32
  "@waylaidwanderer/chatgpt-api": "^1.37.3",
33
33
  "acme-client": "^5.0.0",
34
34
  "browserify-fs": "^1.0.0",
@@ -37,24 +37,25 @@ const manifest = {
37
37
  "fluent-ffmpeg": "^2.1.2",
38
38
  "form-data": "^4.0.0",
39
39
  "ioredis": "^5.3.2",
40
- "jsdom": "^22.1.0",
40
+ "jsdom": "^23.0.0",
41
41
  "lorem-ipsum": "^2.0.8",
42
42
  "mailgun.js": "^9.3.0",
43
43
  "mime-types": "^2.1.35",
44
- "mysql2": "^3.6.3",
45
- "node-mailjet": "^6.0.4",
44
+ "mysql2": "^3.6.5",
45
+ "node-mailjet": "^6.0.5",
46
46
  "node-polyfill-webpack-plugin": "^2.0.1",
47
47
  "office-text-extractor": "^3.0.2",
48
- "openai": "^4.19.0",
48
+ "openai": "^4.20.1",
49
+ "pdfjs-dist": "^4.0.269",
49
50
  "ping": "^0.4.4",
50
51
  "say": "^0.16.0",
51
- "telegraf": "^4.15.0",
52
+ "telegraf": "^4.15.2",
52
53
  "telesignsdk": "^2.2.3",
53
54
  "tesseract.js": "^5.0.3",
54
55
  "twilio": "^4.19.0",
55
56
  "url": "github:Leask/node-url",
56
57
  "webpack-cli": "^5.1.4",
57
- "whisper-node": "^0.2.12",
58
+ "whisper-node": "^1.1.1",
58
59
  "youtube-transcript": "^1.0.6"
59
60
  }
60
61
  };
package/lib/shot.mjs CHANGED
@@ -122,14 +122,12 @@ const get = async (url, options) => {
122
122
  case _JSON:
123
123
  content = parseJson(buf2utf(buffer), null);
124
124
  break;
125
- case 'TEXT':
126
- content = buf2utf(buffer);
127
- break;
128
125
  case _PARSED:
129
126
  content = await distillHtml(buf2utf(buffer));
130
127
  break;
131
128
  default:
132
129
  assert(!options.encode, 'Invalid encoding.', 400);
130
+ case 'TEXT':
133
131
  content = buf2utf(buffer);
134
132
  }
135
133
  }
package/lib/vision.mjs CHANGED
@@ -1,6 +1,3 @@
1
- import { getApiKeyCredentials } from './encryption.mjs';
2
- import get from './shot.mjs';
3
-
4
1
  import {
5
2
  convert, deleteOnCloud, downloadFromCloud, getIdByGs, uploadToCloud,
6
3
  } from './storage.mjs';
@@ -9,7 +6,9 @@ import {
9
6
  ensureArray, ignoreErrFunc, log as _log, need, throwError, trim,
10
7
  } from './utilitas.mjs';
11
8
 
12
- const _NEED = ['@google-cloud/vision', 'tesseract.js'];
9
+ import { getApiKeyCredentials } from './encryption.mjs';
10
+
11
+ const _NEED = ['@google-cloud/vision', 'pdfjs-dist', 'tesseract.js'];
13
12
  const [BASE64, BUFFER, FILE, DEFAULT_LANG] = ['BASE64', 'BUFFER', 'FILE', 'eng'];
14
13
  const ceil = num => num.toFixed(4);
15
14
  const errorMessage = 'Invalid image data.';
@@ -128,6 +127,12 @@ const see = async (image, options) => {
128
127
 
129
128
  const read = async (image, options) => {
130
129
  assert(client, 'Vision API has not been initialized.', 500);
130
+ if (options?.allPages) {
131
+ assert(options?.input === FILE, 'Only file input is supported.', 400);
132
+ if ((await getPdfInfo(image)).numPages > pages.length) {
133
+ return await readAll(image, options);
134
+ }
135
+ }
131
136
  const content = await convert(image, {
132
137
  input: options?.input, expected: BASE64, errorMessage,
133
138
  });
@@ -163,9 +168,45 @@ const readAll = async (image, options) => {
163
168
  ).flat();
164
169
  };
165
170
 
171
// Extract the dimensions and plain text of a single PDF page.
// `doc` must expose `getPage(pageNum)` (presumably a pdf.js document proxy —
// confirm against callers); `pageNum` is forwarded to it unchanged.
// Resolves to { pageNum, width, height, content }: width/height are taken from
// the scale-1.0 viewport, content is every text item's `str` joined by spaces.
const getPdfPage = async (doc, pageNum) => {
    const page = await doc.getPage(pageNum);
    try {
        const { width, height } = page.getViewport({ scale: 1.0 });
        const { items } = await page.getTextContent();
        return {
            pageNum,
            width,
            height,
            content: items.map(x => x.str).join(' '),
        };
    } finally {
        // Always release the page, even when text extraction rejects.
        page.cleanup();
    }
};
183
+
184
// Collect the per-page summaries for every page of `doc`.
// Pages are numbered from 1 to `doc.numPages`; all lookups are started up
// front and awaited together, so the resolved array keeps page order.
const getPdfPages = async (doc) => {
    const pending = Array.from(
        { length: doc.numPages },
        (_, index) => getPdfPage(doc, index + 1),
    );
    return await Promise.all(pending);
};
189
+
190
// https://github.com/mozilla/pdf.js/blob/master/examples/node/getinfo.mjs
// Load a PDF through `pdfjs-dist` and summarise it.
// `file` is handed to pdf.js `getDocument` unchanged.
// `options.withPages`: when truthy, also include per-page data from getPdfPages.
// Resolves to { numPages, info, metadata, pages }; `metadata` is an empty
// object when the document has no metadata, `pages` is null unless requested.
const getPdfInfo = async (file, options) => {
    const { getDocument } = await need('pdfjs-dist');
    const doc = await getDocument(file).promise;
    try {
        const data = await doc.getMetadata();
        return {
            numPages: doc.numPages,
            info: data.info,
            // `data.metadata` is null for PDFs without an XMP metadata stream.
            metadata: { ...data.metadata?.getAll() },
            pages: options?.withPages ? await getPdfPages(doc) : null,
        };
    } finally {
        // The document never leaves this function, so release it here.
        await doc.destroy();
    }
};
203
+
166
204
  export {
167
205
  _NEED,
168
206
  annotateImage,
207
+ getPdfInfo,
208
+ getPdfPage,
209
+ getPdfPages,
169
210
  init,
170
211
  ocrImage,
171
212
  ocrImageGoogle,
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "utilitas",
3
3
  "description": "Just another common utility for JavaScript.",
4
- "version": "1995.2.6",
4
+ "version": "1995.2.8",
5
5
  "private": false,
6
6
  "homepage": "https://github.com/Leask/utilitas",
7
7
  "main": "index.mjs",
@@ -39,7 +39,7 @@
39
39
  "@google-cloud/text-to-speech": "^5.0.1",
40
40
  "@google-cloud/vision": "^4.0.2",
41
41
  "@mozilla/readability": "^0.4.4",
42
- "@sentry/node": "^7.80.1",
42
+ "@sentry/node": "^7.83.0",
43
43
  "@waylaidwanderer/chatgpt-api": "^1.37.3",
44
44
  "acme-client": "^5.0.0",
45
45
  "browserify-fs": "^1.0.0",
@@ -48,24 +48,25 @@
48
48
  "fluent-ffmpeg": "^2.1.2",
49
49
  "form-data": "^4.0.0",
50
50
  "ioredis": "^5.3.2",
51
- "jsdom": "^22.1.0",
51
+ "jsdom": "^23.0.0",
52
52
  "lorem-ipsum": "^2.0.8",
53
53
  "mailgun.js": "^9.3.0",
54
54
  "mime-types": "^2.1.35",
55
- "mysql2": "^3.6.3",
56
- "node-mailjet": "^6.0.4",
55
+ "mysql2": "^3.6.5",
56
+ "node-mailjet": "^6.0.5",
57
57
  "node-polyfill-webpack-plugin": "^2.0.1",
58
58
  "office-text-extractor": "^3.0.2",
59
- "openai": "^4.19.0",
59
+ "openai": "^4.20.1",
60
+ "pdfjs-dist": "^4.0.269",
60
61
  "ping": "^0.4.4",
61
62
  "say": "^0.16.0",
62
- "telegraf": "^4.15.0",
63
+ "telegraf": "^4.15.2",
63
64
  "telesignsdk": "^2.2.3",
64
65
  "tesseract.js": "^5.0.3",
65
66
  "twilio": "^4.19.0",
66
67
  "url": "github:Leask/node-url",
67
68
  "webpack-cli": "^5.1.4",
68
- "whisper-node": "^0.2.12",
69
+ "whisper-node": "^1.1.1",
69
70
  "youtube-transcript": "^1.0.6"
70
71
  }
71
72
  }