utilitas 1995.2.6 → 1995.2.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -4
- package/dist/utilitas.lite.mjs +1 -1
- package/dist/utilitas.lite.mjs.map +1 -1
- package/lib/hal.mjs +12 -4
- package/lib/manifest.mjs +9 -8
- package/lib/shot.mjs +1 -3
- package/lib/vision.mjs +45 -4
- package/package.json +9 -8
package/lib/hal.mjs
CHANGED
|
@@ -13,7 +13,10 @@ const li = (id, text, url) => `\n${id}. ` + (url ? link(text, url) : text);
|
|
|
13
13
|
const cardReg = /^\[\d*\]:\ ([^\ ]*)\ "(.*)"$/ig;
|
|
14
14
|
// https://help.openai.com/en/articles/4936856-what-are-tokens-and-how-to-count-them
|
|
15
15
|
const countTokens = txt => Math.ceil(txt.split(/[^a-z0-9]/i).length * 100 / 75);
|
|
16
|
-
|
|
16
|
+
// Keep this for GPT4 {
|
|
17
|
+
// const MAX_CONTEXT_TOKENS = 8192;
|
|
18
|
+
// }
|
|
19
|
+
const MAX_CONTEXT_TOKENS = 4096;
|
|
17
20
|
const MAX_PROMPT_TOKENS = Math.floor(MAX_CONTEXT_TOKENS * 0.6);
|
|
18
21
|
const MAX_RESPONSE_TOKENS = MAX_CONTEXT_TOKENS - MAX_PROMPT_TOKENS;
|
|
19
22
|
|
|
@@ -35,10 +38,15 @@ const init = async options => {
|
|
|
35
38
|
engine = (await import('@waylaidwanderer/chatgpt-api')).ChatGPTClient;
|
|
36
39
|
client = new engine(options?.clientOptions?.apiKey, {
|
|
37
40
|
keepNecessaryMessagesOnly: true,
|
|
38
|
-
|
|
41
|
+
// Keep this for GPT4 {
|
|
42
|
+
// maxContextTokens: MAX_CONTEXT_TOKENS,
|
|
43
|
+
// }
|
|
39
44
|
modelOptions: {
|
|
40
|
-
model: options?.model || 'gpt-
|
|
41
|
-
|
|
45
|
+
model: options?.model || 'gpt-3.5-turbo',
|
|
46
|
+
// Keep this for GPT4 {
|
|
47
|
+
// model: options?.model || 'gpt-4',
|
|
48
|
+
// max_tokens: MAX_RESPONSE_TOKENS,
|
|
49
|
+
// }
|
|
42
50
|
...options?.clientOptions?.modelOptions || {}
|
|
43
51
|
}, ...options?.clientOptions || {},
|
|
44
52
|
}, options?.cacheOptions);
|
package/lib/manifest.mjs
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
const manifest = {
|
|
2
2
|
"name": "utilitas",
|
|
3
3
|
"description": "Just another common utility for JavaScript.",
|
|
4
|
-
"version": "1995.2.6",
|
|
4
|
+
"version": "1995.2.8",
|
|
5
5
|
"private": false,
|
|
6
6
|
"homepage": "https://github.com/Leask/utilitas",
|
|
7
7
|
"main": "index.mjs",
|
|
@@ -28,7 +28,7 @@ const manifest = {
|
|
|
28
28
|
"@google-cloud/text-to-speech": "^5.0.1",
|
|
29
29
|
"@google-cloud/vision": "^4.0.2",
|
|
30
30
|
"@mozilla/readability": "^0.4.4",
|
|
31
|
-
"@sentry/node": "^7.
|
|
31
|
+
"@sentry/node": "^7.83.0",
|
|
32
32
|
"@waylaidwanderer/chatgpt-api": "^1.37.3",
|
|
33
33
|
"acme-client": "^5.0.0",
|
|
34
34
|
"browserify-fs": "^1.0.0",
|
|
@@ -37,24 +37,25 @@ const manifest = {
|
|
|
37
37
|
"fluent-ffmpeg": "^2.1.2",
|
|
38
38
|
"form-data": "^4.0.0",
|
|
39
39
|
"ioredis": "^5.3.2",
|
|
40
|
-
"jsdom": "^
|
|
40
|
+
"jsdom": "^23.0.0",
|
|
41
41
|
"lorem-ipsum": "^2.0.8",
|
|
42
42
|
"mailgun.js": "^9.3.0",
|
|
43
43
|
"mime-types": "^2.1.35",
|
|
44
|
-
"mysql2": "^3.6.
|
|
45
|
-
"node-mailjet": "^6.0.
|
|
44
|
+
"mysql2": "^3.6.5",
|
|
45
|
+
"node-mailjet": "^6.0.5",
|
|
46
46
|
"node-polyfill-webpack-plugin": "^2.0.1",
|
|
47
47
|
"office-text-extractor": "^3.0.2",
|
|
48
|
-
"openai": "^4.
|
|
48
|
+
"openai": "^4.20.1",
|
|
49
|
+
"pdfjs-dist": "^4.0.269",
|
|
49
50
|
"ping": "^0.4.4",
|
|
50
51
|
"say": "^0.16.0",
|
|
51
|
-
"telegraf": "^4.15.
|
|
52
|
+
"telegraf": "^4.15.2",
|
|
52
53
|
"telesignsdk": "^2.2.3",
|
|
53
54
|
"tesseract.js": "^5.0.3",
|
|
54
55
|
"twilio": "^4.19.0",
|
|
55
56
|
"url": "github:Leask/node-url",
|
|
56
57
|
"webpack-cli": "^5.1.4",
|
|
57
|
-
"whisper-node": "^
|
|
58
|
+
"whisper-node": "^1.1.1",
|
|
58
59
|
"youtube-transcript": "^1.0.6"
|
|
59
60
|
}
|
|
60
61
|
};
|
package/lib/shot.mjs
CHANGED
|
@@ -122,14 +122,12 @@ const get = async (url, options) => {
|
|
|
122
122
|
case _JSON:
|
|
123
123
|
content = parseJson(buf2utf(buffer), null);
|
|
124
124
|
break;
|
|
125
|
-
case 'TEXT':
|
|
126
|
-
content = buf2utf(buffer);
|
|
127
|
-
break;
|
|
128
125
|
case _PARSED:
|
|
129
126
|
content = await distillHtml(buf2utf(buffer));
|
|
130
127
|
break;
|
|
131
128
|
default:
|
|
132
129
|
assert(!options.encode, 'Invalid encoding.', 400);
|
|
130
|
+
case 'TEXT':
|
|
133
131
|
content = buf2utf(buffer);
|
|
134
132
|
}
|
|
135
133
|
}
|
package/lib/vision.mjs
CHANGED
|
@@ -1,6 +1,3 @@
|
|
|
1
|
-
import { getApiKeyCredentials } from './encryption.mjs';
|
|
2
|
-
import get from './shot.mjs';
|
|
3
|
-
|
|
4
1
|
import {
|
|
5
2
|
convert, deleteOnCloud, downloadFromCloud, getIdByGs, uploadToCloud,
|
|
6
3
|
} from './storage.mjs';
|
|
@@ -9,7 +6,9 @@ import {
|
|
|
9
6
|
ensureArray, ignoreErrFunc, log as _log, need, throwError, trim,
|
|
10
7
|
} from './utilitas.mjs';
|
|
11
8
|
|
|
12
|
-
|
|
9
|
+
import { getApiKeyCredentials } from './encryption.mjs';
|
|
10
|
+
|
|
11
|
+
const _NEED = ['@google-cloud/vision', 'pdfjs-dist', 'tesseract.js'];
|
|
13
12
|
const [BASE64, BUFFER, FILE, DEFAULT_LANG] = ['BASE64', 'BUFFER', 'FILE', 'eng'];
|
|
14
13
|
const ceil = num => num.toFixed(4);
|
|
15
14
|
const errorMessage = 'Invalid image data.';
|
|
@@ -128,6 +127,12 @@ const see = async (image, options) => {
|
|
|
128
127
|
|
|
129
128
|
const read = async (image, options) => {
|
|
130
129
|
assert(client, 'Vision API has not been initialized.', 500);
|
|
130
|
+
if (options?.allPages) {
|
|
131
|
+
assert(options?.input === FILE, 'Only file input is supported.', 400);
|
|
132
|
+
if ((await getPdfInfo(image)).numPages > pages.length) {
|
|
133
|
+
return await readAll(image, options);
|
|
134
|
+
}
|
|
135
|
+
}
|
|
131
136
|
const content = await convert(image, {
|
|
132
137
|
input: options?.input, expected: BASE64, errorMessage,
|
|
133
138
|
});
|
|
@@ -163,9 +168,45 @@ const readAll = async (image, options) => {
|
|
|
163
168
|
).flat();
|
|
164
169
|
};
|
|
165
170
|
|
|
171
|
+
const getPdfPage = async (doc, pageNum) => {
|
|
172
|
+
const page = await doc.getPage(pageNum);
|
|
173
|
+
const viewport = page.getViewport({ scale: 1.0 });
|
|
174
|
+
const result = {
|
|
175
|
+
pageNum: pageNum,
|
|
176
|
+
width: viewport.width,
|
|
177
|
+
height: viewport.height,
|
|
178
|
+
content: (await page.getTextContent()).items.map(x => x.str).join(' '),
|
|
179
|
+
};
|
|
180
|
+
page.cleanup();
|
|
181
|
+
return result
|
|
182
|
+
};
|
|
183
|
+
|
|
184
|
+
const getPdfPages = async (doc) => {
|
|
185
|
+
const result = [];
|
|
186
|
+
for (let i = 1; i <= doc.numPages; i++) { result.push(getPdfPage(doc, i)); }
|
|
187
|
+
return await Promise.all(result);
|
|
188
|
+
};
|
|
189
|
+
|
|
190
|
+
// https://github.com/mozilla/pdf.js/blob/master/examples/node/getinfo.mjs
|
|
191
|
+
const getPdfInfo = async (file, options) => {
|
|
192
|
+
const { getDocument } = await need('pdfjs-dist');
|
|
193
|
+
const doc = await getDocument(file).promise;
|
|
194
|
+
const data = await doc.getMetadata();
|
|
195
|
+
const result = {
|
|
196
|
+
numPages: doc.numPages,
|
|
197
|
+
info: data.info,
|
|
198
|
+
metadata: { ...data.metadata.getAll() },
|
|
199
|
+
pages: options?.withPages ? await getPdfPages(doc) : null,
|
|
200
|
+
};
|
|
201
|
+
return result;
|
|
202
|
+
};
|
|
203
|
+
|
|
166
204
|
export {
|
|
167
205
|
_NEED,
|
|
168
206
|
annotateImage,
|
|
207
|
+
getPdfInfo,
|
|
208
|
+
getPdfPage,
|
|
209
|
+
getPdfPages,
|
|
169
210
|
init,
|
|
170
211
|
ocrImage,
|
|
171
212
|
ocrImageGoogle,
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "utilitas",
|
|
3
3
|
"description": "Just another common utility for JavaScript.",
|
|
4
|
-
"version": "1995.2.6",
|
|
4
|
+
"version": "1995.2.8",
|
|
5
5
|
"private": false,
|
|
6
6
|
"homepage": "https://github.com/Leask/utilitas",
|
|
7
7
|
"main": "index.mjs",
|
|
@@ -39,7 +39,7 @@
|
|
|
39
39
|
"@google-cloud/text-to-speech": "^5.0.1",
|
|
40
40
|
"@google-cloud/vision": "^4.0.2",
|
|
41
41
|
"@mozilla/readability": "^0.4.4",
|
|
42
|
-
"@sentry/node": "^7.
|
|
42
|
+
"@sentry/node": "^7.83.0",
|
|
43
43
|
"@waylaidwanderer/chatgpt-api": "^1.37.3",
|
|
44
44
|
"acme-client": "^5.0.0",
|
|
45
45
|
"browserify-fs": "^1.0.0",
|
|
@@ -48,24 +48,25 @@
|
|
|
48
48
|
"fluent-ffmpeg": "^2.1.2",
|
|
49
49
|
"form-data": "^4.0.0",
|
|
50
50
|
"ioredis": "^5.3.2",
|
|
51
|
-
"jsdom": "^
|
|
51
|
+
"jsdom": "^23.0.0",
|
|
52
52
|
"lorem-ipsum": "^2.0.8",
|
|
53
53
|
"mailgun.js": "^9.3.0",
|
|
54
54
|
"mime-types": "^2.1.35",
|
|
55
|
-
"mysql2": "^3.6.
|
|
56
|
-
"node-mailjet": "^6.0.
|
|
55
|
+
"mysql2": "^3.6.5",
|
|
56
|
+
"node-mailjet": "^6.0.5",
|
|
57
57
|
"node-polyfill-webpack-plugin": "^2.0.1",
|
|
58
58
|
"office-text-extractor": "^3.0.2",
|
|
59
|
-
"openai": "^4.
|
|
59
|
+
"openai": "^4.20.1",
|
|
60
|
+
"pdfjs-dist": "^4.0.269",
|
|
60
61
|
"ping": "^0.4.4",
|
|
61
62
|
"say": "^0.16.0",
|
|
62
|
-
"telegraf": "^4.15.
|
|
63
|
+
"telegraf": "^4.15.2",
|
|
63
64
|
"telesignsdk": "^2.2.3",
|
|
64
65
|
"tesseract.js": "^5.0.3",
|
|
65
66
|
"twilio": "^4.19.0",
|
|
66
67
|
"url": "github:Leask/node-url",
|
|
67
68
|
"webpack-cli": "^5.1.4",
|
|
68
|
-
"whisper-node": "^
|
|
69
|
+
"whisper-node": "^1.1.1",
|
|
69
70
|
"youtube-transcript": "^1.0.6"
|
|
70
71
|
}
|
|
71
72
|
}
|