utilitas 1995.2.5 → 1995.2.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -3
- package/dist/utilitas.lite.mjs +1 -1
- package/dist/utilitas.lite.mjs.map +1 -1
- package/lib/hal.mjs +12 -4
- package/lib/manifest.mjs +1 -1
- package/lib/vision.mjs +8 -9
- package/package.json +1 -1
package/lib/hal.mjs
CHANGED
|
@@ -13,7 +13,10 @@ const li = (id, text, url) => `\n${id}. ` + (url ? link(text, url) : text);
|
|
|
13
13
|
const cardReg = /^\[\d*\]:\ ([^\ ]*)\ "(.*)"$/ig;
|
|
14
14
|
// https://help.openai.com/en/articles/4936856-what-are-tokens-and-how-to-count-them
|
|
15
15
|
const countTokens = txt => Math.ceil(txt.split(/[^a-z0-9]/i).length * 100 / 75);
|
|
16
|
-
|
|
16
|
+
// Keep this for GPT4 {
|
|
17
|
+
// const MAX_CONTEXT_TOKENS = 8192;
|
|
18
|
+
// }
|
|
19
|
+
const MAX_CONTEXT_TOKENS = 4096;
|
|
17
20
|
const MAX_PROMPT_TOKENS = Math.floor(MAX_CONTEXT_TOKENS * 0.6);
|
|
18
21
|
const MAX_RESPONSE_TOKENS = MAX_CONTEXT_TOKENS - MAX_PROMPT_TOKENS;
|
|
19
22
|
|
|
@@ -35,10 +38,15 @@ const init = async options => {
|
|
|
35
38
|
engine = (await import('@waylaidwanderer/chatgpt-api')).ChatGPTClient;
|
|
36
39
|
client = new engine(options?.clientOptions?.apiKey, {
|
|
37
40
|
keepNecessaryMessagesOnly: true,
|
|
38
|
-
|
|
41
|
+
// Keep this for GPT4 {
|
|
42
|
+
// maxContextTokens: MAX_CONTEXT_TOKENS,
|
|
43
|
+
// }
|
|
39
44
|
modelOptions: {
|
|
40
|
-
model: options?.model || 'gpt-4',
|
|
41
|
-
|
|
45
|
+
model: options?.model || 'gpt-3.5-turbo',
|
|
46
|
+
// Keep this for GPT4 {
|
|
47
|
+
// model: options?.model || 'gpt-4',
|
|
48
|
+
// max_tokens: MAX_RESPONSE_TOKENS,
|
|
49
|
+
// }
|
|
42
50
|
...options?.clientOptions?.modelOptions || {}
|
|
43
51
|
}, ...options?.clientOptions || {},
|
|
44
52
|
}, options?.cacheOptions);
|
package/lib/manifest.mjs
CHANGED
package/lib/vision.mjs
CHANGED
|
@@ -6,7 +6,7 @@ import {
|
|
|
6
6
|
} from './storage.mjs';
|
|
7
7
|
|
|
8
8
|
import {
|
|
9
|
-
ensureArray, ignoreErrFunc, log as _log, need, throwError, trim,
|
|
9
|
+
ensureArray, ignoreErrFunc, log as _log, need, throwError, trim,
|
|
10
10
|
} from './utilitas.mjs';
|
|
11
11
|
|
|
12
12
|
const _NEED = ['@google-cloud/vision', 'tesseract.js'];
|
|
@@ -14,6 +14,10 @@ const [BASE64, BUFFER, FILE, DEFAULT_LANG] = ['BASE64', 'BUFFER', 'FILE', 'eng']
|
|
|
14
14
|
const ceil = num => num.toFixed(4);
|
|
15
15
|
const errorMessage = 'Invalid image data.';
|
|
16
16
|
const getTextFromBatch = bt => bt.responses.map(p => p.fullTextAnnotation.text);
|
|
17
|
+
const DOCUMENT_TEXT_DETECTION = 'DOCUMENT_TEXT_DETECTION';
|
|
18
|
+
const features = [{ type: DOCUMENT_TEXT_DETECTION }];
|
|
19
|
+
const mimeType = 'application/pdf';
|
|
20
|
+
const pages = [1, 2, 3, 4, 5]; // max 5 pages limit for batchAnnotateFiles API
|
|
17
21
|
const log = content => _log(content, import.meta.url);
|
|
18
22
|
|
|
19
23
|
let client;
|
|
@@ -128,11 +132,7 @@ const read = async (image, options) => {
|
|
|
128
132
|
input: options?.input, expected: BASE64, errorMessage,
|
|
129
133
|
});
|
|
130
134
|
let result = await client.batchAnnotateFiles({
|
|
131
|
-
requests: [{
|
|
132
|
-
inputConfig: { mimeType: 'application/pdf', content },
|
|
133
|
-
features: [{ type: 'DOCUMENT_TEXT_DETECTION' }],
|
|
134
|
-
pages: [1, 2, 3, 4, 5], // max 5 pages
|
|
135
|
-
}],
|
|
135
|
+
requests: [{ inputConfig: { mimeType, content }, features, pages }],
|
|
136
136
|
});
|
|
137
137
|
return options?.raw ? result : getTextFromBatch(result[0].responses[0]);
|
|
138
138
|
};
|
|
@@ -149,9 +149,8 @@ const readAll = async (image, options) => {
|
|
|
149
149
|
result.clear = await deleteOnCloud(resultId);
|
|
150
150
|
result.submit = await client.asyncBatchAnnotateFiles({
|
|
151
151
|
requests: [{
|
|
152
|
-
inputConfig: { mimeType: 'application/pdf', gcsSource: { uri } },
|
|
153
|
-
outputConfig: { gcsDestination: { uri: destination } },
|
|
154
|
-
features: [{ type: 'DOCUMENT_TEXT_DETECTION' }],
|
|
152
|
+
inputConfig: { mimeType, gcsSource: { uri } },
|
|
153
|
+
outputConfig: { gcsDestination: { uri: destination } }, features,
|
|
155
154
|
}],
|
|
156
155
|
});
|
|
157
156
|
result.response = await result.submit[0].promise();
|