@promptbook/remote-server 0.84.0-12 → 0.84.0-14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/esm/index.es.js +103 -50
- package/esm/index.es.js.map +1 -1
- package/esm/typings/src/_packages/core.index.d.ts +4 -0
- package/esm/typings/src/config.d.ts +12 -0
- package/esm/typings/src/utils/editable/edit-pipeline-string/deflatePipeline.test.d.ts +1 -0
- package/esm/typings/src/utils/editable/utils/isFlatPipeline.test.d.ts +1 -0
- package/esm/typings/src/utils/files/mimeTypeToExtension.d.ts +10 -0
- package/esm/typings/src/utils/files/mimeTypeToExtension.test.d.ts +1 -0
- package/package.json +2 -2
- package/umd/index.umd.js +102 -49
- package/umd/index.umd.js.map +1 -1
package/esm/index.es.js
CHANGED
|
@@ -12,7 +12,7 @@ import { unparse, parse } from 'papaparse';
|
|
|
12
12
|
import hexEncoder from 'crypto-js/enc-hex';
|
|
13
13
|
import sha256 from 'crypto-js/sha256';
|
|
14
14
|
import { SHA256 } from 'crypto-js';
|
|
15
|
-
import { lookup } from 'mime-types';
|
|
15
|
+
import { lookup, extension } from 'mime-types';
|
|
16
16
|
|
|
17
17
|
// ⚠️ WARNING: This code has been generated so that any manual changes will be overwritten
|
|
18
18
|
/**
|
|
@@ -28,7 +28,7 @@ var BOOK_LANGUAGE_VERSION = '1.0.0';
|
|
|
28
28
|
* @generated
|
|
29
29
|
* @see https://github.com/webgptorg/promptbook
|
|
30
30
|
*/
|
|
31
|
-
var PROMPTBOOK_ENGINE_VERSION = '0.84.0-
|
|
31
|
+
var PROMPTBOOK_ENGINE_VERSION = '0.84.0-13';
|
|
32
32
|
/**
|
|
33
33
|
* TODO: string_promptbook_version should be constrained to all versions of the Promptbook engine
|
|
34
34
|
* Note: [💞] Ignore a discrepancy between file name and entity name
|
|
@@ -5999,22 +5999,6 @@ function $registeredScrapersMessage(availableScrapers) {
|
|
|
5999
5999
|
* TODO: [®] DRY Register logic
|
|
6000
6000
|
*/
|
|
6001
6001
|
|
|
6002
|
-
/**
|
|
6003
|
-
* Removes emojis from a string and fix whitespaces
|
|
6004
|
-
*
|
|
6005
|
-
* @param text with emojis
|
|
6006
|
-
* @returns text without emojis
|
|
6007
|
-
* @public exported from `@promptbook/utils`
|
|
6008
|
-
*/
|
|
6009
|
-
function removeEmojis(text) {
|
|
6010
|
-
// Strip emojis (including modifier, variation-selector and ZWJ sequences) from the text
|
|
6011
|
-
text = text.replace(/(\p{Extended_Pictographic})\p{Modifier_Symbol}/gu, '$1');
|
|
6012
|
-
text = text.replace(/(\p{Extended_Pictographic})[\u{FE00}-\u{FE0F}]/gu, '$1');
|
|
6013
|
-
text = text.replace(/(\p{Extended_Pictographic})(\u{200D}\p{Extended_Pictographic})*/gu, '$1');
|
|
6014
|
-
text = text.replace(/\p{Extended_Pictographic}/gu, '');
|
|
6015
|
-
return text;
|
|
6016
|
-
}
|
|
6017
|
-
|
|
6018
6002
|
/**
|
|
6019
6003
|
* @@@
|
|
6020
6004
|
*
|
|
@@ -6077,30 +6061,6 @@ function normalizeToKebabCase(text) {
|
|
|
6077
6061
|
* Note: [💞] Ignore a discrepancy between file name and entity name
|
|
6078
6062
|
*/
|
|
6079
6063
|
|
|
6080
|
-
/**
|
|
6081
|
-
* @@@
|
|
6082
|
-
*
|
|
6083
|
-
* @param value @@@
|
|
6084
|
-
* @returns @@@
|
|
6085
|
-
* @example @@@
|
|
6086
|
-
* @public exported from `@promptbook/utils`
|
|
6087
|
-
*/
|
|
6088
|
-
function titleToName(value) {
|
|
6089
|
-
if (isValidUrl(value)) {
|
|
6090
|
-
value = value.replace(/^https?:\/\//, '');
|
|
6091
|
-
value = value.replace(/\.html$/, '');
|
|
6092
|
-
}
|
|
6093
|
-
else if (isValidFilePath(value)) {
|
|
6094
|
-
value = basename(value);
|
|
6095
|
-
// Note: Keeping extension in the name
|
|
6096
|
-
}
|
|
6097
|
-
value = value.split('/').join('-');
|
|
6098
|
-
value = removeEmojis(value);
|
|
6099
|
-
value = normalizeToKebabCase(value);
|
|
6100
|
-
// TODO: [🧠] Maybe warn or add some padding to short name which are not good identifiers
|
|
6101
|
-
return value;
|
|
6102
|
-
}
|
|
6103
|
-
|
|
6104
6064
|
/**
|
|
6105
6065
|
* Creates unique name for the source
|
|
6106
6066
|
*
|
|
@@ -6186,6 +6146,57 @@ function isFileExisting(filename, fs) {
|
|
|
6186
6146
|
* TODO: [🖇] What about symlinks?
|
|
6187
6147
|
*/
|
|
6188
6148
|
|
|
6149
|
+
/**
|
|
6150
|
+
* Convert mime type to file extension
|
|
6151
|
+
*
|
|
6152
|
+
* Note: If the mime type is invalid, `null` is returned
|
|
6153
|
+
*
|
|
6154
|
+
* @private within the repository
|
|
6155
|
+
*/
|
|
6156
|
+
function mimeTypeToExtension(value) {
|
|
6157
|
+
return extension(value) || null;
|
|
6158
|
+
}
|
|
6159
|
+
|
|
6160
|
+
/**
|
|
6161
|
+
* Removes emojis from a string and fix whitespaces
|
|
6162
|
+
*
|
|
6163
|
+
* @param text with emojis
|
|
6164
|
+
* @returns text without emojis
|
|
6165
|
+
* @public exported from `@promptbook/utils`
|
|
6166
|
+
*/
|
|
6167
|
+
function removeEmojis(text) {
|
|
6168
|
+
// Strip emojis (including modifier, variation-selector and ZWJ sequences) from the text
|
|
6169
|
+
text = text.replace(/(\p{Extended_Pictographic})\p{Modifier_Symbol}/gu, '$1');
|
|
6170
|
+
text = text.replace(/(\p{Extended_Pictographic})[\u{FE00}-\u{FE0F}]/gu, '$1');
|
|
6171
|
+
text = text.replace(/(\p{Extended_Pictographic})(\u{200D}\p{Extended_Pictographic})*/gu, '$1');
|
|
6172
|
+
text = text.replace(/\p{Extended_Pictographic}/gu, '');
|
|
6173
|
+
return text;
|
|
6174
|
+
}
|
|
6175
|
+
|
|
6176
|
+
/**
|
|
6177
|
+
* @@@
|
|
6178
|
+
*
|
|
6179
|
+
* @param value @@@
|
|
6180
|
+
* @returns @@@
|
|
6181
|
+
* @example @@@
|
|
6182
|
+
* @public exported from `@promptbook/utils`
|
|
6183
|
+
*/
|
|
6184
|
+
function titleToName(value) {
|
|
6185
|
+
if (isValidUrl(value)) {
|
|
6186
|
+
value = value.replace(/^https?:\/\//, '');
|
|
6187
|
+
value = value.replace(/\.html$/, '');
|
|
6188
|
+
}
|
|
6189
|
+
else if (isValidFilePath(value)) {
|
|
6190
|
+
value = basename(value);
|
|
6191
|
+
// Note: Keeping extension in the name
|
|
6192
|
+
}
|
|
6193
|
+
value = value.split('/').join('-');
|
|
6194
|
+
value = removeEmojis(value);
|
|
6195
|
+
value = normalizeToKebabCase(value);
|
|
6196
|
+
// TODO: [🧠] Maybe warn or add some padding to short name which are not good identifiers
|
|
6197
|
+
return value;
|
|
6198
|
+
}
|
|
6199
|
+
|
|
6189
6200
|
/**
|
|
6190
6201
|
* The built-in `fetch` function with a lightweight error-handling wrapper, used as the default fetch function in Promptbook scrapers
|
|
6191
6202
|
*
|
|
@@ -6221,7 +6232,7 @@ var scraperFetch = function (url, init) { return __awaiter(void 0, void 0, void
|
|
|
6221
6232
|
function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
|
|
6222
6233
|
var _a;
|
|
6223
6234
|
return __awaiter(this, void 0, void 0, function () {
|
|
6224
|
-
var _b, fetch, knowledgeSourceContent, name, _c, _d, rootDirname, url,
|
|
6235
|
+
var _b, fetch, knowledgeSourceContent, name, _c, _d, rootDirname, url, response_1, mimeType, basename, hash, rootDirname_1, filepath, _f, _g, _h, _j, _k, filename_1, fileExtension, mimeType;
|
|
6225
6236
|
return __generator(this, function (_l) {
|
|
6226
6237
|
switch (_l.label) {
|
|
6227
6238
|
case 0:
|
|
@@ -6237,25 +6248,67 @@ function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
|
|
|
6237
6248
|
url = knowledgeSourceContent;
|
|
6238
6249
|
return [4 /*yield*/, fetch(url)];
|
|
6239
6250
|
case 1:
|
|
6240
|
-
|
|
6241
|
-
mimeType = ((_a =
|
|
6242
|
-
|
|
6251
|
+
response_1 = _l.sent();
|
|
6252
|
+
mimeType = ((_a = response_1.headers.get('content-type')) === null || _a === void 0 ? void 0 : _a.split(';')[0]) || 'text/html';
|
|
6253
|
+
if (tools.fs === undefined || !url.endsWith('.pdf' /* <- TODO: [💵] */)) {
|
|
6254
|
+
return [2 /*return*/, {
|
|
6255
|
+
source: name,
|
|
6256
|
+
filename: null,
|
|
6257
|
+
url: url,
|
|
6258
|
+
mimeType: mimeType,
|
|
6259
|
+
/*
|
|
6260
|
+
TODO: [🥽]
|
|
6261
|
+
> async asBlob() {
|
|
6262
|
+
> // TODO: [👨🏻🤝👨🏻] This can be called multiple times BUT when called a second time, the response is already consumed
|
|
6263
|
+
> const content = await response.blob();
|
|
6264
|
+
> return content;
|
|
6265
|
+
> },
|
|
6266
|
+
*/
|
|
6267
|
+
asJson: function () {
|
|
6268
|
+
return __awaiter(this, void 0, void 0, function () {
|
|
6269
|
+
var content;
|
|
6270
|
+
return __generator(this, function (_a) {
|
|
6271
|
+
switch (_a.label) {
|
|
6272
|
+
case 0: return [4 /*yield*/, response_1.json()];
|
|
6273
|
+
case 1:
|
|
6274
|
+
content = _a.sent();
|
|
6275
|
+
return [2 /*return*/, content];
|
|
6276
|
+
}
|
|
6277
|
+
});
|
|
6278
|
+
});
|
|
6279
|
+
},
|
|
6280
|
+
asText: function () {
|
|
6281
|
+
return __awaiter(this, void 0, void 0, function () {
|
|
6282
|
+
var content;
|
|
6283
|
+
return __generator(this, function (_a) {
|
|
6284
|
+
switch (_a.label) {
|
|
6285
|
+
case 0: return [4 /*yield*/, response_1.text()];
|
|
6286
|
+
case 1:
|
|
6287
|
+
content = _a.sent();
|
|
6288
|
+
return [2 /*return*/, content];
|
|
6289
|
+
}
|
|
6290
|
+
});
|
|
6291
|
+
});
|
|
6292
|
+
},
|
|
6293
|
+
}];
|
|
6294
|
+
}
|
|
6295
|
+
basename = url.split('/').pop() || titleToName(url);
|
|
6243
6296
|
hash = sha256(hexEncoder.parse(url)).toString( /* hex */);
|
|
6244
6297
|
rootDirname_1 = join(process.cwd(), DEFAULT_DOWNLOAD_CACHE_DIRNAME);
|
|
6245
|
-
filepath = join.apply(void 0, __spreadArray(__spreadArray([], __read(nameToSubfolderPath(hash /* <- TODO: [🎎] Maybe add some SHA256 prefix */)), false), ["".concat(
|
|
6298
|
+
filepath = join.apply(void 0, __spreadArray(__spreadArray([], __read(nameToSubfolderPath(hash /* <- TODO: [🎎] Maybe add some SHA256 prefix */)), false), ["".concat(basename.substring(0, MAX_FILENAME_LENGTH), ".").concat(mimeTypeToExtension(mimeType))], false));
|
|
6246
6299
|
return [4 /*yield*/, tools.fs.mkdir(dirname(join(rootDirname_1, filepath)), { recursive: true })];
|
|
6247
6300
|
case 2:
|
|
6248
6301
|
_l.sent();
|
|
6249
6302
|
_g = (_f = tools.fs).writeFile;
|
|
6250
6303
|
_h = [join(rootDirname_1, filepath)];
|
|
6251
6304
|
_k = (_j = Buffer).from;
|
|
6252
|
-
return [4 /*yield*/,
|
|
6305
|
+
return [4 /*yield*/, response_1.arrayBuffer()];
|
|
6253
6306
|
case 3: return [4 /*yield*/, _g.apply(_f, _h.concat([_k.apply(_j, [_l.sent()])]))];
|
|
6254
6307
|
case 4:
|
|
6255
6308
|
_l.sent();
|
|
6256
|
-
// TODO:
|
|
6309
|
+
// TODO: [💵] Check the file security
|
|
6257
6310
|
// TODO: !!!!!!!! Check the file size (if it is not too big)
|
|
6258
|
-
// TODO: !!!!!!!! Delete the file
|
|
6311
|
+
// TODO: !!!!!!!! Delete the file after the scraping is done
|
|
6259
6312
|
return [2 /*return*/, makeKnowledgeSourceHandler({ name: name, knowledgeSourceContent: filepath }, tools, __assign(__assign({}, options), { rootDirname: rootDirname_1 }))];
|
|
6260
6313
|
case 5:
|
|
6261
6314
|
if (!isValidFilePath(knowledgeSourceContent)) return [3 /*break*/, 7];
|