@promptbook/remote-server 0.84.0-12 → 0.84.0-14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/esm/index.es.js CHANGED
@@ -12,7 +12,7 @@ import { unparse, parse } from 'papaparse';
12
12
  import hexEncoder from 'crypto-js/enc-hex';
13
13
  import sha256 from 'crypto-js/sha256';
14
14
  import { SHA256 } from 'crypto-js';
15
- import { lookup } from 'mime-types';
15
+ import { lookup, extension } from 'mime-types';
16
16
 
17
17
  // ⚠️ WARNING: This code has been generated so that any manual changes will be overwritten
18
18
  /**
@@ -28,7 +28,7 @@ var BOOK_LANGUAGE_VERSION = '1.0.0';
28
28
  * @generated
29
29
  * @see https://github.com/webgptorg/promptbook
30
30
  */
31
- var PROMPTBOOK_ENGINE_VERSION = '0.84.0-11';
31
+ var PROMPTBOOK_ENGINE_VERSION = '0.84.0-13';
32
32
  /**
33
33
  * TODO: string_promptbook_version should be constrained to all versions of the Promptbook engine
34
34
  * Note: [💞] Ignore a discrepancy between file name and entity name
@@ -5999,22 +5999,6 @@ function $registeredScrapersMessage(availableScrapers) {
5999
5999
  * TODO: [®] DRY Register logic
6000
6000
  */
6001
6001
 
6002
- /**
6003
- * Removes emojis from a string and fix whitespaces
6004
- *
6005
- * @param text with emojis
6006
- * @returns text without emojis
6007
- * @public exported from `@promptbook/utils`
6008
- */
6009
- function removeEmojis(text) {
6010
- // Replace emojis (and also ZWJ sequence) with hyphens
6011
- text = text.replace(/(\p{Extended_Pictographic})\p{Modifier_Symbol}/gu, '$1');
6012
- text = text.replace(/(\p{Extended_Pictographic})[\u{FE00}-\u{FE0F}]/gu, '$1');
6013
- text = text.replace(/(\p{Extended_Pictographic})(\u{200D}\p{Extended_Pictographic})*/gu, '$1');
6014
- text = text.replace(/\p{Extended_Pictographic}/gu, '');
6015
- return text;
6016
- }
6017
-
6018
6002
  /**
6019
6003
  * @@@
6020
6004
  *
@@ -6077,30 +6061,6 @@ function normalizeToKebabCase(text) {
6077
6061
  * Note: [💞] Ignore a discrepancy between file name and entity name
6078
6062
  */
6079
6063
 
6080
- /**
6081
- * @@@
6082
- *
6083
- * @param value @@@
6084
- * @returns @@@
6085
- * @example @@@
6086
- * @public exported from `@promptbook/utils`
6087
- */
6088
- function titleToName(value) {
6089
- if (isValidUrl(value)) {
6090
- value = value.replace(/^https?:\/\//, '');
6091
- value = value.replace(/\.html$/, '');
6092
- }
6093
- else if (isValidFilePath(value)) {
6094
- value = basename(value);
6095
- // Note: Keeping extension in the name
6096
- }
6097
- value = value.split('/').join('-');
6098
- value = removeEmojis(value);
6099
- value = normalizeToKebabCase(value);
6100
- // TODO: [🧠] Maybe warn or add some padding to short name which are not good identifiers
6101
- return value;
6102
- }
6103
-
6104
6064
  /**
6105
6065
  * Creates unique name for the source
6106
6066
  *
@@ -6186,6 +6146,57 @@ function isFileExisting(filename, fs) {
6186
6146
  * TODO: [🖇] What about symlinks?
6187
6147
  */
6188
6148
 
6149
+ /**
6150
+ * Convert mime type to file extension
6151
+ *
6152
+ * Note: If the mime type is invalid, `null` is returned
6153
+ *
6154
+ * @private within the repository
6155
+ */
6156
+ function mimeTypeToExtension(value) {
6157
+ return extension(value) || null;
6158
+ }
6159
+
6160
+ /**
6161
+ * Removes emojis from a string and fix whitespaces
6162
+ *
6163
+ * @param text with emojis
6164
+ * @returns text without emojis
6165
+ * @public exported from `@promptbook/utils`
6166
+ */
6167
+ function removeEmojis(text) {
6168
+ // Strip emojis (including modifier, variation-selector and ZWJ sequences) from the text
6169
+ text = text.replace(/(\p{Extended_Pictographic})\p{Modifier_Symbol}/gu, '$1');
6170
+ text = text.replace(/(\p{Extended_Pictographic})[\u{FE00}-\u{FE0F}]/gu, '$1');
6171
+ text = text.replace(/(\p{Extended_Pictographic})(\u{200D}\p{Extended_Pictographic})*/gu, '$1');
6172
+ text = text.replace(/\p{Extended_Pictographic}/gu, '');
6173
+ return text;
6174
+ }
6175
+
6176
+ /**
6177
+ * @@@
6178
+ *
6179
+ * @param value @@@
6180
+ * @returns @@@
6181
+ * @example @@@
6182
+ * @public exported from `@promptbook/utils`
6183
+ */
6184
+ function titleToName(value) {
6185
+ if (isValidUrl(value)) {
6186
+ value = value.replace(/^https?:\/\//, '');
6187
+ value = value.replace(/\.html$/, '');
6188
+ }
6189
+ else if (isValidFilePath(value)) {
6190
+ value = basename(value);
6191
+ // Note: Keeping extension in the name
6192
+ }
6193
+ value = value.split('/').join('-');
6194
+ value = removeEmojis(value);
6195
+ value = normalizeToKebabCase(value);
6196
+ // TODO: [🧠] Maybe warn or add some padding to short name which are not good identifiers
6197
+ return value;
6198
+ }
6199
+
6189
6200
  /**
6190
6201
  * The built-in `fetch` function with a lightweight error-handling wrapper, used as the default fetch function in Promptbook scrapers
6191
6202
  *
@@ -6221,7 +6232,7 @@ var scraperFetch = function (url, init) { return __awaiter(void 0, void 0, void
6221
6232
  function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
6222
6233
  var _a;
6223
6234
  return __awaiter(this, void 0, void 0, function () {
6224
- var _b, fetch, knowledgeSourceContent, name, _c, _d, rootDirname, url, response, mimeType, filename, hash, rootDirname_1, filepath, _f, _g, _h, _j, _k, filename_1, fileExtension, mimeType;
6235
+ var _b, fetch, knowledgeSourceContent, name, _c, _d, rootDirname, url, response_1, mimeType, basename, hash, rootDirname_1, filepath, _f, _g, _h, _j, _k, filename_1, fileExtension, mimeType;
6225
6236
  return __generator(this, function (_l) {
6226
6237
  switch (_l.label) {
6227
6238
  case 0:
@@ -6237,25 +6248,67 @@ function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
6237
6248
  url = knowledgeSourceContent;
6238
6249
  return [4 /*yield*/, fetch(url)];
6239
6250
  case 1:
6240
- response = _l.sent();
6241
- mimeType = ((_a = response.headers.get('content-type')) === null || _a === void 0 ? void 0 : _a.split(';')[0]) || 'text/html';
6242
- filename = url.split('/').pop() || titleToName(url);
6251
+ response_1 = _l.sent();
6252
+ mimeType = ((_a = response_1.headers.get('content-type')) === null || _a === void 0 ? void 0 : _a.split(';')[0]) || 'text/html';
6253
+ if (tools.fs === undefined || !url.endsWith('.pdf' /* <- TODO: [💵] */)) {
6254
+ return [2 /*return*/, {
6255
+ source: name,
6256
+ filename: null,
6257
+ url: url,
6258
+ mimeType: mimeType,
6259
+ /*
6260
+ TODO: [🥽]
6261
+ > async asBlob() {
6262
+ > // TODO: [👨🏻‍🤝‍👨🏻] This can be called multiple times BUT when called a second time, the response is already consumed
6263
+ > const content = await response.blob();
6264
+ > return content;
6265
+ > },
6266
+ */
6267
+ asJson: function () {
6268
+ return __awaiter(this, void 0, void 0, function () {
6269
+ var content;
6270
+ return __generator(this, function (_a) {
6271
+ switch (_a.label) {
6272
+ case 0: return [4 /*yield*/, response_1.json()];
6273
+ case 1:
6274
+ content = _a.sent();
6275
+ return [2 /*return*/, content];
6276
+ }
6277
+ });
6278
+ });
6279
+ },
6280
+ asText: function () {
6281
+ return __awaiter(this, void 0, void 0, function () {
6282
+ var content;
6283
+ return __generator(this, function (_a) {
6284
+ switch (_a.label) {
6285
+ case 0: return [4 /*yield*/, response_1.text()];
6286
+ case 1:
6287
+ content = _a.sent();
6288
+ return [2 /*return*/, content];
6289
+ }
6290
+ });
6291
+ });
6292
+ },
6293
+ }];
6294
+ }
6295
+ basename = url.split('/').pop() || titleToName(url);
6243
6296
  hash = sha256(hexEncoder.parse(url)).toString( /* hex */);
6244
6297
  rootDirname_1 = join(process.cwd(), DEFAULT_DOWNLOAD_CACHE_DIRNAME);
6245
- filepath = join.apply(void 0, __spreadArray(__spreadArray([], __read(nameToSubfolderPath(hash /* <- TODO: [🎎] Maybe add some SHA256 prefix */)), false), ["".concat(filename.substring(0, MAX_FILENAME_LENGTH), ".pdf")], false));
6298
+ filepath = join.apply(void 0, __spreadArray(__spreadArray([], __read(nameToSubfolderPath(hash /* <- TODO: [🎎] Maybe add some SHA256 prefix */)), false), ["".concat(basename.substring(0, MAX_FILENAME_LENGTH), ".").concat(mimeTypeToExtension(mimeType))], false));
6246
6299
  return [4 /*yield*/, tools.fs.mkdir(dirname(join(rootDirname_1, filepath)), { recursive: true })];
6247
6300
  case 2:
6248
6301
  _l.sent();
6249
6302
  _g = (_f = tools.fs).writeFile;
6250
6303
  _h = [join(rootDirname_1, filepath)];
6251
6304
  _k = (_j = Buffer).from;
6252
- return [4 /*yield*/, response.arrayBuffer()];
6305
+ return [4 /*yield*/, response_1.arrayBuffer()];
6253
6306
  case 3: return [4 /*yield*/, _g.apply(_f, _h.concat([_k.apply(_j, [_l.sent()])]))];
6254
6307
  case 4:
6255
6308
  _l.sent();
6256
- // TODO: !!!!!!!! Check the file security
6309
+ // TODO: [💵] Check the file security
6257
6310
  // TODO: !!!!!!!! Check the file size (if it is not too big)
6258
- // TODO: !!!!!!!! Delete the file
6311
+ // TODO: !!!!!!!! Delete the file after the scraping is done
6259
6312
  return [2 /*return*/, makeKnowledgeSourceHandler({ name: name, knowledgeSourceContent: filepath }, tools, __assign(__assign({}, options), { rootDirname: rootDirname_1 }))];
6260
6313
  case 5:
6261
6314
  if (!isValidFilePath(knowledgeSourceContent)) return [3 /*break*/, 7];