@promptbook/remote-server 0.84.0-11 → 0.84.0-12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/esm/index.es.js CHANGED
@@ -4,13 +4,14 @@ import { Server } from 'socket.io';
4
4
  import spaceTrim$1, { spaceTrim } from 'spacetrim';
5
5
  import { spawn } from 'child_process';
6
6
  import { forTime } from 'waitasecond';
7
- import { stat, access, constants, readFile, writeFile, readdir } from 'fs/promises';
8
- import { join } from 'path';
7
+ import { stat, access, constants, readFile, writeFile, readdir, mkdir } from 'fs/promises';
8
+ import { join, basename, dirname } from 'path';
9
9
  import { format } from 'prettier';
10
10
  import parserHtml from 'prettier/parser-html';
11
11
  import { unparse, parse } from 'papaparse';
12
- import { SHA256 } from 'crypto-js';
13
12
  import hexEncoder from 'crypto-js/enc-hex';
13
+ import sha256 from 'crypto-js/sha256';
14
+ import { SHA256 } from 'crypto-js';
14
15
  import { lookup } from 'mime-types';
15
16
 
16
17
  // ⚠️ WARNING: This code has been generated so that any manual changes will be overwritten
@@ -27,7 +28,7 @@ var BOOK_LANGUAGE_VERSION = '1.0.0';
27
28
  * @generated
28
29
  * @see https://github.com/webgptorg/promptbook
29
30
  */
30
- var PROMPTBOOK_ENGINE_VERSION = '0.84.0-10';
31
+ var PROMPTBOOK_ENGINE_VERSION = '0.84.0-11';
31
32
  /**
32
33
  * TODO: string_promptbook_version should be constrained to all versions of the Promptbook engine
33
34
  * Note: [💞] Ignore a discrepancy between file name and entity name
@@ -231,6 +232,12 @@ var SMALL_NUMBER = 0.001;
231
232
  * @private within the repository - too low-level in comparison with other `MAX_...`
232
233
  */
233
234
  var IMMEDIATE_TIME = 10;
235
+ /**
236
+ * The maximum length of the (generated) filename
237
+ *
238
+ * @public exported from `@promptbook/core`
239
+ */
240
+ var MAX_FILENAME_LENGTH = 30;
234
241
  /**
235
242
  * Strategy for caching the intermediate results for knowledge sources
236
243
  *
@@ -250,6 +257,15 @@ var DEFAULT_MAX_PARALLEL_COUNT = 5; // <- TODO: [🤹‍♂️]
250
257
  * @public exported from `@promptbook/core`
251
258
  */
252
259
  var DEFAULT_MAX_EXECUTION_ATTEMPTS = 3; // <- TODO: [🤹‍♂️]
260
+ // <- TODO: [🕝] Make also `BOOKS_DIRNAME_ALTERNATIVES`
261
+ /**
262
+ * Where to store the temporary downloads
263
+ *
264
+ * Note: When the folder does not exist, it is created recursively
265
+ *
266
+ * @public exported from `@promptbook/core`
267
+ */
268
+ var DEFAULT_DOWNLOAD_CACHE_DIRNAME = './.promptbook/download-cache';
253
269
  /**
254
270
  * Where to store the scrape cache
255
271
  *
@@ -858,6 +874,7 @@ function $provideFilesystemForNode(options) {
858
874
  readFile: readFile,
859
875
  writeFile: writeFile,
860
876
  readdir: readdir,
877
+ mkdir: mkdir,
861
878
  };
862
879
  }
863
880
  /**
@@ -5982,6 +5999,22 @@ function $registeredScrapersMessage(availableScrapers) {
5982
5999
  * TODO: [®] DRY Register logic
5983
6000
  */
5984
6001
 
6002
+ /**
6003
+ * Removes emojis from a string (other characters, including whitespace, are left as they are)
6004
+ *
6005
+ * @param text with emojis
6006
+ * @returns text without emojis
6007
+ * @public exported from `@promptbook/utils`
6008
+ */
6009
+ function removeEmojis(text) {
6010
+ // Remove emojis (including trailing modifier symbols, variation selectors and ZWJ sequences); nothing is inserted in their place
6011
+ text = text.replace(/(\p{Extended_Pictographic})\p{Modifier_Symbol}/gu, '$1');
6012
+ text = text.replace(/(\p{Extended_Pictographic})[\u{FE00}-\u{FE0F}]/gu, '$1');
6013
+ text = text.replace(/(\p{Extended_Pictographic})(\u{200D}\p{Extended_Pictographic})*/gu, '$1');
6014
+ text = text.replace(/\p{Extended_Pictographic}/gu, '');
6015
+ return text;
6016
+ }
6017
+
5985
6018
  /**
5986
6019
  * @@@
5987
6020
  *
@@ -6044,6 +6077,30 @@ function normalizeToKebabCase(text) {
6044
6077
  * Note: [💞] Ignore a discrepancy between file name and entity name
6045
6078
  */
6046
6079
 
6080
+ /**
6081
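+ * Converts a title, URL or file path into a name: strips the URL protocol and `.html` suffix (or keeps only the file basename), turns slashes into hyphens, removes emojis and normalizes via `normalizeToKebabCase`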
6082
+ *
6083
+ * @param value @@@
6084
+ * @returns @@@
6085
+ * @example @@@
6086
+ * @public exported from `@promptbook/utils`
6087
+ */
6088
+ function titleToName(value) {
6089
+ if (isValidUrl(value)) {
6090
+ value = value.replace(/^https?:\/\//, '');
6091
+ value = value.replace(/\.html$/, '');
6092
+ }
6093
+ else if (isValidFilePath(value)) {
6094
+ value = basename(value);
6095
+ // Note: Keeping extension in the name
6096
+ }
6097
+ value = value.split('/').join('-');
6098
+ value = removeEmojis(value);
6099
+ value = normalizeToKebabCase(value);
6100
+ // TODO: [🧠] Maybe warn or add some padding to short names which are not good identifiers
6101
+ return value;
6102
+ }
6103
+
6047
6104
  /**
6048
6105
  * Creates unique name for the source
6049
6106
  *
@@ -6065,6 +6122,15 @@ function knowledgeSourceContentToName(knowledgeSourceContent) {
6065
6122
  * TODO: [🐱‍🐉][🧠] Make some smart crop NOT source-i-m-pavol-a-develop-... BUT source-i-m-pavol-a-developer-...
6066
6123
  */
6067
6124
 
6125
+ /**
6126
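+ * Returns the first and the second character of the name (each lowercased) as a two-item array of path segments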
6127
+ *
6128
+ * @private for `FileCacheStorage`
6129
+ */
6130
+ function nameToSubfolderPath(name) {
6131
+ return [name.substr(0, 1).toLowerCase(), name.substr(1, 1).toLowerCase()];
6132
+ }
6133
+
6068
6134
  /**
6069
6135
  * Convert file extension to mime type
6070
6136
  *
@@ -6155,10 +6221,11 @@ var scraperFetch = function (url, init) { return __awaiter(void 0, void 0, void
6155
6221
  function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
6156
6222
  var _a;
6157
6223
  return __awaiter(this, void 0, void 0, function () {
6158
- var _b, fetch, knowledgeSourceContent, name, _c, _d, rootDirname, url, response_1, mimeType, filename_1, fileExtension, mimeType;
6159
- return __generator(this, function (_f) {
6160
- switch (_f.label) {
6224
+ var _b, fetch, knowledgeSourceContent, name, _c, _d, rootDirname, url, response, mimeType, filename, hash, rootDirname_1, filepath, _f, _g, _h, _j, _k, filename_1, fileExtension, mimeType;
6225
+ return __generator(this, function (_l) {
6226
+ switch (_l.label) {
6161
6227
  case 0:
6228
+ console.log('!!! makeKnowledgeSourceHandler', knowledgeSource);
6162
6229
  _b = tools.fetch, fetch = _b === void 0 ? scraperFetch : _b;
6163
6230
  knowledgeSourceContent = knowledgeSource.knowledgeSourceContent;
6164
6231
  name = knowledgeSource.name;
@@ -6166,54 +6233,32 @@ function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
6166
6233
  if (!name) {
6167
6234
  name = knowledgeSourceContentToName(knowledgeSourceContent);
6168
6235
  }
6169
- if (!isValidUrl(knowledgeSourceContent)) return [3 /*break*/, 2];
6236
+ if (!isValidUrl(knowledgeSourceContent)) return [3 /*break*/, 5];
6170
6237
  url = knowledgeSourceContent;
6171
6238
  return [4 /*yield*/, fetch(url)];
6172
6239
  case 1:
6173
- response_1 = _f.sent();
6174
- mimeType = ((_a = response_1.headers.get('content-type')) === null || _a === void 0 ? void 0 : _a.split(';')[0]) || 'text/html';
6175
- return [2 /*return*/, {
6176
- source: name,
6177
- filename: null,
6178
- url: url,
6179
- mimeType: mimeType,
6180
- /*
6181
- TODO: [🥽]
6182
- > async asBlob() {
6183
- > // TODO: [👨🏻‍🤝‍👨🏻] This can be called multiple times BUT when called second time, response in already consumed
6184
- > const content = await response.blob();
6185
- > return content;
6186
- > },
6187
- */
6188
- asJson: function () {
6189
- return __awaiter(this, void 0, void 0, function () {
6190
- var content;
6191
- return __generator(this, function (_a) {
6192
- switch (_a.label) {
6193
- case 0: return [4 /*yield*/, response_1.json()];
6194
- case 1:
6195
- content = _a.sent();
6196
- return [2 /*return*/, content];
6197
- }
6198
- });
6199
- });
6200
- },
6201
- asText: function () {
6202
- return __awaiter(this, void 0, void 0, function () {
6203
- var content;
6204
- return __generator(this, function (_a) {
6205
- switch (_a.label) {
6206
- case 0: return [4 /*yield*/, response_1.text()];
6207
- case 1:
6208
- content = _a.sent();
6209
- return [2 /*return*/, content];
6210
- }
6211
- });
6212
- });
6213
- },
6214
- }];
6240
+ response = _l.sent();
6241
+ mimeType = ((_a = response.headers.get('content-type')) === null || _a === void 0 ? void 0 : _a.split(';')[0]) || 'text/html';
6242
+ filename = url.split('/').pop() || titleToName(url);
6243
+ hash = sha256(hexEncoder.parse(url)).toString( /* hex */);
6244
+ rootDirname_1 = join(process.cwd(), DEFAULT_DOWNLOAD_CACHE_DIRNAME);
6245
+ filepath = join.apply(void 0, __spreadArray(__spreadArray([], __read(nameToSubfolderPath(hash /* <- TODO: [🎎] Maybe add some SHA256 prefix */)), false), ["".concat(filename.substring(0, MAX_FILENAME_LENGTH), ".pdf")], false));
6246
+ return [4 /*yield*/, tools.fs.mkdir(dirname(join(rootDirname_1, filepath)), { recursive: true })];
6215
6247
  case 2:
6216
- if (!isValidFilePath(knowledgeSourceContent)) return [3 /*break*/, 4];
6248
+ _l.sent();
6249
+ _g = (_f = tools.fs).writeFile;
6250
+ _h = [join(rootDirname_1, filepath)];
6251
+ _k = (_j = Buffer).from;
6252
+ return [4 /*yield*/, response.arrayBuffer()];
6253
+ case 3: return [4 /*yield*/, _g.apply(_f, _h.concat([_k.apply(_j, [_l.sent()])]))];
6254
+ case 4:
6255
+ _l.sent();
6256
+ // TODO: !!!!!!!! Check the file security
6257
+ // TODO: !!!!!!!! Check the file size (if it is not too big)
6258
+ // TODO: !!!!!!!! Delete the file
6259
+ return [2 /*return*/, makeKnowledgeSourceHandler({ name: name, knowledgeSourceContent: filepath }, tools, __assign(__assign({}, options), { rootDirname: rootDirname_1 }))];
6260
+ case 5:
6261
+ if (!isValidFilePath(knowledgeSourceContent)) return [3 /*break*/, 7];
6217
6262
  if (tools.fs === undefined) {
6218
6263
  throw new EnvironmentMismatchError('Can not import file knowledge without filesystem tools');
6219
6264
  // <- TODO: [🧠] What is the best error type here?
@@ -6226,8 +6271,8 @@ function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
6226
6271
  fileExtension = getFileExtension(filename_1);
6227
6272
  mimeType = extensionToMimeType(fileExtension || '');
6228
6273
  return [4 /*yield*/, isFileExisting(filename_1, tools.fs)];
6229
- case 3:
6230
- if (!(_f.sent())) {
6274
+ case 6:
6275
+ if (!(_l.sent())) {
6231
6276
  throw new NotFoundError(spaceTrim$1(function (block) { return "\n Can not make source handler for file which does not exist:\n\n File:\n ".concat(block(knowledgeSourceContent), "\n\n Full file path:\n ").concat(block(filename_1), "\n "); }));
6232
6277
  }
6233
6278
  // TODO: [🧠][😿] Test security file - file is scoped to the project (BUT maybe do this in `filesystemTools`)
@@ -6273,7 +6318,7 @@ function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
6273
6318
  });
6274
6319
  },
6275
6320
  }];
6276
- case 4: return [2 /*return*/, {
6321
+ case 7: return [2 /*return*/, {
6277
6322
  source: name,
6278
6323
  filename: null,
6279
6324
  url: null,