@promptbook/core 0.84.0-12 → 0.84.0-13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/esm/index.es.js CHANGED
@@ -25,7 +25,7 @@ var BOOK_LANGUAGE_VERSION = '1.0.0';
25
25
  * @generated
26
26
  * @see https://github.com/webgptorg/promptbook
27
27
  */
28
- var PROMPTBOOK_ENGINE_VERSION = '0.84.0-11';
28
+ var PROMPTBOOK_ENGINE_VERSION = '0.84.0-12';
29
29
  /**
30
30
  * TODO: string_promptbook_version should be constrained to all versions of the Promptbook engine
31
31
  * Note: [💞] Ignore a discrepancy between file name and entity name
@@ -5674,22 +5674,6 @@ function $registeredScrapersMessage(availableScrapers) {
5674
5674
  * TODO: [®] DRY Register logic
5675
5675
  */
5676
5676
 
5677
- /**
5678
- * Removes emojis from a string and fix whitespaces
5679
- *
5680
- * @param text with emojis
5681
- * @returns text without emojis
5682
- * @public exported from `@promptbook/utils`
5683
- */
5684
- function removeEmojis(text) {
5685
- // Replace emojis (and also ZWJ sequence) with hyphens
5686
- text = text.replace(/(\p{Extended_Pictographic})\p{Modifier_Symbol}/gu, '$1');
5687
- text = text.replace(/(\p{Extended_Pictographic})[\u{FE00}-\u{FE0F}]/gu, '$1');
5688
- text = text.replace(/(\p{Extended_Pictographic})(\u{200D}\p{Extended_Pictographic})*/gu, '$1');
5689
- text = text.replace(/\p{Extended_Pictographic}/gu, '');
5690
- return text;
5691
- }
5692
-
5693
5677
  /**
5694
5678
  * @@@
5695
5679
  *
@@ -5752,30 +5736,6 @@ function normalizeToKebabCase(text) {
5752
5736
  * Note: [💞] Ignore a discrepancy between file name and entity name
5753
5737
  */
5754
5738
 
5755
- /**
5756
- * @@@
5757
- *
5758
- * @param value @@@
5759
- * @returns @@@
5760
- * @example @@@
5761
- * @public exported from `@promptbook/utils`
5762
- */
5763
- function titleToName(value) {
5764
- if (isValidUrl(value)) {
5765
- value = value.replace(/^https?:\/\//, '');
5766
- value = value.replace(/\.html$/, '');
5767
- }
5768
- else if (isValidFilePath(value)) {
5769
- value = basename(value);
5770
- // Note: Keeping extension in the name
5771
- }
5772
- value = value.split('/').join('-');
5773
- value = removeEmojis(value);
5774
- value = normalizeToKebabCase(value);
5775
- // TODO: [🧠] Maybe warn or add some padding to short name which are not good identifiers
5776
- return value;
5777
- }
5778
-
5779
5739
  /**
5780
5740
  * Creates unique name for the source
5781
5741
  *
@@ -5861,6 +5821,46 @@ function isFileExisting(filename, fs) {
5861
5821
  * TODO: [🖇] What about symlinks?
5862
5822
  */
5863
5823
 
5824
+ /**
5825
+ * Removes emojis from a string and fix whitespaces
5826
+ *
5827
+ * @param text with emojis
5828
+ * @returns text without emojis
5829
+ * @public exported from `@promptbook/utils`
5830
+ */
5831
+ function removeEmojis(text) {
5832
+ // Replace emojis (and also ZWJ sequence) with hyphens
5833
+ text = text.replace(/(\p{Extended_Pictographic})\p{Modifier_Symbol}/gu, '$1');
5834
+ text = text.replace(/(\p{Extended_Pictographic})[\u{FE00}-\u{FE0F}]/gu, '$1');
5835
+ text = text.replace(/(\p{Extended_Pictographic})(\u{200D}\p{Extended_Pictographic})*/gu, '$1');
5836
+ text = text.replace(/\p{Extended_Pictographic}/gu, '');
5837
+ return text;
5838
+ }
5839
+
5840
+ /**
5841
+ * @@@
5842
+ *
5843
+ * @param value @@@
5844
+ * @returns @@@
5845
+ * @example @@@
5846
+ * @public exported from `@promptbook/utils`
5847
+ */
5848
+ function titleToName(value) {
5849
+ if (isValidUrl(value)) {
5850
+ value = value.replace(/^https?:\/\//, '');
5851
+ value = value.replace(/\.html$/, '');
5852
+ }
5853
+ else if (isValidFilePath(value)) {
5854
+ value = basename(value);
5855
+ // Note: Keeping extension in the name
5856
+ }
5857
+ value = value.split('/').join('-');
5858
+ value = removeEmojis(value);
5859
+ value = normalizeToKebabCase(value);
5860
+ // TODO: [🧠] Maybe warn or add some padding to short name which are not good identifiers
5861
+ return value;
5862
+ }
5863
+
5864
5864
  /**
5865
5865
  * The built-in `fetch` function with a lightweight error-handling wrapper, used as the default fetch function in Promptbook scrapers
5866
5866
  *
@@ -5896,7 +5896,7 @@ var scraperFetch = function (url, init) { return __awaiter(void 0, void 0, void
5896
5896
  function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
5897
5897
  var _a;
5898
5898
  return __awaiter(this, void 0, void 0, function () {
5899
- var _b, fetch, knowledgeSourceContent, name, _c, _d, rootDirname, url, response, mimeType, filename, hash, rootDirname_1, filepath, _f, _g, _h, _j, _k, filename_1, fileExtension, mimeType;
5899
+ var _b, fetch, knowledgeSourceContent, name, _c, _d, rootDirname, url, response_1, mimeType, basename, hash, rootDirname_1, filepath, _f, _g, _h, _j, _k, filename_1, fileExtension, mimeType;
5900
5900
  return __generator(this, function (_l) {
5901
5901
  switch (_l.label) {
5902
5902
  case 0:
@@ -5912,19 +5912,61 @@ function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
5912
5912
  url = knowledgeSourceContent;
5913
5913
  return [4 /*yield*/, fetch(url)];
5914
5914
  case 1:
5915
- response = _l.sent();
5916
- mimeType = ((_a = response.headers.get('content-type')) === null || _a === void 0 ? void 0 : _a.split(';')[0]) || 'text/html';
5917
- filename = url.split('/').pop() || titleToName(url);
5915
+ response_1 = _l.sent();
5916
+ mimeType = ((_a = response_1.headers.get('content-type')) === null || _a === void 0 ? void 0 : _a.split(';')[0]) || 'text/html';
5917
+ if (tools.fs === undefined || !url.endsWith('.pdf')) {
5918
+ return [2 /*return*/, {
5919
+ source: name,
5920
+ filename: null,
5921
+ url: url,
5922
+ mimeType: mimeType,
5923
+ /*
5924
+ TODO: [🥽]
5925
+ > async asBlob() {
5926
+ > // TODO: [👨🏻‍🤝‍👨🏻] This can be called multiple times BUT when called a second time, the response is already consumed
5927
+ > const content = await response.blob();
5928
+ > return content;
5929
+ > },
5930
+ */
5931
+ asJson: function () {
5932
+ return __awaiter(this, void 0, void 0, function () {
5933
+ var content;
5934
+ return __generator(this, function (_a) {
5935
+ switch (_a.label) {
5936
+ case 0: return [4 /*yield*/, response_1.json()];
5937
+ case 1:
5938
+ content = _a.sent();
5939
+ return [2 /*return*/, content];
5940
+ }
5941
+ });
5942
+ });
5943
+ },
5944
+ asText: function () {
5945
+ return __awaiter(this, void 0, void 0, function () {
5946
+ var content;
5947
+ return __generator(this, function (_a) {
5948
+ switch (_a.label) {
5949
+ case 0: return [4 /*yield*/, response_1.text()];
5950
+ case 1:
5951
+ content = _a.sent();
5952
+ return [2 /*return*/, content];
5953
+ }
5954
+ });
5955
+ });
5956
+ },
5957
+ }];
5958
+ }
5959
+ basename = url.split('/').pop() || titleToName(url);
5918
5960
  hash = sha256(hexEncoder.parse(url)).toString( /* hex */);
5919
5961
  rootDirname_1 = join(process.cwd(), DEFAULT_DOWNLOAD_CACHE_DIRNAME);
5920
- filepath = join.apply(void 0, __spreadArray(__spreadArray([], __read(nameToSubfolderPath(hash /* <- TODO: [🎎] Maybe add some SHA256 prefix */)), false), ["".concat(filename.substring(0, MAX_FILENAME_LENGTH), ".pdf")], false));
5962
+ filepath = join.apply(void 0, __spreadArray(__spreadArray([], __read(nameToSubfolderPath(hash /* <- TODO: [🎎] Maybe add some SHA256 prefix */)), false), ["".concat(basename.substring(0, MAX_FILENAME_LENGTH), ".pdf")], false));
5921
5963
  return [4 /*yield*/, tools.fs.mkdir(dirname(join(rootDirname_1, filepath)), { recursive: true })];
5922
5964
  case 2:
5923
5965
  _l.sent();
5924
5966
  _g = (_f = tools.fs).writeFile;
5925
5967
  _h = [join(rootDirname_1, filepath)];
5926
5968
  _k = (_j = Buffer).from;
5927
- return [4 /*yield*/, response.arrayBuffer()];
5969
+ return [4 /*yield*/, response_1.arrayBuffer()];
5928
5970
  case 3: return [4 /*yield*/, _g.apply(_f, _h.concat([_k.apply(_j, [_l.sent()])]))];
5929
5971
  case 4:
5930
5972
  _l.sent();