@promptbook/pdf 0.77.1 → 0.78.0-0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/umd/index.umd.js CHANGED
@@ -1,8 +1,8 @@
1
1
  (function (global, factory) {
2
- typeof exports === 'object' && typeof module !== 'undefined' ? factory(exports, require('spacetrim'), require('prettier'), require('prettier/parser-html'), require('path'), require('waitasecond'), require('crypto-js'), require('crypto-js/enc-hex'), require('mime-types'), require('papaparse')) :
3
- typeof define === 'function' && define.amd ? define(['exports', 'spacetrim', 'prettier', 'prettier/parser-html', 'path', 'waitasecond', 'crypto-js', 'crypto-js/enc-hex', 'mime-types', 'papaparse'], factory) :
4
- (global = typeof globalThis !== 'undefined' ? globalThis : global || self, factory(global["promptbook-pdf"] = {}, global.spaceTrim, global.prettier, global.parserHtml, global.path, global.waitasecond, global.cryptoJs, global.hexEncoder, global.mimeTypes, global.papaparse));
5
- })(this, (function (exports, spaceTrim, prettier, parserHtml, path, waitasecond, cryptoJs, hexEncoder, mimeTypes, papaparse) { 'use strict';
2
+ typeof exports === 'object' && typeof module !== 'undefined' ? factory(exports, require('spacetrim'), require('prettier'), require('prettier/parser-html'), require('waitasecond'), require('path'), require('crypto-js'), require('crypto-js/enc-hex'), require('mime-types'), require('papaparse')) :
3
+ typeof define === 'function' && define.amd ? define(['exports', 'spacetrim', 'prettier', 'prettier/parser-html', 'waitasecond', 'path', 'crypto-js', 'crypto-js/enc-hex', 'mime-types', 'papaparse'], factory) :
4
+ (global = typeof globalThis !== 'undefined' ? globalThis : global || self, factory(global["promptbook-pdf"] = {}, global.spaceTrim, global.prettier, global.parserHtml, global.waitasecond, global.path, global.cryptoJs, global.hexEncoder, global.mimeTypes, global.papaparse));
5
+ })(this, (function (exports, spaceTrim, prettier, parserHtml, waitasecond, path, cryptoJs, hexEncoder, mimeTypes, papaparse) { 'use strict';
6
6
 
7
7
  function _interopDefaultLegacy (e) { return e && typeof e === 'object' && 'default' in e ? e : { 'default': e }; }
8
8
 
@@ -22,7 +22,7 @@
22
22
  *
23
23
  * @see https://github.com/webgptorg/promptbook
24
24
  */
25
- var PROMPTBOOK_ENGINE_VERSION = '0.77.0';
25
+ var PROMPTBOOK_ENGINE_VERSION = '0.77.1';
26
26
  /**
27
27
  * TODO: string_promptbook_version should be constrained to the all versions of Promptbook engine
28
28
  * Note: [💞] Ignore a discrepancy between file name and entity name
@@ -1398,415 +1398,26 @@
1398
1398
  return new (SimplePipelineCollection.bind.apply(SimplePipelineCollection, __spreadArray([void 0], __read(promptbooks), false)))();
1399
1399
  }
1400
1400
 
1401
- var defaultDiacriticsRemovalMap = [
1402
- {
1403
- base: 'A',
1404
- letters: '\u0041\u24B6\uFF21\u00C0\u00C1\u00C2\u1EA6\u1EA4\u1EAA\u1EA8\u00C3\u0100\u0102\u1EB0\u1EAE\u1EB4\u1EB2\u0226\u01E0\u00C4\u01DE\u1EA2\u00C5\u01FA\u01CD\u0200\u0202\u1EA0\u1EAC\u1EB6\u1E00\u0104\u023A\u2C6F',
1405
- },
1406
- { base: 'AA', letters: '\uA732' },
1407
- { base: 'AE', letters: '\u00C6\u01FC\u01E2' },
1408
- { base: 'AO', letters: '\uA734' },
1409
- { base: 'AU', letters: '\uA736' },
1410
- { base: 'AV', letters: '\uA738\uA73A' },
1411
- { base: 'AY', letters: '\uA73C' },
1412
- {
1413
- base: 'B',
1414
- letters: '\u0042\u24B7\uFF22\u1E02\u1E04\u1E06\u0243\u0182\u0181',
1415
- },
1416
- {
1417
- base: 'C',
1418
- letters: '\u0043\u24B8\uFF23\u0106\u0108\u010A\u010C\u00C7\u1E08\u0187\u023B\uA73E',
1419
- },
1420
- {
1421
- base: 'D',
1422
- letters: '\u0044\u24B9\uFF24\u1E0A\u010E\u1E0C\u1E10\u1E12\u1E0E\u0110\u018B\u018A\u0189\uA779\u00D0',
1423
- },
1424
- { base: 'DZ', letters: '\u01F1\u01C4' },
1425
- { base: 'Dz', letters: '\u01F2\u01C5' },
1426
- {
1427
- base: 'E',
1428
- letters: '\u0045\u24BA\uFF25\u00C8\u00C9\u00CA\u1EC0\u1EBE\u1EC4\u1EC2\u1EBC\u0112\u1E14\u1E16\u0114\u0116\u00CB\u1EBA\u011A\u0204\u0206\u1EB8\u1EC6\u0228\u1E1C\u0118\u1E18\u1E1A\u0190\u018E',
1429
- },
1430
- { base: 'F', letters: '\u0046\u24BB\uFF26\u1E1E\u0191\uA77B' },
1431
- {
1432
- base: 'G',
1433
- letters: '\u0047\u24BC\uFF27\u01F4\u011C\u1E20\u011E\u0120\u01E6\u0122\u01E4\u0193\uA7A0\uA77D\uA77E',
1434
- },
1435
- {
1436
- base: 'H',
1437
- letters: '\u0048\u24BD\uFF28\u0124\u1E22\u1E26\u021E\u1E24\u1E28\u1E2A\u0126\u2C67\u2C75\uA78D',
1438
- },
1439
- {
1440
- base: 'I',
1441
- letters: '\u0049\u24BE\uFF29\u00CC\u00CD\u00CE\u0128\u012A\u012C\u0130\u00CF\u1E2E\u1EC8\u01CF\u0208\u020A\u1ECA\u012E\u1E2C\u0197',
1442
- },
1443
- { base: 'J', letters: '\u004A\u24BF\uFF2A\u0134\u0248' },
1444
- {
1445
- base: 'K',
1446
- letters: '\u004B\u24C0\uFF2B\u1E30\u01E8\u1E32\u0136\u1E34\u0198\u2C69\uA740\uA742\uA744\uA7A2',
1447
- },
1448
- {
1449
- base: 'L',
1450
- letters: '\u004C\u24C1\uFF2C\u013F\u0139\u013D\u1E36\u1E38\u013B\u1E3C\u1E3A\u0141\u023D\u2C62\u2C60\uA748\uA746\uA780',
1451
- },
1452
- { base: 'LJ', letters: '\u01C7' },
1453
- { base: 'Lj', letters: '\u01C8' },
1454
- { base: 'M', letters: '\u004D\u24C2\uFF2D\u1E3E\u1E40\u1E42\u2C6E\u019C' },
1455
- {
1456
- base: 'N',
1457
- letters: '\u004E\u24C3\uFF2E\u01F8\u0143\u00D1\u1E44\u0147\u1E46\u0145\u1E4A\u1E48\u0220\u019D\uA790\uA7A4',
1458
- },
1459
- { base: 'NJ', letters: '\u01CA' },
1460
- { base: 'Nj', letters: '\u01CB' },
1461
- {
1462
- base: 'O',
1463
- letters: '\u004F\u24C4\uFF2F\u00D2\u00D3\u00D4\u1ED2\u1ED0\u1ED6\u1ED4\u00D5\u1E4C\u022C\u1E4E\u014C\u1E50\u1E52\u014E\u022E\u0230\u00D6\u022A\u1ECE\u0150\u01D1\u020C\u020E\u01A0\u1EDC\u1EDA\u1EE0\u1EDE\u1EE2\u1ECC\u1ED8\u01EA\u01EC\u00D8\u01FE\u0186\u019F\uA74A\uA74C',
1464
- },
1465
- { base: 'OI', letters: '\u01A2' },
1466
- { base: 'OO', letters: '\uA74E' },
1467
- { base: 'OU', letters: '\u0222' },
1468
- { base: 'OE', letters: '\u008C\u0152' },
1469
- { base: 'oe', letters: '\u009C\u0153' },
1470
- {
1471
- base: 'P',
1472
- letters: '\u0050\u24C5\uFF30\u1E54\u1E56\u01A4\u2C63\uA750\uA752\uA754',
1473
- },
1474
- { base: 'Q', letters: '\u0051\u24C6\uFF31\uA756\uA758\u024A' },
1475
- {
1476
- base: 'R',
1477
- letters: '\u0052\u24C7\uFF32\u0154\u1E58\u0158\u0210\u0212\u1E5A\u1E5C\u0156\u1E5E\u024C\u2C64\uA75A\uA7A6\uA782',
1478
- },
1479
- {
1480
- base: 'S',
1481
- letters: '\u0053\u24C8\uFF33\u1E9E\u015A\u1E64\u015C\u1E60\u0160\u1E66\u1E62\u1E68\u0218\u015E\u2C7E\uA7A8\uA784',
1482
- },
1483
- {
1484
- base: 'T',
1485
- letters: '\u0054\u24C9\uFF34\u1E6A\u0164\u1E6C\u021A\u0162\u1E70\u1E6E\u0166\u01AC\u01AE\u023E\uA786',
1486
- },
1487
- { base: 'TZ', letters: '\uA728' },
1488
- {
1489
- base: 'U',
1490
- letters: '\u0055\u24CA\uFF35\u00D9\u00DA\u00DB\u0168\u1E78\u016A\u1E7A\u016C\u00DC\u01DB\u01D7\u01D5\u01D9\u1EE6\u016E\u0170\u01D3\u0214\u0216\u01AF\u1EEA\u1EE8\u1EEE\u1EEC\u1EF0\u1EE4\u1E72\u0172\u1E76\u1E74\u0244',
1491
- },
1492
- { base: 'V', letters: '\u0056\u24CB\uFF36\u1E7C\u1E7E\u01B2\uA75E\u0245' },
1493
- { base: 'VY', letters: '\uA760' },
1494
- {
1495
- base: 'W',
1496
- letters: '\u0057\u24CC\uFF37\u1E80\u1E82\u0174\u1E86\u1E84\u1E88\u2C72',
1497
- },
1498
- { base: 'X', letters: '\u0058\u24CD\uFF38\u1E8A\u1E8C' },
1499
- {
1500
- base: 'Y',
1501
- letters: '\u0059\u24CE\uFF39\u1EF2\u00DD\u0176\u1EF8\u0232\u1E8E\u0178\u1EF6\u1EF4\u01B3\u024E\u1EFE',
1502
- },
1503
- {
1504
- base: 'Z',
1505
- letters: '\u005A\u24CF\uFF3A\u0179\u1E90\u017B\u017D\u1E92\u1E94\u01B5\u0224\u2C7F\u2C6B\uA762',
1506
- },
1507
- {
1508
- base: 'a',
1509
- letters: '\u0061\u24D0\uFF41\u1E9A\u00E0\u00E1\u00E2\u1EA7\u1EA5\u1EAB\u1EA9\u00E3\u0101\u0103\u1EB1\u1EAF\u1EB5\u1EB3\u0227\u01E1\u00E4\u01DF\u1EA3\u00E5\u01FB\u01CE\u0201\u0203\u1EA1\u1EAD\u1EB7\u1E01\u0105\u2C65\u0250',
1510
- },
1511
- { base: 'aa', letters: '\uA733' },
1512
- { base: 'ae', letters: '\u00E6\u01FD\u01E3' },
1513
- { base: 'ao', letters: '\uA735' },
1514
- { base: 'au', letters: '\uA737' },
1515
- { base: 'av', letters: '\uA739\uA73B' },
1516
- { base: 'ay', letters: '\uA73D' },
1517
- {
1518
- base: 'b',
1519
- letters: '\u0062\u24D1\uFF42\u1E03\u1E05\u1E07\u0180\u0183\u0253',
1520
- },
1521
- {
1522
- base: 'c',
1523
- letters: '\u0063\u24D2\uFF43\u0107\u0109\u010B\u010D\u00E7\u1E09\u0188\u023C\uA73F\u2184',
1524
- },
1525
- {
1526
- base: 'd',
1527
- letters: '\u0064\u24D3\uFF44\u1E0B\u010F\u1E0D\u1E11\u1E13\u1E0F\u0111\u018C\u0256\u0257\uA77A',
1528
- },
1529
- { base: 'dz', letters: '\u01F3\u01C6' },
1530
- {
1531
- base: 'e',
1532
- letters: '\u0065\u24D4\uFF45\u00E8\u00E9\u00EA\u1EC1\u1EBF\u1EC5\u1EC3\u1EBD\u0113\u1E15\u1E17\u0115\u0117\u00EB\u1EBB\u011B\u0205\u0207\u1EB9\u1EC7\u0229\u1E1D\u0119\u1E19\u1E1B\u0247\u025B\u01DD',
1533
- },
1534
- { base: 'f', letters: '\u0066\u24D5\uFF46\u1E1F\u0192\uA77C' },
1535
- {
1536
- base: 'g',
1537
- letters: '\u0067\u24D6\uFF47\u01F5\u011D\u1E21\u011F\u0121\u01E7\u0123\u01E5\u0260\uA7A1\u1D79\uA77F',
1538
- },
1539
- {
1540
- base: 'h',
1541
- letters: '\u0068\u24D7\uFF48\u0125\u1E23\u1E27\u021F\u1E25\u1E29\u1E2B\u1E96\u0127\u2C68\u2C76\u0265',
1542
- },
1543
- { base: 'hv', letters: '\u0195' },
1544
- {
1545
- base: 'i',
1546
- letters: '\u0069\u24D8\uFF49\u00EC\u00ED\u00EE\u0129\u012B\u012D\u00EF\u1E2F\u1EC9\u01D0\u0209\u020B\u1ECB\u012F\u1E2D\u0268\u0131',
1547
- },
1548
- { base: 'j', letters: '\u006A\u24D9\uFF4A\u0135\u01F0\u0249' },
1549
- {
1550
- base: 'k',
1551
- letters: '\u006B\u24DA\uFF4B\u1E31\u01E9\u1E33\u0137\u1E35\u0199\u2C6A\uA741\uA743\uA745\uA7A3',
1552
- },
1553
- {
1554
- base: 'l',
1555
- letters: '\u006C\u24DB\uFF4C\u0140\u013A\u013E\u1E37\u1E39\u013C\u1E3D\u1E3B\u017F\u0142\u019A\u026B\u2C61\uA749\uA781\uA747',
1556
- },
1557
- { base: 'lj', letters: '\u01C9' },
1558
- { base: 'm', letters: '\u006D\u24DC\uFF4D\u1E3F\u1E41\u1E43\u0271\u026F' },
1559
- {
1560
- base: 'n',
1561
- letters: '\u006E\u24DD\uFF4E\u01F9\u0144\u00F1\u1E45\u0148\u1E47\u0146\u1E4B\u1E49\u019E\u0272\u0149\uA791\uA7A5',
1562
- },
1563
- { base: 'nj', letters: '\u01CC' },
1564
- {
1565
- base: 'o',
1566
- letters: '\u006F\u24DE\uFF4F\u00F2\u00F3\u00F4\u1ED3\u1ED1\u1ED7\u1ED5\u00F5\u1E4D\u022D\u1E4F\u014D\u1E51\u1E53\u014F\u022F\u0231\u00F6\u022B\u1ECF\u0151\u01D2\u020D\u020F\u01A1\u1EDD\u1EDB\u1EE1\u1EDF\u1EE3\u1ECD\u1ED9\u01EB\u01ED\u00F8\u01FF\u0254\uA74B\uA74D\u0275',
1567
- },
1568
- { base: 'oi', letters: '\u01A3' },
1569
- { base: 'ou', letters: '\u0223' },
1570
- { base: 'oo', letters: '\uA74F' },
1571
- {
1572
- base: 'p',
1573
- letters: '\u0070\u24DF\uFF50\u1E55\u1E57\u01A5\u1D7D\uA751\uA753\uA755',
1574
- },
1575
- { base: 'q', letters: '\u0071\u24E0\uFF51\u024B\uA757\uA759' },
1576
- {
1577
- base: 'r',
1578
- letters: '\u0072\u24E1\uFF52\u0155\u1E59\u0159\u0211\u0213\u1E5B\u1E5D\u0157\u1E5F\u024D\u027D\uA75B\uA7A7\uA783',
1579
- },
1580
- {
1581
- base: 's',
1582
- letters: '\u0073\u24E2\uFF53\u00DF\u015B\u1E65\u015D\u1E61\u0161\u1E67\u1E63\u1E69\u0219\u015F\u023F\uA7A9\uA785\u1E9B',
1583
- },
1584
- {
1585
- base: 't',
1586
- letters: '\u0074\u24E3\uFF54\u1E6B\u1E97\u0165\u1E6D\u021B\u0163\u1E71\u1E6F\u0167\u01AD\u0288\u2C66\uA787',
1587
- },
1588
- { base: 'tz', letters: '\uA729' },
1589
- {
1590
- base: 'u',
1591
- letters: '\u0075\u24E4\uFF55\u00F9\u00FA\u00FB\u0169\u1E79\u016B\u1E7B\u016D\u00FC\u01DC\u01D8\u01D6\u01DA\u1EE7\u016F\u0171\u01D4\u0215\u0217\u01B0\u1EEB\u1EE9\u1EEF\u1EED\u1EF1\u1EE5\u1E73\u0173\u1E77\u1E75\u0289',
1592
- },
1593
- { base: 'v', letters: '\u0076\u24E5\uFF56\u1E7D\u1E7F\u028B\uA75F\u028C' },
1594
- { base: 'vy', letters: '\uA761' },
1595
- {
1596
- base: 'w',
1597
- letters: '\u0077\u24E6\uFF57\u1E81\u1E83\u0175\u1E87\u1E85\u1E98\u1E89\u2C73',
1598
- },
1599
- { base: 'x', letters: '\u0078\u24E7\uFF58\u1E8B\u1E8D' },
1600
- {
1601
- base: 'y',
1602
- letters: '\u0079\u24E8\uFF59\u1EF3\u00FD\u0177\u1EF9\u0233\u1E8F\u00FF\u1EF7\u1E99\u1EF5\u01B4\u024F\u1EFF',
1603
- },
1604
- {
1605
- base: 'z',
1606
- letters: '\u007A\u24E9\uFF5A\u017A\u1E91\u017C\u017E\u1E93\u1E95\u01B6\u0225\u0240\u2C6C\uA763',
1607
- },
1608
- ];
1609
1401
  /**
1610
- * Map of letters from diacritic variant to diacritless variant
1611
- * Contains lowercase and uppercase separatelly
1612
- *
1613
- * > "á" => "a"
1614
- * > "ě" => "e"
1615
- * > "Ă" => "A"
1616
- * > ...
1402
+ * This error type indicates that some tools are missing for pipeline execution or preparation
1617
1403
  *
1618
- * @public exported from `@promptbook/utils`
1404
+ * @public exported from `@promptbook/core`
1619
1405
  */
1620
- var DIACRITIC_VARIANTS_LETTERS = {};
1621
- // tslint:disable-next-line: prefer-for-of
1622
- for (var i = 0; i < defaultDiacriticsRemovalMap.length; i++) {
1623
- var letters = defaultDiacriticsRemovalMap[i].letters;
1624
- // tslint:disable-next-line: prefer-for-of
1625
- for (var j = 0; j < letters.length; j++) {
1626
- DIACRITIC_VARIANTS_LETTERS[letters[j]] = defaultDiacriticsRemovalMap[i].base;
1406
+ var MissingToolsError = /** @class */ (function (_super) {
1407
+ __extends(MissingToolsError, _super);
1408
+ function MissingToolsError(message) {
1409
+ var _this = _super.call(this, spaceTrim.spaceTrim(function (block) { return "\n ".concat(block(message), "\n\n Note: You have probbably forgot to provide some tools for pipeline execution or preparation\n\n "); })) || this;
1410
+ _this.name = 'MissingToolsError';
1411
+ Object.setPrototypeOf(_this, MissingToolsError.prototype);
1412
+ return _this;
1627
1413
  }
1628
- }
1629
- // <- TODO: [🍓] Put to maker function to save execution time if not needed
1630
- /*
1631
- @see https://stackoverflow.com/questions/990904/remove-accents-diacritics-in-a-string-in-javascript
1632
- Licensed under the Apache License, Version 2.0 (the "License");
1633
- you may not use this file except in compliance with the License.
1634
- You may obtain a copy of the License at
1635
-
1636
- http://www.apache.org/licenses/LICENSE-2.0
1637
-
1638
- Unless required by applicable law or agreed to in writing, software
1639
- distributed under the License is distributed on an "AS IS" BASIS,
1640
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1641
- See the License for the specific language governing permissions and
1642
- limitations under the License.
1643
- */
1414
+ return MissingToolsError;
1415
+ }(Error));
1644
1416
 
1645
1417
  /**
1646
- * @@@
1418
+ * This error indicates errors during the execution of the pipeline
1647
1419
  *
1648
- * @param input @@@
1649
- * @returns @@@
1650
- * @public exported from `@promptbook/utils`
1651
- */
1652
- function removeDiacritics(input) {
1653
- /*eslint no-control-regex: "off"*/
1654
- return input.replace(/[^\u0000-\u007E]/g, function (a) {
1655
- return DIACRITIC_VARIANTS_LETTERS[a] || a;
1656
- });
1657
- }
1658
- /**
1659
- * TODO: [Ж] Variant for cyrillic (and in general non-latin) letters
1660
- */
1661
-
1662
- /**
1663
- * @@@
1664
- *
1665
- * @param text @@@
1666
- * @returns @@@
1667
- * @example 'hello-world'
1668
- * @example 'i-love-promptbook'
1669
- * @public exported from `@promptbook/utils`
1670
- */
1671
- function normalizeToKebabCase(text) {
1672
- var e_1, _a;
1673
- text = removeDiacritics(text);
1674
- var charType;
1675
- var lastCharType = 'OTHER';
1676
- var normalizedName = '';
1677
- try {
1678
- for (var text_1 = __values(text), text_1_1 = text_1.next(); !text_1_1.done; text_1_1 = text_1.next()) {
1679
- var char = text_1_1.value;
1680
- var normalizedChar = void 0;
1681
- if (/^[a-z]$/.test(char)) {
1682
- charType = 'LOWERCASE';
1683
- normalizedChar = char;
1684
- }
1685
- else if (/^[A-Z]$/.test(char)) {
1686
- charType = 'UPPERCASE';
1687
- normalizedChar = char.toLowerCase();
1688
- }
1689
- else if (/^[0-9]$/.test(char)) {
1690
- charType = 'NUMBER';
1691
- normalizedChar = char;
1692
- }
1693
- else {
1694
- charType = 'OTHER';
1695
- normalizedChar = '-';
1696
- }
1697
- if (charType !== lastCharType &&
1698
- !(lastCharType === 'UPPERCASE' && charType === 'LOWERCASE') &&
1699
- !(lastCharType === 'NUMBER') &&
1700
- !(charType === 'NUMBER')) {
1701
- normalizedName += '-';
1702
- }
1703
- normalizedName += normalizedChar;
1704
- lastCharType = charType;
1705
- }
1706
- }
1707
- catch (e_1_1) { e_1 = { error: e_1_1 }; }
1708
- finally {
1709
- try {
1710
- if (text_1_1 && !text_1_1.done && (_a = text_1.return)) _a.call(text_1);
1711
- }
1712
- finally { if (e_1) throw e_1.error; }
1713
- }
1714
- normalizedName = normalizedName.split(/-+/g).join('-');
1715
- normalizedName = normalizedName.split(/-?\/-?/g).join('/');
1716
- normalizedName = normalizedName.replace(/^-/, '');
1717
- normalizedName = normalizedName.replace(/-$/, '');
1718
- return normalizedName;
1719
- }
1720
- /**
1721
- * Note: [💞] Ignore a discrepancy between file name and entity name
1722
- */
1723
-
1724
- /**
1725
- * Removes emojis from a string and fix whitespaces
1726
- *
1727
- * @param text with emojis
1728
- * @returns text without emojis
1729
- * @public exported from `@promptbook/utils`
1730
- */
1731
- function removeEmojis(text) {
1732
- // Replace emojis (and also ZWJ sequence) with hyphens
1733
- text = text.replace(/(\p{Extended_Pictographic})\p{Modifier_Symbol}/gu, '$1');
1734
- text = text.replace(/(\p{Extended_Pictographic})[\u{FE00}-\u{FE0F}]/gu, '$1');
1735
- text = text.replace(/(\p{Extended_Pictographic})(\u{200D}\p{Extended_Pictographic})*/gu, '$1');
1736
- text = text.replace(/\p{Extended_Pictographic}/gu, '');
1737
- return text;
1738
- }
1739
-
1740
- /**
1741
- * Tests if given string is valid URL.
1742
- *
1743
- * Note: This does not check if the file exists only if the path is valid
1744
- * @public exported from `@promptbook/utils`
1745
- */
1746
- function isValidFilePath(filename) {
1747
- if (typeof filename !== 'string') {
1748
- return false;
1749
- }
1750
- var filenameSlashes = filename.split('\\').join('/');
1751
- // Absolute Unix path: /hello.txt
1752
- if (/^(\/)/i.test(filenameSlashes)) {
1753
- return true;
1754
- }
1755
- // Absolute Windows path: /hello.txt
1756
- if (/^([A-Z]{1,2}:\/?)\//i.test(filenameSlashes)) {
1757
- return true;
1758
- }
1759
- // Relative path: ./hello.txt
1760
- if (/^(\.\.?\/)+/i.test(filenameSlashes)) {
1761
- return true;
1762
- }
1763
- return false;
1764
- }
1765
-
1766
- /**
1767
- * @@@
1768
- *
1769
- * @param value @@@
1770
- * @returns @@@
1771
- * @example @@@
1772
- * @public exported from `@promptbook/utils`
1773
- */
1774
- function titleToName(value) {
1775
- if (isValidUrl(value)) {
1776
- value = value.replace(/^https?:\/\//, '');
1777
- value = value.replace(/\.html$/, '');
1778
- }
1779
- else if (isValidFilePath(value)) {
1780
- value = path.basename(value);
1781
- // Note: Keeping extension in the name
1782
- }
1783
- value = value.split('/').join('-');
1784
- value = removeEmojis(value);
1785
- value = normalizeToKebabCase(value);
1786
- // TODO: [🧠] Maybe warn or add some padding to short name which are not good identifiers
1787
- return value;
1788
- }
1789
-
1790
- /**
1791
- * This error type indicates that some tools are missing for pipeline execution or preparation
1792
- *
1793
- * @public exported from `@promptbook/core`
1794
- */
1795
- var MissingToolsError = /** @class */ (function (_super) {
1796
- __extends(MissingToolsError, _super);
1797
- function MissingToolsError(message) {
1798
- var _this = _super.call(this, spaceTrim.spaceTrim(function (block) { return "\n ".concat(block(message), "\n\n Note: You have probbably forgot to provide some tools for pipeline execution or preparation\n\n "); })) || this;
1799
- _this.name = 'MissingToolsError';
1800
- Object.setPrototypeOf(_this, MissingToolsError.prototype);
1801
- return _this;
1802
- }
1803
- return MissingToolsError;
1804
- }(Error));
1805
-
1806
- /**
1807
- * This error indicates errors during the execution of the pipeline
1808
- *
1809
- * @public exported from `@promptbook/core`
1420
+ * @public exported from `@promptbook/core`
1810
1421
  */
1811
1422
  var PipelineExecutionError = /** @class */ (function (_super) {
1812
1423
  __extends(PipelineExecutionError, _super);
@@ -2920,148 +2531,471 @@
2920
2531
  return $Register;
2921
2532
  }());
2922
2533
 
2923
- /**
2924
- * @@@
2925
- *
2926
- * Note: `$` is used to indicate that this interacts with the global scope
2927
- * @singleton Only one instance of each register is created per build, but thare can be more @@@
2928
- * @public exported from `@promptbook/core`
2929
- */
2930
- var $scrapersMetadataRegister = new $Register('scrapers_metadata');
2931
- /**
2932
- * TODO: [®] DRY Register logic
2933
- */
2534
+ /**
2535
+ * @@@
2536
+ *
2537
+ * Note: `$` is used to indicate that this interacts with the global scope
2538
+ * @singleton Only one instance of each register is created per build, but thare can be more @@@
2539
+ * @public exported from `@promptbook/core`
2540
+ */
2541
+ var $scrapersMetadataRegister = new $Register('scrapers_metadata');
2542
+ /**
2543
+ * TODO: [®] DRY Register logic
2544
+ */
2545
+
2546
+ /**
2547
+ * @@@
2548
+ *
2549
+ * Note: `$` is used to indicate that this interacts with the global scope
2550
+ * @singleton Only one instance of each register is created per build, but thare can be more @@@
2551
+ * @public exported from `@promptbook/core`
2552
+ */
2553
+ var $scrapersRegister = new $Register('scraper_constructors');
2554
+ /**
2555
+ * TODO: [®] DRY Register logic
2556
+ */
2557
+
2558
+ /**
2559
+ * Creates a message with all registered scrapers
2560
+ *
2561
+ * Note: This function is used to create a (error) message when there is no scraper for particular mime type
2562
+ *
2563
+ * @private internal function of `createScrapersFromConfiguration` and `createScrapersFromEnv`
2564
+ */
2565
+ function $registeredScrapersMessage(availableScrapers) {
2566
+ var e_1, _a, e_2, _b, e_3, _c;
2567
+ /**
2568
+ * Mixes registered scrapers from $scrapersMetadataRegister and $scrapersRegister
2569
+ */
2570
+ var all = [];
2571
+ var _loop_1 = function (packageName, className, mimeTypes, documentationUrl, isAvilableInBrowser) {
2572
+ if (all.some(function (item) { return item.packageName === packageName && item.className === className; })) {
2573
+ return "continue";
2574
+ }
2575
+ all.push({ packageName: packageName, className: className, mimeTypes: mimeTypes, documentationUrl: documentationUrl, isAvilableInBrowser: isAvilableInBrowser });
2576
+ };
2577
+ try {
2578
+ for (var _d = __values($scrapersMetadataRegister.list()), _e = _d.next(); !_e.done; _e = _d.next()) {
2579
+ var _f = _e.value, packageName = _f.packageName, className = _f.className, mimeTypes = _f.mimeTypes, documentationUrl = _f.documentationUrl, isAvilableInBrowser = _f.isAvilableInBrowser;
2580
+ _loop_1(packageName, className, mimeTypes, documentationUrl, isAvilableInBrowser);
2581
+ }
2582
+ }
2583
+ catch (e_1_1) { e_1 = { error: e_1_1 }; }
2584
+ finally {
2585
+ try {
2586
+ if (_e && !_e.done && (_a = _d.return)) _a.call(_d);
2587
+ }
2588
+ finally { if (e_1) throw e_1.error; }
2589
+ }
2590
+ var _loop_2 = function (packageName, className, mimeTypes, documentationUrl, isAvilableInBrowser) {
2591
+ if (all.some(function (item) { return item.packageName === packageName && item.className === className; })) {
2592
+ return "continue";
2593
+ }
2594
+ all.push({ packageName: packageName, className: className, mimeTypes: mimeTypes, documentationUrl: documentationUrl, isAvilableInBrowser: isAvilableInBrowser });
2595
+ };
2596
+ try {
2597
+ for (var _g = __values($scrapersRegister.list()), _h = _g.next(); !_h.done; _h = _g.next()) {
2598
+ var _j = _h.value, packageName = _j.packageName, className = _j.className, mimeTypes = _j.mimeTypes, documentationUrl = _j.documentationUrl, isAvilableInBrowser = _j.isAvilableInBrowser;
2599
+ _loop_2(packageName, className, mimeTypes, documentationUrl, isAvilableInBrowser);
2600
+ }
2601
+ }
2602
+ catch (e_2_1) { e_2 = { error: e_2_1 }; }
2603
+ finally {
2604
+ try {
2605
+ if (_h && !_h.done && (_b = _g.return)) _b.call(_g);
2606
+ }
2607
+ finally { if (e_2) throw e_2.error; }
2608
+ }
2609
+ try {
2610
+ for (var availableScrapers_1 = __values(availableScrapers), availableScrapers_1_1 = availableScrapers_1.next(); !availableScrapers_1_1.done; availableScrapers_1_1 = availableScrapers_1.next()) {
2611
+ var metadata_1 = availableScrapers_1_1.value.metadata;
2612
+ all.push(metadata_1);
2613
+ }
2614
+ }
2615
+ catch (e_3_1) { e_3 = { error: e_3_1 }; }
2616
+ finally {
2617
+ try {
2618
+ if (availableScrapers_1_1 && !availableScrapers_1_1.done && (_c = availableScrapers_1.return)) _c.call(availableScrapers_1);
2619
+ }
2620
+ finally { if (e_3) throw e_3.error; }
2621
+ }
2622
+ var metadata = all.map(function (metadata) {
2623
+ var isMetadataAviailable = $scrapersMetadataRegister
2624
+ .list()
2625
+ .find(function (_a) {
2626
+ var packageName = _a.packageName, className = _a.className;
2627
+ return metadata.packageName === packageName && metadata.className === className;
2628
+ });
2629
+ var isInstalled = $scrapersRegister
2630
+ .list()
2631
+ .find(function (_a) {
2632
+ var packageName = _a.packageName, className = _a.className;
2633
+ return metadata.packageName === packageName && metadata.className === className;
2634
+ });
2635
+ var isAvilableInTools = availableScrapers.some(function (_a) {
2636
+ var _b = _a.metadata, packageName = _b.packageName, className = _b.className;
2637
+ return metadata.packageName === packageName && metadata.className === className;
2638
+ });
2639
+ return __assign(__assign({}, metadata), { isMetadataAviailable: isMetadataAviailable, isInstalled: isInstalled, isAvilableInTools: isAvilableInTools });
2640
+ });
2641
+ if (metadata.length === 0) {
2642
+ return spaceTrim__default["default"]("\n **No scrapers are available**\n\n This is a unexpected behavior, you are probably using some broken version of Promptbook\n At least there should be available the metadata of the scrapers\n ");
2643
+ }
2644
+ return spaceTrim__default["default"](function (block) { return "\n Available scrapers are:\n ".concat(block(metadata
2645
+ .map(function (_a, i) {
2646
+ var packageName = _a.packageName, className = _a.className, isMetadataAviailable = _a.isMetadataAviailable, isInstalled = _a.isInstalled, mimeTypes = _a.mimeTypes, isAvilableInBrowser = _a.isAvilableInBrowser, isAvilableInTools = _a.isAvilableInTools;
2647
+ var more = [];
2648
+ // TODO: [🧠] Maybe use `documentationUrl`
2649
+ if (isMetadataAviailable) {
2650
+ more.push("\u2B1C Metadata registered");
2651
+ } // not else
2652
+ if (isInstalled) {
2653
+ more.push("\uD83D\uDFE9 Installed");
2654
+ } // not else
2655
+ if (isAvilableInTools) {
2656
+ more.push("\uD83D\uDFE6 Available in tools");
2657
+ } // not else
2658
+ if (!isMetadataAviailable && isInstalled) {
2659
+ more.push("When no metadata registered but scraper is installed, it is an unexpected behavior");
2660
+ } // not else
2661
+ if (!isInstalled && isAvilableInTools) {
2662
+ more.push("When the scraper is not installed but available in tools, it is an unexpected compatibility behavior");
2663
+ } // not else
2664
+ if (!isAvilableInBrowser) {
2665
+ more.push("Not usable in browser");
2666
+ }
2667
+ var moreText = more.length === 0 ? '' : " *(".concat(more.join('; '), ")*");
2668
+ return "".concat(i + 1, ") `").concat(className, "` from `").concat(packageName, "` compatible to scrape ").concat(mimeTypes
2669
+ .map(function (mimeType) { return "\"".concat(mimeType, "\""); })
2670
+ .join(', ')).concat(moreText);
2671
+ })
2672
+ .join('\n')), "\n\n Legend:\n - \u2B1C **Metadata registered** means that Promptbook knows about the scraper, it is similar to registration in some registry\n - \uD83D\uDFE9 **Installed** means that you have imported package with particular scraper\n - \uD83D\uDFE6 **Available in tools** means that you have passed scraper as dependency into prepare or execution process\n\n "); });
2673
+ }
2674
+ /**
2675
+ * TODO: [®] DRY Register logic
2676
+ */
2677
+
2678
+ var defaultDiacriticsRemovalMap = [
2679
+ {
2680
+ base: 'A',
2681
+ letters: '\u0041\u24B6\uFF21\u00C0\u00C1\u00C2\u1EA6\u1EA4\u1EAA\u1EA8\u00C3\u0100\u0102\u1EB0\u1EAE\u1EB4\u1EB2\u0226\u01E0\u00C4\u01DE\u1EA2\u00C5\u01FA\u01CD\u0200\u0202\u1EA0\u1EAC\u1EB6\u1E00\u0104\u023A\u2C6F',
2682
+ },
2683
+ { base: 'AA', letters: '\uA732' },
2684
+ { base: 'AE', letters: '\u00C6\u01FC\u01E2' },
2685
+ { base: 'AO', letters: '\uA734' },
2686
+ { base: 'AU', letters: '\uA736' },
2687
+ { base: 'AV', letters: '\uA738\uA73A' },
2688
+ { base: 'AY', letters: '\uA73C' },
2689
+ {
2690
+ base: 'B',
2691
+ letters: '\u0042\u24B7\uFF22\u1E02\u1E04\u1E06\u0243\u0182\u0181',
2692
+ },
2693
+ {
2694
+ base: 'C',
2695
+ letters: '\u0043\u24B8\uFF23\u0106\u0108\u010A\u010C\u00C7\u1E08\u0187\u023B\uA73E',
2696
+ },
2697
+ {
2698
+ base: 'D',
2699
+ letters: '\u0044\u24B9\uFF24\u1E0A\u010E\u1E0C\u1E10\u1E12\u1E0E\u0110\u018B\u018A\u0189\uA779\u00D0',
2700
+ },
2701
+ { base: 'DZ', letters: '\u01F1\u01C4' },
2702
+ { base: 'Dz', letters: '\u01F2\u01C5' },
2703
+ {
2704
+ base: 'E',
2705
+ letters: '\u0045\u24BA\uFF25\u00C8\u00C9\u00CA\u1EC0\u1EBE\u1EC4\u1EC2\u1EBC\u0112\u1E14\u1E16\u0114\u0116\u00CB\u1EBA\u011A\u0204\u0206\u1EB8\u1EC6\u0228\u1E1C\u0118\u1E18\u1E1A\u0190\u018E',
2706
+ },
2707
+ { base: 'F', letters: '\u0046\u24BB\uFF26\u1E1E\u0191\uA77B' },
2708
+ {
2709
+ base: 'G',
2710
+ letters: '\u0047\u24BC\uFF27\u01F4\u011C\u1E20\u011E\u0120\u01E6\u0122\u01E4\u0193\uA7A0\uA77D\uA77E',
2711
+ },
2712
+ {
2713
+ base: 'H',
2714
+ letters: '\u0048\u24BD\uFF28\u0124\u1E22\u1E26\u021E\u1E24\u1E28\u1E2A\u0126\u2C67\u2C75\uA78D',
2715
+ },
2716
+ {
2717
+ base: 'I',
2718
+ letters: '\u0049\u24BE\uFF29\u00CC\u00CD\u00CE\u0128\u012A\u012C\u0130\u00CF\u1E2E\u1EC8\u01CF\u0208\u020A\u1ECA\u012E\u1E2C\u0197',
2719
+ },
2720
+ { base: 'J', letters: '\u004A\u24BF\uFF2A\u0134\u0248' },
2721
+ {
2722
+ base: 'K',
2723
+ letters: '\u004B\u24C0\uFF2B\u1E30\u01E8\u1E32\u0136\u1E34\u0198\u2C69\uA740\uA742\uA744\uA7A2',
2724
+ },
2725
+ {
2726
+ base: 'L',
2727
+ letters: '\u004C\u24C1\uFF2C\u013F\u0139\u013D\u1E36\u1E38\u013B\u1E3C\u1E3A\u0141\u023D\u2C62\u2C60\uA748\uA746\uA780',
2728
+ },
2729
+ { base: 'LJ', letters: '\u01C7' },
2730
+ { base: 'Lj', letters: '\u01C8' },
2731
+ { base: 'M', letters: '\u004D\u24C2\uFF2D\u1E3E\u1E40\u1E42\u2C6E\u019C' },
2732
+ {
2733
+ base: 'N',
2734
+ letters: '\u004E\u24C3\uFF2E\u01F8\u0143\u00D1\u1E44\u0147\u1E46\u0145\u1E4A\u1E48\u0220\u019D\uA790\uA7A4',
2735
+ },
2736
+ { base: 'NJ', letters: '\u01CA' },
2737
+ { base: 'Nj', letters: '\u01CB' },
2738
+ {
2739
+ base: 'O',
2740
+ letters: '\u004F\u24C4\uFF2F\u00D2\u00D3\u00D4\u1ED2\u1ED0\u1ED6\u1ED4\u00D5\u1E4C\u022C\u1E4E\u014C\u1E50\u1E52\u014E\u022E\u0230\u00D6\u022A\u1ECE\u0150\u01D1\u020C\u020E\u01A0\u1EDC\u1EDA\u1EE0\u1EDE\u1EE2\u1ECC\u1ED8\u01EA\u01EC\u00D8\u01FE\u0186\u019F\uA74A\uA74C',
2741
+ },
2742
+ { base: 'OI', letters: '\u01A2' },
2743
+ { base: 'OO', letters: '\uA74E' },
2744
+ { base: 'OU', letters: '\u0222' },
2745
+ { base: 'OE', letters: '\u008C\u0152' },
2746
+ { base: 'oe', letters: '\u009C\u0153' },
2747
+ {
2748
+ base: 'P',
2749
+ letters: '\u0050\u24C5\uFF30\u1E54\u1E56\u01A4\u2C63\uA750\uA752\uA754',
2750
+ },
2751
+ { base: 'Q', letters: '\u0051\u24C6\uFF31\uA756\uA758\u024A' },
2752
+ {
2753
+ base: 'R',
2754
+ letters: '\u0052\u24C7\uFF32\u0154\u1E58\u0158\u0210\u0212\u1E5A\u1E5C\u0156\u1E5E\u024C\u2C64\uA75A\uA7A6\uA782',
2755
+ },
2756
+ {
2757
+ base: 'S',
2758
+ letters: '\u0053\u24C8\uFF33\u1E9E\u015A\u1E64\u015C\u1E60\u0160\u1E66\u1E62\u1E68\u0218\u015E\u2C7E\uA7A8\uA784',
2759
+ },
2760
+ {
2761
+ base: 'T',
2762
+ letters: '\u0054\u24C9\uFF34\u1E6A\u0164\u1E6C\u021A\u0162\u1E70\u1E6E\u0166\u01AC\u01AE\u023E\uA786',
2763
+ },
2764
+ { base: 'TZ', letters: '\uA728' },
2765
+ {
2766
+ base: 'U',
2767
+ letters: '\u0055\u24CA\uFF35\u00D9\u00DA\u00DB\u0168\u1E78\u016A\u1E7A\u016C\u00DC\u01DB\u01D7\u01D5\u01D9\u1EE6\u016E\u0170\u01D3\u0214\u0216\u01AF\u1EEA\u1EE8\u1EEE\u1EEC\u1EF0\u1EE4\u1E72\u0172\u1E76\u1E74\u0244',
2768
+ },
2769
+ { base: 'V', letters: '\u0056\u24CB\uFF36\u1E7C\u1E7E\u01B2\uA75E\u0245' },
2770
+ { base: 'VY', letters: '\uA760' },
2771
+ {
2772
+ base: 'W',
2773
+ letters: '\u0057\u24CC\uFF37\u1E80\u1E82\u0174\u1E86\u1E84\u1E88\u2C72',
2774
+ },
2775
+ { base: 'X', letters: '\u0058\u24CD\uFF38\u1E8A\u1E8C' },
2776
+ {
2777
+ base: 'Y',
2778
+ letters: '\u0059\u24CE\uFF39\u1EF2\u00DD\u0176\u1EF8\u0232\u1E8E\u0178\u1EF6\u1EF4\u01B3\u024E\u1EFE',
2779
+ },
2780
+ {
2781
+ base: 'Z',
2782
+ letters: '\u005A\u24CF\uFF3A\u0179\u1E90\u017B\u017D\u1E92\u1E94\u01B5\u0224\u2C7F\u2C6B\uA762',
2783
+ },
2784
+ {
2785
+ base: 'a',
2786
+ letters: '\u0061\u24D0\uFF41\u1E9A\u00E0\u00E1\u00E2\u1EA7\u1EA5\u1EAB\u1EA9\u00E3\u0101\u0103\u1EB1\u1EAF\u1EB5\u1EB3\u0227\u01E1\u00E4\u01DF\u1EA3\u00E5\u01FB\u01CE\u0201\u0203\u1EA1\u1EAD\u1EB7\u1E01\u0105\u2C65\u0250',
2787
+ },
2788
+ { base: 'aa', letters: '\uA733' },
2789
+ { base: 'ae', letters: '\u00E6\u01FD\u01E3' },
2790
+ { base: 'ao', letters: '\uA735' },
2791
+ { base: 'au', letters: '\uA737' },
2792
+ { base: 'av', letters: '\uA739\uA73B' },
2793
+ { base: 'ay', letters: '\uA73D' },
2794
+ {
2795
+ base: 'b',
2796
+ letters: '\u0062\u24D1\uFF42\u1E03\u1E05\u1E07\u0180\u0183\u0253',
2797
+ },
2798
+ {
2799
+ base: 'c',
2800
+ letters: '\u0063\u24D2\uFF43\u0107\u0109\u010B\u010D\u00E7\u1E09\u0188\u023C\uA73F\u2184',
2801
+ },
2802
+ {
2803
+ base: 'd',
2804
+ letters: '\u0064\u24D3\uFF44\u1E0B\u010F\u1E0D\u1E11\u1E13\u1E0F\u0111\u018C\u0256\u0257\uA77A',
2805
+ },
2806
+ { base: 'dz', letters: '\u01F3\u01C6' },
2807
+ {
2808
+ base: 'e',
2809
+ letters: '\u0065\u24D4\uFF45\u00E8\u00E9\u00EA\u1EC1\u1EBF\u1EC5\u1EC3\u1EBD\u0113\u1E15\u1E17\u0115\u0117\u00EB\u1EBB\u011B\u0205\u0207\u1EB9\u1EC7\u0229\u1E1D\u0119\u1E19\u1E1B\u0247\u025B\u01DD',
2810
+ },
2811
+ { base: 'f', letters: '\u0066\u24D5\uFF46\u1E1F\u0192\uA77C' },
2812
+ {
2813
+ base: 'g',
2814
+ letters: '\u0067\u24D6\uFF47\u01F5\u011D\u1E21\u011F\u0121\u01E7\u0123\u01E5\u0260\uA7A1\u1D79\uA77F',
2815
+ },
2816
+ {
2817
+ base: 'h',
2818
+ letters: '\u0068\u24D7\uFF48\u0125\u1E23\u1E27\u021F\u1E25\u1E29\u1E2B\u1E96\u0127\u2C68\u2C76\u0265',
2819
+ },
2820
+ { base: 'hv', letters: '\u0195' },
2821
+ {
2822
+ base: 'i',
2823
+ letters: '\u0069\u24D8\uFF49\u00EC\u00ED\u00EE\u0129\u012B\u012D\u00EF\u1E2F\u1EC9\u01D0\u0209\u020B\u1ECB\u012F\u1E2D\u0268\u0131',
2824
+ },
2825
+ { base: 'j', letters: '\u006A\u24D9\uFF4A\u0135\u01F0\u0249' },
2826
+ {
2827
+ base: 'k',
2828
+ letters: '\u006B\u24DA\uFF4B\u1E31\u01E9\u1E33\u0137\u1E35\u0199\u2C6A\uA741\uA743\uA745\uA7A3',
2829
+ },
2830
+ {
2831
+ base: 'l',
2832
+ letters: '\u006C\u24DB\uFF4C\u0140\u013A\u013E\u1E37\u1E39\u013C\u1E3D\u1E3B\u017F\u0142\u019A\u026B\u2C61\uA749\uA781\uA747',
2833
+ },
2834
+ { base: 'lj', letters: '\u01C9' },
2835
+ { base: 'm', letters: '\u006D\u24DC\uFF4D\u1E3F\u1E41\u1E43\u0271\u026F' },
2836
+ {
2837
+ base: 'n',
2838
+ letters: '\u006E\u24DD\uFF4E\u01F9\u0144\u00F1\u1E45\u0148\u1E47\u0146\u1E4B\u1E49\u019E\u0272\u0149\uA791\uA7A5',
2839
+ },
2840
+ { base: 'nj', letters: '\u01CC' },
2841
+ {
2842
+ base: 'o',
2843
+ letters: '\u006F\u24DE\uFF4F\u00F2\u00F3\u00F4\u1ED3\u1ED1\u1ED7\u1ED5\u00F5\u1E4D\u022D\u1E4F\u014D\u1E51\u1E53\u014F\u022F\u0231\u00F6\u022B\u1ECF\u0151\u01D2\u020D\u020F\u01A1\u1EDD\u1EDB\u1EE1\u1EDF\u1EE3\u1ECD\u1ED9\u01EB\u01ED\u00F8\u01FF\u0254\uA74B\uA74D\u0275',
2844
+ },
2845
+ { base: 'oi', letters: '\u01A3' },
2846
+ { base: 'ou', letters: '\u0223' },
2847
+ { base: 'oo', letters: '\uA74F' },
2848
+ {
2849
+ base: 'p',
2850
+ letters: '\u0070\u24DF\uFF50\u1E55\u1E57\u01A5\u1D7D\uA751\uA753\uA755',
2851
+ },
2852
+ { base: 'q', letters: '\u0071\u24E0\uFF51\u024B\uA757\uA759' },
2853
+ {
2854
+ base: 'r',
2855
+ letters: '\u0072\u24E1\uFF52\u0155\u1E59\u0159\u0211\u0213\u1E5B\u1E5D\u0157\u1E5F\u024D\u027D\uA75B\uA7A7\uA783',
2856
+ },
2857
+ {
2858
+ base: 's',
2859
+ letters: '\u0073\u24E2\uFF53\u00DF\u015B\u1E65\u015D\u1E61\u0161\u1E67\u1E63\u1E69\u0219\u015F\u023F\uA7A9\uA785\u1E9B',
2860
+ },
2861
+ {
2862
+ base: 't',
2863
+ letters: '\u0074\u24E3\uFF54\u1E6B\u1E97\u0165\u1E6D\u021B\u0163\u1E71\u1E6F\u0167\u01AD\u0288\u2C66\uA787',
2864
+ },
2865
+ { base: 'tz', letters: '\uA729' },
2866
+ {
2867
+ base: 'u',
2868
+ letters: '\u0075\u24E4\uFF55\u00F9\u00FA\u00FB\u0169\u1E79\u016B\u1E7B\u016D\u00FC\u01DC\u01D8\u01D6\u01DA\u1EE7\u016F\u0171\u01D4\u0215\u0217\u01B0\u1EEB\u1EE9\u1EEF\u1EED\u1EF1\u1EE5\u1E73\u0173\u1E77\u1E75\u0289',
2869
+ },
2870
+ { base: 'v', letters: '\u0076\u24E5\uFF56\u1E7D\u1E7F\u028B\uA75F\u028C' },
2871
+ { base: 'vy', letters: '\uA761' },
2872
+ {
2873
+ base: 'w',
2874
+ letters: '\u0077\u24E6\uFF57\u1E81\u1E83\u0175\u1E87\u1E85\u1E98\u1E89\u2C73',
2875
+ },
2876
+ { base: 'x', letters: '\u0078\u24E7\uFF58\u1E8B\u1E8D' },
2877
+ {
2878
+ base: 'y',
2879
+ letters: '\u0079\u24E8\uFF59\u1EF3\u00FD\u0177\u1EF9\u0233\u1E8F\u00FF\u1EF7\u1E99\u1EF5\u01B4\u024F\u1EFF',
2880
+ },
2881
+ {
2882
+ base: 'z',
2883
+ letters: '\u007A\u24E9\uFF5A\u017A\u1E91\u017C\u017E\u1E93\u1E95\u01B6\u0225\u0240\u2C6C\uA763',
2884
+ },
2885
+ ];
2886
/**
 * Map of letters from diacritic variant to diacritless variant
 * Contains lowercase and uppercase separately
 *
 * > "á" => "a"
 * > "ě" => "e"
 * > "Ă" => "A"
 * > ...
 *
 * Built once at load time by expanding every `letters` string of `defaultDiacriticsRemovalMap`
 * into single-character keys pointing at the entry's `base`.
 *
 * @public exported from `@promptbook/utils`
 */
var DIACRITIC_VARIANTS_LETTERS = {};
defaultDiacriticsRemovalMap.forEach(function (variant) {
    // Each UTF-16 unit of `letters` is one key; entries are applied in array order, so a later entry wins
    variant.letters.split('').forEach(function (letter) {
        DIACRITIC_VARIANTS_LETTERS[letter] = variant.base;
    });
});
// <- TODO: [🍓] Put to maker function to save execution time if not needed
2907
+ /*
2908
+ @see https://stackoverflow.com/questions/990904/remove-accents-diacritics-in-a-string-in-javascript
2909
+ Licensed under the Apache License, Version 2.0 (the "License");
2910
+ you may not use this file except in compliance with the License.
2911
+ You may obtain a copy of the License at
2912
+
2913
+ http://www.apache.org/licenses/LICENSE-2.0
2914
+
2915
+ Unless required by applicable law or agreed to in writing, software
2916
+ distributed under the License is distributed on an "AS IS" BASIS,
2917
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
2918
+ See the License for the specific language governing permissions and
2919
+ limitations under the License.
2920
+ */
2934
2921
 
2935
2922
  /**
2936
2923
  * @@@
2937
2924
  *
2938
- * Note: `$` is used to indicate that this interacts with the global scope
2939
- * @singleton Only one instance of each register is created per build, but thare can be more @@@
2940
- * @public exported from `@promptbook/core`
2925
+ * @param input @@@
2926
+ * @returns @@@
2927
+ * @public exported from `@promptbook/utils`
2941
2928
  */
2942
- var $scrapersRegister = new $Register('scraper_constructors');
2929
/**
 * Replaces letters with diacritics by their base form using `DIACRITIC_VARIANTS_LETTERS`.
 *
 * Only characters outside of the range U+0000-U+007E are looked up; a character without
 * an entry in the map is kept as it is.
 *
 * @param input text which may contain letters with diacritics
 * @returns the text with every mapped letter replaced by its base form
 * @public exported from `@promptbook/utils`
 */
function removeDiacritics(input) {
    /*eslint no-control-regex: "off"*/
    var outsideBasicRange = /[^\u0000-\u007E]/g;
    return input.replace(outsideBasicRange, function (character) {
        var baseForm = DIACRITIC_VARIANTS_LETTERS[character];
        // Falsy lookup (missing entry) keeps the original character
        return baseForm ? baseForm : character;
    });
}
2943
2935
  /**
2944
- * TODO: [®] DRY Register logic
2936
+ * TODO: [Ж] Variant for cyrillic (and in general non-latin) letters
2945
2937
  */
2946
2938
 
2947
2939
  /**
2948
- * Creates a message with all registered scrapers
2949
- *
2950
- * Note: This function is used to create a (error) message when there is no scraper for particular mime type
2940
+ * @@@
2951
2941
  *
2952
- * @private internal function of `createScrapersFromConfiguration` and `createScrapersFromEnv`
2942
+ * @param text @@@
2943
+ * @returns @@@
2944
+ * @example 'hello-world'
2945
+ * @example 'i-love-promptbook'
2946
+ * @public exported from `@promptbook/utils`
2953
2947
  */
2954
- function $registeredScrapersMessage(availableScrapers) {
2955
- var e_1, _a, e_2, _b, e_3, _c;
2956
- /**
2957
- * Mixes registered scrapers from $scrapersMetadataRegister and $scrapersRegister
2958
- */
2959
- var all = [];
2960
- var _loop_1 = function (packageName, className, mimeTypes, documentationUrl, isAvilableInBrowser) {
2961
- if (all.some(function (item) { return item.packageName === packageName && item.className === className; })) {
2962
- return "continue";
2963
- }
2964
- all.push({ packageName: packageName, className: className, mimeTypes: mimeTypes, documentationUrl: documentationUrl, isAvilableInBrowser: isAvilableInBrowser });
2965
- };
2948
/**
 * Converts a text into a kebab-case name.
 *
 * Diacritics are removed first (via `removeDiacritics`). Then each character is classified:
 * ASCII lowercase letters and digits are kept, ASCII uppercase letters are lowercased and
 * every other character becomes `-`. A `-` is additionally inserted whenever the character
 * class changes, except on an uppercase -> lowercase transition and except when either side
 * of the transition is a digit. Finally runs of `-` are collapsed and one leading and one
 * trailing `-` are stripped.
 *
 * @param text arbitrary text
 * @returns the kebab-case form consisting only of `a-z`, `0-9` and `-`
 * @example 'hello-world'
 * @example 'i-love-promptbook'
 * @public exported from `@promptbook/utils`
 */
function normalizeToKebabCase(text) {
    text = removeDiacritics(text);
    var lastCharType = 'OTHER';
    var normalizedName = '';
    // Note: Iterating by UTF-16 unit; a surrogate pair yields two `-` which are collapsed below
    for (var index = 0; index < text.length; index++) {
        var char = text.charAt(index);
        var charType = void 0;
        var normalizedChar = void 0;
        if (/^[a-z]$/.test(char)) {
            charType = 'LOWERCASE';
            normalizedChar = char;
        }
        else if (/^[A-Z]$/.test(char)) {
            charType = 'UPPERCASE';
            normalizedChar = char.toLowerCase();
        }
        else if (/^[0-9]$/.test(char)) {
            charType = 'NUMBER';
            normalizedChar = char;
        }
        else {
            charType = 'OTHER';
            normalizedChar = '-';
        }
        var isWordBoundary = charType !== lastCharType &&
            !(lastCharType === 'UPPERCASE' && charType === 'LOWERCASE') &&
            lastCharType !== 'NUMBER' &&
            charType !== 'NUMBER';
        if (isWordBoundary) {
            normalizedName += '-';
        }
        normalizedName += normalizedChar;
        lastCharType = charType;
    }
    // Note: No special handling of `/` is needed, it has already been turned into `-` above
    normalizedName = normalizedName.replace(/-+/g, '-');
    normalizedName = normalizedName.replace(/^-/, '');
    normalizedName = normalizedName.replace(/-$/, '');
    return normalizedName;
}
3063
2997
  /**
3064
- * TODO: [®] DRY Register logic
2998
+ * Note: [💞] Ignore a discrepancy between file name and entity name
3065
2999
  */
3066
3000
 
3067
3001
  /**
@@ -3140,6 +3074,32 @@
3140
3074
  * TODO: [🖇] What about symlinks?
3141
3075
  */
3142
3076
 
3077
/**
 * Tests if given string looks like a valid file path.
 *
 * Backslashes are treated as forward slashes. Recognized forms are:
 * - absolute Unix path, for example `/hello.txt`
 * - absolute Windows path, for example `C:\hello.txt`
 * - explicitly relative path, for example `./hello.txt` or `../hello.txt`
 *
 * Anything else (including a bare name like `hello.txt` and non-string values) is rejected.
 *
 * Note: This does not check if the file exists only if the path is valid
 * @public exported from `@promptbook/utils`
 */
function isValidFilePath(filename) {
    if (typeof filename !== 'string') {
        return false;
    }
    var filenameSlashes = filename.split('\\').join('/');
    // Absolute Unix path: /hello.txt
    if (/^\//.test(filenameSlashes)) {
        return true;
    }
    // Absolute Windows path: C:/hello.txt (one or two drive letters followed by a slash)
    if (/^[A-Z]{1,2}:\//i.test(filenameSlashes)) {
        return true;
    }
    // Relative path: ./hello.txt or ../hello.txt
    if (/^\.\.?\//.test(filenameSlashes)) {
        return true;
    }
    return false;
}
3102
+
3143
3103
  /**
3144
3104
  * @@@
3145
3105
  *
@@ -3601,9 +3561,9 @@
3601
3561
  * @param script from which to extract the variables
3602
3562
  * @returns the list of variable names
3603
3563
  * @throws {ParseError} if the script is invalid
3604
- * @public exported from `@promptbook/utils`
3564
+ * @public exported from `@promptbook/utils` <- Note: [👖] This is usable elsewhere than in Promptbook, so keeping in utils
3605
3565
  */
3606
- function extractVariables(script) {
3566
+ function extractVariablesFromScript(script) {
3607
3567
  var variables = new Set();
3608
3568
  script = "(()=>{".concat(script, "})()");
3609
3569
  try {
@@ -3650,7 +3610,7 @@
3650
3610
  * @param task the task with used parameters
3651
3611
  * @returns the set of parameter names
3652
3612
  * @throws {ParseError} if the script is invalid
3653
- * @public exported from `@promptbook/utils`
3613
+ * @public exported from `@promptbook/core` <- Note: [👖] This utility is so tightly interconnected with the Promptbook that it is not exported as util but in core
3654
3614
  */
3655
3615
  function extractParameterNamesFromTask(task) {
3656
3616
  var e_1, _a, e_2, _b, e_3, _c, e_4, _d;
@@ -3671,7 +3631,7 @@
3671
3631
  }
3672
3632
  if (taskType === 'SCRIPT_TASK') {
3673
3633
  try {
3674
- for (var _g = __values(extractVariables(content)), _h = _g.next(); !_h.done; _h = _g.next()) {
3634
+ for (var _g = __values(extractVariablesFromScript(content)), _h = _g.next(); !_h.done; _h = _g.next()) {
3675
3635
  var parameterName = _h.value;
3676
3636
  parameterNames.add(parameterName);
3677
3637
  }
@@ -5580,6 +5540,46 @@
5580
5540
  * TODO: [🐚] Change onProgress to object that represents the running execution, can be subscribed via RxJS to and also awaited
5581
5541
  */
5582
5542
 
5543
/**
 * Removes emojis from a string
 *
 * Every character with the Unicode property `Extended_Pictographic` is removed together with
 * an emoji modifier (skin tone) or variation selector that directly follows it and with
 * pictographs joined to it by a zero-width joiner.
 *
 * Note: Whitespace around the removed emojis is left untouched
 *
 * @param text with emojis
 * @returns text without emojis
 * @public exported from `@promptbook/utils`
 */
function removeEmojis(text) {
    // Drop emoji modifiers (skin tones) following a pictograph
    // Note: Using `Emoji_Modifier` not `Modifier_Symbol` which would also swallow characters like "^" or "`"
    text = text.replace(/(\p{Extended_Pictographic})\p{Emoji_Modifier}/gu, '$1');
    // Drop variation selectors following a pictograph
    text = text.replace(/(\p{Extended_Pictographic})[\u{FE00}-\u{FE0F}]/gu, '$1');
    // Collapse ZWJ sequence into its first pictograph
    text = text.replace(/(\p{Extended_Pictographic})(\u{200D}\p{Extended_Pictographic})*/gu, '$1');
    // Remove the remaining pictographs
    text = text.replace(/\p{Extended_Pictographic}/gu, '');
    return text;
}
5558
+
5559
/**
 * Derives a kebab-case name from a title, URL or file path
 *
 * - When `isValidUrl` accepts the value, the leading `http://` / `https://` and a trailing `.html` are stripped
 * - Otherwise when `isValidFilePath` accepts the value, only its base name (including extension) is used
 * - Then every `/` is turned into `-`, emojis are removed and the result is normalized to kebab-case
 *
 * @param value title, URL or file path
 * @returns the kebab-case name
 * @public exported from `@promptbook/utils`
 */
function titleToName(value) {
    var name = value;
    if (isValidUrl(name)) {
        name = name.replace(/^https?:\/\//, '').replace(/\.html$/, '');
    }
    else if (isValidFilePath(name)) {
        // Note: Keeping extension in the name
        name = path.basename(name);
    }
    var withoutSlashes = name.split('/').join('-');
    var withoutEmojis = removeEmojis(withoutSlashes);
    // TODO: [🧠] Maybe warn or add some padding to short name which are not good identifiers
    return normalizeToKebabCase(withoutEmojis);
}
5582
+
5583
5583
  /**
5584
5584
  * Metadata of the scraper
5585
5585
  *