@promptbook/website-crawler 0.72.0-33 → 0.72.0-34

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -52,15 +52,15 @@ Rest of the documentation is common for **entire promptbook ecosystem**:
52
52
 
53
53
 
54
54
 
55
- If you have a simple, single prompt for ChatGPT, GPT-4, Anthropic Claude, Google Gemini, Llama 2, or whatever, it doesn't matter how you integrate it. Whether it's calling a REST API directly, using the SDK, hardcoding the prompt into the source code, or importing a text file, the process remains the same.
55
+ If you have a simple, single prompt for ChatGPT, GPT-4, Anthropic Claude, Google Gemini, Llama 3, or whatever, it doesn't matter how you integrate it. Whether it's calling a REST API directly, using the SDK, hardcoding the prompt into the source code, or importing a text file, the process remains the same.
56
56
 
57
- But often you will struggle with the limitations of LLMs, such as hallucinations, off-topic responses, poor quality output, language drift, word repetition repetition repetition repetition or misuse, lack of context, or just plain w𝒆𝐢rd responses. When this happens, you generally have three options:
57
+ But often you will struggle with the **limitations of LLMs**, such as **hallucinations, off-topic responses, poor quality output, language and prompt drift, word repetition repetition repetition repetition or misuse, lack of context, or just plain w𝒆𝐢rd responses**. When this happens, you generally have three options:
58
58
 
59
59
  1. **Fine-tune** the model to your specifications or even train your own.
60
60
  2. **Prompt-engineer** the prompt to the best shape you can achieve.
61
61
  3. Orchestrate **multiple prompts** in a [pipeline](https://github.com/webgptorg/promptbook/discussions/64) to get the best result.
62
62
 
63
- In all of these situations, but especially in 3., the Promptbook library can make your life easier.
63
+ In all of these situations, but especially in 3., the **✨ Promptbook can make your life waaaaaaaaaay easier**.
64
64
 
65
65
  - [**Separates concerns**](https://github.com/webgptorg/promptbook/discussions/32) between prompt-engineer and programmer, between code files and prompt files, and between prompts and their execution logic.
66
66
  - Establishes a [**common format `.ptbk.md`**](https://github.com/webgptorg/promptbook/discussions/85) that can be used to describe your prompt business logic without having to write code or deal with the technicalities of LLMs.
package/esm/index.es.js CHANGED
@@ -1,7 +1,6 @@
1
1
  import spaceTrim$1, { spaceTrim } from 'spacetrim';
2
2
  import { Readability } from '@mozilla/readability';
3
3
  import { JSDOM } from 'jsdom';
4
- import { Converter } from 'showdown';
5
4
  import { SHA256 } from 'crypto-js';
6
5
  import hexEncoder from 'crypto-js/enc-hex';
7
6
  import { mkdir, rm } from 'fs/promises';
@@ -11,12 +10,13 @@ import parserHtml from 'prettier/parser-html';
11
10
  import { forTime } from 'waitasecond';
12
11
  import { lookup } from 'mime-types';
13
12
  import { unparse, parse } from 'papaparse';
13
+ import { Converter } from 'showdown';
14
14
 
15
15
  // ⚠️ WARNING: This code has been generated so that any manual changes will be overwritten
16
16
  /**
17
17
  * The version of the Promptbook library
18
18
  */
19
- var PROMPTBOOK_VERSION = '0.72.0-32';
19
+ var PROMPTBOOK_VERSION = '0.72.0-33';
20
20
  // TODO: [main] !!!! List here all the versions and annotate + put into script
21
21
 
22
22
  /*! *****************************************************************************
@@ -679,44 +679,6 @@ var KnowledgeScrapeError = /** @class */ (function (_super) {
679
679
  return KnowledgeScrapeError;
680
680
  }(Error));
681
681
 
682
- /**
683
- * Just says that the variable is not used but should be kept
684
- * No side effects.
685
- *
686
- * Note: It can be useful for:
687
- *
688
- * 1) Suppressing eager optimization of unused imports
689
- * 2) Suppressing eslint errors of unused variables in the tests
690
- * 3) Keeping the type of the variable for type testing
691
- *
692
- * @param value any values
693
- * @returns void
694
- * @private within the repository
695
- */
696
- function keepUnused() {
697
- var valuesToKeep = [];
698
- for (var _i = 0; _i < arguments.length; _i++) {
699
- valuesToKeep[_i] = arguments[_i];
700
- }
701
- }
702
-
703
- /**
704
- * Just marks a place where something should be implemented
705
- * No side effects.
706
- *
707
- * Note: It can be useful for suppressing eslint errors of unused variables
708
- *
709
- * @param value any values
710
- * @returns void
711
- * @private within the repository
712
- */
713
- function TODO_USE() {
714
- var value = [];
715
- for (var _i = 0; _i < arguments.length; _i++) {
716
- value[_i] = arguments[_i];
717
- }
718
- }
719
-
720
682
  /**
721
683
  * Tests if given string is valid URL.
722
684
  *
@@ -1141,6 +1103,23 @@ function nameToSubfolderPath(name) {
1141
1103
  return [name.substr(0, 1).toLowerCase(), name.substr(1, 1).toLowerCase()];
1142
1104
  }
1143
1105
 
1106
+ /**
1107
+ * Just marks a place where something should be implemented
1108
+ * No side effects.
1109
+ *
1110
+ * Note: It can be useful for suppressing eslint errors of unused variables
1111
+ *
1112
+ * @param value any values
1113
+ * @returns void
1114
+ * @private within the repository
1115
+ */
1116
+ function TODO_USE() {
1117
+ var value = [];
1118
+ for (var _i = 0; _i < arguments.length; _i++) {
1119
+ value[_i] = arguments[_i];
1120
+ }
1121
+ }
1122
+
1144
1123
  /**
1145
1124
  * Create a filename for intermediate cache for scrapers
1146
1125
  *
@@ -4301,6 +4280,27 @@ function extractJsonBlock(markdown) {
4301
4280
  * TODO: [🏢] Make this logic part of `JsonFormatDefinition` or `isValidJsonString`
4302
4281
  */
4303
4282
 
4283
+ /**
4284
+ * Just says that the variable is not used but should be kept
4285
+ * No side effects.
4286
+ *
4287
+ * Note: It can be useful for:
4288
+ *
4289
+ * 1) Suppressing eager optimization of unused imports
4290
+ * 2) Suppressing eslint errors of unused variables in the tests
4291
+ * 3) Keeping the type of the variable for type testing
4292
+ *
4293
+ * @param value any values
4294
+ * @returns void
4295
+ * @private within the repository
4296
+ */
4297
+ function keepUnused() {
4298
+ var valuesToKeep = [];
4299
+ for (var _i = 0; _i < arguments.length; _i++) {
4300
+ valuesToKeep[_i] = arguments[_i];
4301
+ }
4302
+ }
4303
+
4304
4304
  /**
4305
4305
  * Replaces parameters in template with values from parameters object
4306
4306
  *
@@ -5826,25 +5826,6 @@ var WebsiteScraper = /** @class */ (function () {
5826
5826
  this.options = options;
5827
5827
  this.markdownScraper = new MarkdownScraper(tools, options);
5828
5828
  this.showdownConverter = createShowdownConverter();
5829
- /**/
5830
- // TODO: [🏄] !!!!!! Remove or describe why it is here
5831
- TODO_USE(Readability);
5832
- TODO_USE(Converter);
5833
- TODO_USE(JSDOM);
5834
- TODO_USE(createShowdownConverter);
5835
- /**/
5836
- /**/
5837
- var jsdom = new JSDOM();
5838
- var reader = new Readability(jsdom.window.document);
5839
- keepUnused(reader);
5840
- /**/
5841
- /**/
5842
- keepUnused(DEFAULT_INTERMEDIATE_FILES_STRATEGY, DEFAULT_IS_VERBOSE, DEFAULT_SCRAPE_CACHE_DIRNAME);
5843
- keepUnused(EnvironmentMismatchError);
5844
- keepUnused(KnowledgeScrapeError);
5845
- keepUnused(getScraperIntermediateSource);
5846
- keepUnused();
5847
- /**/
5848
5829
  }
5849
5830
  Object.defineProperty(WebsiteScraper.prototype, "metadata", {
5850
5831
  /**
@@ -5870,8 +5851,6 @@ var WebsiteScraper = /** @class */ (function () {
5870
5851
  switch (_g.label) {
5871
5852
  case 0:
5872
5853
  _a = this.options, _b = _a.rootDirname, rootDirname = _b === void 0 ? process.cwd() : _b, _c = _a.cacheDirname, cacheDirname = _c === void 0 ? DEFAULT_SCRAPE_CACHE_DIRNAME : _c, _d = _a.intermediateFilesStrategy, intermediateFilesStrategy = _d === void 0 ? DEFAULT_INTERMEDIATE_FILES_STRATEGY : _d, _e = _a.isVerbose, isVerbose = _e === void 0 ? DEFAULT_IS_VERBOSE : _e;
5873
- // [🏄] !!!!!!!
5874
- keepUnused(rootDirname, cacheDirname, intermediateFilesStrategy, isVerbose);
5875
5854
  if (source.url === null) {
5876
5855
  throw new KnowledgeScrapeError('Website scraper requires URL');
5877
5856
  }
@@ -5882,11 +5861,8 @@ var WebsiteScraper = /** @class */ (function () {
5882
5861
  return [4 /*yield*/, source.asText()];
5883
5862
  case 1:
5884
5863
  jsdom = new (_f.apply(JSDOM, [void 0, _g.sent(), {
5885
- // <- TODO: !!!!!!! Problem with build is probably in `new JSDOM();`
5886
5864
  url: source.url,
5887
5865
  }]))();
5888
- // [🏄] !!!!!!!
5889
- keepUnused(jsdom);
5890
5866
  reader = new Readability(jsdom.window.document);
5891
5867
  article = reader.parse();
5892
5868
  html = (article === null || article === void 0 ? void 0 : article.content) || (article === null || article === void 0 ? void 0 : article.textContent) || jsdom.window.document.body.innerHTML;