@promptbook/cli 0.72.0-23 → 0.72.0-27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/esm/index.es.js CHANGED
@@ -24,7 +24,7 @@ import OpenAI from 'openai';
24
24
  /**
25
25
  * The version of the Promptbook library
26
26
  */
27
- var PROMPTBOOK_VERSION = '0.72.0-22';
27
+ var PROMPTBOOK_VERSION = '0.72.0-26';
28
28
  // TODO: [main] !!!! List here all the versions and annotate + put into script
29
29
 
30
30
  /*! *****************************************************************************
@@ -14060,5 +14060,142 @@ var _PdfScraperRegistration = $scrapersRegister.register(createPdfScraper);
14060
14060
  * TODO: [🎶] Naming "constructor" vs "creator" vs "factory"
14061
14061
  */
14062
14062
 
14063
- export { PROMPTBOOK_VERSION, _AnthropicClaudeMetadataRegistration, _AnthropicClaudeRegistration, _AzureOpenAiMetadataRegistration, _AzureOpenAiRegistration, _CLI, _DocumentScraperMetadataRegistration, _DocumentScraperRegistration, _LegacyDocumentScraperMetadataRegistration, _LegacyDocumentScraperRegistration, _MarkdownScraperMetadataRegistration, _MarkdownScraperRegistration, _OpenAiAssistantMetadataRegistration, _OpenAiAssistantRegistration, _OpenAiMetadataRegistration, _OpenAiRegistration, _PdfScraperMetadataRegistration, _PdfScraperRegistration };
14063
+ /**
14064
+ * Metadata of the scraper
14065
+ *
14066
+ * @private within the scraper directory
14067
+ */
14068
+ var websiteScraperMetadata = $deepFreeze({
14069
+ title: 'Website scraper',
14070
+ packageName: '@promptbook/website-crawler',
14071
+ className: 'WebsiteScraper',
14072
+ mimeTypes: ['text/html'],
14073
+ documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/@@',
14074
+ isAvilableInBrowser: false,
14075
+ requiredExecutables: [],
14076
+ }); /* <- TODO: [🤛] */
14077
+ /**
14078
+ * Registration of known scraper metadata
14079
+ *
14080
+ * Warning: This is not useful for the end user, it is just a side effect of the mechanism that handles all available known scrapers
14081
+ *
14082
+ * @public exported from `@promptbook/core`
14083
+ * @public exported from `@promptbook/cli`
14084
+ */
14085
+ var _WebsiteScraperMetadataRegistration = $scrapersMetadataRegister.register(websiteScraperMetadata);
14086
+
14087
+ /**
14088
+ * Scraper for websites
14089
+ *
14090
+ * @see `documentationUrl` for more details
14091
+ * @public exported from `@promptbook/website-crawler`
14092
+ */
14093
+ var WebsiteScraper = /** @class */ (function () {
14094
+ function WebsiteScraper(tools, options) {
14095
+ this.tools = tools;
14096
+ this.options = options;
14097
+ this.markdownScraper = new MarkdownScraper(tools, options);
14098
+ }
14099
+ Object.defineProperty(WebsiteScraper.prototype, "metadata", {
14100
+ /**
14101
+ * Metadata of the scraper which includes title, mime types, etc.
14102
+ */
14103
+ get: function () {
14104
+ return websiteScraperMetadata;
14105
+ },
14106
+ enumerable: false,
14107
+ configurable: true
14108
+ });
14109
+ /**
14110
+ * Converts the website to a `.md` file and returns an intermediate source
14111
+ *
14112
+ * Note: `$` is used to indicate that this function is not a pure function - it leaves files on the disk and you are responsible for cleaning them by calling `destroy` method of returned object
14113
+ */
14114
+ WebsiteScraper.prototype.$convert = function (source) {
14115
+ return __awaiter(this, void 0, void 0, function () {
14116
+ var markdown;
14117
+ return __generator(this, function (_a) {
14118
+ if (source.url === null) {
14119
+ throw new KnowledgeScrapeError('Website scraper requires URL');
14120
+ }
14121
+ markdown = "";
14122
+ return [2 /*return*/, __assign(__assign({}, source), { markdown: markdown, destroy: function () { } })];
14123
+ });
14124
+ });
14125
+ };
14126
+ /**
14127
+ * Scrapes the website and returns the knowledge pieces or `null` if it can't scrape it
14128
+ */
14129
+ WebsiteScraper.prototype.scrape = function (source) {
14130
+ return __awaiter(this, void 0, void 0, function () {
14131
+ var cacheFilehandler, markdownSource, knowledge;
14132
+ return __generator(this, function (_a) {
14133
+ switch (_a.label) {
14134
+ case 0: return [4 /*yield*/, this.$convert(source)];
14135
+ case 1:
14136
+ cacheFilehandler = _a.sent();
14137
+ markdownSource = {
14138
+ source: source.source,
14139
+ filename: cacheFilehandler.filename,
14140
+ url: null,
14141
+ mimeType: 'text/markdown',
14142
+ asText: function () {
14143
+ return cacheFilehandler.markdown;
14144
+ },
14145
+ asJson: function () {
14146
+ throw new UnexpectedError('Did not expect that `markdownScraper` would need to get the content `asJson`');
14147
+ },
14148
+ /*
14149
+ TODO: [🥽]
14150
+ > asBlob() {
14151
+ > throw new UnexpectedError(
14152
+ > 'Did not expect that `markdownScraper` would need to get the content `asBlob`',
14153
+ > );
14154
+ > },
14155
+ */
14156
+ };
14157
+ knowledge = this.markdownScraper.scrape(markdownSource);
14158
+ return [4 /*yield*/, cacheFilehandler.destroy()];
14159
+ case 2:
14160
+ _a.sent();
14161
+ return [2 /*return*/, knowledge];
14162
+ }
14163
+ });
14164
+ });
14165
+ };
14166
+ return WebsiteScraper;
14167
+ }());
14168
+ /**
14169
+ * TODO: [👣] Scraped website in .md can act as cache item - there is no need to run conversion each time
14170
+ * TODO: [🪂] Do it in parallel 11:11
14171
+ * Note: No need to aggregate usage here, it is done by intercepting the llmTools
14172
+ * Note: [🟢] Code in this file should never be released in packages that could be imported into browser environment
14173
+ */
14174
+
14175
+ /**
14176
+ * Factory that creates a `WebsiteScraper` instance <- TODO: confirm intended description; original was a `@@@` placeholder
14177
+ *
14178
+ * @public exported from `@promptbook/website-crawler`
14179
+ */
14180
+ var createWebsiteScraper = Object.assign(function (tools, options) {
14181
+ return new WebsiteScraper(tools, options);
14182
+ }, websiteScraperMetadata); /* <- TODO: [🤛] */
14183
+ /**
14184
+ * TODO: [🎶] Naming "constructor" vs "creator" vs "factory"
14185
+ */
14186
+
14187
+ /**
14188
+ * Registration of known scraper
14189
+ *
14190
+ * Warning: This is not useful for the end user, it is just a side effect of the mechanism that handles all available known scrapers
14191
+ *
14192
+ * @public exported from `@promptbook/website-crawler`
14193
+ * @public exported from `@promptbook/cli`
14194
+ */
14195
+ var _WebsiteScraperRegistration = $scrapersRegister.register(createWebsiteScraper);
14196
+ /**
14197
+ * TODO: [🎶] Naming "constructor" vs "creator" vs "factory"
14198
+ */
14199
+
14200
+ export { PROMPTBOOK_VERSION, _AnthropicClaudeMetadataRegistration, _AnthropicClaudeRegistration, _AzureOpenAiMetadataRegistration, _AzureOpenAiRegistration, _CLI, _DocumentScraperMetadataRegistration, _DocumentScraperRegistration, _LegacyDocumentScraperMetadataRegistration, _LegacyDocumentScraperRegistration, _MarkdownScraperMetadataRegistration, _MarkdownScraperRegistration, _OpenAiAssistantMetadataRegistration, _OpenAiAssistantRegistration, _OpenAiMetadataRegistration, _OpenAiRegistration, _PdfScraperMetadataRegistration, _PdfScraperRegistration, _WebsiteScraperMetadataRegistration, _WebsiteScraperRegistration };
14064
14201
  //# sourceMappingURL=index.es.js.map