@promptbook/cli 0.72.0-23 → 0.72.0-27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -16,6 +16,8 @@ import { _MarkdownScraperRegistration } from '../scrapers/markdown/register-cons
16
16
  import { _MarkdownScraperMetadataRegistration } from '../scrapers/markdown/register-metadata';
17
17
  import { _PdfScraperRegistration } from '../scrapers/pdf/register-constructor';
18
18
  import { _PdfScraperMetadataRegistration } from '../scrapers/pdf/register-metadata';
19
+ import { _WebsiteScraperRegistration } from '../scrapers/website/register-constructor';
20
+ import { _WebsiteScraperMetadataRegistration } from '../scrapers/website/register-metadata';
19
21
  export { PROMPTBOOK_VERSION };
20
22
  export { _CLI };
21
23
  export { _AnthropicClaudeMetadataRegistration };
@@ -34,3 +36,5 @@ export { _MarkdownScraperRegistration };
34
36
  export { _MarkdownScraperMetadataRegistration };
35
37
  export { _PdfScraperRegistration };
36
38
  export { _PdfScraperMetadataRegistration };
39
+ export { _WebsiteScraperRegistration };
40
+ export { _WebsiteScraperMetadataRegistration };
@@ -83,6 +83,7 @@ import { _LegacyDocumentScraperMetadataRegistration } from '../scrapers/document
83
83
  import { _DocumentScraperMetadataRegistration } from '../scrapers/document/register-metadata';
84
84
  import { _MarkdownScraperMetadataRegistration } from '../scrapers/markdown/register-metadata';
85
85
  import { _PdfScraperMetadataRegistration } from '../scrapers/pdf/register-metadata';
86
+ import { _WebsiteScraperMetadataRegistration } from '../scrapers/website/register-metadata';
86
87
  import { MemoryStorage } from '../storage/memory/MemoryStorage';
87
88
  import { PrefixStorage } from '../storage/memory/utils/PrefixStorage';
88
89
  import { executionReportJsonToString } from '../types/execution-report/executionReportJsonToString';
@@ -175,6 +176,7 @@ export { _LegacyDocumentScraperMetadataRegistration };
175
176
  export { _DocumentScraperMetadataRegistration };
176
177
  export { _MarkdownScraperMetadataRegistration };
177
178
  export { _PdfScraperMetadataRegistration };
179
+ export { _WebsiteScraperMetadataRegistration };
178
180
  export { MemoryStorage };
179
181
  export { PrefixStorage };
180
182
  export { executionReportJsonToString };
@@ -0,0 +1,8 @@
1
+ import { PROMPTBOOK_VERSION } from '../version';
2
+ import { createWebsiteScraper } from '../scrapers/website/createWebsiteScraper';
3
+ import { _WebsiteScraperRegistration } from '../scrapers/website/register-constructor';
4
+ import { WebsiteScraper } from '../scrapers/website/WebsiteScraper';
5
+ export { PROMPTBOOK_VERSION };
6
+ export { createWebsiteScraper };
7
+ export { _WebsiteScraperRegistration };
8
+ export { WebsiteScraper };
@@ -0,0 +1,46 @@
1
+ import type { KnowledgePiecePreparedJson } from '../../types/PipelineJson/KnowledgePieceJson';
2
+ import type { string_markdown } from '../../types/typeAliases';
3
+ import type { Converter } from '../_common/Converter';
4
+ import type { Scraper } from '../_common/Scraper';
5
+ import type { ScraperSourceHandler } from '../_common/Scraper';
6
+ import type { ExecutionTools } from '../../execution/ExecutionTools';
7
+ import type { PrepareAndScrapeOptions } from '../../prepare/PrepareAndScrapeOptions';
8
+ import type { ScraperAndConverterMetadata } from '../_common/register/ScraperAndConverterMetadata';
9
+ import type { ScraperIntermediateSource } from '../_common/ScraperIntermediateSource';
10
+ /**
11
+ * Scraper for websites
12
+ *
13
+ * @see `documentationUrl` for more details
14
+ * @public exported from `@promptbook/website-crawler`
15
+ */
16
+ export declare class WebsiteScraper implements Converter, Scraper {
17
+ private readonly tools;
18
+ private readonly options;
19
+ /**
20
+ * Metadata of the scraper which includes title, mime types, etc.
21
+ */
22
+ get metadata(): ScraperAndConverterMetadata;
23
+ /**
24
+ * Markdown scraper is used internally
25
+ */
26
+ private readonly markdownScraper;
27
+ constructor(tools: Pick<ExecutionTools, 'fs' | 'llm'>, options: PrepareAndScrapeOptions);
28
+ /**
29
+ * Convert the website to `.md` file and returns intermediate source
30
+ *
31
+ * Note: `$` is used to indicate that this function is not a pure function - it leaves files on the disk and you are responsible for cleaning them by calling `destroy` method of returned object
32
+ */
33
+ $convert(source: ScraperSourceHandler): Promise<ScraperIntermediateSource & {
34
+ markdown: string_markdown;
35
+ }>;
36
+ /**
37
+ * Scrapes the website and returns the knowledge pieces or `null` if it can't scrape it
38
+ */
39
+ scrape(source: ScraperSourceHandler): Promise<ReadonlyArray<Omit<KnowledgePiecePreparedJson, 'sources' | 'preparationIds'>> | null>;
40
+ }
41
+ /**
42
+ * TODO: [👣] Scraped website in .md can act as cache item - there is no need to run conversion each time
43
+ * TODO: [🪂] Do it in parallel 11:11
44
+ * Note: No need to aggregate usage here, it is done by intercepting the llmTools
45
+ * Note: [🟢] Code in this file should never be released in packages that could be imported into browser environment
46
+ */
@@ -0,0 +1,20 @@
1
+ import type { ExecutionTools } from '../../execution/ExecutionTools';
2
+ import type { PrepareAndScrapeOptions } from '../../prepare/PrepareAndScrapeOptions';
3
+ import { WebsiteScraper } from './WebsiteScraper';
4
+ /**
5
+ * @@@
6
+ *
7
+ * @public exported from `@promptbook/website-crawler`
8
+ */
9
+ export declare const createWebsiteScraper: ((tools: Pick<ExecutionTools, 'llm'>, options: PrepareAndScrapeOptions) => WebsiteScraper) & import("type-fest/source/readonly-deep").ReadonlyObjectDeep<{
10
+ title: string;
11
+ packageName: string;
12
+ className: string;
13
+ mimeTypes: string[];
14
+ documentationUrl: "https://github.com/webgptorg/promptbook/discussions/@@";
15
+ isAvilableInBrowser: false;
16
+ requiredExecutables: never[];
17
+ }>;
18
+ /**
19
+ * TODO: [🎶] Naming "constructor" vs "creator" vs "factory"
20
+ */
@@ -0,0 +1,5 @@
1
+ #!/usr/bin/env ts-node
2
+ export {};
3
+ /**
4
+ * Note: [⚫] Code in this file should never be published in any package
5
+ */
@@ -0,0 +1,13 @@
1
+ import type { Registration } from '../../utils/$Register';
2
+ /**
3
+ * Registration of known scraper
4
+ *
5
+ * Warning: This is not useful for the end user, it is just a side effect of the mechanism that handles all available known scrapers
6
+ *
7
+ * @public exported from `@promptbook/website-crawler`
8
+ * @public exported from `@promptbook/cli`
9
+ */
10
+ export declare const _WebsiteScraperRegistration: Registration;
11
+ /**
12
+ * TODO: [🎶] Naming "constructor" vs "creator" vs "factory"
13
+ */
@@ -0,0 +1,24 @@
1
+ import type { Registration } from '../../utils/$Register';
2
+ /**
3
+ * Metadata of the scraper
4
+ *
5
+ * @private within the scraper directory
6
+ */
7
+ export declare const websiteScraperMetadata: import("type-fest/source/readonly-deep").ReadonlyObjectDeep<{
8
+ title: string;
9
+ packageName: string;
10
+ className: string;
11
+ mimeTypes: string[];
12
+ documentationUrl: "https://github.com/webgptorg/promptbook/discussions/@@";
13
+ isAvilableInBrowser: false;
14
+ requiredExecutables: never[];
15
+ }>;
16
+ /**
17
+ * Registration of known scraper metadata
18
+ *
19
+ * Warning: This is not useful for the end user, it is just a side effect of the mechanism that handles all available known scrapers
20
+ *
21
+ * @public exported from `@promptbook/core`
22
+ * @public exported from `@promptbook/cli`
23
+ */
24
+ export declare const _WebsiteScraperMetadataRegistration: Registration;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@promptbook/cli",
3
- "version": "0.72.0-23",
3
+ "version": "0.72.0-27",
4
4
  "description": "Supercharge your use of large language models",
5
5
  "private": false,
6
6
  "sideEffects": false,
package/umd/index.umd.js CHANGED
@@ -39,7 +39,7 @@
39
39
  /**
40
40
  * The version of the Promptbook library
41
41
  */
42
- var PROMPTBOOK_VERSION = '0.72.0-22';
42
+ var PROMPTBOOK_VERSION = '0.72.0-26';
43
43
  // TODO: [main] !!!! List here all the versions and annotate + put into script
44
44
 
45
45
  /*! *****************************************************************************
@@ -14075,6 +14075,143 @@
14075
14075
  * TODO: [🎶] Naming "constructor" vs "creator" vs "factory"
14076
14076
  */
14077
14077
 
14078
+ /**
14079
+ * Metadata of the scraper
14080
+ *
14081
+ * @private within the scraper directory
14082
+ */
14083
+ var websiteScraperMetadata = $deepFreeze({
14084
+ title: 'Website scraper',
14085
+ packageName: '@promptbook/website-crawler',
14086
+ className: 'WebsiteScraper',
14087
+ mimeTypes: ['text/html'],
14088
+ documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/@@',
14089
+ isAvilableInBrowser: false,
14090
+ requiredExecutables: [],
14091
+ }); /* <- TODO: [🤛] */
14092
+ /**
14093
+ * Registration of known scraper metadata
14094
+ *
14095
+ * Warning: This is not useful for the end user, it is just a side effect of the mechanism that handles all available known scrapers
14096
+ *
14097
+ * @public exported from `@promptbook/core`
14098
+ * @public exported from `@promptbook/cli`
14099
+ */
14100
+ var _WebsiteScraperMetadataRegistration = $scrapersMetadataRegister.register(websiteScraperMetadata);
14101
+
14102
+ /**
14103
+ * Scraper for websites
14104
+ *
14105
+ * @see `documentationUrl` for more details
14106
+ * @public exported from `@promptbook/website-crawler`
14107
+ */
14108
+ var WebsiteScraper = /** @class */ (function () {
14109
+ function WebsiteScraper(tools, options) {
14110
+ this.tools = tools;
14111
+ this.options = options;
14112
+ this.markdownScraper = new MarkdownScraper(tools, options);
14113
+ }
14114
+ Object.defineProperty(WebsiteScraper.prototype, "metadata", {
14115
+ /**
14116
+ * Metadata of the scraper which includes title, mime types, etc.
14117
+ */
14118
+ get: function () {
14119
+ return websiteScraperMetadata;
14120
+ },
14121
+ enumerable: false,
14122
+ configurable: true
14123
+ });
14124
+ /**
14125
+ * Convert the website to `.md` file and returns intermediate source
14126
+ *
14127
+ * Note: `$` is used to indicate that this function is not a pure function - it leaves files on the disk and you are responsible for cleaning them by calling `destroy` method of returned object
14128
+ */
14129
+ WebsiteScraper.prototype.$convert = function (source) {
14130
+ return __awaiter(this, void 0, void 0, function () {
14131
+ var markdown;
14132
+ return __generator(this, function (_a) {
14133
+ if (source.url === null) {
14134
+ throw new KnowledgeScrapeError('Website scraper requires URL');
14135
+ }
14136
+ markdown = "";
14137
+ return [2 /*return*/, __assign(__assign({}, source), { markdown: markdown, destroy: function () { } })];
14138
+ });
14139
+ });
14140
+ };
14141
+ /**
14142
+ * Scrapes the website and returns the knowledge pieces or `null` if it can't scrape it
14143
+ */
14144
+ WebsiteScraper.prototype.scrape = function (source) {
14145
+ return __awaiter(this, void 0, void 0, function () {
14146
+ var cacheFilehandler, markdownSource, knowledge;
14147
+ return __generator(this, function (_a) {
14148
+ switch (_a.label) {
14149
+ case 0: return [4 /*yield*/, this.$convert(source)];
14150
+ case 1:
14151
+ cacheFilehandler = _a.sent();
14152
+ markdownSource = {
14153
+ source: source.source,
14154
+ filename: cacheFilehandler.filename,
14155
+ url: null,
14156
+ mimeType: 'text/markdown',
14157
+ asText: function () {
14158
+ return cacheFilehandler.markdown;
14159
+ },
14160
+ asJson: function () {
14161
+ throw new UnexpectedError('Did not expect that `markdownScraper` would need to get the content `asJson`');
14162
+ },
14163
+ /*
14164
+ TODO: [🥽]
14165
+ > asBlob() {
14166
+ > throw new UnexpectedError(
14167
+ > 'Did not expect that `markdownScraper` would need to get the content `asBlob`',
14168
+ > );
14169
+ > },
14170
+ */
14171
+ };
14172
+ knowledge = this.markdownScraper.scrape(markdownSource);
14173
+ return [4 /*yield*/, cacheFilehandler.destroy()];
14174
+ case 2:
14175
+ _a.sent();
14176
+ return [2 /*return*/, knowledge];
14177
+ }
14178
+ });
14179
+ });
14180
+ };
14181
+ return WebsiteScraper;
14182
+ }());
14183
+ /**
14184
+ * TODO: [👣] Scraped website in .md can act as cache item - there is no need to run conversion each time
14185
+ * TODO: [🪂] Do it in parallel 11:11
14186
+ * Note: No need to aggregate usage here, it is done by intercepting the llmTools
14187
+ * Note: [🟢] Code in this file should never be released in packages that could be imported into browser environment
14188
+ */
14189
+
14190
+ /**
14191
+ * @@@
14192
+ *
14193
+ * @public exported from `@promptbook/website-crawler`
14194
+ */
14195
+ var createWebsiteScraper = Object.assign(function (tools, options) {
14196
+ return new WebsiteScraper(tools, options);
14197
+ }, websiteScraperMetadata); /* <- TODO: [🤛] */
14198
+ /**
14199
+ * TODO: [🎶] Naming "constructor" vs "creator" vs "factory"
14200
+ */
14201
+
14202
+ /**
14203
+ * Registration of known scraper
14204
+ *
14205
+ * Warning: This is not useful for the end user, it is just a side effect of the mechanism that handles all available known scrapers
14206
+ *
14207
+ * @public exported from `@promptbook/website-crawler`
14208
+ * @public exported from `@promptbook/cli`
14209
+ */
14210
+ var _WebsiteScraperRegistration = $scrapersRegister.register(createWebsiteScraper);
14211
+ /**
14212
+ * TODO: [🎶] Naming "constructor" vs "creator" vs "factory"
14213
+ */
14214
+
14078
14215
  exports.PROMPTBOOK_VERSION = PROMPTBOOK_VERSION;
14079
14216
  exports._AnthropicClaudeMetadataRegistration = _AnthropicClaudeMetadataRegistration;
14080
14217
  exports._AnthropicClaudeRegistration = _AnthropicClaudeRegistration;
@@ -14093,6 +14230,8 @@
14093
14230
  exports._OpenAiRegistration = _OpenAiRegistration;
14094
14231
  exports._PdfScraperMetadataRegistration = _PdfScraperMetadataRegistration;
14095
14232
  exports._PdfScraperRegistration = _PdfScraperRegistration;
14233
+ exports._WebsiteScraperMetadataRegistration = _WebsiteScraperMetadataRegistration;
14234
+ exports._WebsiteScraperRegistration = _WebsiteScraperRegistration;
14096
14235
 
14097
14236
  Object.defineProperty(exports, '__esModule', { value: true });
14098
14237