npm - @promptbook/cli - Versions diffs - 0.72.0-23 → 0.72.0-27 - Mend

@promptbook/cli 0.72.0-23 → 0.72.0-27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

package/esm/index.es.js +139 -2
package/esm/index.es.js.map +1 -1
package/esm/typings/src/_packages/cli.index.d.ts +4 -0
package/esm/typings/src/_packages/core.index.d.ts +2 -0
package/esm/typings/src/_packages/website-crawler.index.d.ts +8 -0
package/esm/typings/src/scrapers/website/WebsiteScraper.d.ts +46 -0
package/esm/typings/src/scrapers/website/createWebsiteScraper.d.ts +20 -0
package/esm/typings/src/scrapers/website/playground/website-scraper-playground.d.ts +5 -0
package/esm/typings/src/scrapers/website/register-constructor.d.ts +13 -0
package/esm/typings/src/scrapers/website/register-metadata.d.ts +24 -0
package/package.json +1 -1
package/umd/index.umd.js +140 -1
package/umd/index.umd.js.map +1 -1

package/esm/typings/src/_packages/cli.index.d.ts CHANGED Viewed

@@ -16,6 +16,8 @@ import { _MarkdownScraperRegistration } from '../scrapers/markdown/register-cons
 import { _MarkdownScraperMetadataRegistration } from '../scrapers/markdown/register-metadata';
 import { _PdfScraperRegistration } from '../scrapers/pdf/register-constructor';
 import { _PdfScraperMetadataRegistration } from '../scrapers/pdf/register-metadata';
+import { _WebsiteScraperRegistration } from '../scrapers/website/register-constructor';
+import { _WebsiteScraperMetadataRegistration } from '../scrapers/website/register-metadata';
 export { PROMPTBOOK_VERSION };
 export { _CLI };
 export { _AnthropicClaudeMetadataRegistration };
@@ -34,3 +36,5 @@ export { _MarkdownScraperRegistration };
 export { _MarkdownScraperMetadataRegistration };
 export { _PdfScraperRegistration };
 export { _PdfScraperMetadataRegistration };
+export { _WebsiteScraperRegistration };
+export { _WebsiteScraperMetadataRegistration };

package/esm/typings/src/_packages/core.index.d.ts CHANGED Viewed

@@ -83,6 +83,7 @@ import { _LegacyDocumentScraperMetadataRegistration } from '../scrapers/document
 import { _DocumentScraperMetadataRegistration } from '../scrapers/document/register-metadata';
 import { _MarkdownScraperMetadataRegistration } from '../scrapers/markdown/register-metadata';
 import { _PdfScraperMetadataRegistration } from '../scrapers/pdf/register-metadata';
+import { _WebsiteScraperMetadataRegistration } from '../scrapers/website/register-metadata';
 import { MemoryStorage } from '../storage/memory/MemoryStorage';
 import { PrefixStorage } from '../storage/memory/utils/PrefixStorage';
 import { executionReportJsonToString } from '../types/execution-report/executionReportJsonToString';
@@ -175,6 +176,7 @@ export { _LegacyDocumentScraperMetadataRegistration };
 export { _DocumentScraperMetadataRegistration };
 export { _MarkdownScraperMetadataRegistration };
 export { _PdfScraperMetadataRegistration };
+export { _WebsiteScraperMetadataRegistration };
 export { MemoryStorage };
 export { PrefixStorage };
 export { executionReportJsonToString };

package/esm/typings/src/_packages/website-crawler.index.d.ts ADDED Viewed

@@ -0,0 +1,8 @@
+import { PROMPTBOOK_VERSION } from '../version';
+import { createWebsiteScraper } from '../scrapers/website/createWebsiteScraper';
+import { _WebsiteScraperRegistration } from '../scrapers/website/register-constructor';
+import { WebsiteScraper } from '../scrapers/website/WebsiteScraper';
+export { PROMPTBOOK_VERSION };
+export { createWebsiteScraper };
+export { _WebsiteScraperRegistration };
+export { WebsiteScraper };

package/esm/typings/src/scrapers/website/WebsiteScraper.d.ts ADDED Viewed

@@ -0,0 +1,46 @@
+import type { KnowledgePiecePreparedJson } from '../../types/PipelineJson/KnowledgePieceJson';
+import type { string_markdown } from '../../types/typeAliases';
+import type { Converter } from '../_common/Converter';
+import type { Scraper } from '../_common/Scraper';
+import type { ScraperSourceHandler } from '../_common/Scraper';
+import type { ExecutionTools } from '../../execution/ExecutionTools';
+import type { PrepareAndScrapeOptions } from '../../prepare/PrepareAndScrapeOptions';
+import type { ScraperAndConverterMetadata } from '../_common/register/ScraperAndConverterMetadata';
+import type { ScraperIntermediateSource } from '../_common/ScraperIntermediateSource';
+/**
+ * Scraper for websites
+ *
+ * It is constructed from execution tools (`fs` and `llm`) and prepare-and-scrape options,
+ * and it delegates the extraction of knowledge pieces to an internal markdown scraper.
+ *
+ * @see `documentationUrl` for more details
+ * @public exported from `@promptbook/website-crawler`
+ */
+export declare class WebsiteScraper implements Converter, Scraper {
+    private readonly tools;
+    private readonly options;
+    /**
+     * Metadata of the scraper which includes title, mime types, etc.
+     */
+    get metadata(): ScraperAndConverterMetadata;
+    /**
+     * Markdown scraper is used internally
+     */
+    private readonly markdownScraper;
+    constructor(tools: Pick<ExecutionTools, 'fs' | 'llm'>, options: PrepareAndScrapeOptions);
+    /**
+     * Converts the website to `.md` file and returns intermediate source
+     *
+     * Note: `$` is used to indicate that this function is not a pure function - it leaves files on the disk and you are responsible for cleaning them by calling `destroy` method of returned object
+     *
+     * @param source handler of the source to convert
+     * @returns the intermediate source extended with the converted `markdown`
+     * @throws {KnowledgeScrapeError} when `source.url` is `null`
+     */
+    $convert(source: ScraperSourceHandler): Promise<ScraperIntermediateSource & {
+        markdown: string_markdown;
+    }>;
+    /**
+     * Scrapes the website and returns the knowledge pieces or `null` if it can't scrape it
+     *
+     * @param source handler of the source to scrape
+     * @returns knowledge pieces without their `sources` and `preparationIds`, or `null`
+     */
+    scrape(source: ScraperSourceHandler): Promise<ReadonlyArray<Omit<KnowledgePiecePreparedJson, 'sources' | 'preparationIds'>> | null>;
+}
+/**
+ * TODO: [👣] Scraped website in .md can act as cache item - there is no need to run conversion each time
+ * TODO: [🪂] Do it in parallel 11:11
+ * Note: No need to aggregate usage here, it is done by intercepting the llmTools
+ * Note: [🟢] Code in this file should never be released in packages that could be imported into a browser environment
+ */

package/esm/typings/src/scrapers/website/createWebsiteScraper.d.ts ADDED Viewed

@@ -0,0 +1,20 @@
+import type { ExecutionTools } from '../../execution/ExecutionTools';
+import type { PrepareAndScrapeOptions } from '../../prepare/PrepareAndScrapeOptions';
+import { WebsiteScraper } from './WebsiteScraper';
+/**
+ * Creates a new `WebsiteScraper` from the given tools and options
+ *
+ * The factory function is intersected with the (deeply read-only) scraper metadata,
+ * so `title`, `packageName`, `className`, `mimeTypes`, etc. can be read directly from it.
+ *
+ * NOTE(review): `tools` is typed as `Pick<ExecutionTools, 'llm'>` here while the `WebsiteScraper`
+ *               constructor is declared with `Pick<ExecutionTools, 'fs' | 'llm'>` - confirm which one is intended
+ *
+ * @public exported from `@promptbook/website-crawler`
+ */
+export declare const createWebsiteScraper: ((tools: Pick<ExecutionTools, 'llm'>, options: PrepareAndScrapeOptions) => WebsiteScraper) & import("type-fest/source/readonly-deep").ReadonlyObjectDeep<{
+    title: string;
+    packageName: string;
+    className: string;
+    mimeTypes: string[];
+    documentationUrl: "https://github.com/webgptorg/promptbook/discussions/@@";
+    isAvilableInBrowser: false;
+    requiredExecutables: never[];
+}>;
+/**
+ * TODO: [🎶] Naming "constructor" vs "creator" vs "factory"
+ */

package/esm/typings/src/scrapers/website/playground/website-scraper-playground.d.ts ADDED Viewed

@@ -0,0 +1,5 @@
+#!/usr/bin/env ts-node
+export {};
+/**
+ * Note: [⚫] Code in this file should never be published in any package
+ */

package/esm/typings/src/scrapers/website/register-constructor.d.ts ADDED Viewed

@@ -0,0 +1,13 @@
+import type { Registration } from '../../utils/$Register';
+/**
+ * Registration of known scraper
+ *
+ * Warning: This is not useful for the end user, it is just a side effect of the mechanism that handles all available known scrapers
+ *
+ * @public exported from `@promptbook/website-crawler`
+ * @public exported from `@promptbook/cli`
+ */
+export declare const _WebsiteScraperRegistration: Registration;
+/**
+ * TODO: [🎶] Naming "constructor" vs "creator" vs "factory"
+ */

package/esm/typings/src/scrapers/website/register-metadata.d.ts ADDED Viewed

@@ -0,0 +1,24 @@
+import type { Registration } from '../../utils/$Register';
+/**
+ * Metadata of the scraper
+ *
+ * Deeply read-only description of the website scraper: its title, package and class name,
+ * handled mime types, documentation URL, browser availability and required executables.
+ *
+ * NOTE(review): `documentationUrl` still ends with the `@@` placeholder - presumably to be replaced by a real discussion id
+ * NOTE(review): `isAvilableInBrowser` is misspelled ("Avilable"), but it is part of the declared shape, so it is kept as is here
+ *
+ * @private within the scraper directory
+ */
+export declare const websiteScraperMetadata: import("type-fest/source/readonly-deep").ReadonlyObjectDeep<{
+    title: string;
+    packageName: string;
+    className: string;
+    mimeTypes: string[];
+    documentationUrl: "https://github.com/webgptorg/promptbook/discussions/@@";
+    isAvilableInBrowser: false;
+    requiredExecutables: never[];
+}>;
+/**
+ * Registration of known scraper metadata
+ *
+ * Warning: This is not useful for the end user, it is just a side effect of the mechanism that handles all available known scrapers
+ *
+ * @public exported from `@promptbook/core`
+ * @public exported from `@promptbook/cli`
+ */
+export declare const _WebsiteScraperMetadataRegistration: Registration;

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
     "name": "@promptbook/cli",
-    "version": "0.72.0-23",
+    "version": "0.72.0-27",
     "description": "Supercharge your use of large language models",
     "private": false,
     "sideEffects": false,

package/umd/index.umd.js CHANGED Viewed

@@ -39,7 +39,7 @@
     /**
      * The version of the Promptbook library
      */
-    var PROMPTBOOK_VERSION = '0.72.0-22';
+    var PROMPTBOOK_VERSION = '0.72.0-26';
     // TODO: [main] !!!! List here all the versions and annotate + put into script
     /*! *****************************************************************************
@@ -14075,6 +14075,143 @@
      * TODO: [🎶] Naming "constructor" vs "creator" vs "factory"
      */
+    /**
+     * Metadata of the scraper
+     *
+     * The object is passed through `$deepFreeze` and declares that the scraper handles `text/html`,
+     * is not available in the browser and requires no executables.
+     *
+     * NOTE(review): `documentationUrl` still ends with the `@@` placeholder - presumably to be replaced by a real discussion id
+     *
+     * @private within the scraper directory
+     */
+    var websiteScraperMetadata = $deepFreeze({
+        title: 'Website scraper',
+        packageName: '@promptbook/website-crawler',
+        className: 'WebsiteScraper',
+        mimeTypes: ['text/html'],
+        documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/@@',
+        isAvilableInBrowser: false,
+        requiredExecutables: [],
+    }); /* <- TODO: [🤛] */
+    /**
+     * Registration of known scraper metadata
+     *
+     * Warning: This is not useful for the end user, it is just a side effect of the mechanism that handles all available known scrapers
+     *
+     * @public exported from `@promptbook/core`
+     * @public exported from `@promptbook/cli`
+     */
+    var _WebsiteScraperMetadataRegistration = $scrapersMetadataRegister.register(websiteScraperMetadata);
+    /**
+     * Scraper for websites
+     *
+     * @see `documentationUrl` for more details
+     * @public exported from `@promptbook/website-crawler`
+     */
+    var WebsiteScraper = /** @class */ (function () {
+        function WebsiteScraper(tools, options) {
+            this.tools = tools;
+            this.options = options;
+            this.markdownScraper = new MarkdownScraper(tools, options);
+        }
+        Object.defineProperty(WebsiteScraper.prototype, "metadata", {
+            /**
+             * Metadata of the scraper which includes title, mime types, etc.
+             */
+            get: function () {
+                return websiteScraperMetadata;
+            },
+            enumerable: false,
+            configurable: true
+        });
+        /**
+         * Convert the website  to `.md` file and returns intermediate source
+         *
+         * Note: `$` is used to indicate that this function is not a pure function - it leaves files on the disk and you are responsible for cleaning them by calling `destroy` method of returned object
+         */
+        WebsiteScraper.prototype.$convert = function (source) {
+            return __awaiter(this, void 0, void 0, function () {
+                var markdown;
+                return __generator(this, function (_a) {
+                    if (source.url === null) {
+                        throw new KnowledgeScrapeError('Website scraper requires URL');
+                    }
+                    markdown = "";
+                    return [2 /*return*/, __assign(__assign({}, source), { markdown: markdown, destroy: function () { } })];
+                });
+            });
+        };
+        /**
+         * Scrapes the website and returns the knowledge pieces or `null` if it can't scrape it
+         */
+        WebsiteScraper.prototype.scrape = function (source) {
+            return __awaiter(this, void 0, void 0, function () {
+                var cacheFilehandler, markdownSource, knowledge;
+                return __generator(this, function (_a) {
+                    switch (_a.label) {
+                        case 0: return [4 /*yield*/, this.$convert(source)];
+                        case 1:
+                            cacheFilehandler = _a.sent();
+                            markdownSource = {
+                                source: source.source,
+                                filename: cacheFilehandler.filename,
+                                url: null,
+                                mimeType: 'text/markdown',
+                                asText: function () {
+                                    return cacheFilehandler.markdown;
+                                },
+                                asJson: function () {
+                                    throw new UnexpectedError('Did not expect that `markdownScraper` would need to get the content `asJson`');
+                                },
+                                /*
+                                TODO: [🥽]
+                                    > asBlob() {
+                                    >     throw new UnexpectedError(
+                                    >         'Did not expect that `markdownScraper` would need to get the content `asBlob`',
+                                    >     );
+                                    > },
+                                */
+                            };
+                            knowledge = this.markdownScraper.scrape(markdownSource);
+                            return [4 /*yield*/, cacheFilehandler.destroy()];
+                        case 2:
+                            _a.sent();
+                            return [2 /*return*/, knowledge];
+                    }
+                });
+            });
+        };
+        return WebsiteScraper;
+    }());
+    /**
+     * TODO: [👣] Scraped website in .md can act as cache item - there is no need to run conversion each time
+     * TODO: [🪂] Do it in parallel 11:11
+     * Note: No need to aggregate usage here, it is done by intercepting the llmTools
+     * Note: [🟢] Code in this file should never be released in packages that could be imported into a browser environment
+     */
+    /**
+     * Creates a new `WebsiteScraper` from the given tools and options
+     *
+     * The properties of `websiteScraperMetadata` are copied onto the factory function via `Object.assign`,
+     * so the metadata can be read directly from `createWebsiteScraper`.
+     *
+     * @public exported from `@promptbook/website-crawler`
+     */
+    var createWebsiteScraper = Object.assign(function (tools, options) {
+        return new WebsiteScraper(tools, options);
+    }, websiteScraperMetadata); /* <- TODO: [🤛] */
+    /**
+     * TODO: [🎶] Naming "constructor" vs "creator" vs "factory"
+     */
+    /**
+     * Registration of known scraper
+     *
+     * Warning: This is not useful for the end user, it is just a side effect of the mechanism that handles all available known scrapers
+     *
+     * @public exported from `@promptbook/website-crawler`
+     * @public exported from `@promptbook/cli`
+     */
+    var _WebsiteScraperRegistration = $scrapersRegister.register(createWebsiteScraper);
+    /**
+     * TODO: [🎶] Naming "constructor" vs "creator" vs "factory"
+     */
     exports.PROMPTBOOK_VERSION = PROMPTBOOK_VERSION;
     exports._AnthropicClaudeMetadataRegistration = _AnthropicClaudeMetadataRegistration;
     exports._AnthropicClaudeRegistration = _AnthropicClaudeRegistration;
@@ -14093,6 +14230,8 @@
     exports._OpenAiRegistration = _OpenAiRegistration;
     exports._PdfScraperMetadataRegistration = _PdfScraperMetadataRegistration;
     exports._PdfScraperRegistration = _PdfScraperRegistration;
+    exports._WebsiteScraperMetadataRegistration = _WebsiteScraperMetadataRegistration;
+    exports._WebsiteScraperRegistration = _WebsiteScraperRegistration;
     Object.defineProperty(exports, '__esModule', { value: true });