npm - @promptbook/website-crawler - Versions diffs - 0.88.0 → 0.89.0-2 - Mend

@promptbook/website-crawler 0.88.0 → 0.89.0-2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30) hide show

package/README.md CHANGED Viewed

@@ -23,6 +23,10 @@
+<blockquote style="color: #ff8811">
+    <b>⚠ Warning:</b> This is a pre-release version of the library. It is not yet ready for production use. Please look at the <a href="https://www.npmjs.com/package/@promptbook/core?activeTab=versions">latest stable release</a>.
+</blockquote>
 ## 📦 Package `@promptbook/website-crawler`
 - Promptbooks are [divided into several](#-packages) packages, all are published from [single monorepo](https://github.com/webgptorg/promptbook).

package/esm/index.es.js CHANGED Viewed

@@ -29,7 +29,7 @@ const BOOK_LANGUAGE_VERSION = '1.0.0';
  * @generated
  * @see https://github.com/webgptorg/promptbook
  */
-const PROMPTBOOK_ENGINE_VERSION = '0.88.0';
+const PROMPTBOOK_ENGINE_VERSION = '0.89.0-2';
 /**
  * TODO: string_promptbook_version should be constrained to all versions of the Promptbook engine
  * Note: [💞] Ignore a discrepancy between file name and entity name
@@ -2425,6 +2425,7 @@ function assertsTaskSuccessful(executionResult) {
     const { isSuccessful, errors, warnings } = executionResult;
     for (const warning of warnings) {
         console.warn(warning.message);
+        // <- TODO: [🏮] Some standard way how to transform errors into warnings and how to handle non-critical fails during the tasks
     }
     if (isSuccessful === true) {
         return;
@@ -2603,30 +2604,42 @@ async function forEachAsync(array, options, callbackfunction) {
     await Promise.all(tasks);
 }
+/**
+ * Represents the zero value (exactly 0, not marked as uncertain)
+ *
+ * @public exported from `@promptbook/core`
+ */
+const ZERO_VALUE = $deepFreeze({ value: 0 });
+/**
+ * Represents the uncertain zero value (0 flagged with `isUncertain: true`)
+ *
+ * @public exported from `@promptbook/core`
+ */
+const UNCERTAIN_ZERO_VALUE = $deepFreeze({ value: 0, isUncertain: true });
 /**
  * Represents the usage with no resources consumed
  *
  * @public exported from `@promptbook/core`
  */
 const ZERO_USAGE = $deepFreeze({
-    price: { value: 0 },
+    price: ZERO_VALUE,
     input: {
-        tokensCount: { value: 0 },
-        charactersCount: { value: 0 },
-        wordsCount: { value: 0 },
-        sentencesCount: { value: 0 },
-        linesCount: { value: 0 },
-        paragraphsCount: { value: 0 },
-        pagesCount: { value: 0 },
+        tokensCount: ZERO_VALUE,
+        charactersCount: ZERO_VALUE,
+        wordsCount: ZERO_VALUE,
+        sentencesCount: ZERO_VALUE,
+        linesCount: ZERO_VALUE,
+        paragraphsCount: ZERO_VALUE,
+        pagesCount: ZERO_VALUE,
     },
     output: {
-        tokensCount: { value: 0 },
-        charactersCount: { value: 0 },
-        wordsCount: { value: 0 },
-        sentencesCount: { value: 0 },
-        linesCount: { value: 0 },
-        paragraphsCount: { value: 0 },
-        pagesCount: { value: 0 },
+        tokensCount: ZERO_VALUE,
+        charactersCount: ZERO_VALUE,
+        wordsCount: ZERO_VALUE,
+        sentencesCount: ZERO_VALUE,
+        linesCount: ZERO_VALUE,
+        paragraphsCount: ZERO_VALUE,
+        pagesCount: ZERO_VALUE,
     },
 });
 /**
@@ -2635,24 +2648,24 @@ const ZERO_USAGE = $deepFreeze({
  * @public exported from `@promptbook/core`
  */
 $deepFreeze({
-    price: { value: 0, isUncertain: true },
+    price: UNCERTAIN_ZERO_VALUE,
     input: {
-        tokensCount: { value: 0, isUncertain: true },
-        charactersCount: { value: 0, isUncertain: true },
-        wordsCount: { value: 0, isUncertain: true },
-        sentencesCount: { value: 0, isUncertain: true },
-        linesCount: { value: 0, isUncertain: true },
-        paragraphsCount: { value: 0, isUncertain: true },
-        pagesCount: { value: 0, isUncertain: true },
+        tokensCount: UNCERTAIN_ZERO_VALUE,
+        charactersCount: UNCERTAIN_ZERO_VALUE,
+        wordsCount: UNCERTAIN_ZERO_VALUE,
+        sentencesCount: UNCERTAIN_ZERO_VALUE,
+        linesCount: UNCERTAIN_ZERO_VALUE,
+        paragraphsCount: UNCERTAIN_ZERO_VALUE,
+        pagesCount: UNCERTAIN_ZERO_VALUE,
     },
     output: {
-        tokensCount: { value: 0, isUncertain: true },
-        charactersCount: { value: 0, isUncertain: true },
-        wordsCount: { value: 0, isUncertain: true },
-        sentencesCount: { value: 0, isUncertain: true },
-        linesCount: { value: 0, isUncertain: true },
-        paragraphsCount: { value: 0, isUncertain: true },
-        pagesCount: { value: 0, isUncertain: true },
+        tokensCount: UNCERTAIN_ZERO_VALUE,
+        charactersCount: UNCERTAIN_ZERO_VALUE,
+        wordsCount: UNCERTAIN_ZERO_VALUE,
+        sentencesCount: UNCERTAIN_ZERO_VALUE,
+        linesCount: UNCERTAIN_ZERO_VALUE,
+        paragraphsCount: UNCERTAIN_ZERO_VALUE,
+        pagesCount: UNCERTAIN_ZERO_VALUE,
     },
 });
 /**
@@ -2713,8 +2726,9 @@ function addUsage(...usageItems) {
  * @returns LLM tools with same functionality with added total cost counting
  * @public exported from `@promptbook/core`
  */
-function countTotalUsage(llmTools) {
+function countUsage(llmTools) {
     let totalUsage = ZERO_USAGE;
+    const spending = new Subject();
     const proxyTools = {
         get title() {
             // TODO: [🧠] Maybe put here some suffix
@@ -2724,12 +2738,15 @@ function countTotalUsage(llmTools) {
             // TODO: [🧠] Maybe put here some suffix
             return llmTools.description;
         },
-        async checkConfiguration() {
+        checkConfiguration() {
             return /* not await */ llmTools.checkConfiguration();
         },
         listModels() {
             return /* not await */ llmTools.listModels();
         },
+        spending() {
+            return spending.asObservable();
+        },
         getTotalUsage() {
             // <- Note: [🥫] Not using getter `get totalUsage` but `getTotalUsage` to allow this object to be proxied
             return totalUsage;
@@ -2740,6 +2757,7 @@ function countTotalUsage(llmTools) {
             // console.info('[🚕] callChatModel through countTotalUsage');
             const promptResult = await llmTools.callChatModel(prompt);
             totalUsage = addUsage(totalUsage, promptResult.usage);
+            spending.next(promptResult.usage);
             return promptResult;
         };
     }
@@ -2748,6 +2766,7 @@ function countTotalUsage(llmTools) {
             // console.info('[🚕] callCompletionModel through countTotalUsage');
             const promptResult = await llmTools.callCompletionModel(prompt);
             totalUsage = addUsage(totalUsage, promptResult.usage);
+            spending.next(promptResult.usage);
             return promptResult;
         };
     }
@@ -2756,6 +2775,7 @@ function countTotalUsage(llmTools) {
             // console.info('[🚕] callEmbeddingModel through countTotalUsage');
             const promptResult = await llmTools.callEmbeddingModel(prompt);
             totalUsage = addUsage(totalUsage, promptResult.usage);
+            spending.next(promptResult.usage);
             return promptResult;
         };
     }
@@ -2933,6 +2953,7 @@ function joinLlmExecutionTools(...llmExecutionTools) {
         `);
         // TODO: [🟥] Detect browser / node and make it colorful
         console.warn(warningMessage);
+        // <- TODO: [🏮] Some standard way how to transform errors into warnings and how to handle non-critical fails during the tasks
         /*
         return {
             async listModels() {
@@ -3376,63 +3397,73 @@ async function prepareKnowledgePieces(knowledgeSources, tools, options) {
     const { maxParallelCount = DEFAULT_MAX_PARALLEL_COUNT, rootDirname, isVerbose = DEFAULT_IS_VERBOSE } = options;
     const knowledgePreparedUnflatten = new Array(knowledgeSources.length);
     await forEachAsync(knowledgeSources, { maxParallelCount }, async (knowledgeSource, index) => {
-        let partialPieces = null;
-        const sourceHandler = await makeKnowledgeSourceHandler(knowledgeSource, tools, { rootDirname, isVerbose });
-        const scrapers = arrayableToArray(tools.scrapers);
-        for (const scraper of scrapers) {
-            if (!scraper.metadata.mimeTypes.includes(sourceHandler.mimeType)
-            // <- TODO: [🦔] Implement mime-type wildcards
-            ) {
-                continue;
-            }
-            const partialPiecesUnchecked = await scraper.scrape(sourceHandler);
-            if (partialPiecesUnchecked !== null) {
-                partialPieces = [...partialPiecesUnchecked];
-                // <- TODO: [🪓] Here should be no need for spreading new array, just `partialPieces = partialPiecesUnchecked`
-                break;
-            }
-            console.warn(spaceTrim$1((block) => `
-                        Cannot scrape knowledge from source despite the scraper \`${scraper.metadata.className}\` supports the mime type "${sourceHandler.mimeType}".
+        try {
+            let partialPieces = null;
+            const sourceHandler = await makeKnowledgeSourceHandler(knowledgeSource, tools, { rootDirname, isVerbose });
+            const scrapers = arrayableToArray(tools.scrapers);
+            for (const scraper of scrapers) {
+                if (!scraper.metadata.mimeTypes.includes(sourceHandler.mimeType)
+                // <- TODO: [🦔] Implement mime-type wildcards
+                ) {
+                    continue;
+                }
+                const partialPiecesUnchecked = await scraper.scrape(sourceHandler);
+                if (partialPiecesUnchecked !== null) {
+                    partialPieces = [...partialPiecesUnchecked];
+                    // <- TODO: [🪓] Here should be no need for spreading new array, just `partialPieces = partialPiecesUnchecked`
+                    break;
+                }
+                console.warn(spaceTrim$1((block) => `
+                            Cannot scrape knowledge from source despite the scraper \`${scraper.metadata.className}\` supports the mime type "${sourceHandler.mimeType}".
-                        The source:
-                        ${block(knowledgeSource.knowledgeSourceContent
-                .split('\n')
-                .map((line) => `> ${line}`)
-                .join('\n'))}
+                            The source:
+                            ${block(knowledgeSource.knowledgeSourceContent
+                    .split('\n')
+                    .map((line) => `> ${line}`)
+                    .join('\n'))}
-                        ${block($registeredScrapersMessage(scrapers))}
+                            ${block($registeredScrapersMessage(scrapers))}
-                    `));
-        }
-        if (partialPieces === null) {
-            throw new KnowledgeScrapeError(spaceTrim$1((block) => `
-                        Cannot scrape knowledge
+                        `));
+                // <- TODO: [🏮] Some standard way how to transform errors into warnings and how to handle non-critical fails during the tasks
+            }
+            if (partialPieces === null) {
+                throw new KnowledgeScrapeError(spaceTrim$1((block) => `
+                            Cannot scrape knowledge
-                        The source:
-                        > ${block(knowledgeSource.knowledgeSourceContent
-                .split('\n')
-                .map((line) => `> ${line}`)
-                .join('\n'))}
+                            The source:
+                            > ${block(knowledgeSource.knowledgeSourceContent
+                    .split('\n')
+                    .map((line) => `> ${line}`)
+                    .join('\n'))}
-                        No scraper found for the mime type "${sourceHandler.mimeType}"
+                            No scraper found for the mime type "${sourceHandler.mimeType}"
-                        ${block($registeredScrapersMessage(scrapers))}
+                            ${block($registeredScrapersMessage(scrapers))}
-                    `));
+                        `));
+            }
+            const pieces = partialPieces.map((partialPiece) => ({
+                ...partialPiece,
+                sources: [
+                    {
+                        name: knowledgeSource.name,
+                        // line, column <- TODO: [☀]
+                        // <- TODO: [❎]
+                    },
+                ],
+            }));
+            knowledgePreparedUnflatten[index] = pieces;
+        }
+        catch (error) {
+            if (!(error instanceof Error)) {
+                throw error;
+            }
+            console.warn(error);
+            // <- TODO: [🏮] Some standard way how to transform errors into warnings and how to handle non-critical fails during the tasks
         }
-        const pieces = partialPieces.map((partialPiece) => ({
-            ...partialPiece,
-            sources: [
-                {
-                    name: knowledgeSource.name,
-                    // line, column <- TODO: [☀]
-                    // <- TODO: [❎]
-                },
-            ],
-        }));
-        knowledgePreparedUnflatten[index] = pieces;
     });
     const knowledgePrepared = knowledgePreparedUnflatten.flat();
     return knowledgePrepared;
@@ -3538,7 +3569,7 @@ async function preparePipeline(pipeline, tools, options) {
     // TODO: [🚐] Make arrayable LLMs -> single LLM DRY
     const _llms = arrayableToArray(tools.llm);
     const llmTools = _llms.length === 1 ? _llms[0] : joinLlmExecutionTools(..._llms);
-    const llmToolsWithUsage = countTotalUsage(llmTools);
+    const llmToolsWithUsage = countUsage(llmTools);
     //    <- TODO: [🌯]
     /*
     TODO: [🧠][🪑][🔃] Should this be done or not
@@ -3850,7 +3881,7 @@ function extractParameterNamesFromTask(task) {
             if (parameterNames.has(subparameterName)) {
                 parameterNames.delete(subparameterName);
                 parameterNames.add(foreach.parameterName);
-                // <- TODO: [🚎] Warn/logic error when `subparameterName` not used
+                // <- TODO: [🏮] Warn/logic error when `subparameterName` not used
             }
         }
     }
@@ -5446,6 +5477,7 @@ function createPipelineExecutor(options) {
                     @see more at https://ptbk.io/prepare-pipeline
                 `));
+        // <- TODO: [🏮] Some standard way how to transform errors into warnings and how to handle non-critical fails during the tasks
     }
     let runCount = 0;
     const pipelineExecutorWithCallback = async (inputParameters, onProgress) => {