npm - @promptbook/pdf - Versions diffs - 0.92.0-5 → 0.92.0-6 - Mend

@promptbook/pdf 0.92.0-5 → 0.92.0-6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14)

package/esm/index.es.js (changed)

@@ -26,7 +26,7 @@ const BOOK_LANGUAGE_VERSION = '1.0.0';
  * @generated
  * @see https://github.com/webgptorg/promptbook
  */
-const PROMPTBOOK_ENGINE_VERSION = '0.92.0-5';
+const PROMPTBOOK_ENGINE_VERSION = '0.92.0-6';
 /**
  * TODO: string_promptbook_version should be constrained to the all versions of Promptbook engine
  * Note: [💞] Ignore a discrepancy between file name and entity name
@@ -2137,6 +2137,45 @@ function isPipelinePrepared(pipeline) {
  *     - [♨] Are tasks prepared
  */
+/**
+ * Converts a JavaScript Object Notation (JSON) string into an object.
+ *
+ * Note: This is wrapper around `JSON.parse()` with better error and type handling
+ *
+ * @public exported from `@promptbook/utils`
+ */
+function jsonParse(value) {
+    if (value === undefined) {
+        throw new Error(`Can not parse JSON from undefined value.`);
+    }
+    else if (typeof value !== 'string') {
+        console.error('Can not parse JSON from non-string value.', { text: value });
+        throw new Error(spaceTrim(`
+                    Can not parse JSON from non-string value.
+                    The value type: ${typeof value}
+                    See more in console.
+                `));
+    }
+    try {
+        return JSON.parse(value);
+    }
+    catch (error) {
+        if (!(error instanceof Error)) {
+            throw error;
+        }
+        throw new Error(spaceTrim((block) => `
+                    ${block(error.message)}
+                    The JSON text:
+                    ${block(value)}
+                `));
+    }
+}
+/**
+ * TODO: !!!! Use in Promptbook.studio
+ */
 /**
  * Recursively converts JSON strings to JSON objects
@@ -2155,7 +2194,7 @@ function jsonStringsToJsons(object) {
     const newObject = { ...object };
     for (const [key, value] of Object.entries(object)) {
         if (typeof value === 'string' && isValidJsonString(value)) {
-            newObject[key] = JSON.parse(value);
+            newObject[key] = jsonParse(value);
         }
         else {
             newObject[key] = jsonStringsToJsons(value);
@@ -3002,7 +3041,7 @@ async function preparePersona(personaDescription, tools, options) {
     }).asPromise();
     const { outputParameters } = result;
     const { modelsRequirements: modelsRequirementsJson } = outputParameters;
-    const modelsRequirementsUnchecked = JSON.parse(modelsRequirementsJson);
+    const modelsRequirementsUnchecked = jsonParse(modelsRequirementsJson);
     if (isVerbose) {
         console.info(`PERSONA ${personaDescription}`, modelsRequirementsUnchecked);
     }
@@ -3458,7 +3497,7 @@ async function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
                 > },
             */
             async asJson() {
-                return JSON.parse(await tools.fs.readFile(filename, 'utf-8'));
+                return jsonParse(await tools.fs.readFile(filename, 'utf-8'));
             },
             async asText() {
                 return await tools.fs.readFile(filename, 'utf-8');
@@ -5145,13 +5184,79 @@ async function getExamplesForTask(task) {
 /**
  * @@@
  *
+ * Here is the place where RAG (retrieval-augmented generation) happens
+ *
  * @private internal utility of `createPipelineExecutor`
  */
 async function getKnowledgeForTask(options) {
-    const { preparedPipeline, task } = options;
-    return preparedPipeline.knowledgePieces.map(({ content }) => `- ${content}`).join('\n');
+    const { tools, preparedPipeline, task } = options;
+    const firstKnowlegePiece = preparedPipeline.knowledgePieces[0];
+    const firstKnowlegeIndex = firstKnowlegePiece === null || firstKnowlegePiece === void 0 ? void 0 : firstKnowlegePiece.index[0];
+    // <- TODO: Do not use just first knowledge piece and first index to determine embedding model, use also keyword search
+    if (firstKnowlegePiece === undefined || firstKnowlegeIndex === undefined) {
+        return 'No knowledge pieces found';
+    }
+    // TODO: [🚐] Make arrayable LLMs -> single LLM DRY
+    const _llms = arrayableToArray(tools.llm);
+    const llmTools = _llms.length === 1 ? _llms[0] : joinLlmExecutionTools(..._llms);
+    const taskEmbeddingPrompt = {
+        title: 'Knowledge Search',
+        modelRequirements: {
+            modelVariant: 'EMBEDDING',
+            modelName: firstKnowlegeIndex.modelName,
+        },
+        content: task.content,
+        parameters: {
+        /* !!!!!!!! */
+        },
+    };
+    const taskEmbeddingResult = await llmTools.callEmbeddingModel(taskEmbeddingPrompt);
+    const knowledgePiecesWithRelevance = preparedPipeline.knowledgePieces.map((knowledgePiece) => {
+        const { index } = knowledgePiece;
+        const knowledgePieceIndex = index.find((i) => i.modelName === firstKnowlegeIndex.modelName);
+        // <- TODO: Do not use just first knowledge piece and first index to determine embedding model
+        if (knowledgePieceIndex === undefined) {
+            return {
+                content: knowledgePiece.content,
+                relevance: 0,
+            };
+        }
+        const relevance = computeCosineSimilarity(knowledgePieceIndex.position, taskEmbeddingResult.content);
+        return {
+            content: knowledgePiece.content,
+            relevance,
+        };
+    });
+    const knowledgePiecesSorted = knowledgePiecesWithRelevance.sort((a, b) => a.relevance - b.relevance);
+    const knowledgePiecesLimited = knowledgePiecesSorted.slice(0, 5);
+    console.log('!!! Embedding', {
+        task,
+        taskEmbeddingPrompt,
+        taskEmbeddingResult,
+        firstKnowlegePiece,
+        firstKnowlegeIndex,
+        knowledgePiecesWithRelevance,
+        knowledgePiecesSorted,
+        knowledgePiecesLimited,
+    });
+    return knowledgePiecesLimited.map(({ content }) => `- ${content}`).join('\n');
     //                                                      <- TODO: [🧠] Some smart aggregation of knowledge pieces, single-line vs multi-line vs mixed
 }
+// TODO: !!!!!! Annotate + to new file
+function computeCosineSimilarity(embeddingVector1, embeddingVector2) {
+    if (embeddingVector1.length !== embeddingVector2.length) {
+        throw new TypeError('Embedding vectors must have the same length');
+    }
+    const dotProduct = embeddingVector1.reduce((sum, value, index) => sum + value * embeddingVector2[index], 0);
+    const magnitude1 = Math.sqrt(embeddingVector1.reduce((sum, value) => sum + value * value, 0));
+    const magnitude2 = Math.sqrt(embeddingVector2.reduce((sum, value) => sum + value * value, 0));
+    return 1 - dotProduct / (magnitude1 * magnitude2);
+}
+/**
+ * TODO: !!!! Verify if this is working
+ * TODO: [♨] Implement Better - use keyword search
+ * TODO: [♨] Examples of values
+ */
 /**
  * @@@
@@ -5159,9 +5264,9 @@ async function getKnowledgeForTask(options) {
  * @private internal utility of `createPipelineExecutor`
  */
 async function getReservedParametersForTask(options) {
-    const { preparedPipeline, task, pipelineIdentification } = options;
+    const { tools, preparedPipeline, task, pipelineIdentification } = options;
     const context = await getContextForTask(); // <- [🏍]
-    const knowledge = await getKnowledgeForTask({ preparedPipeline, task });
+    const knowledge = await getKnowledgeForTask({ tools, preparedPipeline, task });
     const examples = await getExamplesForTask();
     const currentDate = new Date().toISOString(); // <- TODO: [🧠][💩] Better
     const modelName = RESERVED_PARAMETER_MISSING_VALUE;
@@ -5223,6 +5328,7 @@ async function executeTask(options) {
     }
     const definedParameters = Object.freeze({
         ...(await getReservedParametersForTask({
+            tools,
             preparedPipeline,
             task: currentTask,
             pipelineIdentification,