@promptbook/markitdown 0.92.0-5 → 0.92.0-7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/esm/index.es.js CHANGED
@@ -26,7 +26,7 @@ const BOOK_LANGUAGE_VERSION = '1.0.0';
26
26
  * @generated
27
27
  * @see https://github.com/webgptorg/promptbook
28
28
  */
29
- const PROMPTBOOK_ENGINE_VERSION = '0.92.0-5';
29
+ const PROMPTBOOK_ENGINE_VERSION = '0.92.0-7';
30
30
  /**
31
31
  * TODO: string_promptbook_version should be constrained to the all versions of Promptbook engine
32
32
  * Note: [💞] Ignore a discrepancy between file name and entity name
@@ -2124,6 +2124,45 @@ function isPipelinePrepared(pipeline) {
2124
2124
  * - [♨] Are tasks prepared
2125
2125
  */
2126
2126
 
2127
/**
 * Converts a JavaScript Object Notation (JSON) string into an object.
 *
 * Note: This is a wrapper around `JSON.parse()` with better error and type handling
 *
 * @param value - JSON text to parse; anything other than a string is rejected
 * @returns The value produced by `JSON.parse(value)`
 * @throws {Error} When `value` is `undefined`, when it is not a string (the value is also logged via `console.error`),
 *                 or when it is not valid JSON (the message then contains the JSON text and the original
 *                 parsing error is attached as `cause`)
 * @public exported from `@promptbook/utils`
 */
function jsonParse(value) {
    if (value === undefined) {
        throw new Error(`Can not parse JSON from undefined value.`);
    }
    else if (typeof value !== 'string') {
        // Note: The value itself goes only to the console, the thrown message carries just its type
        console.error('Can not parse JSON from non-string value.', { text: value });
        throw new Error(spaceTrim(`
            Can not parse JSON from non-string value.

            The value type: ${typeof value}
            See more in console.
        `));
    }
    try {
        return JSON.parse(value);
    }
    catch (error) {
        if (!(error instanceof Error)) {
            // Note: Non-Error throwables are passed through untouched
            throw error;
        }
        // Note: Keep the original error as `cause` so its type and stack are not lost by the wrapping
        throw new Error(spaceTrim((block) => `
            ${block(error.message)}

            The JSON text:
            ${block(value)}
        `), { cause: error });
    }
}
2162
+ /**
2163
+ * TODO: !!!! Use in Promptbook.studio
2164
+ */
2165
+
2127
2166
  /**
2128
2167
  * Recursively converts JSON strings to JSON objects
2129
2168
 
@@ -2142,7 +2181,7 @@ function jsonStringsToJsons(object) {
2142
2181
  const newObject = { ...object };
2143
2182
  for (const [key, value] of Object.entries(object)) {
2144
2183
  if (typeof value === 'string' && isValidJsonString(value)) {
2145
- newObject[key] = JSON.parse(value);
2184
+ newObject[key] = jsonParse(value);
2146
2185
  }
2147
2186
  else {
2148
2187
  newObject[key] = jsonStringsToJsons(value);
@@ -2989,18 +3028,26 @@ async function preparePersona(personaDescription, tools, options) {
2989
3028
  }).asPromise();
2990
3029
  const { outputParameters } = result;
2991
3030
  const { modelsRequirements: modelsRequirementsJson } = outputParameters;
2992
- const modelsRequirementsUnchecked = JSON.parse(modelsRequirementsJson);
3031
+ let modelsRequirementsUnchecked = jsonParse(modelsRequirementsJson);
2993
3032
  if (isVerbose) {
2994
3033
  console.info(`PERSONA ${personaDescription}`, modelsRequirementsUnchecked);
2995
3034
  }
2996
3035
  if (!Array.isArray(modelsRequirementsUnchecked)) {
2997
- throw new UnexpectedError(spaceTrim((block) => `
3036
+ // <- TODO: Book should have syntax and system to enforce shape of JSON
3037
+ modelsRequirementsUnchecked = [modelsRequirementsUnchecked];
3038
+ /*
3039
+ throw new UnexpectedError(
3040
+ spaceTrim(
3041
+ (block) => `
2998
3042
  Invalid \`modelsRequirements\`:
2999
3043
 
3000
3044
  \`\`\`json
3001
3045
  ${block(JSON.stringify(modelsRequirementsUnchecked, null, 4))}
3002
3046
  \`\`\`
3003
- `));
3047
+ `,
3048
+ ),
3049
+ );
3050
+ */
3004
3051
  }
3005
3052
  const modelsRequirements = modelsRequirementsUnchecked.map((modelRequirements) => ({
3006
3053
  modelVariant: 'CHAT',
@@ -3445,7 +3492,7 @@ async function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
3445
3492
  > },
3446
3493
  */
3447
3494
  async asJson() {
3448
- return JSON.parse(await tools.fs.readFile(filename, 'utf-8'));
3495
+ return jsonParse(await tools.fs.readFile(filename, 'utf-8'));
3449
3496
  },
3450
3497
  async asText() {
3451
3498
  return await tools.fs.readFile(filename, 'utf-8');
@@ -5132,13 +5179,79 @@ async function getExamplesForTask(task) {
5132
5179
  /**
5133
5180
  * @@@
5134
5181
  *
5182
+ * Here is the place where RAG (retrieval-augmented generation) happens
5183
+ *
5135
5184
  * @private internal utility of `createPipelineExecutor`
5136
5185
  */
5137
5186
  async function getKnowledgeForTask(options) {
5138
- const { preparedPipeline, task } = options;
5139
- return preparedPipeline.knowledgePieces.map(({ content }) => `- ${content}`).join('\n');
5187
+ const { tools, preparedPipeline, task } = options;
5188
+ const firstKnowlegePiece = preparedPipeline.knowledgePieces[0];
5189
+ const firstKnowlegeIndex = firstKnowlegePiece === null || firstKnowlegePiece === void 0 ? void 0 : firstKnowlegePiece.index[0];
5190
+ // <- TODO: Do not use just first knowledge piece and first index to determine embedding model, use also keyword search
5191
+ if (firstKnowlegePiece === undefined || firstKnowlegeIndex === undefined) {
5192
+ return 'No knowledge pieces found';
5193
+ }
5194
+ // TODO: [🚐] Make arrayable LLMs -> single LLM DRY
5195
+ const _llms = arrayableToArray(tools.llm);
5196
+ const llmTools = _llms.length === 1 ? _llms[0] : joinLlmExecutionTools(..._llms);
5197
+ const taskEmbeddingPrompt = {
5198
+ title: 'Knowledge Search',
5199
+ modelRequirements: {
5200
+ modelVariant: 'EMBEDDING',
5201
+ modelName: firstKnowlegeIndex.modelName,
5202
+ },
5203
+ content: task.content,
5204
+ parameters: {
5205
+ /* !!!!!!!! */
5206
+ },
5207
+ };
5208
+ const taskEmbeddingResult = await llmTools.callEmbeddingModel(taskEmbeddingPrompt);
5209
+ const knowledgePiecesWithRelevance = preparedPipeline.knowledgePieces.map((knowledgePiece) => {
5210
+ const { index } = knowledgePiece;
5211
+ const knowledgePieceIndex = index.find((i) => i.modelName === firstKnowlegeIndex.modelName);
5212
+ // <- TODO: Do not use just first knowledge piece and first index to determine embedding model
5213
+ if (knowledgePieceIndex === undefined) {
5214
+ return {
5215
+ content: knowledgePiece.content,
5216
+ relevance: 0,
5217
+ };
5218
+ }
5219
+ const relevance = computeCosineSimilarity(knowledgePieceIndex.position, taskEmbeddingResult.content);
5220
+ return {
5221
+ content: knowledgePiece.content,
5222
+ relevance,
5223
+ };
5224
+ });
5225
+ const knowledgePiecesSorted = knowledgePiecesWithRelevance.sort((a, b) => a.relevance - b.relevance);
5226
+ const knowledgePiecesLimited = knowledgePiecesSorted.slice(0, 5);
5227
+ console.log('!!! Embedding', {
5228
+ task,
5229
+ taskEmbeddingPrompt,
5230
+ taskEmbeddingResult,
5231
+ firstKnowlegePiece,
5232
+ firstKnowlegeIndex,
5233
+ knowledgePiecesWithRelevance,
5234
+ knowledgePiecesSorted,
5235
+ knowledgePiecesLimited,
5236
+ });
5237
+ return knowledgePiecesLimited.map(({ content }) => `- ${content}`).join('\n');
5140
5238
  // <- TODO: [🧠] Some smart aggregation of knowledge pieces, single-line vs multi-line vs mixed
5141
5239
  }
5240
// TODO: !!!!!! Annotate + to new file
/**
 * Computes `1 - cosine similarity` (the cosine distance) of two embedding vectors
 *
 * Note: Despite the name, a LOWER result means MORE similar vectors:
 *       `0` for the same direction, `1` for orthogonal vectors, `2` for opposite directions
 * Note: When either vector has zero magnitude (this includes empty vectors) the direction is undefined,
 *       `1` is returned in that case instead of `NaN` which would break sorting by the result
 *
 * @param embeddingVector1 - First vector of numbers
 * @param embeddingVector2 - Second vector of numbers, must have the same length as the first one
 * @returns Number between `0` and `2` (up to floating point error)
 * @throws {TypeError} When the vectors differ in length
 */
function computeCosineSimilarity(embeddingVector1, embeddingVector2) {
    if (embeddingVector1.length !== embeddingVector2.length) {
        throw new TypeError('Embedding vectors must have the same length');
    }
    let dotProduct = 0;
    let squaredMagnitude1 = 0;
    let squaredMagnitude2 = 0;
    for (let i = 0; i < embeddingVector1.length; i++) {
        const value1 = embeddingVector1[i];
        const value2 = embeddingVector2[i];
        dotProduct += value1 * value2;
        squaredMagnitude1 += value1 * value1;
        squaredMagnitude2 += value2 * value2;
    }
    const magnitudeProduct = Math.sqrt(squaredMagnitude1) * Math.sqrt(squaredMagnitude2);
    if (magnitudeProduct === 0) {
        // Note: Avoids `0 / 0`, vector without direction is treated as unrelated
        return 1;
    }
    return 1 - dotProduct / magnitudeProduct;
}
5250
+ /**
5251
+ * TODO: !!!! Verify if this is working
5252
+ * TODO: [♨] Implement Better - use keyword search
5253
+ * TODO: [♨] Examples of values
5254
+ */
5142
5255
 
5143
5256
  /**
5144
5257
  * @@@
@@ -5146,9 +5259,9 @@ async function getKnowledgeForTask(options) {
5146
5259
  * @private internal utility of `createPipelineExecutor`
5147
5260
  */
5148
5261
  async function getReservedParametersForTask(options) {
5149
- const { preparedPipeline, task, pipelineIdentification } = options;
5262
+ const { tools, preparedPipeline, task, pipelineIdentification } = options;
5150
5263
  const context = await getContextForTask(); // <- [🏍]
5151
- const knowledge = await getKnowledgeForTask({ preparedPipeline, task });
5264
+ const knowledge = await getKnowledgeForTask({ tools, preparedPipeline, task });
5152
5265
  const examples = await getExamplesForTask();
5153
5266
  const currentDate = new Date().toISOString(); // <- TODO: [🧠][💩] Better
5154
5267
  const modelName = RESERVED_PARAMETER_MISSING_VALUE;
@@ -5210,6 +5323,7 @@ async function executeTask(options) {
5210
5323
  }
5211
5324
  const definedParameters = Object.freeze({
5212
5325
  ...(await getReservedParametersForTask({
5326
+ tools,
5213
5327
  preparedPipeline,
5214
5328
  task: currentTask,
5215
5329
  pipelineIdentification,