@promptbook/markitdown 0.92.0-5 → 0.92.0-7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/esm/index.es.js +124 -10
- package/esm/index.es.js.map +1 -1
- package/esm/typings/src/_packages/core.index.d.ts +4 -0
- package/esm/typings/src/_packages/utils.index.d.ts +2 -0
- package/esm/typings/src/cli/common/$provideLlmToolsForCli.d.ts +1 -1
- package/esm/typings/src/conversion/archive/loadArchive.d.ts +2 -2
- package/esm/typings/src/execution/createPipelineExecutor/getKnowledgeForTask.d.ts +12 -0
- package/esm/typings/src/execution/createPipelineExecutor/getReservedParametersForTask.d.ts +5 -0
- package/esm/typings/src/formats/json/utils/jsonParse.d.ts +11 -0
- package/esm/typings/src/llm-providers/_common/register/LlmToolsMetadata.d.ts +43 -0
- package/esm/typings/src/remote-server/openapi-types.d.ts +348 -6
- package/esm/typings/src/remote-server/openapi.d.ts +397 -3
- package/package.json +2 -2
- package/umd/index.umd.js +128 -14
- package/umd/index.umd.js.map +1 -1
package/esm/index.es.js
CHANGED
|
@@ -26,7 +26,7 @@ const BOOK_LANGUAGE_VERSION = '1.0.0';
|
|
|
26
26
|
* @generated
|
|
27
27
|
* @see https://github.com/webgptorg/promptbook
|
|
28
28
|
*/
|
|
29
|
-
const PROMPTBOOK_ENGINE_VERSION = '0.92.0-5';
|
|
29
|
+
const PROMPTBOOK_ENGINE_VERSION = '0.92.0-7';
|
|
30
30
|
/**
|
|
31
31
|
* TODO: string_promptbook_version should be constrained to the all versions of Promptbook engine
|
|
32
32
|
* Note: [💞] Ignore a discrepancy between file name and entity name
|
|
@@ -2124,6 +2124,45 @@ function isPipelinePrepared(pipeline) {
|
|
|
2124
2124
|
* - [♨] Are tasks prepared
|
|
2125
2125
|
*/
|
|
2126
2126
|
|
|
2127
|
+
/**
 * Converts a JavaScript Object Notation (JSON) string into an object.
 *
 * Note: This is a wrapper around `JSON.parse()` with better error and type handling
 *
 * @param value - JSON text to parse; anything other than a string is rejected
 * @returns the value produced by `JSON.parse(value)`
 * @throws {Error} when `value` is `undefined`, when it is not a string, or when it is not valid JSON
 *         (in the last case the original parsing error is attached as `cause`)
 * @public exported from `@promptbook/utils`
 */
function jsonParse(value) {
    if (value === undefined) {
        throw new Error(`Can not parse JSON from undefined value.`);
    }
    if (typeof value !== 'string') {
        // Note: The offending value is logged because it can not be reasonably embedded into the error message
        console.error('Can not parse JSON from non-string value.', { text: value });
        throw new Error(spaceTrim(`
            Can not parse JSON from non-string value.

            The value type: ${typeof value}
            See more in console.
        `));
    }
    try {
        return JSON.parse(value);
    }
    catch (error) {
        if (!(error instanceof Error)) {
            // Note: Non-Error throwables are passed through untouched
            throw error;
        }
        // Note: The message is enriched with the JSON text, the original error (with its stack) is kept as `cause`
        throw new Error(spaceTrim((block) => `
            ${block(error.message)}

            The JSON text:
            ${block(value)}
        `), { cause: error });
    }
}
|
|
2162
|
+
/**
|
|
2163
|
+
* TODO: !!!! Use in Promptbook.studio
|
|
2164
|
+
*/
|
|
2165
|
+
|
|
2127
2166
|
/**
|
|
2128
2167
|
* Recursively converts JSON strings to JSON objects
|
|
2129
2168
|
|
|
@@ -2142,7 +2181,7 @@ function jsonStringsToJsons(object) {
|
|
|
2142
2181
|
const newObject = { ...object };
|
|
2143
2182
|
for (const [key, value] of Object.entries(object)) {
|
|
2144
2183
|
if (typeof value === 'string' && isValidJsonString(value)) {
|
|
2145
|
-
newObject[key] =
|
|
2184
|
+
newObject[key] = jsonParse(value);
|
|
2146
2185
|
}
|
|
2147
2186
|
else {
|
|
2148
2187
|
newObject[key] = jsonStringsToJsons(value);
|
|
@@ -2989,18 +3028,26 @@ async function preparePersona(personaDescription, tools, options) {
|
|
|
2989
3028
|
}).asPromise();
|
|
2990
3029
|
const { outputParameters } = result;
|
|
2991
3030
|
const { modelsRequirements: modelsRequirementsJson } = outputParameters;
|
|
2992
|
-
|
|
3031
|
+
let modelsRequirementsUnchecked = jsonParse(modelsRequirementsJson);
|
|
2993
3032
|
if (isVerbose) {
|
|
2994
3033
|
console.info(`PERSONA ${personaDescription}`, modelsRequirementsUnchecked);
|
|
2995
3034
|
}
|
|
2996
3035
|
if (!Array.isArray(modelsRequirementsUnchecked)) {
|
|
2997
|
-
|
|
3036
|
+
// <- TODO: Book should have syntax and system to enforce shape of JSON
|
|
3037
|
+
modelsRequirementsUnchecked = [modelsRequirementsUnchecked];
|
|
3038
|
+
/*
|
|
3039
|
+
throw new UnexpectedError(
|
|
3040
|
+
spaceTrim(
|
|
3041
|
+
(block) => `
|
|
2998
3042
|
Invalid \`modelsRequirements\`:
|
|
2999
3043
|
|
|
3000
3044
|
\`\`\`json
|
|
3001
3045
|
${block(JSON.stringify(modelsRequirementsUnchecked, null, 4))}
|
|
3002
3046
|
\`\`\`
|
|
3003
|
-
|
|
3047
|
+
`,
|
|
3048
|
+
),
|
|
3049
|
+
);
|
|
3050
|
+
*/
|
|
3004
3051
|
}
|
|
3005
3052
|
const modelsRequirements = modelsRequirementsUnchecked.map((modelRequirements) => ({
|
|
3006
3053
|
modelVariant: 'CHAT',
|
|
@@ -3445,7 +3492,7 @@ async function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
|
|
|
3445
3492
|
> },
|
|
3446
3493
|
*/
|
|
3447
3494
|
async asJson() {
|
|
3448
|
-
return
|
|
3495
|
+
return jsonParse(await tools.fs.readFile(filename, 'utf-8'));
|
|
3449
3496
|
},
|
|
3450
3497
|
async asText() {
|
|
3451
3498
|
return await tools.fs.readFile(filename, 'utf-8');
|
|
@@ -5132,13 +5179,79 @@ async function getExamplesForTask(task) {
|
|
|
5132
5179
|
/**
 * Picks the knowledge pieces that are closest to the task content and formats them as a bullet list
 *
 * Here is the place where RAG (retrieval-augmented generation) happens
 *
 * The embedding model is taken from the first index of the first knowledge piece. The task content
 * is embedded with that model, every knowledge piece is compared with it and the 5 closest pieces
 * are returned, one `- content` line per piece.
 *
 * @param options - `tools` (its `llm` is used to compute the embedding), `preparedPipeline`
 *        (source of `knowledgePieces`) and `task` (its `content` is embedded)
 * @returns the bullet list, or `'No knowledge pieces found'` when there is no knowledge piece (or no index) to work with
 * @private internal utility of `createPipelineExecutor`
 */
async function getKnowledgeForTask(options) {
    const { tools, preparedPipeline, task } = options;
    const firstKnowledgePiece = preparedPipeline.knowledgePieces[0];
    const firstKnowledgeIndex = firstKnowledgePiece === null || firstKnowledgePiece === void 0 ? void 0 : firstKnowledgePiece.index[0];
    // <- TODO: Do not use just first knowledge piece and first index to determine embedding model, use also keyword search
    if (firstKnowledgePiece === undefined || firstKnowledgeIndex === undefined) {
        return 'No knowledge pieces found';
    }
    const embeddingModelName = firstKnowledgeIndex.modelName;
    // TODO: [🚐] Make arrayable LLMs -> single LLM DRY
    const _llms = arrayableToArray(tools.llm);
    const llmTools = _llms.length === 1 ? _llms[0] : joinLlmExecutionTools(..._llms);
    const taskEmbeddingPrompt = {
        title: 'Knowledge Search',
        modelRequirements: {
            modelVariant: 'EMBEDDING',
            modelName: embeddingModelName,
        },
        content: task.content,
        parameters: {
            /* !!!!!!!! */
        },
    };
    const taskEmbeddingResult = await llmTools.callEmbeddingModel(taskEmbeddingPrompt);
    // Note: `computeCosineSimilarity` returns a DISTANCE (`1 - cosine`), so lower means closer.
    //       Pieces which are not indexed by the chosen model can not be compared, they get the largest
    //       possible cosine distance so they are used only when there is nothing better.
    const WORST_DISTANCE = 2;
    const knowledgePiecesWithDistance = preparedPipeline.knowledgePieces.map(({ content, index }) => {
        const matchingIndex = index.find(({ modelName }) => modelName === embeddingModelName);
        // <- TODO: Do not use just first knowledge piece and first index to determine embedding model
        const distance = matchingIndex === undefined
            ? WORST_DISTANCE
            : computeCosineSimilarity(matchingIndex.position, taskEmbeddingResult.content);
        return { content, distance };
    });
    // Note: The array was freshly created by `map`, so sorting it in place does not touch the pipeline
    const knowledgePiecesSorted = knowledgePiecesWithDistance.sort((a, b) => a.distance - b.distance);
    const knowledgePiecesLimited = knowledgePiecesSorted.slice(0, 5);
    return knowledgePiecesLimited.map(({ content }) => `- ${content}`).join('\n');
    // <- TODO: [🧠] Some smart aggregation of knowledge pieces, single-line vs multi-line vs mixed
}
|
|
5240
|
+
// TODO: !!!!!! Annotate + to new file
/**
 * Computes how far apart two embedding vectors are by their angle
 *
 * Note: Despite its name, this function returns the cosine DISTANCE (`1 - cosine similarity`):
 *       `0` for vectors pointing the same way, `1` for orthogonal ones and `2` for opposite ones.
 *       So LOWER result means MORE similar.
 *
 * @param embeddingVector1 - first vector (array of numbers)
 * @param embeddingVector2 - second vector of the same length
 * @returns `1 - cos(angle between the vectors)`; `1` when any of the vectors has zero magnitude
 *          (the angle is undefined there and `NaN` would break sorting by the result)
 * @throws {TypeError} when the vectors have different lengths
 */
function computeCosineSimilarity(embeddingVector1, embeddingVector2) {
    if (embeddingVector1.length !== embeddingVector2.length) {
        throw new TypeError('Embedding vectors must have the same length');
    }
    let dotProduct = 0;
    let squaredMagnitude1 = 0;
    let squaredMagnitude2 = 0;
    for (let i = 0; i < embeddingVector1.length; i++) {
        const value1 = embeddingVector1[i];
        const value2 = embeddingVector2[i];
        dotProduct += value1 * value2;
        squaredMagnitude1 += value1 * value1;
        squaredMagnitude2 += value2 * value2;
    }
    const magnitudeProduct = Math.sqrt(squaredMagnitude1) * Math.sqrt(squaredMagnitude2);
    if (magnitudeProduct === 0) {
        // Note: Zero (or empty) vector has no direction, treat it as "not similar at all" instead of returning NaN
        return 1;
    }
    return 1 - dotProduct / magnitudeProduct;
}
|
|
5250
|
+
/**
|
|
5251
|
+
* TODO: !!!! Verify if this is working
|
|
5252
|
+
* TODO: [♨] Implement Better - use keyword search
|
|
5253
|
+
* TODO: [♨] Examples of values
|
|
5254
|
+
*/
|
|
5142
5255
|
|
|
5143
5256
|
/**
|
|
5144
5257
|
* @@@
|
|
@@ -5146,9 +5259,9 @@ async function getKnowledgeForTask(options) {
|
|
|
5146
5259
|
* @private internal utility of `createPipelineExecutor`
|
|
5147
5260
|
*/
|
|
5148
5261
|
async function getReservedParametersForTask(options) {
|
|
5149
|
-
const { preparedPipeline, task, pipelineIdentification } = options;
|
|
5262
|
+
const { tools, preparedPipeline, task, pipelineIdentification } = options;
|
|
5150
5263
|
const context = await getContextForTask(); // <- [🏍]
|
|
5151
|
-
const knowledge = await getKnowledgeForTask({ preparedPipeline, task });
|
|
5264
|
+
const knowledge = await getKnowledgeForTask({ tools, preparedPipeline, task });
|
|
5152
5265
|
const examples = await getExamplesForTask();
|
|
5153
5266
|
const currentDate = new Date().toISOString(); // <- TODO: [🧠][💩] Better
|
|
5154
5267
|
const modelName = RESERVED_PARAMETER_MISSING_VALUE;
|
|
@@ -5210,6 +5323,7 @@ async function executeTask(options) {
|
|
|
5210
5323
|
}
|
|
5211
5324
|
const definedParameters = Object.freeze({
|
|
5212
5325
|
...(await getReservedParametersForTask({
|
|
5326
|
+
tools,
|
|
5213
5327
|
preparedPipeline,
|
|
5214
5328
|
task: currentTask,
|
|
5215
5329
|
pipelineIdentification,
|