@promptbook/website-crawler 0.92.0-5 → 0.92.0-7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/esm/index.es.js CHANGED
@@ -29,7 +29,7 @@ const BOOK_LANGUAGE_VERSION = '1.0.0';
29
29
  * @generated
30
30
  * @see https://github.com/webgptorg/promptbook
31
31
  */
32
- const PROMPTBOOK_ENGINE_VERSION = '0.92.0-5';
32
+ const PROMPTBOOK_ENGINE_VERSION = '0.92.0-7';
33
33
  /**
34
34
  * TODO: string_promptbook_version should be constrained to the all versions of Promptbook engine
35
35
  * Note: [💞] Ignore a discrepancy between file name and entity name
@@ -2275,6 +2275,45 @@ function isPipelinePrepared(pipeline) {
2275
2275
  * - [♨] Are tasks prepared
2276
2276
  */
2277
2277
 
2278
/**
 * Converts a JavaScript Object Notation (JSON) string into an object.
 *
 * Note: This is a wrapper around `JSON.parse()` with better error and type handling
 *
 * @param value - the JSON text to parse; anything other than a string is rejected
 * @returns whatever `JSON.parse(value)` produces
 * @throws {Error} when `value` is `undefined`, when it is not a string, or when it is not valid JSON
 *         (in the last case the original parsing error is attached as `cause`)
 * @public exported from `@promptbook/utils`
 */
function jsonParse(value) {
    if (value === undefined) {
        throw new Error(`Can not parse JSON from undefined value.`);
    }
    if (typeof value !== 'string') {
        // Note: `typeof null` is 'object', report `null` explicitly so the message is not misleading
        const valueType = value === null ? 'null' : typeof value;
        // Note: The value itself is only logged, not embedded into the error message
        console.error('Can not parse JSON from non-string value.', { text: value });
        throw new Error(spaceTrim$1(`
            Can not parse JSON from non-string value.

            The value type: ${valueType}
            See more in console.
        `));
    }
    try {
        return JSON.parse(value);
    }
    catch (error) {
        if (!(error instanceof Error)) {
            // Note: Non-Error throwables are passed through untouched
            throw error;
        }
        // Note: Keep the original error as `cause` so its type and stack are not lost by the wrapping
        throw new Error(spaceTrim$1((block) => `
            ${block(error.message)}

            The JSON text:
            ${block(value)}
        `), { cause: error });
    }
}
2313
+ /**
2314
+ * TODO: !!!! Use in Promptbook.studio
2315
+ */
2316
+
2278
2317
  /**
2279
2318
  * Recursively converts JSON strings to JSON objects
2280
2319
 
@@ -2293,7 +2332,7 @@ function jsonStringsToJsons(object) {
2293
2332
  const newObject = { ...object };
2294
2333
  for (const [key, value] of Object.entries(object)) {
2295
2334
  if (typeof value === 'string' && isValidJsonString(value)) {
2296
- newObject[key] = JSON.parse(value);
2335
+ newObject[key] = jsonParse(value);
2297
2336
  }
2298
2337
  else {
2299
2338
  newObject[key] = jsonStringsToJsons(value);
@@ -3117,18 +3156,26 @@ async function preparePersona(personaDescription, tools, options) {
3117
3156
  }).asPromise();
3118
3157
  const { outputParameters } = result;
3119
3158
  const { modelsRequirements: modelsRequirementsJson } = outputParameters;
3120
- const modelsRequirementsUnchecked = JSON.parse(modelsRequirementsJson);
3159
+ let modelsRequirementsUnchecked = jsonParse(modelsRequirementsJson);
3121
3160
  if (isVerbose) {
3122
3161
  console.info(`PERSONA ${personaDescription}`, modelsRequirementsUnchecked);
3123
3162
  }
3124
3163
  if (!Array.isArray(modelsRequirementsUnchecked)) {
3125
- throw new UnexpectedError(spaceTrim$1((block) => `
3164
+ // <- TODO: Book should have syntax and system to enforce shape of JSON
3165
+ modelsRequirementsUnchecked = [modelsRequirementsUnchecked];
3166
+ /*
3167
+ throw new UnexpectedError(
3168
+ spaceTrim(
3169
+ (block) => `
3126
3170
  Invalid \`modelsRequirements\`:
3127
3171
 
3128
3172
  \`\`\`json
3129
3173
  ${block(JSON.stringify(modelsRequirementsUnchecked, null, 4))}
3130
3174
  \`\`\`
3131
- `));
3175
+ `,
3176
+ ),
3177
+ );
3178
+ */
3132
3179
  }
3133
3180
  const modelsRequirements = modelsRequirementsUnchecked.map((modelRequirements) => ({
3134
3181
  modelVariant: 'CHAT',
@@ -3459,7 +3506,7 @@ async function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
3459
3506
  > },
3460
3507
  */
3461
3508
  async asJson() {
3462
- return JSON.parse(await tools.fs.readFile(filename, 'utf-8'));
3509
+ return jsonParse(await tools.fs.readFile(filename, 'utf-8'));
3463
3510
  },
3464
3511
  async asText() {
3465
3512
  return await tools.fs.readFile(filename, 'utf-8');
@@ -5146,13 +5193,79 @@ async function getExamplesForTask(task) {
5146
5193
  /**
5147
5194
  * @@@
5148
5195
  *
5196
+ * Here is the place where RAG (retrieval-augmented generation) happens
5197
+ *
5149
5198
  * @private internal utility of `createPipelineExecutor`
5150
5199
  */
5151
5200
  async function getKnowledgeForTask(options) {
5152
- const { preparedPipeline, task } = options;
5153
- return preparedPipeline.knowledgePieces.map(({ content }) => `- ${content}`).join('\n');
5201
+ const { tools, preparedPipeline, task } = options;
5202
+ const firstKnowlegePiece = preparedPipeline.knowledgePieces[0];
5203
+ const firstKnowlegeIndex = firstKnowlegePiece === null || firstKnowlegePiece === void 0 ? void 0 : firstKnowlegePiece.index[0];
5204
+ // <- TODO: Do not use just first knowledge piece and first index to determine embedding model, use also keyword search
5205
+ if (firstKnowlegePiece === undefined || firstKnowlegeIndex === undefined) {
5206
+ return 'No knowledge pieces found';
5207
+ }
5208
+ // TODO: [🚐] Make arrayable LLMs -> single LLM DRY
5209
+ const _llms = arrayableToArray(tools.llm);
5210
+ const llmTools = _llms.length === 1 ? _llms[0] : joinLlmExecutionTools(..._llms);
5211
+ const taskEmbeddingPrompt = {
5212
+ title: 'Knowledge Search',
5213
+ modelRequirements: {
5214
+ modelVariant: 'EMBEDDING',
5215
+ modelName: firstKnowlegeIndex.modelName,
5216
+ },
5217
+ content: task.content,
5218
+ parameters: {
5219
+ /* !!!!!!!! */
5220
+ },
5221
+ };
5222
+ const taskEmbeddingResult = await llmTools.callEmbeddingModel(taskEmbeddingPrompt);
5223
+ const knowledgePiecesWithRelevance = preparedPipeline.knowledgePieces.map((knowledgePiece) => {
5224
+ const { index } = knowledgePiece;
5225
+ const knowledgePieceIndex = index.find((i) => i.modelName === firstKnowlegeIndex.modelName);
5226
+ // <- TODO: Do not use just first knowledge piece and first index to determine embedding model
5227
+ if (knowledgePieceIndex === undefined) {
5228
+ return {
5229
+ content: knowledgePiece.content,
5230
+ relevance: 0,
5231
+ };
5232
+ }
5233
+ const relevance = computeCosineSimilarity(knowledgePieceIndex.position, taskEmbeddingResult.content);
5234
+ return {
5235
+ content: knowledgePiece.content,
5236
+ relevance,
5237
+ };
5238
+ });
5239
+ const knowledgePiecesSorted = knowledgePiecesWithRelevance.sort((a, b) => a.relevance - b.relevance);
5240
+ const knowledgePiecesLimited = knowledgePiecesSorted.slice(0, 5);
5241
+ console.log('!!! Embedding', {
5242
+ task,
5243
+ taskEmbeddingPrompt,
5244
+ taskEmbeddingResult,
5245
+ firstKnowlegePiece,
5246
+ firstKnowlegeIndex,
5247
+ knowledgePiecesWithRelevance,
5248
+ knowledgePiecesSorted,
5249
+ knowledgePiecesLimited,
5250
+ });
5251
+ return knowledgePiecesLimited.map(({ content }) => `- ${content}`).join('\n');
5154
5252
  // <- TODO: [🧠] Some smart aggregation of knowledge pieces, single-line vs multi-line vs mixed
5155
5253
  }
5254
// TODO: !!!!!! Annotate + to new file
/**
 * Computes how far apart two embedding vectors point
 *
 * Note: Despite the name, the returned value is `1 - cosine similarity` (a cosine distance):
 *       `0` for the same direction, `1` for orthogonal vectors, `2` for opposite directions.
 *       Lower value therefore means more similar vectors.
 * Note: When either vector has zero magnitude (or both are empty) the cosine is undefined,
 *       in that case `1` is returned instead of `NaN`
 *
 * @param embeddingVector1 - first vector of numbers
 * @param embeddingVector2 - second vector of numbers, must have the same length as the first one
 * @returns `1 - cosine similarity` of the two vectors
 * @throws {TypeError} when the vectors differ in length
 */
function computeCosineSimilarity(embeddingVector1, embeddingVector2) {
    if (embeddingVector1.length !== embeddingVector2.length) {
        throw new TypeError('Embedding vectors must have the same length');
    }
    let dotProduct = 0;
    let squaredSum1 = 0;
    let squaredSum2 = 0;
    // Single pass accumulating the dot product and both squared magnitudes
    for (let i = 0; i < embeddingVector1.length; i++) {
        const value1 = embeddingVector1[i];
        const value2 = embeddingVector2[i];
        dotProduct += value1 * value2;
        squaredSum1 += value1 * value1;
        squaredSum2 += value2 * value2;
    }
    const magnitudeProduct = Math.sqrt(squaredSum1) * Math.sqrt(squaredSum2);
    if (magnitudeProduct === 0) {
        // Avoid 0/0 -> NaN which would break any sorting done with the result
        return 1;
    }
    return 1 - dotProduct / magnitudeProduct;
}
5264
+ /**
5265
+ * TODO: !!!! Verify if this is working
5266
+ * TODO: [♨] Implement Better - use keyword search
5267
+ * TODO: [♨] Examples of values
5268
+ */
5156
5269
 
5157
5270
  /**
5158
5271
  * @@@
@@ -5160,9 +5273,9 @@ async function getKnowledgeForTask(options) {
5160
5273
  * @private internal utility of `createPipelineExecutor`
5161
5274
  */
5162
5275
  async function getReservedParametersForTask(options) {
5163
- const { preparedPipeline, task, pipelineIdentification } = options;
5276
+ const { tools, preparedPipeline, task, pipelineIdentification } = options;
5164
5277
  const context = await getContextForTask(); // <- [🏍]
5165
- const knowledge = await getKnowledgeForTask({ preparedPipeline, task });
5278
+ const knowledge = await getKnowledgeForTask({ tools, preparedPipeline, task });
5166
5279
  const examples = await getExamplesForTask();
5167
5280
  const currentDate = new Date().toISOString(); // <- TODO: [🧠][💩] Better
5168
5281
  const modelName = RESERVED_PARAMETER_MISSING_VALUE;
@@ -5224,6 +5337,7 @@ async function executeTask(options) {
5224
5337
  }
5225
5338
  const definedParameters = Object.freeze({
5226
5339
  ...(await getReservedParametersForTask({
5340
+ tools,
5227
5341
  preparedPipeline,
5228
5342
  task: currentTask,
5229
5343
  pipelineIdentification,