@promptbook/cli 0.95.0 → 0.98.0-10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25) hide show
  1. package/README.md +12 -0
  2. package/esm/index.es.js +1372 -1038
  3. package/esm/index.es.js.map +1 -1
  4. package/esm/typings/src/_packages/anthropic-claude.index.d.ts +2 -2
  5. package/esm/typings/src/_packages/cli.index.d.ts +4 -0
  6. package/esm/typings/src/_packages/core.index.d.ts +2 -0
  7. package/esm/typings/src/_packages/openai.index.d.ts +10 -0
  8. package/esm/typings/src/_packages/types.index.d.ts +12 -2
  9. package/esm/typings/src/_packages/wizard.index.d.ts +4 -0
  10. package/esm/typings/src/config.d.ts +1 -1
  11. package/esm/typings/src/execution/createPipelineExecutor/$OngoingTaskResult.d.ts +8 -0
  12. package/esm/typings/src/execution/utils/validatePromptResult.d.ts +53 -0
  13. package/esm/typings/src/llm-providers/anthropic-claude/AnthropicClaudeExecutionTools.d.ts +3 -3
  14. package/esm/typings/src/llm-providers/anthropic-claude/AnthropicClaudeExecutionToolsOptions.d.ts +2 -2
  15. package/esm/typings/src/llm-providers/openai/OpenAiAssistantExecutionToolsOptions.d.ts +2 -2
  16. package/esm/typings/src/llm-providers/openai/OpenAiCompatibleExecutionTools.d.ts +4 -4
  17. package/esm/typings/src/llm-providers/openai/OpenAiCompatibleExecutionToolsOptions.d.ts +52 -0
  18. package/esm/typings/src/llm-providers/openai/OpenAiExecutionToolsOptions.d.ts +3 -5
  19. package/esm/typings/src/llm-providers/openai/createOpenAiCompatibleExecutionTools.d.ts +74 -0
  20. package/esm/typings/src/llm-providers/openai/register-configuration.d.ts +11 -0
  21. package/esm/typings/src/llm-providers/openai/register-constructor.d.ts +14 -0
  22. package/esm/typings/src/version.d.ts +1 -1
  23. package/package.json +1 -1
  24. package/umd/index.umd.js +1373 -1037
  25. package/umd/index.umd.js.map +1 -1
package/umd/index.umd.js CHANGED
@@ -57,7 +57,7 @@
57
57
  * @generated
58
58
  * @see https://github.com/webgptorg/promptbook
59
59
  */
60
- const PROMPTBOOK_ENGINE_VERSION = '0.95.0';
60
+ const PROMPTBOOK_ENGINE_VERSION = '0.98.0-10';
61
61
  /**
62
62
  * TODO: string_promptbook_version should be constrained to the all versions of Promptbook engine
63
63
  * Note: [💞] Ignore a discrepancy between file name and entity name
@@ -271,7 +271,7 @@
271
271
  *
272
272
  * @public exported from `@promptbook/core`
273
273
  */
274
- const DEFAULT_MAX_EXECUTION_ATTEMPTS = 10; // <- TODO: [🤹‍♂️]
274
+ const DEFAULT_MAX_EXECUTION_ATTEMPTS = 7; // <- TODO: [🤹‍♂️]
275
275
  // <- TODO: [๐Ÿ]
276
276
  /**
277
277
  * Where to store your books
@@ -1070,7 +1070,7 @@
1070
1070
  throw new Error(spaceTrim__default["default"]((block) => `
1071
1071
  ${block(error.message)}
1072
1072
 
1073
- The JSON text:
1073
+ The expected JSON text:
1074
1074
  ${block(value)}
1075
1075
  `));
1076
1076
  }
@@ -2769,303 +2769,520 @@
2769
2769
  }
2770
2770
 
2771
2771
  /**
2772
- * Intercepts LLM tools and counts total usage of the tools
2772
+ * Function isValidJsonString will tell you if the string is valid JSON or not
2773
2773
  *
2774
- * Note: It can take extended `LlmExecutionTools` and cache the
2774
+ * @param value The string to check
2775
+ * @returns `true` if the string is a valid JSON string, false otherwise
2775
2776
  *
2776
- * @param llmTools LLM tools to be intercepted with usage counting, it can contain extra methods like `totalUsage`
2777
- * @returns LLM tools with same functionality with added total cost counting
2778
- * @public exported from `@promptbook/core`
2777
+ * @public exported from `@promptbook/utils`
2779
2778
  */
2780
- function cacheLlmTools(llmTools, options = {}) {
2781
- const { storage = new MemoryStorage(), isCacheReloaded = false, isVerbose = DEFAULT_IS_VERBOSE } = options;
2782
- const proxyTools = {
2783
- ...llmTools,
2784
- // <- Note: [๐Ÿฅซ]
2785
- get title() {
2786
- return `${llmTools.title} (cached)`;
2787
- // <- TODO: [๐Ÿงˆ] Maybe standartize the suffix when wrapping `LlmExecutionTools` up
2788
- // <- TODO: [๐Ÿงˆ][๐Ÿง ] Does it make sense to suffix "(cached)"?
2789
- },
2790
- get description() {
2791
- return `${llmTools.description} (cached)`;
2792
- // <- TODO: [๐Ÿงˆ] Maybe standartize the suffix when wrapping `LlmExecutionTools` up
2793
- // <- TODO: [๐Ÿงˆ][๐Ÿง ] Does it make sense to suffix "(cached)"?
2794
- },
2795
- listModels() {
2796
- // TODO: [๐Ÿง ] Should be model listing also cached?
2797
- return /* not await */ llmTools.listModels();
2798
- },
2799
- };
2800
- const callCommonModel = async (prompt) => {
2801
- const { parameters, content, modelRequirements } = prompt;
2802
- // <- Note: These are relevant things from the prompt that the cache key should depend on.
2803
- // TODO: Maybe some standalone function for normalization of content for cache
2804
- let normalizedContent = content;
2805
- normalizedContent = normalizedContent.replace(/\s+/g, ' ');
2806
- normalizedContent = normalizedContent.split('\r\n').join('\n');
2807
- normalizedContent = spaceTrim__default["default"](normalizedContent);
2808
- // Note: Do not need to save everything in the cache, just the relevant parameters
2809
- const relevantParameterNames = extractParameterNames(content);
2810
- const relevantParameters = Object.fromEntries(Object.entries(parameters).filter(([key]) => relevantParameterNames.has(key)));
2811
- const keyHashBase = { relevantParameters, normalizedContent, modelRequirements };
2812
- const key = titleToName(prompt.title.substring(0, MAX_FILENAME_LENGTH - 10) +
2813
- '-' +
2814
- sha256__default["default"](hexEncoder__default["default"].parse(JSON.stringify(keyHashBase)))
2815
- .toString( /* hex */)
2816
- .substring(0, 10 - 1));
2817
- const cacheItem = !isCacheReloaded ? await storage.getItem(key) : null;
2818
- if (cacheItem) {
2819
- return cacheItem.promptResult;
2779
+ function isValidJsonString(value /* <- [👨‍⚖️] */) {
2780
+ try {
2781
+ JSON.parse(value);
2782
+ return true;
2783
+ }
2784
+ catch (error) {
2785
+ assertsError(error);
2786
+ if (error.message.includes('Unexpected token')) {
2787
+ return false;
2820
2788
  }
2821
- if (isVerbose) {
2822
- console.info('Cache miss for key:', key, {
2823
- prompt,
2824
- 'prompt.title': prompt.title,
2825
- MAX_FILENAME_LENGTH,
2826
- keyHashBase,
2827
- parameters,
2828
- relevantParameters,
2829
- content,
2830
- normalizedContent,
2831
- modelRequirements,
2832
- });
2789
+ return false;
2790
+ }
2791
+ }
2792
+
2793
+ /**
2794
+ * Makes first letter of a string uppercase
2795
+ *
2796
+ * @public exported from `@promptbook/utils`
2797
+ */
2798
+ function capitalize(word) {
2799
+ return word.substring(0, 1).toUpperCase() + word.substring(1);
2800
+ }
2801
+
2802
+ /**
2803
+ * Extracts all code blocks from markdown.
2804
+ *
2805
+ * Note: There are multiple similar functions:
2806
+ * - `extractBlock` just extracts the content of the code block which is also used as built-in function for postprocessing
2807
+ * - `extractJsonBlock` extracts exactly one valid JSON code block
2808
+ * - `extractOneBlockFromMarkdown` extracts exactly one code block with language of the code block
2809
+ * - `extractAllBlocksFromMarkdown` extracts all code blocks with language of the code block
2810
+ *
2811
+ * @param markdown any valid markdown
2812
+ * @returns code blocks with language and content
2813
+ * @throws {ParseError} if block is not closed properly
2814
+ * @public exported from `@promptbook/markdown-utils`
2815
+ */
2816
+ function extractAllBlocksFromMarkdown(markdown) {
2817
+ const codeBlocks = [];
2818
+ const lines = markdown.split('\n');
2819
+ // Note: [0] Ensure that the last block notated by gt > will be closed
2820
+ lines.push('');
2821
+ let currentCodeBlock = null;
2822
+ for (const line of lines) {
2823
+ if (line.startsWith('> ') || line === '>') {
2824
+ if (currentCodeBlock === null) {
2825
+ currentCodeBlock = { blockNotation: '>', language: null, content: '' };
2826
+ } /* not else */
2827
+ if (currentCodeBlock.blockNotation === '>') {
2828
+ if (currentCodeBlock.content !== '') {
2829
+ currentCodeBlock.content += '\n';
2830
+ }
2831
+ currentCodeBlock.content += line.slice(2);
2832
+ }
2833
2833
  }
2834
- let promptResult;
2835
- variant: switch (prompt.modelRequirements.modelVariant) {
2836
- case 'CHAT':
2837
- promptResult = await llmTools.callChatModel(prompt);
2838
- break variant;
2839
- case 'COMPLETION':
2840
- promptResult = await llmTools.callCompletionModel(prompt);
2841
- break variant;
2842
- case 'EMBEDDING':
2843
- promptResult = await llmTools.callEmbeddingModel(prompt);
2844
- break variant;
2845
- // <- case [๐Ÿค–]:
2846
- default:
2847
- throw new PipelineExecutionError(`Unknown model variant "${prompt.modelRequirements.modelVariant}"`);
2834
+ else if (currentCodeBlock !== null && currentCodeBlock.blockNotation === '>' /* <- Note: [0] */) {
2835
+ codeBlocks.push(currentCodeBlock);
2836
+ currentCodeBlock = null;
2837
+ }
2838
+ /* not else */
2839
+ if (line.startsWith('```')) {
2840
+ const language = line.slice(3).trim() || null;
2841
+ if (currentCodeBlock === null) {
2842
+ currentCodeBlock = { blockNotation: '```', language, content: '' };
2843
+ }
2844
+ else {
2845
+ if (language !== null) {
2846
+ throw new ParseError(`${capitalize(currentCodeBlock.language || 'the')} code block was not closed and already opening new ${language} code block`);
2847
+ }
2848
+ codeBlocks.push(currentCodeBlock);
2849
+ currentCodeBlock = null;
2850
+ }
2851
+ }
2852
+ else if (currentCodeBlock !== null && currentCodeBlock.blockNotation === '```') {
2853
+ if (currentCodeBlock.content !== '') {
2854
+ currentCodeBlock.content += '\n';
2855
+ }
2856
+ currentCodeBlock.content += line.split('\\`\\`\\`').join('```') /* <- TODO: Maybe make proper unescape */;
2848
2857
  }
2849
- // TODO: [๐Ÿง ] !!5 How to do timing in mixed cache / non-cache situation
2850
- // promptResult.timing: FromtoItems
2851
- await storage.setItem(key, {
2852
- date: $getCurrentDate(),
2853
- promptbookVersion: PROMPTBOOK_ENGINE_VERSION,
2854
- bookVersion: BOOK_LANGUAGE_VERSION,
2855
- prompt: {
2856
- ...prompt,
2857
- parameters: Object.entries(parameters).length === Object.entries(relevantParameters).length
2858
- ? parameters
2859
- : {
2860
- ...relevantParameters,
2861
- note: `<- Note: Only relevant parameters are stored in the cache`,
2862
- },
2863
- },
2864
- promptResult,
2865
- });
2866
- return promptResult;
2867
- };
2868
- if (llmTools.callChatModel !== undefined) {
2869
- proxyTools.callChatModel = async (prompt) => {
2870
- return /* not await */ callCommonModel(prompt);
2871
- };
2872
2858
  }
2873
- if (llmTools.callCompletionModel !== undefined) {
2874
- proxyTools.callCompletionModel = async (prompt) => {
2875
- return /* not await */ callCommonModel(prompt);
2876
- };
2859
+ if (currentCodeBlock !== null) {
2860
+ throw new ParseError(`${capitalize(currentCodeBlock.language || 'the')} code block was not closed at the end of the markdown`);
2877
2861
  }
2878
- if (llmTools.callEmbeddingModel !== undefined) {
2879
- proxyTools.callEmbeddingModel = async (prompt) => {
2880
- return /* not await */ callCommonModel(prompt);
2881
- };
2862
+ return codeBlocks;
2863
+ }
2864
+ /**
2865
+ * TODO: Maybe name for `blockNotation` instead of '```' and '>'
2866
+ */
2867
+
2868
+ /**
2869
+ * Extracts extracts exactly one valid JSON code block
2870
+ *
2871
+ * - When given string is a valid JSON as it is, it just returns it
2872
+ * - When there is no JSON code block the function throws a `ParseError`
2873
+ * - When there are multiple JSON code blocks the function throws a `ParseError`
2874
+ *
2875
+ * Note: It is not important if marked as ```json BUT if it is VALID JSON
2876
+ * Note: There are multiple similar function:
2877
+ * - `extractBlock` just extracts the content of the code block which is also used as build-in function for postprocessing
2878
+ * - `extractJsonBlock` extracts exactly one valid JSON code block
2879
+ * - `extractOneBlockFromMarkdown` extracts exactly one code block with language of the code block
2880
+ * - `extractAllBlocksFromMarkdown` extracts all code blocks with language of the code block
2881
+ *
2882
+ * @public exported from `@promptbook/markdown-utils`
2883
+ * @throws {ParseError} if there is no valid JSON block in the markdown
2884
+ */
2885
+ function extractJsonBlock(markdown) {
2886
+ if (isValidJsonString(markdown)) {
2887
+ return markdown;
2882
2888
  }
2883
- // <- Note: [๐Ÿค–]
2884
- return proxyTools;
2889
+ const codeBlocks = extractAllBlocksFromMarkdown(markdown);
2890
+ const jsonBlocks = codeBlocks.filter(({ content }) => isValidJsonString(content));
2891
+ if (jsonBlocks.length === 0) {
2892
+ throw new Error('There is no valid JSON block in the markdown');
2893
+ }
2894
+ if (jsonBlocks.length > 1) {
2895
+ throw new Error('There are multiple JSON code blocks in the markdown');
2896
+ }
2897
+ return jsonBlocks[0].content;
2885
2898
  }
2886
2899
  /**
2887
- * TODO: [๐Ÿง ][๐Ÿ’ธ] Maybe make some common abstraction `interceptLlmTools` and use here (or use javascript Proxy?)
2888
- * TODO: [๐Ÿง ] Is there some meaningfull way how to test this util
2889
- * TODO: [๐Ÿ‘ทโ€โ™‚๏ธ] Comprehensive manual about construction of llmTools
2890
- * Detailed explanation about caching strategies and appropriate storage selection for different use cases
2891
- * Examples of how to combine multiple interceptors for advanced caching, logging, and usage tracking
2900
+ * TODO: Add some auto-healing logic + extract YAML, JSON5, TOML, etc.
2901
+ * TODO: [๐Ÿข] Make this logic part of `JsonFormatParser` or `isValidJsonString`
2892
2902
  */
2893
2903
 
2894
2904
  /**
2895
- * Represents the uncertain value
2905
+ * Counts number of characters in the text
2896
2906
  *
2897
- * @public exported from `@promptbook/core`
2907
+ * @public exported from `@promptbook/utils`
2898
2908
  */
2899
- const ZERO_VALUE = $deepFreeze({ value: 0 });
2909
+ function countCharacters(text) {
2910
+ // Remove null characters
2911
+ text = text.replace(/\0/g, '');
2912
+ // Replace emojis (and also ZWJ sequence) with hyphens
2913
+ text = text.replace(/(\p{Extended_Pictographic})\p{Modifier_Symbol}/gu, '$1');
2914
+ text = text.replace(/(\p{Extended_Pictographic})[\u{FE00}-\u{FE0F}]/gu, '$1');
2915
+ text = text.replace(/\p{Extended_Pictographic}(\u{200D}\p{Extended_Pictographic})*/gu, '-');
2916
+ return text.length;
2917
+ }
2900
2918
  /**
2901
- * Represents the uncertain value
2919
+ * TODO: [๐Ÿฅด] Implement counting in formats - like JSON, CSV, XML,...
2920
+ */
2921
+
2922
+ /**
2923
+ * Number of characters per standard line with 11pt Arial font size.
2902
2924
  *
2903
- * @public exported from `@promptbook/core`
2925
+ * @public exported from `@promptbook/utils`
2904
2926
  */
2905
- const UNCERTAIN_ZERO_VALUE = $deepFreeze({ value: 0, isUncertain: true });
2927
+ const CHARACTERS_PER_STANDARD_LINE = 63;
2906
2928
  /**
2907
- * Represents the usage with no resources consumed
2929
+ * Number of lines per standard A4 page with 11pt Arial font size and standard margins and spacing.
2908
2930
  *
2909
- * @public exported from `@promptbook/core`
2931
+ * @public exported from `@promptbook/utils`
2910
2932
  */
2911
- const ZERO_USAGE = $deepFreeze({
2912
- price: ZERO_VALUE,
2913
- input: {
2914
- tokensCount: ZERO_VALUE,
2915
- charactersCount: ZERO_VALUE,
2916
- wordsCount: ZERO_VALUE,
2917
- sentencesCount: ZERO_VALUE,
2918
- linesCount: ZERO_VALUE,
2919
- paragraphsCount: ZERO_VALUE,
2920
- pagesCount: ZERO_VALUE,
2921
- },
2922
- output: {
2923
- tokensCount: ZERO_VALUE,
2924
- charactersCount: ZERO_VALUE,
2925
- wordsCount: ZERO_VALUE,
2926
- sentencesCount: ZERO_VALUE,
2927
- linesCount: ZERO_VALUE,
2928
- paragraphsCount: ZERO_VALUE,
2929
- pagesCount: ZERO_VALUE,
2930
- },
2931
- });
2933
+ const LINES_PER_STANDARD_PAGE = 44;
2932
2934
  /**
2933
- * Represents the usage with unknown resources consumed
2935
+ * TODO: [๐Ÿง ] Should be this `constants.ts` or `config.ts`?
2936
+ * Note: [💞] Ignore a discrepancy between file name and entity name
2937
+ */
2938
+
2939
+ /**
2940
+ * Counts number of lines in the text
2934
2941
  *
2935
- * @public exported from `@promptbook/core`
2942
+ * Note: This does not check only for the presence of newlines, but also for the length of the standard line.
2943
+ *
2944
+ * @public exported from `@promptbook/utils`
2936
2945
  */
2937
- const UNCERTAIN_USAGE = $deepFreeze({
2938
- price: UNCERTAIN_ZERO_VALUE,
2939
- input: {
2940
- tokensCount: UNCERTAIN_ZERO_VALUE,
2941
- charactersCount: UNCERTAIN_ZERO_VALUE,
2942
- wordsCount: UNCERTAIN_ZERO_VALUE,
2943
- sentencesCount: UNCERTAIN_ZERO_VALUE,
2944
- linesCount: UNCERTAIN_ZERO_VALUE,
2945
- paragraphsCount: UNCERTAIN_ZERO_VALUE,
2946
- pagesCount: UNCERTAIN_ZERO_VALUE,
2947
- },
2948
- output: {
2949
- tokensCount: UNCERTAIN_ZERO_VALUE,
2950
- charactersCount: UNCERTAIN_ZERO_VALUE,
2951
- wordsCount: UNCERTAIN_ZERO_VALUE,
2952
- sentencesCount: UNCERTAIN_ZERO_VALUE,
2953
- linesCount: UNCERTAIN_ZERO_VALUE,
2954
- paragraphsCount: UNCERTAIN_ZERO_VALUE,
2955
- pagesCount: UNCERTAIN_ZERO_VALUE,
2956
- },
2957
- });
2946
+ function countLines(text) {
2947
+ text = text.replace('\r\n', '\n');
2948
+ text = text.replace('\r', '\n');
2949
+ const lines = text.split('\n');
2950
+ return lines.reduce((count, line) => count + Math.ceil(line.length / CHARACTERS_PER_STANDARD_LINE), 0);
2951
+ }
2952
+ /**
2953
+ * TODO: [๐Ÿฅด] Implement counting in formats - like JSON, CSV, XML,...
2954
+ */
2955
+
2956
+ /**
2957
+ * Counts number of pages in the text
2958
+ *
2959
+ * Note: This does not check only for the count of newlines, but also for the length of the standard line and length of the standard page.
2960
+ *
2961
+ * @public exported from `@promptbook/utils`
2962
+ */
2963
+ function countPages(text) {
2964
+ return Math.ceil(countLines(text) / LINES_PER_STANDARD_PAGE);
2965
+ }
2966
+ /**
2967
+ * TODO: [๐Ÿฅด] Implement counting in formats - like JSON, CSV, XML,...
2968
+ */
2969
+
2970
+ /**
2971
+ * Counts number of paragraphs in the text
2972
+ *
2973
+ * @public exported from `@promptbook/utils`
2974
+ */
2975
+ function countParagraphs(text) {
2976
+ return text.split(/\n\s*\n/).filter((paragraph) => paragraph.trim() !== '').length;
2977
+ }
2978
+ /**
2979
+ * TODO: [๐Ÿฅด] Implement counting in formats - like JSON, CSV, XML,...
2980
+ */
2981
+
2982
+ /**
2983
+ * Split text into sentences
2984
+ *
2985
+ * @public exported from `@promptbook/utils`
2986
+ */
2987
+ function splitIntoSentences(text) {
2988
+ return text.split(/[.!?]+/).filter((sentence) => sentence.trim() !== '');
2989
+ }
2990
+ /**
2991
+ * Counts number of sentences in the text
2992
+ *
2993
+ * @public exported from `@promptbook/utils`
2994
+ */
2995
+ function countSentences(text) {
2996
+ return splitIntoSentences(text).length;
2997
+ }
2998
+ /**
2999
+ * TODO: [๐Ÿฅด] Implement counting in formats - like JSON, CSV, XML,...
3000
+ */
3001
+
3002
+ /**
3003
+ * Counts number of words in the text
3004
+ *
3005
+ * @public exported from `@promptbook/utils`
3006
+ */
3007
+ function countWords(text) {
3008
+ text = text.replace(/[\p{Extended_Pictographic}]/gu, 'a');
3009
+ text = removeDiacritics(text);
3010
+ // Add spaces before uppercase letters preceded by lowercase letters (for camelCase)
3011
+ text = text.replace(/([a-z])([A-Z])/g, '$1 $2');
3012
+ return text.split(/[^a-zа-я0-9]+/i).filter((word) => word.length > 0).length;
3013
+ }
3014
+ /**
3015
+ * TODO: [๐Ÿฅด] Implement counting in formats - like JSON, CSV, XML,...
3016
+ */
3017
+
3018
+ /**
3019
+ * Index of all counter functions
3020
+ *
3021
+ * @public exported from `@promptbook/utils`
3022
+ */
3023
+ const CountUtils = {
3024
+ CHARACTERS: countCharacters,
3025
+ WORDS: countWords,
3026
+ SENTENCES: countSentences,
3027
+ PARAGRAPHS: countParagraphs,
3028
+ LINES: countLines,
3029
+ PAGES: countPages,
3030
+ };
2958
3031
  /**
3032
+ * TODO: [๐Ÿง ][๐Ÿค ] This should be probably as part of `TextFormatParser`
2959
3033
  * Note: [💞] Ignore a discrepancy between file name and entity name
2960
3034
  */
2961
3035
 
2962
3036
  /**
2963
- * Function `addUsage` will add multiple usages into one
3037
+ * Function checkExpectations will check if the expectations on given value are met
2964
3038
  *
2965
- * Note: If you provide 0 values, it returns ZERO_USAGE
3039
+ * Note: There are two similar functions:
3040
+ * - `checkExpectations` which throws an error if the expectations are not met
3041
+ * - `isPassingExpectations` which returns a boolean
2966
3042
  *
2967
- * @public exported from `@promptbook/core`
3043
+ * @throws {ExpectError} if the expectations are not met
3044
+ * @returns {void} Nothing
3045
+ * @private internal function of `createPipelineExecutor`
2968
3046
  */
2969
- function addUsage(...usageItems) {
2970
- return usageItems.reduce((acc, item) => {
2971
- var _a;
2972
- acc.price.value += ((_a = item.price) === null || _a === void 0 ? void 0 : _a.value) || 0;
2973
- for (const key of Object.keys(acc.input)) {
2974
- // eslint-disable-next-line @typescript-eslint/ban-ts-comment
2975
- //@ts-ignore
2976
- if (item.input[key]) {
2977
- // eslint-disable-next-line @typescript-eslint/ban-ts-comment
2978
- //@ts-ignore
2979
- acc.input[key].value += item.input[key].value || 0;
2980
- // eslint-disable-next-line @typescript-eslint/ban-ts-comment
2981
- //@ts-ignore
2982
- if (item.input[key].isUncertain) {
2983
- // eslint-disable-next-line @typescript-eslint/ban-ts-comment
2984
- //@ts-ignore
2985
- acc.input[key].isUncertain = true;
2986
- }
2987
- }
3047
+ function checkExpectations(expectations, value) {
3048
+ for (const [unit, { max, min }] of Object.entries(expectations)) {
3049
+ const amount = CountUtils[unit.toUpperCase()](value);
3050
+ if (min && amount < min) {
3051
+ throw new ExpectError(`Expected at least ${min} ${unit} but got ${amount}`);
3052
+ } /* not else */
3053
+ if (max && amount > max) {
3054
+ throw new ExpectError(`Expected at most ${max} ${unit} but got ${amount}`);
2988
3055
  }
2989
- for (const key of Object.keys(acc.output)) {
2990
- // eslint-disable-next-line @typescript-eslint/ban-ts-comment
2991
- //@ts-ignore
2992
- if (item.output[key]) {
2993
- // eslint-disable-next-line @typescript-eslint/ban-ts-comment
2994
- //@ts-ignore
2995
- acc.output[key].value += item.output[key].value || 0;
2996
- // eslint-disable-next-line @typescript-eslint/ban-ts-comment
2997
- //@ts-ignore
2998
- if (item.output[key].isUncertain) {
2999
- // eslint-disable-next-line @typescript-eslint/ban-ts-comment
3000
- //@ts-ignore
3001
- acc.output[key].isUncertain = true;
3056
+ }
3057
+ }
3058
+ /**
3059
+ * TODO: [๐Ÿ’] Unite object for expecting amount and format
3060
+ * TODO: [๐Ÿง ][๐Ÿค ] This should be part of `TextFormatParser`
3061
+ * Note: [๐Ÿ’] and [๐Ÿค ] are interconnected together
3062
+ */
3063
+
3064
+ /**
3065
+ * Validates a prompt result against expectations and format requirements.
3066
+ * This function provides a common abstraction for result validation that can be used
3067
+ * by both execution logic and caching logic to ensure consistency.
3068
+ *
3069
+ * @param options - The validation options including result string, expectations, and format
3070
+ * @returns Validation result with processed string and validity status
3071
+ * @private internal function of `createPipelineExecutor` and `cacheLlmTools`
3072
+ */
3073
+ function validatePromptResult(options) {
3074
+ const { resultString, expectations, format } = options;
3075
+ let processedResultString = resultString;
3076
+ let validationError;
3077
+ try {
3078
+ // TODO: [๐Ÿ’] Unite object for expecting amount and format
3079
+ if (format) {
3080
+ if (format === 'JSON') {
3081
+ if (!isValidJsonString(processedResultString)) {
3082
+ // TODO: [๐Ÿข] Do more universally via `FormatParser`
3083
+ try {
3084
+ processedResultString = extractJsonBlock(processedResultString);
3085
+ }
3086
+ catch (error) {
3087
+ keepUnused(error);
3088
+ throw new ExpectError(spaceTrim.spaceTrim((block) => `
3089
+ Expected valid JSON string
3090
+
3091
+ The expected JSON text:
3092
+ ${block(processedResultString)}
3093
+ `));
3094
+ }
3002
3095
  }
3003
3096
  }
3097
+ else {
3098
+ throw new UnexpectedError(`Unknown format "${format}"`);
3099
+ }
3004
3100
  }
3005
- return acc;
3006
- }, deepClone(ZERO_USAGE));
3101
+ // TODO: [๐Ÿ’] Unite object for expecting amount and format
3102
+ if (expectations) {
3103
+ checkExpectations(expectations, processedResultString);
3104
+ }
3105
+ return {
3106
+ isValid: true,
3107
+ processedResultString,
3108
+ };
3109
+ }
3110
+ catch (error) {
3111
+ if (error instanceof ExpectError) {
3112
+ validationError = error;
3113
+ }
3114
+ else {
3115
+ // Re-throw non-ExpectError errors (like UnexpectedError)
3116
+ throw error;
3117
+ }
3118
+ return {
3119
+ isValid: false,
3120
+ processedResultString,
3121
+ error: validationError,
3122
+ };
3123
+ }
3007
3124
  }
3008
3125
 
3009
3126
  /**
3010
3127
  * Intercepts LLM tools and counts total usage of the tools
3011
3128
  *
3012
- * @param llmTools LLM tools to be intercepted with usage counting
3129
+ * Note: It can take extended `LlmExecutionTools` and cache the
3130
+ *
3131
+ * @param llmTools LLM tools to be intercepted with usage counting, it can contain extra methods like `totalUsage`
3013
3132
  * @returns LLM tools with same functionality with added total cost counting
3014
3133
  * @public exported from `@promptbook/core`
3015
3134
  */
3016
- function countUsage(llmTools) {
3017
- let totalUsage = ZERO_USAGE;
3018
- const spending = new rxjs.Subject();
3135
+ function cacheLlmTools(llmTools, options = {}) {
3136
+ const { storage = new MemoryStorage(), isCacheReloaded = false, isVerbose = DEFAULT_IS_VERBOSE } = options;
3019
3137
  const proxyTools = {
3138
+ ...llmTools,
3139
+ // <- Note: [๐Ÿฅซ]
3020
3140
  get title() {
3021
- return `${llmTools.title} (+usage)`;
3141
+ return `${llmTools.title} (cached)`;
3022
3142
  // <- TODO: [๐Ÿงˆ] Maybe standartize the suffix when wrapping `LlmExecutionTools` up
3023
- // <- TODO: [๐Ÿงˆ][๐Ÿง ] Does it make sense to suffix "(+usage)"?
3143
+ // <- TODO: [๐Ÿงˆ][๐Ÿง ] Does it make sense to suffix "(cached)"?
3024
3144
  },
3025
3145
  get description() {
3026
- return `${llmTools.description} (+usage)`;
3146
+ return `${llmTools.description} (cached)`;
3027
3147
  // <- TODO: [๐Ÿงˆ] Maybe standartize the suffix when wrapping `LlmExecutionTools` up
3028
- // <- TODO: [๐Ÿงˆ][๐Ÿง ] Does it make sense to suffix "(+usage)"?
3029
- },
3030
- checkConfiguration() {
3031
- return /* not await */ llmTools.checkConfiguration();
3148
+ // <- TODO: [๐Ÿงˆ][๐Ÿง ] Does it make sense to suffix "(cached)"?
3032
3149
  },
3033
3150
  listModels() {
3151
+ // TODO: [๐Ÿง ] Should be model listing also cached?
3034
3152
  return /* not await */ llmTools.listModels();
3035
3153
  },
3036
- spending() {
3037
- return spending.asObservable();
3038
- },
3039
- getTotalUsage() {
3040
- // <- Note: [๐Ÿฅซ] Not using getter `get totalUsage` but `getTotalUsage` to allow this object to be proxied
3041
- return totalUsage;
3042
- },
3043
3154
  };
3044
- if (llmTools.callChatModel !== undefined) {
3045
- proxyTools.callChatModel = async (prompt) => {
3046
- // console.info('[๐Ÿš•] callChatModel through countTotalUsage');
3047
- const promptResult = await llmTools.callChatModel(prompt);
3048
- totalUsage = addUsage(totalUsage, promptResult.usage);
3049
- spending.next(promptResult.usage);
3050
- return promptResult;
3051
- };
3052
- }
3053
- if (llmTools.callCompletionModel !== undefined) {
3054
- proxyTools.callCompletionModel = async (prompt) => {
3055
- // console.info('[๐Ÿš•] callCompletionModel through countTotalUsage');
3056
- const promptResult = await llmTools.callCompletionModel(prompt);
3057
- totalUsage = addUsage(totalUsage, promptResult.usage);
3058
- spending.next(promptResult.usage);
3059
- return promptResult;
3155
+ const callCommonModel = async (prompt) => {
3156
+ var _a;
3157
+ const { parameters, content, modelRequirements } = prompt;
3158
+ // <- Note: These are relevant things from the prompt that the cache key should depend on.
3159
+ // TODO: Maybe some standalone function for normalization of content for cache
3160
+ let normalizedContent = content;
3161
+ normalizedContent = normalizedContent.replace(/\s+/g, ' ');
3162
+ normalizedContent = normalizedContent.split('\r\n').join('\n');
3163
+ normalizedContent = spaceTrim__default["default"](normalizedContent);
3164
+ // Note: Do not need to save everything in the cache, just the relevant parameters
3165
+ const relevantParameterNames = extractParameterNames(content);
3166
+ const relevantParameters = Object.fromEntries(Object.entries(parameters).filter(([key]) => relevantParameterNames.has(key)));
3167
+ const keyHashBase = { relevantParameters, normalizedContent, modelRequirements };
3168
+ const key = titleToName(prompt.title.substring(0, MAX_FILENAME_LENGTH - 10) +
3169
+ '-' +
3170
+ sha256__default["default"](hexEncoder__default["default"].parse(JSON.stringify(keyHashBase)))
3171
+ .toString( /* hex */)
3172
+ .substring(0, 10 - 1));
3173
+ const cacheItem = !isCacheReloaded ? await storage.getItem(key) : null;
3174
+ if (cacheItem) {
3175
+ return cacheItem.promptResult;
3176
+ }
3177
+ if (isVerbose) {
3178
+ console.info('Cache miss for key:', key, {
3179
+ prompt,
3180
+ 'prompt.title': prompt.title,
3181
+ MAX_FILENAME_LENGTH,
3182
+ keyHashBase,
3183
+ parameters,
3184
+ relevantParameters,
3185
+ content,
3186
+ normalizedContent,
3187
+ modelRequirements,
3188
+ });
3189
+ }
3190
+ let promptResult;
3191
+ variant: switch (prompt.modelRequirements.modelVariant) {
3192
+ case 'CHAT':
3193
+ promptResult = await llmTools.callChatModel(prompt);
3194
+ break variant;
3195
+ case 'COMPLETION':
3196
+ promptResult = await llmTools.callCompletionModel(prompt);
3197
+ break variant;
3198
+ case 'EMBEDDING':
3199
+ promptResult = await llmTools.callEmbeddingModel(prompt);
3200
+ break variant;
3201
+ // <- case [๐Ÿค–]:
3202
+ default:
3203
+ throw new PipelineExecutionError(`Unknown model variant "${prompt.modelRequirements.modelVariant}"`);
3204
+ }
3205
+ // TODO: [๐Ÿง ] !!5 How to do timing in mixed cache / non-cache situation
3206
+ // promptResult.timing: FromtoItems
3207
+ // Check if the result is valid and should be cached
3208
+ // A result is considered failed if:
3209
+ // 1. It has a content property that is null or undefined
3210
+ // 2. It has an error property that is truthy
3211
+ // 3. It has a success property that is explicitly false
3212
+ // 4. It doesn't meet the prompt's expectations or format requirements
3213
+ const isBasicFailedResult = promptResult.content === null ||
3214
+ promptResult.content === undefined ||
3215
+ promptResult.error ||
3216
+ promptResult.success === false;
3217
+ let shouldCache = !isBasicFailedResult;
3218
+ // If the basic result is valid, check against expectations and format
3219
+ if (shouldCache && promptResult.content) {
3220
+ try {
3221
+ const validationResult = validatePromptResult({
3222
+ resultString: promptResult.content,
3223
+ expectations: prompt.expectations,
3224
+ format: prompt.format,
3225
+ });
3226
+ shouldCache = validationResult.isValid;
3227
+ if (!shouldCache && isVerbose) {
3228
+ console.info('Not caching result that fails expectations/format validation for key:', key, {
3229
+ content: promptResult.content,
3230
+ expectations: prompt.expectations,
3231
+ format: prompt.format,
3232
+ validationError: (_a = validationResult.error) === null || _a === void 0 ? void 0 : _a.message,
3233
+ });
3234
+ }
3235
+ }
3236
+ catch (error) {
3237
+ // If validation throws an unexpected error, don't cache
3238
+ shouldCache = false;
3239
+ if (isVerbose) {
3240
+ console.info('Not caching result due to validation error for key:', key, {
3241
+ content: promptResult.content,
3242
+ validationError: error instanceof Error ? error.message : String(error),
3243
+ });
3244
+ }
3245
+ }
3246
+ }
3247
+ if (shouldCache) {
3248
+ await storage.setItem(key, {
3249
+ date: $getCurrentDate(),
3250
+ promptbookVersion: PROMPTBOOK_ENGINE_VERSION,
3251
+ bookVersion: BOOK_LANGUAGE_VERSION,
3252
+ prompt: {
3253
+ ...prompt,
3254
+ parameters: Object.entries(parameters).length === Object.entries(relevantParameters).length
3255
+ ? parameters
3256
+ : {
3257
+ ...relevantParameters,
3258
+ note: `<- Note: Only relevant parameters are stored in the cache`,
3259
+ },
3260
+ },
3261
+ promptResult,
3262
+ });
3263
+ }
3264
+ else if (isVerbose && isBasicFailedResult) {
3265
+ console.info('Not caching failed result for key:', key, {
3266
+ content: promptResult.content,
3267
+ error: promptResult.error,
3268
+ success: promptResult.success,
3269
+ });
3270
+ }
3271
+ return promptResult;
3272
+ };
3273
+ if (llmTools.callChatModel !== undefined) {
3274
+ proxyTools.callChatModel = async (prompt) => {
3275
+ return /* not await */ callCommonModel(prompt);
3276
+ };
3277
+ }
3278
+ if (llmTools.callCompletionModel !== undefined) {
3279
+ proxyTools.callCompletionModel = async (prompt) => {
3280
+ return /* not await */ callCommonModel(prompt);
3060
3281
  };
3061
3282
  }
3062
3283
  if (llmTools.callEmbeddingModel !== undefined) {
3063
3284
  proxyTools.callEmbeddingModel = async (prompt) => {
3064
- // console.info('[๐Ÿš•] callEmbeddingModel through countTotalUsage');
3065
- const promptResult = await llmTools.callEmbeddingModel(prompt);
3066
- totalUsage = addUsage(totalUsage, promptResult.usage);
3067
- spending.next(promptResult.usage);
3068
- return promptResult;
3285
+ return /* not await */ callCommonModel(prompt);
3069
3286
  };
3070
3287
  }
3071
3288
  // <- Note: [๐Ÿค–]
@@ -3074,84 +3291,272 @@
3074
3291
  /**
3075
3292
  * TODO: [๐Ÿง ][๐Ÿ’ธ] Maybe make some common abstraction `interceptLlmTools` and use here (or use javascript Proxy?)
3076
3293
  * TODO: [🧠] Is there some meaningful way to test this util?
3077
- * TODO: [๐Ÿง ][๐ŸŒฏ] Maybe a way how to hide ability to `get totalUsage`
3078
- * > const [llmToolsWithUsage,getUsage] = countTotalUsage(llmTools);
3079
- * TODO: [๐Ÿ‘ทโ€โ™‚๏ธ] @@@ Manual about construction of llmTools
3294
+ * TODO: [๐Ÿ‘ทโ€โ™‚๏ธ] Comprehensive manual about construction of llmTools
3295
+ * Detailed explanation about caching strategies and appropriate storage selection for different use cases
3296
+ * Examples of how to combine multiple interceptors for advanced caching, logging, and usage tracking
3080
3297
  */
3081
3298
 
3082
3299
  /**
3083
- * Provides LLM tools configuration by reading environment variables.
3300
+ * Represents the zero value (exactly 0, with no uncertainty flag)
3084
3301
  *
3085
- * Note: `$` is used to indicate that this function is not a pure function - it uses filesystem to access `.env` file
3302
+ * @public exported from `@promptbook/core`
3303
+ */
3304
+ const ZERO_VALUE = $deepFreeze({ value: 0 });
3305
+ /**
3306
+ * Represents the uncertain zero value (0 flagged with `isUncertain: true`)
3086
3307
  *
3087
- * It looks for environment variables:
3088
- * - `process.env.OPENAI_API_KEY`
3089
- * - `process.env.ANTHROPIC_CLAUDE_API_KEY`
3090
- * - ...
3308
+ * @public exported from `@promptbook/core`
3309
+ */
3310
+ const UNCERTAIN_ZERO_VALUE = $deepFreeze({ value: 0, isUncertain: true });
3311
+ /**
3312
+ * Represents the usage with no resources consumed
3091
3313
  *
3092
- * @see Environment variables documentation or .env file for required variables.
3093
- * @returns A promise that resolves to the LLM tools configuration, or null if configuration is incomplete or missing.
3094
- * @public exported from `@promptbook/node`
3314
+ * @public exported from `@promptbook/core`
3095
3315
  */
3096
- async function $provideLlmToolsConfigurationFromEnv() {
3097
- if (!$isRunningInNode()) {
3098
- throw new EnvironmentMismatchError('Function `$provideLlmToolsFromEnv` works only in Node.js environment');
3099
- }
3100
- const envFilepath = await $provideEnvFilename();
3101
- if (envFilepath !== null) {
3102
- dotenv__namespace.config({ path: envFilepath });
3103
- }
3104
- const llmToolsConfiguration = $llmToolsMetadataRegister
3105
- .list()
3106
- .map((metadata) => metadata.createConfigurationFromEnv(process.env))
3107
- .filter((configuration) => configuration !== null);
3108
- return llmToolsConfiguration;
3109
- }
3316
+ const ZERO_USAGE = $deepFreeze({
3317
+ price: ZERO_VALUE,
3318
+ input: {
3319
+ tokensCount: ZERO_VALUE,
3320
+ charactersCount: ZERO_VALUE,
3321
+ wordsCount: ZERO_VALUE,
3322
+ sentencesCount: ZERO_VALUE,
3323
+ linesCount: ZERO_VALUE,
3324
+ paragraphsCount: ZERO_VALUE,
3325
+ pagesCount: ZERO_VALUE,
3326
+ },
3327
+ output: {
3328
+ tokensCount: ZERO_VALUE,
3329
+ charactersCount: ZERO_VALUE,
3330
+ wordsCount: ZERO_VALUE,
3331
+ sentencesCount: ZERO_VALUE,
3332
+ linesCount: ZERO_VALUE,
3333
+ paragraphsCount: ZERO_VALUE,
3334
+ pagesCount: ZERO_VALUE,
3335
+ },
3336
+ });
3110
3337
  /**
3111
- * Note: [🟢] Code in this file should never be released in packages that could be imported into browser environment
3338
+ * Represents the usage with unknown resources consumed
3339
+ *
3340
+ * @public exported from `@promptbook/core`
3341
+ */
3342
+ const UNCERTAIN_USAGE = $deepFreeze({
3343
+ price: UNCERTAIN_ZERO_VALUE,
3344
+ input: {
3345
+ tokensCount: UNCERTAIN_ZERO_VALUE,
3346
+ charactersCount: UNCERTAIN_ZERO_VALUE,
3347
+ wordsCount: UNCERTAIN_ZERO_VALUE,
3348
+ sentencesCount: UNCERTAIN_ZERO_VALUE,
3349
+ linesCount: UNCERTAIN_ZERO_VALUE,
3350
+ paragraphsCount: UNCERTAIN_ZERO_VALUE,
3351
+ pagesCount: UNCERTAIN_ZERO_VALUE,
3352
+ },
3353
+ output: {
3354
+ tokensCount: UNCERTAIN_ZERO_VALUE,
3355
+ charactersCount: UNCERTAIN_ZERO_VALUE,
3356
+ wordsCount: UNCERTAIN_ZERO_VALUE,
3357
+ sentencesCount: UNCERTAIN_ZERO_VALUE,
3358
+ linesCount: UNCERTAIN_ZERO_VALUE,
3359
+ paragraphsCount: UNCERTAIN_ZERO_VALUE,
3360
+ pagesCount: UNCERTAIN_ZERO_VALUE,
3361
+ },
3362
+ });
3363
+ /**
3364
+ * Note: [๐Ÿ’ž] Ignore a discrepancy between file name and entity name
3112
3365
  */
3113
3366
 
3114
3367
  /**
3115
- * Multiple LLM Execution Tools is a proxy server that uses multiple execution tools internally and exposes the executor interface externally.
3368
+ * Function `addUsage` will add multiple usages into one
3369
+ *
3370
+ * Note: If you call it with no usage items, it returns a deep clone of `ZERO_USAGE`
3116
3371
  *
3117
- * Note: Internal utility of `joinLlmExecutionTools` but exposed type
3118
3372
  * @public exported from `@promptbook/core`
3119
3373
  */
3120
- class MultipleLlmExecutionTools {
3121
- /**
3122
- * Gets array of execution tools in order of priority
3123
- */
3124
- constructor(...llmExecutionTools) {
3125
- this.llmExecutionTools = llmExecutionTools;
3126
- }
3127
- get title() {
3128
- return 'Multiple LLM Providers';
3129
- }
3130
- get description() {
3131
- const innerModelsTitlesAndDescriptions = this.llmExecutionTools
3132
- .map(({ title, description }, index) => {
3133
- const headLine = `${index + 1}) \`${title}\``;
3134
- if (description === undefined) {
3135
- return headLine;
3374
+ function addUsage(...usageItems) {
3375
+ return usageItems.reduce((acc, item) => {
3376
+ var _a;
3377
+ acc.price.value += ((_a = item.price) === null || _a === void 0 ? void 0 : _a.value) || 0;
3378
+ for (const key of Object.keys(acc.input)) {
3379
+ // eslint-disable-next-line @typescript-eslint/ban-ts-comment
3380
+ //@ts-ignore
3381
+ if (item.input[key]) {
3382
+ // eslint-disable-next-line @typescript-eslint/ban-ts-comment
3383
+ //@ts-ignore
3384
+ acc.input[key].value += item.input[key].value || 0;
3385
+ // eslint-disable-next-line @typescript-eslint/ban-ts-comment
3386
+ //@ts-ignore
3387
+ if (item.input[key].isUncertain) {
3388
+ // eslint-disable-next-line @typescript-eslint/ban-ts-comment
3389
+ //@ts-ignore
3390
+ acc.input[key].isUncertain = true;
3391
+ }
3136
3392
  }
3137
- return spaceTrim__default["default"]((block) => `
3138
- ${headLine}
3139
-
3140
- ${ /* <- Note: Indenting the description: */block(description)}
3141
- `);
3142
- })
3143
- .join('\n\n');
3144
- return spaceTrim__default["default"]((block) => `
3145
- Multiple LLM Providers:
3393
+ }
3394
+ for (const key of Object.keys(acc.output)) {
3395
+ // eslint-disable-next-line @typescript-eslint/ban-ts-comment
3396
+ //@ts-ignore
3397
+ if (item.output[key]) {
3398
+ // eslint-disable-next-line @typescript-eslint/ban-ts-comment
3399
+ //@ts-ignore
3400
+ acc.output[key].value += item.output[key].value || 0;
3401
+ // eslint-disable-next-line @typescript-eslint/ban-ts-comment
3402
+ //@ts-ignore
3403
+ if (item.output[key].isUncertain) {
3404
+ // eslint-disable-next-line @typescript-eslint/ban-ts-comment
3405
+ //@ts-ignore
3406
+ acc.output[key].isUncertain = true;
3407
+ }
3408
+ }
3409
+ }
3410
+ return acc;
3411
+ }, deepClone(ZERO_USAGE));
3412
+ }
3146
3413
 
3147
- ${block(innerModelsTitlesAndDescriptions)}
3148
- `);
3149
- }
3150
- /**
3151
- * Check the configuration of all execution tools
3152
- */
3153
- async checkConfiguration() {
3154
- // Note: Run checks in parallel
3414
+ /**
3415
+ * Intercepts LLM tools and counts total usage of the tools
3416
+ *
3417
+ * @param llmTools LLM tools to be intercepted with usage counting
3418
+ * @returns LLM tools with same functionality with added total cost counting
3419
+ * @public exported from `@promptbook/core`
3420
+ */
3421
+ function countUsage(llmTools) {
3422
+ let totalUsage = ZERO_USAGE;
3423
+ const spending = new rxjs.Subject();
3424
+ const proxyTools = {
3425
+ get title() {
3426
+ return `${llmTools.title} (+usage)`;
3427
+ // <- TODO: [🧈] Maybe standardize the suffix when wrapping `LlmExecutionTools` up
3428
+ // <- TODO: [๐Ÿงˆ][๐Ÿง ] Does it make sense to suffix "(+usage)"?
3429
+ },
3430
+ get description() {
3431
+ return `${llmTools.description} (+usage)`;
3432
+ // <- TODO: [🧈] Maybe standardize the suffix when wrapping `LlmExecutionTools` up
3433
+ // <- TODO: [๐Ÿงˆ][๐Ÿง ] Does it make sense to suffix "(+usage)"?
3434
+ },
3435
+ checkConfiguration() {
3436
+ return /* not await */ llmTools.checkConfiguration();
3437
+ },
3438
+ listModels() {
3439
+ return /* not await */ llmTools.listModels();
3440
+ },
3441
+ spending() {
3442
+ return spending.asObservable();
3443
+ },
3444
+ getTotalUsage() {
3445
+ // <- Note: [๐Ÿฅซ] Not using getter `get totalUsage` but `getTotalUsage` to allow this object to be proxied
3446
+ return totalUsage;
3447
+ },
3448
+ };
3449
+ if (llmTools.callChatModel !== undefined) {
3450
+ proxyTools.callChatModel = async (prompt) => {
3451
+ // console.info('[๐Ÿš•] callChatModel through countTotalUsage');
3452
+ const promptResult = await llmTools.callChatModel(prompt);
3453
+ totalUsage = addUsage(totalUsage, promptResult.usage);
3454
+ spending.next(promptResult.usage);
3455
+ return promptResult;
3456
+ };
3457
+ }
3458
+ if (llmTools.callCompletionModel !== undefined) {
3459
+ proxyTools.callCompletionModel = async (prompt) => {
3460
+ // console.info('[๐Ÿš•] callCompletionModel through countTotalUsage');
3461
+ const promptResult = await llmTools.callCompletionModel(prompt);
3462
+ totalUsage = addUsage(totalUsage, promptResult.usage);
3463
+ spending.next(promptResult.usage);
3464
+ return promptResult;
3465
+ };
3466
+ }
3467
+ if (llmTools.callEmbeddingModel !== undefined) {
3468
+ proxyTools.callEmbeddingModel = async (prompt) => {
3469
+ // console.info('[๐Ÿš•] callEmbeddingModel through countTotalUsage');
3470
+ const promptResult = await llmTools.callEmbeddingModel(prompt);
3471
+ totalUsage = addUsage(totalUsage, promptResult.usage);
3472
+ spending.next(promptResult.usage);
3473
+ return promptResult;
3474
+ };
3475
+ }
3476
+ // <- Note: [๐Ÿค–]
3477
+ return proxyTools;
3478
+ }
3479
+ /**
3480
+ * TODO: [๐Ÿง ][๐Ÿ’ธ] Maybe make some common abstraction `interceptLlmTools` and use here (or use javascript Proxy?)
3481
+ * TODO: [🧠] Is there some meaningful way to test this util?
3482
+ * TODO: [๐Ÿง ][๐ŸŒฏ] Maybe a way how to hide ability to `get totalUsage`
3483
+ * > const [llmToolsWithUsage,getUsage] = countTotalUsage(llmTools);
3484
+ * TODO: [๐Ÿ‘ทโ€โ™‚๏ธ] @@@ Manual about construction of llmTools
3485
+ */
3486
+
3487
+ /**
3488
+ * Provides LLM tools configuration by reading environment variables.
3489
+ *
3490
+ * Note: `$` is used to indicate that this function is not a pure function - it uses filesystem to access `.env` file
3491
+ *
3492
+ * It looks for environment variables:
3493
+ * - `process.env.OPENAI_API_KEY`
3494
+ * - `process.env.ANTHROPIC_CLAUDE_API_KEY`
3495
+ * - ...
3496
+ *
3497
+ * @see Environment variables documentation or .env file for required variables.
3498
+ * @returns A promise that resolves to the list of LLM tools configurations created from the environment; providers for which no configuration could be created (`null`) are omitted from the list.
3499
+ * @public exported from `@promptbook/node`
3500
+ */
3501
+ async function $provideLlmToolsConfigurationFromEnv() {
3502
+ if (!$isRunningInNode()) {
3503
+ throw new EnvironmentMismatchError('Function `$provideLlmToolsFromEnv` works only in Node.js environment');
3504
+ }
3505
+ const envFilepath = await $provideEnvFilename();
3506
+ if (envFilepath !== null) {
3507
+ dotenv__namespace.config({ path: envFilepath });
3508
+ }
3509
+ const llmToolsConfiguration = $llmToolsMetadataRegister
3510
+ .list()
3511
+ .map((metadata) => metadata.createConfigurationFromEnv(process.env))
3512
+ .filter((configuration) => configuration !== null);
3513
+ return llmToolsConfiguration;
3514
+ }
3515
+ /**
3516
+ * Note: [🟢] Code in this file should never be released in packages that could be imported into browser environment
3517
+ */
3518
+
3519
+ /**
3520
+ * Multiple LLM Execution Tools is a proxy server that uses multiple execution tools internally and exposes the executor interface externally.
3521
+ *
3522
+ * Note: Internal utility of `joinLlmExecutionTools` but exposed type
3523
+ * @public exported from `@promptbook/core`
3524
+ */
3525
+ class MultipleLlmExecutionTools {
3526
+ /**
3527
+ * Gets array of execution tools in order of priority
3528
+ */
3529
+ constructor(...llmExecutionTools) {
3530
+ this.llmExecutionTools = llmExecutionTools;
3531
+ }
3532
+ get title() {
3533
+ return 'Multiple LLM Providers';
3534
+ }
3535
+ get description() {
3536
+ const innerModelsTitlesAndDescriptions = this.llmExecutionTools
3537
+ .map(({ title, description }, index) => {
3538
+ const headLine = `${index + 1}) \`${title}\``;
3539
+ if (description === undefined) {
3540
+ return headLine;
3541
+ }
3542
+ return spaceTrim__default["default"]((block) => `
3543
+ ${headLine}
3544
+
3545
+ ${ /* <- Note: Indenting the description: */block(description)}
3546
+ `);
3547
+ })
3548
+ .join('\n\n');
3549
+ return spaceTrim__default["default"]((block) => `
3550
+ Multiple LLM Providers:
3551
+
3552
+ ${block(innerModelsTitlesAndDescriptions)}
3553
+ `);
3554
+ }
3555
+ /**
3556
+ * Check the configuration of all execution tools
3557
+ */
3558
+ async checkConfiguration() {
3559
+ // Note: Run checks in parallel
3155
3560
  await Promise.all(this.llmExecutionTools.map((tools) => tools.checkConfiguration()));
3156
3561
  }
3157
3562
  /**
@@ -3334,8 +3739,10 @@
3334
3739
  .list()
3335
3740
  .find(({ packageName, className }) => llmConfiguration.packageName === packageName && llmConfiguration.className === className);
3336
3741
  if (registeredItem === undefined) {
3742
+ console.log('!!! $llmToolsRegister.list()', $llmToolsRegister.list());
3337
3743
  throw new Error(spaceTrim__default["default"]((block) => `
3338
3744
  There is no constructor for LLM provider \`${llmConfiguration.className}\` from \`${llmConfiguration.packageName}\`
3745
+ Running in ${!$isRunningInBrowser() ? '' : 'browser environment'}${!$isRunningInNode() ? '' : 'node environment'}${!$isRunningInWebWorker() ? '' : 'worker environment'}
3339
3746
 
3340
3747
  You have probably forgotten install and import the provider package.
3341
3748
  To fix this issue, you can:
@@ -4669,28 +5076,6 @@
4669
5076
 
4670
5077
  var PipelineCollection = [{title:"Prepare Knowledge from Markdown",pipelineUrl:"https://promptbook.studio/promptbook/prepare-knowledge-from-markdown.book",formfactorName:"GENERIC",parameters:[{name:"knowledgeContent",description:"Markdown document content",isInput:true,isOutput:false},{name:"knowledgePieces",description:"The knowledge JSON object",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"knowledge",title:"Knowledge",content:"You are experienced data researcher, extract the important knowledge from the document.\n\n# Rules\n\n- Make pieces of information concise, clear, and easy to understand\n- One piece of information should be approximately 1 paragraph\n- Divide the paragraphs by markdown horizontal lines ---\n- Omit irrelevant information\n- Group redundant information\n- Write just extracted information, nothing else\n\n# The document\n\nTake information from this document:\n\n> {knowledgeContent}",resultingParameterName:"knowledgePieces",dependentParameterNames:["knowledgeContent"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Knowledge from Markdown\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-knowledge-from-markdown.book`\n- INPUT PARAMETER `{knowledgeContent}` Markdown document content\n- OUTPUT PARAMETER `{knowledgePieces}` The knowledge JSON object\n\n## Knowledge\n\n<!-- TODO: [๐Ÿ†] -FORMAT JSON -->\n\n```markdown\nYou are experienced data researcher, extract the important knowledge from the document.\n\n# Rules\n\n- Make pieces of information concise, clear, and easy to understand\n- One piece of information should be approximately 1 paragraph\n- Divide the paragraphs by markdown horizontal lines ---\n- Omit irrelevant information\n- Group redundant information\n- Write just extracted information, nothing else\n\n# The document\n\nTake information from this document:\n\n> {knowledgeContent}\n```\n\n`-> 
{knowledgePieces}`\n"}],sourceFile:"./books/prepare-knowledge-from-markdown.book"},{title:"Prepare Keywords",pipelineUrl:"https://promptbook.studio/promptbook/prepare-knowledge-keywords.book",formfactorName:"GENERIC",parameters:[{name:"knowledgePieceContent",description:"The content",isInput:true,isOutput:false},{name:"keywords",description:"Keywords separated by comma",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"knowledge",title:"Knowledge",content:"You are experienced data researcher, detect the important keywords in the document.\n\n# Rules\n\n- Write just keywords separated by comma\n\n# The document\n\nTake information from this document:\n\n> {knowledgePieceContent}",resultingParameterName:"keywords",dependentParameterNames:["knowledgePieceContent"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Keywords\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-knowledge-keywords.book`\n- INPUT PARAMETER `{knowledgePieceContent}` The content\n- OUTPUT PARAMETER `{keywords}` Keywords separated by comma\n\n## Knowledge\n\n<!-- TODO: [๐Ÿ†] -FORMAT JSON -->\n\n```markdown\nYou are experienced data researcher, detect the important keywords in the document.\n\n# Rules\n\n- Write just keywords separated by comma\n\n# The document\n\nTake information from this document:\n\n> {knowledgePieceContent}\n```\n\n`-> {keywords}`\n"}],sourceFile:"./books/prepare-knowledge-keywords.book"},{title:"Prepare Knowledge-piece Title",pipelineUrl:"https://promptbook.studio/promptbook/prepare-knowledge-title.book",formfactorName:"GENERIC",parameters:[{name:"knowledgePieceContent",description:"The content",isInput:true,isOutput:false},{name:"title",description:"The title of the document",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"knowledge",title:"Knowledge",content:"You are experienced content creator, write best title for the document.\n\n# Rules\n\n- Write 
just title, nothing else\n- Write maximum 5 words for the title\n\n# The document\n\n> {knowledgePieceContent}",resultingParameterName:"title",expectations:{words:{min:1,max:8}},dependentParameterNames:["knowledgePieceContent"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Knowledge-piece Title\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-knowledge-title.book`\n- INPUT PARAMETER `{knowledgePieceContent}` The content\n- OUTPUT PARAMETER `{title}` The title of the document\n\n## Knowledge\n\n- EXPECT MIN 1 WORD\n- EXPECT MAX 8 WORDS\n\n```markdown\nYou are experienced content creator, write best title for the document.\n\n# Rules\n\n- Write just title, nothing else\n- Write maximum 5 words for the title\n\n# The document\n\n> {knowledgePieceContent}\n```\n\n`-> {title}`\n"}],sourceFile:"./books/prepare-knowledge-title.book"},{title:"Prepare Persona",pipelineUrl:"https://promptbook.studio/promptbook/prepare-persona.book",formfactorName:"GENERIC",parameters:[{name:"availableModels",description:"List of available model names together with their descriptions as JSON",isInput:true,isOutput:false},{name:"personaDescription",description:"Description of the persona",isInput:true,isOutput:false},{name:"modelsRequirements",description:"Specific requirements for the model",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"make-model-requirements",title:"Make modelRequirements",content:"You are an experienced AI engineer, you need to find the best models for virtual assistants:\n\n## Example\n\n```json\n[\n {\n \"modelName\": \"gpt-4o\",\n \"systemMessage\": \"You are experienced AI engineer and helpful assistant.\",\n \"temperature\": 0.7\n },\n {\n \"modelName\": \"claude-3-5-sonnet\",\n \"systemMessage\": \"You are a friendly and knowledgeable chatbot.\",\n \"temperature\": 0.5\n }\n]\n```\n\n## Instructions\n\n- Your output format is JSON array\n- Sort best-fitting 
models first\n- Omit any models that are not suitable\n- Write just the JSON, no other text should be present\n- Array contain items with following keys:\n - `modelName`: The name of the model to use\n - `systemMessage`: The system message to provide context to the model\n - `temperature`: The sampling temperature to use\n\n### Key `modelName`\n\nHere are the available models:\n\n```json\n{availableModels}\n```\n\n### Key `systemMessage`\n\nThe system message is used to communicate instructions or provide context to the model at the beginning of a conversation. It is displayed in a different format compared to user messages, helping the model understand its role in the conversation. The system message typically guides the model's behavior, sets the tone, or specifies desired output from the model. By utilizing the system message effectively, users can steer the model towards generating more accurate and relevant responses.\n\nFor example:\n\n> You are an experienced AI engineer and helpful assistant.\n\n> You are a friendly and knowledgeable chatbot.\n\n### Key `temperature`\n\nThe sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit.\n\nYou can pick a value between 0 and 2. 
For example:\n\n- `0.1`: Low temperature, extremely conservative and deterministic\n- `0.5`: Medium temperature, balanced between conservative and creative\n- `1.0`: High temperature, creative and bit random\n- `1.5`: Very high temperature, extremely creative and often chaotic and unpredictable\n- `2.0`: Maximum temperature, completely random and unpredictable, for some extreme creative use cases\n\n# The assistant\n\nTake this description of the persona:\n\n> {personaDescription}",resultingParameterName:"modelsRequirements",format:"JSON",dependentParameterNames:["availableModels","personaDescription"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Persona\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-persona.book`\n- INPUT PARAMETER `{availableModels}` List of available model names together with their descriptions as JSON\n- INPUT PARAMETER `{personaDescription}` Description of the persona\n- OUTPUT PARAMETER `{modelsRequirements}` Specific requirements for the model\n\n## Make modelRequirements\n\n- FORMAT JSON\n\n```markdown\nYou are an experienced AI engineer, you need to find the best models for virtual assistants:\n\n## Example\n\n\\`\\`\\`json\n[\n {\n \"modelName\": \"gpt-4o\",\n \"systemMessage\": \"You are experienced AI engineer and helpful assistant.\",\n \"temperature\": 0.7\n },\n {\n \"modelName\": \"claude-3-5-sonnet\",\n \"systemMessage\": \"You are a friendly and knowledgeable chatbot.\",\n \"temperature\": 0.5\n }\n]\n\\`\\`\\`\n\n## Instructions\n\n- Your output format is JSON array\n- Sort best-fitting models first\n- Omit any models that are not suitable\n- Write just the JSON, no other text should be present\n- Array contain items with following keys:\n - `modelName`: The name of the model to use\n - `systemMessage`: The system message to provide context to the model\n - `temperature`: The sampling temperature to use\n\n### Key `modelName`\n\nHere are 
the available models:\n\n\\`\\`\\`json\n{availableModels}\n\\`\\`\\`\n\n### Key `systemMessage`\n\nThe system message is used to communicate instructions or provide context to the model at the beginning of a conversation. It is displayed in a different format compared to user messages, helping the model understand its role in the conversation. The system message typically guides the model's behavior, sets the tone, or specifies desired output from the model. By utilizing the system message effectively, users can steer the model towards generating more accurate and relevant responses.\n\nFor example:\n\n> You are an experienced AI engineer and helpful assistant.\n\n> You are a friendly and knowledgeable chatbot.\n\n### Key `temperature`\n\nThe sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit.\n\nYou can pick a value between 0 and 2. 
For example:\n\n- `0.1`: Low temperature, extremely conservative and deterministic\n- `0.5`: Medium temperature, balanced between conservative and creative\n- `1.0`: High temperature, creative and bit random\n- `1.5`: Very high temperature, extremely creative and often chaotic and unpredictable\n- `2.0`: Maximum temperature, completely random and unpredictable, for some extreme creative use cases\n\n# The assistant\n\nTake this description of the persona:\n\n> {personaDescription}\n```\n\n`-> {modelsRequirements}`\n"}],sourceFile:"./books/prepare-persona.book"},{title:"Prepare Title",pipelineUrl:"https://promptbook.studio/promptbook/prepare-title.book",formfactorName:"GENERIC",parameters:[{name:"book",description:"The book to prepare the title for",isInput:true,isOutput:false},{name:"title",description:"Best title for the book",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"make-title",title:"Make title",content:"Make best title for given text which describes the workflow:\n\n## Rules\n\n- Write just title, nothing else\n- Title should be concise and clear - Write maximum ideally 2 words, maximum 5 words\n- Title starts with emoticon\n- Title should not mention the input and output of the workflow but the main purpose of the workflow\n _For example, not \"โœ Convert Knowledge-piece to title\" but \"โœ Title\"_\n\n## The workflow\n\n> {book}",resultingParameterName:"title",expectations:{words:{min:1,max:8},lines:{min:1,max:1}},dependentParameterNames:["book"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Title\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-title.book`\n- INPUT PARAMETER `{book}` The book to prepare the title for\n- OUTPUT PARAMETER `{title}` Best title for the book\n\n## Make title\n\n- EXPECT MIN 1 Word\n- EXPECT MAX 8 Words\n- EXPECT EXACTLY 1 Line\n\n```markdown\nMake best title for given text which describes the workflow:\n\n## 
Rules\n\n- Write just title, nothing else\n- Title should be concise and clear - Write maximum ideally 2 words, maximum 5 words\n- Title starts with emoticon\n- Title should not mention the input and output of the workflow but the main purpose of the workflow\n _For example, not \"โœ Convert Knowledge-piece to title\" but \"โœ Title\"_\n\n## The workflow\n\n> {book}\n```\n\n`-> {title}`\n"}],sourceFile:"./books/prepare-title.book"}];
4671
5078
 
4672
- /**
4673
- * Function isValidJsonString will tell you if the string is valid JSON or not
4674
- *
4675
- * @param value The string to check
4676
- * @returns `true` if the string is a valid JSON string, false otherwise
4677
- *
4678
- * @public exported from `@promptbook/utils`
4679
- */
4680
- function isValidJsonString(value /* <- [๐Ÿ‘จโ€โš–๏ธ] */) {
4681
- try {
4682
- JSON.parse(value);
4683
- return true;
4684
- }
4685
- catch (error) {
4686
- assertsError(error);
4687
- if (error.message.includes('Unexpected token')) {
4688
- return false;
4689
- }
4690
- return false;
4691
- }
4692
- }
4693
-
4694
5079
  /**
4695
5080
  * Function `validatePipelineString` will validate whether the string is a valid pipeline string
4696
5081
  * It does not check if the string is fully logically correct, but if it is a string that can be a pipeline string or the string looks completely different.
@@ -4754,15 +5139,6 @@
4754
5139
  }
4755
5140
  }
4756
5141
 
4757
- /**
4758
- * Makes first letter of a string uppercase
4759
- *
4760
- * @public exported from `@promptbook/utils`
4761
- */
4762
- function capitalize(word) {
4763
- return word.substring(0, 1).toUpperCase() + word.substring(1);
4764
- }
4765
-
4766
5142
  /**
4767
5143
  * Converts promptbook in JSON format to string format
4768
5144
  *
@@ -5521,743 +5897,481 @@
5521
5897
  function difference(a, b, isEqual = (a, b) => a === b) {
5522
5898
  const diff = new Set();
5523
5899
  for (const itemA of Array.from(a)) {
5524
- if (!Array.from(b).some((itemB) => isEqual(itemA, itemB))) {
5525
- diff.add(itemA);
5526
- }
5527
- }
5528
- return diff;
5529
- }
5530
- /**
5531
- * TODO: [🧠][💯] Maybe also implement symmetricDifference
5532
- */
5533
-
5534
- /**
5535
- * Creates a new set with all elements that are present in either set
5536
- *
5537
- * @deprecated use new javascript set methods instead @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Set
5538
- * @public exported from `@promptbook/utils`
5539
- */
5540
- function union(...sets) {
5541
- const union = new Set();
5542
- for (const set of sets) {
5543
- for (const item of Array.from(set)) {
5544
- union.add(item);
5545
- }
5546
- }
5547
- return union;
5548
- }
5549
-
5550
- /**
5551
- * Contains configuration options for parsing and generating CSV files, such as delimiters and quoting rules.
5552
- *
5553
- * @public exported from `@promptbook/core`
5554
- */
5555
- const MANDATORY_CSV_SETTINGS = Object.freeze({
5556
- header: true,
5557
- // encoding: 'utf-8',
5558
- });
5559
-
5560
- /**
5561
- * Converts a CSV string into an object
5562
- *
5563
- * Note: This is wrapper around `papaparse.parse()` with better autohealing
5564
- *
5565
- * @private - for now until `@promptbook/csv` is released
5566
- */
5567
- function csvParse(value /* <- TODO: string_csv */, settings, schema /* <- TODO: Make CSV Schemas */) {
5568
- settings = { ...settings, ...MANDATORY_CSV_SETTINGS };
5569
- // Note: Autoheal invalid '\n' characters
5570
- if (settings.newline && !settings.newline.includes('\r') && value.includes('\r')) {
5571
- console.warn('CSV string contains carriage return characters, but in the CSV settings the `newline` setting does not include them. Autohealing the CSV string.');
5572
- value = value.replace(/\r\n/g, '\n').replace(/\r/g, '\n');
5573
- }
5574
- const csv = papaparse.parse(value, settings);
5575
- return csv;
5576
- }
5577
-
5578
- /**
5579
- * Function to check if a string is valid CSV
5580
- *
5581
- * @param value The string to check
5582
- * @returns `true` if the string is a valid CSV string, false otherwise
5583
- *
5584
- * @public exported from `@promptbook/utils`
5585
- */
5586
- function isValidCsvString(value) {
5587
- try {
5588
- // A simple check for CSV format: at least one comma and no invalid characters
5589
- if (value.includes(',') && /^[\w\s,"']+$/.test(value)) {
5590
- return true;
5591
- }
5592
- return false;
5593
- }
5594
- catch (error) {
5595
- assertsError(error);
5596
- return false;
5597
- }
5598
- }
5599
-
5600
- /**
5601
- * Definition for CSV spreadsheet
5602
- *
5603
- * @public exported from `@promptbook/core`
5604
- * <- TODO: [๐Ÿข] Export from package `@promptbook/csv`
5605
- */
5606
- const CsvFormatParser = {
5607
- formatName: 'CSV',
5608
- aliases: ['SPREADSHEET', 'TABLE'],
5609
- isValid(value, settings, schema) {
5610
- return isValidCsvString(value);
5611
- },
5612
- canBeValid(partialValue, settings, schema) {
5613
- return true;
5614
- },
5615
- heal(value, settings, schema) {
5616
- throw new Error('Not implemented');
5617
- },
5618
- subvalueParsers: [
5619
- {
5620
- subvalueName: 'ROW',
5621
- async mapValues(options) {
5622
- const { value, outputParameterName, settings, mapCallback, onProgress } = options;
5623
- const csv = csvParse(value, settings);
5624
- if (csv.errors.length !== 0) {
5625
- throw new CsvFormatError(spaceTrim__default["default"]((block) => `
5626
- CSV parsing error
5627
-
5628
- Error(s) from CSV parsing:
5629
- ${block(csv.errors.map((error) => error.message).join('\n\n'))}
5630
-
5631
- The CSV setings:
5632
- ${block(JSON.stringify({ ...settings, ...MANDATORY_CSV_SETTINGS }, null, 2))}
5633
-
5634
- The CSV data:
5635
- ${block(value)}
5636
- `));
5637
- }
5638
- const mappedData = [];
5639
- const length = csv.data.length;
5640
- for (let index = 0; index < length; index++) {
5641
- const row = csv.data[index];
5642
- if (row[outputParameterName]) {
5643
- throw new CsvFormatError(`Can not overwrite existing column "${outputParameterName}" in CSV row`);
5644
- }
5645
- const mappedRow = {
5646
- ...row,
5647
- [outputParameterName]: await mapCallback(row, index, length),
5648
- };
5649
- mappedData.push(mappedRow);
5650
- if (onProgress) {
5651
- // Note: Report the CSV with all rows mapped so far
5652
- /*
5653
- // TODO: [🛕] Report progress with all the rows including the pending ones
5654
- const progressData = mappedData.map((row, i) =>
5655
- i > index ? { ...row, [outputParameterName]: PENDING_VALUE_PLACEHOLDER } : row,
5656
- );
5657
- */
5658
- await onProgress(papaparse.unparse(mappedData, { ...settings, ...MANDATORY_CSV_SETTINGS }));
5659
- }
5660
- }
5661
- return papaparse.unparse(mappedData, { ...settings, ...MANDATORY_CSV_SETTINGS });
5662
- },
5663
- },
5664
- {
5665
- subvalueName: 'CELL',
5666
- async mapValues(options) {
5667
- const { value, settings, mapCallback, onProgress } = options;
5668
- const csv = csvParse(value, settings);
5669
- if (csv.errors.length !== 0) {
5670
- throw new CsvFormatError(spaceTrim__default["default"]((block) => `
5671
- CSV parsing error
5672
-
5673
- Error(s) from CSV parsing:
5674
- ${block(csv.errors.map((error) => error.message).join('\n\n'))}
5675
-
5676
- The CSV setings:
5677
- ${block(JSON.stringify({ ...settings, ...MANDATORY_CSV_SETTINGS }, null, 2))}
5678
-
5679
- The CSV data:
5680
- ${block(value)}
5681
- `));
5682
- }
5683
- const mappedData = await Promise.all(csv.data.map(async (row, rowIndex) => {
5684
- return /* not await */ Promise.all(Object.entries(row).map(async ([key, value], columnIndex, array) => {
5685
- const index = rowIndex * Object.keys(row).length + columnIndex;
5686
- return /* not await */ mapCallback({ [key]: value }, index, array.length);
5687
- }));
5688
- }));
5689
- return papaparse.unparse(mappedData, { ...settings, ...MANDATORY_CSV_SETTINGS });
5690
- },
5691
- },
5692
- ],
5693
- };
5694
- /**
5695
- * TODO: [๐Ÿ“] In `CsvFormatParser` implement simple `isValid`
5696
- * TODO: [๐Ÿ“] In `CsvFormatParser` implement partial `canBeValid`
5697
- * TODO: [๐Ÿ“] In `CsvFormatParser` implement `heal
5698
- * TODO: [๐Ÿ“] In `CsvFormatParser` implement `subvalueParsers`
5699
- * TODO: [๐Ÿข] Allow to expect something inside CSV objects and other formats
5700
- */
5701
-
5702
- /**
5703
- * Definition for JSON format
5704
- *
5705
- * @private still in development [๐Ÿข]
5706
- */
5707
- const JsonFormatParser = {
5708
- formatName: 'JSON',
5709
- mimeType: 'application/json',
5710
- isValid(value, settings, schema) {
5711
- return isValidJsonString(value);
5712
- },
5713
- canBeValid(partialValue, settings, schema) {
5714
- return true;
5715
- },
5716
- heal(value, settings, schema) {
5717
- throw new Error('Not implemented');
5718
- },
5719
- subvalueParsers: [],
5720
- };
5721
- /**
5722
- * TODO: [🧠] Maybe proper instance of object
5723
- * TODO: [0] Make string_serialized_json
5724
- * TODO: [1] Make type for JSON Settings and Schema
5725
- * TODO: [🧠] What to use for validating JSONs - JSON Schema, ZoD, typescript types/interfaces,...?
5726
- * TODO: [๐Ÿ“] In `JsonFormatParser` implement simple `isValid`
5727
- * TODO: [๐Ÿ“] In `JsonFormatParser` implement partial `canBeValid`
5728
- * TODO: [๐Ÿ“] In `JsonFormatParser` implement `heal
5729
- * TODO: [๐Ÿ“] In `JsonFormatParser` implement `subvalueParsers`
5730
- * TODO: [๐Ÿข] Allow to expect something inside JSON objects and other formats
5731
- */
5732
-
5733
- /**
5734
- * Definition for any text - this will be always valid
5735
- *
5736
- * Note: This is not useful for validation, but for splitting and mapping with `subvalueParsers`
5737
- *
5738
- * @public exported from `@promptbook/core`
5739
- */
5740
- const TextFormatParser = {
5741
- formatName: 'TEXT',
5742
- isValid(value) {
5743
- return typeof value === 'string';
5744
- },
5745
- canBeValid(partialValue) {
5746
- return typeof partialValue === 'string';
5747
- },
5748
- heal() {
5749
- throw new UnexpectedError('It does not make sense to call `TextFormatParser.heal`');
5750
- },
5751
- subvalueParsers: [
5752
- {
5753
- subvalueName: 'LINE',
5754
- async mapValues(options) {
5755
- const { value, mapCallback, onProgress } = options;
5756
- const lines = value.split('\n');
5757
- const mappedLines = await Promise.all(lines.map((lineContent, lineNumber, array) =>
5758
- // TODO: [🧠] Maybe option to skip empty line
5759
- /* not await */ mapCallback({
5760
- lineContent,
5761
- // TODO: [🧠] Maybe also put here `lineNumber`
5762
- }, lineNumber, array.length)));
5763
- return mappedLines.join('\n');
5764
- },
5765
- },
5766
- // <- TODO: [🧠][🤠] Here should be all words, characters, lines, paragraphs, pages available as subvalues
5767
- ],
5768
- };
5769
- /**
5770
- * TODO: [1] Make type for XML Text and Schema
5771
- * TODO: [🧠][🤠] Here should be all words, characters, lines, paragraphs, pages available as subvalues
5772
- * TODO: [๐Ÿ“] In `TextFormatParser` implement simple `isValid`
5773
- * TODO: [๐Ÿ“] In `TextFormatParser` implement partial `canBeValid`
5774
- * TODO: [๐Ÿ“] In `TextFormatParser` implement `heal
5775
- * TODO: [๐Ÿ“] In `TextFormatParser` implement `subvalueParsers`
5776
- * TODO: [๐Ÿข] Allow to expect something inside each item of list and other formats
5777
- */
5778
-
5779
- /**
5780
- * Function to check if a string is valid XML
5781
- *
5782
- * @param value
5783
- * @returns `true` if the string is a valid XML string, false otherwise
5784
- *
5785
- * @public exported from `@promptbook/utils`
5786
- */
5787
- function isValidXmlString(value) {
5788
- try {
5789
- const parser = new DOMParser();
5790
- const parsedDocument = parser.parseFromString(value, 'application/xml');
5791
- const parserError = parsedDocument.getElementsByTagName('parsererror');
5792
- if (parserError.length > 0) {
5793
- return false;
5794
- }
5795
- return true;
5796
- }
5797
- catch (error) {
5798
- assertsError(error);
5799
- return false;
5800
- }
5801
- }
5802
-
5803
- /**
5804
- * Definition for XML format
5805
- *
5806
- * @private still in development [๐Ÿข]
5807
- */
5808
- const XmlFormatParser = {
5809
- formatName: 'XML',
5810
- mimeType: 'application/xml',
5811
- isValid(value, settings, schema) {
5812
- return isValidXmlString(value);
5813
- },
5814
- canBeValid(partialValue, settings, schema) {
5815
- return true;
5816
- },
5817
- heal(value, settings, schema) {
5818
- throw new Error('Not implemented');
5819
- },
5820
- subvalueParsers: [],
5821
- };
5822
- /**
5823
- * TODO: [🧠] Maybe proper instance of object
5824
- * TODO: [0] Make string_serialized_xml
5825
- * TODO: [1] Make type for XML Settings and Schema
5826
- * TODO: [🧠] What to use for validating XMLs - XSD,...
5827
- * TODO: [๐Ÿ“] In `XmlFormatParser` implement simple `isValid`
5828
- * TODO: [๐Ÿ“] In `XmlFormatParser` implement partial `canBeValid`
5829
- * TODO: [๐Ÿ“] In `XmlFormatParser` implement `heal
5830
- * TODO: [๐Ÿ“] In `XmlFormatParser` implement `subvalueParsers`
5831
- * TODO: [๐Ÿข] Allow to expect something inside XML and other formats
5832
- */
5833
-
5834
- /**
5835
- * Definitions for all formats supported by Promptbook
5836
- *
5837
- * @private internal index of `...` <- TODO [๐Ÿข]
5838
- */
5839
- const FORMAT_DEFINITIONS = [JsonFormatParser, XmlFormatParser, TextFormatParser, CsvFormatParser];
5840
- /**
5841
- * Note: [💞] Ignore a discrepancy between file name and entity name
5842
- */
5843
-
5844
- /**
5845
- * Maps available parameters to expected parameters for a pipeline task.
5846
- *
5847
- * The strategy is:
5848
- * 1) First, match parameters by name where both available and expected.
5849
- * 2) Then, if there are unmatched expected and available parameters, map them by order.
5850
- *
5851
- * @throws {PipelineExecutionError} If the number of unmatched expected and available parameters does not match, or mapping is ambiguous.
5852
- * @private within the repository used in `createPipelineExecutor`
5853
- */
5854
- function mapAvailableToExpectedParameters(options) {
5855
- const { expectedParameters, availableParameters } = options;
5856
- const availableParametersNames = new Set(Object.keys(availableParameters));
5857
- const expectedParameterNames = new Set(Object.keys(expectedParameters));
5858
- const mappedParameters = {};
5859
- // Phase 1️⃣: Matching mapping
5860
- for (const parameterName of Array.from(union(availableParametersNames, expectedParameterNames))) {
5861
- // Situation: Parameter is available and expected
5862
- if (availableParametersNames.has(parameterName) && expectedParameterNames.has(parameterName)) {
5863
- mappedParameters[parameterName] = availableParameters[parameterName];
5864
- // <- Note: [👩‍👩‍👧] Maybe detect parameter collision here?
5865
- availableParametersNames.delete(parameterName);
5866
- expectedParameterNames.delete(parameterName);
5867
- }
5868
- // Situation: Parameter is available but NOT expected
5869
- else if (availableParametersNames.has(parameterName) && !expectedParameterNames.has(parameterName)) ;
5870
- // Situation: Parameter is NOT available BUT expected
5871
- else if (!availableParametersNames.has(parameterName) && expectedParameterNames.has(parameterName)) ;
5872
- }
5873
- if (expectedParameterNames.size === 0) {
5874
- // Note: [👨‍👨‍👧] Now we can freeze `mappedParameters` to prevent accidental modifications after mapping
5875
- Object.freeze(mappedParameters);
5876
- return mappedParameters;
5877
- }
5878
- // Phase 2️⃣: Non-matching mapping
5879
- if (expectedParameterNames.size !== availableParametersNames.size) {
5880
- throw new PipelineExecutionError(spaceTrim__default["default"]((block) => `
5881
- Can not map available parameters to expected parameters
5882
-
5883
- Mapped parameters:
5884
- ${block(Object.keys(mappedParameters)
5885
- .map((parameterName) => `- {${parameterName}}`)
5886
- .join('\n'))}
5887
-
5888
- Expected parameters which can not be mapped:
5889
- ${block(Array.from(expectedParameterNames)
5890
- .map((parameterName) => `- {${parameterName}}`)
5891
- .join('\n'))}
5892
-
5893
- Remaining available parameters:
5894
- ${block(Array.from(availableParametersNames)
5895
- .map((parameterName) => `- {${parameterName}}`)
5896
- .join('\n'))}
5897
-
5898
- `));
5899
- }
5900
- const expectedParameterNamesArray = Array.from(expectedParameterNames);
5901
- const availableParametersNamesArray = Array.from(availableParametersNames);
5902
- for (let i = 0; i < expectedParameterNames.size; i++) {
5903
- mappedParameters[expectedParameterNamesArray[i]] = availableParameters[availableParametersNamesArray[i]];
5904
- }
5905
- // Note: [👨‍👨‍👧] Now we can freeze `mappedParameters` to prevent accidental modifications after mapping
5906
- Object.freeze(mappedParameters);
5907
- return mappedParameters;
5908
- }
5909
-
5910
- /**
5911
- * Extracts all code blocks from markdown.
5912
- *
5913
- * Note: There are multiple similar functions:
5914
- * - `extractBlock` just extracts the content of the code block which is also used as built-in function for postprocessing
5915
- * - `extractJsonBlock` extracts exactly one valid JSON code block
5916
- * - `extractOneBlockFromMarkdown` extracts exactly one code block with language of the code block
5917
- * - `extractAllBlocksFromMarkdown` extracts all code blocks with language of the code block
5918
- *
5919
- * @param markdown any valid markdown
5920
- * @returns code blocks with language and content
5921
- * @throws {ParseError} if block is not closed properly
5922
- * @public exported from `@promptbook/markdown-utils`
5923
- */
5924
- function extractAllBlocksFromMarkdown(markdown) {
5925
- const codeBlocks = [];
5926
- const lines = markdown.split('\n');
5927
- // Note: [0] Ensure that the last block notated by gt > will be closed
5928
- lines.push('');
5929
- let currentCodeBlock = null;
5930
- for (const line of lines) {
5931
- if (line.startsWith('> ') || line === '>') {
5932
- if (currentCodeBlock === null) {
5933
- currentCodeBlock = { blockNotation: '>', language: null, content: '' };
5934
- } /* not else */
5935
- if (currentCodeBlock.blockNotation === '>') {
5936
- if (currentCodeBlock.content !== '') {
5937
- currentCodeBlock.content += '\n';
5938
- }
5939
- currentCodeBlock.content += line.slice(2);
5940
- }
5941
- }
5942
- else if (currentCodeBlock !== null && currentCodeBlock.blockNotation === '>' /* <- Note: [0] */) {
5943
- codeBlocks.push(currentCodeBlock);
5944
- currentCodeBlock = null;
5945
- }
5946
- /* not else */
5947
- if (line.startsWith('```')) {
5948
- const language = line.slice(3).trim() || null;
5949
- if (currentCodeBlock === null) {
5950
- currentCodeBlock = { blockNotation: '```', language, content: '' };
5951
- }
5952
- else {
5953
- if (language !== null) {
5954
- throw new ParseError(`${capitalize(currentCodeBlock.language || 'the')} code block was not closed and already opening new ${language} code block`);
5955
- }
5956
- codeBlocks.push(currentCodeBlock);
5957
- currentCodeBlock = null;
5958
- }
5959
- }
5960
- else if (currentCodeBlock !== null && currentCodeBlock.blockNotation === '```') {
5961
- if (currentCodeBlock.content !== '') {
5962
- currentCodeBlock.content += '\n';
5963
- }
5964
- currentCodeBlock.content += line.split('\\`\\`\\`').join('```') /* <- TODO: Maybe make proper unescape */;
5900
+ if (!Array.from(b).some((itemB) => isEqual(itemA, itemB))) {
5901
+ diff.add(itemA);
5965
5902
  }
5966
5903
  }
5967
- if (currentCodeBlock !== null) {
5968
- throw new ParseError(`${capitalize(currentCodeBlock.language || 'the')} code block was not closed at the end of the markdown`);
5969
- }
5970
- return codeBlocks;
5904
+ return diff;
5971
5905
  }
5972
5906
  /**
5973
- * TODO: Maybe name for `blockNotation` instead of '```' and '>'
5907
+ * TODO: [🧠][💯] Maybe also implement symmetricDifference
5974
5908
  */
5975
5909
 
5976
5910
  /**
5977
- * Extracts extracts exactly one valid JSON code block
5978
- *
5979
- * - When given string is a valid JSON as it is, it just returns it
5980
- * - When there is no JSON code block the function throws a `ParseError`
5981
- * - When there are multiple JSON code blocks the function throws a `ParseError`
5982
- *
5983
- * Note: It is not important if marked as ```json BUT if it is VALID JSON
5984
- * Note: There are multiple similar function:
5985
- * - `extractBlock` just extracts the content of the code block which is also used as build-in function for postprocessing
5986
- * - `extractJsonBlock` extracts exactly one valid JSON code block
5987
- * - `extractOneBlockFromMarkdown` extracts exactly one code block with language of the code block
5988
- * - `extractAllBlocksFromMarkdown` extracts all code blocks with language of the code block
5911
+ * Creates a new set with all elements that are present in either set
5989
5912
  *
5990
- * @public exported from `@promptbook/markdown-utils`
5991
- * @throws {ParseError} if there is no valid JSON block in the markdown
5913
+ * @deprecated use new javascript set methods instead @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Set
5914
+ * @public exported from `@promptbook/utils`
5992
5915
  */
5993
- function extractJsonBlock(markdown) {
5994
- if (isValidJsonString(markdown)) {
5995
- return markdown;
5996
- }
5997
- const codeBlocks = extractAllBlocksFromMarkdown(markdown);
5998
- const jsonBlocks = codeBlocks.filter(({ content }) => isValidJsonString(content));
5999
- if (jsonBlocks.length === 0) {
6000
- throw new Error('There is no valid JSON block in the markdown');
6001
- }
6002
- if (jsonBlocks.length > 1) {
6003
- throw new Error('There are multiple JSON code blocks in the markdown');
5916
+ function union(...sets) {
5917
+ const union = new Set();
5918
+ for (const set of sets) {
5919
+ for (const item of Array.from(set)) {
5920
+ union.add(item);
5921
+ }
6004
5922
  }
6005
- return jsonBlocks[0].content;
5923
+ return union;
6006
5924
  }
5925
+
6007
5926
  /**
6008
- * TODO: Add some auto-healing logic + extract YAML, JSON5, TOML, etc.
6009
- * TODO: [๐Ÿข] Make this logic part of `JsonFormatParser` or `isValidJsonString`
5927
+ * Contains configuration options for parsing and generating CSV files, such as delimiters and quoting rules.
5928
+ *
5929
+ * @public exported from `@promptbook/core`
6010
5930
  */
5931
+ const MANDATORY_CSV_SETTINGS = Object.freeze({
5932
+ header: true,
5933
+ // encoding: 'utf-8',
5934
+ });
6011
5935
 
6012
5936
  /**
6013
- * Takes an item or an array of items and returns an array of items
5937
+ * Converts a CSV string into an object
6014
5938
  *
6015
- * 1) Any item except array and undefined returns array with that one item (also null)
6016
- * 2) Undefined returns empty array
6017
- * 3) Array returns itself
5939
+ * Note: This is wrapper around `papaparse.parse()` with better autohealing
6018
5940
  *
6019
- * @private internal utility
5941
+ * @private - for now until `@promptbook/csv` is released
6020
5942
  */
6021
- function arrayableToArray(input) {
6022
- if (input === undefined) {
6023
- return [];
6024
- }
6025
- if (input instanceof Array) {
6026
- return input;
5943
+ function csvParse(value /* <- TODO: string_csv */, settings, schema /* <- TODO: Make CSV Schemas */) {
5944
+ settings = { ...settings, ...MANDATORY_CSV_SETTINGS };
5945
+ // Note: Autoheal invalid '\n' characters
5946
+ if (settings.newline && !settings.newline.includes('\r') && value.includes('\r')) {
5947
+ console.warn('CSV string contains carriage return characters, but in the CSV settings the `newline` setting does not include them. Autohealing the CSV string.');
5948
+ value = value.replace(/\r\n/g, '\n').replace(/\r/g, '\n');
6027
5949
  }
6028
- return [input];
5950
+ const csv = papaparse.parse(value, settings);
5951
+ return csv;
6029
5952
  }
6030
5953
 
6031
5954
  /**
6032
- * Replaces parameters in template with values from parameters object
5955
+ * Function to check if a string is valid CSV
6033
5956
  *
6034
- * Note: This function is not places strings into string,
6035
- * It's more complex and can handle this operation specifically for LLM models
5957
+ * @param value The string to check
5958
+ * @returns `true` if the string is a valid CSV string, false otherwise
6036
5959
  *
6037
- * @param template the template with parameters in {curly} braces
6038
- * @param parameters the object with parameters
6039
- * @returns the template with replaced parameters
6040
- * @throws {PipelineExecutionError} if parameter is not defined, not closed, or not opened
6041
5960
  * @public exported from `@promptbook/utils`
6042
5961
  */
6043
- function templateParameters(template, parameters) {
6044
- for (const [parameterName, parameterValue] of Object.entries(parameters)) {
6045
- if (parameterValue === RESERVED_PARAMETER_MISSING_VALUE) {
6046
- throw new UnexpectedError(`Parameter \`{${parameterName}}\` has missing value`);
6047
- }
6048
- else if (parameterValue === RESERVED_PARAMETER_RESTRICTED) {
6049
- // TODO: [๐Ÿต]
6050
- throw new UnexpectedError(`Parameter \`{${parameterName}}\` is restricted to use`);
6051
- }
6052
- }
6053
- let replacedTemplates = template;
6054
- let match;
6055
- let loopLimit = LOOP_LIMIT;
6056
- while ((match = /^(?<precol>.*){(?<parameterName>\w+)}(.*)/m /* <- Not global */
6057
- .exec(replacedTemplates))) {
6058
- if (loopLimit-- < 0) {
6059
- throw new LimitReachedError('Loop limit reached during parameters replacement in `templateParameters`');
6060
- }
6061
- const precol = match.groups.precol;
6062
- const parameterName = match.groups.parameterName;
6063
- if (parameterName === '') {
6064
- // Note: Skip empty placeholders. It's used to avoid confusion with JSON-like strings
6065
- continue;
6066
- }
6067
- if (parameterName.indexOf('{') !== -1 || parameterName.indexOf('}') !== -1) {
6068
- throw new PipelineExecutionError('Parameter is already opened or not closed');
6069
- }
6070
- if (parameters[parameterName] === undefined) {
6071
- throw new PipelineExecutionError(`Parameter \`{${parameterName}}\` is not defined`);
6072
- }
6073
- let parameterValue = parameters[parameterName];
6074
- if (parameterValue === undefined) {
6075
- throw new PipelineExecutionError(`Parameter \`{${parameterName}}\` is not defined`);
6076
- }
6077
- parameterValue = valueToString(parameterValue);
6078
- // Escape curly braces in parameter values to prevent prompt-injection
6079
- parameterValue = parameterValue.replace(/[{}]/g, '\\$&');
6080
- if (parameterValue.includes('\n') && /^\s*\W{0,3}\s*$/.test(precol)) {
6081
- parameterValue = parameterValue
6082
- .split('\n')
6083
- .map((line, index) => (index === 0 ? line : `${precol}${line}`))
6084
- .join('\n');
5962
+ function isValidCsvString(value) {
5963
+ try {
5964
+ // A simple check for CSV format: at least one comma and no invalid characters
5965
+ if (value.includes(',') && /^[\w\s,"']+$/.test(value)) {
5966
+ return true;
6085
5967
  }
6086
- replacedTemplates =
6087
- replacedTemplates.substring(0, match.index + precol.length) +
6088
- parameterValue +
6089
- replacedTemplates.substring(match.index + precol.length + parameterName.length + 2);
6090
- }
6091
- // [💫] Check if there are parameters that are not closed properly
6092
- if (/{\w+$/.test(replacedTemplates)) {
6093
- throw new PipelineExecutionError('Parameter is not closed');
5968
+ return false;
6094
5969
  }
6095
- // [💫] Check if there are parameters that are not opened properly
6096
- if (/^\w+}/.test(replacedTemplates)) {
6097
- throw new PipelineExecutionError('Parameter is not opened');
5970
+ catch (error) {
5971
+ assertsError(error);
5972
+ return false;
6098
5973
  }
6099
- return replacedTemplates;
6100
5974
  }
6101
5975
 
6102
5976
  /**
6103
- * Counts number of characters in the text
5977
+ * Definition for CSV spreadsheet
6104
5978
  *
6105
- * @public exported from `@promptbook/utils`
5979
+ * @public exported from `@promptbook/core`
5980
+ * <- TODO: [๐Ÿข] Export from package `@promptbook/csv`
6106
5981
  */
6107
- function countCharacters(text) {
6108
- // Remove null characters
6109
- text = text.replace(/\0/g, '');
6110
- // Replace emojis (and also ZWJ sequence) with hyphens
6111
- text = text.replace(/(\p{Extended_Pictographic})\p{Modifier_Symbol}/gu, '$1');
6112
- text = text.replace(/(\p{Extended_Pictographic})[\u{FE00}-\u{FE0F}]/gu, '$1');
6113
- text = text.replace(/\p{Extended_Pictographic}(\u{200D}\p{Extended_Pictographic})*/gu, '-');
6114
- return text.length;
6115
- }
5982
+ const CsvFormatParser = {
5983
+ formatName: 'CSV',
5984
+ aliases: ['SPREADSHEET', 'TABLE'],
5985
+ isValid(value, settings, schema) {
5986
+ return isValidCsvString(value);
5987
+ },
5988
+ canBeValid(partialValue, settings, schema) {
5989
+ return true;
5990
+ },
5991
+ heal(value, settings, schema) {
5992
+ throw new Error('Not implemented');
5993
+ },
5994
+ subvalueParsers: [
5995
+ {
5996
+ subvalueName: 'ROW',
5997
+ async mapValues(options) {
5998
+ const { value, outputParameterName, settings, mapCallback, onProgress } = options;
5999
+ const csv = csvParse(value, settings);
6000
+ if (csv.errors.length !== 0) {
6001
+ throw new CsvFormatError(spaceTrim__default["default"]((block) => `
6002
+ CSV parsing error
6003
+
6004
+ Error(s) from CSV parsing:
6005
+ ${block(csv.errors.map((error) => error.message).join('\n\n'))}
6006
+
6007
+ The CSV setings:
6008
+ ${block(JSON.stringify({ ...settings, ...MANDATORY_CSV_SETTINGS }, null, 2))}
6009
+
6010
+ The CSV data:
6011
+ ${block(value)}
6012
+ `));
6013
+ }
6014
+ const mappedData = [];
6015
+ const length = csv.data.length;
6016
+ for (let index = 0; index < length; index++) {
6017
+ const row = csv.data[index];
6018
+ if (row[outputParameterName]) {
6019
+ throw new CsvFormatError(`Can not overwrite existing column "${outputParameterName}" in CSV row`);
6020
+ }
6021
+ const mappedRow = {
6022
+ ...row,
6023
+ [outputParameterName]: await mapCallback(row, index, length),
6024
+ };
6025
+ mappedData.push(mappedRow);
6026
+ if (onProgress) {
6027
+ // Note: Report the CSV with all rows mapped so far
6028
+ /*
6029
+ // TODO: [🛕] Report progress with all the rows including the pending ones
6030
+ const progressData = mappedData.map((row, i) =>
6031
+ i > index ? { ...row, [outputParameterName]: PENDING_VALUE_PLACEHOLDER } : row,
6032
+ );
6033
+ */
6034
+ await onProgress(papaparse.unparse(mappedData, { ...settings, ...MANDATORY_CSV_SETTINGS }));
6035
+ }
6036
+ }
6037
+ return papaparse.unparse(mappedData, { ...settings, ...MANDATORY_CSV_SETTINGS });
6038
+ },
6039
+ },
6040
+ {
6041
+ subvalueName: 'CELL',
6042
+ async mapValues(options) {
6043
+ const { value, settings, mapCallback, onProgress } = options;
6044
+ const csv = csvParse(value, settings);
6045
+ if (csv.errors.length !== 0) {
6046
+ throw new CsvFormatError(spaceTrim__default["default"]((block) => `
6047
+ CSV parsing error
6048
+
6049
+ Error(s) from CSV parsing:
6050
+ ${block(csv.errors.map((error) => error.message).join('\n\n'))}
6051
+
6052
+ The CSV setings:
6053
+ ${block(JSON.stringify({ ...settings, ...MANDATORY_CSV_SETTINGS }, null, 2))}
6054
+
6055
+ The CSV data:
6056
+ ${block(value)}
6057
+ `));
6058
+ }
6059
+ const mappedData = await Promise.all(csv.data.map(async (row, rowIndex) => {
6060
+ return /* not await */ Promise.all(Object.entries(row).map(async ([key, value], columnIndex, array) => {
6061
+ const index = rowIndex * Object.keys(row).length + columnIndex;
6062
+ return /* not await */ mapCallback({ [key]: value }, index, array.length);
6063
+ }));
6064
+ }));
6065
+ return papaparse.unparse(mappedData, { ...settings, ...MANDATORY_CSV_SETTINGS });
6066
+ },
6067
+ },
6068
+ ],
6069
+ };
6116
6070
  /**
6117
- * TODO: [🥴] Implement counting in formats - like JSON, CSV, XML,...
6071
+ * TODO: [๐Ÿ“] In `CsvFormatParser` implement simple `isValid`
6072
+ * TODO: [๐Ÿ“] In `CsvFormatParser` implement partial `canBeValid`
6073
+ * TODO: [๐Ÿ“] In `CsvFormatParser` implement `heal
6074
+ * TODO: [๐Ÿ“] In `CsvFormatParser` implement `subvalueParsers`
6075
+ * TODO: [๐Ÿข] Allow to expect something inside CSV objects and other formats
6118
6076
  */
6119
6077
 
6120
6078
  /**
6121
- * Number of characters per standard line with 11pt Arial font size.
6122
- *
6123
- * @public exported from `@promptbook/utils`
6124
- */
6125
- const CHARACTERS_PER_STANDARD_LINE = 63;
6126
- /**
6127
- * Number of lines per standard A4 page with 11pt Arial font size and standard margins and spacing.
6079
+ * Definition for JSON format
6128
6080
  *
6129
- * @public exported from `@promptbook/utils`
6081
+ * @private still in development [๐Ÿข]
6130
6082
  */
6131
- const LINES_PER_STANDARD_PAGE = 44;
6083
+ const JsonFormatParser = {
6084
+ formatName: 'JSON',
6085
+ mimeType: 'application/json',
6086
+ isValid(value, settings, schema) {
6087
+ return isValidJsonString(value);
6088
+ },
6089
+ canBeValid(partialValue, settings, schema) {
6090
+ return true;
6091
+ },
6092
+ heal(value, settings, schema) {
6093
+ throw new Error('Not implemented');
6094
+ },
6095
+ subvalueParsers: [],
6096
+ };
6132
6097
  /**
6133
- * TODO: [🧠] Should be this `constants.ts` or `config.ts`?
6134
- * Note: [💞] Ignore a discrepancy between file name and entity name
6098
+ * TODO: [🧠] Maybe proper instance of object
6099
+ * TODO: [0] Make string_serialized_json
6100
+ * TODO: [1] Make type for JSON Settings and Schema
6101
+ * TODO: [🧠] What to use for validating JSONs - JSON Schema, ZoD, typescript types/interfaces,...?
6102
+ * TODO: [๐Ÿ“] In `JsonFormatParser` implement simple `isValid`
6103
+ * TODO: [๐Ÿ“] In `JsonFormatParser` implement partial `canBeValid`
6104
+ * TODO: [๐Ÿ“] In `JsonFormatParser` implement `heal
6105
+ * TODO: [๐Ÿ“] In `JsonFormatParser` implement `subvalueParsers`
6106
+ * TODO: [๐Ÿข] Allow to expect something inside JSON objects and other formats
6135
6107
  */
6136
6108
 
6137
6109
  /**
6138
- * Counts number of lines in the text
6110
+ * Definition for any text - this will be always valid
6139
6111
  *
6140
- * Note: This does not check only for the presence of newlines, but also for the length of the standard line.
6112
+ * Note: This is not useful for validation, but for splitting and mapping with `subvalueParsers`
6141
6113
  *
6142
- * @public exported from `@promptbook/utils`
6114
+ * @public exported from `@promptbook/core`
6143
6115
  */
6144
- function countLines(text) {
6145
- text = text.replace('\r\n', '\n');
6146
- text = text.replace('\r', '\n');
6147
- const lines = text.split('\n');
6148
- return lines.reduce((count, line) => count + Math.ceil(line.length / CHARACTERS_PER_STANDARD_LINE), 0);
6149
- }
6116
+ const TextFormatParser = {
6117
+ formatName: 'TEXT',
6118
+ isValid(value) {
6119
+ return typeof value === 'string';
6120
+ },
6121
+ canBeValid(partialValue) {
6122
+ return typeof partialValue === 'string';
6123
+ },
6124
+ heal() {
6125
+ throw new UnexpectedError('It does not make sense to call `TextFormatParser.heal`');
6126
+ },
6127
+ subvalueParsers: [
6128
+ {
6129
+ subvalueName: 'LINE',
6130
+ async mapValues(options) {
6131
+ const { value, mapCallback, onProgress } = options;
6132
+ const lines = value.split('\n');
6133
+ const mappedLines = await Promise.all(lines.map((lineContent, lineNumber, array) =>
6134
+ // TODO: [๐Ÿง ] Maybe option to skip empty line
6135
+ /* not await */ mapCallback({
6136
+ lineContent,
6137
+ // TODO: [๐Ÿง ] Maybe also put here `lineNumber`
6138
+ }, lineNumber, array.length)));
6139
+ return mappedLines.join('\n');
6140
+ },
6141
+ },
6142
+ // <- TODO: [๐Ÿง ][๐Ÿค ] Here should be all words, characters, lines, paragraphs, pages available as subvalues
6143
+ ],
6144
+ };
6150
6145
  /**
6151
- * TODO: [๐Ÿฅด] Implement counting in formats - like JSON, CSV, XML,...
6146
+ * TODO: [1] Make type for XML Text and Schema
6147
+ * TODO: [🧠][🤠] Here should be all words, characters, lines, paragraphs, pages available as subvalues
6148
+ * TODO: [🍓] In `TextFormatParser` implement simple `isValid`
6149
+ * TODO: [🍓] In `TextFormatParser` implement partial `canBeValid`
6150
+ * TODO: [🍓] In `TextFormatParser` implement `heal`
6151
+ * TODO: [🍓] In `TextFormatParser` implement `subvalueParsers`
6152
+ * TODO: [🏢] Allow to expect something inside each item of list and other formats
6152
6153
  */
6153
6154
 
6154
6155
  /**
6155
- * Counts number of pages in the text
6156
+ * Function to check if a string is valid XML
6156
6157
  *
6157
- * Note: This does not check only for the count of newlines, but also for the length of the standard line and length of the standard page.
6158
+ * @param value
6159
+ * @returns `true` if the string is a valid XML string, false otherwise
6158
6160
  *
6159
6161
  * @public exported from `@promptbook/utils`
6160
6162
  */
6161
- function countPages(text) {
6162
- return Math.ceil(countLines(text) / LINES_PER_STANDARD_PAGE);
6163
+ function isValidXmlString(value) {
6164
+ try {
6165
+ const parser = new DOMParser();
6166
+ const parsedDocument = parser.parseFromString(value, 'application/xml');
6167
+ const parserError = parsedDocument.getElementsByTagName('parsererror');
6168
+ if (parserError.length > 0) {
6169
+ return false;
6170
+ }
6171
+ return true;
6172
+ }
6173
+ catch (error) {
6174
+ assertsError(error);
6175
+ return false;
6176
+ }
6163
6177
  }
6164
- /**
6165
- * TODO: [๐Ÿฅด] Implement counting in formats - like JSON, CSV, XML,...
6166
- */
6167
6178
 
6168
6179
  /**
6169
- * Counts number of paragraphs in the text
6180
+ * Definition for XML format
6170
6181
  *
6171
- * @public exported from `@promptbook/utils`
6182
+ * @private still in development [๐Ÿข]
6172
6183
  */
6173
- function countParagraphs(text) {
6174
- return text.split(/\n\s*\n/).filter((paragraph) => paragraph.trim() !== '').length;
6175
- }
6184
+ const XmlFormatParser = {
6185
+ formatName: 'XML',
6186
+ mimeType: 'application/xml',
6187
+ isValid(value, settings, schema) {
6188
+ return isValidXmlString(value);
6189
+ },
6190
+ canBeValid(partialValue, settings, schema) {
6191
+ return true;
6192
+ },
6193
+ heal(value, settings, schema) {
6194
+ throw new Error('Not implemented');
6195
+ },
6196
+ subvalueParsers: [],
6197
+ };
6176
6198
  /**
6177
- * TODO: [๐Ÿฅด] Implement counting in formats - like JSON, CSV, XML,...
6199
+ * TODO: [🧠] Maybe proper instance of object
6200
+ * TODO: [0] Make string_serialized_xml
6201
+ * TODO: [1] Make type for XML Settings and Schema
6202
+ * TODO: [🧠] What to use for validating XMLs - XSD,...
6203
+ * TODO: [🍓] In `XmlFormatParser` implement simple `isValid`
6204
+ * TODO: [🍓] In `XmlFormatParser` implement partial `canBeValid`
6205
+ * TODO: [🍓] In `XmlFormatParser` implement `heal`
6206
+ * TODO: [🍓] In `XmlFormatParser` implement `subvalueParsers`
6207
+ * TODO: [🏢] Allow to expect something inside XML and other formats
6178
6208
  */
6179
6209
 
6180
6210
  /**
6181
- * Split text into sentences
6182
- *
6183
- * @public exported from `@promptbook/utils`
6184
- */
6185
- function splitIntoSentences(text) {
6186
- return text.split(/[.!?]+/).filter((sentence) => sentence.trim() !== '');
6187
- }
6188
- /**
6189
- * Counts number of sentences in the text
6211
+ * Definitions for all formats supported by Promptbook
6190
6212
  *
6191
- * @public exported from `@promptbook/utils`
6213
+ * @private internal index of `...` <- TODO [๐Ÿข]
6192
6214
  */
6193
- function countSentences(text) {
6194
- return splitIntoSentences(text).length;
6195
- }
6215
+ const FORMAT_DEFINITIONS = [JsonFormatParser, XmlFormatParser, TextFormatParser, CsvFormatParser];
6196
6216
  /**
6197
- * TODO: [๐Ÿฅด] Implement counting in formats - like JSON, CSV, XML,...
6217
+ * Note: [๐Ÿ’ž] Ignore a discrepancy between file name and entity name
6198
6218
  */
6199
6219
 
6200
6220
  /**
6201
- * Counts number of words in the text
6221
+ * Maps available parameters to expected parameters for a pipeline task.
6202
6222
  *
6203
- * @public exported from `@promptbook/utils`
6223
+ * The strategy is:
6224
+ * 1) First, match parameters by name where both available and expected.
6225
+ * 2) Then, if there are unmatched expected and available parameters, map them by order.
6226
+ *
6227
+ * @throws {PipelineExecutionError} If the number of unmatched expected and available parameters does not match, or mapping is ambiguous.
6228
+ * @private within the repository used in `createPipelineExecutor`
6204
6229
  */
6205
- function countWords(text) {
6206
- text = text.replace(/[\p{Extended_Pictographic}]/gu, 'a');
6207
- text = removeDiacritics(text);
6208
- // Add spaces before uppercase letters preceded by lowercase letters (for camelCase)
6209
- text = text.replace(/([a-z])([A-Z])/g, '$1 $2');
6210
- return text.split(/[^a-zะฐ-ั0-9]+/i).filter((word) => word.length > 0).length;
6230
+ function mapAvailableToExpectedParameters(options) {
6231
+ const { expectedParameters, availableParameters } = options;
6232
+ const availableParametersNames = new Set(Object.keys(availableParameters));
6233
+ const expectedParameterNames = new Set(Object.keys(expectedParameters));
6234
+ const mappedParameters = {};
6235
+ // Phase 1️⃣: Matching mapping
6236
+ for (const parameterName of Array.from(union(availableParametersNames, expectedParameterNames))) {
6237
+ // Situation: Parameter is available and expected
6238
+ if (availableParametersNames.has(parameterName) && expectedParameterNames.has(parameterName)) {
6239
+ mappedParameters[parameterName] = availableParameters[parameterName];
6240
+ // <- Note: [๐Ÿ‘ฉโ€๐Ÿ‘ฉโ€๐Ÿ‘ง] Maybe detect parameter collision here?
6241
+ availableParametersNames.delete(parameterName);
6242
+ expectedParameterNames.delete(parameterName);
6243
+ }
6244
+ // Situation: Parameter is available but NOT expected
6245
+ else if (availableParametersNames.has(parameterName) && !expectedParameterNames.has(parameterName)) ;
6246
+ // Situation: Parameter is NOT available BUT expected
6247
+ else if (!availableParametersNames.has(parameterName) && expectedParameterNames.has(parameterName)) ;
6248
+ }
6249
+ if (expectedParameterNames.size === 0) {
6250
+ // Note: [๐Ÿ‘จโ€๐Ÿ‘จโ€๐Ÿ‘ง] Now we can freeze `mappedParameters` to prevent accidental modifications after mapping
6251
+ Object.freeze(mappedParameters);
6252
+ return mappedParameters;
6253
+ }
6254
+ // Phase 2️⃣: Non-matching mapping
6255
+ if (expectedParameterNames.size !== availableParametersNames.size) {
6256
+ throw new PipelineExecutionError(spaceTrim__default["default"]((block) => `
6257
+ Can not map available parameters to expected parameters
6258
+
6259
+ Mapped parameters:
6260
+ ${block(Object.keys(mappedParameters)
6261
+ .map((parameterName) => `- {${parameterName}}`)
6262
+ .join('\n'))}
6263
+
6264
+ Expected parameters which can not be mapped:
6265
+ ${block(Array.from(expectedParameterNames)
6266
+ .map((parameterName) => `- {${parameterName}}`)
6267
+ .join('\n'))}
6268
+
6269
+ Remaining available parameters:
6270
+ ${block(Array.from(availableParametersNames)
6271
+ .map((parameterName) => `- {${parameterName}}`)
6272
+ .join('\n'))}
6273
+
6274
+ `));
6275
+ }
6276
+ const expectedParameterNamesArray = Array.from(expectedParameterNames);
6277
+ const availableParametersNamesArray = Array.from(availableParametersNames);
6278
+ for (let i = 0; i < expectedParameterNames.size; i++) {
6279
+ mappedParameters[expectedParameterNamesArray[i]] = availableParameters[availableParametersNamesArray[i]];
6280
+ }
6281
+ // Note: [๐Ÿ‘จโ€๐Ÿ‘จโ€๐Ÿ‘ง] Now we can freeze `mappedParameters` to prevent accidental modifications after mapping
6282
+ Object.freeze(mappedParameters);
6283
+ return mappedParameters;
6211
6284
  }
6212
- /**
6213
- * TODO: [๐Ÿฅด] Implement counting in formats - like JSON, CSV, XML,...
6214
- */
6215
6285
 
6216
6286
  /**
6217
- * Index of all counter functions
6287
+ * Takes an item or an array of items and returns an array of items
6218
6288
  *
6219
- * @public exported from `@promptbook/utils`
6220
- */
6221
- const CountUtils = {
6222
- CHARACTERS: countCharacters,
6223
- WORDS: countWords,
6224
- SENTENCES: countSentences,
6225
- PARAGRAPHS: countParagraphs,
6226
- LINES: countLines,
6227
- PAGES: countPages,
6228
- };
6229
- /**
6230
- * TODO: [๐Ÿง ][๐Ÿค ] This should be probably as part of `TextFormatParser`
6231
- * Note: [๐Ÿ’ž] Ignore a discrepancy between file name and entity name
6289
+ * 1) Any item except array and undefined returns array with that one item (also null)
6290
+ * 2) Undefined returns empty array
6291
+ * 3) Array returns itself
6292
+ *
6293
+ * @private internal utility
6232
6294
  */
6295
+ function arrayableToArray(input) {
6296
+ if (input === undefined) {
6297
+ return [];
6298
+ }
6299
+ if (input instanceof Array) {
6300
+ return input;
6301
+ }
6302
+ return [input];
6303
+ }
6233
6304
 
6234
6305
  /**
6235
- * Function checkExpectations will check if the expectations on given value are met
6306
+ * Replaces parameters in template with values from parameters object
6236
6307
  *
6237
- * Note: There are two similar functions:
6238
- * - `checkExpectations` which throws an error if the expectations are not met
6239
- * - `isPassingExpectations` which returns a boolean
6308
+ * Note: This function does not just place strings into a string;
6309
+ * it is more complex and handles this operation specifically for LLM models
6240
6310
  *
6241
- * @throws {ExpectError} if the expectations are not met
6242
- * @returns {void} Nothing
6243
- * @private internal function of `createPipelineExecutor`
6311
+ * @param template the template with parameters in {curly} braces
6312
+ * @param parameters the object with parameters
6313
+ * @returns the template with replaced parameters
6314
+ * @throws {PipelineExecutionError} if parameter is not defined, not closed, or not opened
6315
+ * @public exported from `@promptbook/utils`
6244
6316
  */
6245
- function checkExpectations(expectations, value) {
6246
- for (const [unit, { max, min }] of Object.entries(expectations)) {
6247
- const amount = CountUtils[unit.toUpperCase()](value);
6248
- if (min && amount < min) {
6249
- throw new ExpectError(`Expected at least ${min} ${unit} but got ${amount}`);
6250
- } /* not else */
6251
- if (max && amount > max) {
6252
- throw new ExpectError(`Expected at most ${max} ${unit} but got ${amount}`);
6317
+ function templateParameters(template, parameters) {
6318
+ for (const [parameterName, parameterValue] of Object.entries(parameters)) {
6319
+ if (parameterValue === RESERVED_PARAMETER_MISSING_VALUE) {
6320
+ throw new UnexpectedError(`Parameter \`{${parameterName}}\` has missing value`);
6321
+ }
6322
+ else if (parameterValue === RESERVED_PARAMETER_RESTRICTED) {
6323
+ // TODO: [๐Ÿต]
6324
+ throw new UnexpectedError(`Parameter \`{${parameterName}}\` is restricted to use`);
6325
+ }
6326
+ }
6327
+ let replacedTemplates = template;
6328
+ let match;
6329
+ let loopLimit = LOOP_LIMIT;
6330
+ while ((match = /^(?<precol>.*){(?<parameterName>\w+)}(.*)/m /* <- Not global */
6331
+ .exec(replacedTemplates))) {
6332
+ if (loopLimit-- < 0) {
6333
+ throw new LimitReachedError('Loop limit reached during parameters replacement in `templateParameters`');
6334
+ }
6335
+ const precol = match.groups.precol;
6336
+ const parameterName = match.groups.parameterName;
6337
+ if (parameterName === '') {
6338
+ // Note: Skip empty placeholders. It's used to avoid confusion with JSON-like strings
6339
+ continue;
6340
+ }
6341
+ if (parameterName.indexOf('{') !== -1 || parameterName.indexOf('}') !== -1) {
6342
+ throw new PipelineExecutionError('Parameter is already opened or not closed');
6343
+ }
6344
+ if (parameters[parameterName] === undefined) {
6345
+ throw new PipelineExecutionError(`Parameter \`{${parameterName}}\` is not defined`);
6346
+ }
6347
+ let parameterValue = parameters[parameterName];
6348
+ if (parameterValue === undefined) {
6349
+ throw new PipelineExecutionError(`Parameter \`{${parameterName}}\` is not defined`);
6350
+ }
6351
+ parameterValue = valueToString(parameterValue);
6352
+ // Escape curly braces in parameter values to prevent prompt-injection
6353
+ parameterValue = parameterValue.replace(/[{}]/g, '\\$&');
6354
+ if (parameterValue.includes('\n') && /^\s*\W{0,3}\s*$/.test(precol)) {
6355
+ parameterValue = parameterValue
6356
+ .split('\n')
6357
+ .map((line, index) => (index === 0 ? line : `${precol}${line}`))
6358
+ .join('\n');
6253
6359
  }
6360
+ replacedTemplates =
6361
+ replacedTemplates.substring(0, match.index + precol.length) +
6362
+ parameterValue +
6363
+ replacedTemplates.substring(match.index + precol.length + parameterName.length + 2);
6364
+ }
6365
+ // [๐Ÿ’ซ] Check if there are parameters that are not closed properly
6366
+ if (/{\w+$/.test(replacedTemplates)) {
6367
+ throw new PipelineExecutionError('Parameter is not closed');
6368
+ }
6369
+ // [๐Ÿ’ซ] Check if there are parameters that are not opened properly
6370
+ if (/^\w+}/.test(replacedTemplates)) {
6371
+ throw new PipelineExecutionError('Parameter is not opened');
6254
6372
  }
6373
+ return replacedTemplates;
6255
6374
  }
6256
- /**
6257
- * TODO: [๐Ÿ’] Unite object for expecting amount and format
6258
- * TODO: [๐Ÿง ][๐Ÿค ] This should be part of `TextFormatParser`
6259
- * Note: [๐Ÿ’] and [๐Ÿค ] are interconnected together
6260
- */
6261
6375
 
6262
6376
  /**
6263
6377
  * Executes a pipeline task with multiple attempts, including joker and retry logic. Handles different task types
@@ -6276,17 +6390,18 @@
6276
6390
  $resultString: null,
6277
6391
  $expectError: null,
6278
6392
  $scriptPipelineExecutionErrors: [],
6393
+ $failedResults: [], // Track all failed attempts
6279
6394
  };
6280
6395
  // TODO: [๐Ÿš] Make arrayable LLMs -> single LLM DRY
6281
6396
  const _llms = arrayableToArray(tools.llm);
6282
6397
  const llmTools = _llms.length === 1 ? _llms[0] : joinLlmExecutionTools(..._llms);
6283
- attempts: for (let attempt = -jokerParameterNames.length; attempt < maxAttempts; attempt++) {
6284
- const isJokerAttempt = attempt < 0;
6285
- const jokerParameterName = jokerParameterNames[jokerParameterNames.length + attempt];
6398
+ attempts: for (let attemptIndex = -jokerParameterNames.length; attemptIndex < maxAttempts; attemptIndex++) {
6399
+ const isJokerAttempt = attemptIndex < 0;
6400
+ const jokerParameterName = jokerParameterNames[jokerParameterNames.length + attemptIndex];
6286
6401
  // TODO: [๐Ÿง ][๐Ÿญ] JOKERS, EXPECTATIONS, POSTPROCESSING and FOREACH
6287
6402
  if (isJokerAttempt && !jokerParameterName) {
6288
6403
  throw new UnexpectedError(spaceTrim.spaceTrim((block) => `
6289
- Joker not found in attempt ${attempt}
6404
+ Joker not found in attempt ${attemptIndex}
6290
6405
 
6291
6406
  ${block(pipelineIdentification)}
6292
6407
  `));
@@ -6484,35 +6599,18 @@
6484
6599
  }
6485
6600
  }
6486
6601
  // TODO: [๐Ÿ’] Unite object for expecting amount and format
6487
- if (task.format) {
6488
- if (task.format === 'JSON') {
6489
- if (!isValidJsonString($ongoingTaskResult.$resultString || '')) {
6490
- // TODO: [๐Ÿข] Do more universally via `FormatParser`
6491
- try {
6492
- $ongoingTaskResult.$resultString = extractJsonBlock($ongoingTaskResult.$resultString || '');
6493
- }
6494
- catch (error) {
6495
- keepUnused(error);
6496
- throw new ExpectError(spaceTrim.spaceTrim((block) => `
6497
- Expected valid JSON string
6498
-
6499
- ${block(
6500
- /*<- Note: No need for `pipelineIdentification`, it will be catched and added later */ '')}
6501
- `));
6502
- }
6503
- }
6504
- }
6505
- else {
6506
- throw new UnexpectedError(spaceTrim.spaceTrim((block) => `
6507
- Unknown format "${task.format}"
6508
-
6509
- ${block(pipelineIdentification)}
6510
- `));
6602
+ // Use the common validation function for both format and expectations
6603
+ if (task.format || task.expectations) {
6604
+ const validationResult = validatePromptResult({
6605
+ resultString: $ongoingTaskResult.$resultString || '',
6606
+ expectations: task.expectations,
6607
+ format: task.format,
6608
+ });
6609
+ if (!validationResult.isValid) {
6610
+ throw validationResult.error;
6511
6611
  }
6512
- }
6513
- // TODO: [๐Ÿ’] Unite object for expecting amount and format
6514
- if (task.expectations) {
6515
- checkExpectations(task.expectations, $ongoingTaskResult.$resultString || '');
6612
+ // Update the result string in case format processing modified it (e.g., JSON extraction)
6613
+ $ongoingTaskResult.$resultString = validationResult.processedResultString;
6516
6614
  }
6517
6615
  break attempts;
6518
6616
  }
@@ -6521,6 +6619,15 @@
6521
6619
  throw error;
6522
6620
  }
6523
6621
  $ongoingTaskResult.$expectError = error;
6622
+ // Store each failed attempt
6623
+ if (!Array.isArray($ongoingTaskResult.$failedResults)) {
6624
+ $ongoingTaskResult.$failedResults = [];
6625
+ }
6626
+ $ongoingTaskResult.$failedResults.push({
6627
+ attemptIndex,
6628
+ result: $ongoingTaskResult.$resultString,
6629
+ error: error,
6630
+ });
6524
6631
  }
6525
6632
  finally {
6526
6633
  if (!isJokerAttempt &&
@@ -6542,35 +6649,41 @@
6542
6649
  });
6543
6650
  }
6544
6651
  }
6545
- if ($ongoingTaskResult.$expectError !== null && attempt === maxAttempts - 1) {
6652
+ if ($ongoingTaskResult.$expectError !== null && attemptIndex === maxAttempts - 1) {
6653
+ // Note: Create a summary of all failures
6654
+ const failuresSummary = $ongoingTaskResult.$failedResults
6655
+ .map((failure) => spaceTrim.spaceTrim((block) => {
6656
+ var _a, _b;
6657
+ return `
6658
+ Attempt ${failure.attemptIndex + 1}:
6659
+ Error ${((_a = failure.error) === null || _a === void 0 ? void 0 : _a.name) || ''}:
6660
+ ${block((_b = failure.error) === null || _b === void 0 ? void 0 : _b.message.split('\n').map((line) => `> ${line}`).join('\n'))}
6661
+
6662
+ Result:
6663
+ ${block(failure.result === null
6664
+ ? 'null'
6665
+ : spaceTrim.spaceTrim(failure.result)
6666
+ .split('\n')
6667
+ .map((line) => `> ${line}`)
6668
+ .join('\n'))}
6669
+ `;
6670
+ }))
6671
+ .join('\n\n---\n\n');
6546
6672
  throw new PipelineExecutionError(spaceTrim.spaceTrim((block) => {
6547
- var _a, _b, _c;
6673
+ var _a;
6548
6674
  return `
6549
6675
  LLM execution failed ${maxExecutionAttempts}x
6550
6676
 
6551
6677
  ${block(pipelineIdentification)}
6552
6678
 
6553
- ---
6554
6679
  The Prompt:
6555
6680
  ${block((((_a = $ongoingTaskResult.$prompt) === null || _a === void 0 ? void 0 : _a.content) || '')
6556
6681
  .split('\n')
6557
6682
  .map((line) => `> ${line}`)
6558
6683
  .join('\n'))}
6559
6684
 
6560
- Last error ${((_b = $ongoingTaskResult.$expectError) === null || _b === void 0 ? void 0 : _b.name) || ''}:
6561
- ${block((((_c = $ongoingTaskResult.$expectError) === null || _c === void 0 ? void 0 : _c.message) || '')
6562
- .split('\n')
6563
- .map((line) => `> ${line}`)
6564
- .join('\n'))}
6565
-
6566
- Last result:
6567
- ${block($ongoingTaskResult.$resultString === null
6568
- ? 'null'
6569
- : spaceTrim.spaceTrim($ongoingTaskResult.$resultString)
6570
- .split('\n')
6571
- .map((line) => `> ${line}`)
6572
- .join('\n'))}
6573
- ---
6685
+ All Failed Attempts:
6686
+ ${block(failuresSummary)}
6574
6687
  `;
6575
6688
  }));
6576
6689
  }
@@ -14349,6 +14462,66 @@
14349
14462
  response.setHeader('X-Powered-By', 'Promptbook engine');
14350
14463
  next();
14351
14464
  });
14465
+ // Note: OpenAI-compatible chat completions endpoint
14466
+ app.post('/v1/chat/completions', async (request, response) => {
14467
+ // TODO: !!!! Make more promptbook-native:
14468
+ try {
14469
+ const params = request.body;
14470
+ const { model, messages } = params;
14471
+ // Convert messages to a single prompt
14472
+ const prompt = messages
14473
+ .map((message) => `${message.role}: ${message.content}`)
14474
+ .join('\n');
14475
+ // Get pipeline for the book
14476
+ if (!collection) {
14477
+ throw new Error('No collection available');
14478
+ }
14479
+ const pipeline = await collection.getPipelineByUrl(model);
14480
+ const pipelineExecutor = createPipelineExecutor({
14481
+ pipeline,
14482
+ tools: await getExecutionToolsFromIdentification({
14483
+ isAnonymous: true,
14484
+ llmToolsConfiguration: [],
14485
+ }),
14486
+ });
14487
+ // Execute the pipeline with the prompt content as input
14488
+ const result = await pipelineExecutor({ prompt }).asPromise({ isCrashedOnError: true });
14489
+ if (!result.isSuccessful) {
14490
+ throw new Error(`Failed to execute book: ${result.errors.join(', ')}`);
14491
+ }
14492
+ // Return the result in OpenAI-compatible format
14493
+ response.json({
14494
+ id: 'chatcmpl-' + Math.random().toString(36).substring(2),
14495
+ object: 'chat.completion',
14496
+ created: Math.floor(Date.now() / 1000),
14497
+ model,
14498
+ choices: [
14499
+ {
14500
+ index: 0,
14501
+ message: {
14502
+ role: 'assistant',
14503
+ content: result.outputParameters.response,
14504
+ },
14505
+ finish_reason: 'stop',
14506
+ },
14507
+ ],
14508
+ usage: {
14509
+ prompt_tokens: 0,
14510
+ completion_tokens: 0,
14511
+ total_tokens: 0,
14512
+ },
14513
+ });
14514
+ }
14515
+ catch (error) {
14516
+ response.status(500).json({
14517
+ error: {
14518
+ message: error instanceof Error ? error.message : 'Unknown error',
14519
+ type: 'server_error',
14520
+ code: 'internal_error',
14521
+ },
14522
+ });
14523
+ }
14524
+ });
14352
14525
  // TODO: [๐Ÿฅบ] Expose openapiJson to consumer and also allow to add new routes
14353
14526
  app.use(OpenApiValidator__namespace.middleware({
14354
14527
  apiSpec: openapiJson,
@@ -14709,7 +14882,6 @@
14709
14882
  catch (error) {
14710
14883
  assertsError(error);
14711
14884
  socket.emit('error', serializeError(error));
14712
- // <- TODO: [๐Ÿš‹] There is a problem with the remote server handling errors and sending them back to the client
14713
14885
  }
14714
14886
  finally {
14715
14887
  socket.disconnect();
@@ -17234,7 +17406,7 @@
17234
17406
  */
17235
17407
 
17236
17408
  /**
17237
- * Execution Tools for calling OpenAI API or other OpeenAI compatible provider
17409
+ * Execution Tools for calling OpenAI API or other OpenAI compatible provider
17238
17410
  *
17239
17411
  * @public exported from `@promptbook/openai`
17240
17412
  */
@@ -17804,6 +17976,7 @@
17804
17976
  baseURL: DEFAULT_OLLAMA_BASE_URL,
17805
17977
  ...ollamaOptions,
17806
17978
  apiKey: 'ollama',
17979
+ isProxied: false, // <- Note: Ollama is always local
17807
17980
  };
17808
17981
  super(openAiCompatibleOptions);
17809
17982
  }
@@ -17976,6 +18149,42 @@
17976
18149
  */
17977
18150
  },
17978
18151
  });
18152
+ /**
18153
+ * Registration of the OpenAI Compatible metadata
18154
+ *
18155
+ * Note: OpenAiCompatibleExecutionTools is an abstract class and cannot be instantiated directly.
18156
+ * It serves as a base class for OpenAiExecutionTools and other compatible implementations.
18157
+ *
18158
+ * @public exported from `@promptbook/core`
18159
+ * @public exported from `@promptbook/wizard`
18160
+ * @public exported from `@promptbook/cli`
18161
+ */
18162
+ const _OpenAiCompatibleMetadataRegistration = $llmToolsMetadataRegister.register({
18163
+ title: 'Open AI Compatible',
18164
+ packageName: '@promptbook/openai',
18165
+ className: 'OpenAiCompatibleExecutionTools',
18166
+ envVariables: ['OPENAI_API_KEY', 'OPENAI_BASE_URL'],
18167
+ trustLevel: 'CLOSED',
18168
+ order: MODEL_ORDERS.TOP_TIER,
18169
+ getBoilerplateConfiguration() {
18170
+ return {
18171
+ title: 'Open AI Compatible',
18172
+ packageName: '@promptbook/openai',
18173
+ className: 'OpenAiCompatibleExecutionTools',
18174
+ options: {
18175
+ apiKey: 'sk-',
18176
+ baseURL: 'https://api.openai.com/v1',
18177
+ defaultModelName: 'gpt-4-turbo',
18178
+ isProxied: false,
18179
+ remoteServerUrl: DEFAULT_REMOTE_SERVER_URL,
18180
+ maxRequestsPerMinute: DEFAULT_MAX_REQUESTS_PER_MINUTE,
18181
+ },
18182
+ };
18183
+ },
18184
+ createConfigurationFromEnv(env) {
18185
+ return null;
18186
+ },
18187
+ });
17979
18188
  /**
17980
18189
  * Note: [๐Ÿ’ž] Ignore a discrepancy between file name and entity name
17981
18190
  */
@@ -18025,7 +18234,7 @@
18025
18234
  * Default model for chat variant.
18026
18235
  */
18027
18236
  getDefaultChatModel() {
18028
- return this.getDefaultModel('gpt-4o');
18237
+ return this.getDefaultModel('gpt-4-turbo');
18029
18238
  }
18030
18239
  /**
18031
18240
  * Default model for completion variant.
@@ -18055,6 +18264,9 @@
18055
18264
  * @param options which are relevant are directly passed to the OpenAI client
18056
18265
  */
18057
18266
  constructor(options) {
18267
+ if (options.isProxied) {
18268
+ throw new NotYetImplementedError(`Proxy mode is not yet implemented for OpenAI assistants`);
18269
+ }
18058
18270
  super(options);
18059
18271
  this.assistantId = options.assistantId;
18060
18272
  // TODO: [๐Ÿ‘ฑ] Make limiter same as in `OpenAiExecutionTools`
@@ -18228,6 +18440,110 @@
18228
18440
  * TODO: [๐ŸŽถ] Naming "constructor" vs "creator" vs "factory"
18229
18441
  */
18230
18442
 
18443
+ /**
18444
+ * Execution Tools for calling OpenAI compatible API
18445
+ *
18446
+ * Note: This can be used for any OpenAI compatible APIs
18447
+ *
18448
+ * @public exported from `@promptbook/openai`
18449
+ */
18450
+ const createOpenAiCompatibleExecutionTools = Object.assign((options) => {
18451
+ if (options.isProxied) {
18452
+ return new RemoteLlmExecutionTools({
18453
+ ...options,
18454
+ identification: {
18455
+ isAnonymous: true,
18456
+ llmToolsConfiguration: [
18457
+ {
18458
+ title: 'OpenAI Compatible (proxied)',
18459
+ packageName: '@promptbook/openai',
18460
+ className: 'OpenAiCompatibleExecutionTools',
18461
+ options: {
18462
+ ...options,
18463
+ isProxied: false,
18464
+ },
18465
+ },
18466
+ ],
18467
+ },
18468
+ });
18469
+ }
18470
+ if (($isRunningInBrowser() || $isRunningInWebWorker()) && !options.dangerouslyAllowBrowser) {
18471
+ options = { ...options, dangerouslyAllowBrowser: true };
18472
+ }
18473
+ return new HardcodedOpenAiCompatibleExecutionTools(options.defaultModelName, options);
18474
+ }, {
18475
+ packageName: '@promptbook/openai',
18476
+ className: 'OpenAiCompatibleExecutionTools',
18477
+ });
18478
+ /**
18479
+ * Execution Tools for calling ONE SPECIFIC PRECONFIGURED OpenAI compatible provider
18480
+ *
18481
+ * @private for `createOpenAiCompatibleExecutionTools`
18482
+ */
18483
+ class HardcodedOpenAiCompatibleExecutionTools extends OpenAiCompatibleExecutionTools {
18484
+ /**
18485
+ * Creates OpenAI compatible Execution Tools.
18486
+ *
18487
+ * @param options which are relevant are directly passed to the OpenAI compatible client
18488
+ */
18489
+ constructor(defaultModelName, options) {
18490
+ super(options);
18491
+ this.defaultModelName = defaultModelName;
18492
+ this.options = options;
18493
+ }
18494
+ get title() {
18495
+ return `${this.defaultModelName} on ${this.options.baseURL}`;
18496
+ }
18497
+ get description() {
18498
+ return `OpenAI compatible connected to "${this.options.baseURL}" model "${this.defaultModelName}"`;
18499
+ }
18500
+ /**
18501
+ * List all available models (non dynamically)
18502
+ *
18503
+ * Note: Purpose of this is to provide more information about models than standard listing from API
18504
+ */
18505
+ get HARDCODED_MODELS() {
18506
+ return [
18507
+ {
18508
+ modelName: this.defaultModelName,
18509
+ modelVariant: 'CHAT',
18510
+ modelDescription: '', // <- TODO: What is the best value here, maybe `this.description`?
18511
+ },
18512
+ ];
18513
+ }
18514
+ /**
18515
+ * Computes the usage
18516
+ */
18517
+ computeUsage(...args) {
18518
+ return {
18519
+ ...computeOpenAiUsage(...args),
18520
+ price: UNCERTAIN_ZERO_VALUE, // <- TODO: Maybe in future pass this counting mechanism, but for now, we dont know
18521
+ };
18522
+ }
18523
+ /**
18524
+ * Default model for chat variant.
18525
+ */
18526
+ getDefaultChatModel() {
18527
+ return this.getDefaultModel(this.defaultModelName);
18528
+ }
18529
+ /**
18530
+ * Default model for completion variant.
18531
+ */
18532
+ getDefaultCompletionModel() {
18533
+ throw new PipelineExecutionError(`${this.title} does not support COMPLETION model variant`);
18534
+ }
18535
+ /**
18536
+ * Default model for embedding variant.
18537
+ */
18538
+ getDefaultEmbeddingModel() {
18539
+ throw new PipelineExecutionError(`${this.title} does not support EMBEDDING model variant`);
18540
+ }
18541
+ }
18542
+ /**
18543
+ * TODO: [๐Ÿฆบ] Is there some way how to put `packageName` and `className` on top and function definition on bottom?
18544
+ * TODO: [๐ŸŽถ] Naming "constructor" vs "creator" vs "factory"
18545
+ */
18546
+
18231
18547
  /**
18232
18548
  * Execution Tools for calling OpenAI API
18233
18549
  *
@@ -18239,6 +18555,9 @@
18239
18555
  if (($isRunningInBrowser() || $isRunningInWebWorker()) && !options.dangerouslyAllowBrowser) {
18240
18556
  options = { ...options, dangerouslyAllowBrowser: true };
18241
18557
  }
18558
+ if (options.isProxied) {
18559
+ throw new NotYetImplementedError(`Proxy mode is not yet implemented in createOpenAiExecutionTools`);
18560
+ }
18242
18561
  return new OpenAiExecutionTools(options);
18243
18562
  }, {
18244
18563
  packageName: '@promptbook/openai',
@@ -18249,6 +18568,7 @@
18249
18568
  * TODO: [๐ŸŽถ] Naming "constructor" vs "creator" vs "factory"
18250
18569
  */
18251
18570
 
18571
+ // Note: OpenAiCompatibleExecutionTools is an abstract class and cannot be instantiated directly
18252
18572
  /**
18253
18573
  * Registration of LLM provider
18254
18574
  *
@@ -18269,6 +18589,20 @@
18269
18589
  * @public exported from `@promptbook/cli`
18270
18590
  */
18271
18591
  const _OpenAiAssistantRegistration = $llmToolsRegister.register(createOpenAiAssistantExecutionTools);
18592
+ /**
18593
+ * Registration of the OpenAI Compatible provider
18594
+ *
18595
+ * Note: [๐Ÿ] Configuration registrations are done in register-configuration.ts BUT constructor registrations are done in register-constructor.ts
18596
+ *
18597
+ * @public exported from `@promptbook/openai`
18598
+ * @public exported from `@promptbook/wizard`
18599
+ * @public exported from `@promptbook/cli`
18600
+ */
18601
+ const _OpenAiCompatibleRegistration = $llmToolsRegister.register(createOpenAiCompatibleExecutionTools);
18602
+ /**
18603
+ * Note: OpenAiCompatibleExecutionTools is an abstract class and cannot be registered directly.
18604
+ * It serves as a base class for OpenAiExecutionTools and other compatible implementations.
18605
+ */
18272
18606
  /**
18273
18607
  * TODO: [๐ŸŽถ] Naming "constructor" vs "creator" vs "factory"
18274
18608
  * Note: [๐Ÿ’ž] Ignore a discrepancy between file name and entity name
@@ -19531,6 +19865,8 @@
19531
19865
  exports._OllamaRegistration = _OllamaRegistration;
19532
19866
  exports._OpenAiAssistantMetadataRegistration = _OpenAiAssistantMetadataRegistration;
19533
19867
  exports._OpenAiAssistantRegistration = _OpenAiAssistantRegistration;
19868
+ exports._OpenAiCompatibleMetadataRegistration = _OpenAiCompatibleMetadataRegistration;
19869
+ exports._OpenAiCompatibleRegistration = _OpenAiCompatibleRegistration;
19534
19870
  exports._OpenAiMetadataRegistration = _OpenAiMetadataRegistration;
19535
19871
  exports._OpenAiRegistration = _OpenAiRegistration;
19536
19872
  exports._PdfScraperMetadataRegistration = _PdfScraperMetadataRegistration;