@promptbook/cli 0.92.0-23 → 0.92.0-25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/esm/index.es.js +157 -111
- package/esm/index.es.js.map +1 -1
- package/esm/typings/src/_packages/core.index.d.ts +0 -2
- package/esm/typings/src/collection/PipelineCollection.d.ts +0 -2
- package/esm/typings/src/collection/SimplePipelineCollection.d.ts +1 -1
- package/esm/typings/src/commands/FORMFACTOR/formfactorCommandParser.d.ts +1 -1
- package/esm/typings/src/commands/_common/types/CommandParser.d.ts +36 -28
- package/esm/typings/src/config.d.ts +8 -4
- package/esm/typings/src/constants.d.ts +2 -2
- package/esm/typings/src/errors/0-BoilerplateError.d.ts +2 -2
- package/esm/typings/src/execution/CommonToolsOptions.d.ts +3 -3
- package/esm/typings/src/formats/_common/FormatSubvalueParser.d.ts +10 -10
- package/esm/typings/src/formfactors/_boilerplate/BoilerplateFormfactorDefinition.d.ts +3 -2
- package/esm/typings/src/formfactors/_common/string_formfactor_name.d.ts +2 -1
- package/esm/typings/src/formfactors/index.d.ts +1 -1
- package/esm/typings/src/formfactors/sheets/SheetsFormfactorDefinition.d.ts +3 -2
- package/esm/typings/src/llm-providers/_common/register/$llmToolsMetadataRegister.d.ts +3 -3
- package/esm/typings/src/llm-providers/_common/register/$llmToolsRegister.d.ts +3 -3
- package/esm/typings/src/llm-providers/_common/register/$provideLlmToolsConfigurationFromEnv.d.ts +4 -4
- package/esm/typings/src/llm-providers/_common/utils/cache/CacheItem.d.ts +5 -5
- package/esm/typings/src/llm-providers/_common/utils/cache/CacheLlmToolsOptions.d.ts +5 -3
- package/esm/typings/src/llm-providers/_common/utils/cache/cacheLlmTools.d.ts +3 -3
- package/esm/typings/src/llm-providers/_common/utils/count-total-usage/limitTotalUsage.d.ts +5 -5
- package/esm/typings/src/other/templates/getBookTemplates.d.ts +2 -2
- package/esm/typings/src/pipeline/PipelineInterface/PipelineInterface.d.ts +3 -3
- package/esm/typings/src/pipeline/PipelineInterface/constants.d.ts +1 -1
- package/esm/typings/src/pipeline/PipelineInterface/getPipelineInterface.d.ts +1 -1
- package/esm/typings/src/pipeline/PipelineInterface/isPipelineImplementingInterface.d.ts +5 -4
- package/esm/typings/src/pipeline/PipelineInterface/isPipelineInterfacesEqual.d.ts +1 -1
- package/esm/typings/src/pipeline/PipelineJson/CommonTaskJson.d.ts +9 -6
- package/esm/typings/src/pipeline/PipelineJson/PipelineJson.d.ts +2 -2
- package/esm/typings/src/pipeline/PipelineString.d.ts +3 -1
- package/esm/typings/src/pipeline/book-notation.d.ts +2 -2
- package/esm/typings/src/prepare/prepareTasks.d.ts +7 -4
- package/esm/typings/src/remote-server/types/RemoteServerOptions.d.ts +2 -1
- package/esm/typings/src/scrapers/_boilerplate/BoilerplateScraper.d.ts +3 -3
- package/esm/typings/src/scrapers/_boilerplate/createBoilerplateScraper.d.ts +1 -1
- package/esm/typings/src/scrapers/_boilerplate/register-metadata.d.ts +1 -1
- package/esm/typings/src/scrapers/_common/Converter.d.ts +3 -1
- package/esm/typings/src/scrapers/_common/Scraper.d.ts +4 -3
- package/esm/typings/src/scrapers/_common/ScraperIntermediateSource.d.ts +4 -2
- package/esm/typings/src/scrapers/_common/register/$provideFilesystemForNode.d.ts +2 -1
- package/esm/typings/src/scrapers/_common/register/$provideScrapersForBrowser.d.ts +6 -3
- package/esm/typings/src/scrapers/_common/register/$provideScrapersForNode.d.ts +3 -5
- package/esm/typings/src/scrapers/_common/register/$scrapersRegister.d.ts +3 -2
- package/esm/typings/src/scrapers/_common/register/ScraperAndConverterMetadata.d.ts +8 -5
- package/esm/typings/src/scrapers/_common/register/ScraperConstructor.d.ts +2 -1
- package/esm/typings/src/scrapers/_common/utils/getScraperIntermediateSource.d.ts +6 -5
- package/esm/typings/src/scrapers/_common/utils/makeKnowledgeSourceHandler.d.ts +3 -1
- package/esm/typings/src/scrapers/document/createDocumentScraper.d.ts +1 -1
- package/esm/typings/src/scrapers/document-legacy/createLegacyDocumentScraper.d.ts +2 -1
- package/esm/typings/src/scrapers/markdown/createMarkdownScraper.d.ts +4 -1
- package/esm/typings/src/scrapers/markitdown/MarkitdownScraper.d.ts +1 -1
- package/esm/typings/src/scrapers/pdf/createPdfScraper.d.ts +2 -1
- package/esm/typings/src/scrapers/website/createWebsiteScraper.d.ts +3 -4
- package/esm/typings/src/scripting/javascript/postprocessing-functions.d.ts +5 -1
- package/esm/typings/src/storage/file-cache-storage/FileCacheStorage.d.ts +12 -5
- package/esm/typings/src/storage/file-cache-storage/FileCacheStorageOptions.d.ts +4 -2
- package/esm/typings/src/storage/file-cache-storage/utils/nameToSubfolderPath.d.ts +2 -1
- package/esm/typings/src/storage/local-storage/utils/makePromptbookStorageFromWebStorage.d.ts +2 -1
- package/esm/typings/src/types/IntermediateFilesStrategy.d.ts +2 -1
- package/esm/typings/src/types/ModelVariant.d.ts +5 -5
- package/esm/typings/src/types/typeAliases.d.ts +8 -6
- package/esm/typings/src/utils/editable/edit-pipeline-string/addPipelineCommand.d.ts +2 -2
- package/esm/typings/src/utils/editable/edit-pipeline-string/deflatePipeline.d.ts +4 -1
- package/esm/typings/src/utils/editable/utils/isFlatPipeline.d.ts +2 -1
- package/esm/typings/src/utils/environment/$getGlobalScope.d.ts +2 -1
- package/esm/typings/src/utils/markdown/extractAllListItemsFromMarkdown.d.ts +1 -1
- package/esm/typings/src/utils/normalization/nameToUriPart.d.ts +4 -4
- package/esm/typings/src/utils/normalization/nameToUriParts.d.ts +4 -4
- package/esm/typings/src/utils/normalization/normalize-to-kebab-case.d.ts +3 -3
- package/esm/typings/src/utils/normalization/normalizeTo_SCREAMING_CASE.d.ts +3 -3
- package/esm/typings/src/utils/normalization/normalizeTo_camelCase.d.ts +4 -4
- package/esm/typings/src/utils/normalization/normalizeTo_snake_case.d.ts +3 -3
- package/esm/typings/src/utils/normalization/removeDiacritics.d.ts +3 -3
- package/esm/typings/src/utils/normalization/searchKeywords.d.ts +4 -1
- package/esm/typings/src/utils/normalization/titleToName.d.ts +4 -4
- package/esm/typings/src/utils/organization/empty_object.d.ts +2 -2
- package/esm/typings/src/utils/organization/just_empty_object.d.ts +4 -4
- package/esm/typings/src/version.d.ts +2 -1
- package/package.json +1 -1
- package/umd/index.umd.js +157 -111
- package/umd/index.umd.js.map +1 -1
package/esm/index.es.js
CHANGED
|
@@ -47,7 +47,7 @@ const BOOK_LANGUAGE_VERSION = '1.0.0';
|
|
|
47
47
|
* @generated
|
|
48
48
|
* @see https://github.com/webgptorg/promptbook
|
|
49
49
|
*/
|
|
50
|
-
const PROMPTBOOK_ENGINE_VERSION = '0.92.0-
|
|
50
|
+
const PROMPTBOOK_ENGINE_VERSION = '0.92.0-25';
|
|
51
51
|
/**
|
|
52
52
|
* TODO: string_promptbook_version should be constrained to the all versions of Promptbook engine
|
|
53
53
|
* Note: [💞] Ignore a discrepancy between file name and entity name
|
|
@@ -164,11 +164,20 @@ const DEFAULT_BOOK_OUTPUT_PARAMETER_NAME = 'result';
|
|
|
164
164
|
*/
|
|
165
165
|
const DEFAULT_MAX_FILE_SIZE = 100 * 1024 * 1024; // 100MB
|
|
166
166
|
/**
|
|
167
|
-
*
|
|
167
|
+
* Threshold value that determines when a dataset is considered "big"
|
|
168
|
+
* and may require special handling or optimizations
|
|
169
|
+
*
|
|
170
|
+
* For example, when an error occurs in one item of a big dataset, it will not fail the whole pipeline
|
|
168
171
|
*
|
|
169
172
|
* @public exported from `@promptbook/core`
|
|
170
173
|
*/
|
|
171
174
|
const BIG_DATASET_TRESHOLD = 50;
|
|
175
|
+
/**
|
|
176
|
+
* Placeholder text used to represent the value of a failed operation
|
|
177
|
+
*
|
|
178
|
+
* @public exported from `@promptbook/core`
|
|
179
|
+
*/
|
|
180
|
+
const FAILED_VALUE_PLACEHOLDER = '!?';
|
|
172
181
|
// <- TODO: !!!! Use
|
|
173
182
|
/**
|
|
174
183
|
* Warning message for the generated sections and files
|
|
@@ -703,7 +712,8 @@ class NotYetImplementedError extends Error {
|
|
|
703
712
|
}
|
|
704
713
|
|
|
705
714
|
/**
|
|
706
|
-
*
|
|
715
|
+
* Safely retrieves the global scope object (window in browser, global in Node.js)
|
|
716
|
+
* regardless of the JavaScript environment in which the code is running
|
|
707
717
|
*
|
|
708
718
|
* Note: `$` is used to indicate that this function is not a pure function - it accesses the global scope
|
|
709
719
|
*
|
|
@@ -714,10 +724,10 @@ function $getGlobalScope() {
|
|
|
714
724
|
}
|
|
715
725
|
|
|
716
726
|
/**
|
|
717
|
-
*
|
|
727
|
+
* Normalizes a text string to SCREAMING_CASE (all uppercase with underscores).
|
|
718
728
|
*
|
|
719
|
-
* @param text
|
|
720
|
-
* @returns
|
|
729
|
+
* @param text The text string to be converted to SCREAMING_CASE format.
|
|
730
|
+
* @returns The normalized text in SCREAMING_CASE format.
|
|
721
731
|
* @example 'HELLO_WORLD'
|
|
722
732
|
* @example 'I_LOVE_PROMPTBOOK'
|
|
723
733
|
* @public exported from `@promptbook/utils`
|
|
@@ -769,10 +779,10 @@ function normalizeTo_SCREAMING_CASE(text) {
|
|
|
769
779
|
*/
|
|
770
780
|
|
|
771
781
|
/**
|
|
772
|
-
*
|
|
782
|
+
* Normalizes a text string to snake_case format.
|
|
773
783
|
*
|
|
774
|
-
* @param text
|
|
775
|
-
* @returns
|
|
784
|
+
* @param text The text string to be converted to snake_case format.
|
|
785
|
+
* @returns The normalized text in snake_case format.
|
|
776
786
|
* @example 'hello_world'
|
|
777
787
|
* @example 'i_love_promptbook'
|
|
778
788
|
* @public exported from `@promptbook/utils`
|
|
@@ -830,10 +840,10 @@ class $Register {
|
|
|
830
840
|
}
|
|
831
841
|
|
|
832
842
|
/**
|
|
833
|
-
*
|
|
843
|
+
* Register for LLM tools metadata.
|
|
834
844
|
*
|
|
835
845
|
* Note: `$` is used to indicate that this interacts with the global scope
|
|
836
|
-
* @singleton Only one instance of each register is created per build, but
|
|
846
|
+
* @singleton Only one instance of each register is created per build, but there can be more instances across different builds or environments.
|
|
837
847
|
* @public exported from `@promptbook/core`
|
|
838
848
|
*/
|
|
839
849
|
const $llmToolsMetadataRegister = new $Register('llm_tools_metadata');
|
|
@@ -842,10 +852,10 @@ const $llmToolsMetadataRegister = new $Register('llm_tools_metadata');
|
|
|
842
852
|
*/
|
|
843
853
|
|
|
844
854
|
/**
|
|
845
|
-
*
|
|
855
|
+
* Register for LLM tools.
|
|
846
856
|
*
|
|
847
857
|
* Note: `$` is used to indicate that this interacts with the global scope
|
|
848
|
-
* @singleton Only one instance of each register is created per build, but
|
|
858
|
+
* @singleton Only one instance of each register is created per build, but there can be more instances across different builds or environments.
|
|
849
859
|
* @public exported from `@promptbook/core`
|
|
850
860
|
*/
|
|
851
861
|
const $llmToolsRegister = new $Register('llm_execution_tools_constructors');
|
|
@@ -1105,7 +1115,8 @@ function TODO_USE(...value) {
|
|
|
1105
1115
|
}
|
|
1106
1116
|
|
|
1107
1117
|
/**
|
|
1108
|
-
*
|
|
1118
|
+
* Provides filesystem access (for example for Node.js-based scrapers)
|
|
1119
|
+
* Creates a standardized filesystem interface that scrapers can use for file operations.
|
|
1109
1120
|
*
|
|
1110
1121
|
* @public exported from `@promptbook/node`
|
|
1111
1122
|
*/
|
|
@@ -1567,13 +1578,13 @@ const ORDER_OF_PIPELINE_JSON = [
|
|
|
1567
1578
|
*/
|
|
1568
1579
|
const REPLACING_NONCE = 'ptbkauk42kV2dzao34faw7FudQUHYPtW';
|
|
1569
1580
|
/**
|
|
1570
|
-
*
|
|
1581
|
+
* Placeholder value indicating a parameter is missing its value.
|
|
1571
1582
|
*
|
|
1572
1583
|
* @private within the repository
|
|
1573
1584
|
*/
|
|
1574
1585
|
const RESERVED_PARAMETER_MISSING_VALUE = 'MISSING-' + REPLACING_NONCE;
|
|
1575
1586
|
/**
|
|
1576
|
-
*
|
|
1587
|
+
* Placeholder value indicating a parameter is restricted and cannot be used directly.
|
|
1577
1588
|
*
|
|
1578
1589
|
* @private within the repository
|
|
1579
1590
|
*/
|
|
@@ -2008,10 +2019,10 @@ for (let i = 0; i < defaultDiacriticsRemovalMap.length; i++) {
|
|
|
2008
2019
|
*/
|
|
2009
2020
|
|
|
2010
2021
|
/**
|
|
2011
|
-
*
|
|
2022
|
+
* Removes diacritic marks (accents) from characters in a string.
|
|
2012
2023
|
*
|
|
2013
|
-
* @param input
|
|
2014
|
-
* @returns
|
|
2024
|
+
* @param input The string containing diacritics to be normalized.
|
|
2025
|
+
* @returns The string with diacritics removed or normalized.
|
|
2015
2026
|
* @public exported from `@promptbook/utils`
|
|
2016
2027
|
*/
|
|
2017
2028
|
function removeDiacritics(input) {
|
|
@@ -2025,10 +2036,10 @@ function removeDiacritics(input) {
|
|
|
2025
2036
|
*/
|
|
2026
2037
|
|
|
2027
2038
|
/**
|
|
2028
|
-
*
|
|
2039
|
+
* Converts a given text to kebab-case format.
|
|
2029
2040
|
*
|
|
2030
|
-
* @param text
|
|
2031
|
-
* @returns
|
|
2041
|
+
* @param text The text to be converted.
|
|
2042
|
+
* @returns The kebab-case formatted string.
|
|
2032
2043
|
* @example 'hello-world'
|
|
2033
2044
|
* @example 'i-love-promptbook'
|
|
2034
2045
|
* @public exported from `@promptbook/utils`
|
|
@@ -2076,11 +2087,11 @@ function normalizeToKebabCase(text) {
|
|
|
2076
2087
|
*/
|
|
2077
2088
|
|
|
2078
2089
|
/**
|
|
2079
|
-
*
|
|
2090
|
+
* Converts a title string into a normalized name.
|
|
2080
2091
|
*
|
|
2081
|
-
* @param value
|
|
2082
|
-
* @returns
|
|
2083
|
-
* @example
|
|
2092
|
+
* @param value The title string to be converted to a name.
|
|
2093
|
+
* @returns A normalized name derived from the input title.
|
|
2094
|
+
* @example 'Hello World!' -> 'hello-world'
|
|
2084
2095
|
* @public exported from `@promptbook/utils`
|
|
2085
2096
|
*/
|
|
2086
2097
|
function titleToName(value) {
|
|
@@ -2100,7 +2111,8 @@ function titleToName(value) {
|
|
|
2100
2111
|
}
|
|
2101
2112
|
|
|
2102
2113
|
/**
|
|
2103
|
-
*
|
|
2114
|
+
* Converts a name to a properly formatted subfolder path for cache storage.
|
|
2115
|
+
* Handles normalization and path formatting to create consistent cache directory structures.
|
|
2104
2116
|
*
|
|
2105
2117
|
* @private for `FileCacheStorage`
|
|
2106
2118
|
*/
|
|
@@ -2109,7 +2121,10 @@ function nameToSubfolderPath(name) {
|
|
|
2109
2121
|
}
|
|
2110
2122
|
|
|
2111
2123
|
/**
|
|
2112
|
-
*
|
|
2124
|
+
* A storage implementation that caches data in files organized in a directory structure.
|
|
2125
|
+
* Provides methods for retrieving, storing, and managing cached data on the filesystem.
|
|
2126
|
+
*
|
|
2127
|
+
* This class implements the PromptbookStorage interface for filesystem-based caching.
|
|
2113
2128
|
*
|
|
2114
2129
|
* @public exported from `@promptbook/node`
|
|
2115
2130
|
*/
|
|
@@ -2122,7 +2137,8 @@ class FileCacheStorage {
|
|
|
2122
2137
|
}
|
|
2123
2138
|
}
|
|
2124
2139
|
/**
|
|
2125
|
-
*
|
|
2140
|
+
* Converts a storage key to a filesystem path where the data should be stored.
|
|
2141
|
+
* Creates a consistent, deterministic file path based on the key string.
|
|
2126
2142
|
*/
|
|
2127
2143
|
getFilenameForKey(key) {
|
|
2128
2144
|
// TODO: [👬] DRY
|
|
@@ -2134,7 +2150,8 @@ class FileCacheStorage {
|
|
|
2134
2150
|
...nameToSubfolderPath(hash /* <- TODO: [🎎] Maybe add some SHA256 prefix */), `${name.substring(0, MAX_FILENAME_LENGTH)}.json`);
|
|
2135
2151
|
}
|
|
2136
2152
|
/**
|
|
2137
|
-
*
|
|
2153
|
+
* Returns the current value associated with the given key, or null if the given key does not exist.
|
|
2154
|
+
* Retrieves the cached data from the file system storage.
|
|
2138
2155
|
*/
|
|
2139
2156
|
async getItem(key) {
|
|
2140
2157
|
const filename = this.getFilenameForKey(key);
|
|
@@ -2147,7 +2164,8 @@ class FileCacheStorage {
|
|
|
2147
2164
|
return value;
|
|
2148
2165
|
}
|
|
2149
2166
|
/**
|
|
2150
|
-
*
|
|
2167
|
+
* Sets the value of the pair identified by key to value, creating a new key/value pair if none existed for key previously.
|
|
2168
|
+
* Persists data to the file system, creating necessary directory structure if it doesn't exist.
|
|
2151
2169
|
*/
|
|
2152
2170
|
async setItem(key, value) {
|
|
2153
2171
|
const filename = this.getFilenameForKey(key);
|
|
@@ -2159,7 +2177,8 @@ class FileCacheStorage {
|
|
|
2159
2177
|
await writeFile(filename, fileContent, 'utf-8');
|
|
2160
2178
|
}
|
|
2161
2179
|
/**
|
|
2162
|
-
*
|
|
2180
|
+
* Removes the key/value pair with the given key from the storage, if a key/value pair with the given key exists.
|
|
2181
|
+
* Deletes the corresponding file from the filesystem.
|
|
2163
2182
|
*/
|
|
2164
2183
|
async removeItem(key) {
|
|
2165
2184
|
const filename = this.getFilenameForKey(key);
|
|
@@ -2782,9 +2801,9 @@ function cacheLlmTools(llmTools, options = {}) {
|
|
|
2782
2801
|
/**
|
|
2783
2802
|
* TODO: [🧠][💸] Maybe make some common abstraction `interceptLlmTools` and use here (or use javascript Proxy?)
|
|
2784
2803
|
* TODO: [🧠] Is there some meaningful way to test this util
|
|
2785
|
-
* TODO: [👷♂️]
|
|
2786
|
-
*
|
|
2787
|
-
*
|
|
2804
|
+
* TODO: [👷♂️] Comprehensive manual about construction of llmTools
|
|
2805
|
+
* Detailed explanation about caching strategies and appropriate storage selection for different use cases
|
|
2806
|
+
* Examples of how to combine multiple interceptors for advanced caching, logging, and usage tracking
|
|
2788
2807
|
*/
|
|
2789
2808
|
|
|
2790
2809
|
/**
|
|
@@ -2974,9 +2993,8 @@ function countUsage(llmTools) {
|
|
|
2974
2993
|
*/
|
|
2975
2994
|
|
|
2976
2995
|
/**
|
|
2977
|
-
*
|
|
2996
|
+
* Provides LLM tools configuration by reading environment variables.
|
|
2978
2997
|
*
|
|
2979
|
-
* @@@ .env
|
|
2980
2998
|
* Note: `$` is used to indicate that this function is not a pure function - it uses filesystem to access `.env` file
|
|
2981
2999
|
*
|
|
2982
3000
|
* It looks for environment variables:
|
|
@@ -2984,7 +3002,8 @@ function countUsage(llmTools) {
|
|
|
2984
3002
|
* - `process.env.ANTHROPIC_CLAUDE_API_KEY`
|
|
2985
3003
|
* - ...
|
|
2986
3004
|
*
|
|
2987
|
-
* @
|
|
3005
|
+
* @see Environment variables documentation or .env file for required variables.
|
|
3006
|
+
* @returns A promise that resolves to the LLM tools configuration, or null if configuration is incomplete or missing.
|
|
2988
3007
|
* @public exported from `@promptbook/node`
|
|
2989
3008
|
*/
|
|
2990
3009
|
async function $provideLlmToolsConfigurationFromEnv() {
|
|
@@ -3902,10 +3921,11 @@ async function $provideExecutablesForNode(options) {
|
|
|
3902
3921
|
*/
|
|
3903
3922
|
|
|
3904
3923
|
/**
|
|
3905
|
-
*
|
|
3924
|
+
* Registry for all available scrapers in the system.
|
|
3925
|
+
* Central point for registering and accessing different types of content scrapers.
|
|
3906
3926
|
*
|
|
3907
3927
|
* Note: `$` is used to indicate that this interacts with the global scope
|
|
3908
|
-
* @singleton Only one instance of each register is created per build, but
|
|
3928
|
+
* @singleton Only one instance of each register is created per build, but there can be more than one in different build modules
|
|
3909
3929
|
* @public exported from `@promptbook/core`
|
|
3910
3930
|
*/
|
|
3911
3931
|
const $scrapersRegister = new $Register('scraper_constructors');
|
|
@@ -3914,11 +3934,9 @@ const $scrapersRegister = new $Register('scraper_constructors');
|
|
|
3914
3934
|
*/
|
|
3915
3935
|
|
|
3916
3936
|
/**
|
|
3917
|
-
*
|
|
3918
|
-
*
|
|
3919
|
-
*
|
|
3920
|
-
* 2) @@@
|
|
3921
|
-
*
|
|
3937
|
+
* Provides a collection of scrapers optimized for Node.js environment.
|
|
3938
|
+
* 1) `provideScrapersForNode` use as default
|
|
3939
|
+
* 2) `provideScrapersForBrowser` use in limited browser environment
|
|
3922
3940
|
* @public exported from `@promptbook/node`
|
|
3923
3941
|
*/
|
|
3924
3942
|
async function $provideScrapersForNode(tools, options) {
|
|
@@ -4822,7 +4840,7 @@ class SimplePipelineCollection {
|
|
|
4822
4840
|
/**
|
|
4823
4841
|
* Constructs a pipeline collection from pipelines
|
|
4824
4842
|
*
|
|
4825
|
-
* @param pipelines
|
|
4843
|
+
* @param pipelines Array of pipeline JSON objects to include in the collection
|
|
4826
4844
|
*
|
|
4827
4845
|
* Note: During construction, the logic of all pipelines is validated
|
|
4828
4846
|
* Note: It is not recommended to use this constructor directly, use `createCollectionFromJson` *(or other variant)* instead
|
|
@@ -4934,8 +4952,8 @@ function createCollectionFromJson(...promptbooks) {
|
|
|
4934
4952
|
* @public exported from `@promptbook/core`
|
|
4935
4953
|
*/
|
|
4936
4954
|
function isPipelinePrepared(pipeline) {
|
|
4937
|
-
// Note: Ignoring `pipeline.preparations`
|
|
4938
|
-
// Note: Ignoring `pipeline.knowledgePieces`
|
|
4955
|
+
// Note: Ignoring `pipeline.preparations`
|
|
4956
|
+
// Note: Ignoring `pipeline.knowledgePieces`
|
|
4939
4957
|
if (pipeline.title === undefined || pipeline.title === '' || pipeline.title === DEFAULT_BOOK_TITLE) {
|
|
4940
4958
|
// TODO: !!! Comment this out
|
|
4941
4959
|
console.log('Pipeline is not prepared because title is undefined or empty', pipeline);
|
|
@@ -5510,6 +5528,15 @@ const CsvFormatParser = {
|
|
|
5510
5528
|
mappedData.push(mappedRow);
|
|
5511
5529
|
if (onProgress) {
|
|
5512
5530
|
// Note: Report the CSV with all rows mapped so far
|
|
5531
|
+
/*
|
|
5532
|
+
!!!!
|
|
5533
|
+
// Report progress with updated value
|
|
5534
|
+
const progressData = mappedData.map((row, i) =>
|
|
5535
|
+
i > index ? { ...row, [outputParameterName]: PENDING_VALUE_PLACEHOLDER } : row,
|
|
5536
|
+
);
|
|
5537
|
+
|
|
5538
|
+
|
|
5539
|
+
*/
|
|
5513
5540
|
await onProgress(unparse(mappedData, { ...settings, ...MANDATORY_CSV_SETTINGS }));
|
|
5514
5541
|
}
|
|
5515
5542
|
}
|
|
@@ -5726,7 +5753,7 @@ function mapAvailableToExpectedParameters(options) {
|
|
|
5726
5753
|
else if (!availableParametersNames.has(parameterName) && expectedParameterNames.has(parameterName)) ;
|
|
5727
5754
|
}
|
|
5728
5755
|
if (expectedParameterNames.size === 0) {
|
|
5729
|
-
// Note: [👨👨👧] Now we can freeze `mappedParameters` to prevent
|
|
5756
|
+
// Note: [👨👨👧] Now we can freeze `mappedParameters` to prevent accidental modifications after mapping
|
|
5730
5757
|
Object.freeze(mappedParameters);
|
|
5731
5758
|
return mappedParameters;
|
|
5732
5759
|
}
|
|
@@ -5757,7 +5784,7 @@ function mapAvailableToExpectedParameters(options) {
|
|
|
5757
5784
|
for (let i = 0; i < expectedParameterNames.size; i++) {
|
|
5758
5785
|
mappedParameters[expectedParameterNamesArray[i]] = availableParameters[availableParametersNamesArray[i]];
|
|
5759
5786
|
}
|
|
5760
|
-
// Note: [👨👨👧] Now we can freeze `mappedParameters` to prevent
|
|
5787
|
+
// Note: [👨👨👧] Now we can freeze `mappedParameters` to prevent accidental modifications after mapping
|
|
5761
5788
|
Object.freeze(mappedParameters);
|
|
5762
5789
|
return mappedParameters;
|
|
5763
5790
|
}
|
|
@@ -6529,15 +6556,14 @@ async function executeFormatSubvalues(options) {
|
|
|
6529
6556
|
const highLevelError = new PipelineExecutionError(spaceTrim((block) => `
|
|
6530
6557
|
${error.message}
|
|
6531
6558
|
|
|
6532
|
-
This is error in FOREACH command when mapping data
|
|
6559
|
+
This is error in FOREACH command when mapping ${formatDefinition.formatName} ${subvalueParser.subvalueName} data (${index + 1}/${length})
|
|
6533
6560
|
You have probbably passed wrong data to pipeline or wrong data was generated which are processed by FOREACH command
|
|
6534
6561
|
|
|
6535
6562
|
${block(pipelineIdentification)}
|
|
6536
|
-
Subparameter index: ${index}
|
|
6537
6563
|
`));
|
|
6538
6564
|
if (length > BIG_DATASET_TRESHOLD) {
|
|
6539
6565
|
console.error(highLevelError);
|
|
6540
|
-
return
|
|
6566
|
+
return FAILED_VALUE_PLACEHOLDER;
|
|
6541
6567
|
}
|
|
6542
6568
|
throw highLevelError;
|
|
6543
6569
|
}
|
|
@@ -6561,14 +6587,13 @@ async function executeFormatSubvalues(options) {
|
|
|
6561
6587
|
catch (error) {
|
|
6562
6588
|
if (length > BIG_DATASET_TRESHOLD) {
|
|
6563
6589
|
console.error(spaceTrim((block) => `
|
|
6564
|
-
|
|
6590
|
+
${error.message}
|
|
6565
6591
|
|
|
6566
|
-
${
|
|
6592
|
+
This is error in FOREACH command when processing ${formatDefinition.formatName} ${subvalueParser.subvalueName} data (${index + 1}/${length})
|
|
6567
6593
|
|
|
6568
6594
|
${block(pipelineIdentification)}
|
|
6569
|
-
Subparameter index: ${index}
|
|
6570
6595
|
`));
|
|
6571
|
-
return
|
|
6596
|
+
return FAILED_VALUE_PLACEHOLDER;
|
|
6572
6597
|
}
|
|
6573
6598
|
throw error;
|
|
6574
6599
|
}
|
|
@@ -7387,7 +7412,9 @@ function mimeTypeToExtension(value) {
|
|
|
7387
7412
|
}
|
|
7388
7413
|
|
|
7389
7414
|
/**
|
|
7390
|
-
*
|
|
7415
|
+
* Factory function that creates a handler for processing knowledge sources.
|
|
7416
|
+
* Provides standardized processing of different types of knowledge sources
|
|
7417
|
+
* across various scraper implementations.
|
|
7391
7418
|
*
|
|
7392
7419
|
* @public exported from `@promptbook/core`
|
|
7393
7420
|
*/
|
|
@@ -7628,9 +7655,12 @@ TODO: [🧊] This is how it can look in future
|
|
|
7628
7655
|
*/
|
|
7629
7656
|
|
|
7630
7657
|
/**
|
|
7631
|
-
*
|
|
7658
|
+
* Prepares tasks by adding knowledge to the prompt and ensuring all necessary parameters are included.
|
|
7632
7659
|
*
|
|
7633
|
-
* @
|
|
7660
|
+
* @param tasks Sequence of tasks that are chained together to form a pipeline
|
|
7661
|
+
* @returns A promise that resolves to the prepared tasks.
|
|
7662
|
+
*
|
|
7663
|
+
* @private internal utility of `preparePipeline`
|
|
7634
7664
|
*/
|
|
7635
7665
|
async function prepareTasks(pipeline, tools, options) {
|
|
7636
7666
|
const { maxParallelCount = DEFAULT_MAX_PARALLEL_COUNT } = options;
|
|
@@ -8536,11 +8566,11 @@ const expectCommandParser = {
|
|
|
8536
8566
|
};
|
|
8537
8567
|
|
|
8538
8568
|
/**
|
|
8539
|
-
*
|
|
8569
|
+
* Normalizes a given text to camelCase format.
|
|
8540
8570
|
*
|
|
8541
|
-
* @param text
|
|
8542
|
-
* @param _isFirstLetterCapital
|
|
8543
|
-
* @returns
|
|
8571
|
+
* @param text The text to be normalized.
|
|
8572
|
+
* @param _isFirstLetterCapital Whether the first letter should be capitalized.
|
|
8573
|
+
* @returns The camelCase formatted string.
|
|
8544
8574
|
* @example 'helloWorld'
|
|
8545
8575
|
* @example 'iLovePromptbook'
|
|
8546
8576
|
* @public exported from `@promptbook/utils`
|
|
@@ -9028,7 +9058,7 @@ const GeneratorFormfactorDefinition = {
|
|
|
9028
9058
|
};
|
|
9029
9059
|
|
|
9030
9060
|
/**
|
|
9031
|
-
*
|
|
9061
|
+
* Pipeline interface which is equivalent to `any`
|
|
9032
9062
|
*
|
|
9033
9063
|
* @see https://github.com/webgptorg/promptbook/discussions/171
|
|
9034
9064
|
*
|
|
@@ -9109,14 +9139,15 @@ const MatcherFormfactorDefinition = {
|
|
|
9109
9139
|
};
|
|
9110
9140
|
|
|
9111
9141
|
/**
|
|
9112
|
-
* Sheets is form of app that
|
|
9142
|
+
* Sheets is a form of app that processes tabular data in CSV format, allowing transformation
|
|
9143
|
+
* and analysis of structured data through AI-powered operations
|
|
9113
9144
|
*
|
|
9114
9145
|
* @public exported from `@promptbook/core`
|
|
9115
9146
|
*/
|
|
9116
9147
|
const SheetsFormfactorDefinition = {
|
|
9117
9148
|
name: 'SHEETS',
|
|
9118
9149
|
aliasNames: ['SHEETS', 'SHEET'],
|
|
9119
|
-
description:
|
|
9150
|
+
description: `A formfactor for processing spreadsheet-like data in CSV format, enabling AI transformations on tabular data`,
|
|
9120
9151
|
documentationUrl: `https://github.com/webgptorg/promptbook/discussions/176`,
|
|
9121
9152
|
pipelineInterface: {
|
|
9122
9153
|
inputParameters: [
|
|
@@ -9192,7 +9223,7 @@ const FORMFACTOR_DEFINITIONS = [
|
|
|
9192
9223
|
/**
|
|
9193
9224
|
* Parses the formfactor command
|
|
9194
9225
|
*
|
|
9195
|
-
* Note:
|
|
9226
|
+
* Note: This command is used as a formfactor for new commands and defines the app type format - it should NOT be used in any `.book` file
|
|
9196
9227
|
*
|
|
9197
9228
|
* @see `documentationUrl` for more details
|
|
9198
9229
|
* @public exported from `@promptbook/editable`
|
|
@@ -9214,7 +9245,7 @@ const formfactorCommandParser = {
|
|
|
9214
9245
|
/**
|
|
9215
9246
|
* Description of the FORMFACTOR command
|
|
9216
9247
|
*/
|
|
9217
|
-
description:
|
|
9248
|
+
description: `Specifies the application type and interface requirements that this promptbook should conform to`,
|
|
9218
9249
|
/**
|
|
9219
9250
|
* Link to documentation
|
|
9220
9251
|
*/
|
|
@@ -9357,8 +9388,7 @@ const jokerCommandParser = {
|
|
|
9357
9388
|
};
|
|
9358
9389
|
|
|
9359
9390
|
/**
|
|
9360
|
-
*
|
|
9361
|
-
*
|
|
9391
|
+
* @see {@link ModelVariant}
|
|
9362
9392
|
* @public exported from `@promptbook/core`
|
|
9363
9393
|
*/
|
|
9364
9394
|
const MODEL_VARIANTS = ['COMPLETION', 'CHAT', 'EMBEDDING' /* <- TODO [🏳] */ /* <- [🤖] */];
|
|
@@ -10331,7 +10361,7 @@ function parseCommandVariant(input) {
|
|
|
10331
10361
|
}
|
|
10332
10362
|
|
|
10333
10363
|
/**
|
|
10334
|
-
*
|
|
10364
|
+
* Extracts the interface (input and output parameters) from a pipeline.
|
|
10335
10365
|
*
|
|
10336
10366
|
* @deprecated https://github.com/webgptorg/promptbook/pull/186
|
|
10337
10367
|
* @see https://github.com/webgptorg/promptbook/discussions/171
|
|
@@ -10364,7 +10394,7 @@ function getPipelineInterface(pipeline) {
|
|
|
10364
10394
|
}
|
|
10365
10395
|
|
|
10366
10396
|
/**
|
|
10367
|
-
*
|
|
10397
|
+
* Checks if two pipeline interfaces are structurally identical.
|
|
10368
10398
|
*
|
|
10369
10399
|
* @deprecated https://github.com/webgptorg/promptbook/pull/186
|
|
10370
10400
|
* @see https://github.com/webgptorg/promptbook/discussions/171
|
|
@@ -10396,10 +10426,11 @@ function isPipelineInterfacesEqual(pipelineInterface1, pipelineInterface2) {
|
|
|
10396
10426
|
}
|
|
10397
10427
|
|
|
10398
10428
|
/**
|
|
10399
|
-
*
|
|
10429
|
+
* Checks if a given pipeline satisfies the requirements of a specified pipeline interface.
|
|
10400
10430
|
*
|
|
10401
10431
|
* @deprecated https://github.com/webgptorg/promptbook/pull/186
|
|
10402
10432
|
* @see https://github.com/webgptorg/promptbook/discussions/171
|
|
10433
|
+
* @returns `true` if the pipeline implements the interface, `false` otherwise.
|
|
10403
10434
|
*
|
|
10404
10435
|
* @public exported from `@promptbook/core`
|
|
10405
10436
|
*/
|
|
@@ -10585,7 +10616,8 @@ function removeMarkdownComments(content) {
|
|
|
10585
10616
|
}
|
|
10586
10617
|
|
|
10587
10618
|
/**
|
|
10588
|
-
*
|
|
10619
|
+
* Utility to determine if a pipeline string is in flat format.
|
|
10620
|
+
* A flat pipeline is a simple text without proper structure (headers, blocks, etc).
|
|
10589
10621
|
*
|
|
10590
10622
|
* @public exported from `@promptbook/editable`
|
|
10591
10623
|
*/
|
|
@@ -10606,7 +10638,10 @@ function isFlatPipeline(pipelineString) {
|
|
|
10606
10638
|
}
|
|
10607
10639
|
|
|
10608
10640
|
/**
|
|
10609
|
-
*
|
|
10641
|
+
* Converts a pipeline structure to its string representation.
|
|
10642
|
+
*
|
|
10643
|
+
* Transforms a flat, simple pipeline into a properly formatted pipeline string
|
|
10644
|
+
* with sections for title, prompt, and return statement.
|
|
10610
10645
|
*
|
|
10611
10646
|
* @public exported from `@promptbook/editable`
|
|
10612
10647
|
*/
|
|
@@ -10663,7 +10698,7 @@ function deflatePipeline(pipelineString) {
|
|
|
10663
10698
|
* Note: It can not work with html syntax and comments
|
|
10664
10699
|
*
|
|
10665
10700
|
* @param markdown any valid markdown
|
|
10666
|
-
* @returns
|
|
10701
|
+
* @returns An array of strings, each representing an individual list item found in the markdown
|
|
10667
10702
|
* @public exported from `@promptbook/markdown-utils`
|
|
10668
10703
|
*/
|
|
10669
10704
|
function extractAllListItemsFromMarkdown(markdown) {
|
|
@@ -11427,11 +11462,11 @@ function parseKeywordsFromString(input) {
|
|
|
11427
11462
|
}
|
|
11428
11463
|
|
|
11429
11464
|
/**
|
|
11430
|
-
*
|
|
11465
|
+
* Converts a name string into a URI-compatible format.
|
|
11431
11466
|
*
|
|
11432
|
-
* @param name
|
|
11433
|
-
* @returns
|
|
11434
|
-
* @example
|
|
11467
|
+
* @param name The string to be converted to a URI-compatible format.
|
|
11468
|
+
* @returns A URI-compatible string derived from the input name.
|
|
11469
|
+
* @example 'Hello World' -> 'hello-world'
|
|
11435
11470
|
* @public exported from `@promptbook/utils`
|
|
11436
11471
|
*/
|
|
11437
11472
|
function nameToUriPart(name) {
|
|
@@ -11445,11 +11480,11 @@ function nameToUriPart(name) {
|
|
|
11445
11480
|
}
|
|
11446
11481
|
|
|
11447
11482
|
/**
|
|
11448
|
-
*
|
|
11483
|
+
* Converts a given name into URI-compatible parts.
|
|
11449
11484
|
*
|
|
11450
|
-
* @param name
|
|
11451
|
-
* @returns
|
|
11452
|
-
* @example
|
|
11485
|
+
* @param name The name to be converted into URI parts.
|
|
11486
|
+
* @returns An array of URI-compatible parts derived from the name.
|
|
11487
|
+
* @example 'Example Name' -> ['example', 'name']
|
|
11453
11488
|
* @public exported from `@promptbook/utils`
|
|
11454
11489
|
*/
|
|
11455
11490
|
function nameToUriParts(name) {
|
|
@@ -16946,14 +16981,23 @@ function computeOpenAiUsage(promptContent, // <- Note: Intentionally using [] to
|
|
|
16946
16981
|
resultContent, rawResponse) {
|
|
16947
16982
|
var _a, _b;
|
|
16948
16983
|
if (rawResponse.usage === undefined) {
|
|
16984
|
+
console.log('!!! computeOpenAiUsage', 'The usage is not defined in the response from OpenAI');
|
|
16949
16985
|
throw new PipelineExecutionError('The usage is not defined in the response from OpenAI');
|
|
16950
16986
|
}
|
|
16951
16987
|
if (((_a = rawResponse.usage) === null || _a === void 0 ? void 0 : _a.prompt_tokens) === undefined) {
|
|
16988
|
+
console.log('!!! computeOpenAiUsage', 'In OpenAI response `usage.prompt_tokens` not defined');
|
|
16952
16989
|
throw new PipelineExecutionError('In OpenAI response `usage.prompt_tokens` not defined');
|
|
16953
16990
|
}
|
|
16954
16991
|
const inputTokens = rawResponse.usage.prompt_tokens;
|
|
16955
16992
|
const outputTokens = ((_b = rawResponse.usage) === null || _b === void 0 ? void 0 : _b.completion_tokens) || 0;
|
|
16956
16993
|
const modelInfo = OPENAI_MODELS.find((model) => model.modelName === rawResponse.model);
|
|
16994
|
+
console.log('!!! computeOpenAiUsage', {
|
|
16995
|
+
inputTokens,
|
|
16996
|
+
outputTokens,
|
|
16997
|
+
rawResponse,
|
|
16998
|
+
resultContent,
|
|
16999
|
+
modelInfo,
|
|
17000
|
+
});
|
|
16957
17001
|
let price;
|
|
16958
17002
|
if (modelInfo === undefined || modelInfo.pricing === undefined) {
|
|
16959
17003
|
price = uncertainNumber();
|
|
@@ -17563,9 +17607,8 @@ const _OpenAiAssistantRegistration = $llmToolsRegister.register(createOpenAiAssi
|
|
|
17563
17607
|
*/
|
|
17564
17608
|
|
|
17565
17609
|
/**
|
|
17566
|
-
*
|
|
17567
|
-
*
|
|
17568
|
-
* Note: It also checks if directory exists and creates it if not
|
|
17610
|
+
* Retrieves an intermediate source for a scraper based on the knowledge source.
|
|
17611
|
+
* Manages the caching and retrieval of intermediate scraper results for optimized performance.
|
|
17569
17612
|
*
|
|
17570
17613
|
* @private as internal utility for scrapers
|
|
17571
17614
|
*/
|
|
@@ -17792,14 +17835,14 @@ const boilerplateScraperMetadata = $deepFreeze({
|
|
|
17792
17835
|
packageName: '@promptbook/boilerplate',
|
|
17793
17836
|
className: 'BoilerplateScraper',
|
|
17794
17837
|
mimeTypes: [
|
|
17795
|
-
'
|
|
17796
|
-
// <- TODO:
|
|
17838
|
+
'@@/@@',
|
|
17839
|
+
// <- TODO: @@ Add compatible mime types with Boilerplate scraper
|
|
17797
17840
|
],
|
|
17798
|
-
documentationUrl: 'https://github.com/webgptorg/promptbook/discussions
|
|
17841
|
+
documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/@@',
|
|
17799
17842
|
isAvilableInBrowser: false,
|
|
17800
17843
|
// <- Note: [🌏] Only `MarkdownScraper` makes sense to be available in the browser, for scraping non-markdown sources in the browser use a remote server
|
|
17801
17844
|
requiredExecutables: [
|
|
17802
|
-
/*
|
|
17845
|
+
/* @@ 'Pandoc' */
|
|
17803
17846
|
],
|
|
17804
17847
|
}); /* <- Note: [🤛] */
|
|
17805
17848
|
/**
|
|
@@ -17817,7 +17860,7 @@ const _BoilerplateScraperMetadataRegistration = $scrapersMetadataRegister.regist
|
|
|
17817
17860
|
*/
|
|
17818
17861
|
|
|
17819
17862
|
/**
|
|
17820
|
-
* Scraper of
|
|
17863
|
+
* Scraper of @@ files
|
|
17821
17864
|
*
|
|
17822
17865
|
* @see `documentationUrl` for more details
|
|
17823
17866
|
* @public exported from `@promptbook/boilerplate`
|
|
@@ -17835,30 +17878,30 @@ class BoilerplateScraper {
|
|
|
17835
17878
|
this.markdownScraper = new MarkdownScraper(tools, options);
|
|
17836
17879
|
}
|
|
17837
17880
|
/**
|
|
17838
|
-
* Convert the
|
|
17881
|
+
* Converts the `.@@` file to a `.md` file and returns the intermediate source
|
|
17839
17882
|
*
|
|
17840
17883
|
* Note: `$` is used to indicate that this function is not a pure function - it leaves files on the disk and you are responsible for cleaning them by calling `destroy` method of returned object
|
|
17841
17884
|
*/
|
|
17842
17885
|
async $convert(source) {
|
|
17843
17886
|
var _a;
|
|
17844
17887
|
const { rootDirname = process.cwd(), cacheDirname = DEFAULT_SCRAPE_CACHE_DIRNAME, intermediateFilesStrategy = DEFAULT_INTERMEDIATE_FILES_STRATEGY, isVerbose = DEFAULT_IS_VERBOSE, } = this.options;
|
|
17845
|
-
// TODO:
|
|
17888
|
+
// TODO: @@ Preserve or delete
|
|
17846
17889
|
if (!$isRunningInNode()) {
|
|
17847
17890
|
throw new KnowledgeScrapeError('BoilerplateScraper is only supported in Node environment');
|
|
17848
17891
|
}
|
|
17849
|
-
// TODO:
|
|
17892
|
+
// TODO: @@ Preserve or delete
|
|
17850
17893
|
if (this.tools.fs === undefined) {
|
|
17851
17894
|
throw new EnvironmentMismatchError('Can not scrape boilerplates without filesystem tools');
|
|
17852
17895
|
// <- TODO: [🧠] What is the best error type here?
|
|
17853
17896
|
}
|
|
17854
|
-
// TODO:
|
|
17897
|
+
// TODO: @@ Preserve, delete or modify
|
|
17855
17898
|
if (((_a = this.tools.executables) === null || _a === void 0 ? void 0 : _a.pandocPath) === undefined) {
|
|
17856
17899
|
throw new MissingToolsError('Pandoc is required for scraping .docx files');
|
|
17857
17900
|
}
|
|
17858
|
-
// TODO:
|
|
17901
|
+
// TODO: @@ Preserve, delete or modify
|
|
17859
17902
|
if (source.filename === null) {
|
|
17860
17903
|
// TODO: [🧠] Maybe save file as temporary
|
|
17861
|
-
throw new KnowledgeScrapeError('When parsing
|
|
17904
|
+
throw new KnowledgeScrapeError('When parsing .@@ file, it must be real file in the file system');
|
|
17862
17905
|
}
|
|
17863
17906
|
const extension = getFileExtension(source.filename);
|
|
17864
17907
|
const cacheFilehandler = await getScraperIntermediateSource(source, {
|
|
@@ -17868,7 +17911,7 @@ class BoilerplateScraper {
|
|
|
17868
17911
|
extension: 'md',
|
|
17869
17912
|
isVerbose,
|
|
17870
17913
|
});
|
|
17871
|
-
// TODO:
|
|
17914
|
+
// TODO: @@ Preserve, delete or modify
|
|
17872
17915
|
// Note: Running Pandoc ONLY if the file in the cache does not exist
|
|
17873
17916
|
if (!(await isFileExisting(cacheFilehandler.filename, this.tools.fs))) {
|
|
17874
17917
|
const command = `"${this.tools.executables.pandocPath}" -f ${extension} -t markdown "${source.filename}" -o "${cacheFilehandler.filename}"`;
|
|
@@ -17894,7 +17937,7 @@ class BoilerplateScraper {
|
|
|
17894
17937
|
*/
|
|
17895
17938
|
async scrape(source) {
|
|
17896
17939
|
const cacheFilehandler = await this.$convert(source);
|
|
17897
|
-
// TODO:
|
|
17940
|
+
// TODO: @@ Preserve, delete or modify
|
|
17898
17941
|
const markdownSource = {
|
|
17899
17942
|
source: source.source,
|
|
17900
17943
|
filename: cacheFilehandler.filename,
|
|
@@ -17925,7 +17968,7 @@ class BoilerplateScraper {
|
|
|
17925
17968
|
* TODO: [👣] Converted documents can act as cached items - there is no need to run conversion each time
|
|
17926
17969
|
* TODO: [🪂] Do it in parallel
|
|
17927
17970
|
* Note: No need to aggregate usage here, it is done by intercepting the llmTools
|
|
17928
|
-
*
|
|
17971
|
+
* @@ Note: [🟢] Code in this file should never be released in packages that could be imported into browser environment
|
|
17929
17972
|
*/
|
|
17930
17973
|
|
|
17931
17974
|
/**
|
|
@@ -18259,7 +18302,8 @@ class LegacyDocumentScraper {
|
|
|
18259
18302
|
*/
|
|
18260
18303
|
|
|
18261
18304
|
/**
|
|
18262
|
-
*
|
|
18305
|
+
* Creates a scraper for legacy document formats (.doc, .rtf, etc).
|
|
18306
|
+
* Uses LibreOffice for conversion to extract content from older document formats.
|
|
18263
18307
|
*
|
|
18264
18308
|
* @public exported from `@promptbook/legacy-documents`
|
|
18265
18309
|
*/
|
|
@@ -18286,7 +18330,7 @@ const _LegacyDocumentScraperRegistration = $scrapersRegister.register(createLega
|
|
|
18286
18330
|
*/
|
|
18287
18331
|
|
|
18288
18332
|
/**
|
|
18289
|
-
*
|
|
18333
|
+
* Creates a scraper for document content.
|
|
18290
18334
|
*
|
|
18291
18335
|
* @public exported from `@promptbook/documents`
|
|
18292
18336
|
*/
|
|
@@ -18313,7 +18357,7 @@ const _DocumentScraperRegistration = $scrapersRegister.register(createDocumentSc
|
|
|
18313
18357
|
*/
|
|
18314
18358
|
|
|
18315
18359
|
/**
|
|
18316
|
-
*
|
|
18360
|
+
* Creates a scraper for markdown content.
|
|
18317
18361
|
*
|
|
18318
18362
|
* @public exported from `@promptbook/markdown-utils`
|
|
18319
18363
|
*/
|
|
@@ -18419,8 +18463,8 @@ class MarkitdownScraper {
|
|
|
18419
18463
|
extension: 'md',
|
|
18420
18464
|
isVerbose,
|
|
18421
18465
|
});
|
|
18422
|
-
// TODO:
|
|
18423
|
-
// Note: Running
|
|
18466
|
+
// TODO: Determine if Markitdown conversion should run only if the cache file doesn't exist, or always.
|
|
18467
|
+
// Note: Running Markitdown conversion ONLY if the file in the cache does not exist
|
|
18424
18468
|
if (!(await isFileExisting(cacheFilehandler.filename, this.tools.fs))) {
|
|
18425
18469
|
const src = source.filename || source.url || null;
|
|
18426
18470
|
// console.log('!!', { src, source, cacheFilehandler });
|
|
@@ -18442,11 +18486,11 @@ class MarkitdownScraper {
|
|
|
18442
18486
|
return cacheFilehandler;
|
|
18443
18487
|
}
|
|
18444
18488
|
/**
|
|
18445
|
-
* Scrapes the
|
|
18489
|
+
* Scrapes the source document (PDF, DOCX, etc.) and returns the knowledge pieces or `null` if it can't scrape it.
|
|
18446
18490
|
*/
|
|
18447
18491
|
async scrape(source) {
|
|
18448
18492
|
const cacheFilehandler = await this.$convert(source);
|
|
18449
|
-
// TODO:
|
|
18493
|
+
// TODO: Ensure this correctly creates the source object for the internal MarkdownScraper using the converted file.
|
|
18450
18494
|
const markdownSource = {
|
|
18451
18495
|
source: source.source,
|
|
18452
18496
|
filename: cacheFilehandler.filename,
|
|
@@ -18590,7 +18634,8 @@ class PdfScraper {
|
|
|
18590
18634
|
*/
|
|
18591
18635
|
|
|
18592
18636
|
/**
|
|
18593
|
-
*
|
|
18637
|
+
* Factory function to create an instance of PdfScraper.
|
|
18638
|
+
* It bundles the scraper class with its metadata.
|
|
18594
18639
|
*
|
|
18595
18640
|
* @public exported from `@promptbook/pdf`
|
|
18596
18641
|
*/
|
|
@@ -18766,7 +18811,8 @@ class WebsiteScraper {
|
|
|
18766
18811
|
*/
|
|
18767
18812
|
|
|
18768
18813
|
/**
|
|
18769
|
-
*
|
|
18814
|
+
* Factory function to create an instance of WebsiteScraper.
|
|
18815
|
+
* It bundles the scraper class with its metadata.
|
|
18770
18816
|
*
|
|
18771
18817
|
* @public exported from `@promptbook/website-crawler`
|
|
18772
18818
|
*/
|