@promptbook/remote-server 0.71.0-0 → 0.71.0-7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. package/README.md +6 -0
  2. package/esm/index.es.js +6 -6
  3. package/esm/index.es.js.map +1 -1
  4. package/esm/typings/src/_packages/browser.index.d.ts +1 -1
  5. package/esm/typings/src/_packages/core.index.d.ts +28 -10
  6. package/esm/typings/src/_packages/node.index.d.ts +6 -2
  7. package/esm/typings/src/_packages/types.index.d.ts +28 -20
  8. package/esm/typings/src/cli/cli-commands/about.d.ts +1 -1
  9. package/esm/typings/src/cli/cli-commands/hello.d.ts +1 -1
  10. package/esm/typings/src/cli/cli-commands/make.d.ts +1 -1
  11. package/esm/typings/src/cli/cli-commands/prettify.d.ts +1 -1
  12. package/esm/typings/src/cli/main.d.ts +1 -1
  13. package/esm/typings/src/cli/promptbookCli.d.ts +1 -1
  14. package/esm/typings/src/collection/constructors/createCollectionFromDirectory.d.ts +8 -5
  15. package/esm/typings/src/collection/constructors/createCollectionFromUrl.d.ts +1 -1
  16. package/esm/typings/src/commands/EXPECT/expectCommandParser.d.ts +1 -1
  17. package/esm/typings/src/commands/FOREACH/ForeachJson.d.ts +5 -1
  18. package/esm/typings/src/commands/FOREACH/foreachCommandParser.d.ts +1 -1
  19. package/esm/typings/src/commands/FORMAT/formatCommandParser.d.ts +1 -1
  20. package/esm/typings/src/commands/JOKER/jokerCommandParser.d.ts +1 -1
  21. package/esm/typings/src/commands/KNOWLEDGE/knowledgeCommandParser.d.ts +1 -1
  22. package/esm/typings/src/commands/KNOWLEDGE/utils/sourceContentToName.d.ts +11 -0
  23. package/esm/typings/src/commands/KNOWLEDGE/utils/sourceContentToName.test.d.ts +4 -0
  24. package/esm/typings/src/commands/MODEL/modelCommandParser.d.ts +1 -1
  25. package/esm/typings/src/commands/PARAMETER/parameterCommandParser.d.ts +1 -1
  26. package/esm/typings/src/commands/PERSONA/personaCommandParser.d.ts +1 -1
  27. package/esm/typings/src/commands/POSTPROCESS/postprocessCommandParser.d.ts +1 -1
  28. package/esm/typings/src/commands/PROMPTBOOK_VERSION/promptbookVersionCommandParser.d.ts +1 -1
  29. package/esm/typings/src/commands/TEMPLATE/templateCommandParser.d.ts +1 -1
  30. package/esm/typings/src/commands/URL/urlCommandParser.d.ts +1 -1
  31. package/esm/typings/src/commands/X_ACTION/actionCommandParser.d.ts +1 -1
  32. package/esm/typings/src/commands/X_INSTRUMENT/instrumentCommandParser.d.ts +1 -1
  33. package/esm/typings/src/commands/_BOILERPLATE/boilerplateCommandParser.d.ts +1 -1
  34. package/esm/typings/src/config.d.ts +10 -0
  35. package/esm/typings/src/conversion/pipelineStringToJson.d.ts +2 -15
  36. package/esm/typings/src/conversion/pipelineStringToJsonSync.d.ts +1 -1
  37. package/esm/typings/src/conversion/validation/_importPipeline.d.ts +1 -1
  38. package/esm/typings/src/{knowledge/dialogs → dialogs}/callback/CallbackInterfaceTools.d.ts +2 -2
  39. package/esm/typings/src/{knowledge/dialogs → dialogs}/callback/CallbackInterfaceToolsOptions.d.ts +2 -2
  40. package/esm/typings/src/{knowledge/dialogs → dialogs}/simple-prompt/SimplePromptInterfaceTools.d.ts +4 -4
  41. package/esm/typings/src/errors/AbstractFormatError.d.ts +11 -0
  42. package/esm/typings/src/errors/KnowledgeScrapeError.d.ts +9 -0
  43. package/esm/typings/src/errors/MissingToolsError.d.ts +9 -0
  44. package/esm/typings/src/execution/ExecutionTools.d.ts +3 -3
  45. package/esm/typings/src/execution/PipelineExecutor.d.ts +1 -0
  46. package/esm/typings/src/execution/createPipelineExecutor/00-CreatePipelineExecutorOptions.d.ts +5 -2
  47. package/esm/typings/src/execution/createPipelineExecutor/00-CreatePipelineExecutorSettings.d.ts +2 -13
  48. package/esm/typings/src/execution/createPipelineExecutor/00-createPipelineExecutor.d.ts +3 -0
  49. package/esm/typings/src/execution/createPipelineExecutor/10-executePipeline.d.ts +4 -1
  50. package/esm/typings/src/execution/createPipelineExecutor/20-executeTemplate.d.ts +3 -0
  51. package/esm/typings/src/execution/createPipelineExecutor/{30-executeFormatCells.d.ts → 30-executeFormatSubvalues.d.ts} +2 -6
  52. package/esm/typings/src/execution/embeddingVectorToString.d.ts +1 -1
  53. package/esm/typings/src/execution/utils/forEachAsync.d.ts +1 -0
  54. package/esm/typings/src/formats/_common/FormatSubvalueDefinition.d.ts +2 -1
  55. package/esm/typings/src/formats/csv/CsvFormatError.d.ts +10 -0
  56. package/esm/typings/src/llm-providers/_common/createLlmToolsFromConfigurationFromEnv.d.ts +1 -1
  57. package/esm/typings/src/llm-providers/_common/createLlmToolsFromEnv.d.ts +1 -1
  58. package/esm/typings/src/llm-providers/_common/getLlmToolsForCli.d.ts +1 -1
  59. package/esm/typings/src/llm-providers/anthropic-claude/AnthropicClaudeExecutionTools.d.ts +2 -1
  60. package/esm/typings/src/llm-providers/anthropic-claude/playground/playground.d.ts +3 -2
  61. package/esm/typings/src/llm-providers/azure-openai/AzureOpenAiExecutionTools.d.ts +8 -1
  62. package/esm/typings/src/llm-providers/azure-openai/playground/playground.d.ts +1 -0
  63. package/esm/typings/src/llm-providers/langtail/playground/playground.d.ts +3 -0
  64. package/esm/typings/src/llm-providers/multiple/playground/playground.d.ts +3 -0
  65. package/esm/typings/src/llm-providers/openai/OpenAiExecutionTools.d.ts +2 -1
  66. package/esm/typings/src/llm-providers/openai/playground/playground.d.ts +2 -1
  67. package/esm/typings/src/llm-providers/remote/playground/playground.d.ts +3 -0
  68. package/esm/typings/src/personas/preparePersona.d.ts +2 -2
  69. package/esm/typings/src/prepare/PrepareAndScrapeOptions.d.ts +67 -0
  70. package/esm/typings/src/prepare/preparePipeline.d.ts +2 -2
  71. package/esm/typings/src/prepare/prepareTemplates.d.ts +2 -2
  72. package/esm/typings/src/scrapers/_common/Converter.d.ts +28 -0
  73. package/esm/typings/src/scrapers/_common/Scraper.d.ts +71 -0
  74. package/esm/typings/src/scrapers/_common/ScraperIntermediateSource.d.ts +11 -0
  75. package/esm/typings/src/{knowledge/prepare-knowledge → scrapers}/_common/prepareKnowledgePieces.d.ts +4 -4
  76. package/esm/typings/src/scrapers/_common/utils/getScraperIntermediateSource.d.ts +33 -0
  77. package/esm/typings/src/scrapers/_common/utils/getScraperIntermediateSource.test.d.ts +4 -0
  78. package/esm/typings/src/scrapers/_common/utils/makeKnowledgeSourceHandler.d.ts +10 -0
  79. package/esm/typings/src/scrapers/document/documentScraper.d.ts +37 -0
  80. package/esm/typings/src/scrapers/document/documentScraper.test.d.ts +4 -0
  81. package/esm/typings/src/scrapers/document/playground/document-scraper-playground.d.ts +5 -0
  82. package/esm/typings/src/scrapers/document-legacy/legacyDocumentScraper.d.ts +37 -0
  83. package/esm/typings/src/scrapers/document-legacy/legacyDocumentScraper.test.d.ts +4 -0
  84. package/esm/typings/src/scrapers/document-legacy/playground/legacy-document-scraper-playground.d.ts +5 -0
  85. package/esm/typings/src/scrapers/index.d.ts +7 -0
  86. package/esm/typings/src/scrapers/markdown/markdownScraper.d.ts +29 -0
  87. package/esm/typings/src/scrapers/markdown/playground/markdown-scraper-playground.d.ts +5 -0
  88. package/esm/typings/src/scrapers/pdf/pdfScraper.d.ts +35 -0
  89. package/esm/typings/src/scrapers/pdf/playground/pdf-scraper-playground.d.ts +5 -0
  90. package/esm/typings/src/scrapers/website/playground/website-scraper-playground.d.ts +5 -0
  91. package/esm/typings/src/scrapers/website/utils/markdownConverter.d.ts +12 -0
  92. package/esm/typings/src/scrapers/website/websiteScraper.d.ts +43 -0
  93. package/esm/typings/src/storage/{files-storage/FilesStorage.d.ts → file-cache-storage/FileCacheStorage.d.ts} +4 -4
  94. package/esm/typings/src/storage/file-cache-storage/FileCacheStorageOptions.d.ts +10 -0
  95. package/esm/typings/src/storage/{files-storage → file-cache-storage}/utils/nameToSubfolderPath.d.ts +1 -1
  96. package/esm/typings/src/storage/file-cache-storage/utils/nameToSubfolderPath.test.d.ts +1 -0
  97. package/esm/typings/src/storage/local-storage/getLocalStorage.d.ts +1 -1
  98. package/esm/typings/src/storage/local-storage/getSessionStorage.d.ts +1 -1
  99. package/esm/typings/src/types/PipelineJson/ParameterJson.d.ts +1 -1
  100. package/esm/typings/src/types/PipelineJson/PipelineJson.d.ts +2 -2
  101. package/esm/typings/src/types/execution-report/ExecutionReportJson.d.ts +0 -3
  102. package/esm/typings/src/types/execution-report/executionReportJsonToString.d.ts +2 -1
  103. package/esm/typings/src/types/typeAliases.d.ts +9 -12
  104. package/esm/typings/src/utils/execCommand/$execCommand.d.ts +14 -0
  105. package/esm/typings/src/utils/execCommand/$execCommands.d.ts +17 -0
  106. package/esm/typings/src/utils/execCommand/IExecCommandOptions.d.ts +23 -0
  107. package/esm/typings/src/utils/execCommand/execCommand.test.d.ts +1 -0
  108. package/esm/typings/src/utils/execCommand/execCommandNormalizeOptions.d.ts +10 -0
  109. package/esm/typings/src/utils/execCommand/execCommandNormalizeOptions.test.d.ts +1 -0
  110. package/esm/typings/src/utils/files/$isDirectoryExisting.d.ts +3 -3
  111. package/esm/typings/src/utils/files/$isFileExisting.d.ts +3 -3
  112. package/esm/typings/src/utils/files/$listAllFiles.d.ts +4 -4
  113. package/esm/typings/src/utils/files/extensionToMimeType.d.ts +8 -0
  114. package/esm/typings/src/utils/files/extensionToMimeType.test.d.ts +1 -0
  115. package/esm/typings/src/utils/files/getFileExtension.d.ts +8 -0
  116. package/esm/typings/src/utils/files/getFileExtension.test.d.ts +1 -0
  117. package/esm/typings/src/utils/validators/filePath/isValidFilePath.d.ts +2 -2
  118. package/package.json +2 -2
  119. package/umd/index.umd.js +6 -6
  120. package/umd/index.umd.js.map +1 -1
  121. package/esm/typings/src/knowledge/prepare-knowledge/_common/Scraper.d.ts +0 -37
  122. package/esm/typings/src/knowledge/prepare-knowledge/markdown/playground/markdown-knowledge-playground.d.ts +0 -2
  123. package/esm/typings/src/knowledge/prepare-knowledge/markdown/prepareKnowledgeFromMarkdown.d.ts +0 -14
  124. package/esm/typings/src/knowledge/prepare-knowledge/pdf/prepareKnowledgeFromPdf.d.ts +0 -15
  125. package/esm/typings/src/prepare/PrepareOptions.d.ts +0 -22
  126. package/esm/typings/src/storage/files-storage/FilesStorageOptions.d.ts +0 -10
  127. /package/esm/typings/src/{knowledge/dialogs → dialogs}/user-interface-execution-tools.test.d.ts +0 -0
  128. /package/esm/typings/src/{knowledge/prepare-knowledge → scrapers}/_common/prepareKnowledgePieces.test.d.ts +0 -0
  129. /package/esm/typings/src/{knowledge/prepare-knowledge/markdown/prepareKnowledgeFromMarkdown.test.d.ts → scrapers/markdown/markdownScraper.test.d.ts} +0 -0
  130. /package/esm/typings/src/{knowledge/prepare-knowledge/pdf/prepareKnowledgeFromPdf.test.d.ts → scrapers/website/utils/markdownConverter.test.d.ts} +0 -0
  131. /package/esm/typings/src/{storage/files-storage/utils/nameToSubfolderPath.test.d.ts → scrapers/website/websiteScraper.test.d.ts} +0 -0
@@ -1,2 +1,5 @@
1
1
  #!/usr/bin/env ts-node
2
2
  import '../../openai/register-constructor';
3
+ /**
4
+ * Note: [⚫] Code in this file should never be published in any package
5
+ */
@@ -1,4 +1,4 @@
1
- import type { PrepareOptions } from '../prepare/PrepareOptions';
1
+ import type { PrepareAndScrapeOptions } from '../prepare/PrepareAndScrapeOptions';
2
2
  import type { PersonaPreparedJson } from '../types/PipelineJson/PersonaJson';
3
3
  import type { string_persona_description } from '../types/typeAliases';
4
4
  /**
@@ -7,7 +7,7 @@ import type { string_persona_description } from '../types/typeAliases';
7
7
  * @see https://github.com/webgptorg/promptbook/discussions/22
8
8
  * @public exported from `@promptbook/core`
9
9
  */
10
- export declare function preparePersona(personaDescription: string_persona_description, options: PrepareOptions): Promise<PersonaPreparedJson['modelRequirements']>;
10
+ export declare function preparePersona(personaDescription: string_persona_description, options: PrepareAndScrapeOptions): Promise<PersonaPreparedJson['modelRequirements']>;
11
11
  /**
12
12
  * TODO: [🔃][main] !!!!! If the persona was prepared with different version or different set of models, prepare it once again
13
13
  * TODO: [🏢] !! Check validity of `modelName` in pipeline
@@ -0,0 +1,67 @@
1
+ import type { LlmExecutionTools } from '../execution/LlmExecutionTools';
2
+ import type { string_dirname } from '../types/typeAliases';
3
+ /**
4
+ * Options for preparation of the pipeline
5
+ */
6
+ export type PrepareAndScrapeOptions = {
7
+ /**
8
+ * LLM tools
9
+ */
10
+ readonly llmTools?: LlmExecutionTools;
11
+ /**
12
+ * Path to the root folder of the pipeline
13
+ *
14
+ * Note: When the pipeline is not created from files, it is `null`
15
+ * Note: This folder must exist
16
+ */
17
+ readonly rootDirname: string_dirname | null;
18
+ /**
19
+ * Path to the cache folder
20
+ *
21
+ * Note: When the folder does not exist, it is created recursively
22
+ *
23
+ * @default SCRAPE_CACHE_DIRNAME
24
+ */
25
+ readonly cacheDirname?: string_dirname;
26
+ /**
27
+ * If true, the cache is cleaned after the scraping
28
+ *
29
+ *
30
+ * @default false // <- TODO: !!!!!! Change to `cacheStrategy`/`intermediateFiles`, Put to global config, change to `true` and explicitly set to `false` in all playgrounds
31
+ */
32
+ readonly isCacheCleaned?: boolean;
33
+ /**
34
+ * Maximum number of tasks running in parallel
35
+ *
36
+ * @default MAX_PARALLEL_COUNT
37
+ */
38
+ readonly maxParallelCount?: number;
39
+ /**
40
+ * Path to the external programs executables
41
+ *
42
+ * TODO: !!!!!! Transform to scrapers and make them Classy
43
+ */
44
+ readonly externalProgramsPaths?: {
45
+ /**
46
+ * Path to the `pandoc` executable
47
+ *
48
+ * @example 'C:/Users/me/AppData/Local/Pandoc/pandoc.exe'
49
+ */
50
+ readonly pandocPath?: string;
51
+ /**
52
+ * Path to the LibreOffice executable
53
+ *
54
+ * @example 'C:/Program Files/LibreOffice/program/swriter.exe'
55
+ */
56
+ readonly libreOfficePath?: string;
57
+ };
58
+ /**
59
+ * If true, the preparation logs additional information
60
+ *
61
+ * @default false
62
+ */
63
+ readonly isVerbose?: boolean;
64
+ };
65
+ /**
66
+ * TODO: [🧠] Maybe split `PrepareAndScrapeOptions` and `ScrapeOptions` (`ScrapeOptions` should be extended from `PrepareAndScrapeOptions`)
67
+ */
@@ -1,5 +1,5 @@
1
1
  import type { PipelineJson } from '../types/PipelineJson/PipelineJson';
2
- import type { PrepareOptions } from './PrepareOptions';
2
+ import type { PrepareAndScrapeOptions } from './PrepareAndScrapeOptions';
3
3
  /**
4
4
  * Prepare pipeline from string (markdown) format to JSON format
5
5
  *
@@ -8,7 +8,7 @@ import type { PrepareOptions } from './PrepareOptions';
8
8
  * Note: When the pipeline is already prepared, it returns the same pipeline
9
9
  * @public exported from `@promptbook/core`
10
10
  */
11
- export declare function preparePipeline(pipeline: PipelineJson, options: PrepareOptions): Promise<PipelineJson>;
11
+ export declare function preparePipeline(pipeline: PipelineJson, options: PrepareAndScrapeOptions): Promise<PipelineJson>;
12
12
  /**
13
13
  * TODO: Write tests for `preparePipeline`
14
14
  * TODO: [🏏] Leverage the batch API and build queues @see https://platform.openai.com/docs/guides/batch
@@ -1,6 +1,6 @@
1
1
  import type { PipelineJson } from '../types/PipelineJson/PipelineJson';
2
2
  import type { TemplateJson } from '../types/PipelineJson/TemplateJson';
3
- import type { PrepareOptions } from './PrepareOptions';
3
+ import type { PrepareAndScrapeOptions } from './PrepareAndScrapeOptions';
4
4
  type PrepareTemplateInput = Pick<PipelineJson, 'templates' | 'parameters'> & {
5
5
  /**
6
6
  * @@@
@@ -18,7 +18,7 @@ type PreparedTemplates = {
18
18
  *
19
19
  * @public exported from `@promptbook/core`
20
20
  */
21
- export declare function prepareTemplates(pipeline: PrepareTemplateInput, options: PrepareOptions): Promise<PreparedTemplates>;
21
+ export declare function prepareTemplates(pipeline: PrepareTemplateInput, options: PrepareAndScrapeOptions): Promise<PreparedTemplates>;
22
22
  export {};
23
23
  /**
24
24
  * TODO: [🧠] Add context to each template (if missing)
@@ -0,0 +1,28 @@
1
+ import type { PrepareAndScrapeOptions } from '../../prepare/PrepareAndScrapeOptions';
2
+ import type { string_mime_type } from '../../types/typeAliases';
3
+ import type { string_promptbook_documentation_url } from '../../types/typeAliases';
4
+ import type { ScraperSourceHandler } from './Scraper';
5
+ import type { ScraperIntermediateSource } from './ScraperIntermediateSource';
6
+ /**
7
+ * @@@
8
+ *
9
+ */
10
+ export type Converter = {
11
+ /**
12
+ * Mime types that this converter can handle
13
+ */
14
+ readonly mimeTypes: Array<string_mime_type>;
15
+ /**
16
+ * Link to documentation
17
+ */
18
+ readonly documentationUrl: string_promptbook_documentation_url;
19
+ /**
20
+ * Converts the file and returns the intermediate source or `null` if it can't convert it
21
+ *
22
+ * For example, convert a `.docx` to `.doc` file
23
+ * Or convert a `.pdf` to `.md` file
24
+ *
25
+ * Note: `$` is used to indicate that this function is not a pure function - it leaves files on the disk and you are responsible for cleaning them by calling `destroy` method of returned object
26
+ */
27
+ $convert(source: ScraperSourceHandler, options: PrepareAndScrapeOptions): Promise<ScraperIntermediateSource>;
28
+ };
@@ -0,0 +1,71 @@
1
+ import type { Promisable } from 'type-fest';
2
+ import type { PrepareAndScrapeOptions } from '../../prepare/PrepareAndScrapeOptions';
3
+ import type { KnowledgePiecePreparedJson } from '../../types/PipelineJson/KnowledgePieceJson';
4
+ import type { string_filename } from '../../types/typeAliases';
5
+ import type { string_knowledge_source_link } from '../../types/typeAliases';
6
+ import type { string_mime_type } from '../../types/typeAliases';
7
+ import type { string_promptbook_documentation_url } from '../../types/typeAliases';
8
+ import type { string_url } from '../../types/typeAliases';
9
+ /**
10
+ * @@@
11
+ *
12
+ */
13
+ export type Scraper = {
14
+ /**
15
+ * Mime types that this scraper can handle
16
+ */
17
+ readonly mimeTypes: Array<string_mime_type>;
18
+ /**
19
+ * Link to documentation
20
+ */
21
+ readonly documentationUrl: string_promptbook_documentation_url;
22
+ /**
23
+ * Scrapes the source and returns the knowledge pieces or `null` if it can't scrape it
24
+ */
25
+ scrape(source: ScraperSourceHandler, options: PrepareAndScrapeOptions): Promisable<Array<Omit<KnowledgePiecePreparedJson, 'sources' | 'preparationIds'>> | null>;
26
+ };
27
+ /**
28
+ * @@@
29
+ */
30
+ export type ScraperSourceHandler = {
31
+ /**
32
+ * The source of the knowledge
33
+ */
34
+ readonly source: string_knowledge_source_link;
35
+ /**
36
+ * The path to the file, if it is a file
37
+ *
38
+ * Note: Typically one of the `filename` or `url` is set and the other is `null`
39
+ */
40
+ readonly filename: string_filename | null;
41
+ /**
42
+ * The URL, if it is online
43
+ *
44
+ * Note: Typically one of the `filename` or `url` is set and the other is `null`
45
+ */
46
+ readonly url: string_url | null;
47
+ /**
48
+ * Mime type of the source
49
+ */
50
+ readonly mimeType: string_mime_type;
51
+ /**
52
+ * Get the content as parsed JSON
53
+ */
54
+ asJson(): Promisable<unknown>;
55
+ /**
56
+ * Get the content as a utf-8 string
57
+ */
58
+ asText(): Promisable<string>;
59
+ /**
60
+ * Get the content as a blob
61
+ */
62
+ asBlob(): Promisable<Blob>;
63
+ };
64
+ /**
65
+ * TODO: [🐝] @@@ Annotate all
66
+ * TODO: [🔼] Export via types
67
+ */
68
+ /**
69
+ * TODO: !!!!!! Test that this is caught
70
+ * Note: [⚫] Code in this file should never be published in any package
71
+ */
@@ -0,0 +1,11 @@
1
+ import type { IDestroyable } from 'destroyable';
2
+ import type { string_absolute_filename } from '../../types/typeAliases';
3
+ /**
4
+ * @@@
5
+ */
6
+ export type ScraperIntermediateSource = IDestroyable & {
7
+ /**
8
+ * @@@
9
+ */
10
+ readonly filename: string_absolute_filename;
11
+ };
@@ -1,13 +1,13 @@
1
- import type { PrepareOptions } from '../../../prepare/PrepareOptions';
2
- import type { KnowledgePiecePreparedJson } from '../../../types/PipelineJson/KnowledgePieceJson';
3
- import type { KnowledgeSourceJson } from '../../../types/PipelineJson/KnowledgeSourceJson';
1
+ import type { PrepareAndScrapeOptions } from '../../prepare/PrepareAndScrapeOptions';
2
+ import type { KnowledgePiecePreparedJson } from '../../types/PipelineJson/KnowledgePieceJson';
3
+ import type { KnowledgeSourceJson } from '../../types/PipelineJson/KnowledgeSourceJson';
4
4
  /**
5
5
  * Prepares the knowledge pieces from the given knowledge sources
6
6
  *
7
7
  * @see https://github.com/webgptorg/promptbook/discussions/41
8
8
  * @public exported from `@promptbook/core`
9
9
  */
10
- export declare function prepareKnowledgePieces(knowledgeSources: Array<KnowledgeSourceJson>, options: PrepareOptions): Promise<Array<Omit<KnowledgePiecePreparedJson, 'preparationIds'>>>;
10
+ export declare function prepareKnowledgePieces(knowledgeSources: Array<KnowledgeSourceJson>, options: PrepareAndScrapeOptions): Promise<Array<Omit<KnowledgePiecePreparedJson, 'preparationIds'>>>;
11
11
  /**
12
12
  * TODO: [🧊] In future one preparation can take data from previous preparation and save tokens and time
13
13
  * Put `knowledgePieces` into `PrepareKnowledgeOptions`
@@ -0,0 +1,33 @@
1
+ import type { PrepareAndScrapeOptions } from '../../../prepare/PrepareAndScrapeOptions';
2
+ import type { string_file_extension } from '../../../types/typeAliases';
3
+ import type { ScraperSourceHandler } from '../Scraper';
4
+ import type { ScraperIntermediateSource } from '../ScraperIntermediateSource';
5
+ /**
6
+ * @@@
7
+ *
8
+ * @private internal utility of `getScraperIntermediateSource`
9
+ */
10
+ type GetScraperIntermediateSourceSource = Pick<ScraperSourceHandler, 'filename' | 'url'>;
11
+ /**
12
+ * @@@
13
+ *
14
+ * @private internal utility of `getScraperIntermediateSource`
15
+ */
16
+ type GetScraperIntermediateSourceHandler = Required<Pick<PrepareAndScrapeOptions, 'rootDirname' | 'cacheDirname' | 'isCacheCleaned' | 'isVerbose'>> & {
17
+ readonly extension: string_file_extension;
18
+ };
19
+ /**
20
+ * Create a filename for intermediate cache for scrapers
21
+ *
22
+ * Note: It also checks if directory exists and creates it if not
23
+ *
24
+ * @private as internal utility for scrapers
25
+ */
26
+ export declare function getScraperIntermediateSource(source: GetScraperIntermediateSourceSource, options: GetScraperIntermediateSourceHandler): Promise<ScraperIntermediateSource>;
27
+ export {};
28
+ /**
29
+ * Note: Not using `FileCacheStorage` for two reasons:
30
+ * 1) Need to store more than serialized JSONs
31
+ * 2) Need to switch between a `rootDirname` and `cacheDirname` <- TODO: !!!!
32
+ * TODO: [🐱‍🐉][🧠] Make some smart crop
33
+ */
@@ -0,0 +1,4 @@
1
+ export {};
2
+ /**
3
+ * TODO: [🐱‍🐉][🧠] Make some smart crop
4
+ */
@@ -0,0 +1,10 @@
1
+ import type { SetOptional } from 'type-fest';
2
+ import type { PrepareAndScrapeOptions } from '../../../prepare/PrepareAndScrapeOptions';
3
+ import type { KnowledgeSourceJson } from '../../../types/PipelineJson/KnowledgeSourceJson';
4
+ import type { ScraperSourceHandler } from '../Scraper';
5
+ /**
6
+ * @@@
7
+ *
8
+ * @private for scraper utilities
9
+ */
10
+ export declare function makeKnowledgeSourceHandler(knowledgeSource: SetOptional<KnowledgeSourceJson, 'name'>, options?: Pick<PrepareAndScrapeOptions, 'rootDirname' | 'isVerbose'>): Promise<ScraperSourceHandler>;
@@ -0,0 +1,37 @@
1
+ import type { PrepareAndScrapeOptions } from '../../prepare/PrepareAndScrapeOptions';
2
+ import type { KnowledgePiecePreparedJson } from '../../types/PipelineJson/KnowledgePieceJson';
3
+ import type { ScraperSourceHandler } from '../_common/Scraper';
4
+ import type { ScraperIntermediateSource } from '../_common/ScraperIntermediateSource';
5
+ /**
6
+ * Scraper of .docx and .odt files
7
+ *
8
+ * @see `documentationUrl` for more details
9
+ * @public exported from `@promptbook/core`
10
+ */
11
+ export declare const documentScraper: {
12
+ /**
13
+ * Mime types that this scraper can handle
14
+ */
15
+ mimeTypes: string[];
16
+ /**
17
+ * Link to documentation
18
+ */
19
+ documentationUrl: "https://github.com/webgptorg/promptbook/discussions/@@";
20
+ /**
21
+ * Convert the `.docx` or `.odt` to `.md` file and returns intermediate source
22
+ *
23
+ * Note: `$` is used to indicate that this function is not a pure function - it leaves files on the disk and you are responsible for cleaning them by calling `destroy` method of returned object
24
+ */
25
+ $convert(source: ScraperSourceHandler, options: PrepareAndScrapeOptions): Promise<ScraperIntermediateSource>;
26
+ /**
27
+ * Scrapes the `.docx` or `.odt` file and returns the knowledge pieces or `null` if it can't scrape it
28
+ */
29
+ scrape(source: ScraperSourceHandler, options: PrepareAndScrapeOptions): Promise<Array<Omit<KnowledgePiecePreparedJson, 'sources' | 'preparationIds'>> | null>;
30
+ };
31
+ /**
32
+ * TODO: [👣] Converted documents can act as cached items - there is no need to run conversion each time
33
+ * TODO: [🦖] Make some system for putting scrapers to separate packages
34
+ * TODO: [🪂] Do it in parallel 11:11
35
+ * TODO: [🦷] Ideally use `as const satisfies Converter & Scraper` BUT this combination throws errors
36
+ * Note: No need to aggregate usage here, it is done by intercepting the llmTools
37
+ */
@@ -0,0 +1,4 @@
1
+ export {};
2
+ /**
3
+ * TODO: [📓] Maybe test all files in samples (not just 10-simple.docx)
4
+ */
@@ -0,0 +1,5 @@
1
+ #!/usr/bin/env ts-node
2
+ export {};
3
+ /**
4
+ * Note: [⚫] Code in this file should never be published in any package
5
+ */
@@ -0,0 +1,37 @@
1
+ import type { PrepareAndScrapeOptions } from '../../prepare/PrepareAndScrapeOptions';
2
+ import type { KnowledgePiecePreparedJson } from '../../types/PipelineJson/KnowledgePieceJson';
3
+ import type { ScraperSourceHandler } from '../_common/Scraper';
4
+ import type { ScraperIntermediateSource } from '../_common/ScraperIntermediateSource';
5
+ /**
6
+ * Scraper for .doc and .rtf files
7
+ *
8
+ * @see `documentationUrl` for more details
9
+ * @public exported from `@promptbook/core`
10
+ */
11
+ export declare const legacyDocumentScraper: {
12
+ /**
13
+ * Mime types that this scraper can handle
14
+ */
15
+ mimeTypes: string[];
16
+ /**
17
+ * Link to documentation
18
+ */
19
+ documentationUrl: "https://github.com/webgptorg/promptbook/discussions/@@";
20
+ /**
21
+ * Convert the `.doc` or `.rtf` to `.doc` file and returns intermediate source
22
+ *
23
+ * Note: `$` is used to indicate that this function is not a pure function - it leaves files on the disk and you are responsible for cleaning them by calling `destroy` method of returned object
24
+ */
25
+ $convert(source: ScraperSourceHandler, options: PrepareAndScrapeOptions): Promise<ScraperIntermediateSource>;
26
+ /**
27
+ * Scrapes the `.doc` or `.rtf` file and returns the knowledge pieces or `null` if it can't scrape it
28
+ */
29
+ scrape(source: ScraperSourceHandler, options: PrepareAndScrapeOptions): Promise<Array<Omit<KnowledgePiecePreparedJson, 'sources' | 'preparationIds'>> | null>;
30
+ };
31
+ /**
32
+ * TODO: [👣] Converted documents can act as cached items - there is no need to run conversion each time
33
+ * TODO: [🦖] Make some system for putting scrapers to separate packages
34
+ * TODO: [🪂] Do it in parallel 11:11
35
+ * TODO: [🦷] Ideally use `as const satisfies Converter & Scraper` BUT this combination throws errors
36
+ * Note: No need to aggregate usage here, it is done by intercepting the llmTools
37
+ */
@@ -0,0 +1,4 @@
1
+ export {};
2
+ /**
3
+ * TODO: [📓] Maybe test all files in samples (not just 10-simple.doc)
4
+ */
@@ -0,0 +1,5 @@
1
+ #!/usr/bin/env ts-node
2
+ export {};
3
+ /**
4
+ * Note: [⚫] Code in this file should never be published in any package
5
+ */
@@ -0,0 +1,7 @@
1
+ import type { Scraper } from './_common/Scraper';
2
+ /**
3
+ * @@@
4
+ *
5
+ * @public exported from `@promptbook/core`
6
+ */
7
+ export declare const SCRAPERS: Array<Scraper>;
@@ -0,0 +1,29 @@
1
+ import type { PrepareAndScrapeOptions } from '../../prepare/PrepareAndScrapeOptions';
2
+ import type { KnowledgePiecePreparedJson } from '../../types/PipelineJson/KnowledgePieceJson';
3
+ import type { ScraperSourceHandler } from '../_common/Scraper';
4
+ /**
5
+ * Scraper for markdown files
6
+ *
7
+ * @see `documentationUrl` for more details
8
+ * @public exported from `@promptbook/core`
9
+ */
10
+ export declare const markdownScraper: {
11
+ /**
12
+ * Mime types that this scraper can handle
13
+ */
14
+ mimeTypes: string[];
15
+ /**
16
+ * Link to documentation
17
+ */
18
+ documentationUrl: "https://github.com/webgptorg/promptbook/discussions/@@";
19
+ /**
20
+ * Scrapes the markdown file and returns the knowledge pieces or `null` if it can't scrape it
21
+ */
22
+ scrape(source: ScraperSourceHandler, options: PrepareAndScrapeOptions): Promise<Array<Omit<KnowledgePiecePreparedJson, 'sources' | 'preparationIds'>> | null>;
23
+ };
24
+ /**
25
+ * TODO: [🦖] Make some system for putting scrapers to separate packages
26
+ * TODO: [🪂] Do it in parallel 11:11
27
+ * TODO: [🦷] Ideally use `as const satisfies Scraper` BUT this combination throws errors
28
+ * Note: No need to aggregate usage here, it is done by intercepting the llmTools
29
+ */
@@ -0,0 +1,5 @@
1
+ #!/usr/bin/env ts-node
2
+ export {};
3
+ /**
4
+ * Note: [⚫] Code in this file should never be published in any package
5
+ */
@@ -0,0 +1,35 @@
1
+ import type { PrepareAndScrapeOptions } from '../../prepare/PrepareAndScrapeOptions';
2
+ import type { KnowledgePiecePreparedJson } from '../../types/PipelineJson/KnowledgePieceJson';
3
+ import type { ScraperSourceHandler } from '../_common/Scraper';
4
+ import type { ScraperIntermediateSource } from '../_common/ScraperIntermediateSource';
5
+ /**
6
+ * Scraper for .pdf files
7
+ *
8
+ * @see `documentationUrl` for more details
9
+ * @public exported from `@promptbook/core`
10
+ */
11
+ export declare const pdfScraper: {
12
+ /**
13
+ * Mime types that this scraper can handle
14
+ */
15
+ mimeTypes: string[];
16
+ /**
17
+ * Link to documentation
18
+ */
19
+ documentationUrl: "https://github.com/webgptorg/promptbook/discussions/@@";
20
+ /**
21
+ * Converts the `.pdf` file to `.md` file and returns intermediate source
22
+ */
23
+ $convert(source: ScraperSourceHandler, options: PrepareAndScrapeOptions): Promise<ScraperIntermediateSource>;
24
+ /**
25
+ * Scrapes the `.pdf` file and returns the knowledge pieces or `null` if it can't scrape it
26
+ */
27
+ scrape(source: ScraperSourceHandler, options: PrepareAndScrapeOptions): Promise<Array<Omit<KnowledgePiecePreparedJson, 'sources' | 'preparationIds'>> | null>;
28
+ };
29
+ /**
30
+ * TODO: [👣] Converted documents can act as cached items - there is no need to run conversion each time
31
+ * TODO: [🦖] Make some system for putting scrapers to separate packages
32
+ * TODO: [🪂] Do it in parallel 11:11
33
+ * TODO: [🦷] Ideally use `as const satisfies Converter & Scraper` BUT this combination throws errors
34
+ * Note: No need to aggregate usage here, it is done by intercepting the llmTools
35
+ */
@@ -0,0 +1,5 @@
1
+ #!/usr/bin/env ts-node
2
+ export {};
3
+ /**
4
+ * Note: [⚫] Code in this file should never be published in any package
5
+ */
@@ -0,0 +1,5 @@
1
+ #!/usr/bin/env ts-node
2
+ export {};
3
+ /**
4
+ * Note: [⚫] Code in this file should never be published in any package
5
+ */
@@ -0,0 +1,12 @@
1
+ import { Converter } from 'showdown';
2
+ /**
3
+ * A converter instance that uses showdown and highlight extensions
4
+ *
5
+ * @type {Converter}
6
+ * @private for markdown and html knowledge scrapers
7
+ */
8
+ export declare const markdownConverter: Converter;
9
+ /**
10
+ * TODO: !!!!!! Figure out better name not to confuse with `Converter`
11
+ * TODO: !!!!!! Lazy-make converter
12
+ */
@@ -0,0 +1,43 @@
1
+ import type { PrepareAndScrapeOptions } from '../../prepare/PrepareAndScrapeOptions';
2
+ import type { KnowledgePiecePreparedJson } from '../../types/PipelineJson/KnowledgePieceJson';
3
+ import type { string_markdown } from '../../types/typeAliases';
4
+ import type { ScraperSourceHandler } from '../_common/Scraper';
5
+ import type { ScraperIntermediateSource } from '../_common/ScraperIntermediateSource';
6
+ /**
7
+ * Scraper for websites
8
+ *
9
+ * @see `documentationUrl` for more details
10
+ * @public exported from `@promptbook/core`
11
+ */
12
+ export declare const websiteScraper: {
13
+ /**
14
+ * Mime types that this scraper can handle
15
+ */
16
+ mimeTypes: string[];
17
+ /**
18
+ * Link to documentation
19
+ */
20
+ documentationUrl: "https://github.com/webgptorg/promptbook/discussions/@@";
21
+ /**
22
+ * Converts the website to a `.md` file and returns the intermediate source
23
+ *
24
+ * Note: `$` is used to indicate that this function is not a pure function - it leaves files on the disk and you are responsible for cleaning them by calling `destroy` method of returned object
25
+ */
26
+ $convert(source: ScraperSourceHandler, options: PrepareAndScrapeOptions): Promise<import("destroyable").IDestroyable & {
27
+ readonly filename: string;
28
+ } & {
29
+ markdown: string_markdown;
30
+ }>;
31
+ /**
32
+ * Scrapes the website and returns the knowledge pieces or `null` if it can't scrape it
33
+ */
34
+ scrape(source: ScraperSourceHandler, options: PrepareAndScrapeOptions): Promise<Array<Omit<KnowledgePiecePreparedJson, 'sources' | 'preparationIds'>> | null>;
35
+ };
36
+ /**
37
+ * TODO: !!!!!! Put into separate package
38
+ * TODO: [👣] Scraped website in .md can act as cache item - there is no need to run conversion each time
39
+ * TODO: [🦖] Make some system for putting scrapers to separate packages
40
+ * TODO: [🪂] Do it in parallel 11:11
41
+ * TODO: [🦷] Ideally use `as const satisfies Converter & Scraper` BUT this combination throws errors
42
+ * Note: No need to aggregate usage here, it is done by intercepting the llmTools
43
+ */
@@ -1,13 +1,13 @@
1
1
  import type { PromptbookStorage } from '../_common/PromptbookStorage';
2
- import type { FilesStorageOptions } from './FilesStorageOptions';
2
+ import type { FileCacheStorageOptions } from './FileCacheStorageOptions';
3
3
  /**
4
4
  * @@@
5
5
  *
6
6
  * @public exported from `@promptbook/node`
7
7
  */
8
- export declare class FilesStorage<TItem> implements PromptbookStorage<TItem> {
8
+ export declare class FileCacheStorage<TItem> implements PromptbookStorage<TItem> {
9
9
  private readonly options;
10
- constructor(options: FilesStorageOptions);
10
+ constructor(options: FileCacheStorageOptions);
11
11
  /**
12
12
  * @@@
13
13
  */
@@ -27,5 +27,5 @@ export declare class FilesStorage<TItem> implements PromptbookStorage<TItem> {
27
27
  }
28
28
  /**
29
29
  * TODO: [🌗] Maybe some checkers, not all valid JSONs are desired and valid values
30
- * Note: [🟢] This code should never be published outside of `@promptbook/node` and `@promptbook/cli` and `@promptbook/cli`
30
+ * Note: [🟢] Code in this file should never be published outside of `@promptbook/node` and `@promptbook/cli`
31
31
  */
@@ -0,0 +1,10 @@
1
+ import type { string_dirname } from '../../types/typeAliases';
2
+ /**
3
+ * @@@
4
+ */
5
+ export type FileCacheStorageOptions = {
6
+ /**
7
+ * @@@
8
+ */
9
+ rootFolderPath: string_dirname;
10
+ };
@@ -2,6 +2,6 @@ import type { string_name } from '../../../types/typeAliases';
2
2
  /**
3
3
  * @@@
4
4
  *
5
- * @private for `FilesStorage`
5
+ * @private for `FileCacheStorage`
6
6
  */
7
7
  export declare function nameToSubfolderPath(name: string_name): Array<string>;
@@ -6,5 +6,5 @@ import type { PromptbookStorage } from '../_common/PromptbookStorage';
6
6
  */
7
7
  export declare function getLocalStorage<TItem>(): PromptbookStorage<TItem>;
8
8
  /**
9
- * Note: [🔵] This code should never be published outside of `@promptbook/browser`
9
+ * Note: [🔵] Code in this file should never be published outside of `@promptbook/browser`
10
10
  */
@@ -6,5 +6,5 @@ import type { PromptbookStorage } from '../_common/PromptbookStorage';
6
6
  */
7
7
  export declare function getSessionStorage<TItem>(): PromptbookStorage<TItem>;
8
8
  /**
9
- * Note: [🔵] This code should never be published outside of `@promptbook/browser`
9
+ * Note: [🔵] Code in this file should never be published outside of `@promptbook/browser`
10
10
  */
@@ -33,7 +33,7 @@ export type ParameterJson = {
33
33
  readonly sampleValues?: Array<string_parameter_value>;
34
34
  };
35
35
  /**
36
- * TODO: [🧠] !!!!!! Should be here registered subparameter from foreach or not?
36
+ * TODO: [🧠] Should subparameters from foreach be registered here or not?
37
37
  * TODO: [♈] Probably move expectations from templates to parameters
38
38
  * TODO: [🍙] Make some standard order of json properties
39
39
  */