@promptbook/browser 0.72.0-8 → 0.72.0-9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -0
- package/esm/index.es.js +46 -2
- package/esm/index.es.js.map +1 -1
- package/esm/typings/src/_packages/browser.index.d.ts +2 -0
- package/esm/typings/src/_packages/cli.index.d.ts +20 -0
- package/esm/typings/src/_packages/core.index.d.ts +20 -14
- package/esm/typings/src/_packages/documents.index.d.ts +8 -0
- package/esm/typings/src/_packages/legacy-documents.index.d.ts +8 -0
- package/esm/typings/src/_packages/markdown-utils.index.d.ts +6 -0
- package/esm/typings/src/_packages/node.index.d.ts +10 -4
- package/esm/typings/src/_packages/pdf.index.d.ts +8 -0
- package/esm/typings/src/_packages/types.index.d.ts +15 -5
- package/esm/typings/src/_packages/website-crawler.index.d.ts +8 -0
- package/esm/typings/src/collection/constructors/createCollectionFromDirectory.d.ts +5 -3
- package/esm/typings/src/config.d.ts +6 -0
- package/esm/typings/src/conversion/pipelineStringToJson.d.ts +3 -1
- package/esm/typings/src/dialogs/callback/CallbackInterfaceToolsOptions.d.ts +2 -2
- package/esm/typings/src/dialogs/simple-prompt/SimplePromptInterfaceTools.d.ts +3 -3
- package/esm/typings/src/execution/{CommonExecutionToolsOptions.d.ts → CommonToolsOptions.d.ts} +1 -1
- package/esm/typings/src/execution/ExecutionTools.d.ts +26 -6
- package/esm/typings/src/execution/FilesystemTools.d.ts +9 -0
- package/esm/typings/src/execution/createPipelineExecutor/10-executePipeline.d.ts +1 -1
- package/esm/typings/src/execution/createPipelineExecutor/20-executeTemplate.d.ts +1 -6
- package/esm/typings/src/execution/createPipelineExecutor/40-executeAttempts.d.ts +1 -6
- package/esm/typings/src/execution/translation/automatic-translate/translateMessages.d.ts +1 -0
- package/esm/typings/src/execution/utils/$provideExecutionToolsForNode.d.ts +13 -0
- package/esm/typings/src/llm-providers/_common/{$llmToolsMetadataRegister.d.ts → register/$llmToolsMetadataRegister.d.ts} +4 -1
- package/esm/typings/src/llm-providers/_common/{$llmToolsRegister.d.ts → register/$llmToolsRegister.d.ts} +5 -2
- package/esm/typings/src/llm-providers/_common/{createLlmToolsFromConfigurationFromEnv.d.ts → register/$provideLlmToolsConfigurationFromEnv.d.ts} +3 -3
- package/esm/typings/src/llm-providers/_common/{getLlmToolsForCli.d.ts → register/$provideLlmToolsForCli.d.ts} +4 -11
- package/esm/typings/src/llm-providers/_common/{getLlmToolsForTestingAndScriptsAndPlayground.d.ts → register/$provideLlmToolsForTestingAndScriptsAndPlayground.d.ts} +4 -3
- package/esm/typings/src/llm-providers/_common/{createLlmToolsFromEnv.d.ts → register/$provideLlmToolsFromEnv.d.ts} +6 -5
- package/esm/typings/src/llm-providers/_common/{$registeredLlmToolsMessage.d.ts → register/$registeredLlmToolsMessage.d.ts} +5 -2
- package/esm/typings/src/llm-providers/_common/{LlmToolsConfiguration.d.ts → register/LlmToolsConfiguration.d.ts} +5 -4
- package/esm/typings/src/llm-providers/_common/{LlmToolsMetadata.d.ts → register/LlmToolsMetadata.d.ts} +5 -4
- package/esm/typings/src/llm-providers/_common/{LlmToolsOptions.d.ts → register/LlmToolsOptions.d.ts} +4 -1
- package/esm/typings/src/llm-providers/_common/{createLlmToolsFromConfiguration.d.ts → register/createLlmToolsFromConfiguration.d.ts} +5 -4
- package/esm/typings/src/llm-providers/anthropic-claude/AnthropicClaudeExecutionToolsOptions.d.ts +3 -3
- package/esm/typings/src/llm-providers/anthropic-claude/register-configuration.d.ts +4 -3
- package/esm/typings/src/llm-providers/anthropic-claude/register-constructor.d.ts +4 -3
- package/esm/typings/src/llm-providers/azure-openai/AzureOpenAiExecutionToolsOptions.d.ts +2 -2
- package/esm/typings/src/llm-providers/azure-openai/register-configuration.d.ts +4 -3
- package/esm/typings/src/llm-providers/azure-openai/register-constructor.d.ts +4 -3
- package/esm/typings/src/llm-providers/mocked/MockedEchoLlmExecutionTools.d.ts +3 -3
- package/esm/typings/src/llm-providers/mocked/MockedFackedLlmExecutionTools.d.ts +3 -3
- package/esm/typings/src/llm-providers/multiple/MultipleLlmExecutionTools.d.ts +1 -0
- package/esm/typings/src/llm-providers/openai/OpenAiExecutionToolsOptions.d.ts +2 -2
- package/esm/typings/src/llm-providers/openai/register-configuration.d.ts +5 -4
- package/esm/typings/src/llm-providers/openai/register-constructor.d.ts +5 -4
- package/esm/typings/src/llm-providers/remote/interfaces/PromptbookServer_ListModels_Request.d.ts +1 -1
- package/esm/typings/src/llm-providers/remote/interfaces/PromptbookServer_Prompt_Request.d.ts +1 -1
- package/esm/typings/src/llm-providers/remote/interfaces/RemoteLlmExecutionToolsOptions.d.ts +3 -3
- package/esm/typings/src/llm-providers/remote/interfaces/RemoteServerOptions.d.ts +2 -2
- package/esm/typings/src/personas/preparePersona.d.ts +2 -1
- package/esm/typings/src/prepare/PrepareAndScrapeOptions.d.ts +8 -7
- package/esm/typings/src/prepare/preparePipeline.d.ts +2 -1
- package/esm/typings/src/prepare/prepareTemplates.d.ts +2 -1
- package/esm/typings/src/scrapers/_common/Converter.d.ts +4 -10
- package/esm/typings/src/scrapers/_common/Scraper.d.ts +4 -9
- package/esm/typings/src/scrapers/_common/prepareKnowledgePieces.d.ts +2 -1
- package/esm/typings/src/scrapers/_common/register/$provideFilesystemForNode.d.ts +11 -0
- package/esm/typings/src/scrapers/_common/register/$provideScrapersForBrowser.d.ts +12 -0
- package/esm/typings/src/scrapers/_common/register/$provideScrapersForNode.d.ts +15 -0
- package/esm/typings/src/scrapers/_common/register/$registeredScrapersMessage.d.ts +12 -0
- package/esm/typings/src/scrapers/_common/register/$scrapersMetadataRegister.d.ts +13 -0
- package/esm/typings/src/scrapers/_common/register/$scrapersRegister.d.ts +13 -0
- package/esm/typings/src/scrapers/_common/register/ScraperAndConverterMetadata.d.ts +41 -0
- package/esm/typings/src/scrapers/_common/register/ScraperConstructor.d.ts +12 -0
- package/esm/typings/src/scrapers/_common/utils/getScraperIntermediateSource.d.ts +1 -0
- package/esm/typings/src/scrapers/_common/utils/makeKnowledgeSourceHandler.d.ts +2 -1
- package/esm/typings/src/scrapers/document/{documentScraper.d.ts → DocumentScraper.d.ts} +18 -12
- package/esm/typings/src/scrapers/document/createDocumentScraper.d.ts +20 -0
- package/esm/typings/src/scrapers/document/register-constructor.d.ts +13 -0
- package/esm/typings/src/scrapers/document/register-metadata.d.ts +24 -0
- package/esm/typings/src/scrapers/document-legacy/{legacyDocumentScraper.d.ts → LegacyDocumentScraper.d.ts} +18 -12
- package/esm/typings/src/scrapers/document-legacy/createLegacyDocumentScraper.d.ts +20 -0
- package/esm/typings/src/scrapers/document-legacy/register-constructor.d.ts +13 -0
- package/esm/typings/src/scrapers/document-legacy/register-metadata.d.ts +24 -0
- package/esm/typings/src/scrapers/markdown/MarkdownScraper.d.ts +29 -0
- package/esm/typings/src/scrapers/markdown/createMarkdownScraper.d.ts +20 -0
- package/esm/typings/src/scrapers/markdown/register-constructor.d.ts +13 -0
- package/esm/typings/src/scrapers/markdown/register-metadata.d.ts +24 -0
- package/esm/typings/src/scrapers/pdf/PdfScraper.d.ts +40 -0
- package/esm/typings/src/scrapers/pdf/createPdfScraper.d.ts +20 -0
- package/esm/typings/src/scrapers/pdf/register-constructor.d.ts +13 -0
- package/esm/typings/src/scrapers/pdf/register-metadata.d.ts +24 -0
- package/esm/typings/src/scrapers/website/{websiteScraper.d.ts → WebsiteScraper.d.ts} +18 -14
- package/esm/typings/src/scrapers/website/createWebsiteScraper.d.ts +20 -0
- package/esm/typings/src/scrapers/website/register-constructor.d.ts +13 -0
- package/esm/typings/src/scrapers/website/register-metadata.d.ts +24 -0
- package/esm/typings/src/scripting/javascript/JavascriptExecutionToolsOptions.d.ts +2 -2
- package/esm/typings/src/scripting/python/PythonExecutionTools.d.ts +3 -3
- package/esm/typings/src/scripting/typescript/TypescriptExecutionTools.d.ts +3 -3
- package/esm/typings/src/storage/file-cache-storage/FileCacheStorage.d.ts +5 -3
- package/esm/typings/src/storage/{utils → memory/utils}/PrefixStorage.d.ts +1 -1
- package/esm/typings/src/storage/{utils → memory/utils}/makePromptbookStorageFromWebStorage.d.ts +1 -1
- package/esm/typings/src/types/typeAliases.d.ts +7 -0
- package/esm/typings/src/utils/$Register.d.ts +19 -6
- package/esm/typings/src/utils/execCommand/$execCommand.d.ts +1 -1
- package/esm/typings/src/utils/execCommand/$execCommands.d.ts +1 -1
- package/esm/typings/src/utils/files/isDirectoryExisting.d.ts +14 -0
- package/esm/typings/src/utils/files/isFileExisting.d.ts +13 -0
- package/esm/typings/src/utils/files/{$listAllFiles.d.ts → listAllFiles.d.ts} +3 -4
- package/package.json +2 -2
- package/umd/index.umd.js +46 -1
- package/umd/index.umd.js.map +1 -1
- package/esm/typings/src/scrapers/index.d.ts +0 -8
- package/esm/typings/src/scrapers/markdown/markdownScraper.d.ts +0 -29
- package/esm/typings/src/scrapers/pdf/pdfScraper.d.ts +0 -35
- package/esm/typings/src/utils/files/$isDirectoryExisting.d.ts +0 -15
- package/esm/typings/src/utils/files/$isFileExisting.d.ts +0 -14
- /package/esm/typings/src/scrapers/document/{documentScraper.test.d.ts → DocumentScraper.test.d.ts} +0 -0
- /package/esm/typings/src/scrapers/document-legacy/{legacyDocumentScraper.test.d.ts → LegacyDocumentScraper.test.d.ts} +0 -0
- /package/esm/typings/src/scrapers/markdown/{markdownScraper.test.d.ts → MarkdownScraper.test.d.ts} +0 -0
- /package/esm/typings/src/scrapers/website/{websiteScraper.test.d.ts → WebsiteScraper.test.d.ts} +0 -0
- /package/esm/typings/src/utils/files/{$isDirectoryExisting.test.d.ts → isDirectoryExisting.test.d.ts} +0 -0
- /package/esm/typings/src/utils/files/{$isFileExisting.test.d.ts → isFileExisting.test.d.ts} +0 -0
- /package/esm/typings/src/utils/files/{$listAllFiles.test.d.ts → listAllFiles.test.d.ts} +0 -0
|
@@ -1,14 +1,14 @@
|
|
|
1
|
-
import type {
|
|
1
|
+
import type { CommonToolsOptions } from '../../../execution/CommonToolsOptions';
|
|
2
2
|
import type { string_base_url } from '../../../types/typeAliases';
|
|
3
3
|
import type { string_uri } from '../../../types/typeAliases';
|
|
4
4
|
import type { string_user_id } from '../../../types/typeAliases';
|
|
5
|
-
import type { LlmToolsConfiguration } from '../../_common/LlmToolsConfiguration';
|
|
5
|
+
import type { LlmToolsConfiguration } from '../../_common/register/LlmToolsConfiguration';
|
|
6
6
|
/**
|
|
7
7
|
* Options for `RemoteLlmExecutionTools`
|
|
8
8
|
*
|
|
9
9
|
* @public exported from `@promptbook/remote-client`
|
|
10
10
|
*/
|
|
11
|
-
export type RemoteLlmExecutionToolsOptions =
|
|
11
|
+
export type RemoteLlmExecutionToolsOptions = CommonToolsOptions & {
|
|
12
12
|
/**
|
|
13
13
|
* URL of the remote PROMPTBOOK server
|
|
14
14
|
* On this server will be connected to the socket.io server
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import type { PipelineCollection } from '../../../collection/PipelineCollection';
|
|
2
|
-
import type {
|
|
2
|
+
import type { CommonToolsOptions } from '../../../execution/CommonToolsOptions';
|
|
3
3
|
import type { LlmExecutionTools } from '../../../execution/LlmExecutionTools';
|
|
4
4
|
import type { string_uri } from '../../../types/typeAliases';
|
|
5
5
|
import type { string_user_id } from '../../../types/typeAliases';
|
|
@@ -17,7 +17,7 @@ import type { string_user_id } from '../../../types/typeAliases';
|
|
|
17
17
|
* @public exported from `@promptbook/remote-client`
|
|
18
18
|
* @public exported from `@promptbook/remote-server`
|
|
19
19
|
*/
|
|
20
|
-
export type RemoteServerOptions =
|
|
20
|
+
export type RemoteServerOptions = CommonToolsOptions & {
|
|
21
21
|
/**
|
|
22
22
|
* Port on which the server will listen
|
|
23
23
|
*/
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import type { ExecutionTools } from '../execution/ExecutionTools';
|
|
1
2
|
import type { PrepareAndScrapeOptions } from '../prepare/PrepareAndScrapeOptions';
|
|
2
3
|
import type { PersonaPreparedJson } from '../types/PipelineJson/PersonaJson';
|
|
3
4
|
import type { string_persona_description } from '../types/typeAliases';
|
|
@@ -7,7 +8,7 @@ import type { string_persona_description } from '../types/typeAliases';
|
|
|
7
8
|
* @see https://github.com/webgptorg/promptbook/discussions/22
|
|
8
9
|
* @public exported from `@promptbook/core`
|
|
9
10
|
*/
|
|
10
|
-
export declare function preparePersona(personaDescription: string_persona_description, options: PrepareAndScrapeOptions): Promise<PersonaPreparedJson['modelRequirements']>;
|
|
11
|
+
export declare function preparePersona(personaDescription: string_persona_description, tools: Pick<ExecutionTools, 'llm'>, options: PrepareAndScrapeOptions): Promise<PersonaPreparedJson['modelRequirements']>;
|
|
11
12
|
/**
|
|
12
13
|
* TODO: [🔃][main] !!!!! If the persona was prepared with different version or different set of models, prepare it once again
|
|
13
14
|
* TODO: [🏢] !! Check validity of `modelName` in pipeline
|
|
@@ -1,20 +1,17 @@
|
|
|
1
|
-
import type { LlmExecutionTools } from '../execution/LlmExecutionTools';
|
|
2
1
|
import type { string_dirname } from '../types/typeAliases';
|
|
3
2
|
/**
|
|
4
3
|
* Options for preparation of the pipeline
|
|
5
4
|
*/
|
|
6
5
|
export type PrepareAndScrapeOptions = {
|
|
7
|
-
/**
|
|
8
|
-
* LLM tools
|
|
9
|
-
*/
|
|
10
|
-
readonly llmTools?: LlmExecutionTools;
|
|
11
6
|
/**
|
|
12
7
|
* Path to the root folder of the pipeline
|
|
13
8
|
*
|
|
14
9
|
* Note: When the pipeline is not created from files, it is `null`
|
|
15
|
-
* Note: This folder must exist
|
|
10
|
+
* Note: This folder must exist (=it is not created recursively)
|
|
11
|
+
*
|
|
12
|
+
* @default process.cwd()
|
|
16
13
|
*/
|
|
17
|
-
readonly rootDirname
|
|
14
|
+
readonly rootDirname?: string_dirname | null;
|
|
18
15
|
/**
|
|
19
16
|
* Path to the cache folder
|
|
20
17
|
*
|
|
@@ -55,6 +52,10 @@ export type PrepareAndScrapeOptions = {
|
|
|
55
52
|
*/
|
|
56
53
|
readonly libreOfficePath?: string;
|
|
57
54
|
};
|
|
55
|
+
/**
|
|
56
|
+
* If true, the missing software is automatically installed
|
|
57
|
+
*/
|
|
58
|
+
readonly isAutoInstalled?: boolean;
|
|
58
59
|
/**
|
|
59
60
|
* If true, the preparation logs additional information
|
|
60
61
|
*
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import type { ExecutionTools } from '../execution/ExecutionTools';
|
|
1
2
|
import type { PipelineJson } from '../types/PipelineJson/PipelineJson';
|
|
2
3
|
import type { PrepareAndScrapeOptions } from './PrepareAndScrapeOptions';
|
|
3
4
|
/**
|
|
@@ -8,7 +9,7 @@ import type { PrepareAndScrapeOptions } from './PrepareAndScrapeOptions';
|
|
|
8
9
|
* Note: When the pipeline is already prepared, it returns the same pipeline
|
|
9
10
|
* @public exported from `@promptbook/core`
|
|
10
11
|
*/
|
|
11
|
-
export declare function preparePipeline(pipeline: PipelineJson, options: PrepareAndScrapeOptions): Promise<PipelineJson>;
|
|
12
|
+
export declare function preparePipeline(pipeline: PipelineJson, tools: Pick<ExecutionTools, 'llm' | 'fs' | 'scrapers'>, options: PrepareAndScrapeOptions): Promise<PipelineJson>;
|
|
12
13
|
/**
|
|
13
14
|
* TODO: Write tests for `preparePipeline`
|
|
14
15
|
* TODO: [🏏] Leverage the batch API and build queues @see https://platform.openai.com/docs/guides/batch
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import type { ExecutionTools } from '../execution/ExecutionTools';
|
|
1
2
|
import type { PipelineJson } from '../types/PipelineJson/PipelineJson';
|
|
2
3
|
import type { TemplateJson } from '../types/PipelineJson/TemplateJson';
|
|
3
4
|
import type { PrepareAndScrapeOptions } from './PrepareAndScrapeOptions';
|
|
@@ -18,7 +19,7 @@ type PreparedTemplates = {
|
|
|
18
19
|
*
|
|
19
20
|
* @public exported from `@promptbook/core`
|
|
20
21
|
*/
|
|
21
|
-
export declare function prepareTemplates(pipeline: PrepareTemplateInput, options: PrepareAndScrapeOptions): Promise<PreparedTemplates>;
|
|
22
|
+
export declare function prepareTemplates(pipeline: PrepareTemplateInput, tools: Pick<ExecutionTools, 'llm' | 'fs' | 'scrapers'>, options: PrepareAndScrapeOptions): Promise<PreparedTemplates>;
|
|
22
23
|
export {};
|
|
23
24
|
/**
|
|
24
25
|
* TODO: [🧠] Add context to each template (if missing)
|
|
@@ -1,6 +1,4 @@
|
|
|
1
|
-
import type {
|
|
2
|
-
import type { string_mime_type } from '../../types/typeAliases';
|
|
3
|
-
import type { string_promptbook_documentation_url } from '../../types/typeAliases';
|
|
1
|
+
import type { ScraperAndConverterMetadata } from './register/ScraperAndConverterMetadata';
|
|
4
2
|
import type { ScraperSourceHandler } from './Scraper';
|
|
5
3
|
import type { ScraperIntermediateSource } from './ScraperIntermediateSource';
|
|
6
4
|
/**
|
|
@@ -9,13 +7,9 @@ import type { ScraperIntermediateSource } from './ScraperIntermediateSource';
|
|
|
9
7
|
*/
|
|
10
8
|
export type Converter = {
|
|
11
9
|
/**
|
|
12
|
-
*
|
|
10
|
+
* Metadata of the converter which includes title, mime types, etc.
|
|
13
11
|
*/
|
|
14
|
-
readonly
|
|
15
|
-
/**
|
|
16
|
-
* Link to documentation
|
|
17
|
-
*/
|
|
18
|
-
readonly documentationUrl: string_promptbook_documentation_url;
|
|
12
|
+
readonly metadata: ScraperAndConverterMetadata;
|
|
19
13
|
/**
|
|
20
14
|
* Converts the file and returns the intermediate source or `null` if it can't convert it
|
|
21
15
|
*
|
|
@@ -24,5 +18,5 @@ export type Converter = {
|
|
|
24
18
|
*
|
|
25
19
|
* Note: `$` is used to indicate that this function is not a pure function - it leaves files on the disk and you are responsible for cleaning them by calling `destroy` method of returned object
|
|
26
20
|
*/
|
|
27
|
-
$convert(source: ScraperSourceHandler
|
|
21
|
+
$convert(source: ScraperSourceHandler): Promise<ScraperIntermediateSource>;
|
|
28
22
|
};
|
|
@@ -1,28 +1,23 @@
|
|
|
1
1
|
import type { Promisable } from 'type-fest';
|
|
2
|
-
import type { PrepareAndScrapeOptions } from '../../prepare/PrepareAndScrapeOptions';
|
|
3
2
|
import type { KnowledgePiecePreparedJson } from '../../types/PipelineJson/KnowledgePieceJson';
|
|
4
3
|
import type { string_filename } from '../../types/typeAliases';
|
|
5
4
|
import type { string_knowledge_source_link } from '../../types/typeAliases';
|
|
6
5
|
import type { string_mime_type } from '../../types/typeAliases';
|
|
7
|
-
import type { string_promptbook_documentation_url } from '../../types/typeAliases';
|
|
8
6
|
import type { string_url } from '../../types/typeAliases';
|
|
7
|
+
import type { ScraperAndConverterMetadata } from './register/ScraperAndConverterMetadata';
|
|
9
8
|
/**
|
|
10
9
|
* @@@
|
|
11
10
|
*
|
|
12
11
|
*/
|
|
13
12
|
export type Scraper = {
|
|
14
13
|
/**
|
|
15
|
-
*
|
|
14
|
+
* Metadata of the scraper which includes title, mime types, etc.
|
|
16
15
|
*/
|
|
17
|
-
readonly
|
|
18
|
-
/**
|
|
19
|
-
* Link to documentation
|
|
20
|
-
*/
|
|
21
|
-
readonly documentationUrl: string_promptbook_documentation_url;
|
|
16
|
+
readonly metadata: ScraperAndConverterMetadata;
|
|
22
17
|
/**
|
|
23
18
|
* Scrapes the markdown file and returns the knowledge pieces or `null` if it can't scrape it
|
|
24
19
|
*/
|
|
25
|
-
scrape(source: ScraperSourceHandler
|
|
20
|
+
scrape(source: ScraperSourceHandler): Promisable<Array<Omit<KnowledgePiecePreparedJson, 'sources' | 'preparationIds'>> | null>;
|
|
26
21
|
};
|
|
27
22
|
/**
|
|
28
23
|
* @@@
|
|
@@ -1,13 +1,14 @@
|
|
|
1
1
|
import type { PrepareAndScrapeOptions } from '../../prepare/PrepareAndScrapeOptions';
|
|
2
2
|
import type { KnowledgePiecePreparedJson } from '../../types/PipelineJson/KnowledgePieceJson';
|
|
3
3
|
import type { KnowledgeSourceJson } from '../../types/PipelineJson/KnowledgeSourceJson';
|
|
4
|
+
import type { ExecutionTools } from '../../execution/ExecutionTools';
|
|
4
5
|
/**
|
|
5
6
|
* Prepares the knowledge pieces
|
|
6
7
|
*
|
|
7
8
|
* @see https://github.com/webgptorg/promptbook/discussions/41
|
|
8
9
|
* @public exported from `@promptbook/core`
|
|
9
10
|
*/
|
|
10
|
-
export declare function prepareKnowledgePieces(knowledgeSources: Array<KnowledgeSourceJson>, options: PrepareAndScrapeOptions): Promise<Array<Omit<KnowledgePiecePreparedJson, 'preparationIds'>>>;
|
|
11
|
+
export declare function prepareKnowledgePieces(knowledgeSources: Array<KnowledgeSourceJson>, tools: Pick<ExecutionTools, 'llm' | 'fs' | 'scrapers'>, options: PrepareAndScrapeOptions): Promise<Array<Omit<KnowledgePiecePreparedJson, 'preparationIds'>>>;
|
|
11
12
|
/**
|
|
12
13
|
* TODO: [🧊] In future one preparation can take data from previous preparation and save tokens and time
|
|
13
14
|
* Put `knowledgePieces` into `PrepareKnowledgeOptions`
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import type { FilesystemTools } from '../../../execution/FilesystemTools';
|
|
2
|
+
import type { PrepareAndScrapeOptions } from '../../../prepare/PrepareAndScrapeOptions';
|
|
3
|
+
/**
|
|
4
|
+
* @@@
|
|
5
|
+
*
|
|
6
|
+
* @public exported from `@promptbook/node`
|
|
7
|
+
*/
|
|
8
|
+
export declare function $provideFilesystemForNode(options?: Pick<PrepareAndScrapeOptions, 'isVerbose'>): FilesystemTools;
|
|
9
|
+
/**
|
|
10
|
+
* Note: [🟢] Code in this file should never be released in packages that could be imported into browser environment
|
|
11
|
+
*/
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import type { ExecutionTools } from '../../../execution/ExecutionTools';
|
|
2
|
+
import type { PrepareAndScrapeOptions } from '../../../prepare/PrepareAndScrapeOptions';
|
|
3
|
+
import type { Scraper } from '../Scraper';
|
|
4
|
+
/**
|
|
5
|
+
* @@@!!!!!!
|
|
6
|
+
*
|
|
7
|
+
* 1) @@@
|
|
8
|
+
* 2) @@@
|
|
9
|
+
*
|
|
10
|
+
* @public exported from `@promptbook/browser`
|
|
11
|
+
*/
|
|
12
|
+
export declare function $provideScrapersForBrowser(tools: Pick<ExecutionTools, 'llm'>, options?: PrepareAndScrapeOptions): Promise<Array<Scraper>>;
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import type { ExecutionTools } from '../../../execution/ExecutionTools';
|
|
2
|
+
import type { PrepareAndScrapeOptions } from '../../../prepare/PrepareAndScrapeOptions';
|
|
3
|
+
import type { Scraper } from '../Scraper';
|
|
4
|
+
/**
|
|
5
|
+
* !!!!!!
|
|
6
|
+
*
|
|
7
|
+
* 1) @@@
|
|
8
|
+
* 2) @@@
|
|
9
|
+
*
|
|
10
|
+
* @public exported from `@promptbook/node`
|
|
11
|
+
*/
|
|
12
|
+
export declare function $provideScrapersForNode(tools: Pick<ExecutionTools, 'fs' | 'llm'>, options?: PrepareAndScrapeOptions): Promise<Array<Scraper>>;
|
|
13
|
+
/**
|
|
14
|
+
* Note: [🟢] Code in this file should never be released in packages that could be imported into browser environment
|
|
15
|
+
*/
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import type { string_markdown } from '../../../types/typeAliases';
|
|
2
|
+
/**
|
|
3
|
+
* Creates a message with all registered scrapers
|
|
4
|
+
*
|
|
5
|
+
* Note: This function is used to create a (error) message when there is no scraper for particular mime type
|
|
6
|
+
*
|
|
7
|
+
* @private internal function of `createScrapersFromConfiguration` and `createScrapersFromEnv`
|
|
8
|
+
*/
|
|
9
|
+
export declare function $registeredScrapersMessage(): string_markdown;
|
|
10
|
+
/**
|
|
11
|
+
* TODO: [®] DRY Register logic
|
|
12
|
+
*/
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import { $Register } from '../../../utils/$Register';
|
|
2
|
+
import type { ScraperAndConverterMetadata } from './ScraperAndConverterMetadata';
|
|
3
|
+
/**
|
|
4
|
+
* @@@
|
|
5
|
+
*
|
|
6
|
+
* Note: `$` is used to indicate that this interacts with the global scope
|
|
7
|
+
* @singleton Only one instance of each register is created per build, but there can be more @@@
|
|
8
|
+
* @public exported from `@promptbook/core`
|
|
9
|
+
*/
|
|
10
|
+
export declare const $scrapersMetadataRegister: $Register<ScraperAndConverterMetadata>;
|
|
11
|
+
/**
|
|
12
|
+
* TODO: [®] DRY Register logic
|
|
13
|
+
*/
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import { $Register } from '../../../utils/$Register';
|
|
2
|
+
import type { ScraperConstructor } from './ScraperConstructor';
|
|
3
|
+
/**
|
|
4
|
+
* @@@
|
|
5
|
+
*
|
|
6
|
+
* Note: `$` is used to indicate that this interacts with the global scope
|
|
7
|
+
* @singleton Only one instance of each register is created per build, but there can be more @@@
|
|
8
|
+
* @public exported from `@promptbook/core`
|
|
9
|
+
*/
|
|
10
|
+
export declare const $scrapersRegister: $Register<ScraperConstructor>;
|
|
11
|
+
/**
|
|
12
|
+
* TODO: [®] DRY Register logic
|
|
13
|
+
*/
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import type { string_mime_type } from '../../../types/typeAliases';
|
|
2
|
+
import type { string_promptbook_documentation_url } from '../../../types/typeAliases';
|
|
3
|
+
import type { string_title } from '../../../types/typeAliases';
|
|
4
|
+
import type { Registered } from '../../../utils/$Register';
|
|
5
|
+
import type { TODO_any } from '../../../utils/organization/TODO_any';
|
|
6
|
+
/**
|
|
7
|
+
* @@@
|
|
8
|
+
*
|
|
9
|
+
* @@@
|
|
10
|
+
* x) `Scraper`
|
|
11
|
+
* x) `Converter`
|
|
12
|
+
* x) `ScraperConstructor`
|
|
13
|
+
* x) `Registered`
|
|
14
|
+
* x) `ExecutionTools`
|
|
15
|
+
* x) `ScraperAndConverterMetadata`
|
|
16
|
+
* x) `PrepareAndScrapeOptions`
|
|
17
|
+
* x) `ScraperConfiguration`
|
|
18
|
+
* x) `ScraperOptions`
|
|
19
|
+
*/
|
|
20
|
+
export type ScraperAndConverterMetadata = Registered & {
|
|
21
|
+
/**
|
|
22
|
+
* @@@
|
|
23
|
+
*/
|
|
24
|
+
readonly title: string_title;
|
|
25
|
+
/**
|
|
26
|
+
* Mime types that this scraper can handle
|
|
27
|
+
*/
|
|
28
|
+
readonly mimeTypes: ReadonlyArray<string_mime_type>;
|
|
29
|
+
/**
|
|
30
|
+
* @@@
|
|
31
|
+
*/
|
|
32
|
+
readonly isAvilableInBrowser: boolean;
|
|
33
|
+
/**
|
|
34
|
+
* @@@
|
|
35
|
+
*/
|
|
36
|
+
readonly requiredExecutables: TODO_any;
|
|
37
|
+
/**
|
|
38
|
+
* Link to documentation
|
|
39
|
+
*/
|
|
40
|
+
readonly documentationUrl: string_promptbook_documentation_url;
|
|
41
|
+
};
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import type { ExecutionTools } from '../../../execution/ExecutionTools';
|
|
2
|
+
import type { PrepareAndScrapeOptions } from '../../../prepare/PrepareAndScrapeOptions';
|
|
3
|
+
import type { Registered } from '../../../utils/$Register';
|
|
4
|
+
import type { Scraper } from '../Scraper';
|
|
5
|
+
import type { ScraperAndConverterMetadata } from './ScraperAndConverterMetadata';
|
|
6
|
+
/**
|
|
7
|
+
* @@@
|
|
8
|
+
*/
|
|
9
|
+
export type ScraperConstructor = Registered & ScraperAndConverterMetadata & ((tools: Pick<ExecutionTools, 'llm'>, options: PrepareAndScrapeOptions) => Scraper);
|
|
10
|
+
/**
|
|
11
|
+
* TODO: [🎶] Naming "constructor" vs "creator" vs "factory"
|
|
12
|
+
*/
|
|
@@ -30,4 +30,5 @@ export {};
|
|
|
30
30
|
* 1) Need to store more than serialized JSONs
|
|
31
31
|
* 2) Need to switch between a `rootDirname` and `cacheDirname` <- TODO: !!!!
|
|
32
32
|
* TODO: [🐱🐉][🧠] Make some smart crop
|
|
33
|
+
* Note: [🟢] Code in this file should never be released in packages that could be imported into browser environment
|
|
33
34
|
*/
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import type { SetOptional } from 'type-fest';
|
|
2
|
+
import type { ExecutionTools } from '../../../execution/ExecutionTools';
|
|
2
3
|
import type { PrepareAndScrapeOptions } from '../../../prepare/PrepareAndScrapeOptions';
|
|
3
4
|
import type { KnowledgeSourceJson } from '../../../types/PipelineJson/KnowledgeSourceJson';
|
|
4
5
|
import type { ScraperSourceHandler } from '../Scraper';
|
|
@@ -7,4 +8,4 @@ import type { ScraperSourceHandler } from '../Scraper';
|
|
|
7
8
|
*
|
|
8
9
|
* @private for scraper utilities
|
|
9
10
|
*/
|
|
10
|
-
export declare function makeKnowledgeSourceHandler(knowledgeSource: SetOptional<KnowledgeSourceJson, 'name'>, options?: Pick<PrepareAndScrapeOptions, 'rootDirname' | 'isVerbose'>): Promise<ScraperSourceHandler>;
|
|
11
|
+
export declare function makeKnowledgeSourceHandler(knowledgeSource: SetOptional<KnowledgeSourceJson, 'name'>, tools: Pick<ExecutionTools, 'fs'>, options?: Pick<PrepareAndScrapeOptions, 'rootDirname' | 'isVerbose'>): Promise<ScraperSourceHandler>;
|
|
@@ -1,37 +1,43 @@
|
|
|
1
|
-
import type { PrepareAndScrapeOptions } from '../../prepare/PrepareAndScrapeOptions';
|
|
2
1
|
import type { KnowledgePiecePreparedJson } from '../../types/PipelineJson/KnowledgePieceJson';
|
|
2
|
+
import type { ExecutionTools } from '../../execution/ExecutionTools';
|
|
3
|
+
import type { PrepareAndScrapeOptions } from '../../prepare/PrepareAndScrapeOptions';
|
|
4
|
+
import type { Converter } from '../_common/Converter';
|
|
5
|
+
import type { ScraperAndConverterMetadata } from '../_common/register/ScraperAndConverterMetadata';
|
|
6
|
+
import type { Scraper } from '../_common/Scraper';
|
|
3
7
|
import type { ScraperSourceHandler } from '../_common/Scraper';
|
|
4
8
|
import type { ScraperIntermediateSource } from '../_common/ScraperIntermediateSource';
|
|
5
9
|
/**
|
|
6
10
|
* Scraper of .docx and .odt files
|
|
7
11
|
*
|
|
8
12
|
* @see `documentationUrl` for more details
|
|
9
|
-
* @public exported from `@promptbook/
|
|
13
|
+
* @public exported from `@promptbook/documents`
|
|
10
14
|
*/
|
|
11
|
-
export declare
|
|
15
|
+
export declare class DocumentScraper implements Converter, Scraper {
|
|
16
|
+
private readonly tools;
|
|
17
|
+
private readonly options;
|
|
12
18
|
/**
|
|
13
|
-
*
|
|
19
|
+
* Metadata of the scraper which includes title, mime types, etc.
|
|
14
20
|
*/
|
|
15
|
-
|
|
21
|
+
get metadata(): ScraperAndConverterMetadata;
|
|
16
22
|
/**
|
|
17
|
-
*
|
|
23
|
+
* Markdown scraper is used internally
|
|
18
24
|
*/
|
|
19
|
-
|
|
25
|
+
private readonly markdownScraper;
|
|
26
|
+
constructor(tools: Pick<ExecutionTools, 'fs' | 'llm'>, options: PrepareAndScrapeOptions);
|
|
20
27
|
/**
|
|
21
28
|
* Convert the `.docx` or `.odt` to `.md` file and returns intermediate source
|
|
22
29
|
*
|
|
23
30
|
* Note: `$` is used to indicate that this function is not a pure function - it leaves files on the disk and you are responsible for cleaning them by calling `destroy` method of returned object
|
|
24
31
|
*/
|
|
25
|
-
$convert(source: ScraperSourceHandler
|
|
32
|
+
$convert(source: ScraperSourceHandler): Promise<ScraperIntermediateSource>;
|
|
26
33
|
/**
|
|
27
34
|
* Scrapes the docx file and returns the knowledge pieces or `null` if it can't scrape it
|
|
28
35
|
*/
|
|
29
|
-
scrape(source: ScraperSourceHandler
|
|
30
|
-
}
|
|
36
|
+
scrape(source: ScraperSourceHandler): Promise<Array<Omit<KnowledgePiecePreparedJson, 'sources' | 'preparationIds'>> | null>;
|
|
37
|
+
}
|
|
31
38
|
/**
|
|
32
39
|
* TODO: [👣] Converted documents can act as cached items - there is no need to run conversion each time
|
|
33
|
-
* TODO: [🦖] Make some system for putting scrapers to separete packages
|
|
34
40
|
* TODO: [🪂] Do it in parallel 11:11
|
|
35
|
-
* TODO: [🦷] Ideally use `as const satisfies Converter & Scraper` BUT this combination throws errors
|
|
36
41
|
* Note: No need to aggregate usage here, it is done by intercepting the llmTools
|
|
42
|
+
* Note: [🟢] Code in this file should never be released in packages that could be imported into browser environment
|
|
37
43
|
*/
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import type { ExecutionTools } from '../../execution/ExecutionTools';
|
|
2
|
+
import type { PrepareAndScrapeOptions } from '../../prepare/PrepareAndScrapeOptions';
|
|
3
|
+
import { DocumentScraper } from './DocumentScraper';
|
|
4
|
+
/**
|
|
5
|
+
* @@@
|
|
6
|
+
*
|
|
7
|
+
* @public exported from `@promptbook/documents`
|
|
8
|
+
*/
|
|
9
|
+
export declare const createDocumentScraper: ((tools: Pick<ExecutionTools, 'llm'>, options: PrepareAndScrapeOptions) => DocumentScraper) & import("type-fest/source/readonly-deep").ReadonlyObjectDeep<{
|
|
10
|
+
title: string;
|
|
11
|
+
packageName: string;
|
|
12
|
+
className: string;
|
|
13
|
+
mimeTypes: string[];
|
|
14
|
+
documentationUrl: "https://github.com/webgptorg/promptbook/discussions/@@";
|
|
15
|
+
isAvilableInBrowser: false;
|
|
16
|
+
requiredExecutables: string[];
|
|
17
|
+
}>;
|
|
18
|
+
/**
|
|
19
|
+
* TODO: [🎶] Naming "constructor" vs "creator" vs "factory"
|
|
20
|
+
*/
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import type { Registration } from '../../utils/$Register';
|
|
2
|
+
/**
|
|
3
|
+
* Registration of known scraper
|
|
4
|
+
*
|
|
5
|
+
* Warning: This is not useful for the end user, it is just a side effect of the mechanism that handles all available known scrapers
|
|
6
|
+
*
|
|
7
|
+
* @public exported from `@promptbook/documents`
|
|
8
|
+
* @public exported from `@promptbook/cli`
|
|
9
|
+
*/
|
|
10
|
+
export declare const _DocumentScraperRegistration: Registration;
|
|
11
|
+
/**
|
|
12
|
+
* TODO: [🎶] Naming "constructor" vs "creator" vs "factory"
|
|
13
|
+
*/
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import type { Registration } from '../../utils/$Register';
|
|
2
|
+
/**
|
|
3
|
+
* Metadata of the scraper
|
|
4
|
+
*
|
|
5
|
+
* @private within the scraper directory
|
|
6
|
+
*/
|
|
7
|
+
export declare const documentScraperMetadata: import("type-fest/source/readonly-deep").ReadonlyObjectDeep<{
|
|
8
|
+
title: string;
|
|
9
|
+
packageName: string;
|
|
10
|
+
className: string;
|
|
11
|
+
mimeTypes: string[];
|
|
12
|
+
documentationUrl: "https://github.com/webgptorg/promptbook/discussions/@@";
|
|
13
|
+
isAvilableInBrowser: false;
|
|
14
|
+
requiredExecutables: string[];
|
|
15
|
+
}>;
|
|
16
|
+
/**
|
|
17
|
+
* Registration of known scraper metadata
|
|
18
|
+
*
|
|
19
|
+
* Warning: This is not useful for the end user, it is just a side effect of the mechanism that handles all available known scrapers
|
|
20
|
+
*
|
|
21
|
+
* @public exported from `@promptbook/core`
|
|
22
|
+
* @public exported from `@promptbook/cli`
|
|
23
|
+
*/
|
|
24
|
+
export declare const _DocumentScraperMetadataRegistration: Registration;
|
|
@@ -1,37 +1,43 @@
|
|
|
1
|
-
import type { PrepareAndScrapeOptions } from '../../prepare/PrepareAndScrapeOptions';
|
|
2
1
|
import type { KnowledgePiecePreparedJson } from '../../types/PipelineJson/KnowledgePieceJson';
|
|
2
|
+
import type { ExecutionTools } from '../../execution/ExecutionTools';
|
|
3
|
+
import type { PrepareAndScrapeOptions } from '../../prepare/PrepareAndScrapeOptions';
|
|
4
|
+
import type { Converter } from '../_common/Converter';
|
|
5
|
+
import type { ScraperAndConverterMetadata } from '../_common/register/ScraperAndConverterMetadata';
|
|
6
|
+
import type { Scraper } from '../_common/Scraper';
|
|
3
7
|
import type { ScraperSourceHandler } from '../_common/Scraper';
|
|
4
8
|
import type { ScraperIntermediateSource } from '../_common/ScraperIntermediateSource';
|
|
5
9
|
/**
|
|
6
10
|
* Scraper for .docx files
|
|
7
11
|
*
|
|
8
12
|
* @see `documentationUrl` for more details
|
|
9
|
-
* @public exported from `@promptbook/
|
|
13
|
+
* @public exported from `@promptbook/legacy-documents`
|
|
10
14
|
*/
|
|
11
|
-
export declare
|
|
15
|
+
export declare class LegacyDocumentScraper implements Converter, Scraper {
|
|
16
|
+
private readonly tools;
|
|
17
|
+
private readonly options;
|
|
12
18
|
/**
|
|
13
|
-
*
|
|
19
|
+
* Metadata of the scraper which includes title, mime types, etc.
|
|
14
20
|
*/
|
|
15
|
-
|
|
21
|
+
get metadata(): ScraperAndConverterMetadata;
|
|
16
22
|
/**
|
|
17
|
-
*
|
|
23
|
+
* Document scraper is used internally
|
|
18
24
|
*/
|
|
19
|
-
|
|
25
|
+
private readonly documentScraper;
|
|
26
|
+
constructor(tools: Pick<ExecutionTools, 'fs' | 'llm'>, options: PrepareAndScrapeOptions);
|
|
20
27
|
/**
|
|
21
28
|
* Convert the `.doc` or `.rtf` to `.docx` file and returns intermediate source
|
|
22
29
|
*
|
|
23
30
|
* Note: `$` is used to indicate that this function is not a pure function - it leaves files on the disk and you are responsible for cleaning them by calling `destroy` method of returned object
|
|
24
31
|
*/
|
|
25
|
-
$convert(source: ScraperSourceHandler
|
|
32
|
+
$convert(source: ScraperSourceHandler): Promise<ScraperIntermediateSource>;
|
|
26
33
|
/**
|
|
27
34
|
* Scrapes the `.doc` or `.rtf` file and returns the knowledge pieces or `null` if it can't scrape it
|
|
28
35
|
*/
|
|
29
|
-
scrape(source: ScraperSourceHandler
|
|
30
|
-
}
|
|
36
|
+
scrape(source: ScraperSourceHandler): Promise<Array<Omit<KnowledgePiecePreparedJson, 'sources' | 'preparationIds'>> | null>;
|
|
37
|
+
}
|
|
31
38
|
/**
|
|
32
39
|
* TODO: [👣] Converted documents can act as cached items - there is no need to run conversion each time
|
|
33
|
-
* TODO: [🦖] Make some system for putting scrapers to separate packages
|
|
34
40
|
* TODO: [🪂] Do it in parallel 11:11
|
|
35
|
-
* TODO: [🦷] Ideally use `as const satisfies Converter & Scraper` BUT this combination throws errors
|
|
36
41
|
* Note: No need to aggregate usage here, it is done by intercepting the llmTools
|
|
42
|
+
* Note: [🟢] Code in this file should never be released in packages that could be imported into browser environment
|
|
37
43
|
*/
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import type { ExecutionTools } from '../../execution/ExecutionTools';
|
|
2
|
+
import type { PrepareAndScrapeOptions } from '../../prepare/PrepareAndScrapeOptions';
|
|
3
|
+
import { LegacyDocumentScraper } from './LegacyDocumentScraper';
|
|
4
|
+
/**
|
|
5
|
+
* @@@
|
|
6
|
+
*
|
|
7
|
+
* @public exported from `@promptbook/legacy-documents`
|
|
8
|
+
*/
|
|
9
|
+
export declare const createLegacyDocumentScraper: ((tools: Pick<ExecutionTools, 'llm'>, options: PrepareAndScrapeOptions) => LegacyDocumentScraper) & import("type-fest/source/readonly-deep").ReadonlyObjectDeep<{
|
|
10
|
+
title: string;
|
|
11
|
+
packageName: string;
|
|
12
|
+
className: string;
|
|
13
|
+
mimeTypes: string[];
|
|
14
|
+
documentationUrl: "https://github.com/webgptorg/promptbook/discussions/@@";
|
|
15
|
+
isAvilableInBrowser: false;
|
|
16
|
+
requiredExecutables: string[];
|
|
17
|
+
}>;
|
|
18
|
+
/**
|
|
19
|
+
* TODO: [🎶] Naming "constructor" vs "creator" vs "factory"
|
|
20
|
+
*/
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import type { Registration } from '../../utils/$Register';
|
|
2
|
+
/**
|
|
3
|
+
* Registration of known scraper
|
|
4
|
+
*
|
|
5
|
+
* Warning: This is not useful for the end user, it is just a side effect of the mechanism that handles all available known scrapers
|
|
6
|
+
*
|
|
7
|
+
* @public exported from `@promptbook/legacy-documents`
|
|
8
|
+
* @public exported from `@promptbook/cli`
|
|
9
|
+
*/
|
|
10
|
+
export declare const _LegacyDocumentScraperRegistration: Registration;
|
|
11
|
+
/**
|
|
12
|
+
* TODO: [🎶] Naming "constructor" vs "creator" vs "factory"
|
|
13
|
+
*/
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import type { Registration } from '../../utils/$Register';
|
|
2
|
+
/**
|
|
3
|
+
* Metadata of the scraper
|
|
4
|
+
*
|
|
5
|
+
* @private within the scraper directory
|
|
6
|
+
*/
|
|
7
|
+
export declare const legacyDocumentScraperMetadata: import("type-fest/source/readonly-deep").ReadonlyObjectDeep<{
|
|
8
|
+
title: string;
|
|
9
|
+
packageName: string;
|
|
10
|
+
className: string;
|
|
11
|
+
mimeTypes: string[];
|
|
12
|
+
documentationUrl: "https://github.com/webgptorg/promptbook/discussions/@@";
|
|
13
|
+
isAvilableInBrowser: false;
|
|
14
|
+
requiredExecutables: string[];
|
|
15
|
+
}>;
|
|
16
|
+
/**
|
|
17
|
+
* Registration of known scraper metadata
|
|
18
|
+
*
|
|
19
|
+
* Warning: This is not useful for the end user, it is just a side effect of the mechanism that handles all available known scrapers
|
|
20
|
+
*
|
|
21
|
+
* @public exported from `@promptbook/core`
|
|
22
|
+
* @public exported from `@promptbook/cli`
|
|
23
|
+
*/
|
|
24
|
+
export declare const _LegacyDocumentScraperMetadataRegistration: Registration;
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
import type { KnowledgePiecePreparedJson } from '../../types/PipelineJson/KnowledgePieceJson';
|
|
2
|
+
import type { Scraper } from '../_common/Scraper';
|
|
3
|
+
import type { ScraperSourceHandler } from '../_common/Scraper';
|
|
4
|
+
import type { ExecutionTools } from '../../execution/ExecutionTools';
|
|
5
|
+
import type { PrepareAndScrapeOptions } from '../../prepare/PrepareAndScrapeOptions';
|
|
6
|
+
import type { ScraperAndConverterMetadata } from '../_common/register/ScraperAndConverterMetadata';
|
|
7
|
+
/**
|
|
8
|
+
* Scraper for markdown files
|
|
9
|
+
*
|
|
10
|
+
* @see `documentationUrl` for more details
|
|
11
|
+
* @public exported from `@promptbook/markdown-utils`
|
|
12
|
+
*/
|
|
13
|
+
export declare class MarkdownScraper implements Scraper {
|
|
14
|
+
private readonly tools;
|
|
15
|
+
private readonly options;
|
|
16
|
+
/**
|
|
17
|
+
* Metadata of the scraper which includes title, mime types, etc.
|
|
18
|
+
*/
|
|
19
|
+
get metadata(): ScraperAndConverterMetadata;
|
|
20
|
+
constructor(tools: Pick<ExecutionTools, 'llm'>, options: PrepareAndScrapeOptions);
|
|
21
|
+
/**
|
|
22
|
+
* Scrapes the markdown file and returns the knowledge pieces or `null` if it can't scrape it
|
|
23
|
+
*/
|
|
24
|
+
scrape(source: ScraperSourceHandler): Promise<Array<Omit<KnowledgePiecePreparedJson, 'sources' | 'preparationIds'>> | null>;
|
|
25
|
+
}
|
|
26
|
+
/**
|
|
27
|
+
* TODO: [🪂] Do it in parallel 11:11
|
|
28
|
+
* Note: No need to aggregate usage here, it is done by intercepting the llmTools
|
|
29
|
+
*/
|