@promptbook/markdown-utils 0.83.0 → 0.84.0-10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -0
- package/esm/index.es.js +4 -4
- package/esm/index.es.js.map +1 -1
- package/esm/typings/src/_packages/cli.index.d.ts +8 -0
- package/esm/typings/src/_packages/core.index.d.ts +4 -0
- package/esm/typings/src/_packages/editable.index.d.ts +8 -2
- package/esm/typings/src/_packages/markitdown.index.d.ts +8 -0
- package/esm/typings/src/_packages/pdf.index.d.ts +6 -0
- package/esm/typings/src/_packages/utils.index.d.ts +2 -0
- package/esm/typings/src/_packages/wizzard.index.d.ts +8 -0
- package/esm/typings/src/constants.d.ts +1 -1
- package/esm/typings/src/executables/platforms/locateAppOnLinux.d.ts +1 -1
- package/esm/typings/src/executables/platforms/locateAppOnMacOs.d.ts +1 -1
- package/esm/typings/src/pipeline/book-notation.d.ts +4 -0
- package/esm/typings/src/pipeline/prompt-notation.d.ts +18 -0
- package/esm/typings/src/pipeline/prompt-notation.test.d.ts +4 -0
- package/esm/typings/src/scrapers/_boilerplate/BoilerplateScraper.d.ts +43 -0
- package/esm/typings/src/scrapers/_boilerplate/createBoilerplateScraper.d.ts +20 -0
- package/esm/typings/src/scrapers/_boilerplate/playground/boilerplate-scraper-playground.d.ts +5 -0
- package/esm/typings/src/scrapers/_boilerplate/register-constructor.d.ts +15 -0
- package/esm/typings/src/scrapers/_boilerplate/register-metadata.d.ts +28 -0
- package/esm/typings/src/scrapers/markitdown/MarkitdownScraper.d.ts +50 -0
- package/esm/typings/src/scrapers/markitdown/createMarkitdownScraper.d.ts +22 -0
- package/esm/typings/src/scrapers/markitdown/playground/markitdown-scraper-playground.d.ts +5 -0
- package/esm/typings/src/scrapers/markitdown/register-constructor.d.ts +17 -0
- package/esm/typings/src/scrapers/markitdown/register-metadata.d.ts +28 -0
- package/esm/typings/src/types/typeAliases.d.ts +1 -1
- package/esm/typings/src/utils/editable/edit-pipeline-string/addPipelineCommand.d.ts +17 -0
- package/esm/typings/src/utils/editable/edit-pipeline-string/deflatePipeline.d.ts +10 -0
- package/esm/typings/src/utils/editable/{utils → edit-pipeline-string}/removePipelineCommand.d.ts +1 -1
- package/esm/typings/src/utils/editable/edit-pipeline-string/removePipelineCommand.test.d.ts +1 -0
- package/esm/typings/src/utils/editable/utils/isFlatPipeline.d.ts +7 -0
- package/esm/typings/src/utils/editable/utils/renamePipelineParameter.d.ts +3 -0
- package/package.json +2 -2
- package/umd/index.umd.js +4 -4
- package/umd/index.umd.js.map +1 -1
- /package/esm/typings/src/utils/editable/{utils/removePipelineCommand.test.d.ts → edit-pipeline-string/addPipelineCommand.test.d.ts} +0 -0
|
@@ -10,12 +10,16 @@ import { _OpenAiMetadataRegistration } from '../llm-providers/openai/register-co
|
|
|
10
10
|
import { _OpenAiAssistantMetadataRegistration } from '../llm-providers/openai/register-configuration';
|
|
11
11
|
import { _OpenAiRegistration } from '../llm-providers/openai/register-constructor';
|
|
12
12
|
import { _OpenAiAssistantRegistration } from '../llm-providers/openai/register-constructor';
|
|
13
|
+
import { _BoilerplateScraperRegistration } from '../scrapers/_boilerplate/register-constructor';
|
|
14
|
+
import { _BoilerplateScraperMetadataRegistration } from '../scrapers/_boilerplate/register-metadata';
|
|
13
15
|
import { _LegacyDocumentScraperRegistration } from '../scrapers/document-legacy/register-constructor';
|
|
14
16
|
import { _LegacyDocumentScraperMetadataRegistration } from '../scrapers/document-legacy/register-metadata';
|
|
15
17
|
import { _DocumentScraperRegistration } from '../scrapers/document/register-constructor';
|
|
16
18
|
import { _DocumentScraperMetadataRegistration } from '../scrapers/document/register-metadata';
|
|
17
19
|
import { _MarkdownScraperRegistration } from '../scrapers/markdown/register-constructor';
|
|
18
20
|
import { _MarkdownScraperMetadataRegistration } from '../scrapers/markdown/register-metadata';
|
|
21
|
+
import { _MarkitdownScraperRegistration } from '../scrapers/markitdown/register-constructor';
|
|
22
|
+
import { _MarkitdownScraperMetadataRegistration } from '../scrapers/markitdown/register-metadata';
|
|
19
23
|
import { _PdfScraperRegistration } from '../scrapers/pdf/register-constructor';
|
|
20
24
|
import { _PdfScraperMetadataRegistration } from '../scrapers/pdf/register-metadata';
|
|
21
25
|
import { _WebsiteScraperRegistration } from '../scrapers/website/register-constructor';
|
|
@@ -32,12 +36,16 @@ export { _OpenAiMetadataRegistration };
|
|
|
32
36
|
export { _OpenAiAssistantMetadataRegistration };
|
|
33
37
|
export { _OpenAiRegistration };
|
|
34
38
|
export { _OpenAiAssistantRegistration };
|
|
39
|
+
export { _BoilerplateScraperRegistration };
|
|
40
|
+
export { _BoilerplateScraperMetadataRegistration };
|
|
35
41
|
export { _LegacyDocumentScraperRegistration };
|
|
36
42
|
export { _LegacyDocumentScraperMetadataRegistration };
|
|
37
43
|
export { _DocumentScraperRegistration };
|
|
38
44
|
export { _DocumentScraperMetadataRegistration };
|
|
39
45
|
export { _MarkdownScraperRegistration };
|
|
40
46
|
export { _MarkdownScraperMetadataRegistration };
|
|
47
|
+
export { _MarkitdownScraperRegistration };
|
|
48
|
+
export { _MarkitdownScraperMetadataRegistration };
|
|
41
49
|
export { _PdfScraperRegistration };
|
|
42
50
|
export { _PdfScraperMetadataRegistration };
|
|
43
51
|
export { _WebsiteScraperRegistration };
|
|
@@ -106,6 +106,7 @@ import { isPipelinePrepared } from '../prepare/isPipelinePrepared';
|
|
|
106
106
|
import { preparePipeline } from '../prepare/preparePipeline';
|
|
107
107
|
import { prepareTasks } from '../prepare/prepareTasks';
|
|
108
108
|
import { unpreparePipeline } from '../prepare/unpreparePipeline';
|
|
109
|
+
import { _BoilerplateScraperMetadataRegistration } from '../scrapers/_boilerplate/register-metadata';
|
|
109
110
|
import { prepareKnowledgePieces } from '../scrapers/_common/prepareKnowledgePieces';
|
|
110
111
|
import { $scrapersMetadataRegister } from '../scrapers/_common/register/$scrapersMetadataRegister';
|
|
111
112
|
import { $scrapersRegister } from '../scrapers/_common/register/$scrapersRegister';
|
|
@@ -113,6 +114,7 @@ import { makeKnowledgeSourceHandler } from '../scrapers/_common/utils/makeKnowle
|
|
|
113
114
|
import { _LegacyDocumentScraperMetadataRegistration } from '../scrapers/document-legacy/register-metadata';
|
|
114
115
|
import { _DocumentScraperMetadataRegistration } from '../scrapers/document/register-metadata';
|
|
115
116
|
import { _MarkdownScraperMetadataRegistration } from '../scrapers/markdown/register-metadata';
|
|
117
|
+
import { _MarkitdownScraperMetadataRegistration } from '../scrapers/markitdown/register-metadata';
|
|
116
118
|
import { _PdfScraperMetadataRegistration } from '../scrapers/pdf/register-metadata';
|
|
117
119
|
import { _WebsiteScraperMetadataRegistration } from '../scrapers/website/register-metadata';
|
|
118
120
|
import { BlackholeStorage } from '../storage/blackhole/BlackholeStorage';
|
|
@@ -230,6 +232,7 @@ export { isPipelinePrepared };
|
|
|
230
232
|
export { preparePipeline };
|
|
231
233
|
export { prepareTasks };
|
|
232
234
|
export { unpreparePipeline };
|
|
235
|
+
export { _BoilerplateScraperMetadataRegistration };
|
|
233
236
|
export { prepareKnowledgePieces };
|
|
234
237
|
export { $scrapersMetadataRegister };
|
|
235
238
|
export { $scrapersRegister };
|
|
@@ -237,6 +240,7 @@ export { makeKnowledgeSourceHandler };
|
|
|
237
240
|
export { _LegacyDocumentScraperMetadataRegistration };
|
|
238
241
|
export { _DocumentScraperMetadataRegistration };
|
|
239
242
|
export { _MarkdownScraperMetadataRegistration };
|
|
243
|
+
export { _MarkitdownScraperMetadataRegistration };
|
|
240
244
|
export { _PdfScraperMetadataRegistration };
|
|
241
245
|
export { _WebsiteScraperMetadataRegistration };
|
|
242
246
|
export { BlackholeStorage };
|
|
@@ -36,8 +36,11 @@ import type { ActionCommand } from '../commands/X_ACTION/ActionCommand';
|
|
|
36
36
|
import { actionCommandParser } from '../commands/X_ACTION/actionCommandParser';
|
|
37
37
|
import type { InstrumentCommand } from '../commands/X_INSTRUMENT/InstrumentCommand';
|
|
38
38
|
import { instrumentCommandParser } from '../commands/X_INSTRUMENT/instrumentCommandParser';
|
|
39
|
+
import { addPipelineCommand } from '../utils/editable/edit-pipeline-string/addPipelineCommand';
|
|
40
|
+
import { deflatePipeline } from '../utils/editable/edit-pipeline-string/deflatePipeline';
|
|
41
|
+
import { removePipelineCommand } from '../utils/editable/edit-pipeline-string/removePipelineCommand';
|
|
39
42
|
import type { PipelineEditableSerialized } from '../utils/editable/types/PipelineEditableSerialized';
|
|
40
|
-
import {
|
|
43
|
+
import { isFlatPipeline } from '../utils/editable/utils/isFlatPipeline';
|
|
41
44
|
import { renamePipelineParameter } from '../utils/editable/utils/renamePipelineParameter';
|
|
42
45
|
import { stringifyPipelineJson } from '../utils/editable/utils/stringifyPipelineJson';
|
|
43
46
|
export { BOOK_LANGUAGE_VERSION, PROMPTBOOK_ENGINE_VERSION };
|
|
@@ -78,7 +81,10 @@ export type { ActionCommand };
|
|
|
78
81
|
export { actionCommandParser };
|
|
79
82
|
export type { InstrumentCommand };
|
|
80
83
|
export { instrumentCommandParser };
|
|
81
|
-
export
|
|
84
|
+
export { addPipelineCommand };
|
|
85
|
+
export { deflatePipeline };
|
|
82
86
|
export { removePipelineCommand };
|
|
87
|
+
export type { PipelineEditableSerialized };
|
|
88
|
+
export { isFlatPipeline };
|
|
83
89
|
export { renamePipelineParameter };
|
|
84
90
|
export { stringifyPipelineJson };
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import { BOOK_LANGUAGE_VERSION, PROMPTBOOK_ENGINE_VERSION } from '../version';
|
|
2
|
+
import { createMarkitdownScraper } from '../scrapers/markitdown/createMarkitdownScraper';
|
|
3
|
+
import { MarkitdownScraper } from '../scrapers/markitdown/MarkitdownScraper';
|
|
4
|
+
import { _MarkitdownScraperRegistration } from '../scrapers/markitdown/register-constructor';
|
|
5
|
+
export { BOOK_LANGUAGE_VERSION, PROMPTBOOK_ENGINE_VERSION };
|
|
6
|
+
export { createMarkitdownScraper };
|
|
7
|
+
export { MarkitdownScraper };
|
|
8
|
+
export { _MarkitdownScraperRegistration };
|
|
@@ -1,8 +1,14 @@
|
|
|
1
1
|
import { BOOK_LANGUAGE_VERSION, PROMPTBOOK_ENGINE_VERSION } from '../version';
|
|
2
|
+
import { createMarkitdownScraper } from '../scrapers/markitdown/createMarkitdownScraper';
|
|
3
|
+
import { MarkitdownScraper } from '../scrapers/markitdown/MarkitdownScraper';
|
|
4
|
+
import { _MarkitdownScraperRegistration } from '../scrapers/markitdown/register-constructor';
|
|
2
5
|
import { createPdfScraper } from '../scrapers/pdf/createPdfScraper';
|
|
3
6
|
import { PdfScraper } from '../scrapers/pdf/PdfScraper';
|
|
4
7
|
import { _PdfScraperRegistration } from '../scrapers/pdf/register-constructor';
|
|
5
8
|
export { BOOK_LANGUAGE_VERSION, PROMPTBOOK_ENGINE_VERSION };
|
|
9
|
+
export { createMarkitdownScraper };
|
|
10
|
+
export { MarkitdownScraper };
|
|
11
|
+
export { _MarkitdownScraperRegistration };
|
|
6
12
|
export { createPdfScraper };
|
|
7
13
|
export { PdfScraper };
|
|
8
14
|
export { _PdfScraperRegistration };
|
|
@@ -7,6 +7,7 @@ import { deserializeError } from '../errors/utils/deserializeError';
|
|
|
7
7
|
import { serializeError } from '../errors/utils/serializeError';
|
|
8
8
|
import { forEachAsync } from '../execution/utils/forEachAsync';
|
|
9
9
|
import { isValidJsonString } from '../formats/json/utils/isValidJsonString';
|
|
10
|
+
import { prompt } from '../pipeline/prompt-notation';
|
|
10
11
|
import { $getCurrentDate } from '../utils/$getCurrentDate';
|
|
11
12
|
import { $isRunningInBrowser } from '../utils/environment/$isRunningInBrowser';
|
|
12
13
|
import { $isRunningInJest } from '../utils/environment/$isRunningInJest';
|
|
@@ -87,6 +88,7 @@ export { deserializeError };
|
|
|
87
88
|
export { serializeError };
|
|
88
89
|
export { forEachAsync };
|
|
89
90
|
export { isValidJsonString };
|
|
91
|
+
export { prompt };
|
|
90
92
|
export { $getCurrentDate };
|
|
91
93
|
export { $isRunningInBrowser };
|
|
92
94
|
export { $isRunningInJest };
|
|
@@ -9,12 +9,16 @@ import { _OpenAiMetadataRegistration } from '../llm-providers/openai/register-co
|
|
|
9
9
|
import { _OpenAiAssistantMetadataRegistration } from '../llm-providers/openai/register-configuration';
|
|
10
10
|
import { _OpenAiRegistration } from '../llm-providers/openai/register-constructor';
|
|
11
11
|
import { _OpenAiAssistantRegistration } from '../llm-providers/openai/register-constructor';
|
|
12
|
+
import { _BoilerplateScraperRegistration } from '../scrapers/_boilerplate/register-constructor';
|
|
13
|
+
import { _BoilerplateScraperMetadataRegistration } from '../scrapers/_boilerplate/register-metadata';
|
|
12
14
|
import { _LegacyDocumentScraperRegistration } from '../scrapers/document-legacy/register-constructor';
|
|
13
15
|
import { _LegacyDocumentScraperMetadataRegistration } from '../scrapers/document-legacy/register-metadata';
|
|
14
16
|
import { _DocumentScraperRegistration } from '../scrapers/document/register-constructor';
|
|
15
17
|
import { _DocumentScraperMetadataRegistration } from '../scrapers/document/register-metadata';
|
|
16
18
|
import { _MarkdownScraperRegistration } from '../scrapers/markdown/register-constructor';
|
|
17
19
|
import { _MarkdownScraperMetadataRegistration } from '../scrapers/markdown/register-metadata';
|
|
20
|
+
import { _MarkitdownScraperRegistration } from '../scrapers/markitdown/register-constructor';
|
|
21
|
+
import { _MarkitdownScraperMetadataRegistration } from '../scrapers/markitdown/register-metadata';
|
|
18
22
|
import { _PdfScraperRegistration } from '../scrapers/pdf/register-constructor';
|
|
19
23
|
import { _PdfScraperMetadataRegistration } from '../scrapers/pdf/register-metadata';
|
|
20
24
|
import { _WebsiteScraperRegistration } from '../scrapers/website/register-constructor';
|
|
@@ -31,12 +35,16 @@ export { _OpenAiMetadataRegistration };
|
|
|
31
35
|
export { _OpenAiAssistantMetadataRegistration };
|
|
32
36
|
export { _OpenAiRegistration };
|
|
33
37
|
export { _OpenAiAssistantRegistration };
|
|
38
|
+
export { _BoilerplateScraperRegistration };
|
|
39
|
+
export { _BoilerplateScraperMetadataRegistration };
|
|
34
40
|
export { _LegacyDocumentScraperRegistration };
|
|
35
41
|
export { _LegacyDocumentScraperMetadataRegistration };
|
|
36
42
|
export { _DocumentScraperRegistration };
|
|
37
43
|
export { _DocumentScraperMetadataRegistration };
|
|
38
44
|
export { _MarkdownScraperRegistration };
|
|
39
45
|
export { _MarkdownScraperMetadataRegistration };
|
|
46
|
+
export { _MarkitdownScraperRegistration };
|
|
47
|
+
export { _MarkitdownScraperMetadataRegistration };
|
|
40
48
|
export { _PdfScraperRegistration };
|
|
41
49
|
export { _PdfScraperMetadataRegistration };
|
|
42
50
|
export { _WebsiteScraperRegistration };
|
|
@@ -11,7 +11,7 @@ export declare const ORDER_OF_PIPELINE_JSON: ExportJsonOptions<PipelineJson>['or
|
|
|
11
11
|
*
|
|
12
12
|
* @private within the repository
|
|
13
13
|
*/
|
|
14
|
-
export declare const REPLACING_NONCE = "
|
|
14
|
+
export declare const REPLACING_NONCE = "ptbkauk42kV2dzao34faw7FudQUHYPtW";
|
|
15
15
|
/**
|
|
16
16
|
* @@@
|
|
17
17
|
*
|
|
@@ -5,7 +5,7 @@ import type { LocateAppOptions } from '../locateApp';
|
|
|
5
5
|
*
|
|
6
6
|
* @private within the repository
|
|
7
7
|
*/
|
|
8
|
-
export declare function locateAppOnLinux({
|
|
8
|
+
export declare function locateAppOnLinux({ linuxWhich, }: Pick<Required<LocateAppOptions>, 'linuxWhich'>): Promise<string_executable_path | null>;
|
|
9
9
|
/**
|
|
10
10
|
* TODO: [🧠][♿] Maybe export through `@promptbook/node`
|
|
11
11
|
* Note: [🟢] Code in this file should never be released in packages that could be imported into browser environment
|
|
@@ -5,7 +5,7 @@ import type { LocateAppOptions } from '../locateApp';
|
|
|
5
5
|
*
|
|
6
6
|
* @private within the repository
|
|
7
7
|
*/
|
|
8
|
-
export declare function locateAppOnMacOs({
|
|
8
|
+
export declare function locateAppOnMacOs({ macOsName, }: Pick<Required<LocateAppOptions>, 'macOsName'>): Promise<string_executable_path | null>;
|
|
9
9
|
/**
|
|
10
10
|
* TODO: [🧠][♿] Maybe export through `@promptbook/node`
|
|
11
11
|
* Note: [🟢] Code in this file should never be released in packages that could be imported into browser environment
|
|
@@ -2,6 +2,10 @@ import type { PipelineString } from './PipelineString';
|
|
|
2
2
|
/**
|
|
3
3
|
* Tag function for notating a pipeline with a book\`...\` notation as template literal
|
|
4
4
|
*
|
|
5
|
+
* Note: There are 2 similar functions:
|
|
6
|
+
* 1) `prompt` for notating single prompt exported from `@promptbook/utils`
|
|
7
|
+
* 2) `book` for notating and validating entire books exported from `@promptbook/utils`
|
|
8
|
+
*
|
|
5
9
|
* @param strings @@@
|
|
6
10
|
* @param values @@@
|
|
7
11
|
* @returns the pipeline string
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import type { string_prompt } from '../types/typeAliases';
|
|
2
|
+
/**
|
|
3
|
+
* Tag function for notating a prompt as template literal
|
|
4
|
+
*
|
|
5
|
+
* Note: There are 2 similar functions:
|
|
6
|
+
* 1) `prompt` for notating single prompt exported from `@promptbook/utils`
|
|
7
|
+
* 2) `book` for notating and validating entire books exported from `@promptbook/utils`
|
|
8
|
+
*
|
|
9
|
+
* @param strings @@@
|
|
10
|
+
* @param values @@@
|
|
11
|
+
* @returns the prompt string
|
|
12
|
+
* @public exported from `@promptbook/utils`
|
|
13
|
+
*/
|
|
14
|
+
export declare function prompt(strings: TemplateStringsArray, ...values: Array<string>): string_prompt;
|
|
15
|
+
/**
|
|
16
|
+
* TODO: [🧠][🈴] Where is the best location for this file
|
|
17
|
+
* Note: [💞] Ignore a discrepancy between file name and entity name
|
|
18
|
+
*/
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import type { ExecutionTools } from '../../execution/ExecutionTools';
|
|
2
|
+
import type { KnowledgePiecePreparedJson } from '../../pipeline/PipelineJson/KnowledgePieceJson';
|
|
3
|
+
import type { PrepareAndScrapeOptions } from '../../prepare/PrepareAndScrapeOptions';
|
|
4
|
+
import type { Converter } from '../_common/Converter';
|
|
5
|
+
import type { ScraperAndConverterMetadata } from '../_common/register/ScraperAndConverterMetadata';
|
|
6
|
+
import type { Scraper } from '../_common/Scraper';
|
|
7
|
+
import type { ScraperSourceHandler } from '../_common/Scraper';
|
|
8
|
+
import type { ScraperIntermediateSource } from '../_common/ScraperIntermediateSource';
|
|
9
|
+
/**
|
|
10
|
+
* Scraper of @@@ files
|
|
11
|
+
*
|
|
12
|
+
* @see `documentationUrl` for more details
|
|
13
|
+
* @public exported from `@promptbook/boilerplate`
|
|
14
|
+
*/
|
|
15
|
+
export declare class BoilerplateScraper implements Converter, Scraper {
|
|
16
|
+
private readonly tools;
|
|
17
|
+
private readonly options;
|
|
18
|
+
/**
|
|
19
|
+
* Metadata of the scraper which includes title, mime types, etc.
|
|
20
|
+
*/
|
|
21
|
+
get metadata(): ScraperAndConverterMetadata;
|
|
22
|
+
/**
|
|
23
|
+
* Markdown scraper is used internally
|
|
24
|
+
*/
|
|
25
|
+
private readonly markdownScraper;
|
|
26
|
+
constructor(tools: Pick<ExecutionTools, 'fs' | 'llm' | 'executables'>, options: PrepareAndScrapeOptions);
|
|
27
|
+
/**
|
|
28
|
+
* Convert the `.@@@` to `.md` file and returns intermediate source
|
|
29
|
+
*
|
|
30
|
+
* Note: `$` is used to indicate that this function is not a pure function - it leaves files on the disk and you are responsible for cleaning them by calling `destroy` method of returned object
|
|
31
|
+
*/
|
|
32
|
+
$convert(source: ScraperSourceHandler): Promise<ScraperIntermediateSource>;
|
|
33
|
+
/**
|
|
34
|
+
* Scrapes the docx file and returns the knowledge pieces or `null` if it can't scrape it
|
|
35
|
+
*/
|
|
36
|
+
scrape(source: ScraperSourceHandler): Promise<ReadonlyArray<Omit<KnowledgePiecePreparedJson, 'sources' | 'preparationIds'>> | null>;
|
|
37
|
+
}
|
|
38
|
+
/**
|
|
39
|
+
* TODO: [👣] Converted documents can act as cached items - there is no need to run conversion each time
|
|
40
|
+
* TODO: [🪂] Do it in parallel
|
|
41
|
+
* Note: No need to aggregate usage here, it is done by intercepting the llmTools
|
|
42
|
+
* @@@ Note: [🟢] Code in this file should never be released in packages that could be imported into browser environment
|
|
43
|
+
*/
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import type { ExecutionTools } from '../../execution/ExecutionTools';
|
|
2
|
+
import type { PrepareAndScrapeOptions } from '../../prepare/PrepareAndScrapeOptions';
|
|
3
|
+
import { BoilerplateScraper } from './BoilerplateScraper';
|
|
4
|
+
/**
|
|
5
|
+
* Constructor of `BoilerplateScraper`
|
|
6
|
+
*
|
|
7
|
+
* @public exported from `@promptbook/boilerplate`
|
|
8
|
+
*/
|
|
9
|
+
export declare const createBoilerplateScraper: ((tools: Pick<ExecutionTools, 'llm'>, options: PrepareAndScrapeOptions) => BoilerplateScraper) & import("type-fest/source/readonly-deep").ReadonlyObjectDeep<{
|
|
10
|
+
title: string;
|
|
11
|
+
packageName: string;
|
|
12
|
+
className: string;
|
|
13
|
+
mimeTypes: string[];
|
|
14
|
+
documentationUrl: "https://github.com/webgptorg/promptbook/discussions/@@@";
|
|
15
|
+
isAvilableInBrowser: false;
|
|
16
|
+
requiredExecutables: never[];
|
|
17
|
+
}>;
|
|
18
|
+
/**
|
|
19
|
+
* TODO: [🎶] Naming "constructor" vs "creator" vs "factory"
|
|
20
|
+
*/
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import type { Registration } from '../../utils/$Register';
|
|
2
|
+
/**
|
|
3
|
+
* Registration of known scraper
|
|
4
|
+
*
|
|
5
|
+
* Warning: This is not useful for the end user, it is just a side effect of the mechanism that handles all available known scrapers
|
|
6
|
+
*
|
|
7
|
+
* @public exported from `@promptbook/boilerplate`
|
|
8
|
+
* @public exported from `@promptbook/wizzard`
|
|
9
|
+
* @public exported from `@promptbook/cli`
|
|
10
|
+
*/
|
|
11
|
+
export declare const _BoilerplateScraperRegistration: Registration;
|
|
12
|
+
/**
|
|
13
|
+
* TODO: [🎶] Naming "constructor" vs "creator" vs "factory"
|
|
14
|
+
* Note: [💞] Ignore a discrepancy between file name and entity name
|
|
15
|
+
*/
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import type { Registration } from '../../utils/$Register';
|
|
2
|
+
/**
|
|
3
|
+
* Metadata of the scraper
|
|
4
|
+
*
|
|
5
|
+
* @private within the scraper directory
|
|
6
|
+
*/
|
|
7
|
+
export declare const boilerplateScraperMetadata: import("type-fest/source/readonly-deep").ReadonlyObjectDeep<{
|
|
8
|
+
title: string;
|
|
9
|
+
packageName: string;
|
|
10
|
+
className: string;
|
|
11
|
+
mimeTypes: string[];
|
|
12
|
+
documentationUrl: "https://github.com/webgptorg/promptbook/discussions/@@@";
|
|
13
|
+
isAvilableInBrowser: false;
|
|
14
|
+
requiredExecutables: never[];
|
|
15
|
+
}>;
|
|
16
|
+
/**
|
|
17
|
+
* Registration of known scraper metadata
|
|
18
|
+
*
|
|
19
|
+
* Warning: This is not useful for the end user, it is just a side effect of the mechanism that handles all available known scrapers
|
|
20
|
+
*
|
|
21
|
+
* @public exported from `@promptbook/core`
|
|
22
|
+
* @public exported from `@promptbook/wizzard`
|
|
23
|
+
* @public exported from `@promptbook/cli`
|
|
24
|
+
*/
|
|
25
|
+
export declare const _BoilerplateScraperMetadataRegistration: Registration;
|
|
26
|
+
/**
|
|
27
|
+
* Note: [💞] Ignore a discrepancy between file name and entity name
|
|
28
|
+
*/
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
import type { ExecutionTools } from '../../execution/ExecutionTools';
|
|
2
|
+
import type { KnowledgePiecePreparedJson } from '../../pipeline/PipelineJson/KnowledgePieceJson';
|
|
3
|
+
import type { PrepareAndScrapeOptions } from '../../prepare/PrepareAndScrapeOptions';
|
|
4
|
+
import type { Converter } from '../_common/Converter';
|
|
5
|
+
import type { ScraperAndConverterMetadata } from '../_common/register/ScraperAndConverterMetadata';
|
|
6
|
+
import type { Scraper } from '../_common/Scraper';
|
|
7
|
+
import type { ScraperSourceHandler } from '../_common/Scraper';
|
|
8
|
+
import type { ScraperIntermediateSource } from '../_common/ScraperIntermediateSource';
|
|
9
|
+
/**
|
|
10
|
+
* Integration of Markitdown by Microsoft into Promptbook
|
|
11
|
+
*
|
|
12
|
+
* @see https://github.com/microsoft/markitdown
|
|
13
|
+
* @see `documentationUrl` for more details
|
|
14
|
+
* @public exported from `@promptbook/markitdown`
|
|
15
|
+
* @public exported from `@promptbook/pdf`
|
|
16
|
+
*/
|
|
17
|
+
export declare class MarkitdownScraper implements Converter, Scraper {
|
|
18
|
+
private readonly tools;
|
|
19
|
+
private readonly options;
|
|
20
|
+
/**
|
|
21
|
+
* Metadata of the scraper which includes title, mime types, etc.
|
|
22
|
+
*/
|
|
23
|
+
get metadata(): ScraperAndConverterMetadata;
|
|
24
|
+
/**
|
|
25
|
+
* Markdown scraper is used internally
|
|
26
|
+
*/
|
|
27
|
+
private readonly markdownScraper;
|
|
28
|
+
/**
|
|
29
|
+
* Markitdown is used internally
|
|
30
|
+
*/
|
|
31
|
+
private readonly markitdown;
|
|
32
|
+
constructor(tools: Pick<ExecutionTools, 'fs' | 'llm' | 'executables'>, options: PrepareAndScrapeOptions);
|
|
33
|
+
/**
|
|
34
|
+
* Convert the documents to `.md` file and returns intermediate source
|
|
35
|
+
*
|
|
36
|
+
* Note: `$` is used to indicate that this function is not a pure function - it leaves files on the disk and you are responsible for cleaning them by calling `destroy` method of returned object
|
|
37
|
+
*/
|
|
38
|
+
$convert(source: ScraperSourceHandler): Promise<ScraperIntermediateSource>;
|
|
39
|
+
/**
|
|
40
|
+
* Scrapes the document and returns the knowledge pieces or `null` if it can't scrape it
|
|
41
|
+
*/
|
|
42
|
+
scrape(source: ScraperSourceHandler): Promise<ReadonlyArray<Omit<KnowledgePiecePreparedJson, 'sources' | 'preparationIds'>> | null>;
|
|
43
|
+
}
|
|
44
|
+
/**
|
|
45
|
+
* TODO: [🧠][🌜] Export only from `@promptbook/markitdown` or `@promptbook/pdf` NOT both
|
|
46
|
+
* TODO: [👣] Converted documents can act as cached items - there is no need to run conversion each time
|
|
47
|
+
* TODO: [🪂] Do it in parallel
|
|
48
|
+
* Note: No need to aggregate usage here, it is done by intercepting the llmTools
|
|
49
|
+
* Note: [🟢] Code in this file should never be released in packages that could be imported into browser environment
|
|
50
|
+
*/
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import type { ExecutionTools } from '../../execution/ExecutionTools';
|
|
2
|
+
import type { PrepareAndScrapeOptions } from '../../prepare/PrepareAndScrapeOptions';
|
|
3
|
+
import { MarkitdownScraper } from './MarkitdownScraper';
|
|
4
|
+
/**
|
|
5
|
+
* Constructor of `MarkitdownScraper`
|
|
6
|
+
*
|
|
7
|
+
* @public exported from `@promptbook/markitdown`
|
|
8
|
+
* @public exported from `@promptbook/pdf`
|
|
9
|
+
*/
|
|
10
|
+
export declare const createMarkitdownScraper: ((tools: Pick<ExecutionTools, 'llm'>, options: PrepareAndScrapeOptions) => MarkitdownScraper) & import("type-fest/source/readonly-deep").ReadonlyObjectDeep<{
|
|
11
|
+
title: string;
|
|
12
|
+
packageName: string;
|
|
13
|
+
className: string;
|
|
14
|
+
mimeTypes: string[];
|
|
15
|
+
documentationUrl: "https://github.com/webgptorg/promptbook/discussions/@@";
|
|
16
|
+
isAvilableInBrowser: false;
|
|
17
|
+
requiredExecutables: never[];
|
|
18
|
+
}>;
|
|
19
|
+
/**
|
|
20
|
+
* TODO: [🧠][🌜] Export only from `@promptbook/markitdown` or `@promptbook/pdf` NOT both
|
|
21
|
+
* TODO: [🎶] Naming "constructor" vs "creator" vs "factory"
|
|
22
|
+
*/
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import type { Registration } from '../../utils/$Register';
|
|
2
|
+
/**
|
|
3
|
+
* Registration of known scraper
|
|
4
|
+
*
|
|
5
|
+
* Warning: This is not useful for the end user, it is just a side effect of the mechanism that handles all available known scrapers
|
|
6
|
+
*
|
|
7
|
+
* @public exported from `@promptbook/markitdown`
|
|
8
|
+
* @public exported from `@promptbook/pdf`
|
|
9
|
+
* @public exported from `@promptbook/wizzard`
|
|
10
|
+
* @public exported from `@promptbook/cli`
|
|
11
|
+
*/
|
|
12
|
+
export declare const _MarkitdownScraperRegistration: Registration;
|
|
13
|
+
/**
|
|
14
|
+
* TODO: [🧠][🌜] Export only from `@promptbook/markitdown` or `@promptbook/pdf` NOT both
|
|
15
|
+
* TODO: [🎶] Naming "constructor" vs "creator" vs "factory"
|
|
16
|
+
* Note: [💞] Ignore a discrepancy between file name and entity name
|
|
17
|
+
*/
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import type { Registration } from '../../utils/$Register';
|
|
2
|
+
/**
|
|
3
|
+
* Metadata of the scraper
|
|
4
|
+
*
|
|
5
|
+
* @private within the scraper directory
|
|
6
|
+
*/
|
|
7
|
+
export declare const markitdownScraperMetadata: import("type-fest/source/readonly-deep").ReadonlyObjectDeep<{
|
|
8
|
+
title: string;
|
|
9
|
+
packageName: string;
|
|
10
|
+
className: string;
|
|
11
|
+
mimeTypes: string[];
|
|
12
|
+
documentationUrl: "https://github.com/webgptorg/promptbook/discussions/@@";
|
|
13
|
+
isAvilableInBrowser: false;
|
|
14
|
+
requiredExecutables: never[];
|
|
15
|
+
}>;
|
|
16
|
+
/**
|
|
17
|
+
* Registration of known scraper metadata
|
|
18
|
+
*
|
|
19
|
+
* Warning: This is not useful for the end user, it is just a side effect of the mechanism that handles all available known scrapers
|
|
20
|
+
*
|
|
21
|
+
* @public exported from `@promptbook/core`
|
|
22
|
+
* @public exported from `@promptbook/wizzard`
|
|
23
|
+
* @public exported from `@promptbook/cli`
|
|
24
|
+
*/
|
|
25
|
+
export declare const _MarkitdownScraperMetadataRegistration: Registration;
|
|
26
|
+
/**
|
|
27
|
+
* Note: [💞] Ignore a discrepancy between file name and entity name
|
|
28
|
+
*/
|
|
@@ -235,7 +235,7 @@ export type string_markdown_codeblock_language = 'book' | 'markdown' | 'text' |
|
|
|
235
235
|
/**
|
|
236
236
|
* @@@
|
|
237
237
|
*/
|
|
238
|
-
export type string_promptbook_documentation_url = `https://github.com/webgptorg/promptbook/discussions/${number |
|
|
238
|
+
export type string_promptbook_documentation_url = `https://github.com/webgptorg/promptbook/discussions/${number | `@@${string}`}`;
|
|
239
239
|
/**
|
|
240
240
|
* Semantic helper
|
|
241
241
|
*
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import type { PipelineString } from '../../../pipeline/PipelineString';
|
|
2
|
+
import type { string_markdown_text } from '../../../types/typeAliases';
|
|
3
|
+
type AddPipelineCommandOptions = {
|
|
4
|
+
commandString: string_markdown_text;
|
|
5
|
+
pipelineString: PipelineString;
|
|
6
|
+
};
|
|
7
|
+
/**
|
|
8
|
+
* @@@
|
|
9
|
+
*
|
|
10
|
+
* @public exported from `@promptbook/editable`
|
|
11
|
+
*/
|
|
12
|
+
export declare function addPipelineCommand(options: AddPipelineCommandOptions): PipelineString;
|
|
13
|
+
export {};
|
|
14
|
+
/**
|
|
15
|
+
* TODO: [🧠] What is the better solution - `- xxx`, - `- xxx` or preserve (see also next TODO)
|
|
16
|
+
* TODO: When existing commands 1) as 2) number 3) list, add 4) new command as next number
|
|
17
|
+
*/
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import type { PipelineString } from '../../../pipeline/PipelineString';
|
|
2
|
+
/**
|
|
3
|
+
* @@@
|
|
4
|
+
*
|
|
5
|
+
* @public exported from `@promptbook/editable`
|
|
6
|
+
*/
|
|
7
|
+
export declare function deflatePipeline(pipelineString: PipelineString): PipelineString;
|
|
8
|
+
/**
|
|
9
|
+
* TODO: Unit test
|
|
10
|
+
*/
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@promptbook/markdown-utils",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.84.0-10",
|
|
4
4
|
"description": "It's time for a paradigm shift. The future of software in plain English, French or Latin",
|
|
5
5
|
"--note-0": " <- [🐊]",
|
|
6
6
|
"private": false,
|
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
"url": "https://github.com/webgptorg/promptbook"
|
|
11
11
|
},
|
|
12
12
|
"contributors": [
|
|
13
|
-
"Pavol Hejný <
|
|
13
|
+
"Pavol Hejný <pavol@ptbk.io> (https://www.pavolhejny.com/)"
|
|
14
14
|
],
|
|
15
15
|
"--todo-0": "TODO: [❇️] Make better list of keywords",
|
|
16
16
|
"keywords": [
|
package/umd/index.umd.js
CHANGED
|
@@ -24,7 +24,7 @@
|
|
|
24
24
|
* @generated
|
|
25
25
|
* @see https://github.com/webgptorg/promptbook
|
|
26
26
|
*/
|
|
27
|
-
var PROMPTBOOK_ENGINE_VERSION = '0.
|
|
27
|
+
var PROMPTBOOK_ENGINE_VERSION = '0.84.0-9';
|
|
28
28
|
/**
|
|
29
29
|
* TODO: string_promptbook_version should be constrained to the all versions of Promptbook engine
|
|
30
30
|
* Note: [💞] Ignore a discrepancy between file name and entity name
|
|
@@ -662,7 +662,7 @@
|
|
|
662
662
|
*
|
|
663
663
|
* @public exported from `@promptbook/core`
|
|
664
664
|
*/
|
|
665
|
-
var ADMIN_EMAIL = '
|
|
665
|
+
var ADMIN_EMAIL = 'pavol@ptbk.io';
|
|
666
666
|
/**
|
|
667
667
|
* Name of the responsible person for the Promptbook on GitHub
|
|
668
668
|
*
|
|
@@ -1063,7 +1063,7 @@
|
|
|
1063
1063
|
*
|
|
1064
1064
|
* @private within the repository
|
|
1065
1065
|
*/
|
|
1066
|
-
var REPLACING_NONCE = '
|
|
1066
|
+
var REPLACING_NONCE = 'ptbkauk42kV2dzao34faw7FudQUHYPtW';
|
|
1067
1067
|
/**
|
|
1068
1068
|
* @@@
|
|
1069
1069
|
*
|
|
@@ -1906,7 +1906,7 @@
|
|
|
1906
1906
|
var NotYetImplementedError = /** @class */ (function (_super) {
|
|
1907
1907
|
__extends(NotYetImplementedError, _super);
|
|
1908
1908
|
function NotYetImplementedError(message) {
|
|
1909
|
-
var _this = _super.call(this, spaceTrim.spaceTrim(function (block) { return "\n ".concat(block(message), "\n\n Note: This feature is not implemented yet but it will be soon.\n\n If you want speed up the implementation or just read more, look here:\n https://github.com/webgptorg/promptbook\n\n Or contact us on
|
|
1909
|
+
var _this = _super.call(this, spaceTrim.spaceTrim(function (block) { return "\n ".concat(block(message), "\n\n Note: This feature is not implemented yet but it will be soon.\n\n If you want speed up the implementation or just read more, look here:\n https://github.com/webgptorg/promptbook\n\n Or contact us on pavol@ptbk.io\n\n "); })) || this;
|
|
1910
1910
|
_this.name = 'NotYetImplementedError';
|
|
1911
1911
|
Object.setPrototypeOf(_this, NotYetImplementedError.prototype);
|
|
1912
1912
|
return _this;
|