@promptbook/markdown-utils 0.84.0-0 → 0.84.0-10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29) hide show
  1. package/README.md +1 -0
  2. package/esm/index.es.js +4 -4
  3. package/esm/index.es.js.map +1 -1
  4. package/esm/typings/src/_packages/cli.index.d.ts +8 -0
  5. package/esm/typings/src/_packages/core.index.d.ts +4 -0
  6. package/esm/typings/src/_packages/markitdown.index.d.ts +8 -0
  7. package/esm/typings/src/_packages/pdf.index.d.ts +6 -0
  8. package/esm/typings/src/_packages/utils.index.d.ts +2 -0
  9. package/esm/typings/src/_packages/wizzard.index.d.ts +8 -0
  10. package/esm/typings/src/constants.d.ts +1 -1
  11. package/esm/typings/src/executables/platforms/locateAppOnLinux.d.ts +1 -1
  12. package/esm/typings/src/executables/platforms/locateAppOnMacOs.d.ts +1 -1
  13. package/esm/typings/src/pipeline/book-notation.d.ts +4 -0
  14. package/esm/typings/src/pipeline/prompt-notation.d.ts +18 -0
  15. package/esm/typings/src/pipeline/prompt-notation.test.d.ts +4 -0
  16. package/esm/typings/src/scrapers/_boilerplate/BoilerplateScraper.d.ts +43 -0
  17. package/esm/typings/src/scrapers/_boilerplate/createBoilerplateScraper.d.ts +20 -0
  18. package/esm/typings/src/scrapers/_boilerplate/playground/boilerplate-scraper-playground.d.ts +5 -0
  19. package/esm/typings/src/scrapers/_boilerplate/register-constructor.d.ts +15 -0
  20. package/esm/typings/src/scrapers/_boilerplate/register-metadata.d.ts +28 -0
  21. package/esm/typings/src/scrapers/markitdown/MarkitdownScraper.d.ts +50 -0
  22. package/esm/typings/src/scrapers/markitdown/createMarkitdownScraper.d.ts +22 -0
  23. package/esm/typings/src/scrapers/markitdown/playground/markitdown-scraper-playground.d.ts +5 -0
  24. package/esm/typings/src/scrapers/markitdown/register-constructor.d.ts +17 -0
  25. package/esm/typings/src/scrapers/markitdown/register-metadata.d.ts +28 -0
  26. package/esm/typings/src/types/typeAliases.d.ts +1 -1
  27. package/package.json +2 -2
  28. package/umd/index.umd.js +4 -4
  29. package/umd/index.umd.js.map +1 -1
@@ -10,12 +10,16 @@ import { _OpenAiMetadataRegistration } from '../llm-providers/openai/register-co
10
10
  import { _OpenAiAssistantMetadataRegistration } from '../llm-providers/openai/register-configuration';
11
11
  import { _OpenAiRegistration } from '../llm-providers/openai/register-constructor';
12
12
  import { _OpenAiAssistantRegistration } from '../llm-providers/openai/register-constructor';
13
+ import { _BoilerplateScraperRegistration } from '../scrapers/_boilerplate/register-constructor';
14
+ import { _BoilerplateScraperMetadataRegistration } from '../scrapers/_boilerplate/register-metadata';
13
15
  import { _LegacyDocumentScraperRegistration } from '../scrapers/document-legacy/register-constructor';
14
16
  import { _LegacyDocumentScraperMetadataRegistration } from '../scrapers/document-legacy/register-metadata';
15
17
  import { _DocumentScraperRegistration } from '../scrapers/document/register-constructor';
16
18
  import { _DocumentScraperMetadataRegistration } from '../scrapers/document/register-metadata';
17
19
  import { _MarkdownScraperRegistration } from '../scrapers/markdown/register-constructor';
18
20
  import { _MarkdownScraperMetadataRegistration } from '../scrapers/markdown/register-metadata';
21
+ import { _MarkitdownScraperRegistration } from '../scrapers/markitdown/register-constructor';
22
+ import { _MarkitdownScraperMetadataRegistration } from '../scrapers/markitdown/register-metadata';
19
23
  import { _PdfScraperRegistration } from '../scrapers/pdf/register-constructor';
20
24
  import { _PdfScraperMetadataRegistration } from '../scrapers/pdf/register-metadata';
21
25
  import { _WebsiteScraperRegistration } from '../scrapers/website/register-constructor';
@@ -32,12 +36,16 @@ export { _OpenAiMetadataRegistration };
32
36
  export { _OpenAiAssistantMetadataRegistration };
33
37
  export { _OpenAiRegistration };
34
38
  export { _OpenAiAssistantRegistration };
39
+ export { _BoilerplateScraperRegistration };
40
+ export { _BoilerplateScraperMetadataRegistration };
35
41
  export { _LegacyDocumentScraperRegistration };
36
42
  export { _LegacyDocumentScraperMetadataRegistration };
37
43
  export { _DocumentScraperRegistration };
38
44
  export { _DocumentScraperMetadataRegistration };
39
45
  export { _MarkdownScraperRegistration };
40
46
  export { _MarkdownScraperMetadataRegistration };
47
+ export { _MarkitdownScraperRegistration };
48
+ export { _MarkitdownScraperMetadataRegistration };
41
49
  export { _PdfScraperRegistration };
42
50
  export { _PdfScraperMetadataRegistration };
43
51
  export { _WebsiteScraperRegistration };
@@ -106,6 +106,7 @@ import { isPipelinePrepared } from '../prepare/isPipelinePrepared';
106
106
  import { preparePipeline } from '../prepare/preparePipeline';
107
107
  import { prepareTasks } from '../prepare/prepareTasks';
108
108
  import { unpreparePipeline } from '../prepare/unpreparePipeline';
109
+ import { _BoilerplateScraperMetadataRegistration } from '../scrapers/_boilerplate/register-metadata';
109
110
  import { prepareKnowledgePieces } from '../scrapers/_common/prepareKnowledgePieces';
110
111
  import { $scrapersMetadataRegister } from '../scrapers/_common/register/$scrapersMetadataRegister';
111
112
  import { $scrapersRegister } from '../scrapers/_common/register/$scrapersRegister';
@@ -113,6 +114,7 @@ import { makeKnowledgeSourceHandler } from '../scrapers/_common/utils/makeKnowle
113
114
  import { _LegacyDocumentScraperMetadataRegistration } from '../scrapers/document-legacy/register-metadata';
114
115
  import { _DocumentScraperMetadataRegistration } from '../scrapers/document/register-metadata';
115
116
  import { _MarkdownScraperMetadataRegistration } from '../scrapers/markdown/register-metadata';
117
+ import { _MarkitdownScraperMetadataRegistration } from '../scrapers/markitdown/register-metadata';
116
118
  import { _PdfScraperMetadataRegistration } from '../scrapers/pdf/register-metadata';
117
119
  import { _WebsiteScraperMetadataRegistration } from '../scrapers/website/register-metadata';
118
120
  import { BlackholeStorage } from '../storage/blackhole/BlackholeStorage';
@@ -230,6 +232,7 @@ export { isPipelinePrepared };
230
232
  export { preparePipeline };
231
233
  export { prepareTasks };
232
234
  export { unpreparePipeline };
235
+ export { _BoilerplateScraperMetadataRegistration };
233
236
  export { prepareKnowledgePieces };
234
237
  export { $scrapersMetadataRegister };
235
238
  export { $scrapersRegister };
@@ -237,6 +240,7 @@ export { makeKnowledgeSourceHandler };
237
240
  export { _LegacyDocumentScraperMetadataRegistration };
238
241
  export { _DocumentScraperMetadataRegistration };
239
242
  export { _MarkdownScraperMetadataRegistration };
243
+ export { _MarkitdownScraperMetadataRegistration };
240
244
  export { _PdfScraperMetadataRegistration };
241
245
  export { _WebsiteScraperMetadataRegistration };
242
246
  export { BlackholeStorage };
@@ -0,0 +1,8 @@
1
+ import { BOOK_LANGUAGE_VERSION, PROMPTBOOK_ENGINE_VERSION } from '../version';
2
+ import { createMarkitdownScraper } from '../scrapers/markitdown/createMarkitdownScraper';
3
+ import { MarkitdownScraper } from '../scrapers/markitdown/MarkitdownScraper';
4
+ import { _MarkitdownScraperRegistration } from '../scrapers/markitdown/register-constructor';
5
+ export { BOOK_LANGUAGE_VERSION, PROMPTBOOK_ENGINE_VERSION };
6
+ export { createMarkitdownScraper };
7
+ export { MarkitdownScraper };
8
+ export { _MarkitdownScraperRegistration };
@@ -1,8 +1,14 @@
1
1
  import { BOOK_LANGUAGE_VERSION, PROMPTBOOK_ENGINE_VERSION } from '../version';
2
+ import { createMarkitdownScraper } from '../scrapers/markitdown/createMarkitdownScraper';
3
+ import { MarkitdownScraper } from '../scrapers/markitdown/MarkitdownScraper';
4
+ import { _MarkitdownScraperRegistration } from '../scrapers/markitdown/register-constructor';
2
5
  import { createPdfScraper } from '../scrapers/pdf/createPdfScraper';
3
6
  import { PdfScraper } from '../scrapers/pdf/PdfScraper';
4
7
  import { _PdfScraperRegistration } from '../scrapers/pdf/register-constructor';
5
8
  export { BOOK_LANGUAGE_VERSION, PROMPTBOOK_ENGINE_VERSION };
9
+ export { createMarkitdownScraper };
10
+ export { MarkitdownScraper };
11
+ export { _MarkitdownScraperRegistration };
6
12
  export { createPdfScraper };
7
13
  export { PdfScraper };
8
14
  export { _PdfScraperRegistration };
@@ -7,6 +7,7 @@ import { deserializeError } from '../errors/utils/deserializeError';
7
7
  import { serializeError } from '../errors/utils/serializeError';
8
8
  import { forEachAsync } from '../execution/utils/forEachAsync';
9
9
  import { isValidJsonString } from '../formats/json/utils/isValidJsonString';
10
+ import { prompt } from '../pipeline/prompt-notation';
10
11
  import { $getCurrentDate } from '../utils/$getCurrentDate';
11
12
  import { $isRunningInBrowser } from '../utils/environment/$isRunningInBrowser';
12
13
  import { $isRunningInJest } from '../utils/environment/$isRunningInJest';
@@ -87,6 +88,7 @@ export { deserializeError };
87
88
  export { serializeError };
88
89
  export { forEachAsync };
89
90
  export { isValidJsonString };
91
+ export { prompt };
90
92
  export { $getCurrentDate };
91
93
  export { $isRunningInBrowser };
92
94
  export { $isRunningInJest };
@@ -9,12 +9,16 @@ import { _OpenAiMetadataRegistration } from '../llm-providers/openai/register-co
9
9
  import { _OpenAiAssistantMetadataRegistration } from '../llm-providers/openai/register-configuration';
10
10
  import { _OpenAiRegistration } from '../llm-providers/openai/register-constructor';
11
11
  import { _OpenAiAssistantRegistration } from '../llm-providers/openai/register-constructor';
12
+ import { _BoilerplateScraperRegistration } from '../scrapers/_boilerplate/register-constructor';
13
+ import { _BoilerplateScraperMetadataRegistration } from '../scrapers/_boilerplate/register-metadata';
12
14
  import { _LegacyDocumentScraperRegistration } from '../scrapers/document-legacy/register-constructor';
13
15
  import { _LegacyDocumentScraperMetadataRegistration } from '../scrapers/document-legacy/register-metadata';
14
16
  import { _DocumentScraperRegistration } from '../scrapers/document/register-constructor';
15
17
  import { _DocumentScraperMetadataRegistration } from '../scrapers/document/register-metadata';
16
18
  import { _MarkdownScraperRegistration } from '../scrapers/markdown/register-constructor';
17
19
  import { _MarkdownScraperMetadataRegistration } from '../scrapers/markdown/register-metadata';
20
+ import { _MarkitdownScraperRegistration } from '../scrapers/markitdown/register-constructor';
21
+ import { _MarkitdownScraperMetadataRegistration } from '../scrapers/markitdown/register-metadata';
18
22
  import { _PdfScraperRegistration } from '../scrapers/pdf/register-constructor';
19
23
  import { _PdfScraperMetadataRegistration } from '../scrapers/pdf/register-metadata';
20
24
  import { _WebsiteScraperRegistration } from '../scrapers/website/register-constructor';
@@ -31,12 +35,16 @@ export { _OpenAiMetadataRegistration };
31
35
  export { _OpenAiAssistantMetadataRegistration };
32
36
  export { _OpenAiRegistration };
33
37
  export { _OpenAiAssistantRegistration };
38
+ export { _BoilerplateScraperRegistration };
39
+ export { _BoilerplateScraperMetadataRegistration };
34
40
  export { _LegacyDocumentScraperRegistration };
35
41
  export { _LegacyDocumentScraperMetadataRegistration };
36
42
  export { _DocumentScraperRegistration };
37
43
  export { _DocumentScraperMetadataRegistration };
38
44
  export { _MarkdownScraperRegistration };
39
45
  export { _MarkdownScraperMetadataRegistration };
46
+ export { _MarkitdownScraperRegistration };
47
+ export { _MarkitdownScraperMetadataRegistration };
40
48
  export { _PdfScraperRegistration };
41
49
  export { _PdfScraperMetadataRegistration };
42
50
  export { _WebsiteScraperRegistration };
@@ -11,7 +11,7 @@ export declare const ORDER_OF_PIPELINE_JSON: ExportJsonOptions<PipelineJson>['or
11
11
  *
12
12
  * @private within the repository
13
13
  */
14
- export declare const REPLACING_NONCE = "u$k42k%!V2zo34w7Fu#@QUHYPW";
14
+ export declare const REPLACING_NONCE = "ptbkauk42kV2dzao34faw7FudQUHYPtW";
15
15
  /**
16
16
  * @@@
17
17
  *
@@ -5,7 +5,7 @@ import type { LocateAppOptions } from '../locateApp';
5
5
  *
6
6
  * @private within the repository
7
7
  */
8
- export declare function locateAppOnLinux({ appName, linuxWhich, }: Pick<Required<LocateAppOptions>, 'appName' | 'linuxWhich'>): Promise<string_executable_path | null>;
8
+ export declare function locateAppOnLinux({ linuxWhich, }: Pick<Required<LocateAppOptions>, 'linuxWhich'>): Promise<string_executable_path | null>;
9
9
  /**
10
10
  * TODO: [🧠][♿] Maybe export through `@promptbook/node`
11
11
  * Note: [🟢] Code in this file should never be never released in packages that could be imported into browser environment
@@ -5,7 +5,7 @@ import type { LocateAppOptions } from '../locateApp';
5
5
  *
6
6
  * @private within the repository
7
7
  */
8
- export declare function locateAppOnMacOs({ appName, macOsName, }: Pick<Required<LocateAppOptions>, 'appName' | 'macOsName'>): Promise<string_executable_path | null>;
8
+ export declare function locateAppOnMacOs({ macOsName, }: Pick<Required<LocateAppOptions>, 'macOsName'>): Promise<string_executable_path | null>;
9
9
  /**
10
10
  * TODO: [🧠][♿] Maybe export through `@promptbook/node`
11
11
  * Note: [🟢] Code in this file should never be never released in packages that could be imported into browser environment
@@ -2,6 +2,10 @@ import type { PipelineString } from './PipelineString';
2
2
  /**
3
3
  * Tag function for notating a pipeline with a book\`...\ notation as template literal
4
4
  *
5
+ * Note: There are 2 similar functions:
6
+ * 1) `prompt` for notating single prompt exported from `@promptbook/utils`
7
+ * 1) `book` for notating and validating entire books exported from `@promptbook/utils`
8
+ *
5
9
  * @param strings @@@
6
10
  * @param values @@@
7
11
  * @returns the pipeline string
@@ -0,0 +1,18 @@
1
+ import type { string_prompt } from '../types/typeAliases';
2
+ /**
3
+ * Tag function for notating a prompt as template literal
4
+ *
5
+ * Note: There are 2 similar functions:
6
+ * 1) `prompt` for notating single prompt exported from `@promptbook/utils`
7
+ * 1) `book` for notating and validating entire books exported from `@promptbook/utils`
8
+ *
9
+ * @param strings @@@
10
+ * @param values @@@
11
+ * @returns the pipeline string
12
+ * @public exported from `@promptbook/utils`
13
+ */
14
+ export declare function prompt(strings: TemplateStringsArray, ...values: Array<string>): string_prompt;
15
+ /**
16
+ * TODO: [🧠][🈴] Where is the best location for this file
17
+ * Note: [💞] Ignore a discrepancy between file name and entity name
18
+ */
@@ -0,0 +1,4 @@
1
+ export {};
2
+ /**
3
+ * TODO: [🧠][🈴] Where is the best location for this file
4
+ */
@@ -0,0 +1,43 @@
1
+ import type { ExecutionTools } from '../../execution/ExecutionTools';
2
+ import type { KnowledgePiecePreparedJson } from '../../pipeline/PipelineJson/KnowledgePieceJson';
3
+ import type { PrepareAndScrapeOptions } from '../../prepare/PrepareAndScrapeOptions';
4
+ import type { Converter } from '../_common/Converter';
5
+ import type { ScraperAndConverterMetadata } from '../_common/register/ScraperAndConverterMetadata';
6
+ import type { Scraper } from '../_common/Scraper';
7
+ import type { ScraperSourceHandler } from '../_common/Scraper';
8
+ import type { ScraperIntermediateSource } from '../_common/ScraperIntermediateSource';
9
+ /**
10
+ * Scraper of @@@ files
11
+ *
12
+ * @see `documentationUrl` for more details
13
+ * @public exported from `@promptbook/boilerplate`
14
+ */
15
+ export declare class BoilerplateScraper implements Converter, Scraper {
16
+ private readonly tools;
17
+ private readonly options;
18
+ /**
19
+ * Metadata of the scraper which includes title, mime types, etc.
20
+ */
21
+ get metadata(): ScraperAndConverterMetadata;
22
+ /**
23
+ * Markdown scraper is used internally
24
+ */
25
+ private readonly markdownScraper;
26
+ constructor(tools: Pick<ExecutionTools, 'fs' | 'llm' | 'executables'>, options: PrepareAndScrapeOptions);
27
+ /**
28
+ * Convert the `.@@@` to `.md` file and returns intermediate source
29
+ *
30
+ * Note: `$` is used to indicate that this function is not a pure function - it leaves files on the disk and you are responsible for cleaning them by calling `destroy` method of returned object
31
+ */
32
+ $convert(source: ScraperSourceHandler): Promise<ScraperIntermediateSource>;
33
+ /**
34
+ * Scrapes the docx file and returns the knowledge pieces or `null` if it can't scrape it
35
+ */
36
+ scrape(source: ScraperSourceHandler): Promise<ReadonlyArray<Omit<KnowledgePiecePreparedJson, 'sources' | 'preparationIds'>> | null>;
37
+ }
38
+ /**
39
+ * TODO: [👣] Converted documents can act as cached items - there is no need to run conversion each time
40
+ * TODO: [🪂] Do it in parallel
41
+ * Note: No need to aggregate usage here, it is done by intercepting the llmTools
42
+ * @@@ Note: [🟢] Code in this file should never be never released in packages that could be imported into browser environment
43
+ */
@@ -0,0 +1,20 @@
1
+ import type { ExecutionTools } from '../../execution/ExecutionTools';
2
+ import type { PrepareAndScrapeOptions } from '../../prepare/PrepareAndScrapeOptions';
3
+ import { BoilerplateScraper } from './BoilerplateScraper';
4
+ /**
5
+ * Constructor of `BoilerplateScraper`
6
+ *
7
+ * @public exported from `@promptbook/boilerplate`
8
+ */
9
+ export declare const createBoilerplateScraper: ((tools: Pick<ExecutionTools, 'llm'>, options: PrepareAndScrapeOptions) => BoilerplateScraper) & import("type-fest/source/readonly-deep").ReadonlyObjectDeep<{
10
+ title: string;
11
+ packageName: string;
12
+ className: string;
13
+ mimeTypes: string[];
14
+ documentationUrl: "https://github.com/webgptorg/promptbook/discussions/@@@";
15
+ isAvilableInBrowser: false;
16
+ requiredExecutables: never[];
17
+ }>;
18
+ /**
19
+ * TODO: [🎶] Naming "constructor" vs "creator" vs "factory"
20
+ */
@@ -0,0 +1,5 @@
1
+ #!/usr/bin/env ts-node
2
+ export {};
3
+ /**
4
+ * Note: [⚫] Code in this file should never be published in any package
5
+ */
@@ -0,0 +1,15 @@
1
+ import type { Registration } from '../../utils/$Register';
2
+ /**
3
+ * Registration of known scraper
4
+ *
5
+ * Warning: This is not useful for the end user, it is just a side effect of the mechanism that handles all available known scrapers
6
+ *
7
+ * @public exported from `@promptbook/boilerplate`
8
+ * @public exported from `@promptbook/wizzard`
9
+ * @public exported from `@promptbook/cli`
10
+ */
11
+ export declare const _BoilerplateScraperRegistration: Registration;
12
+ /**
13
+ * TODO: [🎶] Naming "constructor" vs "creator" vs "factory"
14
+ * Note: [💞] Ignore a discrepancy between file name and entity name
15
+ */
@@ -0,0 +1,28 @@
1
+ import type { Registration } from '../../utils/$Register';
2
+ /**
3
+ * Metadata of the scraper
4
+ *
5
+ * @private within the scraper directory
6
+ */
7
+ export declare const boilerplateScraperMetadata: import("type-fest/source/readonly-deep").ReadonlyObjectDeep<{
8
+ title: string;
9
+ packageName: string;
10
+ className: string;
11
+ mimeTypes: string[];
12
+ documentationUrl: "https://github.com/webgptorg/promptbook/discussions/@@@";
13
+ isAvilableInBrowser: false;
14
+ requiredExecutables: never[];
15
+ }>;
16
+ /**
17
+ * Registration of known scraper metadata
18
+ *
19
+ * Warning: This is not useful for the end user, it is just a side effect of the mechanism that handles all available known scrapers
20
+ *
21
+ * @public exported from `@promptbook/core`
22
+ * @public exported from `@promptbook/wizzard`
23
+ * @public exported from `@promptbook/cli`
24
+ */
25
+ export declare const _BoilerplateScraperMetadataRegistration: Registration;
26
+ /**
27
+ * Note: [💞] Ignore a discrepancy between file name and entity name
28
+ */
@@ -0,0 +1,50 @@
1
+ import type { ExecutionTools } from '../../execution/ExecutionTools';
2
+ import type { KnowledgePiecePreparedJson } from '../../pipeline/PipelineJson/KnowledgePieceJson';
3
+ import type { PrepareAndScrapeOptions } from '../../prepare/PrepareAndScrapeOptions';
4
+ import type { Converter } from '../_common/Converter';
5
+ import type { ScraperAndConverterMetadata } from '../_common/register/ScraperAndConverterMetadata';
6
+ import type { Scraper } from '../_common/Scraper';
7
+ import type { ScraperSourceHandler } from '../_common/Scraper';
8
+ import type { ScraperIntermediateSource } from '../_common/ScraperIntermediateSource';
9
+ /**
10
+ * Integration of Markitdown by Microsoft into Promptbook
11
+ *
12
+ * @see https://github.com/microsoft/markitdown
13
+ * @see `documentationUrl` for more details
14
+ * @public exported from `@promptbook/markitdown`
15
+ * @public exported from `@promptbook/pdf`
16
+ */
17
+ export declare class MarkitdownScraper implements Converter, Scraper {
18
+ private readonly tools;
19
+ private readonly options;
20
+ /**
21
+ * Metadata of the scraper which includes title, mime types, etc.
22
+ */
23
+ get metadata(): ScraperAndConverterMetadata;
24
+ /**
25
+ * Markdown scraper is used internally
26
+ */
27
+ private readonly markdownScraper;
28
+ /**
29
+ * Markdown scraper is used internally
30
+ */
31
+ private readonly markitdown;
32
+ constructor(tools: Pick<ExecutionTools, 'fs' | 'llm' | 'executables'>, options: PrepareAndScrapeOptions);
33
+ /**
34
+ * Convert the documents to `.md` file and returns intermediate source
35
+ *
36
+ * Note: `$` is used to indicate that this function is not a pure function - it leaves files on the disk and you are responsible for cleaning them by calling `destroy` method of returned object
37
+ */
38
+ $convert(source: ScraperSourceHandler): Promise<ScraperIntermediateSource>;
39
+ /**
40
+ * Scrapes the docx file and returns the knowledge pieces or `null` if it can't scrape it
41
+ */
42
+ scrape(source: ScraperSourceHandler): Promise<ReadonlyArray<Omit<KnowledgePiecePreparedJson, 'sources' | 'preparationIds'>> | null>;
43
+ }
44
+ /**
45
+ * TODO: [🧠][🌜] Export only from `@promptbook/markitdown` or `@promptbook/pdf` NOT both
46
+ * TODO: [👣] Converted documents can act as cached items - there is no need to run conversion each time
47
+ * TODO: [🪂] Do it in parallel
48
+ * Note: No need to aggregate usage here, it is done by intercepting the llmTools
49
+ * Note: [🟢] Code in this file should never be never released in packages that could be imported into browser environment
50
+ */
@@ -0,0 +1,22 @@
1
+ import type { ExecutionTools } from '../../execution/ExecutionTools';
2
+ import type { PrepareAndScrapeOptions } from '../../prepare/PrepareAndScrapeOptions';
3
+ import { MarkitdownScraper } from './MarkitdownScraper';
4
+ /**
5
+ * Constructor of `MarkitdownScraper`
6
+ *
7
+ * @public exported from `@promptbook/markitdown`
8
+ * @public exported from `@promptbook/pdf`
9
+ */
10
+ export declare const createMarkitdownScraper: ((tools: Pick<ExecutionTools, 'llm'>, options: PrepareAndScrapeOptions) => MarkitdownScraper) & import("type-fest/source/readonly-deep").ReadonlyObjectDeep<{
11
+ title: string;
12
+ packageName: string;
13
+ className: string;
14
+ mimeTypes: string[];
15
+ documentationUrl: "https://github.com/webgptorg/promptbook/discussions/@@";
16
+ isAvilableInBrowser: false;
17
+ requiredExecutables: never[];
18
+ }>;
19
+ /**
20
+ * TODO: [🧠][🌜] Export only from `@promptbook/markitdown` or `@promptbook/pdf` NOT both
21
+ * TODO: [🎶] Naming "constructor" vs "creator" vs "factory"
22
+ */
@@ -0,0 +1,5 @@
1
+ #!/usr/bin/env ts-node
2
+ export {};
3
+ /**
4
+ * Note: [⚫] Code in this file should never be published in any package
5
+ */
@@ -0,0 +1,17 @@
1
+ import type { Registration } from '../../utils/$Register';
2
+ /**
3
+ * Registration of known scraper
4
+ *
5
+ * Warning: This is not useful for the end user, it is just a side effect of the mechanism that handles all available known scrapers
6
+ *
7
+ * @public exported from `@promptbook/markitdown`
8
+ * @public exported from `@promptbook/pdf`
9
+ * @public exported from `@promptbook/wizzard`
10
+ * @public exported from `@promptbook/cli`
11
+ */
12
+ export declare const _MarkitdownScraperRegistration: Registration;
13
+ /**
14
+ * TODO: [🧠][🌜] Export only from `@promptbook/markitdown` or `@promptbook/pdf` NOT both
15
+ * TODO: [🎶] Naming "constructor" vs "creator" vs "factory"
16
+ * Note: [💞] Ignore a discrepancy between file name and entity name
17
+ */
@@ -0,0 +1,28 @@
1
+ import type { Registration } from '../../utils/$Register';
2
+ /**
3
+ * Metadata of the scraper
4
+ *
5
+ * @private within the scraper directory
6
+ */
7
+ export declare const markitdownScraperMetadata: import("type-fest/source/readonly-deep").ReadonlyObjectDeep<{
8
+ title: string;
9
+ packageName: string;
10
+ className: string;
11
+ mimeTypes: string[];
12
+ documentationUrl: "https://github.com/webgptorg/promptbook/discussions/@@";
13
+ isAvilableInBrowser: false;
14
+ requiredExecutables: never[];
15
+ }>;
16
+ /**
17
+ * Registration of known scraper metadata
18
+ *
19
+ * Warning: This is not useful for the end user, it is just a side effect of the mechanism that handles all available known scrapers
20
+ *
21
+ * @public exported from `@promptbook/core`
22
+ * @public exported from `@promptbook/wizzard`
23
+ * @public exported from `@promptbook/cli`
24
+ */
25
+ export declare const _MarkitdownScraperMetadataRegistration: Registration;
26
+ /**
27
+ * Note: [💞] Ignore a discrepancy between file name and entity name
28
+ */
@@ -235,7 +235,7 @@ export type string_markdown_codeblock_language = 'book' | 'markdown' | 'text' |
235
235
  /**
236
236
  * @@@
237
237
  */
238
- export type string_promptbook_documentation_url = `https://github.com/webgptorg/promptbook/discussions/${number | '@@'}`;
238
+ export type string_promptbook_documentation_url = `https://github.com/webgptorg/promptbook/discussions/${number | `@@${string}`}`;
239
239
  /**
240
240
  * Semantic helper
241
241
  *
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@promptbook/markdown-utils",
3
- "version": "0.84.0-0",
3
+ "version": "0.84.0-10",
4
4
  "description": "It's time for a paradigm shift. The future of software in plain English, French or Latin",
5
5
  "--note-0": " <- [🐊]",
6
6
  "private": false,
@@ -10,7 +10,7 @@
10
10
  "url": "https://github.com/webgptorg/promptbook"
11
11
  },
12
12
  "contributors": [
13
- "Pavol Hejný <me@pavolhejny.com> (https://www.pavolhejny.com/)"
13
+ "Pavol Hejný <pavol@ptbk.io> (https://www.pavolhejny.com/)"
14
14
  ],
15
15
  "--todo-0": "TODO: [❇️] Make better list of keywords",
16
16
  "keywords": [
package/umd/index.umd.js CHANGED
@@ -24,7 +24,7 @@
24
24
  * @generated
25
25
  * @see https://github.com/webgptorg/promptbook
26
26
  */
27
- var PROMPTBOOK_ENGINE_VERSION = '0.83.0';
27
+ var PROMPTBOOK_ENGINE_VERSION = '0.84.0-9';
28
28
  /**
29
29
  * TODO: string_promptbook_version should be constrained to the all versions of Promptbook engine
30
30
  * Note: [💞] Ignore a discrepancy between file name and entity name
@@ -662,7 +662,7 @@
662
662
  *
663
663
  * @public exported from `@promptbook/core`
664
664
  */
665
- var ADMIN_EMAIL = 'me@pavolhejny.com';
665
+ var ADMIN_EMAIL = 'pavol@ptbk.io';
666
666
  /**
667
667
  * Name of the responsible person for the Promptbook on GitHub
668
668
  *
@@ -1063,7 +1063,7 @@
1063
1063
  *
1064
1064
  * @private within the repository
1065
1065
  */
1066
- var REPLACING_NONCE = 'u$k42k%!V2zo34w7Fu#@QUHYPW';
1066
+ var REPLACING_NONCE = 'ptbkauk42kV2dzao34faw7FudQUHYPtW';
1067
1067
  /**
1068
1068
  * @@@
1069
1069
  *
@@ -1906,7 +1906,7 @@
1906
1906
  var NotYetImplementedError = /** @class */ (function (_super) {
1907
1907
  __extends(NotYetImplementedError, _super);
1908
1908
  function NotYetImplementedError(message) {
1909
- var _this = _super.call(this, spaceTrim.spaceTrim(function (block) { return "\n ".concat(block(message), "\n\n Note: This feature is not implemented yet but it will be soon.\n\n If you want speed up the implementation or just read more, look here:\n https://github.com/webgptorg/promptbook\n\n Or contact us on me@pavolhejny.com\n\n "); })) || this;
1909
+ var _this = _super.call(this, spaceTrim.spaceTrim(function (block) { return "\n ".concat(block(message), "\n\n Note: This feature is not implemented yet but it will be soon.\n\n If you want speed up the implementation or just read more, look here:\n https://github.com/webgptorg/promptbook\n\n Or contact us on pavol@ptbk.io\n\n "); })) || this;
1910
1910
  _this.name = 'NotYetImplementedError';
1911
1911
  Object.setPrototypeOf(_this, NotYetImplementedError.prototype);
1912
1912
  return _this;