@promptbook/legacy-documents 0.84.0-0 → 0.84.0-10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. package/README.md +1 -0
  2. package/esm/index.es.js +6 -6
  3. package/esm/index.es.js.map +1 -1
  4. package/esm/typings/src/_packages/cli.index.d.ts +8 -0
  5. package/esm/typings/src/_packages/core.index.d.ts +4 -0
  6. package/esm/typings/src/_packages/markitdown.index.d.ts +8 -0
  7. package/esm/typings/src/_packages/pdf.index.d.ts +6 -0
  8. package/esm/typings/src/_packages/utils.index.d.ts +2 -0
  9. package/esm/typings/src/_packages/wizzard.index.d.ts +8 -0
  10. package/esm/typings/src/constants.d.ts +1 -1
  11. package/esm/typings/src/executables/platforms/locateAppOnLinux.d.ts +1 -1
  12. package/esm/typings/src/executables/platforms/locateAppOnMacOs.d.ts +1 -1
  13. package/esm/typings/src/pipeline/book-notation.d.ts +4 -0
  14. package/esm/typings/src/pipeline/prompt-notation.d.ts +18 -0
  15. package/esm/typings/src/pipeline/prompt-notation.test.d.ts +4 -0
  16. package/esm/typings/src/scrapers/_boilerplate/BoilerplateScraper.d.ts +43 -0
  17. package/esm/typings/src/scrapers/_boilerplate/createBoilerplateScraper.d.ts +20 -0
  18. package/esm/typings/src/scrapers/_boilerplate/playground/boilerplate-scraper-playground.d.ts +5 -0
  19. package/esm/typings/src/scrapers/_boilerplate/register-constructor.d.ts +15 -0
  20. package/esm/typings/src/scrapers/_boilerplate/register-metadata.d.ts +28 -0
  21. package/esm/typings/src/scrapers/markitdown/MarkitdownScraper.d.ts +50 -0
  22. package/esm/typings/src/scrapers/markitdown/createMarkitdownScraper.d.ts +22 -0
  23. package/esm/typings/src/scrapers/markitdown/playground/markitdown-scraper-playground.d.ts +5 -0
  24. package/esm/typings/src/scrapers/markitdown/register-constructor.d.ts +17 -0
  25. package/esm/typings/src/scrapers/markitdown/register-metadata.d.ts +28 -0
  26. package/esm/typings/src/types/typeAliases.d.ts +1 -1
  27. package/package.json +3 -3
  28. package/umd/index.umd.js +6 -6
  29. package/umd/index.umd.js.map +1 -1
@@ -10,12 +10,16 @@ import { _OpenAiMetadataRegistration } from '../llm-providers/openai/register-co
10
10
  import { _OpenAiAssistantMetadataRegistration } from '../llm-providers/openai/register-configuration';
11
11
  import { _OpenAiRegistration } from '../llm-providers/openai/register-constructor';
12
12
  import { _OpenAiAssistantRegistration } from '../llm-providers/openai/register-constructor';
13
+ import { _BoilerplateScraperRegistration } from '../scrapers/_boilerplate/register-constructor';
14
+ import { _BoilerplateScraperMetadataRegistration } from '../scrapers/_boilerplate/register-metadata';
13
15
  import { _LegacyDocumentScraperRegistration } from '../scrapers/document-legacy/register-constructor';
14
16
  import { _LegacyDocumentScraperMetadataRegistration } from '../scrapers/document-legacy/register-metadata';
15
17
  import { _DocumentScraperRegistration } from '../scrapers/document/register-constructor';
16
18
  import { _DocumentScraperMetadataRegistration } from '../scrapers/document/register-metadata';
17
19
  import { _MarkdownScraperRegistration } from '../scrapers/markdown/register-constructor';
18
20
  import { _MarkdownScraperMetadataRegistration } from '../scrapers/markdown/register-metadata';
21
+ import { _MarkitdownScraperRegistration } from '../scrapers/markitdown/register-constructor';
22
+ import { _MarkitdownScraperMetadataRegistration } from '../scrapers/markitdown/register-metadata';
19
23
  import { _PdfScraperRegistration } from '../scrapers/pdf/register-constructor';
20
24
  import { _PdfScraperMetadataRegistration } from '../scrapers/pdf/register-metadata';
21
25
  import { _WebsiteScraperRegistration } from '../scrapers/website/register-constructor';
@@ -32,12 +36,16 @@ export { _OpenAiMetadataRegistration };
32
36
  export { _OpenAiAssistantMetadataRegistration };
33
37
  export { _OpenAiRegistration };
34
38
  export { _OpenAiAssistantRegistration };
39
+ export { _BoilerplateScraperRegistration };
40
+ export { _BoilerplateScraperMetadataRegistration };
35
41
  export { _LegacyDocumentScraperRegistration };
36
42
  export { _LegacyDocumentScraperMetadataRegistration };
37
43
  export { _DocumentScraperRegistration };
38
44
  export { _DocumentScraperMetadataRegistration };
39
45
  export { _MarkdownScraperRegistration };
40
46
  export { _MarkdownScraperMetadataRegistration };
47
+ export { _MarkitdownScraperRegistration };
48
+ export { _MarkitdownScraperMetadataRegistration };
41
49
  export { _PdfScraperRegistration };
42
50
  export { _PdfScraperMetadataRegistration };
43
51
  export { _WebsiteScraperRegistration };
@@ -106,6 +106,7 @@ import { isPipelinePrepared } from '../prepare/isPipelinePrepared';
106
106
  import { preparePipeline } from '../prepare/preparePipeline';
107
107
  import { prepareTasks } from '../prepare/prepareTasks';
108
108
  import { unpreparePipeline } from '../prepare/unpreparePipeline';
109
+ import { _BoilerplateScraperMetadataRegistration } from '../scrapers/_boilerplate/register-metadata';
109
110
  import { prepareKnowledgePieces } from '../scrapers/_common/prepareKnowledgePieces';
110
111
  import { $scrapersMetadataRegister } from '../scrapers/_common/register/$scrapersMetadataRegister';
111
112
  import { $scrapersRegister } from '../scrapers/_common/register/$scrapersRegister';
@@ -113,6 +114,7 @@ import { makeKnowledgeSourceHandler } from '../scrapers/_common/utils/makeKnowle
113
114
  import { _LegacyDocumentScraperMetadataRegistration } from '../scrapers/document-legacy/register-metadata';
114
115
  import { _DocumentScraperMetadataRegistration } from '../scrapers/document/register-metadata';
115
116
  import { _MarkdownScraperMetadataRegistration } from '../scrapers/markdown/register-metadata';
117
+ import { _MarkitdownScraperMetadataRegistration } from '../scrapers/markitdown/register-metadata';
116
118
  import { _PdfScraperMetadataRegistration } from '../scrapers/pdf/register-metadata';
117
119
  import { _WebsiteScraperMetadataRegistration } from '../scrapers/website/register-metadata';
118
120
  import { BlackholeStorage } from '../storage/blackhole/BlackholeStorage';
@@ -230,6 +232,7 @@ export { isPipelinePrepared };
230
232
  export { preparePipeline };
231
233
  export { prepareTasks };
232
234
  export { unpreparePipeline };
235
+ export { _BoilerplateScraperMetadataRegistration };
233
236
  export { prepareKnowledgePieces };
234
237
  export { $scrapersMetadataRegister };
235
238
  export { $scrapersRegister };
@@ -237,6 +240,7 @@ export { makeKnowledgeSourceHandler };
237
240
  export { _LegacyDocumentScraperMetadataRegistration };
238
241
  export { _DocumentScraperMetadataRegistration };
239
242
  export { _MarkdownScraperMetadataRegistration };
243
+ export { _MarkitdownScraperMetadataRegistration };
240
244
  export { _PdfScraperMetadataRegistration };
241
245
  export { _WebsiteScraperMetadataRegistration };
242
246
  export { BlackholeStorage };
@@ -0,0 +1,8 @@
1
+ import { BOOK_LANGUAGE_VERSION, PROMPTBOOK_ENGINE_VERSION } from '../version';
2
+ import { createMarkitdownScraper } from '../scrapers/markitdown/createMarkitdownScraper';
3
+ import { MarkitdownScraper } from '../scrapers/markitdown/MarkitdownScraper';
4
+ import { _MarkitdownScraperRegistration } from '../scrapers/markitdown/register-constructor';
5
+ export { BOOK_LANGUAGE_VERSION, PROMPTBOOK_ENGINE_VERSION };
6
+ export { createMarkitdownScraper };
7
+ export { MarkitdownScraper };
8
+ export { _MarkitdownScraperRegistration };
@@ -1,8 +1,14 @@
1
1
  import { BOOK_LANGUAGE_VERSION, PROMPTBOOK_ENGINE_VERSION } from '../version';
2
+ import { createMarkitdownScraper } from '../scrapers/markitdown/createMarkitdownScraper';
3
+ import { MarkitdownScraper } from '../scrapers/markitdown/MarkitdownScraper';
4
+ import { _MarkitdownScraperRegistration } from '../scrapers/markitdown/register-constructor';
2
5
  import { createPdfScraper } from '../scrapers/pdf/createPdfScraper';
3
6
  import { PdfScraper } from '../scrapers/pdf/PdfScraper';
4
7
  import { _PdfScraperRegistration } from '../scrapers/pdf/register-constructor';
5
8
  export { BOOK_LANGUAGE_VERSION, PROMPTBOOK_ENGINE_VERSION };
9
+ export { createMarkitdownScraper };
10
+ export { MarkitdownScraper };
11
+ export { _MarkitdownScraperRegistration };
6
12
  export { createPdfScraper };
7
13
  export { PdfScraper };
8
14
  export { _PdfScraperRegistration };
@@ -7,6 +7,7 @@ import { deserializeError } from '../errors/utils/deserializeError';
7
7
  import { serializeError } from '../errors/utils/serializeError';
8
8
  import { forEachAsync } from '../execution/utils/forEachAsync';
9
9
  import { isValidJsonString } from '../formats/json/utils/isValidJsonString';
10
+ import { prompt } from '../pipeline/prompt-notation';
10
11
  import { $getCurrentDate } from '../utils/$getCurrentDate';
11
12
  import { $isRunningInBrowser } from '../utils/environment/$isRunningInBrowser';
12
13
  import { $isRunningInJest } from '../utils/environment/$isRunningInJest';
@@ -87,6 +88,7 @@ export { deserializeError };
87
88
  export { serializeError };
88
89
  export { forEachAsync };
89
90
  export { isValidJsonString };
91
+ export { prompt };
90
92
  export { $getCurrentDate };
91
93
  export { $isRunningInBrowser };
92
94
  export { $isRunningInJest };
@@ -9,12 +9,16 @@ import { _OpenAiMetadataRegistration } from '../llm-providers/openai/register-co
9
9
  import { _OpenAiAssistantMetadataRegistration } from '../llm-providers/openai/register-configuration';
10
10
  import { _OpenAiRegistration } from '../llm-providers/openai/register-constructor';
11
11
  import { _OpenAiAssistantRegistration } from '../llm-providers/openai/register-constructor';
12
+ import { _BoilerplateScraperRegistration } from '../scrapers/_boilerplate/register-constructor';
13
+ import { _BoilerplateScraperMetadataRegistration } from '../scrapers/_boilerplate/register-metadata';
12
14
  import { _LegacyDocumentScraperRegistration } from '../scrapers/document-legacy/register-constructor';
13
15
  import { _LegacyDocumentScraperMetadataRegistration } from '../scrapers/document-legacy/register-metadata';
14
16
  import { _DocumentScraperRegistration } from '../scrapers/document/register-constructor';
15
17
  import { _DocumentScraperMetadataRegistration } from '../scrapers/document/register-metadata';
16
18
  import { _MarkdownScraperRegistration } from '../scrapers/markdown/register-constructor';
17
19
  import { _MarkdownScraperMetadataRegistration } from '../scrapers/markdown/register-metadata';
20
+ import { _MarkitdownScraperRegistration } from '../scrapers/markitdown/register-constructor';
21
+ import { _MarkitdownScraperMetadataRegistration } from '../scrapers/markitdown/register-metadata';
18
22
  import { _PdfScraperRegistration } from '../scrapers/pdf/register-constructor';
19
23
  import { _PdfScraperMetadataRegistration } from '../scrapers/pdf/register-metadata';
20
24
  import { _WebsiteScraperRegistration } from '../scrapers/website/register-constructor';
@@ -31,12 +35,16 @@ export { _OpenAiMetadataRegistration };
31
35
  export { _OpenAiAssistantMetadataRegistration };
32
36
  export { _OpenAiRegistration };
33
37
  export { _OpenAiAssistantRegistration };
38
+ export { _BoilerplateScraperRegistration };
39
+ export { _BoilerplateScraperMetadataRegistration };
34
40
  export { _LegacyDocumentScraperRegistration };
35
41
  export { _LegacyDocumentScraperMetadataRegistration };
36
42
  export { _DocumentScraperRegistration };
37
43
  export { _DocumentScraperMetadataRegistration };
38
44
  export { _MarkdownScraperRegistration };
39
45
  export { _MarkdownScraperMetadataRegistration };
46
+ export { _MarkitdownScraperRegistration };
47
+ export { _MarkitdownScraperMetadataRegistration };
40
48
  export { _PdfScraperRegistration };
41
49
  export { _PdfScraperMetadataRegistration };
42
50
  export { _WebsiteScraperRegistration };
@@ -11,7 +11,7 @@ export declare const ORDER_OF_PIPELINE_JSON: ExportJsonOptions<PipelineJson>['or
11
11
  *
12
12
  * @private within the repository
13
13
  */
14
- export declare const REPLACING_NONCE = "u$k42k%!V2zo34w7Fu#@QUHYPW";
14
+ export declare const REPLACING_NONCE = "ptbkauk42kV2dzao34faw7FudQUHYPtW";
15
15
  /**
16
16
  * @@@
17
17
  *
@@ -5,7 +5,7 @@ import type { LocateAppOptions } from '../locateApp';
5
5
  *
6
6
  * @private within the repository
7
7
  */
8
- export declare function locateAppOnLinux({ appName, linuxWhich, }: Pick<Required<LocateAppOptions>, 'appName' | 'linuxWhich'>): Promise<string_executable_path | null>;
8
+ export declare function locateAppOnLinux({ linuxWhich, }: Pick<Required<LocateAppOptions>, 'linuxWhich'>): Promise<string_executable_path | null>;
9
9
  /**
10
10
  * TODO: [🧠][♿] Maybe export through `@promptbook/node`
11
11
  * Note: [🟢] Code in this file should never be never released in packages that could be imported into browser environment
@@ -5,7 +5,7 @@ import type { LocateAppOptions } from '../locateApp';
5
5
  *
6
6
  * @private within the repository
7
7
  */
8
- export declare function locateAppOnMacOs({ appName, macOsName, }: Pick<Required<LocateAppOptions>, 'appName' | 'macOsName'>): Promise<string_executable_path | null>;
8
+ export declare function locateAppOnMacOs({ macOsName, }: Pick<Required<LocateAppOptions>, 'macOsName'>): Promise<string_executable_path | null>;
9
9
  /**
10
10
  * TODO: [🧠][♿] Maybe export through `@promptbook/node`
11
11
  * Note: [🟢] Code in this file should never be never released in packages that could be imported into browser environment
@@ -2,6 +2,10 @@ import type { PipelineString } from './PipelineString';
2
2
  /**
3
3
  * Tag function for notating a pipeline with a book\`...\ notation as template literal
4
4
  *
5
+ * Note: There are 2 similar functions:
6
+ * 1) `prompt` for notating single prompt exported from `@promptbook/utils`
7
+ * 1) `book` for notating and validating entire books exported from `@promptbook/utils`
8
+ *
5
9
  * @param strings @@@
6
10
  * @param values @@@
7
11
  * @returns the pipeline string
@@ -0,0 +1,18 @@
1
+ import type { string_prompt } from '../types/typeAliases';
2
+ /**
3
+ * Tag function for notating a prompt as template literal
4
+ *
5
+ * Note: There are 2 similar functions:
6
+ * 1) `prompt` for notating single prompt exported from `@promptbook/utils`
7
+ * 1) `book` for notating and validating entire books exported from `@promptbook/utils`
8
+ *
9
+ * @param strings @@@
10
+ * @param values @@@
11
+ * @returns the pipeline string
12
+ * @public exported from `@promptbook/utils`
13
+ */
14
+ export declare function prompt(strings: TemplateStringsArray, ...values: Array<string>): string_prompt;
15
+ /**
16
+ * TODO: [🧠][🈴] Where is the best location for this file
17
+ * Note: [💞] Ignore a discrepancy between file name and entity name
18
+ */
@@ -0,0 +1,4 @@
1
+ export {};
2
+ /**
3
+ * TODO: [🧠][🈴] Where is the best location for this file
4
+ */
@@ -0,0 +1,43 @@
1
+ import type { ExecutionTools } from '../../execution/ExecutionTools';
2
+ import type { KnowledgePiecePreparedJson } from '../../pipeline/PipelineJson/KnowledgePieceJson';
3
+ import type { PrepareAndScrapeOptions } from '../../prepare/PrepareAndScrapeOptions';
4
+ import type { Converter } from '../_common/Converter';
5
+ import type { ScraperAndConverterMetadata } from '../_common/register/ScraperAndConverterMetadata';
6
+ import type { Scraper } from '../_common/Scraper';
7
+ import type { ScraperSourceHandler } from '../_common/Scraper';
8
+ import type { ScraperIntermediateSource } from '../_common/ScraperIntermediateSource';
9
+ /**
10
+ * Scraper of @@@ files
11
+ *
12
+ * @see `documentationUrl` for more details
13
+ * @public exported from `@promptbook/boilerplate`
14
+ */
15
+ export declare class BoilerplateScraper implements Converter, Scraper {
16
+ private readonly tools;
17
+ private readonly options;
18
+ /**
19
+ * Metadata of the scraper which includes title, mime types, etc.
20
+ */
21
+ get metadata(): ScraperAndConverterMetadata;
22
+ /**
23
+ * Markdown scraper is used internally
24
+ */
25
+ private readonly markdownScraper;
26
+ constructor(tools: Pick<ExecutionTools, 'fs' | 'llm' | 'executables'>, options: PrepareAndScrapeOptions);
27
+ /**
28
+ * Convert the `.@@@` to `.md` file and returns intermediate source
29
+ *
30
+ * Note: `$` is used to indicate that this function is not a pure function - it leaves files on the disk and you are responsible for cleaning them by calling `destroy` method of returned object
31
+ */
32
+ $convert(source: ScraperSourceHandler): Promise<ScraperIntermediateSource>;
33
+ /**
34
+ * Scrapes the docx file and returns the knowledge pieces or `null` if it can't scrape it
35
+ */
36
+ scrape(source: ScraperSourceHandler): Promise<ReadonlyArray<Omit<KnowledgePiecePreparedJson, 'sources' | 'preparationIds'>> | null>;
37
+ }
38
+ /**
39
+ * TODO: [👣] Converted documents can act as cached items - there is no need to run conversion each time
40
+ * TODO: [🪂] Do it in parallel
41
+ * Note: No need to aggregate usage here, it is done by intercepting the llmTools
42
+ * @@@ Note: [🟢] Code in this file should never be never released in packages that could be imported into browser environment
43
+ */
@@ -0,0 +1,20 @@
1
+ import type { ExecutionTools } from '../../execution/ExecutionTools';
2
+ import type { PrepareAndScrapeOptions } from '../../prepare/PrepareAndScrapeOptions';
3
+ import { BoilerplateScraper } from './BoilerplateScraper';
4
+ /**
5
+ * Constructor of `BoilerplateScraper`
6
+ *
7
+ * @public exported from `@promptbook/boilerplate`
8
+ */
9
+ export declare const createBoilerplateScraper: ((tools: Pick<ExecutionTools, 'llm'>, options: PrepareAndScrapeOptions) => BoilerplateScraper) & import("type-fest/source/readonly-deep").ReadonlyObjectDeep<{
10
+ title: string;
11
+ packageName: string;
12
+ className: string;
13
+ mimeTypes: string[];
14
+ documentationUrl: "https://github.com/webgptorg/promptbook/discussions/@@@";
15
+ isAvilableInBrowser: false;
16
+ requiredExecutables: never[];
17
+ }>;
18
+ /**
19
+ * TODO: [🎶] Naming "constructor" vs "creator" vs "factory"
20
+ */
@@ -0,0 +1,5 @@
1
+ #!/usr/bin/env ts-node
2
+ export {};
3
+ /**
4
+ * Note: [⚫] Code in this file should never be published in any package
5
+ */
@@ -0,0 +1,15 @@
1
+ import type { Registration } from '../../utils/$Register';
2
+ /**
3
+ * Registration of known scraper
4
+ *
5
+ * Warning: This is not useful for the end user, it is just a side effect of the mechanism that handles all available known scrapers
6
+ *
7
+ * @public exported from `@promptbook/boilerplate`
8
+ * @public exported from `@promptbook/wizzard`
9
+ * @public exported from `@promptbook/cli`
10
+ */
11
+ export declare const _BoilerplateScraperRegistration: Registration;
12
+ /**
13
+ * TODO: [🎶] Naming "constructor" vs "creator" vs "factory"
14
+ * Note: [💞] Ignore a discrepancy between file name and entity name
15
+ */
@@ -0,0 +1,28 @@
1
+ import type { Registration } from '../../utils/$Register';
2
+ /**
3
+ * Metadata of the scraper
4
+ *
5
+ * @private within the scraper directory
6
+ */
7
+ export declare const boilerplateScraperMetadata: import("type-fest/source/readonly-deep").ReadonlyObjectDeep<{
8
+ title: string;
9
+ packageName: string;
10
+ className: string;
11
+ mimeTypes: string[];
12
+ documentationUrl: "https://github.com/webgptorg/promptbook/discussions/@@@";
13
+ isAvilableInBrowser: false;
14
+ requiredExecutables: never[];
15
+ }>;
16
+ /**
17
+ * Registration of known scraper metadata
18
+ *
19
+ * Warning: This is not useful for the end user, it is just a side effect of the mechanism that handles all available known scrapers
20
+ *
21
+ * @public exported from `@promptbook/core`
22
+ * @public exported from `@promptbook/wizzard`
23
+ * @public exported from `@promptbook/cli`
24
+ */
25
+ export declare const _BoilerplateScraperMetadataRegistration: Registration;
26
+ /**
27
+ * Note: [💞] Ignore a discrepancy between file name and entity name
28
+ */
@@ -0,0 +1,50 @@
1
+ import type { ExecutionTools } from '../../execution/ExecutionTools';
2
+ import type { KnowledgePiecePreparedJson } from '../../pipeline/PipelineJson/KnowledgePieceJson';
3
+ import type { PrepareAndScrapeOptions } from '../../prepare/PrepareAndScrapeOptions';
4
+ import type { Converter } from '../_common/Converter';
5
+ import type { ScraperAndConverterMetadata } from '../_common/register/ScraperAndConverterMetadata';
6
+ import type { Scraper } from '../_common/Scraper';
7
+ import type { ScraperSourceHandler } from '../_common/Scraper';
8
+ import type { ScraperIntermediateSource } from '../_common/ScraperIntermediateSource';
9
+ /**
10
+ * Integration of Markitdown by Microsoft into Promptbook
11
+ *
12
+ * @see https://github.com/microsoft/markitdown
13
+ * @see `documentationUrl` for more details
14
+ * @public exported from `@promptbook/markitdown`
15
+ * @public exported from `@promptbook/pdf`
16
+ */
17
+ export declare class MarkitdownScraper implements Converter, Scraper {
18
+ private readonly tools;
19
+ private readonly options;
20
+ /**
21
+ * Metadata of the scraper which includes title, mime types, etc.
22
+ */
23
+ get metadata(): ScraperAndConverterMetadata;
24
+ /**
25
+ * Markdown scraper is used internally
26
+ */
27
+ private readonly markdownScraper;
28
+ /**
29
+ * Markdown scraper is used internally
30
+ */
31
+ private readonly markitdown;
32
+ constructor(tools: Pick<ExecutionTools, 'fs' | 'llm' | 'executables'>, options: PrepareAndScrapeOptions);
33
+ /**
34
+ * Convert the documents to `.md` file and returns intermediate source
35
+ *
36
+ * Note: `$` is used to indicate that this function is not a pure function - it leaves files on the disk and you are responsible for cleaning them by calling `destroy` method of returned object
37
+ */
38
+ $convert(source: ScraperSourceHandler): Promise<ScraperIntermediateSource>;
39
+ /**
40
+ * Scrapes the docx file and returns the knowledge pieces or `null` if it can't scrape it
41
+ */
42
+ scrape(source: ScraperSourceHandler): Promise<ReadonlyArray<Omit<KnowledgePiecePreparedJson, 'sources' | 'preparationIds'>> | null>;
43
+ }
44
+ /**
45
+ * TODO: [🧠][🌜] Export only from `@promptbook/markitdown` or `@promptbook/pdf` NOT both
46
+ * TODO: [👣] Converted documents can act as cached items - there is no need to run conversion each time
47
+ * TODO: [🪂] Do it in parallel
48
+ * Note: No need to aggregate usage here, it is done by intercepting the llmTools
49
+ * Note: [🟢] Code in this file should never be never released in packages that could be imported into browser environment
50
+ */
@@ -0,0 +1,22 @@
1
+ import type { ExecutionTools } from '../../execution/ExecutionTools';
2
+ import type { PrepareAndScrapeOptions } from '../../prepare/PrepareAndScrapeOptions';
3
+ import { MarkitdownScraper } from './MarkitdownScraper';
4
+ /**
5
+ * Constructor of `MarkitdownScraper`
6
+ *
7
+ * @public exported from `@promptbook/markitdown`
8
+ * @public exported from `@promptbook/pdf`
9
+ */
10
+ export declare const createMarkitdownScraper: ((tools: Pick<ExecutionTools, 'llm'>, options: PrepareAndScrapeOptions) => MarkitdownScraper) & import("type-fest/source/readonly-deep").ReadonlyObjectDeep<{
11
+ title: string;
12
+ packageName: string;
13
+ className: string;
14
+ mimeTypes: string[];
15
+ documentationUrl: "https://github.com/webgptorg/promptbook/discussions/@@";
16
+ isAvilableInBrowser: false;
17
+ requiredExecutables: never[];
18
+ }>;
19
+ /**
20
+ * TODO: [🧠][🌜] Export only from `@promptbook/markitdown` or `@promptbook/pdf` NOT both
21
+ * TODO: [🎶] Naming "constructor" vs "creator" vs "factory"
22
+ */
@@ -0,0 +1,5 @@
1
+ #!/usr/bin/env ts-node
2
+ export {};
3
+ /**
4
+ * Note: [⚫] Code in this file should never be published in any package
5
+ */
@@ -0,0 +1,17 @@
1
+ import type { Registration } from '../../utils/$Register';
2
+ /**
3
+ * Registration of known scraper
4
+ *
5
+ * Warning: This is not useful for the end user, it is just a side effect of the mechanism that handles all available known scrapers
6
+ *
7
+ * @public exported from `@promptbook/markitdown`
8
+ * @public exported from `@promptbook/pdf`
9
+ * @public exported from `@promptbook/wizzard`
10
+ * @public exported from `@promptbook/cli`
11
+ */
12
+ export declare const _MarkitdownScraperRegistration: Registration;
13
+ /**
14
+ * TODO: [🧠][🌜] Export only from `@promptbook/markitdown` or `@promptbook/pdf` NOT both
15
+ * TODO: [🎶] Naming "constructor" vs "creator" vs "factory"
16
+ * Note: [💞] Ignore a discrepancy between file name and entity name
17
+ */
@@ -0,0 +1,28 @@
1
+ import type { Registration } from '../../utils/$Register';
2
+ /**
3
+ * Metadata of the scraper
4
+ *
5
+ * @private within the scraper directory
6
+ */
7
+ export declare const markitdownScraperMetadata: import("type-fest/source/readonly-deep").ReadonlyObjectDeep<{
8
+ title: string;
9
+ packageName: string;
10
+ className: string;
11
+ mimeTypes: string[];
12
+ documentationUrl: "https://github.com/webgptorg/promptbook/discussions/@@";
13
+ isAvilableInBrowser: false;
14
+ requiredExecutables: never[];
15
+ }>;
16
+ /**
17
+ * Registration of known scraper metadata
18
+ *
19
+ * Warning: This is not useful for the end user, it is just a side effect of the mechanism that handles all available known scrapers
20
+ *
21
+ * @public exported from `@promptbook/core`
22
+ * @public exported from `@promptbook/wizzard`
23
+ * @public exported from `@promptbook/cli`
24
+ */
25
+ export declare const _MarkitdownScraperMetadataRegistration: Registration;
26
+ /**
27
+ * Note: [💞] Ignore a discrepancy between file name and entity name
28
+ */
@@ -235,7 +235,7 @@ export type string_markdown_codeblock_language = 'book' | 'markdown' | 'text' |
235
235
  /**
236
236
  * @@@
237
237
  */
238
- export type string_promptbook_documentation_url = `https://github.com/webgptorg/promptbook/discussions/${number | '@@'}`;
238
+ export type string_promptbook_documentation_url = `https://github.com/webgptorg/promptbook/discussions/${number | `@@${string}`}`;
239
239
  /**
240
240
  * Semantic helper
241
241
  *
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@promptbook/legacy-documents",
3
- "version": "0.84.0-0",
3
+ "version": "0.84.0-10",
4
4
  "description": "It's time for a paradigm shift. The future of software in plain English, French or Latin",
5
5
  "--note-0": " <- [🐊]",
6
6
  "private": false,
@@ -10,7 +10,7 @@
10
10
  "url": "https://github.com/webgptorg/promptbook"
11
11
  },
12
12
  "contributors": [
13
- "Pavol Hejný <me@pavolhejny.com> (https://www.pavolhejny.com/)"
13
+ "Pavol Hejný <pavol@ptbk.io> (https://www.pavolhejny.com/)"
14
14
  ],
15
15
  "--todo-0": "TODO: [❇️] Make better list of keywords",
16
16
  "keywords": [
@@ -54,7 +54,7 @@
54
54
  "module": "./esm/index.es.js",
55
55
  "typings": "./esm/typings/src/_packages/legacy-documents.index.d.ts",
56
56
  "peerDependencies": {
57
- "@promptbook/core": "0.84.0-0"
57
+ "@promptbook/core": "0.84.0-10"
58
58
  },
59
59
  "dependencies": {
60
60
  "colors": "1.4.0",
package/umd/index.umd.js CHANGED
@@ -25,7 +25,7 @@
25
25
  * @generated
26
26
  * @see https://github.com/webgptorg/promptbook
27
27
  */
28
- var PROMPTBOOK_ENGINE_VERSION = '0.83.0';
28
+ var PROMPTBOOK_ENGINE_VERSION = '0.84.0-9';
29
29
  /**
30
30
  * TODO: string_promptbook_version should be constrained to the all versions of Promptbook engine
31
31
  * Note: [💞] Ignore a discrepancy between file name and entity name
@@ -183,7 +183,7 @@
183
183
  *
184
184
  * @public exported from `@promptbook/core`
185
185
  */
186
- var ADMIN_EMAIL = 'me@pavolhejny.com';
186
+ var ADMIN_EMAIL = 'pavol@ptbk.io';
187
187
  /**
188
188
  * Name of the responsible person for the Promptbook on GitHub
189
189
  *
@@ -1682,7 +1682,7 @@
1682
1682
  *
1683
1683
  * @private within the repository
1684
1684
  */
1685
- var REPLACING_NONCE = 'u$k42k%!V2zo34w7Fu#@QUHYPW';
1685
+ var REPLACING_NONCE = 'ptbkauk42kV2dzao34faw7FudQUHYPtW';
1686
1686
  /**
1687
1687
  * @@@
1688
1688
  *
@@ -2448,7 +2448,7 @@
2448
2448
  var NotYetImplementedError = /** @class */ (function (_super) {
2449
2449
  __extends(NotYetImplementedError, _super);
2450
2450
  function NotYetImplementedError(message) {
2451
- var _this = _super.call(this, spaceTrim.spaceTrim(function (block) { return "\n ".concat(block(message), "\n\n Note: This feature is not implemented yet but it will be soon.\n\n If you want speed up the implementation or just read more, look here:\n https://github.com/webgptorg/promptbook\n\n Or contact us on me@pavolhejny.com\n\n "); })) || this;
2451
+ var _this = _super.call(this, spaceTrim.spaceTrim(function (block) { return "\n ".concat(block(message), "\n\n Note: This feature is not implemented yet but it will be soon.\n\n If you want speed up the implementation or just read more, look here:\n https://github.com/webgptorg/promptbook\n\n Or contact us on pavol@ptbk.io\n\n "); })) || this;
2452
2452
  _this.name = 'NotYetImplementedError';
2453
2453
  Object.setPrototypeOf(_this, NotYetImplementedError.prototype);
2454
2454
  return _this;
@@ -6472,7 +6472,7 @@
6472
6472
  case 4:
6473
6473
  // Note: [0]
6474
6474
  if (!(_g.sent())) {
6475
- throw new UnexpectedError(spaceTrim__default["default"](function (block) { return "\n File that was supposed to be created by Pandoc does not exist for unknown reason\n\n Expected file:\n ".concat(block(cacheFilehandler.filename), "\n\n Command:\n > ").concat(block(command_1), "\n\n "); }));
6475
+ throw new UnexpectedError(spaceTrim__default["default"](function (block) { return "\n File that was supposed to be created by Pandoc does not exist for unknown reason\n\n Expected file:\n ".concat(block(cacheFilehandler.filename), "\n\n Command:\n > ").concat(block(command_1), "\n\n "); }));
6476
6476
  }
6477
6477
  _g.label = 5;
6478
6478
  case 5: return [2 /*return*/, cacheFilehandler];
@@ -6647,7 +6647,7 @@
6647
6647
  case 4:
6648
6648
  files_1 = _g.sent();
6649
6649
  if (files_1.length !== 1) {
6650
- throw new UnexpectedError(spaceTrim__default["default"](function (block) { return "\n Expected exactly 1 file in the LibreOffice output directory, got ".concat(files_1.length, "\n\n The temporary folder:\n ").concat(block(documentSourceOutdirPathForLibreOffice_1), "\n\n Command:\n > ").concat(block(command_1), "\n "); }));
6650
+ throw new UnexpectedError(spaceTrim__default["default"](function (block) { return "\n Expected exactly 1 file in the LibreOffice output directory, got ".concat(files_1.length, "\n\n The temporary folder:\n ").concat(block(documentSourceOutdirPathForLibreOffice_1), "\n\n Command:\n > ").concat(block(command_1), "\n "); }));
6651
6651
  }
6652
6652
  file = files_1[0];
6653
6653
  return [4 /*yield*/, promises.rename(path.join(documentSourceOutdirPathForLibreOffice_1, file), cacheFilehandler.filename)];