@sillsdev/docu-notion 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +135 -0
  3. package/dist/FlatGuidLayoutStrategy.d.ts +6 -0
  4. package/dist/FlatGuidLayoutStrategy.js +25 -0
  5. package/dist/HierarchicalNamedLayoutStrategy.d.ts +7 -0
  6. package/dist/HierarchicalNamedLayoutStrategy.js +80 -0
  7. package/dist/LayoutStrategy.d.ts +12 -0
  8. package/dist/LayoutStrategy.js +83 -0
  9. package/dist/MakeImagePersistencePlan.d.ts +2 -0
  10. package/dist/MakeImagePersistencePlan.js +66 -0
  11. package/dist/NotionImage-CaptionReading.spec.d.ts +1 -0
  12. package/dist/NotionImage-CaptionReading.spec.js +233 -0
  13. package/dist/NotionPage.d.ts +44 -0
  14. package/dist/NotionPage.js +194 -0
  15. package/dist/config/configuration.d.ts +5 -0
  16. package/dist/config/configuration.js +86 -0
  17. package/dist/config/default.docunotion.config.d.ts +3 -0
  18. package/dist/config/default.docunotion.config.js +37 -0
  19. package/dist/images.d.ts +24 -0
  20. package/dist/images.js +230 -0
  21. package/dist/index.d.ts +7 -0
  22. package/dist/index.js +37 -0
  23. package/dist/log.d.ts +11 -0
  24. package/dist/log.js +61 -0
  25. package/dist/makeImagePersistencePlan.spec.d.ts +1 -0
  26. package/dist/makeImagePersistencePlan.spec.js +35 -0
  27. package/dist/notion-styles.css +58 -0
  28. package/dist/plugins/CalloutTransformer.d.ts +24 -0
  29. package/dist/plugins/CalloutTransformer.js +88 -0
  30. package/dist/plugins/CalloutTransformer.spec.d.ts +1 -0
  31. package/dist/plugins/CalloutTransformer.spec.js +199 -0
  32. package/dist/plugins/ColumnListTransformer.d.ts +2 -0
  33. package/dist/plugins/ColumnListTransformer.js +34 -0
  34. package/dist/plugins/ColumnTransformer.d.ts +2 -0
  35. package/dist/plugins/ColumnTransformer.js +67 -0
  36. package/dist/plugins/EscapeHtmlBlockModifier.d.ts +2 -0
  37. package/dist/plugins/EscapeHtmlBlockModifier.js +41 -0
  38. package/dist/plugins/EscapeHtmlBlockModifier.spec.d.ts +1 -0
  39. package/dist/plugins/EscapeHtmlBlockModifier.spec.js +130 -0
  40. package/dist/plugins/HeadingTranformer.spec.d.ts +1 -0
  41. package/dist/plugins/HeadingTranformer.spec.js +46 -0
  42. package/dist/plugins/HeadingTransformer.d.ts +2 -0
  43. package/dist/plugins/HeadingTransformer.js +63 -0
  44. package/dist/plugins/NumberedListTransformer.d.ts +2 -0
  45. package/dist/plugins/NumberedListTransformer.js +55 -0
  46. package/dist/plugins/NumberedListTransformer.spec.d.ts +1 -0
  47. package/dist/plugins/NumberedListTransformer.spec.js +86 -0
  48. package/dist/plugins/TableTransformer.d.ts +5 -0
  49. package/dist/plugins/TableTransformer.js +70 -0
  50. package/dist/plugins/embedTweaks.d.ts +5 -0
  51. package/dist/plugins/embedTweaks.js +46 -0
  52. package/dist/plugins/embedTweaks.spec.d.ts +1 -0
  53. package/dist/plugins/embedTweaks.spec.js +230 -0
  54. package/dist/plugins/externalLinks.d.ts +2 -0
  55. package/dist/plugins/externalLinks.js +26 -0
  56. package/dist/plugins/externalLinks.spec.d.ts +1 -0
  57. package/dist/plugins/externalLinks.spec.js +132 -0
  58. package/dist/plugins/internalLinks.d.ts +6 -0
  59. package/dist/plugins/internalLinks.js +78 -0
  60. package/dist/plugins/internalLinks.spec.d.ts +1 -0
  61. package/dist/plugins/internalLinks.spec.js +442 -0
  62. package/dist/plugins/pluginTestRun.d.ts +10 -0
  63. package/dist/plugins/pluginTestRun.js +248 -0
  64. package/dist/plugins/pluginTypes.d.ts +42 -0
  65. package/dist/plugins/pluginTypes.js +2 -0
  66. package/dist/pull.d.ts +12 -0
  67. package/dist/pull.js +253 -0
  68. package/dist/run.d.ts +1 -0
  69. package/dist/run.js +35 -0
  70. package/dist/transform.d.ts +6 -0
  71. package/dist/transform.js +195 -0
  72. package/dist/types.d.ts +8 -0
  73. package/dist/types.js +2 -0
  74. package/package.json +96 -0
package/dist/transform.js ADDED
@@ -0,0 +1,195 @@
1
+ "use strict";
2
// TypeScript down-level helper: drives a generator as if it were an async function.
// Each yielded value is awaited; its fulfilment/rejection is fed back into the
// generator via next()/throw(), and the returned promise settles with the
// generator's return value or the first uncaught error.
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    // Fall back to the native Promise when the caller does not supply a constructor.
    const PromiseImpl = P || (P = Promise);
    // Wrap plain values so that `.then` is always available.
    const toPromise = (value) => (value instanceof PromiseImpl
        ? value
        : new PromiseImpl((resolve) => { resolve(value); }));
    return new PromiseImpl((resolve, reject) => {
        // Either finish with the generator's return value or wait on what it yielded.
        const settle = (result) => {
            if (result.done) {
                resolve(result.value);
            }
            else {
                toPromise(result.value).then(onFulfilled, onRejected);
            }
        };
        const onFulfilled = (value) => {
            try {
                settle(generator.next(value));
            }
            catch (e) {
                reject(e);
            }
        };
        const onRejected = (reason) => {
            try {
                settle(generator["throw"](reason));
            }
            catch (e) {
                reject(e);
            }
        };
        // Instantiate the generator with the caller's `this`/arguments and take the first step.
        generator = generator.apply(thisArg, _arguments || []);
        settle(generator.next());
    });
};
11
// TypeScript interop helper: ES-module namespaces pass through unchanged;
// anything else is wrapped so that it is reachable as `.default`.
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
14
+ Object.defineProperty(exports, "__esModule", { value: true });
15
+ exports.getMarkdownFromNotionBlocks = exports.getMarkdownForPage = void 0;
16
+ const chalk_1 = __importDefault(require("chalk"));
17
+ const log_1 = require("./log");
18
/**
 * Fetches a page's blocks, converts them to markdown, and prepends front matter.
 *
 * @param config  Configuration whose `plugins` drive the conversion.
 * @param context Conversion context; must provide `getBlockChildren(pageId)`.
 * @param page    The page to convert.
 * @returns {Promise<string>} The front matter, a newline, then the markdown body.
 */
async function getMarkdownForPage(config, context, page) {
    // Describe where the page's slug comes from, for the progress message only.
    let slugDescription;
    if (page.hasExplicitSlug) {
        slugDescription = page.slug;
    }
    else if (page.foundDirectlyInOutline) {
        slugDescription = "Descendant of Outline, not Database";
    }
    else {
        slugDescription = "NO SLUG";
    }
    (0, log_1.info)(`Reading & converting page ${page.layoutContext}/${page.nameOrTitle} (${chalk_1.default.blue(slugDescription)})`);
    const blocks = await context.getBlockChildren(page.pageId);
    // The JSON dump is wrapped in a callback and handed to logDebugFn rather than built here.
    (0, log_1.logDebugFn)("markdown from page", () => JSON.stringify(blocks, null, 2));
    const body = await getMarkdownFromNotionBlocks(context, config, blocks);
    const frontmatter = getFrontMatter(page); // todo should be a plugin
    return `${frontmatter}\n${body}`;
}
32
+ exports.getMarkdownForPage = getMarkdownForPage;
33
+ // this is split off from getMarkdownForPage so that unit tests can provide the block contents
34
/**
 * Runs the full block-to-markdown pipeline on an already-fetched list of blocks.
 *
 * Stages, in order: plugin block modifications, registration of plugin
 * overrides with notion-to-md, the notion-to-md conversion itself, plugin
 * link fixes, and finally plugin regex tweaks.
 *
 * @param context Conversion context (holds the notion-to-md instance).
 * @param config  Configuration whose `plugins` supply every stage.
 * @param blocks  Notion blocks; plugins may modify them in place.
 * @returns {Promise<string>} The collected import lines, a newline, then the markdown body.
 */
async function getMarkdownFromNotionBlocks(context, config, blocks) {
    // Let plugins adjust the raw blocks that came from the Notion API.
    doNotionBlockTransforms(blocks, config);
    // Install plugin overrides for the default notion-to-md conversions.
    registerNotionToMarkdownCustomTransforms(config, context);
    // The main conversion, done by the notion-to-md library.
    const convertedMarkdown = await doNotionToMarkdown(context, blocks);
    // Fix up links now that they exist as markdown and all pages are known.
    const linkedMarkdown = doLinkFixes(context, convertedMarkdown, config);
    // Simple regex-based tweaks; these are usually related to docusaurus.
    const tweaked = await doTransformsOnMarkdown(config, linkedMarkdown);
    return `${tweaked.imports}\n${tweaked.body}`;
}
53
+ exports.getMarkdownFromNotionBlocks = getMarkdownFromNotionBlocks;
54
+ // operations on notion blocks before they are converted to markdown
55
/**
 * Hands each block to every plugin-supplied block modification, in plugin order.
 *
 * @param blocks Blocks to process; each `modify` callback receives the block
 *               object itself, so any changes it makes are in place.
 * @param config Configuration; plugins lacking `notionBlockModifications` are skipped.
 */
function doNotionBlockTransforms(blocks, config) {
    for (const block of blocks) {
        for (const plugin of config.plugins) {
            if (!plugin.notionBlockModifications) {
                continue;
            }
            for (const transform of plugin.notionBlockModifications) {
                (0, log_1.logDebug)("transforming block with plugin", plugin.name);
                transform.modify(block);
            }
        }
    }
}
67
/**
 * Applies every plugin's `regexMarkdownModifications` to the markdown.
 *
 * Each modification's regex is searched for in the untouched `input`, and the
 * resulting replacement is spliced into the working copy (`body`). Fenced code
 * blocks are matched first by the combined regex and left alone.
 *
 * Fixes over the previous implementation:
 *  - match positions refer to `input`, so they are now translated into `body`
 *    coordinates before splicing; previously any replacement that changed the
 *    length made later replacements hit the wrong text or nothing at all;
 *  - a regex that can match the empty string no longer loops forever;
 *  - captured text and replacements are inserted literally, so `$&`, `$$`, etc.
 *    appearing in them are no longer expanded by `String.prototype.replace`;
 *  - the leftover `console.log` debugging line is gone (the verbose log already
 *    reports every replacement);
 *  - a modification's imports are collected only when its replacement actually
 *    lands in the body.
 *
 * NOTE: only the `source` of `mod.regex` is used; its flags are not carried over.
 *
 * @param config Configuration whose `plugins` may each carry `regexMarkdownModifications`
 *               (objects with `regex` plus `getReplacement` or `replacementPattern`,
 *               and optionally `imports`).
 * @param input  Markdown to transform.
 * @returns {Promise<{body: string, imports: string}>} The transformed markdown and the
 *          de-duplicated import lines joined with newlines.
 */
async function doTransformsOnMarkdown(config, input) {
    // Gather all modifications, sticking the plugin name into each mod for logging.
    const regexMods = config.plugins
        .filter(plugin => !!plugin.regexMarkdownModifications)
        .flatMap(plugin => plugin.regexMarkdownModifications.map(m => Object.assign({ name: plugin.name }, m)));
    // regex that matches markdown code blocks
    const codeBlocks = /```.*\n[\s\S]*?\n```/;
    const imports = new Set();
    // Replacements made so far, in `input` coordinates, with their length change.
    const appliedEdits = [];
    let body = input;
    for (const mod of regexMods) {
        // regex.exec is stateful, so we don't want to mess up the plugin's use of its
        // own regex: build a fresh one, with the "g" flag so that we get all matches.
        // Group 1 wraps the plugin's regex, so the plugin's first group is match[2].
        const regex = new RegExp(`${codeBlocks.source}|(${mod.regex.source})`, "g");
        let match;
        while ((match = regex.exec(input)) !== null) {
            const original = match[0];
            if (original === "") {
                // exec() does not advance on an empty match; do it by hand.
                regex.lastIndex++;
                continue;
            }
            if (original.startsWith("```") && original.endsWith("```")) {
                continue; // code block
            }
            let replacement;
            if (mod.getReplacement) {
                replacement = await mod.getReplacement(original);
            }
            else if (mod.replacementPattern) {
                const captured = String(match[2]);
                // A replacer function keeps "$" sequences in the captured text literal.
                replacement = mod.replacementPattern.replace("$1", () => captured);
            }
            if (replacement === undefined) {
                continue;
            }
            (0, log_1.verbose)(`[${mod.name}] ${original} --> ${replacement}`);
            const replacementText = String(replacement);
            const searchFrom = mapInputIndexToBody(appliedEdits, match.index);
            const at = body.indexOf(original, searchFrom);
            if (at === -1) {
                continue; // an earlier modification already rewrote this text
            }
            body = body.slice(0, at) + replacementText + body.slice(at + original.length);
            appliedEdits.push({
                start: match.index,
                end: match.index + original.length,
                delta: replacementText.length - original.length,
            });
            // add any library imports
            mod.imports?.forEach(imp => imports.add(imp));
        }
    }
    (0, log_1.logDebug)("doTransformsOnMarkdown", "body after regex: " + body);
    return { body, imports: [...imports].join("\n") };
}

// Translates an index in the original input into the index at which the search for
// that text should start in the partially rewritten body.
function mapInputIndexToBody(appliedEdits, inputIndex) {
    // If an earlier replacement swallowed this position, start where that replacement starts.
    let base = inputIndex;
    for (const edit of appliedEdits) {
        if (edit.start <= inputIndex && inputIndex < edit.end) {
            base = Math.min(base, edit.start);
        }
    }
    // Everything replaced entirely before that point moved the text by its length change.
    let shift = 0;
    for (const edit of appliedEdits) {
        if (edit.end <= base) {
            shift += edit.delta;
        }
    }
    return base + shift;
}
123
/**
 * Converts blocks to a markdown string using the context's notion-to-md instance.
 *
 * @param docunotionContext Context exposing `notionToMarkdown`.
 * @param blocks            Blocks to convert.
 * @returns {Promise<string>} Whatever `toMarkdownString` produces for the converted blocks.
 */
async function doNotionToMarkdown(docunotionContext, blocks) {
    const mdBlocks = await docunotionContext.notionToMarkdown.blocksToMarkdown(blocks);
    return docunotionContext.notionToMarkdown.toMarkdownString(mdBlocks);
}
130
+ // corrections to links after they are converted to markdown
131
+ // Note: from notion (or notion-md?) we get slightly different hrefs depending on whether the links is "inline"
132
+ // (has some other text that's been turned into a link) or "raw".
133
+ // Raw links come in without a leading slash, e.g. [link_to_page](4a6de8c0-b90b-444b-8a7b-d534d6ec71a4)
134
+ // Inline links come in with a leading slash, e.g. [pointer to the introduction](/4a6de8c0b90b444b8a7bd534d6ec71a4)
135
/**
 * Gives plugins the chance to rewrite each markdown link (`[text](target)`).
 *
 * For every link, plugins with a `linkModifier` are tried in order; the first one
 * whose `match` regex accepts the link and whose `convert` returns something
 * different wins, and its result replaces that link.
 *
 * Fixes over the previous implementation:
 *  - the link text is matched lazily, so two links on one line are handled as two
 *    links instead of one merged match;
 *  - the replacement is spliced in at the position of the match, rather than
 *    replacing the first textually identical occurrence in the document, and is
 *    inserted literally ("$&", "$$", ... are not expanded);
 *  - a plugin matcher carrying the "g" flag is rewound before each use, so its
 *    leftover `lastIndex` cannot make it skip a link.
 *
 * @param context  Conversion context, handed through to `linkModifier.convert`.
 * @param markdown Markdown to scan.
 * @param config   Configuration whose `plugins` may carry a `linkModifier`.
 * @returns {string} The markdown with the modified links.
 */
function doLinkFixes(context, markdown, config) {
    const linkRegExp = /\[.*?\]\([^\)]*\)/g;
    (0, log_1.logDebug)("markdown before link fixes", markdown);
    // The output is assembled from untouched stretches plus (possibly rewritten) links.
    let result = "";
    let cursor = 0;
    for (const match of markdown.matchAll(linkRegExp)) {
        const originalLinkMarkdown = match[0];
        (0, log_1.verbose)(`Checking to see if a plugin wants to modify "${originalLinkMarkdown}" `);
        let finalLinkMarkdown = originalLinkMarkdown;
        // We only use the first plugin that matches and makes a change to the link.
        // Enhance: we could take the time to see if multiple plugins match,
        // and point this out in verbose logging mode.
        config.plugins.some(plugin => {
            if (!plugin.linkModifier) {
                return false;
            }
            const matcher = plugin.linkModifier.match;
            matcher.lastIndex = 0; // global/sticky regexes remember where they stopped
            if (matcher.exec(originalLinkMarkdown) === null) {
                return false;
            }
            const newMarkdown = plugin.linkModifier.convert(context, originalLinkMarkdown);
            if (newMarkdown !== originalLinkMarkdown) {
                finalLinkMarkdown = newMarkdown;
                (0, log_1.verbose)(`plugin "${plugin.name}" transformed link: ${originalLinkMarkdown}-->${newMarkdown}`);
                return true; // the first plugin that matches and does something wins
            }
            (0, log_1.verbose)(`plugin "${plugin.name}" did not change this url`);
            return false;
        });
        result += markdown.slice(cursor, match.index) + finalLinkMarkdown;
        cursor = match.index + originalLinkMarkdown.length;
    }
    return result + markdown.slice(cursor);
}
170
+ // overrides for the conversions that notion-to-md does
171
/**
 * Registers every plugin-supplied transform with the context's notion-to-md
 * instance, keyed by `transform.type`.
 *
 * The registered callback logs which plugin is handling the block and then
 * delegates to the transform's `getStringFromBlock(context, block)`.
 *
 * @param config            Configuration; plugins lacking `notionToMarkdownTransforms` are skipped.
 * @param docunotionContext Context exposing `notionToMarkdown.setCustomTransformer`.
 */
function registerNotionToMarkdownCustomTransforms(config, docunotionContext) {
    for (const plugin of config.plugins) {
        if (!plugin.notionToMarkdownTransforms) {
            continue;
        }
        for (const transform of plugin.notionToMarkdownTransforms) {
            (0, log_1.logDebug)("registering custom transform", `${plugin.name} for ${transform.type}`);
            const convertBlock = (block) => {
                (0, log_1.logDebug)("notion to MD conversion of ", `${transform.type} with plugin: ${plugin.name}`);
                return transform.getStringFromBlock(docunotionContext, block);
            };
            docunotionContext.notionToMarkdown.setCustomTransformer(transform.type, convertBlock);
        }
    }
}
184
+ // enhance:make this built-in plugin so that it can be overridden
185
/**
 * Builds the front matter block (delimited by `---` lines) for a page.
 *
 * Emits `title`, `sidebar_position` and `slug` (empty when the page has none),
 * plus `keywords` when the page has any.
 *
 * @param page Page providing `nameOrTitle`, `order`, `slug` and `keywords`.
 * @returns {string} The front matter, each line terminated by a newline.
 */
function getFrontMatter(page) {
    const lines = [
        "---",
        // Colons are swapped for dashes: I have not found a way to escape colons
        `title: ${page.nameOrTitle.replaceAll(":", "-")}`,
        `sidebar_position: ${page.order}`,
        `slug: ${page.slug ?? ""}`,
    ];
    if (page.keywords) {
        lines.push(`keywords: [${page.keywords}]`);
    }
    lines.push("---");
    return lines.map(line => `${line}\n`).join("");
}
package/dist/types.d.ts ADDED
@@ -0,0 +1,8 @@
1
+ import { BlockObjectResponse } from "@notionhq/client/build/src/api-endpoints";
2
/** Package-local alias for the Notion client's `BlockObjectResponse`. */
export type NotionBlock = BlockObjectResponse;
3
/**
 * A set of four numeric counters.
 *
 * NOTE(review): judging only by the field names, these tally how many pages were
 * written versus skipped (and why) — confirm against the code that updates them.
 */
export type ICounts = {
    output_normally: number;
    skipped_because_empty: number;
    skipped_because_status: number;
    skipped_because_level_cannot_have_content: number;
};
package/dist/types.js ADDED
@@ -0,0 +1,2 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
package/package.json ADDED
@@ -0,0 +1,96 @@
1
+ {
2
+ "scripts": {
3
+ "test": "jest",
4
+ "build": "yarn test && tsc && cp ./src/css/*.css dist/",
5
+ "build-only": "tsc && cp ./src/css/*.css dist/",
6
+ "clean": "rimraf ./dist/",
7
+ "semantic-release": "semantic-release",
8
+ "typecheck": "tsc --noEmit",
9
+ "notion-download": "node dist/index.js",
10
+ "cmdhelp": "ts-node src/index.ts",
11
+ "// note that we're not using ts-node at the moment because of ": "https://github.com/Codex-/cosmiconfig-typescript-loader/issues/70",
12
+ "ts": "yarn tsc && rimraf ./docs/ && cross-var node dist/index.js",
13
+ "// test out with a private sample notion db": "",
14
+ "pull-test-tagged": "yarn ts -n $DOCU_NOTION_INTEGRATION_TOKEN -r $DOCU_NOTION_TEST_ROOT_PAGE_ID --log-level debug --status-tag test",
15
+ "pull-sample-site": "yarn ts -n $DOCU_NOTION_INTEGRATION_TOKEN -r $DOCU_NOTION_SAMPLE_ROOT_PAGE --log-level debug",
16
+ "// test with a semi-stable/public site:": "",
17
+ "pull-sample": "yarn ts -n $DOCU_NOTION_INTEGRATION_TOKEN -r $DOCU_NOTION_SAMPLE_ROOT_PAGE -m ./sample --locales en,es,fr,de --log-level verbose",
18
+ "pull-sample-with-paths": "yarn ts -n $DOCU_NOTION_INTEGRATION_TOKEN -r $DOCU_NOTION_SAMPLE_ROOT_PAGE -m ./sample --img-output-path ./sample_img"
19
+ },
20
+ "//file-type": "have to use this version before they switched to ESM, which gives a compile error related to require()",
21
+ "//node-fetch@2.6.6": "have to use this version before they switched to ESM, which gives a compile error related to require()",
22
+ "//chalk@4": "also ESM related problem",
23
+ "//notion-client@4": "also ESM related problem",
24
+ "dependencies": {
25
+ "@notionhq/client": "2.2.3",
26
+ "chalk": "^4.1.2",
27
+ "commander": "^9.2.0",
28
+ "cosmiconfig": "^8.0.0",
29
+ "cosmiconfig-typescript-loader": "^4.3.0",
30
+ "file-type": "16.5.1",
31
+ "fs-extra": "^10.1.0",
32
+ "limiter": "^2.1.0",
33
+ "markdown-table": "^2.0.0",
34
+ "node-fetch": "2.6.6",
35
+ "notion-client": "^4",
36
+ "notion-to-md": "^2.5.5",
37
+ "path": "^0.12.7",
38
+ "sanitize-filename": "^1.6.3"
39
+ },
40
+ "devDependencies": {
41
+ "@types/fs-extra": "^9.0.13",
42
+ "@types/jest": "^28.1.6",
43
+ "@types/markdown-table": "^2.0.0",
44
+ "@types/node": "^12.20.11",
45
+ "@typescript-eslint/eslint-plugin": "^4.22.0",
46
+ "@typescript-eslint/parser": "^4.22.0",
47
+ "cross-var": "^1.1.0",
48
+ "cz-conventional-changelog": "^3.3.0",
49
+ "eslint": "^7.25.0",
50
+ "eslint-config-prettier": "^8.3.0",
51
+ "eslint-plugin-node": "^11.1.0",
52
+ "eslint-plugin-prettier": "^3.4.0",
53
+ "jest": "^28.1.3",
54
+ "lint-staged": "^10.5.4",
55
+ "prettier": "^2.2.1",
56
+ "rimraf": "^4.1.2",
57
+ "semantic-release": "^19.0.2",
58
+ "ts-jest": "^28.0.7",
59
+ "ts-node": "^10.2.1",
60
+ "typescript": "^4.6.4"
61
+ },
62
+ "name": "@sillsdev/docu-notion",
63
+ "description": "Download Notion pages as markdown and image files, preserving hierarchy and enabling workflow properties. Works with Docusaurus.",
64
+ "license": "MIT",
65
+ "author": {
66
+ "name": "SIL Language Software Development",
67
+ "email": "sillsdev@users.noreply.github.com",
68
+ "url": "https://github.com/sillsdev"
69
+ },
70
+ "keywords": [
71
+ "docusaurus",
72
+ "notion",
73
+ "markdown",
74
+ "documentation"
75
+ ],
76
+ "bugs": {
77
+ "url": "https://github.com/sillsdev/docu-notion/issues"
78
+ },
79
+ "homepage": "https://github.com/sillsdev/docu-notion#readme",
80
+ "packageManager": "yarn@3.4.1",
81
+ "main": "./dist/index.js",
82
+ "bin": "dist/index.js",
83
+ "files": [
84
+ "dist/**/*"
85
+ ],
86
+ "release": {
87
+ "branches": [
88
+ "main",
89
+ "release"
90
+ ]
91
+ },
92
+ "publishConfig": {
93
+ "access": "public"
94
+ },
95
+ "version": "0.12.0"
96
+ }