@sillsdev/docu-notion 0.13.2 → 0.13.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -31,29 +31,29 @@ Currently, docu-notion expects that each page has only one of the following: sub
31
31
 
32
32
  ## 6. Pull your pages
33
33
 
34
- First, determine the id of your root page by clicking "Share" and looking at the the url it gives you. E.g.
34
+ First, determine the id of your root page by clicking "Share" and looking at the url it gives you. E.g.
35
35
  https://www.notion.so/hattonjohn/My-Docs-0456aa5842946bdbea3a4f37c97a0e5
36
36
  means that the id is "0456aa5842946PRETEND4f37c97a0e5".
37
37
 
38
38
  Determine where you want the markdown files and images to land. The following works well for Docusaurus instances:
39
39
 
40
40
  ```
41
- npx docu-notion -n secret_PRETEND123456789PRETEND123456789PRETEND6789 -r 0456aa5842946PRETEND4f37c97a0e5"
41
+ npx @sillsdev/docu-notion -n secret_PRETEND123456789PRETEND123456789PRETEND6789 -r 0456aa5842946PRETEND4f37c97a0e5
42
42
  ```
43
43
 
44
44
  Likely, you will want to store these codes in your environment variables and then use them like this:
45
45
 
46
46
  ```
47
47
  (windows)
48
- npx docu-notion -n %MY_NOTION_TOKEN% -r %MY_NOTION_DOCS_ROOT_PAGE_ID%
48
+ npx @sillsdev/docu-notion -n %MY_NOTION_TOKEN% -r %MY_NOTION_DOCS_ROOT_PAGE_ID%
49
49
  ```
50
50
 
51
51
  ```
52
52
  (linux / mac)
53
- npx docu-notion -n $MY_NOTION_TOKEN -r $MY_NOTION_DOCS_ROOT_PAGE_ID
53
+ npx @sillsdev/docu-notion -n $MY_NOTION_TOKEN -r $MY_NOTION_DOCS_ROOT_PAGE_ID
54
54
  ```
55
55
 
56
- NOTE: In the above, we are using `npx` to use the latest `docu-notion`. A more conservative approach would be to `npm i cross-var docu-notion` and then create a script in your package.json like this:
56
+ NOTE: In the above, we are using `npx` to use the latest `docu-notion`. A more conservative approach would be to `npm i cross-var @sillsdev/docu-notion` and then create a script in your package.json like this:
57
57
 
58
58
  ```
59
59
  "scripts": {
@@ -133,3 +133,16 @@ Options:
133
133
  # Plugins
134
134
 
135
135
  If your project needs some processing that docu-notion doesn't already provide, you can provide a plugin that does it. See the [plugin readme](src/plugins/README.md).
136
+
137
+ # Callouts ➜ Admonitions
138
+
139
+ To map Notion callouts to Docusaurus admonitions, give each callout the icon that corresponds to the admonition type you want:
140
+
141
+ - ℹ️ ➜ note
142
+ - 📝 ➜ note
143
+ - 💡 ➜ tip
144
+ - ❗ ➜ info
145
+ - ⚠️ ➜ caution
146
+ - 🔥 ➜ danger
147
+
148
+ The default admonition type, if no matching icon is found, is "note".
@@ -1,2 +1,3 @@
1
1
  import { ImageSet } from "./images";
2
2
  export declare function makeImagePersistencePlan(imageSet: ImageSet, imageOutputRootPath: string, imagePrefix: string): void;
3
+ export declare function hashOfString(s: string): number;
@@ -23,21 +23,21 @@ var __importStar = (this && this.__importStar) || function (mod) {
23
23
  return result;
24
24
  };
25
25
  Object.defineProperty(exports, "__esModule", { value: true });
26
- exports.makeImagePersistencePlan = void 0;
26
+ exports.hashOfString = exports.makeImagePersistencePlan = void 0;
27
27
  const Path = __importStar(require("path"));
28
28
  const log_1 = require("./log");
29
29
  const process_1 = require("process");
30
30
  function makeImagePersistencePlan(imageSet, imageOutputRootPath, imagePrefix) {
31
- var _a;
31
+ var _a, _b;
32
32
  if ((_a = imageSet.fileType) === null || _a === void 0 ? void 0 : _a.ext) {
33
33
  // Since most images come from pasting screenshots, there isn't normally a filename. That's fine, we just make a hash of the url
34
34
  // Images that are stored by notion come to us with a complex url that changes over time, so we pick out the UUID that doesn't change. Example:
35
35
  // https://s3.us-west-2.amazonaws.com/secure.notion-static.com/d1058f46-4d2f-4292-8388-4ad393383439/Untitled.png?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Content-Sha256=UNSIGNED-PAYLOAD&X-Amz-Credential=AKIAT73L2G45EIPT3X45%2F20220516%2Fus-west-2%2Fs3%2Faws4_request&X-Amz-Date=20220516T233630Z&X-Amz-Expires=3600&X-Amz-Signature=f215704094fcc884d37073b0b108cf6d1c9da9b7d57a898da38bc30c30b4c4b5&X-Amz-SignedHeaders=host&x-id=GetObject
36
- let thingToHash = imageSet.primaryUrl;
37
- const m = /.*secure\.notion-static\.com\/(.*)\//gm.exec(imageSet.primaryUrl);
38
- if (m && m.length > 1) {
39
- thingToHash = m[1];
40
- }
36
+ // But around Sept 2023, they changed the url to be something like:
37
+ // https://prod-files-secure.s3.us-west-2.amazonaws.com/d9a2b712-cf69-4bd6-9d65-87a4ceeacca2/d1bcdc8c-b065-4e40-9a11-392aabeb220e/Untitled.png?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Content-Sha256=UNSIGNED-PAYLOAD&X-Amz-Credential=AKIAT73L2G45EIPT3X45%2F20230915%2Fus-west-2%2Fs3%2Faws4_request&X-Amz-Date=20230915T161258Z&X-Amz-Expires=3600&X-Amz-Signature=28fca48e65fba86d539c3c4b7676fce1fa0857aa194f7b33dd4a468ecca6ab24&X-Amz-SignedHeaders=host&x-id=GetObject
38
+ // The thing we want is the last UUID before the ?
39
+ const urlBeforeQuery = imageSet.primaryUrl.split("?")[0];
40
+ const thingToHash = (_b = findLastUuid(urlBeforeQuery)) !== null && _b !== void 0 ? _b : urlBeforeQuery;
41
41
  const hash = hashOfString(thingToHash);
42
42
  imageSet.outputFileName = `${hash}.${imageSet.fileType.ext}`;
43
43
  imageSet.primaryFileOutputPath = Path.posix.join((imageOutputRootPath === null || imageOutputRootPath === void 0 ? void 0 : imageOutputRootPath.length) > 0
@@ -58,9 +58,18 @@ function makeImagePersistencePlan(imageSet, imageOutputRootPath, imagePrefix) {
58
58
  }
59
59
  }
60
60
  exports.makeImagePersistencePlan = makeImagePersistencePlan;
61
+ function findLastUuid(url) {
62
+ // Regex for a UUID surrounded by slashes
63
+ const uuidPattern = /(?<=\/)[0-9a-f]{8}-[0-9a-f]{4}-[0-5][0-9a-f]{3}-[089ab][0-9a-f]{3}-[0-9a-f]{12}(?=\/)/gi;
64
+ // Find all UUIDs
65
+ const uuids = url.match(uuidPattern);
66
+ // Return the last UUID if any exist, else return null
67
+ return uuids ? uuids[uuids.length - 1].trim() : null;
68
+ }
61
69
  function hashOfString(s) {
62
70
  let hash = 0;
63
71
  for (let i = 0; i < s.length; ++i)
64
72
  hash = Math.imul(31, hash) + s.charCodeAt(i);
65
73
  return Math.abs(hash);
66
74
  }
75
+ exports.hashOfString = hashOfString;
@@ -9,9 +9,10 @@ test("primary file with explicit file output path and prefix", () => {
9
9
  fileType: { ext: "png", mime: "image/png" },
10
10
  };
11
11
  (0, MakeImagePersistencePlan_1.makeImagePersistencePlan)(imageSet, "./static/notion_imgs", "/notion_imgs");
12
- expect(imageSet.outputFileName).toBe("463556435.png");
13
- expect(imageSet.primaryFileOutputPath).toBe("static/notion_imgs/463556435.png");
14
- expect(imageSet.filePathToUseInMarkdown).toBe("/notion_imgs/463556435.png");
12
+ const expectedHash = (0, MakeImagePersistencePlan_1.hashOfString)("https://s3.us-west-2.amazonaws.com/primaryImage");
13
+ expect(imageSet.outputFileName).toBe(`${expectedHash}.png`);
14
+ expect(imageSet.primaryFileOutputPath).toBe(`static/notion_imgs/${expectedHash}.png`);
15
+ expect(imageSet.filePathToUseInMarkdown).toBe(`/notion_imgs/${expectedHash}.png`);
15
16
  });
16
17
  test("primary file with defaults for image output path and prefix", () => {
17
18
  const imageSet = {
@@ -21,10 +22,31 @@ test("primary file with defaults for image output path and prefix", () => {
21
22
  fileType: { ext: "png", mime: "image/png" },
22
23
  };
23
24
  (0, MakeImagePersistencePlan_1.makeImagePersistencePlan)(imageSet, "", "");
24
- expect(imageSet.outputFileName).toBe("463556435.png");
25
+ const expectedHash = (0, MakeImagePersistencePlan_1.hashOfString)("https://s3.us-west-2.amazonaws.com/primaryImage");
26
+ expect(imageSet.outputFileName).toBe(`${expectedHash}.png`);
25
27
  // the default behavior is to put the image next to the markdown file
26
- expect(imageSet.primaryFileOutputPath).toBe("/pathToParentSomewhere/463556435.png");
27
- expect(imageSet.filePathToUseInMarkdown).toBe("./463556435.png");
28
+ expect(imageSet.primaryFileOutputPath).toBe(`/pathToParentSomewhere/${expectedHash}.png`);
29
+ expect(imageSet.filePathToUseInMarkdown).toBe(`./${expectedHash}.png`);
30
+ });
31
+ test("properly extract UUID from old-style notion image url", () => {
32
+ const imageSet = {
33
+ primaryUrl: "https://s3.us-west-2.amazonaws.com/secure.notion-static.com/e1058f46-4d2f-4292-8388-4ad393383439/Untitled.png?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Content-Sha256=UNSIGNED-PAYLOAD&X-Amz-Credential=AKIAT73L2G45EIPT3X45%2F20220516%2Fus-west-2%2Fs3%2Faws4_request&X-Amz-Date=20220516T233630Z&X-Amz-Expires=3600&X-Amz-Signature=f215704094fcc884d37073b0b108cf6d1c9da9b7d57a898da38bc30c30b4c4b5&X-Amz-SignedHeaders=host&x-id=GetObject",
34
+ localizedUrls: [],
35
+ fileType: { ext: "png", mime: "image/png" },
36
+ };
37
+ (0, MakeImagePersistencePlan_1.makeImagePersistencePlan)(imageSet, "./static/notion_imgs", "/notion_imgs");
38
+ const expectedHash = (0, MakeImagePersistencePlan_1.hashOfString)("e1058f46-4d2f-4292-8388-4ad393383439");
39
+ expect(imageSet.outputFileName).toBe(`${expectedHash}.png`);
40
+ });
41
+ test("properly extract UUID from new-style (Sept 2023) notion image url", () => {
42
+ const imageSet = {
43
+ primaryUrl: "https://prod-files-secure.s3.us-west-2.amazonaws.com/d9a2b712-cf69-4bd6-9d65-87a4ceeacca2/d1bcdc8c-b065-4e40-9a11-392aabeb220e/Untitled.png?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Content-Sha256=UNSIGNED-PAYLOAD&X-Amz-Credential=AKIAT73L2G45EIPT3X45%2F20230915%2Fus-west-2%2Fs3%2Faws4_request&X-Amz-Date=20230915T161258Z&X-Amz-Expires=3600&X-Amz-Signature=28fca48e65fba86d539c3c4b7676fce1fa0857aa194f7b33dd4a468ecca6ab24&X-Amz-SignedHeaders=host&x-id=GetObject",
44
+ localizedUrls: [],
45
+ fileType: { ext: "png", mime: "image/png" },
46
+ };
47
+ (0, MakeImagePersistencePlan_1.makeImagePersistencePlan)(imageSet, "./static/notion_imgs", "/notion_imgs");
48
+ const expectedHash = (0, MakeImagePersistencePlan_1.hashOfString)("d1bcdc8c-b065-4e40-9a11-392aabeb220e");
49
+ expect(imageSet.outputFileName).toBe(`${expectedHash}.png`);
28
50
  });
29
51
  // In order to make image fallback work with other languages, we have to have
30
52
  // a file for each image, in each Docusaurus language directory. This is true
@@ -11,6 +11,7 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
11
11
  Object.defineProperty(exports, "__esModule", { value: true });
12
12
  exports.standardColumnTransformer = void 0;
13
13
  const notion_client_1 = require("notion-client");
14
+ const pull_1 = require("../pull");
14
15
  exports.standardColumnTransformer = {
15
16
  name: "standardColumnTransformer",
16
17
  notionToMarkdownTransforms: [
@@ -49,8 +50,10 @@ function getColumnWidth(block) {
49
50
  return __awaiter(this, void 0, void 0, function* () {
50
51
  const unofficialNotionClient = new notion_client_1.NotionAPI();
51
52
  const blockId = block.id;
52
- // Yes, it is odd to call 'getPage' for a block, but that's how we access the format info.
53
- const recordMap = yield unofficialNotionClient.getPage(blockId);
53
+ const recordMap = yield (0, pull_1.executeWithRateLimitAndRetries)(`unofficialNotionClient.getPage(${blockId}) in getColumnWidth()`, () => {
54
+ // Yes, it is odd to call 'getPage' for a block, but that's how we access the format info.
55
+ return unofficialNotionClient.getPage(blockId);
56
+ });
54
57
  const blockResult = recordMap.block[blockId];
55
58
  // ENHANCE: could we use https://github.com/NotionX/react-notion-x/tree/master/packages/notion-types
56
59
  // to get away from "any", which might be particularly helpful in the future
@@ -346,6 +346,34 @@ test("does not interfere with mailto links", () => __awaiter(void 0, void 0, voi
346
346
  });
347
347
  expect(results.trim()).toBe(`[mailme](mailto:foo@example.com)`);
348
348
  }));
349
+ test("does not interfere with https links", () => __awaiter(void 0, void 0, void 0, function* () {
350
+ const results = yield getMarkdown({
351
+ type: "paragraph",
352
+ paragraph: {
353
+ rich_text: [
354
+ {
355
+ type: "text",
356
+ text: {
357
+ content: "google",
358
+ link: { url: `https://www.google.com` },
359
+ },
360
+ annotations: {
361
+ bold: false,
362
+ italic: false,
363
+ strikethrough: false,
364
+ underline: false,
365
+ code: false,
366
+ color: "default",
367
+ },
368
+ plain_text: "google",
369
+ href: `https://www.google.com`,
370
+ },
371
+ ],
372
+ color: "default",
373
+ },
374
+ });
375
+ expect(results.trim()).toBe(`[google](https://www.google.com)`);
376
+ }));
349
377
  test("links to other notion pages that are not in this site give PROBLEM LINK", () => __awaiter(void 0, void 0, void 0, function* () {
350
378
  const results = yield getMarkdown({
351
379
  type: "paragraph",
package/dist/pull.d.ts CHANGED
@@ -9,4 +9,5 @@ export type DocuNotionOptions = {
9
9
  statusTag: string;
10
10
  };
11
11
  export declare function notionPull(options: DocuNotionOptions): Promise<void>;
12
+ export declare function executeWithRateLimitAndRetries<T>(label: string, asyncFunction: () => Promise<T>): Promise<T>;
12
13
  export declare function initNotionClient(notionToken: string): Client;
package/dist/pull.js CHANGED
@@ -32,7 +32,7 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
32
32
  });
33
33
  };
34
34
  Object.defineProperty(exports, "__esModule", { value: true });
35
- exports.initNotionClient = exports.notionPull = void 0;
35
+ exports.initNotionClient = exports.executeWithRateLimitAndRetries = exports.notionPull = void 0;
36
36
  const fs = __importStar(require("fs-extra"));
37
37
  const notion_to_md_1 = require("notion-to-md");
38
38
  const HierarchicalNamedLayoutStrategy_1 = require("./HierarchicalNamedLayoutStrategy");
@@ -188,12 +188,45 @@ const notionLimiter = new limiter_1.RateLimiter({
188
188
  let notionClient;
189
189
  function getPageMetadata(id) {
190
190
  return __awaiter(this, void 0, void 0, function* () {
191
- yield rateLimit();
192
- return yield notionClient.pages.retrieve({
193
- page_id: id,
191
+ return yield executeWithRateLimitAndRetries(`pages.retrieve(${id})`, () => {
192
+ return notionClient.pages.retrieve({
193
+ page_id: id,
194
+ });
194
195
  });
195
196
  });
196
197
  }
198
+ // While everything works fine locally, on GitHub Actions we are getting a lot of timeouts, so
199
+ // we're trying this extra retry-able wrapper.
200
+ function executeWithRateLimitAndRetries(label, asyncFunction) {
201
+ return __awaiter(this, void 0, void 0, function* () {
202
+ yield rateLimit();
203
+ const kRetries = 10;
204
+ let lastException = undefined;
205
+ for (let i = 0; i < kRetries; i++) {
206
+ try {
207
+ return yield asyncFunction();
208
+ }
209
+ catch (e) {
210
+ lastException = e;
211
+ if ((e === null || e === void 0 ? void 0 : e.code) === "notionhq_client_request_timeout" ||
212
+ e.message.includes("timeout") ||
213
+ e.message.includes("Timeout") ||
214
+ e.message.includes("limit") ||
215
+ e.message.includes("Limit")) {
216
+ const secondsToWait = i + 1;
217
+ (0, log_1.info)(`While doing "${label}", got error "${e.message}". Will retry after ${secondsToWait}s...`);
218
+ yield new Promise(resolve => setTimeout(resolve, 1000 * secondsToWait));
219
+ }
220
+ else {
221
+ throw e;
222
+ }
223
+ }
224
+ }
225
+ (0, log_1.error)(`Error: could not complete "${label}" after ${kRetries} retries.`);
226
+ throw lastException;
227
+ });
228
+ }
229
+ exports.executeWithRateLimitAndRetries = executeWithRateLimitAndRetries;
197
230
  function rateLimit() {
198
231
  return __awaiter(this, void 0, void 0, function* () {
199
232
  if (notionLimiter.getTokensRemaining() < 1) {
@@ -213,10 +246,11 @@ function getBlockChildren(id) {
213
246
  // Note: there is now a collectPaginatedAPI() in the notion client, so
214
247
  // we could switch to using that (I don't know if it does rate limiting?)
215
248
  do {
216
- yield rateLimit();
217
- const response = yield notionClient.blocks.children.list({
218
- start_cursor: start_cursor,
219
- block_id: id,
249
+ const response = yield executeWithRateLimitAndRetries(`getBlockChildren(${id})`, () => {
250
+ return notionClient.blocks.children.list({
251
+ start_cursor: start_cursor,
252
+ block_id: id,
253
+ });
220
254
  });
221
255
  if (!overallResult) {
222
256
  overallResult = response;
package/dist/transform.js CHANGED
@@ -103,7 +103,6 @@ function doTransformsOnMarkdown(context, config, input) {
103
103
  replacement = yield mod.getReplacement(context, matchAsThePluginWouldExpectIt);
104
104
  }
105
105
  else if (mod.replacementPattern) {
106
- console.log(`mod.replacementPattern.replace("$1", ${match[2]}`);
107
106
  replacement = mod.replacementPattern.replace("$1", match[2]);
108
107
  }
109
108
  if (replacement !== undefined) {
package/package.json CHANGED
@@ -11,6 +11,7 @@
11
11
  "// typescript check": "",
12
12
  "tsc": "tsc",
13
13
  "// test out with a private sample notion db": "",
14
+ "large-site-test": "npm run ts -- -n $SIL_BLOOM_DOCS_NOTION_TOKEN -r $SIL_BLOOM_DOCS_NOTION_ROOT_PAGE --locales en,fr",
14
15
  "pull-test-tagged": "npm run ts -- -n $DOCU_NOTION_INTEGRATION_TOKEN -r $DOCU_NOTION_TEST_ROOT_PAGE_ID --log-level debug --status-tag test",
15
16
  "pull-sample-site": "npm run ts -- -n $DOCU_NOTION_INTEGRATION_TOKEN -r $DOCU_NOTION_SAMPLE_ROOT_PAGE --log-level debug",
16
17
  "// test with a semi-stable/public site:": "",
@@ -89,5 +90,5 @@
89
90
  "volta": {
90
91
  "node": "18.16.0"
91
92
  },
92
- "version": "0.13.2"
93
+ "version": "0.13.4"
93
94
  }