gatsby-source-notion-churnotion 1.0.65 → 1.0.67
Sign up to get free protection for your applications and to get access to all the features.
- package/dist/api/getPages.js +2 -1
- package/dist/constants.d.ts +1 -0
- package/dist/constants.js +1 -0
- package/dist/createSchemaCustomization.js +5 -0
- package/dist/gatsby-node.d.ts +1 -1
- package/dist/gatsby-node.js +2 -2
- package/dist/{on-plugin-init.js → onPluginInit.js} +1 -1
- package/dist/onPostBootstrap.d.ts +0 -0
- package/dist/onPostBootstrap.js +113 -0
- package/dist/util/processor.d.ts +1 -1
- package/dist/util/processor.js +33 -4
- package/dist/util/relatedPost.d.ts +2 -0
- package/dist/util/relatedPost.js +33 -0
- package/package.json +3 -1
- /package/dist/{on-plugin-init.d.ts → onPluginInit.d.ts} +0 -0
package/dist/api/getPages.js
CHANGED
@@ -139,7 +139,7 @@ const getPages = async ({ databaseId, reporter, getCache, actions, createNode, c
|
|
139
139
|
});
|
140
140
|
}
|
141
141
|
const bookId = page.properties?.book?.relation?.[0]?.id || null;
|
142
|
-
const [imageNode, tableOfContents, updatedBlocks] = await (0, processor_1.processor)(pageData.results, actions, getCache, createNodeId, reporter);
|
142
|
+
const [imageNode, tableOfContents, updatedBlocks, rawText] = await (0, processor_1.processor)(pageData.results, actions, getCache, createNodeId, reporter);
|
143
143
|
const postNode = {
|
144
144
|
id: nodeId,
|
145
145
|
category: parentCategoryId,
|
@@ -167,6 +167,7 @@ const getPages = async ({ databaseId, reporter, getCache, actions, createNode, c
|
|
167
167
|
parent: null,
|
168
168
|
url: `${constants_1.COMMON_URI}/${constants_1.POST_URI}${parentCategoryUrl}/${slug}`,
|
169
169
|
thumbnail: imageNode,
|
170
|
+
rawText,
|
170
171
|
};
|
171
172
|
await createNode(postNode);
|
172
173
|
// book과 post 부모-자식 관계 설정
|
package/dist/constants.d.ts
CHANGED
package/dist/constants.js
CHANGED
package/dist/createSchemaCustomization.js
CHANGED
@@ -22,6 +22,7 @@ const createSchemaCustomization = ({ actions }) => {
|
|
22
22
|
category_list: [${constants_1.NODE_TYPE.Category}]
|
23
23
|
url: String!
|
24
24
|
thumbnail: File @link(by: "id", from: "thumbnail")
|
25
|
+
rawText: String!
|
25
26
|
}
|
26
27
|
|
27
28
|
type ${constants_1.NODE_TYPE.Tag} implements Node {
|
@@ -61,6 +62,10 @@ const createSchemaCustomization = ({ actions }) => {
|
|
61
62
|
image: String,
|
62
63
|
url: String,
|
63
64
|
}
|
65
|
+
|
66
|
+
type ${constants_1.NODE_TYPE.RelatedPost} implements Node {
|
67
|
+
posts: [${constants_1.NODE_TYPE.RelatedPost}]
|
68
|
+
}
|
64
69
|
`);
|
65
70
|
};
|
66
71
|
exports.createSchemaCustomization = createSchemaCustomization;
|
package/dist/gatsby-node.d.ts
CHANGED
package/dist/gatsby-node.js
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
"use strict";
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
3
3
|
exports.createSchemaCustomization = exports.sourceNodes = exports.onPluginInit = void 0;
|
4
|
-
var on_plugin_init_1 = require("./on-plugin-init");
|
5
|
-
Object.defineProperty(exports, "onPluginInit", { enumerable: true, get: function () { return on_plugin_init_1.onPluginInit; } });
|
4
|
+
var onPluginInit_1 = require("./onPluginInit");
|
5
|
+
Object.defineProperty(exports, "onPluginInit", { enumerable: true, get: function () { return onPluginInit_1.onPluginInit; } });
|
6
6
|
var source_nodes_1 = require("./source-nodes");
|
7
7
|
Object.defineProperty(exports, "sourceNodes", { enumerable: true, get: function () { return source_nodes_1.sourceNodes; } });
|
8
8
|
var createSchemaCustomization_1 = require("./createSchemaCustomization");
|
package/dist/{on-plugin-init.js → onPluginInit.js}
CHANGED
@@ -2,6 +2,6 @@
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
3
3
|
exports.onPluginInit = void 0;
|
4
4
|
const onPluginInit = ({ reporter }) => {
|
5
|
-
reporter.info(`
|
5
|
+
reporter.info(`Churnotion plugin loaded`);
|
6
6
|
};
|
7
7
|
exports.onPluginInit = onPluginInit;
|
File without changes
|
package/dist/onPostBootstrap.js
ADDED
@@ -0,0 +1,113 @@
|
|
1
|
+
"use strict";
|
2
|
+
// import { GatsbyNode } from "gatsby";
|
3
|
+
// import { TfIdf, TfIdfTerm } from "natural";
|
4
|
+
// import { NODE_TYPE } from "./constants";
|
5
|
+
// import crypto from "crypto";
|
6
|
+
// const md5 = (str: string): string => {
|
7
|
+
// const md5 = crypto.createHash("md5");
|
8
|
+
// return md5.update(str, "binary").digest("hex");
|
9
|
+
// };
|
10
|
+
// const getSpaceSeparatedDoc: {
|
11
|
+
// [key: string]: (doc: string) => Promise<string[]>;
|
12
|
+
// } = {
|
13
|
+
// en: async (doc) => {
|
14
|
+
// return doc.toLowerCase().split(' ');
|
15
|
+
// },
|
16
|
+
// ja: async (doc) => {
|
17
|
+
// if (kuromoji_tokenizer === null)
|
18
|
+
// kuromoji_tokenizer = await getKuromojiTokenizer();
|
19
|
+
// return kuromoji_tokenizer
|
20
|
+
// .tokenize(doc)
|
21
|
+
// .filter(
|
22
|
+
// (x) =>
|
23
|
+
// x.pos === '名詞' &&
|
24
|
+
// ['一般', '固有名詞'].indexOf(x.pos_detail_1) !== -1
|
25
|
+
// )
|
26
|
+
// .map((x) => (x.basic_form !== '*' ? x.basic_form : x.surface_form));
|
27
|
+
// },
|
28
|
+
// };
|
29
|
+
// export const onPostBootstrap: GatsbyNode["onPostBootstrap"] = async ({
|
30
|
+
// actions,
|
31
|
+
// getNode,
|
32
|
+
// getNodesByType,
|
33
|
+
// createNodeId,
|
34
|
+
// reporter,
|
35
|
+
// cache,
|
36
|
+
// }) => {
|
37
|
+
// const nodes = getNodesByType(NODE_TYPE.Post);
|
38
|
+
// const docs: Record<string, string>[] = nodes.map((node) => ({
|
39
|
+
// id: node.id,
|
40
|
+
// text: node.rawText as string,
|
41
|
+
// }));
|
42
|
+
// const tfidf = new TfIdf();
|
43
|
+
// for (let doc of docs) {
|
44
|
+
// const key = `${md5(doc.text)}-related-post`;
|
45
|
+
// const cached_ssd = await cache.get(key);
|
46
|
+
// if (cached_ssd !== undefined) {
|
47
|
+
// tfidf.addDocument(cached_ssd);
|
48
|
+
// continue;
|
49
|
+
// }
|
50
|
+
// const ssd = await getSpaceSeparatedDoc[option.doc_lang](
|
51
|
+
// getTextFromMarkdown(doc.text)
|
52
|
+
// );
|
53
|
+
// tfidf.addDocument(ssd);
|
54
|
+
// await cache.set(key, ssd);
|
55
|
+
// }
|
56
|
+
// // generate bow vectors
|
57
|
+
// type Term = TfIdfTerm & {
|
58
|
+
// tf: number;
|
59
|
+
// idf: number;
|
60
|
+
// };
|
61
|
+
// //// extract keywords from each document
|
62
|
+
// const doc_terms = docs.map((_, i) =>
|
63
|
+
// (tfidf.listTerms(i) as Term[])
|
64
|
+
// .map((x) => ({ ...x, tfidf: (x as Term).tf * (x as Term).idf }))
|
65
|
+
// .sort((x, y) => y.tfidf - x.tfidf)
|
66
|
+
// );
|
67
|
+
// // DEBUG: print terms
|
68
|
+
// // doc_terms.forEach((x, i) =>
|
69
|
+
// // console.log(
|
70
|
+
// // docs[i].id,
|
71
|
+
// // x.map((x) => x.term)
|
72
|
+
// // )
|
73
|
+
// //);
|
74
|
+
// const all_keywords = new Set<string>();
|
75
|
+
// const tfidf_map_for_each_doc: Map<string, number>[] = [];
|
76
|
+
// doc_terms.forEach((x, i) => {
|
77
|
+
// tfidf_map_for_each_doc[i] = new Map<string, number>();
|
78
|
+
// x.slice(0, option.each_bow_size).forEach((x) => {
|
79
|
+
// all_keywords.add(x.term);
|
80
|
+
// tfidf_map_for_each_doc[i].set(x.term, x.tfidf);
|
81
|
+
// });
|
82
|
+
// });
|
83
|
+
// //// generate vectors
|
84
|
+
// const bow_vectors = new Map<string, BowVector>();
|
85
|
+
// docs.forEach((x, i) => {
|
86
|
+
// if (bow_vectors === null) return;
|
87
|
+
// bow_vectors.set(
|
88
|
+
// x.id,
|
89
|
+
// Array.from(all_keywords)
|
90
|
+
// .map((x) => tfidf_map_for_each_doc[i].get(x))
|
91
|
+
// .map((x) => (x === undefined ? 0 : x))
|
92
|
+
// );
|
93
|
+
// });
|
94
|
+
// reporter.info(
|
95
|
+
// `[related-posts] bow vectors generated, dimention: ${all_keywords.size}`
|
96
|
+
// );
|
97
|
+
// // create related nodes
|
98
|
+
// nodes.forEach((node) => {
|
99
|
+
// const related_nodes = getRelatedPosts(node.id, bow_vectors)
|
100
|
+
// .slice(1)
|
101
|
+
// .map((id) => getNode(id));
|
102
|
+
// const digest = `${node.id} >>> related${option.target_node}s`;
|
103
|
+
// actions.createNode({
|
104
|
+
// id: createNodeId(digest),
|
105
|
+
// parent: node.id,
|
106
|
+
// internal: {
|
107
|
+
// type: `related${option.target_node}s`,
|
108
|
+
// contentDigest: digest,
|
109
|
+
// },
|
110
|
+
// posts: related_nodes,
|
111
|
+
// });
|
112
|
+
// });
|
113
|
+
// };
|
package/dist/util/processor.d.ts
CHANGED
package/dist/util/processor.js
CHANGED
@@ -5,22 +5,51 @@ const gatsby_source_filesystem_1 = require("gatsby-source-filesystem");
|
|
5
5
|
const metadataProcessor_1 = require("./metadataProcessor");
|
6
6
|
const tableOfContent_1 = require("./tableOfContent");
|
7
7
|
const processor = async (blocks, actions, getCache, createNodeId, reporter) => {
|
8
|
-
const { thumbnail, tableOfContents, updatedBlocks } = await processBlocksForContent(blocks, actions, getCache, createNodeId, reporter);
|
8
|
+
const { thumbnail, tableOfContents, updatedBlocks, rawText } = await processBlocksForContent(blocks, actions, getCache, createNodeId, reporter);
|
9
9
|
await (0, metadataProcessor_1.processMetadata)(blocks, actions, createNodeId, reporter);
|
10
|
-
return [thumbnail, tableOfContents, updatedBlocks];
|
10
|
+
return [thumbnail, tableOfContents, updatedBlocks, rawText];
|
11
11
|
};
|
12
12
|
exports.processor = processor;
|
13
13
|
const processBlocksForContent = async (blocks, actions, getCache, createNodeId, reporter) => {
|
14
14
|
const tableOfContents = [];
|
15
15
|
let thumbnail = null;
|
16
|
+
let rawText = "";
|
16
17
|
const updatedBlocks = (await Promise.all(blocks.map(async (block) => {
|
17
18
|
await (0, tableOfContent_1.processTableOfContents)(block, tableOfContents);
|
19
|
+
const plainText = extractPlainText(block);
|
20
|
+
if (plainText) {
|
21
|
+
rawText += plainText + " ";
|
22
|
+
}
|
18
23
|
if (isImageBlock(block)) {
|
19
|
-
|
24
|
+
const updatedBlock = await processImageBlock(block, actions, getCache, createNodeId, reporter);
|
25
|
+
if (!thumbnail && updatedBlock?.image?.fileId) {
|
26
|
+
thumbnail = updatedBlock.image.fileId;
|
27
|
+
}
|
28
|
+
return updatedBlock;
|
20
29
|
}
|
21
30
|
return block;
|
22
31
|
}))).filter((block) => block !== null);
|
23
|
-
return { thumbnail, tableOfContents, updatedBlocks };
|
32
|
+
return { thumbnail, tableOfContents, updatedBlocks, rawText };
|
33
|
+
};
|
34
|
+
const isTextContentBlock = (block) => {
|
35
|
+
return [
|
36
|
+
"paragraph",
|
37
|
+
"heading_1",
|
38
|
+
"heading_2",
|
39
|
+
"heading_3",
|
40
|
+
"quote",
|
41
|
+
"bulleted_list_item",
|
42
|
+
"numbered_list_item",
|
43
|
+
"callout",
|
44
|
+
"code",
|
45
|
+
].includes(block.type);
|
46
|
+
};
|
47
|
+
const extractPlainText = (block) => {
|
48
|
+
if (isTextContentBlock(block)) {
|
49
|
+
const richTextArray = block[block.type]?.rich_text || [];
|
50
|
+
return richTextArray.map((text) => text.plain_text).join(" ");
|
51
|
+
}
|
52
|
+
return null;
|
24
53
|
};
|
25
54
|
const isImageBlock = (block) => {
|
26
55
|
return block.type === "image" && "image" in block;
|
package/dist/util/relatedPost.js
CHANGED
@@ -1 +1,34 @@
|
|
1
1
|
"use strict";
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
4
|
+
};
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
6
|
+
exports.onPostBootstrap = void 0;
|
7
|
+
const crypto_1 = __importDefault(require("crypto"));
|
8
|
+
const natural_1 = require("natural");
|
9
|
+
const constants_1 = require("../constants");
|
10
|
+
const md5 = (str) => {
|
11
|
+
const md5 = crypto_1.default.createHash("md5");
|
12
|
+
return md5.update(str, "binary").digest("hex");
|
13
|
+
};
|
14
|
+
const getSpaceSeparatedDoc = async (doc) => { };
|
15
|
+
const onPostBootstrap = async ({ actions, getNode, getNodesByType, createNodeId, reporter, cache, }) => {
|
16
|
+
const nodes = getNodesByType(constants_1.NODE_TYPE.Post);
|
17
|
+
const docs = nodes.map((node) => ({
|
18
|
+
id: node.id,
|
19
|
+
text: node.rawText,
|
20
|
+
}));
|
21
|
+
const tfidf = new natural_1.TfIdf();
|
22
|
+
docs.map(async (doc) => {
|
23
|
+
if (doc.text) {
|
24
|
+
const key = `${md5(doc.text)}-doc`;
|
25
|
+
const cached_ssd = await cache.get(key);
|
26
|
+
if (cached_ssd !== undefined) {
|
27
|
+
tfidf.addDocument(cached_ssd);
|
28
|
+
}
|
29
|
+
else {
|
30
|
+
}
|
31
|
+
}
|
32
|
+
});
|
33
|
+
};
|
34
|
+
exports.onPostBootstrap = onPostBootstrap;
|
package/package.json
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
{
|
2
2
|
"name": "gatsby-source-notion-churnotion",
|
3
3
|
"description": "Gatsby plugin that can connect with One Notion Database RECURSIVELY using official API",
|
4
|
-
"version": "1.0.65",
|
4
|
+
"version": "1.0.67",
|
5
5
|
"skipLibCheck": true,
|
6
6
|
"license": "0BSD",
|
7
7
|
"main": "./dist/gatsby-node.js",
|
@@ -40,11 +40,13 @@
|
|
40
40
|
"gatsby-source-filesystem": "^5.14.0",
|
41
41
|
"gatsby-transformer-json": "^5.14.0",
|
42
42
|
"gatsby-transformer-sharp": "^5.14.0",
|
43
|
+
"kiwi-nlp": "^0.20.3",
|
43
44
|
"metascraper": "^5.45.25",
|
44
45
|
"metascraper-description": "^5.45.25",
|
45
46
|
"metascraper-image": "^5.45.27",
|
46
47
|
"metascraper-title": "^5.45.25",
|
47
48
|
"metascraper-url": "^5.45.25",
|
49
|
+
"natural": "^8.0.1",
|
48
50
|
"node-fetch": "^3.3.2",
|
49
51
|
"notion-to-md": "^3.1.1",
|
50
52
|
"notion-types": "^7.1.5",
|
File without changes
|