gatsby-source-notion-churnotion 1.0.67 → 1.0.68
Sign up to get free protection for your applications and to get access to all the features.
- package/dist/gatsby-node.d.ts +1 -0
- package/dist/gatsby-node.js +3 -1
- package/dist/onPostBootstrap.d.ts +2 -0
- package/dist/onPostBootstrap.js +134 -112
- package/dist/util/processor.js +5 -1
- package/package.json +2 -1
- package/dist/util/relatedPost.d.ts +0 -2
- package/dist/util/relatedPost.js +0 -34
package/dist/gatsby-node.d.ts
CHANGED
package/dist/gatsby-node.js
CHANGED
@@ -1,9 +1,11 @@
|
|
1
1
|
"use strict";
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
3
|
-
exports.createSchemaCustomization = exports.sourceNodes = exports.onPluginInit = void 0;
|
3
|
+
exports.onPostBootstrap = exports.createSchemaCustomization = exports.sourceNodes = exports.onPluginInit = void 0;
|
4
4
|
var onPluginInit_1 = require("./onPluginInit");
|
5
5
|
Object.defineProperty(exports, "onPluginInit", { enumerable: true, get: function () { return onPluginInit_1.onPluginInit; } });
|
6
6
|
var source_nodes_1 = require("./source-nodes");
|
7
7
|
Object.defineProperty(exports, "sourceNodes", { enumerable: true, get: function () { return source_nodes_1.sourceNodes; } });
|
8
8
|
var createSchemaCustomization_1 = require("./createSchemaCustomization");
|
9
9
|
Object.defineProperty(exports, "createSchemaCustomization", { enumerable: true, get: function () { return createSchemaCustomization_1.createSchemaCustomization; } });
|
10
|
+
var onPostBootstrap_1 = require("./onPostBootstrap");
|
11
|
+
Object.defineProperty(exports, "onPostBootstrap", { enumerable: true, get: function () { return onPostBootstrap_1.onPostBootstrap; } });
|
package/dist/onPostBootstrap.js
CHANGED
@@ -1,113 +1,135 @@
|
|
1
1
|
"use strict";
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
//
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
//
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
4
|
+
};
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
6
|
+
exports.onPostBootstrap = void 0;
|
7
|
+
const crypto_1 = __importDefault(require("crypto"));
|
8
|
+
const kiwi_nlp_1 = require("kiwi-nlp");
|
9
|
+
const natural_1 = require("natural");
|
10
|
+
const constants_1 = require("./constants");
|
11
|
+
const computeCosineSimilarity = require("compute-cosine-similarity");
|
12
|
+
const vector_similarity_memo = new Map();
|
13
|
+
const md5 = (str) => {
|
14
|
+
const md5 = crypto_1.default.createHash("md5");
|
15
|
+
return md5.update(str, "binary").digest("hex");
|
16
|
+
};
|
17
|
+
const getMemorizedVectorSimilarity = (v1, v2) => {
|
18
|
+
const id = v1.id < v2.id ? `${v1.id} ${v2.id}` : `${v2.id} ${v1.id}`;
|
19
|
+
const memorized_similarity = vector_similarity_memo.get(id);
|
20
|
+
if (memorized_similarity !== undefined)
|
21
|
+
return memorized_similarity;
|
22
|
+
const similarity = calcVectorSimilarity(v1.vector, v2.vector);
|
23
|
+
vector_similarity_memo.set(id, similarity);
|
24
|
+
return similarity;
|
25
|
+
};
|
26
|
+
const calcVectorSimilarity = (v1, v2) => {
|
27
|
+
if (v1.length !== v2.length)
|
28
|
+
throw new Error("Both vector's size must be equal");
|
29
|
+
return computeCosineSimilarity(v1, v2);
|
30
|
+
};
|
31
|
+
const getRelatedPosts = (id, bow_vectors) => {
|
32
|
+
const vector = bow_vectors.get(id);
|
33
|
+
if (vector === undefined)
|
34
|
+
return [];
|
35
|
+
const vector_node = {
|
36
|
+
id,
|
37
|
+
vector,
|
38
|
+
};
|
39
|
+
return Array.from(bow_vectors.entries())
|
40
|
+
.sort((x, y) => {
|
41
|
+
const vector_x = {
|
42
|
+
id: x[0],
|
43
|
+
vector: x[1],
|
44
|
+
};
|
45
|
+
const vector_y = {
|
46
|
+
id: y[0],
|
47
|
+
vector: y[1],
|
48
|
+
};
|
49
|
+
return (getMemorizedVectorSimilarity(vector_y, vector_node) -
|
50
|
+
getMemorizedVectorSimilarity(vector_x, vector_node));
|
51
|
+
})
|
52
|
+
.map((x) => x[0]);
|
53
|
+
};
|
54
|
+
const getTextFromRawText = async (doc) => {
|
55
|
+
return doc
|
56
|
+
.replace(/http[^ ]+/g, "")
|
57
|
+
.replace(/[\#\!\(\)\*\_\[\]\|\=\>\+\`\:\-]/g, "");
|
58
|
+
};
|
59
|
+
const getSpaceSeparatedDoc = async (doc, kiwi) => {
|
60
|
+
return kiwi.tokenize(doc).map((tokenInfo) => tokenInfo.str);
|
61
|
+
};
|
62
|
+
const onPostBootstrap = async ({ actions, getNode, getNodesByType, createNodeId, reporter, cache, }) => {
|
63
|
+
const builder = await kiwi_nlp_1.KiwiBuilder.create("/dist/kiwi-wasm.wasm");
|
64
|
+
const kiwi = await builder.build({
|
65
|
+
modelFiles: {
|
66
|
+
"combiningRule.txt": "/dist/model/combiningRule.txt",
|
67
|
+
"default.dict": "/dist/model/default.dict",
|
68
|
+
"extract.mdl": "/dist/model/extract.mdl",
|
69
|
+
"multi.dict": "/dist/model/multi.dict",
|
70
|
+
"sj.knlm": "/dist/model/sj.knlm",
|
71
|
+
"sj.morph": "/dist/model/sj.morph",
|
72
|
+
"skipbigram.mdl": "/dist/model/skipbigram.mdl",
|
73
|
+
"typo.dict": "/dist/model/typo.dict",
|
74
|
+
},
|
75
|
+
});
|
76
|
+
const nodes = getNodesByType(constants_1.NODE_TYPE.Post);
|
77
|
+
const docs = nodes.map((node) => ({
|
78
|
+
id: node.id,
|
79
|
+
text: node.rawText,
|
80
|
+
}));
|
81
|
+
const tfidf = new natural_1.TfIdf();
|
82
|
+
// tfidf
|
83
|
+
docs.map(async (doc) => {
|
84
|
+
if (doc.text) {
|
85
|
+
const key = `${md5(doc.text)}-doc`;
|
86
|
+
const cached_ssd = await cache.get(key);
|
87
|
+
if (cached_ssd !== undefined) {
|
88
|
+
tfidf.addDocument(cached_ssd);
|
89
|
+
}
|
90
|
+
else {
|
91
|
+
const ssd = await getSpaceSeparatedDoc(await getTextFromRawText(doc.text), kiwi);
|
92
|
+
tfidf.addDocument(ssd);
|
93
|
+
await cache.set(key, ssd);
|
94
|
+
}
|
95
|
+
}
|
96
|
+
});
|
97
|
+
//
|
98
|
+
const doc_terms = docs.map((_, i) => tfidf.listTerms(i)
|
99
|
+
.map((x) => ({ ...x, tfidf: x.tf * x.idf }))
|
100
|
+
.sort((x, y) => y.tfidf - x.tfidf));
|
101
|
+
const all_keywords = new Set();
|
102
|
+
const tfidf_map_for_each_doc = [];
|
103
|
+
doc_terms.forEach((x, i) => {
|
104
|
+
tfidf_map_for_each_doc[i] = new Map();
|
105
|
+
x.slice(0, 30).forEach((x) => {
|
106
|
+
all_keywords.add(x.term);
|
107
|
+
tfidf_map_for_each_doc[i].set(x.term, x.tfidf);
|
108
|
+
});
|
109
|
+
});
|
110
|
+
const bow_vectors = new Map();
|
111
|
+
docs.forEach((x, i) => {
|
112
|
+
if (bow_vectors === null)
|
113
|
+
return;
|
114
|
+
bow_vectors.set(x.id, Array.from(all_keywords)
|
115
|
+
.map((x) => tfidf_map_for_each_doc[i].get(x))
|
116
|
+
.map((x) => (x === undefined ? 0 : x)));
|
117
|
+
});
|
118
|
+
reporter.info(`[related-posts] bow vectors generated, dimention: ${all_keywords.size}`);
|
119
|
+
nodes.forEach((node) => {
|
120
|
+
const related_nodes = getRelatedPosts(node.id, bow_vectors)
|
121
|
+
.slice(1)
|
122
|
+
.map((id) => getNode(id));
|
123
|
+
const digest = `${node.id} - ${constants_1.NODE_TYPE.RelatedPost}`;
|
124
|
+
actions.createNode({
|
125
|
+
id: createNodeId(digest),
|
126
|
+
parent: node.id,
|
127
|
+
internal: {
|
128
|
+
type: `related${constants_1.NODE_TYPE.RelatedPost}s`,
|
129
|
+
contentDigest: digest,
|
130
|
+
},
|
131
|
+
posts: related_nodes,
|
132
|
+
});
|
133
|
+
});
|
134
|
+
};
|
135
|
+
exports.onPostBootstrap = onPostBootstrap;
|
package/dist/util/processor.js
CHANGED
@@ -47,7 +47,11 @@ const isTextContentBlock = (block) => {
|
|
47
47
|
const extractPlainText = (block) => {
|
48
48
|
if (isTextContentBlock(block)) {
|
49
49
|
const richTextArray = block[block.type]?.rich_text || [];
|
50
|
-
return richTextArray
|
50
|
+
return richTextArray
|
51
|
+
.map((text) => block.type === "code" // code의 \n 제거
|
52
|
+
? text.plain_text.replace(/\\n/g, "")
|
53
|
+
: text.plain_text)
|
54
|
+
.join(" ");
|
51
55
|
}
|
52
56
|
return null;
|
53
57
|
};
|
package/package.json
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
{
|
2
2
|
"name": "gatsby-source-notion-churnotion",
|
3
3
|
"description": "Gatsby plugin that can connect with One Notion Database RECURSIVELY using official API",
|
4
|
-
"version": "1.0.67",
|
4
|
+
"version": "1.0.68",
|
5
5
|
"skipLibCheck": true,
|
6
6
|
"license": "0BSD",
|
7
7
|
"main": "./dist/gatsby-node.js",
|
@@ -36,6 +36,7 @@
|
|
36
36
|
"@notionhq/client": "^2.2.15",
|
37
37
|
"@types/node": "^22.10.2",
|
38
38
|
"axios": "^1.7.9",
|
39
|
+
"compute-cosine-similarity": "^1.1.0",
|
39
40
|
"gatsby-plugin-sharp": "^5.14.0",
|
40
41
|
"gatsby-source-filesystem": "^5.14.0",
|
41
42
|
"gatsby-transformer-json": "^5.14.0",
|
package/dist/util/relatedPost.js
DELETED
@@ -1,34 +0,0 @@
|
|
1
|
-
"use strict";
|
2
|
-
var __importDefault = (this && this.__importDefault) || function (mod) {
|
3
|
-
return (mod && mod.__esModule) ? mod : { "default": mod };
|
4
|
-
};
|
5
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
6
|
-
exports.onPostBootstrap = void 0;
|
7
|
-
const crypto_1 = __importDefault(require("crypto"));
|
8
|
-
const natural_1 = require("natural");
|
9
|
-
const constants_1 = require("../constants");
|
10
|
-
const md5 = (str) => {
|
11
|
-
const md5 = crypto_1.default.createHash("md5");
|
12
|
-
return md5.update(str, "binary").digest("hex");
|
13
|
-
};
|
14
|
-
const getSpaceSeparatedDoc = async (doc) => { };
|
15
|
-
const onPostBootstrap = async ({ actions, getNode, getNodesByType, createNodeId, reporter, cache, }) => {
|
16
|
-
const nodes = getNodesByType(constants_1.NODE_TYPE.Post);
|
17
|
-
const docs = nodes.map((node) => ({
|
18
|
-
id: node.id,
|
19
|
-
text: node.rawText,
|
20
|
-
}));
|
21
|
-
const tfidf = new natural_1.TfIdf();
|
22
|
-
docs.map(async (doc) => {
|
23
|
-
if (doc.text) {
|
24
|
-
const key = `${md5(doc.text)}-doc`;
|
25
|
-
const cached_ssd = await cache.get(key);
|
26
|
-
if (cached_ssd !== undefined) {
|
27
|
-
tfidf.addDocument(cached_ssd);
|
28
|
-
}
|
29
|
-
else {
|
30
|
-
}
|
31
|
-
}
|
32
|
-
});
|
33
|
-
};
|
34
|
-
exports.onPostBootstrap = onPostBootstrap;
|