gatsby-source-notion-churnotion 1.0.67 → 1.0.68
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/gatsby-node.d.ts +1 -0
- package/dist/gatsby-node.js +3 -1
- package/dist/onPostBootstrap.d.ts +2 -0
- package/dist/onPostBootstrap.js +134 -112
- package/dist/util/processor.js +5 -1
- package/package.json +2 -1
- package/dist/util/relatedPost.d.ts +0 -2
- package/dist/util/relatedPost.js +0 -34
package/dist/gatsby-node.d.ts
CHANGED
package/dist/gatsby-node.js
CHANGED
@@ -1,9 +1,11 @@
|
|
1
1
|
"use strict";
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
3
|
-
exports.createSchemaCustomization = exports.sourceNodes = exports.onPluginInit = void 0;
|
3
|
+
exports.onPostBootstrap = exports.createSchemaCustomization = exports.sourceNodes = exports.onPluginInit = void 0;
|
4
4
|
var onPluginInit_1 = require("./onPluginInit");
|
5
5
|
Object.defineProperty(exports, "onPluginInit", { enumerable: true, get: function () { return onPluginInit_1.onPluginInit; } });
|
6
6
|
var source_nodes_1 = require("./source-nodes");
|
7
7
|
Object.defineProperty(exports, "sourceNodes", { enumerable: true, get: function () { return source_nodes_1.sourceNodes; } });
|
8
8
|
var createSchemaCustomization_1 = require("./createSchemaCustomization");
|
9
9
|
Object.defineProperty(exports, "createSchemaCustomization", { enumerable: true, get: function () { return createSchemaCustomization_1.createSchemaCustomization; } });
|
10
|
+
var onPostBootstrap_1 = require("./onPostBootstrap");
|
11
|
+
Object.defineProperty(exports, "onPostBootstrap", { enumerable: true, get: function () { return onPostBootstrap_1.onPostBootstrap; } });
|
package/dist/onPostBootstrap.js
CHANGED
@@ -1,113 +1,135 @@
|
|
1
1
|
"use strict";
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
//
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
//
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
4
|
+
};
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
6
|
+
exports.onPostBootstrap = void 0;
|
7
|
+
const crypto_1 = __importDefault(require("crypto"));
|
8
|
+
const kiwi_nlp_1 = require("kiwi-nlp");
|
9
|
+
const natural_1 = require("natural");
|
10
|
+
const constants_1 = require("./constants");
|
11
|
+
const computeCosineSimilarity = require("compute-cosine-similarity");
|
12
|
+
const vector_similarity_memo = new Map();
|
13
|
+
const md5 = (str) => {
|
14
|
+
const md5 = crypto_1.default.createHash("md5");
|
15
|
+
return md5.update(str, "binary").digest("hex");
|
16
|
+
};
|
17
|
+
const getMemorizedVectorSimilarity = (v1, v2) => {
|
18
|
+
const id = v1.id < v2.id ? `${v1.id} ${v2.id}` : `${v2.id} ${v1.id}`;
|
19
|
+
const memorized_similarity = vector_similarity_memo.get(id);
|
20
|
+
if (memorized_similarity !== undefined)
|
21
|
+
return memorized_similarity;
|
22
|
+
const similarity = calcVectorSimilarity(v1.vector, v2.vector);
|
23
|
+
vector_similarity_memo.set(id, similarity);
|
24
|
+
return similarity;
|
25
|
+
};
|
26
|
+
const calcVectorSimilarity = (v1, v2) => {
|
27
|
+
if (v1.length !== v2.length)
|
28
|
+
throw new Error("Both vector's size must be equal");
|
29
|
+
return computeCosineSimilarity(v1, v2);
|
30
|
+
};
|
31
|
+
const getRelatedPosts = (id, bow_vectors) => {
|
32
|
+
const vector = bow_vectors.get(id);
|
33
|
+
if (vector === undefined)
|
34
|
+
return [];
|
35
|
+
const vector_node = {
|
36
|
+
id,
|
37
|
+
vector,
|
38
|
+
};
|
39
|
+
return Array.from(bow_vectors.entries())
|
40
|
+
.sort((x, y) => {
|
41
|
+
const vector_x = {
|
42
|
+
id: x[0],
|
43
|
+
vector: x[1],
|
44
|
+
};
|
45
|
+
const vector_y = {
|
46
|
+
id: y[0],
|
47
|
+
vector: y[1],
|
48
|
+
};
|
49
|
+
return (getMemorizedVectorSimilarity(vector_y, vector_node) -
|
50
|
+
getMemorizedVectorSimilarity(vector_x, vector_node));
|
51
|
+
})
|
52
|
+
.map((x) => x[0]);
|
53
|
+
};
|
54
|
+
const getTextFromRawText = async (doc) => {
|
55
|
+
return doc
|
56
|
+
.replace(/http[^ ]+/g, "")
|
57
|
+
.replace(/[\#\!\(\)\*\_\[\]\|\=\>\+\`\:\-]/g, "");
|
58
|
+
};
|
59
|
+
const getSpaceSeparatedDoc = async (doc, kiwi) => {
|
60
|
+
return kiwi.tokenize(doc).map((tokenInfo) => tokenInfo.str);
|
61
|
+
};
|
62
|
+
const onPostBootstrap = async ({ actions, getNode, getNodesByType, createNodeId, reporter, cache, }) => {
|
63
|
+
const builder = await kiwi_nlp_1.KiwiBuilder.create("/dist/kiwi-wasm.wasm");
|
64
|
+
const kiwi = await builder.build({
|
65
|
+
modelFiles: {
|
66
|
+
"combiningRule.txt": "/dist/model/combiningRule.txt",
|
67
|
+
"default.dict": "/dist/model/default.dict",
|
68
|
+
"extract.mdl": "/dist/model/extract.mdl",
|
69
|
+
"multi.dict": "/dist/model/multi.dict",
|
70
|
+
"sj.knlm": "/dist/model/sj.knlm",
|
71
|
+
"sj.morph": "/dist/model/sj.morph",
|
72
|
+
"skipbigram.mdl": "/dist/model/skipbigram.mdl",
|
73
|
+
"typo.dict": "/dist/model/typo.dict",
|
74
|
+
},
|
75
|
+
});
|
76
|
+
const nodes = getNodesByType(constants_1.NODE_TYPE.Post);
|
77
|
+
const docs = nodes.map((node) => ({
|
78
|
+
id: node.id,
|
79
|
+
text: node.rawText,
|
80
|
+
}));
|
81
|
+
const tfidf = new natural_1.TfIdf();
|
82
|
+
// tfidf
|
83
|
+
docs.map(async (doc) => {
|
84
|
+
if (doc.text) {
|
85
|
+
const key = `${md5(doc.text)}-doc`;
|
86
|
+
const cached_ssd = await cache.get(key);
|
87
|
+
if (cached_ssd !== undefined) {
|
88
|
+
tfidf.addDocument(cached_ssd);
|
89
|
+
}
|
90
|
+
else {
|
91
|
+
const ssd = await getSpaceSeparatedDoc(await getTextFromRawText(doc.text), kiwi);
|
92
|
+
tfidf.addDocument(ssd);
|
93
|
+
await cache.set(key, ssd);
|
94
|
+
}
|
95
|
+
}
|
96
|
+
});
|
97
|
+
//
|
98
|
+
const doc_terms = docs.map((_, i) => tfidf.listTerms(i)
|
99
|
+
.map((x) => ({ ...x, tfidf: x.tf * x.idf }))
|
100
|
+
.sort((x, y) => y.tfidf - x.tfidf));
|
101
|
+
const all_keywords = new Set();
|
102
|
+
const tfidf_map_for_each_doc = [];
|
103
|
+
doc_terms.forEach((x, i) => {
|
104
|
+
tfidf_map_for_each_doc[i] = new Map();
|
105
|
+
x.slice(0, 30).forEach((x) => {
|
106
|
+
all_keywords.add(x.term);
|
107
|
+
tfidf_map_for_each_doc[i].set(x.term, x.tfidf);
|
108
|
+
});
|
109
|
+
});
|
110
|
+
const bow_vectors = new Map();
|
111
|
+
docs.forEach((x, i) => {
|
112
|
+
if (bow_vectors === null)
|
113
|
+
return;
|
114
|
+
bow_vectors.set(x.id, Array.from(all_keywords)
|
115
|
+
.map((x) => tfidf_map_for_each_doc[i].get(x))
|
116
|
+
.map((x) => (x === undefined ? 0 : x)));
|
117
|
+
});
|
118
|
+
reporter.info(`[related-posts] bow vectors generated, dimention: ${all_keywords.size}`);
|
119
|
+
nodes.forEach((node) => {
|
120
|
+
const related_nodes = getRelatedPosts(node.id, bow_vectors)
|
121
|
+
.slice(1)
|
122
|
+
.map((id) => getNode(id));
|
123
|
+
const digest = `${node.id} - ${constants_1.NODE_TYPE.RelatedPost}`;
|
124
|
+
actions.createNode({
|
125
|
+
id: createNodeId(digest),
|
126
|
+
parent: node.id,
|
127
|
+
internal: {
|
128
|
+
type: `related${constants_1.NODE_TYPE.RelatedPost}s`,
|
129
|
+
contentDigest: digest,
|
130
|
+
},
|
131
|
+
posts: related_nodes,
|
132
|
+
});
|
133
|
+
});
|
134
|
+
};
|
135
|
+
exports.onPostBootstrap = onPostBootstrap;
|
package/dist/util/processor.js
CHANGED
@@ -47,7 +47,11 @@ const isTextContentBlock = (block) => {
|
|
47
47
|
const extractPlainText = (block) => {
|
48
48
|
if (isTextContentBlock(block)) {
|
49
49
|
const richTextArray = block[block.type]?.rich_text || [];
|
50
|
-
return richTextArray
|
50
|
+
return richTextArray
|
51
|
+
.map((text) => block.type === "code" // code의 \n 제거
|
52
|
+
? text.plain_text.replace(/\\n/g, "")
|
53
|
+
: text.plain_text)
|
54
|
+
.join(" ");
|
51
55
|
}
|
52
56
|
return null;
|
53
57
|
};
|
package/package.json
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
{
|
2
2
|
"name": "gatsby-source-notion-churnotion",
|
3
3
|
"description": "Gatsby plugin that can connect with One Notion Database RECURSIVELY using official API",
|
4
|
-
"version": "1.0.
|
4
|
+
"version": "1.0.68",
|
5
5
|
"skipLibCheck": true,
|
6
6
|
"license": "0BSD",
|
7
7
|
"main": "./dist/gatsby-node.js",
|
@@ -36,6 +36,7 @@
|
|
36
36
|
"@notionhq/client": "^2.2.15",
|
37
37
|
"@types/node": "^22.10.2",
|
38
38
|
"axios": "^1.7.9",
|
39
|
+
"compute-cosine-similarity": "^1.1.0",
|
39
40
|
"gatsby-plugin-sharp": "^5.14.0",
|
40
41
|
"gatsby-source-filesystem": "^5.14.0",
|
41
42
|
"gatsby-transformer-json": "^5.14.0",
|
package/dist/util/relatedPost.js
DELETED
@@ -1,34 +0,0 @@
|
|
1
|
-
"use strict";
|
2
|
-
var __importDefault = (this && this.__importDefault) || function (mod) {
|
3
|
-
return (mod && mod.__esModule) ? mod : { "default": mod };
|
4
|
-
};
|
5
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
6
|
-
exports.onPostBootstrap = void 0;
|
7
|
-
const crypto_1 = __importDefault(require("crypto"));
|
8
|
-
const natural_1 = require("natural");
|
9
|
-
const constants_1 = require("../constants");
|
10
|
-
const md5 = (str) => {
|
11
|
-
const md5 = crypto_1.default.createHash("md5");
|
12
|
-
return md5.update(str, "binary").digest("hex");
|
13
|
-
};
|
14
|
-
const getSpaceSeparatedDoc = async (doc) => { };
|
15
|
-
const onPostBootstrap = async ({ actions, getNode, getNodesByType, createNodeId, reporter, cache, }) => {
|
16
|
-
const nodes = getNodesByType(constants_1.NODE_TYPE.Post);
|
17
|
-
const docs = nodes.map((node) => ({
|
18
|
-
id: node.id,
|
19
|
-
text: node.rawText,
|
20
|
-
}));
|
21
|
-
const tfidf = new natural_1.TfIdf();
|
22
|
-
docs.map(async (doc) => {
|
23
|
-
if (doc.text) {
|
24
|
-
const key = `${md5(doc.text)}-doc`;
|
25
|
-
const cached_ssd = await cache.get(key);
|
26
|
-
if (cached_ssd !== undefined) {
|
27
|
-
tfidf.addDocument(cached_ssd);
|
28
|
-
}
|
29
|
-
else {
|
30
|
-
}
|
31
|
-
}
|
32
|
-
});
|
33
|
-
};
|
34
|
-
exports.onPostBootstrap = onPostBootstrap;
|