gatsby-source-notion-churnotion 1.1.35 → 1.1.36

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions exactly as they appear in the public registry.
@@ -65,9 +65,10 @@ const getSpaceSeparatedDoc = (doc) => {
65
65
  const tokens = getTokens(doc);
66
66
  return tokens.join(" ");
67
67
  };
68
- const onPostBootstrap = async ({ getNodesByType, actions, reporter, }) => {
69
- const { createNodeField } = actions;
68
+ const onPostBootstrap = async ({ getNodesByType, actions, reporter, createNodeId, cache, }) => {
69
+ const { createNodeField, createNode } = actions;
70
70
  reporter.info(`Creating explicit relationships between nodes...`);
71
+ // 1. Book과 Post 간의 관계 설정
71
72
  // Get all Book and Post nodes
72
73
  const books = getNodesByType(constants_1.NODE_TYPE.Book);
73
74
  const posts = getNodesByType(constants_1.NODE_TYPE.Post);
@@ -93,6 +94,112 @@ const onPostBootstrap = async ({ getNodesByType, actions, reporter, }) => {
93
94
  });
94
95
  reporter.info(`Added ${relatedPostIds.length} posts to book: ${book.book_name}`);
95
96
  });
96
- reporter.info(`Relationship creation completed`);
97
+ reporter.info(`Book-Post relationship creation completed`);
98
+ // 2. 관련 포스트 기능 구현
99
+ reporter.info(`Creating related posts...`);
100
+ // 유효한 텍스트가 있는 포스트 필터링
101
+ const docsWithText = posts
102
+ .filter((post) => post.rawText && post.rawText.trim() !== "")
103
+ .map((post) => ({
104
+ id: post.id,
105
+ text: post.rawText || "",
106
+ }));
107
+ if (docsWithText.length === 0) {
108
+ reporter.warn(`No posts with valid text content found for related posts calculation`);
109
+ // 빈 관련 포스트 노드라도 생성
110
+ posts.forEach((post) => {
111
+ const digest = `${post.id} - ${constants_1.NODE_TYPE.RelatedPost}`;
112
+ createNode({
113
+ id: createNodeId(digest),
114
+ parent: post.id,
115
+ internal: {
116
+ type: constants_1.NODE_TYPE.RelatedPost,
117
+ contentDigest: digest,
118
+ },
119
+ posts: [],
120
+ });
121
+ });
122
+ return;
123
+ }
124
+ reporter.info(`Processing ${docsWithText.length} posts for related content`);
125
+ // TF-IDF 계산 준비
126
+ const tfidf = new natural_1.TfIdf();
127
+ // 텍스트 전처리 및 TF-IDF 문서 추가
128
+ for (const doc of docsWithText) {
129
+ if (!doc.text)
130
+ continue;
131
+ const cacheKey = `${md5(doc.text)}-doc`;
132
+ let processedText;
133
+ try {
134
+ // 캐시에서 전처리된 문서 가져오기 시도
135
+ const cachedText = await cache.get(cacheKey);
136
+ if (cachedText) {
137
+ processedText = cachedText;
138
+ }
139
+ else {
140
+ // 텍스트 전처리
141
+ const cleanedText = await getTextFromRawText(doc.text);
142
+ processedText = getSpaceSeparatedDoc(cleanedText);
143
+ await cache.set(cacheKey, processedText);
144
+ }
145
+ // TF-IDF에 문서 추가
146
+ tfidf.addDocument(processedText);
147
+ }
148
+ catch (err) {
149
+ reporter.warn(`Error processing text for post ${doc.id}: ${err}`);
150
+ tfidf.addDocument(""); // 오류 방지를 위해 빈 문서 추가
151
+ }
152
+ }
153
+ const docTerms = Array.from({ length: docsWithText.length }, (_, i) => {
154
+ try {
155
+ return tfidf.listTerms(i)
156
+ .map((x) => ({ ...x, tfidf: x.tf * x.idf }))
157
+ .sort((x, y) => y.tfidf - x.tfidf);
158
+ }
159
+ catch (err) {
160
+ reporter.warn(`Error listing terms for document index ${i}: ${err}`);
161
+ return [];
162
+ }
163
+ });
164
+ // 모든 키워드 수집 및 TF-IDF 맵 생성
165
+ const allKeywords = new Set();
166
+ const tfidfMapForEachDoc = [];
167
+ docTerms.forEach((terms, i) => {
168
+ tfidfMapForEachDoc[i] = new Map();
169
+ terms.slice(0, 30).forEach((term) => {
170
+ allKeywords.add(term.term);
171
+ tfidfMapForEachDoc[i].set(term.term, term.tfidf);
172
+ });
173
+ });
174
+ // 각 문서의 BOW 벡터 생성
175
+ const bowVectors = new Map();
176
+ docsWithText.forEach((doc, i) => {
177
+ const vector = Array.from(allKeywords).map((keyword) => tfidfMapForEachDoc[i].get(keyword) || 0);
178
+ bowVectors.set(doc.id, vector);
179
+ });
180
+ // 각 포스트에 대해 관련 포스트 노드 생성
181
+ posts.forEach((post) => {
182
+ let relatedNodeIds = [];
183
+ if (bowVectors.has(post.id)) {
184
+ try {
185
+ relatedNodeIds = getRelatedPosts(post.id, bowVectors);
186
+ }
187
+ catch (err) {
188
+ reporter.warn(`Error getting related posts for ${post.id}: ${err}`);
189
+ }
190
+ }
191
+ const digest = `${post.id} - ${constants_1.NODE_TYPE.RelatedPost}`;
192
+ createNode({
193
+ id: createNodeId(digest),
194
+ parent: post.id,
195
+ internal: {
196
+ type: constants_1.NODE_TYPE.RelatedPost,
197
+ contentDigest: digest,
198
+ },
199
+ posts: relatedNodeIds,
200
+ });
201
+ reporter.info(`Created related posts node for: ${post.title} with ${relatedNodeIds.length} related posts`);
202
+ });
203
+ reporter.info(`Related posts creation completed`);
97
204
  };
98
205
  exports.onPostBootstrap = onPostBootstrap;
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "gatsby-source-notion-churnotion",
3
3
  "description": "Gatsby plugin that can connect with One Notion Database RECURSIVELY using official API",
4
- "version": "1.1.35",
4
+ "version": "1.1.36",
5
5
  "skipLibCheck": true,
6
6
  "license": "0BSD",
7
7
  "main": "./dist/gatsby-node.js",