gatsby-source-notion-churnotion 1.0.99 → 1.1.1

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -8,17 +8,9 @@ const crypto_1 = __importDefault(require("crypto"));
8
8
  const natural_1 = require("natural");
9
9
  const constants_1 = require("./constants");
10
10
  const compute_cosine_similarity_1 = __importDefault(require("compute-cosine-similarity"));
11
- const node_nlp_1 = require("node-nlp");
12
- const manager = new node_nlp_1.NlpManager({ languages: ["ko"] });
13
- const getTokens = async (doc) => {
14
- const result = await manager.process("ko", doc);
15
- console.dir(result);
16
- if (!result.entities) {
17
- return [];
18
- }
19
- return result.entities
20
- .map((entity) => entity.utteranceText)
21
- .filter((text) => text.length > 1);
11
+ const tokenizer = new natural_1.WordTokenizer();
12
+ const getTokens = (doc) => {
13
+ return tokenizer.tokenize(doc);
22
14
  };
23
15
  const vector_similarity_memo = new Map();
24
16
  const md5 = (str) => {
@@ -69,11 +61,8 @@ const getTextFromRawText = async (doc) => {
69
61
  .replace(/http[^ ]+/g, "")
70
62
  .replace(/[\#\!\(\)\*\_\[\]\|\=\>\+\`\:\-]/g, "");
71
63
  };
72
- const getSpaceSeparatedDoc = async (doc) => {
73
- if (!doc.trim()) {
74
- return "";
75
- }
76
- const tokens = await getTokens(doc);
64
+ const getSpaceSeparatedDoc = (doc) => {
65
+ const tokens = getTokens(doc);
77
66
  return tokens.join(" ");
78
67
  };
79
68
  const onPostBootstrap = async ({ actions, getNode, getNodesByType, createNodeId, reporter, cache }, options) => {
@@ -86,8 +75,6 @@ const onPostBootstrap = async ({ actions, getNode, getNodesByType, createNodeId,
86
75
  docs.map(async (doc) => {
87
76
  if (doc.text) {
88
77
  const key = `${md5(doc.text)}-doc`;
89
- reporter.info(`[TEST1] ${doc.text}`);
90
- reporter.info(`[TEST2] ${await getTextFromRawText(doc.text)}`);
91
78
  const cached_ssd = await cache.get(key);
92
79
  if (cached_ssd !== undefined) {
93
80
  tfidf.addDocument(cached_ssd);
@@ -114,6 +101,7 @@ const onPostBootstrap = async ({ actions, getNode, getNodesByType, createNodeId,
114
101
  });
115
102
  });
116
103
  const bow_vectors = new Map();
104
+ reporter.info(`[DEBUG] BOW Vectors: ${JSON.stringify([...bow_vectors.entries()])}`);
117
105
  docs.forEach((x, i) => {
118
106
  if (bow_vectors === null)
119
107
  return;
@@ -126,12 +114,13 @@ const onPostBootstrap = async ({ actions, getNode, getNodesByType, createNodeId,
126
114
  const related_nodes = getRelatedPosts(node.id, bow_vectors)
127
115
  .slice(1)
128
116
  .map((id) => getNode(id));
117
+ reporter.info(`[DEBUG] Related posts for node ${node.id}: ${JSON.stringify(related_nodes)}`);
129
118
  const digest = `${node.id} - ${constants_1.NODE_TYPE.RelatedPost}`;
130
119
  actions.createNode({
131
120
  id: createNodeId(digest),
132
121
  parent: node.id,
133
122
  internal: {
134
- type: `related${constants_1.NODE_TYPE.RelatedPost}s`,
123
+ type: constants_1.NODE_TYPE.RelatedPost,
135
124
  contentDigest: digest,
136
125
  },
137
126
  posts: related_nodes,
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "gatsby-source-notion-churnotion",
3
3
  "description": "Gatsby plugin that can connect with One Notion Database RECURSIVELY using official API",
4
- "version": "1.0.99",
4
+ "version": "1.1.1",
5
5
  "skipLibCheck": true,
6
6
  "license": "0BSD",
7
7
  "main": "./dist/gatsby-node.js",
@@ -48,7 +48,6 @@
48
48
  "metascraper-url": "^5.45.25",
49
49
  "natural": "^8.0.1",
50
50
  "node-fetch": "^3.3.2",
51
- "node-nlp": "^5.0.0-alpha.4",
52
51
  "notion-to-md": "^3.1.1",
53
52
  "notion-types": "^7.1.5",
54
53
  "typescript": "^5.7.2"