gatsby-source-notion-churnotion 1.0.99 → 1.1.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -8,17 +8,9 @@ const crypto_1 = __importDefault(require("crypto"));
8
8
  const natural_1 = require("natural");
9
9
  const constants_1 = require("./constants");
10
10
  const compute_cosine_similarity_1 = __importDefault(require("compute-cosine-similarity"));
11
- const node_nlp_1 = require("node-nlp");
12
- const manager = new node_nlp_1.NlpManager({ languages: ["ko"] });
13
- const getTokens = async (doc) => {
14
- const result = await manager.process("ko", doc);
15
- console.dir(result);
16
- if (!result.entities) {
17
- return [];
18
- }
19
- return result.entities
20
- .map((entity) => entity.utteranceText)
21
- .filter((text) => text.length > 1);
11
+ const tokenizer = new natural_1.WordTokenizer();
12
+ const getTokens = (doc) => {
13
+ return tokenizer.tokenize(doc);
22
14
  };
23
15
  const vector_similarity_memo = new Map();
24
16
  const md5 = (str) => {
@@ -69,11 +61,8 @@ const getTextFromRawText = async (doc) => {
69
61
  .replace(/http[^ ]+/g, "")
70
62
  .replace(/[\#\!\(\)\*\_\[\]\|\=\>\+\`\:\-]/g, "");
71
63
  };
72
- const getSpaceSeparatedDoc = async (doc) => {
73
- if (!doc.trim()) {
74
- return "";
75
- }
76
- const tokens = await getTokens(doc);
64
+ const getSpaceSeparatedDoc = (doc) => {
65
+ const tokens = getTokens(doc);
77
66
  return tokens.join(" ");
78
67
  };
79
68
  const onPostBootstrap = async ({ actions, getNode, getNodesByType, createNodeId, reporter, cache }, options) => {
@@ -86,8 +75,6 @@ const onPostBootstrap = async ({ actions, getNode, getNodesByType, createNodeId,
86
75
  docs.map(async (doc) => {
87
76
  if (doc.text) {
88
77
  const key = `${md5(doc.text)}-doc`;
89
- reporter.info(`[TEST1] ${doc.text}`);
90
- reporter.info(`[TEST2] ${await getTextFromRawText(doc.text)}`);
91
78
  const cached_ssd = await cache.get(key);
92
79
  if (cached_ssd !== undefined) {
93
80
  tfidf.addDocument(cached_ssd);
@@ -114,6 +101,7 @@ const onPostBootstrap = async ({ actions, getNode, getNodesByType, createNodeId,
114
101
  });
115
102
  });
116
103
  const bow_vectors = new Map();
104
+ reporter.info(`[DEBUG] BOW Vectors: ${JSON.stringify([...bow_vectors.entries()])}`);
117
105
  docs.forEach((x, i) => {
118
106
  if (bow_vectors === null)
119
107
  return;
@@ -126,12 +114,13 @@ const onPostBootstrap = async ({ actions, getNode, getNodesByType, createNodeId,
126
114
  const related_nodes = getRelatedPosts(node.id, bow_vectors)
127
115
  .slice(1)
128
116
  .map((id) => getNode(id));
117
+ reporter.info(`[DEBUG] Related posts for node ${node.id}: ${JSON.stringify(related_nodes)}`);
129
118
  const digest = `${node.id} - ${constants_1.NODE_TYPE.RelatedPost}`;
130
119
  actions.createNode({
131
120
  id: createNodeId(digest),
132
121
  parent: node.id,
133
122
  internal: {
134
- type: `related${constants_1.NODE_TYPE.RelatedPost}s`,
123
+ type: constants_1.NODE_TYPE.RelatedPost,
135
124
  contentDigest: digest,
136
125
  },
137
126
  posts: related_nodes,
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "gatsby-source-notion-churnotion",
3
3
  "description": "Gatsby plugin that can connect with One Notion Database RECURSIVELY using official API",
4
- "version": "1.0.99",
4
+ "version": "1.1.1",
5
5
  "skipLibCheck": true,
6
6
  "license": "0BSD",
7
7
  "main": "./dist/gatsby-node.js",
@@ -48,7 +48,6 @@
48
48
  "metascraper-url": "^5.45.25",
49
49
  "natural": "^8.0.1",
50
50
  "node-fetch": "^3.3.2",
51
- "node-nlp": "^5.0.0-alpha.4",
52
51
  "notion-to-md": "^3.1.1",
53
52
  "notion-types": "^7.1.5",
54
53
  "typescript": "^5.7.2"