noteconnection 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +198 -0
  3. package/dist/backend/CommunityDetection.js +58 -0
  4. package/dist/backend/FileLoader.js +110 -0
  5. package/dist/backend/GraphBuilder.js +347 -0
  6. package/dist/backend/GraphMetrics.js +70 -0
  7. package/dist/backend/algorithms/CycleDetection.js +63 -0
  8. package/dist/backend/algorithms/HybridEngine.js +70 -0
  9. package/dist/backend/algorithms/StatisticalAnalyzer.js +123 -0
  10. package/dist/backend/algorithms/TopologicalSort.js +69 -0
  11. package/dist/backend/algorithms/VectorSpace.js +87 -0
  12. package/dist/backend/build_dag.js +164 -0
  13. package/dist/backend/config.js +17 -0
  14. package/dist/backend/graph.js +108 -0
  15. package/dist/backend/main.js +67 -0
  16. package/dist/backend/parser.js +94 -0
  17. package/dist/backend/test_robustness/test_hybrid.js +60 -0
  18. package/dist/backend/test_robustness/test_statistics.js +58 -0
  19. package/dist/backend/test_robustness/test_vector.js +54 -0
  20. package/dist/backend/test_robustness.js +113 -0
  21. package/dist/backend/types.js +3 -0
  22. package/dist/backend/utils/frontmatterParser.js +121 -0
  23. package/dist/backend/utils/stringUtils.js +66 -0
  24. package/dist/backend/workers/keywordMatchWorker.js +22 -0
  25. package/dist/core/Graph.js +121 -0
  26. package/dist/core/Graph.test.js +37 -0
  27. package/dist/core/types.js +2 -0
  28. package/dist/frontend/analysis.js +356 -0
  29. package/dist/frontend/app.js +1447 -0
  30. package/dist/frontend/data.js +8356 -0
  31. package/dist/frontend/graph_data.json +8356 -0
  32. package/dist/frontend/index.html +279 -0
  33. package/dist/frontend/reader.js +177 -0
  34. package/dist/frontend/settings.js +84 -0
  35. package/dist/frontend/source_manager.js +61 -0
  36. package/dist/frontend/styles.css +577 -0
  37. package/dist/frontend/styles_analysis.css +145 -0
  38. package/dist/index.js +121 -0
  39. package/dist/server.js +149 -0
  40. package/package.json +39 -0
@@ -0,0 +1,347 @@
1
+ "use strict";
2
// Compiler-emitted module interop helpers. Each one reuses an implementation
// already present on `this` when there is one, otherwise defines its own.

// Exposes property `k` of module `m` on object `o` under the name `k2`
// (`k2` defaults to `k`).
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    var desc = Object.getOwnPropertyDescriptor(m, k);
    // Fall back to a live getter when the property has no descriptor, when it is
    // an accessor on a non-ES module, or when it is a writable/configurable data
    // property; otherwise the original descriptor is reused as-is.
    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
      desc = { enumerable: true, get: function() { return m[k]; } };
    }
    Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
    // Path used when Object.create is unavailable: plain value copy.
    if (k2 === undefined) k2 = k;
    o[k2] = m[k];
}));
// Stores `v` as the enumerable `default` property of `o`.
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
    Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
    o["default"] = v;
});
// Wraps a required module in a namespace-style object: modules flagged
// `__esModule` are returned untouched; otherwise every own key except
// "default" is bound onto a fresh object and the module itself becomes
// that object's `default`.
var __importStar = (this && this.__importStar) || (function () {
    // On first call `ownKeys` replaces itself with Object.getOwnPropertyNames
    // (or a for-in/hasOwnProperty fallback) and then delegates to it.
    var ownKeys = function(o) {
        ownKeys = Object.getOwnPropertyNames || function (o) {
            var ar = [];
            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
            return ar;
        };
        return ownKeys(o);
    };
    return function (mod) {
        if (mod && mod.__esModule) return mod;
        var result = {};
        // `mod != null` skips both null and undefined.
        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
        __setModuleDefault(result, mod);
        return result;
    };
})();
35
+ Object.defineProperty(exports, "__esModule", { value: true });
36
+ exports.GraphBuilder = void 0;
37
+ const Graph_1 = require("../core/Graph");
38
+ const config_1 = require("./config");
39
+ const path = __importStar(require("path"));
40
+ const os = __importStar(require("os"));
41
+ const worker_threads_1 = require("worker_threads");
42
+ const CommunityDetection_1 = require("./CommunityDetection");
43
+ const GraphMetrics_1 = require("./GraphMetrics");
44
+ const stringUtils_1 = require("./utils/stringUtils");
45
+ const frontmatterParser_1 = require("./utils/frontmatterParser");
46
+ const CycleDetection_1 = require("./algorithms/CycleDetection");
47
+ const TopologicalSort_1 = require("./algorithms/TopologicalSort");
48
+ const StatisticalAnalyzer_1 = require("./algorithms/StatisticalAnalyzer");
49
+ const VectorSpace_1 = require("./algorithms/VectorSpace");
50
+ const HybridEngine_1 = require("./algorithms/HybridEngine");
51
/**
 * Service that builds the note graph from raw files.
 */
class GraphBuilder {
    /**
     * Builds a graph from raw files.
     *
     * Steps, in order: file (and optional tag) nodes, explicit frontmatter
     * edges, keyword-match edges, optional inferred edges, cluster assignment,
     * betweenness centrality, cycle warning and topological ranks.
     *
     * @param files Array of raw files; each is read for `filename`, `filepath` and `content`.
     * @param layout Optional map of saved node positions keyed by filename.
     * @returns The populated graph.
     */
    static async build(files, layout) {
        const graph = new Graph_1.Graph();
        // 1. Nodes first, so every later step can resolve ids.
        const terms = this.addFileNodes(files, graph, layout);
        // 2a. Explicit dependencies declared in frontmatter.
        this.addExplicitEdges(files, graph);
        // 2b. Keyword matching: workers for large corpora, single thread otherwise.
        console.log(`[GraphBuilder] Starting keyword matching for ${files.length} files...`);
        if (files.length > 200) {
            console.log(`[GraphBuilder] Using Parallel Processing (Workers)`);
            await this.runParallelMatching(files, graph);
        }
        else {
            this.runSequentialMatching(files, graph);
        }
        // 2c-2e. Optional statistical / vector / hybrid inference.
        this.addInferredEdges(files, terms, graph);
        // 3. Clusters, 4. centrality, 5. cycle check and ranks.
        this.assignClusters(graph);
        this.assignCentrality(graph);
        this.assignRanks(graph);
        return graph;
    }
    /**
     * Adds one node per file and, when `config.enableTags` is set, one node per
     * tag plus a 'tagged' edge from the note to the tag.
     *
     * @returns The distinct filenames in first-seen order (used as inference terms).
     */
    static addFileNodes(files, graph, layout) {
        const noteIds = new Set();
        files.forEach(file => {
            // Parse metadata (tags, prerequisites, next).
            const metadata = frontmatterParser_1.FrontmatterParser.parse(file.content);
            const node = {
                id: file.filename,
                label: file.filename,
                inDegree: 0,
                outDegree: 0,
                content: file.content,
                metadata: {
                    filepath: file.filepath,
                    tags: metadata.tags,
                    prerequisites: metadata.prerequisites,
                    next: metadata.next
                }
            };
            if (layout && layout.has(file.filename)) {
                const pos = layout.get(file.filename);
                node.x = pos.x;
                node.y = pos.y;
            }
            graph.addNode(node);
            noteIds.add(file.filename);
            // Guarded like prerequisites/next so a missing tag list is skipped
            // instead of throwing.
            if (config_1.config.enableTags && Array.isArray(metadata.tags)) {
                metadata.tags.forEach(tag => {
                    const tagId = `#${tag}`;
                    if (!graph.hasNode(tagId)) {
                        graph.addNode({
                            id: tagId,
                            label: tagId,
                            inDegree: 0, outDegree: 0,
                            clusterId: 'tags' // Group tags together
                        });
                    }
                    // Edge: Note -> Tag
                    graph.addEdge(node.id, tagId, 'tagged');
                });
            }
        });
        return Array.from(noteIds);
    }
    /**
     * Adds edges for the `prerequisites` and `next` frontmatter lists of every file.
     */
    static addExplicitEdges(files, graph) {
        files.forEach(sourceFile => {
            const sourceId = sourceFile.filename;
            const node = graph.getNode(sourceId);
            if (!node || !node.metadata)
                return;
            // 'prerequisites': Prerequisite -> Current
            this.linkReferences(graph, sourceId, node.metadata.prerequisites, 'explicit-prerequisite', true);
            // 'next': Current -> Next
            this.linkReferences(graph, sourceId, node.metadata.next, 'explicit-next', false);
        });
    }
    /**
     * Adds one edge per resolvable reference. A reference resolves to the node
     * with that exact id or, failing that, to the id with '.md' appended;
     * unresolved references are skipped.
     *
     * @param referenceIsSource When true the edge runs reference -> sourceId,
     *        otherwise sourceId -> reference.
     */
    static linkReferences(graph, sourceId, references, edgeType, referenceIsSource) {
        if (!references || !Array.isArray(references))
            return;
        references.forEach(reference => {
            let otherId = reference;
            if (!graph.hasNode(otherId)) {
                const withExtension = otherId + '.md';
                if (!graph.hasNode(withExtension))
                    return; // Target not found
                otherId = withExtension;
            }
            if (referenceIsSource) {
                graph.addEdge(otherId, sourceId, edgeType);
            }
            else {
                graph.addEdge(sourceId, otherId, edgeType);
            }
        });
    }
    /**
     * Runs whichever inference passes are enabled in config and adds their edges.
     * The co-occurrence matrix is computed at most once and shared between the
     * statistical and hybrid passes.
     */
    static addInferredEdges(files, terms, graph) {
        let matrix = null;
        const coOccurrence = () => {
            if (matrix === null) {
                matrix = StatisticalAnalyzer_1.StatisticalAnalyzer.analyze(files, terms);
            }
            return matrix;
        };
        // 2c. Statistical Inference (v0.6.0)
        if (config_1.config.enableStatisticalInference) {
            console.log('[GraphBuilder] Running Statistical Inference...');
            const inferredEdges = StatisticalAnalyzer_1.StatisticalAnalyzer.inferDependencies(coOccurrence(), 0.05, 0.1);
            inferredEdges.forEach(dep => {
                graph.addEdge(dep.source, dep.target, 'statistical-inferred', dep.confidence);
            });
            console.log(`[GraphBuilder] Added ${inferredEdges.length} inferred edges.`);
        }
        // 2d. Vector Similarity (v0.6.0) - skipped when hybrid inference is on.
        if (config_1.config.enableVectorSimilarity && !config_1.config.enableHybridInference) {
            console.log('[GraphBuilder] Running Vector Similarity Analysis...');
            const vectorSpace = new VectorSpace_1.VectorSpace(files);
            let similarityEdges = 0;
            files.forEach(file => {
                const similar = vectorSpace.getSimilar(file.filename, 3); // Top 3 similar
                similar.forEach(sim => {
                    if (sim.score > 0.3) { // Threshold
                        // Intended as an association rather than a dependency.
                        // NOTE(review): a single file -> sim edge is added here;
                        // confirm how Graph treats 'vector-association' direction.
                        graph.addEdge(file.filename, sim.id, 'vector-association', sim.score);
                        similarityEdges++;
                    }
                });
            });
            console.log(`[GraphBuilder] Added ${similarityEdges} vector association edges.`);
        }
        // 2e. Hybrid Inference (v0.7.0)
        if (config_1.config.enableHybridInference) {
            console.log('[GraphBuilder] Running Hybrid Inference (Stats + Vector)...');
            const vectorSpace = new VectorSpace_1.VectorSpace(files);
            const hybridEdges = HybridEngine_1.HybridEngine.infer(coOccurrence(), vectorSpace, 0.25, 0.1); // Tune thresholds
            hybridEdges.forEach(dep => {
                // Only type and a numeric value are passed to addEdge; the
                // confidence is passed, `dep.reason` is not stored.
                graph.addEdge(dep.source, dep.target, 'hybrid-inferred', dep.confidence);
            });
            console.log(`[GraphBuilder] Added ${hybridEdges.length} hybrid inferred edges.`);
        }
    }
    /**
     * Assigns `clusterId` to nodes: parent folder name when
     * `config.clusteringStrategy` is 'folder', otherwise the community returned
     * by CommunityDetection. Nodes already in the 'tags' cluster are left alone.
     */
    static assignClusters(graph) {
        if (config_1.config.clusteringStrategy === 'folder') {
            graph.getNodes().forEach(node => {
                // Skip special nodes like tags which might not have filepath
                if (node.clusterId === 'tags')
                    return;
                if (node.metadata && node.metadata.filepath) {
                    node.clusterId = path.basename(path.dirname(node.metadata.filepath));
                }
                else {
                    node.clusterId = 'root'; // Fallback
                }
            });
            return;
        }
        // Default strategy.
        const clusters = CommunityDetection_1.CommunityDetection.detect(graph);
        clusters.forEach((clusterId, nodeId) => {
            const node = graph.getNode(nodeId);
            // Don't overwrite special cluster IDs like 'tags'
            if (node && node.clusterId !== 'tags') {
                node.clusterId = clusterId;
            }
        });
    }
    /**
     * Stores betweenness centrality on every node (v0.1.7).
     */
    static assignCentrality(graph) {
        const centrality = GraphMetrics_1.GraphMetrics.calculateBetweenness(graph);
        centrality.forEach((val, nodeId) => {
            const node = graph.getNode(nodeId);
            if (node) {
                node.centrality = val;
            }
        });
    }
    /**
     * Warns about cycles, then stores topological ranks on every node (v0.3.0).
     */
    static assignRanks(graph) {
        // One traversal serves both the check and the count.
        const cycles = CycleDetection_1.CycleDetector.detectCycles(graph);
        if (cycles.length > 0) {
            console.warn(`[GraphBuilder] Detected ${cycles.length} cycles. Topological Sort may be partial.`);
            // We proceed anyway, but ranks might be inaccurate for cyclic nodes.
        }
        const ranks = TopologicalSort_1.TopologicalSort.assignRanks(graph);
        ranks.forEach((rank, nodeId) => {
            const node = graph.getNode(nodeId);
            if (node) {
                node.rank = rank;
            }
        });
    }
    // --- Parallel Execution Helpers ---
    /**
     * Runs keyword matching across worker threads, one contiguous chunk of
     * files per worker (at most 12 workers).
     *
     * Worker results are buffered and applied to the graph only after every
     * worker has exited cleanly, in chunk order. If any worker fails, the
     * remaining workers are terminated, the buffered results are discarded and
     * matching falls back to `runSequentialMatching`, so the graph never holds a
     * mix of partial parallel results and sequential results.
     */
    static async runParallelMatching(files, graph) {
        const numCPUs = os.cpus().length;
        const workerCount = Math.min(12, Math.max(1, numCPUs - 1)); // Cap at 12 workers for performance
        const chunkSize = Math.ceil(files.length / workerCount);
        const targetIds = files.map(f => f.filename);
        // Running from a .ts file is taken to mean ts-node; otherwise use the compiled worker.
        const isTsNode = path.extname(__filename) === '.ts';
        // Pick the extension directly instead of string-replacing '.ts', which
        // would also hit a '.ts' earlier in the directory path.
        const actualWorkerPath = path.join(__dirname, 'workers', isTsNode ? 'keywordMatchWorker.ts' : 'keywordMatchWorker.js');
        console.log(`[GraphBuilder] Worker Path: ${actualWorkerPath}`);
        console.log(`[GraphBuilder] isTsNode: ${isTsNode}`);
        console.log(`[GraphBuilder] Spawning ${workerCount} workers...`);
        const workers = [];
        const workerPromises = [];
        for (let i = 0; i < workerCount; i++) {
            const start = i * chunkSize;
            const end = Math.min(start + chunkSize, files.length);
            if (start >= files.length)
                break;
            const filesChunk = files.slice(start, end);
            const p = new Promise((resolve, reject) => {
                try {
                    const execArgv = isTsNode ? ['-r', require.resolve('ts-node/register')] : undefined;
                    const worker = new worker_threads_1.Worker(actualWorkerPath, {
                        workerData: {
                            filesChunk,
                            targetIds,
                            strategy: config_1.config.matchingStrategy,
                            exclusionList: config_1.config.exclusionList
                        },
                        execArgv
                    });
                    workers.push(worker);
                    const matches = [];
                    worker.on('message', (results) => {
                        results.forEach(res => {
                            matches.push(res);
                        });
                    });
                    worker.on('error', (err) => {
                        console.error(`[GraphBuilder] Worker error:`, err);
                        reject(err);
                    });
                    worker.on('exit', (code) => {
                        if (code !== 0) {
                            console.error(`[GraphBuilder] Worker exited with code ${code}`);
                            reject(new Error(`Worker stopped with exit code ${code}`));
                        }
                        else {
                            resolve(matches);
                        }
                    });
                }
                catch (e) {
                    console.error(`[GraphBuilder] Failed to spawn worker:`, e);
                    reject(e);
                }
            });
            workerPromises.push(p);
        }
        try {
            const matchesPerWorker = await Promise.all(workerPromises);
            matchesPerWorker.forEach(matches => {
                matches.forEach(res => {
                    graph.addEdge(res.target, res.source, 'keyword-match');
                });
            });
            console.log(`[GraphBuilder] Parallel matching complete.`);
        }
        catch (err) {
            console.error('[GraphBuilder] Parallel matching failed, falling back to sequential.', err);
            // Stop workers that are still running; their results are no longer wanted.
            // Promise.resolve() tolerates runtimes where terminate() returns nothing.
            await Promise.all(workers.map(worker => Promise.resolve(worker.terminate()).catch(terminateErr => {
                console.error('[GraphBuilder] Failed to terminate worker:', terminateErr);
            })));
            // Fallback
            this.runSequentialMatching(files, graph);
        }
    }
    /**
     * Single-threaded keyword matching: for every file, adds a 'keyword-match'
     * edge target -> source for each other, non-excluded filename that
     * `checkMatch` finds in the file's content.
     */
    static runSequentialMatching(files, graph) {
        // Resolve exclusions once instead of rescanning the list for every pair.
        const excluded = new Set(config_1.config.exclusionList);
        const candidateIds = files
            .map(file => file.filename)
            .filter(id => !excluded.has(id));
        const strategy = config_1.config.matchingStrategy;
        files.forEach(sourceFile => {
            const sourceId = sourceFile.filename;
            const content = sourceFile.content;
            candidateIds.forEach(targetId => {
                if (sourceId === targetId)
                    return; // Skip self
                if ((0, stringUtils_1.checkMatch)(content, targetId, strategy)) {
                    // Found a reference: Target (Concept) -> Source (Context)
                    graph.addEdge(targetId, sourceId, 'keyword-match');
                }
            });
        });
    }
}
347
+ exports.GraphBuilder = GraphBuilder;
@@ -0,0 +1,70 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.GraphMetrics = void 0;
4
class GraphMetrics {
    /**
     * Calculates betweenness centrality for all nodes using Brandes'
     * algorithm on the unweighted, directed graph (only outgoing edges are
     * followed).
     *
     * @param graph Object providing `toJSON().nodes` (each with an `id`) and
     *        `getOutgoingEdges(id)` (each edge with a `target`).
     * @returns Map of node id -> raw (un-normalized) betweenness score.
     */
    static calculateBetweenness(graph) {
        const nodes = graph.toJSON().nodes;
        const cb = new Map();
        nodes.forEach(n => cb.set(n.id, 0));
        // One BFS per source node s, followed by dependency accumulation.
        nodes.forEach(sNode => {
            const s = sNode.id;
            const P = new Map(); // Predecessors on shortest paths
            const sigma = new Map(); // Number of shortest paths from s
            const d = new Map(); // Distance from s (-1 = unreached)
            const delta = new Map(); // Accumulated dependency
            nodes.forEach(n => {
                P.set(n.id, []);
                sigma.set(n.id, 0);
                d.set(n.id, -1);
                delta.set(n.id, 0);
            });
            sigma.set(s, 1);
            d.set(s, 0);
            // `order` is both the BFS queue (read through `head`) and the record
            // of visit order. Advancing an index avoids Array.shift(), which
            // moves every remaining element on each dequeue.
            const order = [s];
            for (let head = 0; head < order.length; head++) {
                const v = order[head];
                const neighbors = graph.getOutgoingEdges(v).map(e => e.target);
                for (const w of neighbors) {
                    // Path discovery. Targets that are not in `nodes` have no
                    // distance entry and match neither test below.
                    if (d.get(w) === -1) {
                        d.set(w, d.get(v) + 1);
                        order.push(w);
                    }
                    // Path counting
                    if (d.get(w) === d.get(v) + 1) {
                        sigma.set(w, sigma.get(w) + sigma.get(v));
                        P.get(w).push(v);
                    }
                }
            }
            // Accumulation: walk the visit order backwards (farthest first).
            for (let i = order.length - 1; i >= 0; i--) {
                const w = order[i];
                for (const v of P.get(w)) {
                    delta.set(v, delta.get(v) + (sigma.get(v) / sigma.get(w)) * (1 + delta.get(w)));
                }
                if (w !== s) {
                    cb.set(w, cb.get(w) + delta.get(w));
                }
            }
        });
        // Scores are left raw; consumers can scale them for display.
        return cb;
    }
}
70
+ exports.GraphMetrics = GraphMetrics;
@@ -0,0 +1,63 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.CycleDetector = void 0;
4
/**
 * Service to detect cycles in the graph.
 */
class CycleDetector {
    /**
     * Detects cycles with a depth-first traversal over outgoing neighbors.
     *
     * Each back edge met during the traversal (an edge to a node that is still
     * on the current DFS path) yields one cycle. This does not enumerate every
     * simple cycle of the graph - their number can grow exponentially - but it
     * reports at least one cycle whenever the graph is not a DAG.
     *
     * The traversal uses an explicit stack instead of recursion, so long
     * dependency chains cannot overflow the call stack.
     *
     * @param graph The graph to analyze; must provide `getNodes()` (items with
     *        an `id`) and `getNeighbors(id)` (an iterable of node ids).
     * @param maxCycles Optional cap on how many cycles to collect before
     *        stopping early. Defaults to no cap.
     * @returns Array of cycles; each cycle is an array of node IDs that starts
     *          and ends with the same ID.
     */
    static detectCycles(graph, maxCycles = Infinity) {
        const cycles = [];
        if (!(maxCycles > 0)) {
            return cycles;
        }
        const visited = new Set();
        const path = [];
        // nodeId -> index in `path`; membership means "on the current DFS path".
        const pathIndex = new Map();
        // One neighbor iterator per node on the path (the explicit call stack).
        const frames = [];
        const enter = (nodeId) => {
            visited.add(nodeId);
            pathIndex.set(nodeId, path.length);
            path.push(nodeId);
            frames.push(graph.getNeighbors(nodeId)[Symbol.iterator]());
        };
        for (const node of graph.getNodes()) {
            if (visited.has(node.id)) {
                continue;
            }
            enter(node.id);
            while (frames.length > 0) {
                const step = frames[frames.length - 1].next();
                if (step.done) {
                    // All neighbors handled: leave the node.
                    frames.pop();
                    pathIndex.delete(path.pop());
                    continue;
                }
                const neighbor = step.value;
                if (!visited.has(neighbor)) {
                    enter(neighbor);
                }
                else if (pathIndex.has(neighbor)) {
                    // Back edge: the cycle is the path suffix from `neighbor`.
                    cycles.push([...path.slice(pathIndex.get(neighbor)), neighbor]);
                    if (cycles.length >= maxCycles) {
                        return cycles;
                    }
                }
            }
        }
        return cycles;
    }
    /**
     * Checks if the graph has any cycles. Stops at the first one found.
     *
     * @param graph The graph to analyze.
     * @returns `true` when at least one cycle exists.
     */
    static hasCycle(graph) {
        return this.detectCycles(graph, 1).length > 0;
    }
}
63
+ exports.CycleDetector = CycleDetector;
@@ -0,0 +1,70 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.HybridEngine = void 0;
4
class HybridEngine {
    /**
     * Infers directed dependencies by combining vector similarity with
     * statistical asymmetry.
     *
     * A pair (A, B) produces an edge only when:
     *  1. the dot product of their vectors reaches `vectorThreshold`, and
     *  2. the two conditional probabilities differ by more than
     *     `asymmetryThreshold`.
     * The node with the higher "given the other" probability becomes the source.
     *
     * @param matrix Map of node -> Map of node -> metrics with `conditionalProb`.
     * @param vectorSpace Object whose `getVector(id)` returns a numeric vector
     *        (the dot product is used as similarity; presumably the vectors are
     *        L2-normalized - confirm against VectorSpace).
     * @param vectorThreshold Minimum similarity for a pair to be considered.
     * @param asymmetryThreshold Minimum probability gap for a direction to be chosen.
     * @returns Edges `{ source, target, weight, confidence, reason }`, sorted by
     *          descending confidence.
     */
    static infer(matrix, vectorSpace, vectorThreshold = 0.3, asymmetryThreshold = 0.1) {
        const edges = [];
        const seenPairs = new Set();
        const dotProduct = (left, right) => {
            let total = 0;
            for (let idx = 0; idx < left.length; idx++) {
                total += left[idx] * right[idx];
            }
            return total;
        };
        for (const [nodeA, targets] of matrix) {
            for (const [nodeB, forward] of targets) {
                // Each unordered pair is evaluated once.
                const key = [nodeA, nodeB].sort().join('|');
                if (seenPairs.has(key)) {
                    continue;
                }
                seenPairs.add(key);
                // Metrics in the opposite direction are required.
                const reverseRow = matrix.get(nodeB);
                const backward = reverseRow ? reverseRow.get(nodeA) : null;
                if (!backward) {
                    continue;
                }
                // 1. Content relevance.
                const vecA = vectorSpace.getVector(nodeA);
                const vecB = vectorSpace.getVector(nodeB);
                if (!vecA || !vecB) {
                    continue;
                }
                const similarity = dotProduct(vecA, vecB);
                if (similarity < vectorThreshold) {
                    continue;
                }
                // 2. Directionality: P(A|B) - P(B|A).
                const diff = backward.conditionalProb - forward.conditionalProb;
                let parent;
                let child;
                let strength;
                if (diff > asymmetryThreshold) {
                    // B's presence implies A's: A is the parent.
                    parent = nodeA;
                    child = nodeB;
                    strength = diff;
                }
                else if (-diff > asymmetryThreshold) {
                    // The reverse: B is the parent.
                    parent = nodeB;
                    child = nodeA;
                    strength = -diff;
                }
                else {
                    continue;
                }
                edges.push({
                    source: parent,
                    target: child,
                    weight: similarity, // similarity doubles as the edge weight
                    confidence: strength,
                    reason: `Hybrid: Sim=${similarity.toFixed(2)}, Asym=${strength.toFixed(2)}`
                });
            }
        }
        return edges.sort((first, second) => second.confidence - first.confidence);
    }
}
70
+ exports.HybridEngine = HybridEngine;
@@ -0,0 +1,123 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.StatisticalAnalyzer = void 0;
4
class StatisticalAnalyzer {
    /**
     * Analyzes file-level co-occurrence of terms across the corpus.
     *
     * A term "occurs" in a file when the lower-cased file content contains the
     * lower-cased term as a substring. The context window is the whole file.
     *
     * @param files All files in the corpus; each is read for `content`.
     * @param terms List of terms (concept IDs) to track.
     * @returns Map of source term -> Map of target term ->
     *          `{ count, jaccard, conditionalProb }`, where `count` is the number
     *          of files containing both terms and `conditionalProb` is
     *          `count / (files containing source)`. Every term gets a row; pairs
     *          that never co-occur are omitted from it.
     */
    static analyze(files, terms) {
        const matrix = new Map();
        // Lower-case each term once rather than once per file.
        const needles = terms.map(term => ({ term, lowered: term.toLowerCase() }));
        // 1. Which terms occur in which file. Sets are kept by file position so
        //    two files sharing a filename cannot overwrite each other.
        const termsPerFile = files.map(file => {
            const content = file.content.toLowerCase();
            const found = new Set();
            needles.forEach(({ term, lowered }) => {
                // Simple inclusion check (can be improved with Regex/Tokenization)
                if (content.includes(lowered)) {
                    found.add(term);
                }
            });
            return found;
        });
        // Posting lists: term -> indices of the files that contain it.
        // A list's length is the term's document frequency.
        const postings = new Map();
        termsPerFile.forEach((found, fileIndex) => {
            found.forEach(term => {
                const list = postings.get(term);
                if (list) {
                    list.push(fileIndex);
                }
                else {
                    postings.set(term, [fileIndex]);
                }
            });
        });
        // 2. Co-occurrences. For each source only the files that contain it are
        //    visited, instead of rescanning every file for every term pair.
        terms.forEach(source => {
            const row = new Map();
            matrix.set(source, row);
            const sourceDocs = postings.get(source);
            if (!sourceDocs)
                return; // term never occurs
            const sourceCount = sourceDocs.length;
            const shared = new Map(); // other term -> number of files shared with source
            sourceDocs.forEach(fileIndex => {
                termsPerFile[fileIndex].forEach(other => {
                    if (other !== source) {
                        shared.set(other, (shared.get(other) || 0) + 1);
                    }
                });
            });
            // Emit in `terms` order so row ordering does not depend on file order.
            terms.forEach(target => {
                const intersection = shared.get(target);
                if (!intersection)
                    return; // also skips source itself, which is never in `shared`
                const targetCount = postings.get(target).length;
                // union >= intersection > 0, so the division is safe.
                const union = sourceCount + targetCount - intersection;
                row.set(target, {
                    count: intersection,
                    jaccard: intersection / union,
                    conditionalProb: intersection / sourceCount
                });
            });
        });
        return matrix;
    }
    /**
     * Infers directional dependencies based on probability asymmetry.
     *
     * For a pair (A, B): if P(A|B) exceeds P(B|A) by more than
     * `asymmetryThreshold`, B rarely appears without A, so A is treated as the
     * parent and the edge runs A -> B (and symmetrically for B -> A).
     *
     * @param matrix Output of `analyze`.
     * @param minSupport Minimum Jaccard similarity for a pair to be considered.
     * @param asymmetryThreshold Minimum probability gap for a direction to be chosen.
     * @returns Dependencies `{ source, target, weight, confidence }` sorted by
     *          descending confidence; `weight` is the pair's Jaccard similarity.
     */
    static inferDependencies(matrix, minSupport = 0.1, asymmetryThreshold = 0.2) {
        const dependencies = [];
        const checkedPairs = new Set();
        matrix.forEach((targets, nodeA) => {
            targets.forEach((metricsAtoB, nodeB) => {
                // Avoid checking A-B and B-A twice
                const pairKey = [nodeA, nodeB].sort().join('|');
                if (checkedPairs.has(pairKey))
                    return;
                checkedPairs.add(pairKey);
                // Metrics for B -> A are required.
                const rowB = matrix.get(nodeB);
                const metricsBtoA = rowB ? rowB.get(nodeA) : null;
                if (!metricsBtoA)
                    return;
                // metricsAtoB.conditionalProb = P(B|A); metricsBtoA.conditionalProb = P(A|B).
                const p_A_given_B = metricsBtoA.conditionalProb;
                const p_B_given_A = metricsAtoB.conditionalProb;
                // Jaccard serves as a baseline "relevance" check
                if (metricsAtoB.jaccard < minSupport)
                    return;
                const diff = p_A_given_B - p_B_given_A;
                if (diff > asymmetryThreshold) {
                    // A is Parent of B
                    dependencies.push({
                        source: nodeA,
                        target: nodeB,
                        weight: metricsAtoB.jaccard,
                        confidence: diff
                    });
                }
                else if (-diff > asymmetryThreshold) {
                    // B is Parent of A
                    dependencies.push({
                        source: nodeB,
                        target: nodeA,
                        weight: metricsAtoB.jaccard,
                        confidence: -diff
                    });
                }
            });
        });
        return dependencies.sort((a, b) => b.confidence - a.confidence);
    }
}
123
+ exports.StatisticalAnalyzer = StatisticalAnalyzer;