npm - noteconnection - Versions diffs - 0.9.0 - Mend

noteconnection 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (40) hide show

package/LICENSE +21 -0
package/README.md +198 -0
package/dist/backend/CommunityDetection.js +58 -0
package/dist/backend/FileLoader.js +110 -0
package/dist/backend/GraphBuilder.js +347 -0
package/dist/backend/GraphMetrics.js +70 -0
package/dist/backend/algorithms/CycleDetection.js +63 -0
package/dist/backend/algorithms/HybridEngine.js +70 -0
package/dist/backend/algorithms/StatisticalAnalyzer.js +123 -0
package/dist/backend/algorithms/TopologicalSort.js +69 -0
package/dist/backend/algorithms/VectorSpace.js +87 -0
package/dist/backend/build_dag.js +164 -0
package/dist/backend/config.js +17 -0
package/dist/backend/graph.js +108 -0
package/dist/backend/main.js +67 -0
package/dist/backend/parser.js +94 -0
package/dist/backend/test_robustness/test_hybrid.js +60 -0
package/dist/backend/test_robustness/test_statistics.js +58 -0
package/dist/backend/test_robustness/test_vector.js +54 -0
package/dist/backend/test_robustness.js +113 -0
package/dist/backend/types.js +3 -0
package/dist/backend/utils/frontmatterParser.js +121 -0
package/dist/backend/utils/stringUtils.js +66 -0
package/dist/backend/workers/keywordMatchWorker.js +22 -0
package/dist/core/Graph.js +121 -0
package/dist/core/Graph.test.js +37 -0
package/dist/core/types.js +2 -0
package/dist/frontend/analysis.js +356 -0
package/dist/frontend/app.js +1447 -0
package/dist/frontend/data.js +8356 -0
package/dist/frontend/graph_data.json +8356 -0
package/dist/frontend/index.html +279 -0
package/dist/frontend/reader.js +177 -0
package/dist/frontend/settings.js +84 -0
package/dist/frontend/source_manager.js +61 -0
package/dist/frontend/styles.css +577 -0
package/dist/frontend/styles_analysis.css +145 -0
package/dist/index.js +121 -0
package/dist/server.js +149 -0
package/package.json +39 -0

package/dist/backend/GraphBuilder.js ADDED Viewed

@@ -0,0 +1,347 @@
+"use strict";
+// Module-interop helper: exposes property `k` of module object `m` on `o` under the name `k2`
+// (`k2` defaults to `k`). An existing `this.__createBinding` is reused when one is present.
+// NOTE(review): this has the shape of a compiler-emitted helper — presumably generated, confirm before hand-editing.
+var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
+    if (k2 === undefined) k2 = k;
+    var desc = Object.getOwnPropertyDescriptor(m, k);
+    // Substitute a forwarding getter when `m` has no own descriptor for `k`, when the property is an
+    // accessor on an object not flagged `__esModule`, or when it is a writable/configurable data
+    // property. Otherwise the original descriptor is copied onto `o` unchanged.
+    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
+      desc = { enumerable: true, get: function() { return m[k]; } };
+    }
+    Object.defineProperty(o, k2, desc);
+}) : (function(o, m, k, k2) {
+    // Fallback used when `Object.create` is unavailable: a one-time value copy instead of a getter.
+    if (k2 === undefined) k2 = k;
+    o[k2] = m[k];
+}));
+// Module-interop helper: stores `v` as the enumerable `default` property of `o`.
+// An existing `this.__setModuleDefault` is reused when one is present; when `Object.create`
+// is unavailable the property is set by plain assignment instead of `Object.defineProperty`.
+var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
+    Object.defineProperty(o, "default", { enumerable: true, value: v });
+}) : function(o, v) {
+    o["default"] = v;
+});
+// Module-interop helper: wraps the result of `require()` so it can be used like a namespace import.
+// An existing `this.__importStar` is reused when one is present.
+var __importStar = (this && this.__importStar) || (function () {
+    // Lists the own property names of `o`. On first call it replaces itself with
+    // `Object.getOwnPropertyNames`, or with a `for...in` + `hasOwnProperty` fallback when that is missing.
+    var ownKeys = function(o) {
+        ownKeys = Object.getOwnPropertyNames || function (o) {
+            var ar = [];
+            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
+            return ar;
+        };
+        return ownKeys(o);
+    };
+    return function (mod) {
+        // Modules already flagged `__esModule` are returned untouched.
+        if (mod && mod.__esModule) return mod;
+        // Otherwise build a fresh object: every own property except "default" is bound through
+        // `__createBinding`, and the module itself becomes the `default` property.
+        var result = {};
+        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
+        __setModuleDefault(result, mod);
+        return result;
+    };
+})();
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.GraphBuilder = void 0;
+const Graph_1 = require("../core/Graph");
+const config_1 = require("./config");
+const path = __importStar(require("path"));
+const os = __importStar(require("os"));
+const worker_threads_1 = require("worker_threads");
+const CommunityDetection_1 = require("./CommunityDetection");
+const GraphMetrics_1 = require("./GraphMetrics");
+const stringUtils_1 = require("./utils/stringUtils");
+const frontmatterParser_1 = require("./utils/frontmatterParser");
+const CycleDetection_1 = require("./algorithms/CycleDetection");
+const TopologicalSort_1 = require("./algorithms/TopologicalSort");
+const StatisticalAnalyzer_1 = require("./algorithms/StatisticalAnalyzer");
+const VectorSpace_1 = require("./algorithms/VectorSpace");
+const HybridEngine_1 = require("./algorithms/HybridEngine");
+/**
+ * Service to build the graph from raw files.
+ * 从原始文件构建图的服务。
+ */
+class GraphBuilder {
+    /**
+     * Builds a graph from raw files using keyword matching.
+     * 使用关键词匹配从原始文件构建图。
+     * @param files Array of raw files | 原始文件数组
+     * @param layout Optional map of saved node positions | 可选的保存节点位置映射
+     */
+    static async build(files, layout) {
+        const graph = new Graph_1.Graph();
+        // 1. Add all nodes first
+        // 1. 首先添加所有节点
+        const fileMap = new Map();
+        files.forEach(file => {
+            // Parse Metadata (Tags, Prerequisites, Next)
+            const metadata = frontmatterParser_1.FrontmatterParser.parse(file.content);
+            const node = {
+                id: file.filename,
+                label: file.filename,
+                inDegree: 0,
+                outDegree: 0,
+                content: file.content,
+                metadata: {
+                    filepath: file.filepath,
+                    tags: metadata.tags,
+                    prerequisites: metadata.prerequisites,
+                    next: metadata.next
+                }
+            };
+            if (layout && layout.has(file.filename)) {
+                const pos = layout.get(file.filename);
+                node.x = pos.x;
+                node.y = pos.y;
+            }
+            graph.addNode(node);
+            fileMap.set(file.filename, file);
+            // 1b. Add Tag Nodes
+            if (config_1.config.enableTags) {
+                metadata.tags.forEach(tag => {
+                    const tagId = `#${tag}`;
+                    if (!graph.hasNode(tagId)) {
+                        graph.addNode({
+                            id: tagId,
+                            label: tagId,
+                            inDegree: 0, outDegree: 0,
+                            clusterId: 'tags' // Group tags together
+                        });
+                    }
+                    // Edge: Note -> Tag
+                    graph.addEdge(node.id, tagId, 'tagged');
+                });
+            }
+        });
+        // 2. Identify edges
+        // 2a. Explicit Dependencies (Frontmatter)
+        // 2a. 显式依赖 (Frontmatter)
+        files.forEach(sourceFile => {
+            const sourceId = sourceFile.filename;
+            const node = graph.getNode(sourceId);
+            if (!node || !node.metadata)
+                return;
+            // Handle 'prerequisites': Target (Prereq) -> Source (Current)
+            if (node.metadata.prerequisites && Array.isArray(node.metadata.prerequisites)) {
+                node.metadata.prerequisites.forEach((prereq) => {
+                    let targetId = prereq;
+                    if (!graph.hasNode(targetId)) {
+                        if (graph.hasNode(targetId + '.md')) {
+                            targetId = targetId + '.md';
+                        }
+                        else {
+                            return; // Target not found
+                        }
+                    }
+                    graph.addEdge(targetId, sourceId, 'explicit-prerequisite');
+                });
+            }
+            // Handle 'next': Source (Current) -> Target (Next)
+            if (node.metadata.next && Array.isArray(node.metadata.next)) {
+                node.metadata.next.forEach((nextItem) => {
+                    let targetId = nextItem;
+                    if (!graph.hasNode(targetId)) {
+                        if (graph.hasNode(targetId + '.md')) {
+                            targetId = targetId + '.md';
+                        }
+                        else {
+                            return;
+                        }
+                    }
+                    graph.addEdge(sourceId, targetId, 'explicit-next');
+                });
+            }
+        });
+        // 2b. Keyword Matching Strategy
+        // 2b. 关键词匹配策略
+        console.log(`[GraphBuilder] Starting keyword matching for ${files.length} files...`);
+        if (files.length > 200) {
+            // Use Parallel Processing
+            console.log(`[GraphBuilder] Using Parallel Processing (Workers)`);
+            await this.runParallelMatching(files, graph);
+        }
+        else {
+            // Use Single Thread (Legacy)
+            this.runSequentialMatching(files, graph);
+        }
+        // 2c. Statistical Inference (v0.6.0)
+        if (config_1.config.enableStatisticalInference) {
+            console.log('[GraphBuilder] Running Statistical Inference...');
+            const terms = Array.from(fileMap.keys());
+            const matrix = StatisticalAnalyzer_1.StatisticalAnalyzer.analyze(files, terms);
+            const inferredEdges = StatisticalAnalyzer_1.StatisticalAnalyzer.inferDependencies(matrix, 0.05, 0.1);
+            inferredEdges.forEach(dep => {
+                graph.addEdge(dep.source, dep.target, 'statistical-inferred', dep.confidence);
+            });
+            console.log(`[GraphBuilder] Added ${inferredEdges.length} inferred edges.`);
+        }
+        // 2d. Vector Similarity (v0.6.0)
+        if (config_1.config.enableVectorSimilarity && !config_1.config.enableHybridInference) {
+            console.log('[GraphBuilder] Running Vector Similarity Analysis...');
+            const vectorSpace = new VectorSpace_1.VectorSpace(files);
+            let similarityEdges = 0;
+            files.forEach(file => {
+                const similar = vectorSpace.getSimilar(file.filename, 3); // Top 3 similar
+                similar.forEach(sim => {
+                    if (sim.score > 0.3) { // Threshold
+                        // Add UNDIRECTED association
+                        graph.addEdge(file.filename, sim.id, 'vector-association', sim.score);
+                        similarityEdges++;
+                    }
+                });
+            });
+            console.log(`[GraphBuilder] Added ${similarityEdges} vector association edges.`);
+        }
+        // 2e. Hybrid Inference (v0.7.0)
+        if (config_1.config.enableHybridInference) {
+            console.log('[GraphBuilder] Running Hybrid Inference (Stats + Vector)...');
+            // We need both Stats Matrix and Vector Space
+            const terms = Array.from(fileMap.keys());
+            const matrix = StatisticalAnalyzer_1.StatisticalAnalyzer.analyze(files, terms);
+            const vectorSpace = new VectorSpace_1.VectorSpace(files);
+            const hybridEdges = HybridEngine_1.HybridEngine.infer(matrix, vectorSpace, 0.25, 0.1); // Tune thresholds
+            hybridEdges.forEach(dep => {
+                graph.addEdge(dep.source, dep.target, 'hybrid-inferred', dep.confidence);
+                // Maybe add metadata/reason?
+                // Graph edge types currently only store weight/type.
+            });
+            console.log(`[GraphBuilder] Added ${hybridEdges.length} hybrid inferred edges.`);
+        }
+        // 3. Community Detection (v0.1.6) or Folder Clustering (v0.5.0)
+        if (config_1.config.clusteringStrategy === 'folder') {
+            // Folder-based Clustering
+            graph.getNodes().forEach(node => {
+                // Skip special nodes like tags which might not have filepath
+                if (node.clusterId === 'tags')
+                    return;
+                if (node.metadata && node.metadata.filepath) {
+                    const dirName = path.basename(path.dirname(node.metadata.filepath));
+                    node.clusterId = dirName;
+                }
+                else {
+                    node.clusterId = 'root'; // Fallback
+                }
+            });
+        }
+        else {
+            // Label Propagation (Default)
+            const clusters = CommunityDetection_1.CommunityDetection.detect(graph);
+            clusters.forEach((clusterId, nodeId) => {
+                const node = graph.getNode(nodeId);
+                if (node) {
+                    // Don't overwrite special cluster IDs like 'tags'
+                    if (node.clusterId !== 'tags') {
+                        node.clusterId = clusterId;
+                    }
+                }
+            });
+        }
+        // 4. Graph Metrics (v0.1.7)
+        const centrality = GraphMetrics_1.GraphMetrics.calculateBetweenness(graph);
+        centrality.forEach((val, nodeId) => {
+            const node = graph.getNode(nodeId);
+            if (node) {
+                node.centrality = val;
+            }
+        });
+        // 5. Algorithmic Core (v0.3.0)
+        // Cycle Detection
+        if (CycleDetection_1.CycleDetector.hasCycle(graph)) {
+            const cycles = CycleDetection_1.CycleDetector.detectCycles(graph);
+            console.warn(`[GraphBuilder] Detected ${cycles.length} cycles. Topological Sort may be partial.`);
+            // Note: We proceed anyway, but ranks might be inaccurate for cyclic nodes.
+        }
+        // Topological Sort & Ranking
+        const ranks = TopologicalSort_1.TopologicalSort.assignRanks(graph);
+        ranks.forEach((rank, nodeId) => {
+            const node = graph.getNode(nodeId);
+            if (node) {
+                node.rank = rank;
+            }
+        });
+        return graph;
+    }
+    // --- Parallel Execution Helpers ---
+    static async runParallelMatching(files, graph) {
+        const numCPUs = os.cpus().length;
+        const workerCount = Math.min(12, Math.max(1, numCPUs - 1)); // Cap at 12 workers for performance
+        const chunkSize = Math.ceil(files.length / workerCount);
+        const targetIds = files.map(f => f.filename);
+        const workerPromises = [];
+        const workerPath = path.join(__dirname, 'workers', 'keywordMatchWorker.ts');
+        // Check if we are in TS execution (ts-node) or JS (dist)
+        // If extension is .ts, we assume ts-node.
+        const isTsNode = path.extname(__filename) === '.ts';
+        const actualWorkerPath = isTsNode
+            ? workerPath
+            : workerPath.replace('.ts', '.js');
+        console.log(`[GraphBuilder] Worker Path: ${actualWorkerPath}`);
+        console.log(`[GraphBuilder] isTsNode: ${isTsNode}`);
+        console.log(`[GraphBuilder] Spawning ${workerCount} workers...`);
+        for (let i = 0; i < workerCount; i++) {
+            const start = i * chunkSize;
+            const end = Math.min(start + chunkSize, files.length);
+            if (start >= files.length)
+                break;
+            const filesChunk = files.slice(start, end);
+            const p = new Promise((resolve, reject) => {
+                try {
+                    const execArgv = isTsNode ? ['-r', require.resolve('ts-node/register')] : undefined;
+                    const worker = new worker_threads_1.Worker(actualWorkerPath, {
+                        workerData: {
+                            filesChunk,
+                            targetIds,
+                            strategy: config_1.config.matchingStrategy,
+                            exclusionList: config_1.config.exclusionList
+                        },
+                        execArgv
+                    });
+                    worker.on('message', (results) => {
+                        results.forEach(res => {
+                            graph.addEdge(res.target, res.source, 'keyword-match');
+                        });
+                    });
+                    worker.on('error', (err) => {
+                        console.error(`[GraphBuilder] Worker error:`, err);
+                        reject(err);
+                    });
+                    worker.on('exit', (code) => {
+                        if (code !== 0) {
+                            console.error(`[GraphBuilder] Worker exited with code ${code}`);
+                            reject(new Error(`Worker stopped with exit code ${code}`));
+                        }
+                        else {
+                            resolve();
+                        }
+                    });
+                }
+                catch (e) {
+                    console.error(`[GraphBuilder] Failed to spawn worker:`, e);
+                    reject(e);
+                }
+            });
+            workerPromises.push(p);
+        }
+        try {
+            await Promise.all(workerPromises);
+            console.log(`[GraphBuilder] Parallel matching complete.`);
+        }
+        catch (err) {
+            console.error('[GraphBuilder] Parallel matching failed, falling back to sequential.', err);
+            // Fallback
+            this.runSequentialMatching(files, graph);
+        }
+    }
+    static runSequentialMatching(files, graph) {
+        files.forEach(sourceFile => {
+            const sourceId = sourceFile.filename;
+            const content = sourceFile.content;
+            files.forEach(targetFile => {
+                const targetId = targetFile.filename;
+                if (sourceId === targetId)
+                    return; // Skip self | 跳过自身
+                // Exclusion Check
+                if (config_1.config.exclusionList.includes(targetId)) {
+                    return;
+                }
+                if ((0, stringUtils_1.checkMatch)(content, targetId, config_1.config.matchingStrategy)) {
+                    // Found a reference!
+                    // Target (Concept) -> Source (Context)
+                    graph.addEdge(targetId, sourceId, 'keyword-match');
+                }
+            });
+        });
+    }
+}
+exports.GraphBuilder = GraphBuilder;

package/dist/backend/GraphMetrics.js ADDED Viewed

@@ -0,0 +1,70 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.GraphMetrics = void 0;
+class GraphMetrics {
+    /**
+     * Calculates Betweenness Centrality for all nodes.
+     * Brandes Algorithm (Unweighted).
+     */
+    static calculateBetweenness(graph) {
+        const nodes = graph.toJSON().nodes;
+        const cb = new Map();
+        // Initialize
+        nodes.forEach(n => cb.set(n.id, 0));
+        // For each node s, calculate dependencies
+        nodes.forEach(sNode => {
+            const s = sNode.id;
+            const stack = [];
+            const P = new Map(); // Predecessors
+            const sigma = new Map(); // Number of shortest paths
+            const d = new Map(); // Distance
+            // Init
+            nodes.forEach(n => {
+                P.set(n.id, []);
+                sigma.set(n.id, 0);
+                d.set(n.id, -1);
+            });
+            sigma.set(s, 1);
+            d.set(s, 0);
+            const Q = [s];
+            while (Q.length > 0) {
+                const v = Q.shift();
+                stack.push(v);
+                // Neighbors (Outgoing edges for directed graph?)
+                // Betweenness usually considers flow. If directed, use outgoing.
+                // However, knowledge graphs can be traversed both ways conceptually.
+                // Let's stick to Directed for strict dependency.
+                const neighbors = graph.getOutgoingEdges(v).map(e => e.target);
+                for (const w of neighbors) {
+                    // Path discovery
+                    if (d.get(w) === -1) {
+                        d.set(w, d.get(v) + 1);
+                        Q.push(w);
+                    }
+                    // Path counting
+                    if (d.get(w) === d.get(v) + 1) {
+                        sigma.set(w, sigma.get(w) + sigma.get(v));
+                        P.get(w).push(v);
+                    }
+                }
+            }
+            const delta = new Map();
+            nodes.forEach(n => delta.set(n.id, 0));
+            // Accumulation
+            while (stack.length > 0) {
+                const w = stack.pop();
+                for (const v of P.get(w)) {
+                    delta.set(v, delta.get(v) + (sigma.get(v) / sigma.get(w)) * (1 + delta.get(w)));
+                }
+                if (w !== s) {
+                    cb.set(w, cb.get(w) + delta.get(w));
+                }
+            }
+        });
+        // Normalize?
+        // Standard betweenness is usually roughly O(N^2), so values can be large.
+        // We will leave them raw, visualization can scale them.
+        return cb;
+    }
+}
+exports.GraphMetrics = GraphMetrics;

package/dist/backend/algorithms/CycleDetection.js ADDED Viewed

@@ -0,0 +1,63 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.CycleDetector = void 0;
+/**
+ * Service to detect cycles in the graph.
+ * 用于检测图中循环的服务。
+ */
+class CycleDetector {
+    /**
+     * Detects all simple cycles in the graph using DFS.
+     * 使用 DFS 检测图中的所有简单循环。
+     * Note: Finding ALL cycles is NP-Hard. This implementation finds cycles reachable via DFS traversals.
+     * It is sufficient for detecting if the graph is a DAG.
+     * 注意：查找所有循环是 NP-Hard 问题。此实现查找通过 DFS 遍历可达的循环。
+     * 这对于检测图是否为 DAG 足够了。
+     *
+     * @param graph The graph to analyze.
+     * @returns Array of cycles, where each cycle is an array of node IDs.
+     */
+    static detectCycles(graph) {
+        const visited = new Set();
+        const recursionStack = new Set();
+        const cycles = [];
+        const path = [];
+        const nodes = graph.getNodes();
+        const dfs = (nodeId) => {
+            visited.add(nodeId);
+            recursionStack.add(nodeId);
+            path.push(nodeId);
+            const neighbors = graph.getNeighbors(nodeId); // Outgoing neighbors
+            for (const neighbor of neighbors) {
+                if (!visited.has(neighbor)) {
+                    dfs(neighbor);
+                }
+                else if (recursionStack.has(neighbor)) {
+                    // Cycle detected!
+                    // Extract the cycle from the current path
+                    const cycleStartIndex = path.indexOf(neighbor);
+                    if (cycleStartIndex !== -1) {
+                        cycles.push([...path.slice(cycleStartIndex), neighbor]);
+                    }
+                }
+            }
+            recursionStack.delete(nodeId);
+            path.pop();
+        };
+        for (const node of nodes) {
+            if (!visited.has(node.id)) {
+                dfs(node.id);
+            }
+        }
+        return cycles;
+    }
+    /**
+     * Checks if the graph has any cycles.
+     * 检查图是否有任何循环。
+     */
+    static hasCycle(graph) {
+        const cycles = this.detectCycles(graph);
+        return cycles.length > 0;
+    }
+}
+exports.CycleDetector = CycleDetector;

package/dist/backend/algorithms/HybridEngine.js ADDED Viewed

@@ -0,0 +1,70 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.HybridEngine = void 0;
+class HybridEngine {
+    /**
+     * Infer dependencies using both Statistical and Vector methods.
+     * 结合统计和向量方法推断依赖关系。
+     *
+     * Rule:
+     * 1. High Vector Similarity (Content Relevance)
+     * 2. High Statistical Asymmetry (Directionality)
+     */
+    static infer(matrix, vectorSpace, vectorThreshold = 0.3, asymmetryThreshold = 0.1) {
+        const results = [];
+        const checkedPairs = new Set();
+        matrix.forEach((targets, nodeA) => {
+            targets.forEach((metricsAtoB, nodeB) => {
+                const pairKey = [nodeA, nodeB].sort().join('|');
+                if (checkedPairs.has(pairKey))
+                    return;
+                checkedPairs.add(pairKey);
+                // Get Reverse Metrics
+                const rowB = matrix.get(nodeB);
+                const metricsBtoA = rowB ? rowB.get(nodeA) : null;
+                if (!metricsBtoA)
+                    return;
+                // 1. Check Vector Similarity
+                const vecA = vectorSpace.getVector(nodeA);
+                const vecB = vectorSpace.getVector(nodeB);
+                if (!vecA || !vecB)
+                    return;
+                // Simple dot product for L2 normalized vectors
+                let similarity = 0;
+                for (let i = 0; i < vecA.length; i++)
+                    similarity += vecA[i] * vecB[i];
+                if (similarity < vectorThreshold)
+                    return;
+                // 2. Check Asymmetry
+                // P(B|A) = metricsAtoB.conditionalProb
+                // P(A|B) = metricsBtoA.conditionalProb
+                const p_A_given_B = metricsBtoA.conditionalProb;
+                const p_B_given_A = metricsAtoB.conditionalProb;
+                const diff = p_A_given_B - p_B_given_A;
+                if (diff > asymmetryThreshold) {
+                    // A is Parent (Context) of B
+                    // Because B appears implies A appears (High P(A|B))
+                    results.push({
+                        source: nodeA,
+                        target: nodeB,
+                        weight: similarity, // Use similarity as edge weight
+                        confidence: diff,
+                        reason: `Hybrid: Sim=${similarity.toFixed(2)}, Asym=${diff.toFixed(2)}`
+                    });
+                }
+                else if (-diff > asymmetryThreshold) {
+                    // B is Parent of A
+                    results.push({
+                        source: nodeB,
+                        target: nodeA,
+                        weight: similarity,
+                        confidence: -diff,
+                        reason: `Hybrid: Sim=${similarity.toFixed(2)}, Asym=${(-diff).toFixed(2)}`
+                    });
+                }
+            });
+        });
+        return results.sort((a, b) => b.confidence - a.confidence);
+    }
+}
+exports.HybridEngine = HybridEngine;

package/dist/backend/algorithms/StatisticalAnalyzer.js ADDED Viewed

@@ -0,0 +1,123 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.StatisticalAnalyzer = void 0;
+class StatisticalAnalyzer {
+    /**
+     * Analyze co-occurrence of terms across the corpus.
+     * 分析语料库中术语的共现情况。
+     * @param files All files in the corpus
+     * @param terms List of terms (concept IDs) to track
+     * @param windowSize Context window (e.g., 'sentence', 'paragraph', or number of words) - currently 'file' for simplicity
+     */
+    static analyze(files, terms) {
+        const matrix = new Map();
+        // 1. Build Term Frequency Map (Document Frequency)
+        // 1. 构建术语频率映射 (文档频率)
+        const termDocCounts = new Map();
+        const fileHasTerm = new Map(); // fileId -> Set<term>
+        terms.forEach(term => termDocCounts.set(term, 0));
+        // Pre-process files to find term occurrences
+        files.forEach(file => {
+            const content = file.content.toLowerCase();
+            const foundTerms = new Set();
+            terms.forEach(term => {
+                // Simple inclusion check (can be improved with Regex/Tokenization)
+                if (content.includes(term.toLowerCase())) {
+                    foundTerms.add(term);
+                }
+            });
+            fileHasTerm.set(file.filename, foundTerms);
+            foundTerms.forEach(term => {
+                termDocCounts.set(term, (termDocCounts.get(term) || 0) + 1);
+            });
+        });
+        // 2. Calculate Co-occurrences
+        // 2. 计算共现
+        terms.forEach(source => {
+            const row = new Map();
+            matrix.set(source, row);
+            const sourceCount = termDocCounts.get(source) || 0;
+            if (sourceCount === 0)
+                return;
+            terms.forEach(target => {
+                if (source === target)
+                    return;
+                let intersection = 0;
+                // Iterate files
+                files.forEach(file => {
+                    const termsInFile = fileHasTerm.get(file.filename);
+                    if (termsInFile && termsInFile.has(source) && termsInFile.has(target)) {
+                        intersection++;
+                    }
+                });
+                if (intersection > 0) {
+                    const targetCount = termDocCounts.get(target) || 0;
+                    const union = sourceCount + targetCount - intersection;
+                    row.set(target, {
+                        count: intersection,
+                        jaccard: union === 0 ? 0 : intersection / union,
+                        conditionalProb: intersection / sourceCount
+                    });
+                }
+            });
+        });
+        return matrix;
+    }
+    /**
+     * Infer directional dependencies based on Probability Asymmetry.
+     * 基于概率不对称性推断有向依赖关系。
+     * Logic: If P(Parent | Child) >> P(Child | Parent), then Child implies Parent context.
+     * 逻辑：如果 P(父 | 子) >> P(子 | 父)，则子隐含父语境。
+     */
+    static inferDependencies(matrix, minSupport = 0.1, asymmetryThreshold = 0.2) {
+        const dependencies = [];
+        const checkedPairs = new Set();
+        matrix.forEach((targets, nodeA) => {
+            targets.forEach((metricsAtoB, nodeB) => {
+                // Avoid checking A-B and B-A twice
+                const pairKey = [nodeA, nodeB].sort().join('|');
+                if (checkedPairs.has(pairKey))
+                    return;
+                checkedPairs.add(pairKey);
+                // Get metrics for B -> A (if exists)
+                const rowB = matrix.get(nodeB);
+                const metricsBtoA = rowB ? rowB.get(nodeA) : null;
+                if (!metricsBtoA)
+                    return;
+                // metricsAtoB.conditionalProb = P(B|A) (Prob of B given A)
+                // metricsBtoA.conditionalProb = P(A|B) (Prob of A given B)
+                // Hypothesis: General concepts (A) appear often. Specific concepts (B) appear less often but usually with A.
+                // So P(A|B) should be HIGH (If B is there, A is there).
+                // P(B|A) might be LOW (A can exist without B).
+                // If P(A|B) > P(B|A) + threshold
+                // Then A is Parent, B is Child. Edge: A -> B.
+                const p_A_given_B = metricsBtoA.conditionalProb;
+                const p_B_given_A = metricsAtoB.conditionalProb;
+                // Jaccard serves as a baseline "relevance" check
+                if (metricsAtoB.jaccard < minSupport)
+                    return;
+                const diff = p_A_given_B - p_B_given_A;
+                if (diff > asymmetryThreshold) {
+                    // A is Parent of B
+                    dependencies.push({
+                        source: nodeA,
+                        target: nodeB,
+                        weight: metricsAtoB.jaccard,
+                        confidence: diff
+                    });
+                }
+                else if (-diff > asymmetryThreshold) {
+                    // B is Parent of A
+                    dependencies.push({
+                        source: nodeB,
+                        target: nodeA,
+                        weight: metricsAtoB.jaccard,
+                        confidence: -diff
+                    });
+                }
+            });
+        });
+        return dependencies.sort((a, b) => b.confidence - a.confidence);
+    }
+}
+exports.StatisticalAnalyzer = StatisticalAnalyzer;