@easbot/note 0.1.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.en.md +211 -0
- package/README.md +211 -0
- package/dist/assets/txt/assets/jieba_dict.txt +349046 -0
- package/dist/index.cjs +125 -0
- package/dist/index.d.cts +279 -0
- package/dist/index.d.ts +279 -0
- package/dist/index.mjs +125 -0
- package/package.json +95 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 houjallen
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.en.md
ADDED
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
# @easbot/note
|
|
2
|
+
|
|
3
|
+
> Note Knowledge Base - Unstructured Memory and Document Knowledge Base Management System
|
|
4
|
+
|
|
5
|
+
## Introduction
|
|
6
|
+
|
|
7
|
+
Note Knowledge Base is a TypeScript/Node.js-based unstructured memory and document knowledge base management system, supporting hybrid search (vector similarity + full-text search + graph relationship reasoning) and LLM-driven knowledge ingestion.
|
|
8
|
+
|
|
9
|
+
## Features
|
|
10
|
+
|
|
11
|
+
- **Document Ingestion**: Markdown and plain text parsing, intelligent chunking and embedding generation
|
|
12
|
+
- **Hybrid Search**: Vector similarity + FTS5 full-text search + graph relationship reasoning with configurable weights
|
|
13
|
+
- **Graph Queries**: SQL interface for querying nodes and edges, path finding support
|
|
14
|
+
- **Entity Management**: Complete CRUD operations for nodes and edges
|
|
15
|
+
- **Model Injection**: Support for AI SDK-compliant LLM and Embedding models
|
|
16
|
+
- **Status Monitoring**: Knowledge base status and health check
|
|
17
|
+
- **Intelligent Chunking**: Markdown-aware chunking that preserves code blocks and inline code
|
|
18
|
+
|
|
19
|
+
## Installation
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
npm install @easbot/note
|
|
23
|
+
# or
|
|
24
|
+
pnpm add @easbot/note
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
## Quick Start
|
|
28
|
+
|
|
29
|
+
### Basic Usage
|
|
30
|
+
|
|
31
|
+
```typescript
|
|
32
|
+
import { NoteKnowledge } from '@easbot/note';
|
|
33
|
+
|
|
34
|
+
// Create knowledge base instance
|
|
35
|
+
const kb = new NoteKnowledge({
|
|
36
|
+
storagePath: './data/knowledge.db',
|
|
37
|
+
vectorDims: 768,
|
|
38
|
+
chunkSize: 1000,
|
|
39
|
+
chunkOverlap: 200,
|
|
40
|
+
embedding: yourEmbeddingModel, // AI SDK EmbeddingModelV2
|
|
41
|
+
llm: yourLLM, // Optional, for summary generation (use graphLlm for entity extraction)
|
|
42
|
+
hybridSearch: {
|
|
43
|
+
vectorWeight: 0.7,
|
|
44
|
+
ftsWeight: 0.3,
|
|
45
|
+
graphWeight: 0.0,
|
|
46
|
+
rerankTopK: 10,
|
|
47
|
+
},
|
|
48
|
+
});
|
|
49
|
+
|
|
50
|
+
// Initialize
|
|
51
|
+
await kb.initialize();
|
|
52
|
+
|
|
53
|
+
// Ingest document
|
|
54
|
+
const result = await kb.ingestDocument('./docs/example.md');
|
|
55
|
+
console.log(`Created ${result.chunksCreated} chunks`);
|
|
56
|
+
|
|
57
|
+
// Search
|
|
58
|
+
const results = await kb.search('query content', { limit: 10 });
|
|
59
|
+
for (const r of results) {
|
|
60
|
+
console.log(`[${r.score}] ${r.content}`);
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
// Close connection
|
|
64
|
+
await kb.close();
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
### Graph Queries
|
|
68
|
+
|
|
69
|
+
```typescript
|
|
70
|
+
// Create nodes
|
|
71
|
+
const node1 = await kb.createNode('EntityA', 'Type1', { prop: 'value' });
|
|
72
|
+
const node2 = await kb.createNode('EntityB', 'Type1');
|
|
73
|
+
|
|
74
|
+
// Create edge
|
|
75
|
+
const edge = await kb.createEdge(node1.id, node2.id, 'RELATED_TO');
|
|
76
|
+
|
|
77
|
+
// Find path
|
|
78
|
+
const paths = await kb.findPath('EntityA', 'EntityB', 3);
|
|
79
|
+
|
|
80
|
+
// SQL query
|
|
81
|
+
const nodes = await kb.queryNodes('SELECT * FROM nodes WHERE type = ?', ['Type1']);
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
## API Documentation
|
|
85
|
+
|
|
86
|
+
### NoteKnowledge
|
|
87
|
+
|
|
88
|
+
#### Constructor
|
|
89
|
+
|
|
90
|
+
```typescript
|
|
91
|
+
constructor(config: NoteKnowledgeConfig)
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
Configuration options:
|
|
95
|
+
- `storagePath`: Database storage path
|
|
96
|
+
- `vectorDims`: Vector dimensions (default 768)
|
|
97
|
+
- `chunkSize`: Chunk size (default 1000)
|
|
98
|
+
- `chunkOverlap`: Chunk overlap (default 200)
|
|
99
|
+
- `embedding`: Embedding model (AI SDK EmbeddingModelV2)
|
|
100
|
+
- `llm`: LLM model (optional, for summary generation)
|
|
101
|
+
- `graphLlm`: Graph LLM (optional, for entity extraction)
|
|
102
|
+
- `rerankLlm`: Rerank LLM (optional)
|
|
103
|
+
- `hybridSearch`: Hybrid search weight configuration
|
|
104
|
+
|
|
105
|
+
#### Document Operations
|
|
106
|
+
|
|
107
|
+
- `initialize()`: Initialize knowledge base
|
|
108
|
+
- `ingestDocument(filePath)`: Ingest single document
|
|
109
|
+
- `ingestDocuments(filePaths, progress?)`: Batch ingest documents
|
|
110
|
+
- `removeDocument(filePath)`: Remove document
|
|
111
|
+
|
|
112
|
+
#### Search Operations
|
|
113
|
+
|
|
114
|
+
- `search(query, options?)`: Hybrid search
|
|
115
|
+
|
|
116
|
+
#### Graph Operations
|
|
117
|
+
|
|
118
|
+
- `queryNodes(sql, params?)`: SQL query for nodes
|
|
119
|
+
- `queryEdges(sql, params?)`: SQL query for edges
|
|
120
|
+
- `findPath(startNode, endNode, maxDepth?)`: Find path
|
|
121
|
+
- `createNode(name, type, properties?)`: Create node
|
|
122
|
+
- `updateNode(id, properties)`: Update node
|
|
123
|
+
- `deleteNode(id)`: Delete node
|
|
124
|
+
- `createEdge(source, target, relation, properties?)`: Create edge
|
|
125
|
+
- `deleteEdge(id)`: Delete edge
|
|
126
|
+
|
|
127
|
+
#### Status and Lifecycle
|
|
128
|
+
|
|
129
|
+
- `getStatus()`: Get knowledge base status
|
|
130
|
+
- `healthCheck()`: Health check
|
|
131
|
+
- `close()`: Close connection
|
|
132
|
+
|
|
133
|
+
## Data Models
|
|
134
|
+
|
|
135
|
+
### Document
|
|
136
|
+
|
|
137
|
+
```typescript
|
|
138
|
+
interface Document {
|
|
139
|
+
id: number;
|
|
140
|
+
path: string;
|
|
141
|
+
title?: string;
|
|
142
|
+
contentHash: string;
|
|
143
|
+
summary?: string;
|
|
144
|
+
metadata?: Record<string, unknown>;
|
|
145
|
+
lastModified: number;
|
|
146
|
+
}
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
### Node
|
|
150
|
+
|
|
151
|
+
```typescript
|
|
152
|
+
interface Node {
|
|
153
|
+
id: number;
|
|
154
|
+
name: string;
|
|
155
|
+
type: string;
|
|
156
|
+
properties?: Record<string, unknown>;
|
|
157
|
+
createdAt: number;
|
|
158
|
+
updatedAt: number;
|
|
159
|
+
}
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
### Edge
|
|
163
|
+
|
|
164
|
+
```typescript
|
|
165
|
+
interface Edge {
|
|
166
|
+
id: number;
|
|
167
|
+
source: number;
|
|
168
|
+
target: number;
|
|
169
|
+
relation: string;
|
|
170
|
+
properties?: Record<string, unknown>;
|
|
171
|
+
createdAt: number;
|
|
172
|
+
}
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
### SearchResult
|
|
176
|
+
|
|
177
|
+
```typescript
|
|
178
|
+
interface SearchResult {
|
|
179
|
+
id: number;
|
|
180
|
+
content: string;
|
|
181
|
+
score: number;
|
|
182
|
+
path: string;
|
|
183
|
+
startLine: number;
|
|
184
|
+
endLine: number;
|
|
185
|
+
documentId: number;
|
|
186
|
+
}
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
## Hybrid Search
|
|
190
|
+
|
|
191
|
+
The system supports three search methods:
|
|
192
|
+
|
|
193
|
+
1. **Vector Search**: Cosine similarity based on embedding vectors
|
|
194
|
+
2. **Full-Text Search**: SQLite FTS5 with porter stemmer and unicode61 tokenizer
|
|
195
|
+
3. **Graph Search**: Graph traversal based on entity relationships
|
|
196
|
+
|
|
197
|
+
Results are merged using a weighted score combination, with a configurable weight for each method.
|
|
198
|
+
|
|
199
|
+
## Dependencies
|
|
200
|
+
|
|
201
|
+
- `better-sqlite3`: SQLite database
|
|
202
|
+
- `@easbot/utils`: Utility library (Markdown parsing, intelligent chunking)
|
|
203
|
+
- `ai`: AI SDK
|
|
204
|
+
|
|
205
|
+
## License
|
|
206
|
+
|
|
207
|
+
MIT
|
|
208
|
+
|
|
209
|
+
## Author
|
|
210
|
+
|
|
211
|
+
houjallen
|
package/README.md
ADDED
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
# @easbot/note
|
|
2
|
+
|
|
3
|
+
> Note Knowledge Base - 非结构化记忆和文档知识库管理系统
|
|
4
|
+
|
|
5
|
+
## 简介
|
|
6
|
+
|
|
7
|
+
Note Knowledge Base 是一个基于 TypeScript/Node.js 的非结构化记忆和文档知识库管理系统,支持混合搜索(向量相似度 + 全文搜索 + 图关系推理)和 LLM 驱动的知识摄取。
|
|
8
|
+
|
|
9
|
+
## 特性
|
|
10
|
+
|
|
11
|
+
- **文档摄取**:支持 Markdown、纯文本解析,智能分块和嵌入生成
|
|
12
|
+
- **混合搜索**:向量相似度 + FTS5 全文搜索 + 图关系推理,可配置权重
|
|
13
|
+
- **图查询**:SQL 接口查询节点和边,支持路径查找
|
|
14
|
+
- **实体管理**:节点和边的完整 CRUD 操作
|
|
15
|
+
- **模型注入**:支持 AI SDK 规范的 LLM 和 Embedding 模型
|
|
16
|
+
- **状态监控**:提供知识库状态和健康检查
|
|
17
|
+
- **智能分块**:使用 Markdown 智能分块,保护代码块和内联代码
|
|
18
|
+
|
|
19
|
+
## 安装
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
npm install @easbot/note
|
|
23
|
+
# 或
|
|
24
|
+
pnpm add @easbot/note
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
## 快速开始
|
|
28
|
+
|
|
29
|
+
### 基本使用
|
|
30
|
+
|
|
31
|
+
```typescript
|
|
32
|
+
import { NoteKnowledge } from '@easbot/note';
|
|
33
|
+
|
|
34
|
+
// 创建知识库实例
|
|
35
|
+
const kb = new NoteKnowledge({
|
|
36
|
+
storagePath: './data/knowledge.db',
|
|
37
|
+
vectorDims: 768,
|
|
38
|
+
chunkSize: 1000,
|
|
39
|
+
chunkOverlap: 200,
|
|
40
|
+
embedding: yourEmbeddingModel, // AI SDK EmbeddingModelV2
|
|
41
|
+
llm: yourLLM, // 可选,用于摘要生成(实体提取请使用 graphLlm)
|
|
42
|
+
hybridSearch: {
|
|
43
|
+
vectorWeight: 0.7,
|
|
44
|
+
ftsWeight: 0.3,
|
|
45
|
+
graphWeight: 0.0,
|
|
46
|
+
rerankTopK: 10,
|
|
47
|
+
},
|
|
48
|
+
});
|
|
49
|
+
|
|
50
|
+
// 初始化
|
|
51
|
+
await kb.initialize();
|
|
52
|
+
|
|
53
|
+
// 摄取文档
|
|
54
|
+
const result = await kb.ingestDocument('./docs/example.md');
|
|
55
|
+
console.log(`创建了 ${result.chunksCreated} 个块`);
|
|
56
|
+
|
|
57
|
+
// 搜索
|
|
58
|
+
const results = await kb.search('查询内容', { limit: 10 });
|
|
59
|
+
for (const r of results) {
|
|
60
|
+
console.log(`[${r.score}] ${r.content}`);
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
// 关闭连接
|
|
64
|
+
await kb.close();
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
### 图查询
|
|
68
|
+
|
|
69
|
+
```typescript
|
|
70
|
+
// 创建节点
|
|
71
|
+
const node1 = await kb.createNode('实体A', '类型1', { prop: 'value' });
|
|
72
|
+
const node2 = await kb.createNode('实体B', '类型1');
|
|
73
|
+
|
|
74
|
+
// 创建边
|
|
75
|
+
const edge = await kb.createEdge(node1.id, node2.id, 'RELATED_TO');
|
|
76
|
+
|
|
77
|
+
// 查找路径
|
|
78
|
+
const paths = await kb.findPath('实体A', '实体B', 3);
|
|
79
|
+
|
|
80
|
+
// SQL 查询
|
|
81
|
+
const nodes = await kb.queryNodes('SELECT * FROM nodes WHERE type = ?', ['类型1']);
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
## API 文档
|
|
85
|
+
|
|
86
|
+
### NoteKnowledge
|
|
87
|
+
|
|
88
|
+
#### 构造函数
|
|
89
|
+
|
|
90
|
+
```typescript
|
|
91
|
+
constructor(config: NoteKnowledgeConfig)
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
配置选项:
|
|
95
|
+
- `storagePath`: 数据库存储路径
|
|
96
|
+
- `vectorDims`: 向量维度(默认 768)
|
|
97
|
+
- `chunkSize`: 分块大小(默认 1000)
|
|
98
|
+
- `chunkOverlap`: 分块重叠(默认 200)
|
|
99
|
+
- `embedding`: Embedding 模型(AI SDK EmbeddingModelV2)
|
|
100
|
+
- `llm`: LLM 模型(可选,用于摘要生成)
|
|
101
|
+
- `graphLlm`: 图 LLM(可选,用于实体提取)
|
|
102
|
+
- `rerankLlm`: 重排序 LLM(可选)
|
|
103
|
+
- `hybridSearch`: 混合搜索权重配置
|
|
104
|
+
|
|
105
|
+
#### 文档操作
|
|
106
|
+
|
|
107
|
+
- `initialize()`: 初始化知识库
|
|
108
|
+
- `ingestDocument(filePath)`: 摄取单个文档
|
|
109
|
+
- `ingestDocuments(filePaths, progress?)`: 批量摄取文档
|
|
110
|
+
- `removeDocument(filePath)`: 删除文档
|
|
111
|
+
|
|
112
|
+
#### 搜索操作
|
|
113
|
+
|
|
114
|
+
- `search(query, options?)`: 混合搜索
|
|
115
|
+
|
|
116
|
+
#### 图操作
|
|
117
|
+
|
|
118
|
+
- `queryNodes(sql, params?)`: SQL 查询节点
|
|
119
|
+
- `queryEdges(sql, params?)`: SQL 查询边
|
|
120
|
+
- `findPath(startNode, endNode, maxDepth?)`: 查找路径
|
|
121
|
+
- `createNode(name, type, properties?)`: 创建节点
|
|
122
|
+
- `updateNode(id, properties)`: 更新节点
|
|
123
|
+
- `deleteNode(id)`: 删除节点
|
|
124
|
+
- `createEdge(source, target, relation, properties?)`: 创建边
|
|
125
|
+
- `deleteEdge(id)`: 删除边
|
|
126
|
+
|
|
127
|
+
#### 状态和生命周期
|
|
128
|
+
|
|
129
|
+
- `getStatus()`: 获取知识库状态
|
|
130
|
+
- `healthCheck()`: 健康检查
|
|
131
|
+
- `close()`: 关闭连接
|
|
132
|
+
|
|
133
|
+
## 数据模型
|
|
134
|
+
|
|
135
|
+
### 文档 (Document)
|
|
136
|
+
|
|
137
|
+
```typescript
|
|
138
|
+
interface Document {
|
|
139
|
+
id: number;
|
|
140
|
+
path: string;
|
|
141
|
+
title?: string;
|
|
142
|
+
contentHash: string;
|
|
143
|
+
summary?: string;
|
|
144
|
+
metadata?: Record<string, unknown>;
|
|
145
|
+
lastModified: number;
|
|
146
|
+
}
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
### 节点 (Node)
|
|
150
|
+
|
|
151
|
+
```typescript
|
|
152
|
+
interface Node {
|
|
153
|
+
id: number;
|
|
154
|
+
name: string;
|
|
155
|
+
type: string;
|
|
156
|
+
properties?: Record<string, unknown>;
|
|
157
|
+
createdAt: number;
|
|
158
|
+
updatedAt: number;
|
|
159
|
+
}
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
### 边 (Edge)
|
|
163
|
+
|
|
164
|
+
```typescript
|
|
165
|
+
interface Edge {
|
|
166
|
+
id: number;
|
|
167
|
+
source: number;
|
|
168
|
+
target: number;
|
|
169
|
+
relation: string;
|
|
170
|
+
properties?: Record<string, unknown>;
|
|
171
|
+
createdAt: number;
|
|
172
|
+
}
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
### 搜索结果 (SearchResult)
|
|
176
|
+
|
|
177
|
+
```typescript
|
|
178
|
+
interface SearchResult {
|
|
179
|
+
id: number;
|
|
180
|
+
content: string;
|
|
181
|
+
score: number;
|
|
182
|
+
path: string;
|
|
183
|
+
startLine: number;
|
|
184
|
+
endLine: number;
|
|
185
|
+
documentId: number;
|
|
186
|
+
}
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
## 混合搜索
|
|
190
|
+
|
|
191
|
+
系统支持三种搜索方式的混合:
|
|
192
|
+
|
|
193
|
+
1. **向量搜索**:基于嵌入向量的余弦相似度
|
|
194
|
+
2. **全文搜索**:SQLite FTS5,支持 porter 词干提取和 unicode61 分词
|
|
195
|
+
3. **图搜索**:基于实体关系的图遍历
|
|
196
|
+
|
|
197
|
+
结果合并使用加权分数组合,可配置各搜索方式的权重。
|
|
198
|
+
|
|
199
|
+
## 依赖
|
|
200
|
+
|
|
201
|
+
- `better-sqlite3`: SQLite 数据库
|
|
202
|
+
- `@easbot/utils`: 工具库(Markdown 解析、智能分块)
|
|
203
|
+
- `ai`: AI SDK
|
|
204
|
+
|
|
205
|
+
## 许可证
|
|
206
|
+
|
|
207
|
+
MIT
|
|
208
|
+
|
|
209
|
+
## 作者
|
|
210
|
+
|
|
211
|
+
houjallen
|