knowledge-graph-kit 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,38 @@
1
+ # ========== 节点类型定义 ==========
2
+ 节点类型:
3
+ # 1. 知识节点(核心骨架)
4
+ - Topic: # 知识主题
5
+ 属性: [name, level(认知层次), difficulty, estimatedTime]
6
+ - Concept: # 原子概念(如“多态”),Topic的子类
7
+ - Skill: # 可操作技能(如“调试”),Topic的子类
8
+
9
+ # 2. 资源节点(知识载体)
10
+ - Video: # MOOC视频片段
11
+ 属性: [title, url, duration, startTime, endTime]
12
+ - TextSegment: # 教材/讲义段落
13
+ 属性: [content, pageNum]
14
+ - CodeExample: # 代码示例
15
+ 属性: [codeSnippet, language]
16
+
17
+ # 3. 教学节点(智能应用)
18
+ - Question: # 题目(预设或生成)
19
+ 属性: [stem(题干), type(选择/填空/简答), correctAnswer, difficulty]
20
+
21
+ # ========== 关系类型定义(部分) ==========
22
+ 关系类型:
23
+ # ---- 知识-知识关系 ----
24
+ hasPrerequisite: # 前置关系:domain 以 range 为前置知识(学习 domain 之前需先掌握 range)
25
+ domain: Topic, range: Topic
26
+ isA: # 概念上下位
27
+ domain: Topic, range: Topic
28
+ commonlyConfusedWith: # 易混淆
29
+ domain: Topic, range: Topic
30
+
31
+ # ---- 知识-资源关系 ----
32
+ teaches: # 视频讲解知识点
33
+ domain: Video, range: Topic
34
+ hasExample: # 知识点有代码示例
35
+ domain: Topic, range: CodeExample
36
+ assessedBy: # 知识点被题目考查
37
+ domain: Topic, range: Question
38
+
@@ -0,0 +1,120 @@
1
+ Metadata-Version: 2.4
2
+ Name: knowledge-graph-kit
3
+ Version: 0.1.0
4
+ Summary: 知识图谱构建管线:文本分块、LLM抽取、实体解析、Neo4j写入
5
+ Author-email: hbue_jerry <lovecpp@foxmail.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/jerryhbue/property-graph
8
+ Project-URL: Source, https://github.com/jerryhbue/property-graph
9
+ Project-URL: Tracker, https://github.com/jerryhbue/property-graph/issues
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.10
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
18
+ Requires-Python: >=3.10
19
+ Description-Content-Type: text/markdown
20
+ License-File: LICENSE
21
+ Requires-Dist: python-dotenv>=1.0.0
22
+ Requires-Dist: openai>=1.0.0
23
+ Requires-Dist: pydantic>=2.0.0
24
+ Requires-Dist: neo4j>=5.0.0
25
+ Requires-Dist: httpx>=0.28.0
26
+ Dynamic: license-file
27
+
28
+ # knowledge-graph-kit
29
+
30
+ 知识图谱构建管线:从教材文本中抽取实体和关系,写入 Neo4j 图数据库。
31
+
32
+ ## 安装
33
+
34
+ ```bash
35
+ pip install knowledge-graph-kit
36
+ ```
37
+
38
+ ## 环境变量
39
+
40
+ | 变量 | 说明 | 默认值 |
41
+ |---|---|---|
42
+ | `OPENAI_API_KEY` | OpenAI API 密钥 | — |
43
+ | `OPENAI_BASE_URL` | OpenAI API 地址(可换兼容 API) | — |
44
+ | `LLM_MODEL_NAME` | 模型名称 | `gpt-4o-mini` |
45
+ | `NEO4J_URI` | Neo4j 连接地址 | `bolt://localhost:7687` |
46
+ | `NEO4J_USERNAME` | Neo4j 用户名 | `neo4j` |
47
+ | `NEO4J_PASSWORD` | Neo4j 密码 | `12345678`(仅为示例默认值,实际部署请务必修改) |
48
+
49
+ 在项目根目录创建 `.env` 文件即可自动加载,或直接在 shell 中设置。
50
+
51
+ ## CLI 命令
52
+
53
+ 安装后提供 4 个命令行工具:
54
+
55
+ ### 1. 文本分块
56
+
57
+ ```bash
58
+ kg-chunker <txt_path>
59
+ ```
60
+
61
+ 按章节标题将教材文本拆分为语义块。
62
+
63
+ ### 2. 实体关系抽取
64
+
65
+ ```bash
66
+ kg-extractor <txt_path>
67
+ ```
68
+
69
+ 基于本体 schema 引导 LLM 抽取实体和关系,输出 `extraction_result.json`。
70
+
71
+ 可通过 `KG_SCHEMA_PATH` 环境变量指定自定义 ontology 文件。
72
+
73
+ ### 3. 实体解析去重
74
+
75
+ ```bash
76
+ kg-resolver <input.json>
77
+ ```
78
+
79
+ 精确/模糊去重实体,清理属性,更新关系引用,输出 `extraction_result_clean.json`。
80
+
81
+ ### 4. 写入 Neo4j
82
+
83
+ ```bash
84
+ kg-neo4j-writer [input.json]
85
+ ```
86
+
87
+ 将清洗后的结果写入 Neo4j 图数据库。未指定 `input.json` 时,默认尝试读取 `extraction_result_clean.json` 或 `extraction_result.json`。
88
+
89
+ ## 程序化使用
90
+
91
+ ```python
92
+ from knowledge_graph_kit import chunk_file, Neo4jWriter
93
+
94
+ # 文本分块
95
+ chunks = chunk_file("教材.txt")
96
+
97
+ # 配置 OpenAI
98
+ from knowledge_graph_kit.extractor import configure
99
+ configure(api_key="sk-xxx", model="gpt-4o")
100
+
101
+ # 写入 Neo4j(entities / relations 为前面抽取、解析步骤得到的实体和关系)
102
+ writer = Neo4jWriter(
103
+ uri="bolt://localhost:7687",
104
+ user="neo4j",
105
+ password="your-password",
106
+ )
107
+ writer.write_entities(entities)
108
+ writer.write_relations(relations)
109
+ writer.close()
110
+ ```
111
+
112
+ ## 管线流程
113
+
114
+ ```
115
+ txt 文件 → 分块(chunker) → LLM抽取(extractor) → 实体解析(resolver) → Neo4j写入(writer)
116
+ ```
117
+
118
+ ## License
119
+
120
+ MIT
@@ -0,0 +1,12 @@
1
+ knowledge_graph_kit/__init__.py,sha256=kdwMYns2WDXC7eAh8nwEmkys7XYRm2lrhU8kP-pwggQ,833
2
+ knowledge_graph_kit/chunker.py,sha256=Dr61Sast26clUGsMuHDdbNUZ85pA6w3IjnG5RB8QS8o,8944
3
+ knowledge_graph_kit/entity_resolver.py,sha256=dWxdMHZGlr8eJpduXEZUjLYyHbMN0Q6MzQ3ups0Zcmg,15610
4
+ knowledge_graph_kit/extractor.py,sha256=Q49tKvVUDmkJYJ7Wds20bVwev1R69U-cqelpDCfuBMc,18491
5
+ knowledge_graph_kit/neo4j_writer.py,sha256=arMZHJijGSNgSGzCroc55Jvd5OYsziosGCKPY_FZWxw,10498
6
+ knowledge_graph_kit/schema.txt,sha256=T7Xi1Xm51BeP6jSyxZKa_OWemwR8UUzpvxfymLFsYdk,1486
7
+ knowledge_graph_kit-0.1.0.dist-info/licenses/LICENSE,sha256=Btzdu2kIoMbdSp6OyCLupB1aRgpTCJ_szMimgEnpkkE,1056
8
+ knowledge_graph_kit-0.1.0.dist-info/METADATA,sha256=4zcwC2P15OFzP2VvtX2QQTkU1teqMLarTUggqO74oek,3241
9
+ knowledge_graph_kit-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
10
+ knowledge_graph_kit-0.1.0.dist-info/entry_points.txt,sha256=BV0DCAgUuMpft7BFpPNQO8p_U-zgvlAq2V-iND7MX_4,225
11
+ knowledge_graph_kit-0.1.0.dist-info/top_level.txt,sha256=BsnPVDjVwMYIa2GI1KLziv6eKZtsr917YIF8g78qTHY,20
12
+ knowledge_graph_kit-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,5 @@
1
+ [console_scripts]
2
+ kg-chunker = knowledge_graph_kit.chunker:main
3
+ kg-extractor = knowledge_graph_kit.extractor:main
4
+ kg-neo4j-writer = knowledge_graph_kit.neo4j_writer:main
5
+ kg-resolver = knowledge_graph_kit.entity_resolver:main
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 hbue_jerry
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ knowledge_graph_kit