kgmodule-utils 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kgmodule_utils-0.2.0/LICENSE +93 -0
- kgmodule_utils-0.2.0/PKG-INFO +210 -0
- kgmodule_utils-0.2.0/README.md +191 -0
- kgmodule_utils-0.2.0/pyproject.toml +59 -0
- kgmodule_utils-0.2.0/src/kg_utils/__init__.py +10 -0
- kgmodule_utils-0.2.0/src/kg_utils/embed.py +131 -0
- kgmodule_utils-0.2.0/src/kg_utils/py.typed +0 -0
- kgmodule_utils-0.2.0/src/kg_utils/snapshots/__init__.py +16 -0
- kgmodule_utils-0.2.0/src/kg_utils/snapshots/manager.py +497 -0
- kgmodule_utils-0.2.0/src/kg_utils/snapshots/models.py +137 -0
- kgmodule_utils-0.2.0/src/kg_utils/types/__init__.py +14 -0
- kgmodule_utils-0.2.0/src/kg_utils/types/extractor.py +68 -0
- kgmodule_utils-0.2.0/src/kg_utils/types/module.py +87 -0
- kgmodule_utils-0.2.0/src/kg_utils/types/specs.py +90 -0
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
Elastic License 2.0 (ELv2)
|
|
2
|
+
|
|
3
|
+
URL: https://www.elastic.co/licensing/elastic-license
|
|
4
|
+
|
|
5
|
+
## Acceptance
|
|
6
|
+
|
|
7
|
+
By using the software, you agree to all of the terms and conditions below.
|
|
8
|
+
|
|
9
|
+
## Copyright License
|
|
10
|
+
|
|
11
|
+
The licensor grants you a non-exclusive, royalty-free, worldwide,
|
|
12
|
+
non-sublicensable, non-transferable license to use, copy, distribute, make
|
|
13
|
+
available, and prepare derivative works of the software, in each case subject
|
|
14
|
+
to the limitations and conditions below.
|
|
15
|
+
|
|
16
|
+
## Limitations
|
|
17
|
+
|
|
18
|
+
You may not provide the software to third parties as a hosted or managed
|
|
19
|
+
service, where the service provides users with access to any substantial set
|
|
20
|
+
of the features or functionality of the software.
|
|
21
|
+
|
|
22
|
+
You may not move, change, disable, or circumvent the license key functionality
|
|
23
|
+
in the software, and you may not remove or obscure any functionality in the
|
|
24
|
+
software that is protected by the license key.
|
|
25
|
+
|
|
26
|
+
You may not alter, remove, or obscure any licensing, copyright, or other
|
|
27
|
+
notices of the licensor in the software. Any use of the licensor's trademarks
|
|
28
|
+
is subject to applicable law.
|
|
29
|
+
|
|
30
|
+
## Patents
|
|
31
|
+
|
|
32
|
+
The licensor grants you a license, under any patent claims the licensor can
|
|
33
|
+
license, or becomes able to license, to make, have made, use, sell, offer for
|
|
34
|
+
sale, import and have imported the software, in each case subject to the
|
|
35
|
+
limitations and conditions in this license. This license does not cover any
|
|
36
|
+
patent claims that you cause to be infringed by modifications or additions to
|
|
37
|
+
the software. If you or your company make any written claim that the software
|
|
38
|
+
infringes or contributes to infringement of any patent, your patent license
|
|
39
|
+
for the software granted under these terms ends immediately. If your company
|
|
40
|
+
makes such a claim, your patent license ends immediately for work on behalf
|
|
41
|
+
of your company.
|
|
42
|
+
|
|
43
|
+
## Notices
|
|
44
|
+
|
|
45
|
+
You must ensure that anyone who gets a copy of any part of the software from
|
|
46
|
+
you also gets a copy of these terms.
|
|
47
|
+
|
|
48
|
+
If you modify the software, you must include in any modified copies of the
|
|
49
|
+
software prominent notices stating that you have modified the software.
|
|
50
|
+
|
|
51
|
+
## No Other Rights
|
|
52
|
+
|
|
53
|
+
These terms do not imply any licenses other than those expressly granted in
|
|
54
|
+
these terms.
|
|
55
|
+
|
|
56
|
+
## Termination
|
|
57
|
+
|
|
58
|
+
If you use the software in violation of these terms, such use is not licensed,
|
|
59
|
+
and your licenses will automatically terminate. If the licensor provides you
|
|
60
|
+
with a notice of your violation, and you cease all violation of this license
|
|
61
|
+
no later than 30 days after you receive that notice, your licenses will be
|
|
62
|
+
reinstated retroactively. However, if you violate these terms after such
|
|
63
|
+
reinstatement, any additional violation of these terms will cause your
|
|
64
|
+
licenses to terminate automatically and permanently.
|
|
65
|
+
|
|
66
|
+
## No Liability
|
|
67
|
+
|
|
68
|
+
*As far as the law allows, the software comes as is, without any warranty or
|
|
69
|
+
condition, and the licensor will not be liable to you for any damages arising
|
|
70
|
+
out of these terms or the use or nature of the software, under any kind of
|
|
71
|
+
legal claim.*
|
|
72
|
+
|
|
73
|
+
## Definitions
|
|
74
|
+
|
|
75
|
+
The **licensor** is the entity offering these terms, and the **software** is
|
|
76
|
+
the software the licensor makes available under these terms, including any
|
|
77
|
+
portion of it.
|
|
78
|
+
|
|
79
|
+
**you** refers to the individual or entity agreeing to these terms.
|
|
80
|
+
|
|
81
|
+
**your company** is any legal entity, sole proprietorship, or other kind of
|
|
82
|
+
organization that you work for, plus all organizations that have control over,
|
|
83
|
+
are under the control of, or are under common control with that organization.
|
|
84
|
+
**control** means ownership of substantially all the assets of an entity, or
|
|
85
|
+
the power to direct its management and policies by vote, contract, or
|
|
86
|
+
otherwise. Control can be direct or indirect.
|
|
87
|
+
|
|
88
|
+
**your licenses** are all the licenses granted to you for the software under
|
|
89
|
+
these terms.
|
|
90
|
+
|
|
91
|
+
**use** means anything you do with the software requiring one of your licenses.
|
|
92
|
+
|
|
93
|
+
**trademark** means trademarks, service marks, and similar rights.
|
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: kgmodule-utils
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: Shared types and snapshot infrastructure for the KGModule SDK
|
|
5
|
+
License: Elastic-2.0
|
|
6
|
+
License-File: LICENSE
|
|
7
|
+
Keywords: knowledge-graph,kgmodule,sdk,types,snapshots
|
|
8
|
+
Author: Eric G. Suchanek, PhD
|
|
9
|
+
Author-email: suchanek@flux-frontiers.com
|
|
10
|
+
Requires-Python: >=3.12,<3.14
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
16
|
+
Project-URL: Repository, https://github.com/Flux-Frontiers/kg_utils
|
|
17
|
+
Description-Content-Type: text/markdown
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
[Python](https://www.python.org/)
|
|
21
|
+
[License: Elastic-2.0](https://www.elastic.co/licensing/elastic-license)
|
|
22
|
+
[Releases](https://github.com/Flux-Frontiers/KG_utils/releases)
|
|
23
|
+
[CI](https://github.com/Flux-Frontiers/KG_utils/actions/workflows/ci.yml)
|
|
24
|
+
[Poetry](https://python-poetry.org/)
|
|
25
|
+
|
|
26
|
+
# kgmodule-utils
|
|
27
|
+
|
|
28
|
+
**kgmodule-utils** — Shared types and snapshot infrastructure for the KGModule SDK.
|
|
29
|
+
|
|
30
|
+
*Author: Eric G. Suchanek, PhD*
|
|
31
|
+
|
|
32
|
+
*Flux-Frontiers, Liberty TWP, OH*
|
|
33
|
+
|
|
34
|
+
---
|
|
35
|
+
|
|
36
|
+
## Overview
|
|
37
|
+
|
|
38
|
+
kgmodule-utils is the **zero-dependency foundation package** for the Flux-Frontiers knowledge-graph ecosystem. It provides the canonical type abstractions and temporal snapshot infrastructure that all KGModule implementations — [PyCodeKG](https://github.com/Flux-Frontiers/pycode_kg), [FTreeKG](https://github.com/Flux-Frontiers/ftree_kg), [DocKG](https://github.com/Flux-Frontiers/doc_kg), [AgentKG](https://github.com/Flux-Frontiers/agent_kg) — depend on.
|
|
39
|
+
|
|
40
|
+
Every KGModule shares the same `NodeSpec`, `EdgeSpec`, `KGExtractor`, and `KGModule` base classes defined here, ensuring consistent interfaces across the ecosystem. The snapshot subsystem enables temporal metric tracking, delta comparison, and pruning across git commits.
|
|
41
|
+
|
|
42
|
+
---
|
|
43
|
+
|
|
44
|
+
## Features
|
|
45
|
+
|
|
46
|
+
- **Core type abstractions** — `NodeSpec`, `EdgeSpec`, `QueryResult`, `SnippetPack` dataclasses for knowledge-graph nodes, edges, and query results
|
|
47
|
+
- **KGExtractor base class** — Abstract interface for domain-specific extractors with `extract()`, `node_kinds()`, `edge_kinds()`, and `coverage_metric()`
|
|
48
|
+
- **KGModule base class** — Abstract interface for knowledge-graph modules with `build()`, `query()`, `pack()`, `stats()`, and `analyze()`
|
|
49
|
+
- **Snapshot models** — `Snapshot` dataclass keyed by git tree hash with free-form metrics, hotspots, issues, and delta tracking
|
|
50
|
+
- **SnapshotManager** — Capture, persist, load, list, diff, and prune snapshots with automatic deduplication and delta computation
|
|
51
|
+
- **SnapshotManifest** — Fast-lookup index of all snapshots with format versioning
|
|
52
|
+
- **Zero dependencies** — Stdlib-only; no external packages required at runtime
|
|
53
|
+
|
|
54
|
+
---
|
|
55
|
+
|
|
56
|
+
## Installation
|
|
57
|
+
|
|
58
|
+
**Requirements:** Python ≥ 3.12, < 3.14
|
|
59
|
+
|
|
60
|
+
### Standalone (pip)
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
pip install kgmodule-utils
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
### Existing Poetry project
|
|
67
|
+
|
|
68
|
+
```bash
|
|
69
|
+
poetry add kgmodule-utils
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
Or declare it directly in your `pyproject.toml`:
|
|
73
|
+
|
|
74
|
+
```toml
|
|
75
|
+
[tool.poetry.dependencies]
|
|
76
|
+
kgmodule-utils = "^0.2.0"
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
---
|
|
80
|
+
|
|
81
|
+
## Quick Start
|
|
82
|
+
|
|
83
|
+
### Types — Define a KGExtractor
|
|
84
|
+
|
|
85
|
+
```python
|
|
86
|
+
from kg_utils.types import NodeSpec, EdgeSpec, KGExtractor, KGModule
|
|
87
|
+
|
|
88
|
+
class MyExtractor(KGExtractor):
|
|
89
|
+
def node_kinds(self) -> list[str]:
|
|
90
|
+
return ["module", "function", "class"]
|
|
91
|
+
|
|
92
|
+
def edge_kinds(self) -> list[str]:
|
|
93
|
+
return ["CONTAINS", "CALLS", "IMPORTS"]
|
|
94
|
+
|
|
95
|
+
def extract(self, source_root: str):
|
|
96
|
+
# Yield NodeSpec and EdgeSpec objects from your domain
|
|
97
|
+
yield NodeSpec(
|
|
98
|
+
node_id="fn:main:hello",
|
|
99
|
+
kind="function",
|
|
100
|
+
name="hello",
|
|
101
|
+
qualname="main.hello",
|
|
102
|
+
source_path="main.py",
|
|
103
|
+
docstring="Greet the user.",
|
|
104
|
+
)
|
|
105
|
+
yield EdgeSpec(
|
|
106
|
+
source_id="mod:main",
|
|
107
|
+
target_id="fn:main:hello",
|
|
108
|
+
relation="CONTAINS",
|
|
109
|
+
)
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
### Snapshots — Track metrics over time
|
|
113
|
+
|
|
114
|
+
```python
|
|
115
|
+
from kg_utils.snapshots import SnapshotManager
|
|
116
|
+
|
|
117
|
+
mgr = SnapshotManager(snapshots_dir=".my_kg/snapshots", package_name="my-kg")
|
|
118
|
+
|
|
119
|
+
# Capture a snapshot from current metrics
|
|
120
|
+
snapshot = mgr.capture(metrics={
|
|
121
|
+
"total_nodes": 142,
|
|
122
|
+
"total_edges": 387,
|
|
123
|
+
"coverage": 0.78,
|
|
124
|
+
})
|
|
125
|
+
|
|
126
|
+
# Save with automatic deduplication
|
|
127
|
+
mgr.save_snapshot(snapshot)
|
|
128
|
+
|
|
129
|
+
# List and compare
|
|
130
|
+
snaps = mgr.list_snapshots(limit=5)
|
|
131
|
+
delta = mgr.diff_snapshots(key_a=snaps[0].key, key_b=snaps[-1].key)
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
---
|
|
135
|
+
|
|
136
|
+
## API Reference
|
|
137
|
+
|
|
138
|
+
### `kg_utils.types`
|
|
139
|
+
|
|
140
|
+
| Class | Description |
|
|
141
|
+
|---|---|
|
|
142
|
+
| `NodeSpec` | Dataclass for KG nodes: `node_id`, `kind`, `name`, `qualname`, `source_path`, `docstring` |
|
|
143
|
+
| `EdgeSpec` | Dataclass for KG edges: `source_id`, `target_id`, `relation` |
|
|
144
|
+
| `QueryResult` | Container for query responses with nodes, edges, and metadata |
|
|
145
|
+
| `SnippetPack` | Extended result container with source-code snippets |
|
|
146
|
+
| `KGExtractor` | Abstract base class for domain extractors |
|
|
147
|
+
| `KGModule` | Abstract base class for knowledge-graph modules |
|
|
148
|
+
|
|
149
|
+
### `kg_utils.snapshots`
|
|
150
|
+
|
|
151
|
+
| Class | Description |
|
|
152
|
+
|---|---|
|
|
153
|
+
| `Snapshot` | Temporal snapshot keyed by git tree hash with free-form metrics and deltas |
|
|
154
|
+
| `SnapshotManager` | Capture, persist, load, list, diff, and prune snapshots |
|
|
155
|
+
| `SnapshotManifest` | Index of all snapshots with format versioning and fast lookup |
|
|
156
|
+
| `PruneResult` | Summary of pruning operations: removed, orphaned, broken entries |
|
|
157
|
+
|
|
158
|
+
---
|
|
159
|
+
|
|
160
|
+
## Project Structure
|
|
161
|
+
|
|
162
|
+
```
|
|
163
|
+
KG_utils/
|
|
164
|
+
├── LICENSE
|
|
165
|
+
├── README.md
|
|
166
|
+
├── pyproject.toml
|
|
167
|
+
├── pytest.ini
|
|
168
|
+
├── src/
|
|
169
|
+
│ └── kg_utils/
|
|
170
|
+
│ ├── __init__.py
|
|
171
|
+
│ ├── py.typed # PEP 561 marker
|
|
172
|
+
│ ├── types/
|
|
173
|
+
│ │ ├── __init__.py # Public re-exports
|
|
174
|
+
│ │ ├── specs.py # NodeSpec, EdgeSpec, QueryResult, SnippetPack
|
|
175
|
+
│ │ ├── extractor.py # KGExtractor ABC
|
|
176
|
+
│ │ └── module.py # KGModule ABC
|
|
177
|
+
│ └── snapshots/
|
|
178
|
+
│ ├── __init__.py # Public re-exports
|
|
179
|
+
│ ├── models.py # Snapshot, SnapshotManifest, PruneResult
|
|
180
|
+
│ └── manager.py # SnapshotManager
|
|
181
|
+
└── tests/
|
|
182
|
+
├── __init__.py
|
|
183
|
+
├── test_types.py
|
|
184
|
+
└── test_snapshots.py
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
---
|
|
188
|
+
|
|
189
|
+
## Development
|
|
190
|
+
|
|
191
|
+
```bash
|
|
192
|
+
git clone https://github.com/Flux-Frontiers/KG_utils.git
|
|
193
|
+
cd KG_utils
|
|
194
|
+
poetry install --with dev
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
Run the test suite:
|
|
198
|
+
|
|
199
|
+
```bash
|
|
200
|
+
poetry run pytest
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
---
|
|
204
|
+
|
|
205
|
+
## License
|
|
206
|
+
|
|
207
|
+
[Elastic License 2.0](https://www.elastic.co/licensing/elastic-license) — see [LICENSE](LICENSE).
|
|
208
|
+
|
|
209
|
+
Free to use, modify, and distribute. You may not offer the software as a hosted or managed service to third parties. Commercial use internally is permitted.
|
|
210
|
+
|
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
|
|
2
|
+
[Python](https://www.python.org/)
|
|
3
|
+
[License: Elastic-2.0](https://www.elastic.co/licensing/elastic-license)
|
|
4
|
+
[Releases](https://github.com/Flux-Frontiers/KG_utils/releases)
|
|
5
|
+
[CI](https://github.com/Flux-Frontiers/KG_utils/actions/workflows/ci.yml)
|
|
6
|
+
[Poetry](https://python-poetry.org/)
|
|
7
|
+
|
|
8
|
+
# kgmodule-utils
|
|
9
|
+
|
|
10
|
+
**kgmodule-utils** — Shared types and snapshot infrastructure for the KGModule SDK.
|
|
11
|
+
|
|
12
|
+
*Author: Eric G. Suchanek, PhD*
|
|
13
|
+
|
|
14
|
+
*Flux-Frontiers, Liberty TWP, OH*
|
|
15
|
+
|
|
16
|
+
---
|
|
17
|
+
|
|
18
|
+
## Overview
|
|
19
|
+
|
|
20
|
+
kgmodule-utils is the **zero-dependency foundation package** for the Flux-Frontiers knowledge-graph ecosystem. It provides the canonical type abstractions and temporal snapshot infrastructure that all KGModule implementations — [PyCodeKG](https://github.com/Flux-Frontiers/pycode_kg), [FTreeKG](https://github.com/Flux-Frontiers/ftree_kg), [DocKG](https://github.com/Flux-Frontiers/doc_kg), [AgentKG](https://github.com/Flux-Frontiers/agent_kg) — depend on.
|
|
21
|
+
|
|
22
|
+
Every KGModule shares the same `NodeSpec`, `EdgeSpec`, `KGExtractor`, and `KGModule` base classes defined here, ensuring consistent interfaces across the ecosystem. The snapshot subsystem enables temporal metric tracking, delta comparison, and pruning across git commits.
|
|
23
|
+
|
|
24
|
+
---
|
|
25
|
+
|
|
26
|
+
## Features
|
|
27
|
+
|
|
28
|
+
- **Core type abstractions** — `NodeSpec`, `EdgeSpec`, `QueryResult`, `SnippetPack` dataclasses for knowledge-graph nodes, edges, and query results
|
|
29
|
+
- **KGExtractor base class** — Abstract interface for domain-specific extractors with `extract()`, `node_kinds()`, `edge_kinds()`, and `coverage_metric()`
|
|
30
|
+
- **KGModule base class** — Abstract interface for knowledge-graph modules with `build()`, `query()`, `pack()`, `stats()`, and `analyze()`
|
|
31
|
+
- **Snapshot models** — `Snapshot` dataclass keyed by git tree hash with free-form metrics, hotspots, issues, and delta tracking
|
|
32
|
+
- **SnapshotManager** — Capture, persist, load, list, diff, and prune snapshots with automatic deduplication and delta computation
|
|
33
|
+
- **SnapshotManifest** — Fast-lookup index of all snapshots with format versioning
|
|
34
|
+
- **Zero dependencies** — Stdlib-only; no external packages required at runtime
|
|
35
|
+
|
|
36
|
+
---
|
|
37
|
+
|
|
38
|
+
## Installation
|
|
39
|
+
|
|
40
|
+
**Requirements:** Python ≥ 3.12, < 3.14
|
|
41
|
+
|
|
42
|
+
### Standalone (pip)
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
pip install kgmodule-utils
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
### Existing Poetry project
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
poetry add kgmodule-utils
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
Or declare it directly in your `pyproject.toml`:
|
|
55
|
+
|
|
56
|
+
```toml
|
|
57
|
+
[tool.poetry.dependencies]
|
|
58
|
+
kgmodule-utils = "^0.2.0"
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
---
|
|
62
|
+
|
|
63
|
+
## Quick Start
|
|
64
|
+
|
|
65
|
+
### Types — Define a KGExtractor
|
|
66
|
+
|
|
67
|
+
```python
|
|
68
|
+
from kg_utils.types import NodeSpec, EdgeSpec, KGExtractor, KGModule
|
|
69
|
+
|
|
70
|
+
class MyExtractor(KGExtractor):
|
|
71
|
+
def node_kinds(self) -> list[str]:
|
|
72
|
+
return ["module", "function", "class"]
|
|
73
|
+
|
|
74
|
+
def edge_kinds(self) -> list[str]:
|
|
75
|
+
return ["CONTAINS", "CALLS", "IMPORTS"]
|
|
76
|
+
|
|
77
|
+
def extract(self, source_root: str):
|
|
78
|
+
# Yield NodeSpec and EdgeSpec objects from your domain
|
|
79
|
+
yield NodeSpec(
|
|
80
|
+
node_id="fn:main:hello",
|
|
81
|
+
kind="function",
|
|
82
|
+
name="hello",
|
|
83
|
+
qualname="main.hello",
|
|
84
|
+
source_path="main.py",
|
|
85
|
+
docstring="Greet the user.",
|
|
86
|
+
)
|
|
87
|
+
yield EdgeSpec(
|
|
88
|
+
source_id="mod:main",
|
|
89
|
+
target_id="fn:main:hello",
|
|
90
|
+
relation="CONTAINS",
|
|
91
|
+
)
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
### Snapshots — Track metrics over time
|
|
95
|
+
|
|
96
|
+
```python
|
|
97
|
+
from kg_utils.snapshots import SnapshotManager
|
|
98
|
+
|
|
99
|
+
mgr = SnapshotManager(snapshots_dir=".my_kg/snapshots", package_name="my-kg")
|
|
100
|
+
|
|
101
|
+
# Capture a snapshot from current metrics
|
|
102
|
+
snapshot = mgr.capture(metrics={
|
|
103
|
+
"total_nodes": 142,
|
|
104
|
+
"total_edges": 387,
|
|
105
|
+
"coverage": 0.78,
|
|
106
|
+
})
|
|
107
|
+
|
|
108
|
+
# Save with automatic deduplication
|
|
109
|
+
mgr.save_snapshot(snapshot)
|
|
110
|
+
|
|
111
|
+
# List and compare
|
|
112
|
+
snaps = mgr.list_snapshots(limit=5)
|
|
113
|
+
delta = mgr.diff_snapshots(key_a=snaps[0].key, key_b=snaps[-1].key)
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
---
|
|
117
|
+
|
|
118
|
+
## API Reference
|
|
119
|
+
|
|
120
|
+
### `kg_utils.types`
|
|
121
|
+
|
|
122
|
+
| Class | Description |
|
|
123
|
+
|---|---|
|
|
124
|
+
| `NodeSpec` | Dataclass for KG nodes: `node_id`, `kind`, `name`, `qualname`, `source_path`, `docstring` |
|
|
125
|
+
| `EdgeSpec` | Dataclass for KG edges: `source_id`, `target_id`, `relation` |
|
|
126
|
+
| `QueryResult` | Container for query responses with nodes, edges, and metadata |
|
|
127
|
+
| `SnippetPack` | Extended result container with source-code snippets |
|
|
128
|
+
| `KGExtractor` | Abstract base class for domain extractors |
|
|
129
|
+
| `KGModule` | Abstract base class for knowledge-graph modules |
|
|
130
|
+
|
|
131
|
+
### `kg_utils.snapshots`
|
|
132
|
+
|
|
133
|
+
| Class | Description |
|
|
134
|
+
|---|---|
|
|
135
|
+
| `Snapshot` | Temporal snapshot keyed by git tree hash with free-form metrics and deltas |
|
|
136
|
+
| `SnapshotManager` | Capture, persist, load, list, diff, and prune snapshots |
|
|
137
|
+
| `SnapshotManifest` | Index of all snapshots with format versioning and fast lookup |
|
|
138
|
+
| `PruneResult` | Summary of pruning operations: removed, orphaned, broken entries |
|
|
139
|
+
|
|
140
|
+
---
|
|
141
|
+
|
|
142
|
+
## Project Structure
|
|
143
|
+
|
|
144
|
+
```
|
|
145
|
+
KG_utils/
|
|
146
|
+
├── LICENSE
|
|
147
|
+
├── README.md
|
|
148
|
+
├── pyproject.toml
|
|
149
|
+
├── pytest.ini
|
|
150
|
+
├── src/
|
|
151
|
+
│ └── kg_utils/
|
|
152
|
+
│ ├── __init__.py
|
|
153
|
+
│ ├── py.typed # PEP 561 marker
|
|
154
|
+
│ ├── types/
|
|
155
|
+
│ │ ├── __init__.py # Public re-exports
|
|
156
|
+
│ │ ├── specs.py # NodeSpec, EdgeSpec, QueryResult, SnippetPack
|
|
157
|
+
│ │ ├── extractor.py # KGExtractor ABC
|
|
158
|
+
│ │ └── module.py # KGModule ABC
|
|
159
|
+
│ └── snapshots/
|
|
160
|
+
│ ├── __init__.py # Public re-exports
|
|
161
|
+
│ ├── models.py # Snapshot, SnapshotManifest, PruneResult
|
|
162
|
+
│ └── manager.py # SnapshotManager
|
|
163
|
+
└── tests/
|
|
164
|
+
├── __init__.py
|
|
165
|
+
├── test_types.py
|
|
166
|
+
└── test_snapshots.py
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
---
|
|
170
|
+
|
|
171
|
+
## Development
|
|
172
|
+
|
|
173
|
+
```bash
|
|
174
|
+
git clone https://github.com/Flux-Frontiers/KG_utils.git
|
|
175
|
+
cd KG_utils
|
|
176
|
+
poetry install --with dev
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
Run the test suite:
|
|
180
|
+
|
|
181
|
+
```bash
|
|
182
|
+
poetry run pytest
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
---
|
|
186
|
+
|
|
187
|
+
## License
|
|
188
|
+
|
|
189
|
+
[Elastic License 2.0](https://www.elastic.co/licensing/elastic-license) — see [LICENSE](LICENSE).
|
|
190
|
+
|
|
191
|
+
Free to use, modify, and distribute. You may not offer the software as a hosted or managed service to third parties. Commercial use internally is permitted.
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
# pyproject.toml — kgmodule-utils: shared types and snapshot infrastructure for KG modules
|
|
2
|
+
#
|
|
3
|
+
# Zero-dependency (stdlib-only) package providing:
|
|
4
|
+
# kg_utils.types — NodeSpec, EdgeSpec, KGExtractor, KGModule, etc.
|
|
5
|
+
# kg_utils.snapshots — Snapshot, SnapshotManager, SnapshotManifest, etc.
|
|
6
|
+
|
|
7
|
+
[build-system]
|
|
8
|
+
requires = ["poetry-core"]
|
|
9
|
+
build-backend = "poetry.core.masonry.api"
|
|
10
|
+
|
|
11
|
+
[project]
|
|
12
|
+
name = "kgmodule-utils"
|
|
13
|
+
version = "0.2.0"
|
|
14
|
+
description = "Shared types and snapshot infrastructure for the KGModule SDK"
|
|
15
|
+
readme = "README.md"
|
|
16
|
+
license = { text = "Elastic-2.0" }
|
|
17
|
+
authors = [
|
|
18
|
+
{ name = "Eric G. Suchanek, PhD", email = "suchanek@flux-frontiers.com" }
|
|
19
|
+
]
|
|
20
|
+
keywords = ["knowledge-graph", "kgmodule", "sdk", "types", "snapshots"]
|
|
21
|
+
classifiers = [
|
|
22
|
+
"Development Status :: 3 - Alpha",
|
|
23
|
+
"Intended Audience :: Developers",
|
|
24
|
+
"Programming Language :: Python :: 3",
|
|
25
|
+
"Programming Language :: Python :: 3.12",
|
|
26
|
+
"Programming Language :: Python :: 3.13",
|
|
27
|
+
]
|
|
28
|
+
requires-python = ">=3.12,<3.14"
|
|
29
|
+
dependencies = []
|
|
30
|
+
|
|
31
|
+
[project.urls]
|
|
32
|
+
Repository = "https://github.com/Flux-Frontiers/kg_utils"
|
|
33
|
+
|
|
34
|
+
[tool.poetry]
|
|
35
|
+
packages = [{include = "kg_utils", from = "src"}]
|
|
36
|
+
|
|
37
|
+
[tool.poetry.group.dev]
|
|
38
|
+
optional = true
|
|
39
|
+
|
|
40
|
+
[tool.poetry.group.dev.dependencies]
|
|
41
|
+
pytest = "^8.0.0"
|
|
42
|
+
pytest-cov = "^5.0.0"
|
|
43
|
+
black = ">=22.0"
|
|
44
|
+
ruff = ">=0.4.0"
|
|
45
|
+
mypy = ">=1.10.0"
|
|
46
|
+
|
|
47
|
+
[tool.black]
|
|
48
|
+
line-length = 100
|
|
49
|
+
target-version = ["py312", "py313"]
|
|
50
|
+
|
|
51
|
+
[tool.ruff]
|
|
52
|
+
line-length = 100
|
|
53
|
+
target-version = "py312"
|
|
54
|
+
|
|
55
|
+
[tool.mypy]
|
|
56
|
+
python_version = "3.12"
|
|
57
|
+
strict = true
|
|
58
|
+
warn_unused_ignores = true
|
|
59
|
+
disallow_untyped_defs = true
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
"""kg_utils — Shared types, snapshots, and embedding protocol for the KGModule SDK.
|
|
2
|
+
|
|
3
|
+
Sub-packages:
|
|
4
|
+
kg_utils.types — NodeSpec, EdgeSpec, KGExtractor, KGModule, etc.
|
|
5
|
+
kg_utils.snapshots — Snapshot, SnapshotManager, SnapshotManifest, etc.
|
|
6
|
+
kg_utils.embed — Embedder protocol, DEFAULT_MODEL, KNOWN_MODELS,
|
|
7
|
+
kg_model_cache_dir(), resolve_model_path().
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
# Keep in sync with ``[project].version`` in pyproject.toml (this release is 0.2.0).
__version__ = "0.2.0"
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
"""
|
|
2
|
+
kg_utils.embed — Shared embedding protocol and model-cache convention.
|
|
3
|
+
|
|
4
|
+
Zero external dependencies (stdlib only). Concrete implementations
|
|
5
|
+
(SentenceTransformerEmbedder, LlamaCppEmbedder) live in each KG module and in
|
|
6
|
+
kgrag; this module provides only the shared contract they all implement.
|
|
7
|
+
|
|
8
|
+
Contents
|
|
9
|
+
--------
|
|
10
|
+
Embedder
|
|
11
|
+
Structural protocol: any object with ``embed_query(text) -> list[float]``
|
|
12
|
+
satisfies it. KG modules, kgrag adapters, and tests can type-hint against
|
|
13
|
+
this without coupling to any specific implementation.
|
|
14
|
+
|
|
15
|
+
DEFAULT_MODEL
|
|
16
|
+
Canonical default embedding model for the KGModule stack.
|
|
17
|
+
``BAAI/bge-small-en-v1.5`` (384-dim, ~24 MB, no licence restrictions).
|
|
18
|
+
|
|
19
|
+
KNOWN_MODELS
|
|
20
|
+
Short alias → HuggingFace repo ID mapping shared by all modules.
|
|
21
|
+
Lets users write ``"bge-small"`` instead of ``"BAAI/bge-small-en-v1.5"``.
|
|
22
|
+
|
|
23
|
+
kg_model_cache_dir()
|
|
24
|
+
Return the system-wide model cache root (``~/.kgrag/models/`` by default).
|
|
25
|
+
Override with the ``KGRAG_MODEL_DIR`` environment variable. All KG modules
|
|
26
|
+
should resolve their model paths through this function so that a single
|
|
27
|
+
``KGRAG_MODEL_DIR`` setting redirects every module at once.
|
|
28
|
+
|
|
29
|
+
Local fallback convention for standalone use::
|
|
30
|
+
|
|
31
|
+
path = kg_model_cache_dir() / model_name.replace("/", "--")
|
|
32
|
+
|
|
33
|
+
Author: Eric G. Suchanek, PhD
|
|
34
|
+
License: Elastic 2.0
|
|
35
|
+
"""
|
|
36
|
+
|
|
37
|
+
from __future__ import annotations
|
|
38
|
+
|
|
39
|
+
import os
|
|
40
|
+
from pathlib import Path
|
|
41
|
+
from typing import Protocol, runtime_checkable
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
# ---------------------------------------------------------------------------
|
|
45
|
+
# Shared protocol
|
|
46
|
+
# ---------------------------------------------------------------------------
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
@runtime_checkable
class Embedder(Protocol):
    """Structural embedding contract shared across the KGModule stack.

    An object conforms simply by providing an ``embed_query`` method; no
    inheritance from this class is required. Because the protocol is
    ``runtime_checkable``, ``isinstance(obj, Embedder)`` can be used, but that
    check only verifies that the attribute exists, not its signature.

    :method embed_query: Embed a single query string into a float vector.
    """

    def embed_query(self, text: str) -> list[float]:
        """Return the embedding of one query string.

        :param text: The query string to embed.
        :return: The embedding vector as a plain Python list of floats.
        """
        ...
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
# ---------------------------------------------------------------------------
|
|
69
|
+
# Shared constants
|
|
70
|
+
# ---------------------------------------------------------------------------
|
|
71
|
+
|
|
72
|
+
DEFAULT_MODEL: str = "BAAI/bge-small-en-v1.5"
"""Default embedding model for the KGModule stack (384-dim)."""

KNOWN_MODELS: dict[str, str] = {
    # Aliases of the default model reuse DEFAULT_MODEL so they cannot drift apart.
    "default": DEFAULT_MODEL,
    "bge-small": DEFAULT_MODEL,
    "bge-small-en-v1.5": DEFAULT_MODEL,
    # Larger BGE variant.
    "bge-large": "BAAI/bge-large-en-v1.5",
    "bge-large-en-v1.5": "BAAI/bge-large-en-v1.5",
    # sentence-transformers models.
    "all-MiniLM-L6-v2": "sentence-transformers/all-MiniLM-L6-v2",
    "all-mpnet-base-v2": "sentence-transformers/all-mpnet-base-v2",
    # Nomic embed.
    "nomic": "nomic-ai/nomic-embed-text-v1.5",
    "nomic-v1.5": "nomic-ai/nomic-embed-text-v1.5",
}
"""Mapping of short alias to HuggingFace repo ID, shared by all KG modules and kgrag."""
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
# ---------------------------------------------------------------------------
|
|
90
|
+
# Shared cache path convention
|
|
91
|
+
# ---------------------------------------------------------------------------
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def kg_model_cache_dir() -> Path:
    """Return the system-wide embedding model cache root.

    Default: ``~/.kgrag/models/`` under the current user's home directory.
    Override: set the ``KGRAG_MODEL_DIR`` environment variable. An unset or
    empty value falls back to the default.

    A leading ``~`` or ``~user`` in the override is expanded before the path
    is resolved, so values that were never expanded by a shell (for example
    from a ``.env`` file or a quoted ``export``) still point into the home
    directory rather than at a literal ``~`` directory under the current
    working directory.

    All KG modules should resolve model paths through this function so that a
    single env-var change redirects every module's cache at once.

    :return: :class:`~pathlib.Path` to the model cache directory. The
        directory is not created and is not checked for existence.
    """
    override = os.environ.get("KGRAG_MODEL_DIR")
    if override:
        # os.path.expanduser leaves the string untouched when the home
        # directory cannot be determined, so this cannot raise on odd input.
        return Path(os.path.expanduser(override)).resolve()
    return Path.home() / ".kgrag" / "models"
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def resolve_model_path(model_name: str, local_fallback: Path | None = None) -> Path:
    """Compute the cache directory for *model_name*.

    *model_name* is first looked up in ``KNOWN_MODELS``; names that are not a
    known alias are used unchanged.

    Two directory layouts are produced, depending on which root is chosen:

    * **Per-module fallback** — used only when *local_fallback* is given and
      ``KGRAG_MODEL_DIR`` is unset or empty. The repo ID is flattened into a
      single directory name by replacing ``/`` with ``--``, e.g.
      ``<local_fallback>/BAAI--bge-small-en-v1.5``.
    * **System-wide cache** — used in every other case. The repo ID keeps its
      ``<org>/<model>`` nesting under ``kg_model_cache_dir()``, e.g.
      ``~/.kgrag/models/BAAI/bge-small-en-v1.5``.

    The returned path is not checked for existence.

    :param model_name: HuggingFace model identifier or known alias.
    :param local_fallback: Per-module fallback directory (used when
        ``KGRAG_MODEL_DIR`` is not set).
    :return: :class:`~pathlib.Path` to the model directory.
    """
    repo_id = KNOWN_MODELS.get(model_name, model_name)

    # A global override always wins; the fallback is only for standalone use.
    if local_fallback is not None and not os.environ.get("KGRAG_MODEL_DIR"):
        return local_fallback / repo_id.replace("/", "--")

    return kg_model_cache_dir() / repo_id.replace("/", os.sep)
|
|
File without changes
|