kgmodule-utils 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kg_utils/__init__.py +10 -0
- kg_utils/embed.py +131 -0
- kg_utils/py.typed +0 -0
- kg_utils/snapshots/__init__.py +16 -0
- kg_utils/snapshots/manager.py +497 -0
- kg_utils/snapshots/models.py +137 -0
- kg_utils/types/__init__.py +14 -0
- kg_utils/types/extractor.py +68 -0
- kg_utils/types/module.py +87 -0
- kg_utils/types/specs.py +90 -0
- kgmodule_utils-0.2.0.dist-info/METADATA +210 -0
- kgmodule_utils-0.2.0.dist-info/RECORD +14 -0
- kgmodule_utils-0.2.0.dist-info/WHEEL +4 -0
- kgmodule_utils-0.2.0.dist-info/licenses/LICENSE +93 -0
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
"""kg_utils/snapshots/models.py — Snapshot data models.
|
|
2
|
+
|
|
3
|
+
Every snapshot is keyed by git tree hash and contains:
|
|
4
|
+
- Timestamp and branch metadata
|
|
5
|
+
- Metrics dict (domain-flexible: total_nodes, total_edges, node_counts, ...)
|
|
6
|
+
- Hotspots list and issues list
|
|
7
|
+
- Deltas vs. previous and baseline snapshots
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
from dataclasses import dataclass, field
|
|
13
|
+
from typing import Any
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclass
class Snapshot:
    """A temporal snapshot of KG metrics, identified by a git tree hash.

    ``metrics`` is a free-form dict so that each domain can store whatever
    fields it needs (docstring_coverage, total_files, etc.) without requiring
    changes to this shared data model.  The only required keys are
    ``total_nodes`` and ``total_edges`` -- the manager uses these for delta
    computation.

    ``vs_previous`` and ``vs_baseline`` are also free-form dicts so that
    domain-specific delta fields (coverage_delta, files_delta, ...) can be
    stored alongside the universal ``nodes`` and ``edges`` deltas.

    :param branch: Branch name recorded with the snapshot.
    :param timestamp: Capture time as an ISO 8601 UTC string.
    :param metrics: Free-form metrics dict.
    :param version: Optional version string; empty when unknown.
    :param hotspots: List of hotspot dicts.
    :param issues: List of issue strings.
    :param vs_previous: Deltas against the previous snapshot, or ``None``.
    :param vs_baseline: Deltas against the baseline snapshot, or ``None``.
    :param tree_hash: Git tree hash; exposed as :attr:`key`.
    """

    branch: str
    timestamp: str  # ISO 8601 UTC
    metrics: dict[str, Any]
    version: str = ""
    hotspots: list[dict[str, Any]] = field(default_factory=list)
    issues: list[str] = field(default_factory=list)
    vs_previous: dict[str, Any] | None = None
    vs_baseline: dict[str, Any] | None = None
    tree_hash: str = ""

    @property
    def key(self) -> str:
        """Stable file key: the git tree hash."""
        return self.tree_hash

    def to_dict(self) -> dict[str, Any]:
        """Convert to a plain dictionary suitable for JSON serialization.

        The tree hash is emitted under ``"key"`` (not ``"tree_hash"``).
        Containers are returned by reference, not copied.

        :return: Dict with the snapshot's fields.
        """
        return {
            "key": self.tree_hash,
            "branch": self.branch,
            "timestamp": self.timestamp,
            "version": self.version,
            "metrics": self.metrics,
            "hotspots": self.hotspots,
            "issues": self.issues,
            "vs_previous": self.vs_previous,
            "vs_baseline": self.vs_baseline,
        }

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> Snapshot:
        """Reconstruct a snapshot from a dictionary loaded from JSON.

        ``data`` is only read, never mutated.  Unknown keys (including the
        legacy ``commit`` field) are ignored.  Being a classmethod, calling
        this on a subclass builds an instance of that subclass.

        :param data: Mapping as produced by :meth:`to_dict`, or a legacy
            payload that stores the hash under ``tree_hash``.
        :return: The reconstructed snapshot.
        """
        # ``key`` wins whenever it is present; the legacy ``tree_hash`` field
        # is only consulted when ``key`` is absent.
        if "key" in data:
            tree_hash = data["key"]
        else:
            tree_hash = data.get("tree_hash", "")

        # An explicit JSON ``null`` would otherwise leak ``None`` into fields
        # that are declared (and later used) as containers.
        metrics = data.get("metrics")
        hotspots = data.get("hotspots")
        issues = data.get("issues")

        return cls(
            branch=data.get("branch", ""),
            timestamp=data.get("timestamp", ""),
            metrics={} if metrics is None else metrics,
            version=data.get("version", ""),
            hotspots=[] if hotspots is None else hotspots,
            issues=[] if issues is None else issues,
            vs_previous=data.get("vs_previous"),
            vs_baseline=data.get("vs_baseline"),
            tree_hash=tree_hash,
        )
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
@dataclass
class PruneResult:
    """Outcome report for a :meth:`SnapshotManager.prune_snapshots` call.

    :param removed: Keys of snapshots pruned as metric-duplicates.
    :param orphaned_files: Filenames of JSON files deleted from disk because
        they were not referenced by the manifest.
    :param broken_entries: Keys of manifest entries whose JSON file was missing.
    :param dry_run: ``True`` when the call was a dry run (nothing deleted).
    """

    removed: list[str]
    orphaned_files: list[str]
    broken_entries: list[str]
    dry_run: bool

    @property
    def total_cleaned(self) -> int:
        """Count of items removed (or that *would* be removed in dry-run).

        :return: Combined length of the three result lists.
        """
        buckets = (self.removed, self.orphaned_files, self.broken_entries)
        return sum(len(bucket) for bucket in buckets)
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
@dataclass
class SnapshotManifest:
    """Index of all snapshots.

    :param format_version: Manifest format version; serialized as ``"format"``.
    :param last_update: Last-update marker string; empty when never set.
    :param snapshots: Manifest entries, one dict per snapshot.
    """

    format_version: str = "1.0"
    last_update: str = ""
    snapshots: list[dict[str, Any]] = field(default_factory=list)

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a plain dict.

        ``format_version`` is written under the key ``"format"``.  The
        ``snapshots`` list is returned by reference, not copied.

        :return: Dict with ``format``, ``last_update`` and ``snapshots``.
        """
        return {
            "format": self.format_version,
            "last_update": self.last_update,
            "snapshots": self.snapshots,
        }

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> SnapshotManifest:
        """Reconstruct a manifest from a dict.

        Missing keys fall back to the dataclass defaults.  Being a
        classmethod, calling this on a subclass builds that subclass.

        :param data: Mapping as produced by :meth:`to_dict`.
        :return: The reconstructed manifest.
        """
        # Treat an explicit JSON ``null`` like a missing list so the field
        # always holds something iterable.
        entries = data.get("snapshots")
        return cls(
            format_version=data.get("format", "1.0"),
            last_update=data.get("last_update", ""),
            snapshots=[] if entries is None else entries,
        )
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"""kg_utils.types — Core dataclasses and base classes for the KGModule SDK."""
|
|
2
|
+
|
|
3
|
+
from kg_utils.types.specs import EdgeSpec, NodeSpec, QueryResult, SnippetPack
|
|
4
|
+
from kg_utils.types.extractor import KGExtractor
|
|
5
|
+
from kg_utils.types.module import KGModule
|
|
6
|
+
|
|
7
|
+
__all__ = [
|
|
8
|
+
"EdgeSpec",
|
|
9
|
+
"KGExtractor",
|
|
10
|
+
"KGModule",
|
|
11
|
+
"NodeSpec",
|
|
12
|
+
"QueryResult",
|
|
13
|
+
"SnippetPack",
|
|
14
|
+
]
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
"""kg_utils/types/extractor.py — Abstract base class for KG extractors."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from collections.abc import Iterator
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from kg_utils.types.specs import EdgeSpec, NodeSpec
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class KGExtractor:
    """Base class for knowledge-graph extractors.

    Subclasses must implement :meth:`node_kinds`, :meth:`edge_kinds`,
    and :meth:`extract`.

    :param repo_path: Absolute path to the repository or corpus root.
    :param config: Optional domain-specific configuration dict.
    """

    def __init__(self, repo_path: Path, config: dict[str, Any] | None = None) -> None:
        self.repo_path = repo_path
        # A missing (or empty) config is replaced by a fresh dict.
        self.config = config or {}

    def node_kinds(self) -> list[str]:
        """Return canonical node kind names.

        :return: List of node kind strings.
        :raises NotImplementedError: Always, until overridden by a subclass.
        """
        raise NotImplementedError

    def edge_kinds(self) -> list[str]:
        """Return canonical edge relation types.

        :return: List of edge relation strings.
        :raises NotImplementedError: Always, until overridden by a subclass.
        """
        raise NotImplementedError

    def meaningful_node_kinds(self) -> list[str]:
        """Return node kinds included in the vector index and coverage metrics.

        Override to exclude structural stubs from the default (all node_kinds).

        :return: Subset of node_kinds() to index semantically.
        """
        return self.node_kinds()

    def coverage_metric(self, nodes: list[NodeSpec]) -> float:
        """Compute a domain coverage quality metric.

        Default: fraction of meaningful nodes with a non-empty docstring
        (whitespace-only docstrings count as empty).

        :param nodes: All extracted NodeSpec objects.
        :return: Coverage score in [0.0, 1.0]; ``0.0`` when no node has a
            meaningful kind.
        """
        # Resolve the kinds once: the set is invariant across the loop, and a
        # set makes each membership test O(1).
        meaningful_kinds = frozenset(self.meaningful_node_kinds())
        meaningful = [node for node in nodes if node.kind in meaningful_kinds]
        if not meaningful:
            return 0.0
        # ``or ""`` lets a None docstring count as undocumented instead of
        # raising AttributeError on ``.strip()``.
        covered = sum(1 for node in meaningful if (node.docstring or "").strip())
        return covered / len(meaningful)

    def extract(self) -> Iterator[NodeSpec | EdgeSpec]:
        """Traverse the source and yield NodeSpec / EdgeSpec objects.

        :return: Iterator of NodeSpec and EdgeSpec objects.
        :raises NotImplementedError: Always, until overridden by a subclass.
        """
        raise NotImplementedError
|
kg_utils/types/module.py
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
"""kg_utils/types/module.py — Abstract base class for KG modules."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from kg_utils.types.extractor import KGExtractor
|
|
9
|
+
from kg_utils.types.specs import QueryResult, SnippetPack
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class KGModule:
    """Common base for knowledge-graph modules.

    Every operation defined here raises :class:`NotImplementedError`.
    Subclasses must implement :meth:`make_extractor` and :meth:`kind`, and
    should override :meth:`build`, :meth:`query`, :meth:`stats`,
    :meth:`pack`, and :meth:`analyze` with domain-specific logic.

    :param repo_root: Absolute path to the repository or corpus root.
    :param db_path: Path for the SQLite graph database.
    :param lancedb_dir: Path for the LanceDB vector index directory.
    :param config: Optional domain-specific configuration dict.
    """

    def __init__(
        self,
        repo_root: Path,
        db_path: Path | None = None,
        lancedb_dir: Path | None = None,
        config: dict[str, Any] | None = None,
    ) -> None:
        # A missing (or empty) config is replaced by a fresh dict.
        self.config = config if config else {}
        self.repo_root, self.db_path, self.lancedb_dir = repo_root, db_path, lancedb_dir

    def make_extractor(self) -> KGExtractor:
        """Create the domain extractor used by this module.

        :return: KGExtractor subclass instance.
        :raises NotImplementedError: Always, until overridden.
        """
        raise NotImplementedError

    def kind(self) -> str:
        """Identify the KGKind of this module.

        :return: Kind string (e.g. "code", "meta", "doc").
        :raises NotImplementedError: Always, until overridden.
        """
        raise NotImplementedError

    def build(self, wipe: bool = False) -> None:
        """Construct the knowledge graph index.

        :param wipe: If True, delete existing index before building.
        :raises NotImplementedError: Always, until overridden.
        """
        raise NotImplementedError

    def query(self, q: str, k: int = 8, **kwargs: Any) -> QueryResult:
        """Run a query against the knowledge graph.

        :param q: Natural-language query string.
        :param k: Number of results to return.
        :param kwargs: Additional domain-specific options.
        :return: QueryResult with matched nodes and edges.
        :raises NotImplementedError: Always, until overridden.
        """
        raise NotImplementedError

    def stats(self) -> dict[str, Any]:
        """Report statistics about the knowledge graph.

        :return: Dict with keys like total_nodes, total_edges, etc.
        :raises NotImplementedError: Always, until overridden.
        """
        raise NotImplementedError

    def pack(self, q: str, **kwargs: Any) -> SnippetPack:
        """Bundle query results together with source context.

        :param q: Natural-language query string.
        :param kwargs: Additional domain-specific options.
        :return: SnippetPack with nodes, edges, and snippets.
        :raises NotImplementedError: Always, until overridden.
        """
        raise NotImplementedError

    def analyze(self) -> str:
        """Perform a full analysis and produce a Markdown report.

        :return: Markdown-formatted analysis report.
        :raises NotImplementedError: Always, until overridden.
        """
        raise NotImplementedError
|
kg_utils/types/specs.py
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
"""kg_utils/types/specs.py — Core dataclasses shared by all KG modules."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass
class NodeSpec:
    """Description of a single knowledge-graph node.

    :param node_id: Unique identifier, typically ``<kind>:<path>:<qualname>``.
    :param kind: Node kind (e.g. "file", "function", "class", "directory").
    :param name: Short display name.
    :param qualname: Fully-qualified name or relative path.
    :param source_path: Path to the source file (relative to repo root).
    :param docstring: Semantic content for vector indexing; defaults to the
        empty string.
    """

    # Required fields -- their order is the positional constructor order.
    node_id: str
    kind: str
    name: str
    qualname: str
    source_path: str
    # Optional field.
    docstring: str = ""
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@dataclass
class EdgeSpec:
    """Description of a single directed knowledge-graph edge.

    :param source_id: Node ID of the edge source.
    :param target_id: Node ID of the edge target.
    :param relation: Relation type (e.g. "CONTAINS", "CALLS", "IMPORTS").
    """

    # All three fields are required; their order is the positional order.
    source_id: str
    target_id: str
    relation: str
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
@dataclass
class QueryResult:
    """Container for the outcome of ``KGModule.query()``.

    Every field has a default, so ``QueryResult()`` is an empty result.

    :param nodes: List of matched node dicts.
    :param edges: List of matched edge dicts.
    :param seeds: Number of seed nodes from vector search.
    :param expanded_nodes: Number of nodes after graph expansion.
    :param returned_nodes: Number of nodes actually returned.
    :param hop: Number of hops used in graph expansion.
    :param rels: Relation types used in expansion.
    """

    # Matched graph content; each instance gets its own fresh lists.
    nodes: list[dict[str, Any]] = field(default_factory=list)
    edges: list[dict[str, Any]] = field(default_factory=list)
    # Search / expansion counters.
    seeds: int = 0
    expanded_nodes: int = 0
    returned_nodes: int = 0
    hop: int = 0
    rels: list[str] = field(default_factory=list)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
@dataclass
class SnippetPack:
    """Container for the outcome of ``KGModule.pack()``.

    Only ``query`` is required; every other field has a default.

    :param query: The original query string.
    :param seeds: Number of seed nodes from vector search.
    :param expanded_nodes: Number of nodes after graph expansion.
    :param returned_nodes: Number of nodes actually returned.
    :param hop: Number of hops used in expansion.
    :param rels: Relation types used in expansion.
    :param model: Embedding model identifier.
    :param nodes: Node dicts included in the pack.
    :param edges: Edge dicts included in the pack.
    :param snippets: Source-code snippets (for code KGs).
    """

    query: str
    # Search / expansion counters.
    seeds: int = 0
    expanded_nodes: int = 0
    returned_nodes: int = 0
    hop: int = 0
    rels: list[str] = field(default_factory=list)
    model: str = ""
    # Packed content; each instance gets its own fresh lists.
    nodes: list[Any] = field(default_factory=list)
    edges: list[Any] = field(default_factory=list)
    snippets: list[Any] = field(default_factory=list)
|
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: kgmodule-utils
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: Shared types and snapshot infrastructure for the KGModule SDK
|
|
5
|
+
License: Elastic-2.0
|
|
6
|
+
License-File: LICENSE
|
|
7
|
+
Keywords: knowledge-graph,kgmodule,sdk,types,snapshots
|
|
8
|
+
Author: Eric G. Suchanek, PhD
|
|
9
|
+
Author-email: suchanek@flux-frontiers.com
|
|
10
|
+
Requires-Python: >=3.12,<3.14
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
16
|
+
Project-URL: Repository, https://github.com/Flux-Frontiers/kg_utils
|
|
17
|
+
Description-Content-Type: text/markdown
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
[Python 3.12 | 3.13](https://www.python.org/)
[License: Elastic-2.0](https://www.elastic.co/licensing/elastic-license)
[Releases](https://github.com/Flux-Frontiers/KG_utils/releases)
[CI](https://github.com/Flux-Frontiers/KG_utils/actions/workflows/ci.yml)
[Poetry](https://python-poetry.org/)
|
|
25
|
+
|
|
26
|
+
# kgmodule-utils
|
|
27
|
+
|
|
28
|
+
**kgmodule-utils** — Shared types and snapshot infrastructure for the KGModule SDK.
|
|
29
|
+
|
|
30
|
+
*Author: Eric G. Suchanek, PhD*
|
|
31
|
+
|
|
32
|
+
*Flux-Frontiers, Liberty TWP, OH*
|
|
33
|
+
|
|
34
|
+
---
|
|
35
|
+
|
|
36
|
+
## Overview
|
|
37
|
+
|
|
38
|
+
kgmodule-utils is the **zero-dependency foundation package** for the Flux-Frontiers knowledge-graph ecosystem. It provides the canonical type abstractions and temporal snapshot infrastructure that all KGModule implementations — [PyCodeKG](https://github.com/Flux-Frontiers/pycode_kg), [FTreeKG](https://github.com/Flux-Frontiers/ftree_kg), [DocKG](https://github.com/Flux-Frontiers/doc_kg), [AgentKG](https://github.com/Flux-Frontiers/agent_kg) — depend on.
|
|
39
|
+
|
|
40
|
+
Every KGModule shares the same `NodeSpec`, `EdgeSpec`, `KGExtractor`, and `KGModule` base classes defined here, ensuring consistent interfaces across the ecosystem. The snapshot subsystem enables temporal metric tracking, delta comparison, and pruning across git commits.
|
|
41
|
+
|
|
42
|
+
---
|
|
43
|
+
|
|
44
|
+
## Features
|
|
45
|
+
|
|
46
|
+
- **Core type abstractions** — `NodeSpec`, `EdgeSpec`, `QueryResult`, `SnippetPack` dataclasses for knowledge-graph nodes, edges, and query results
|
|
47
|
+
- **KGExtractor base class** — Abstract interface for domain-specific extractors with `extract()`, `node_kinds()`, `edge_kinds()`, and `coverage_metric()`
|
|
48
|
+
- **KGModule base class** — Abstract interface for knowledge-graph modules with `build()`, `query()`, `pack()`, `stats()`, and `analyze()`
|
|
49
|
+
- **Snapshot models** — `Snapshot` dataclass keyed by git tree hash with free-form metrics, hotspots, issues, and delta tracking
|
|
50
|
+
- **SnapshotManager** — Capture, persist, load, list, diff, and prune snapshots with automatic deduplication and delta computation
|
|
51
|
+
- **SnapshotManifest** — Fast-lookup index of all snapshots with format versioning
|
|
52
|
+
- **Zero dependencies** — Stdlib-only; no external packages required at runtime
|
|
53
|
+
|
|
54
|
+
---
|
|
55
|
+
|
|
56
|
+
## Installation
|
|
57
|
+
|
|
58
|
+
**Requirements:** Python ≥ 3.12, < 3.14
|
|
59
|
+
|
|
60
|
+
### Standalone (pip)
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
pip install kgmodule-utils
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
### Existing Poetry project
|
|
67
|
+
|
|
68
|
+
```bash
|
|
69
|
+
poetry add kgmodule-utils
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
Or declare it directly in your `pyproject.toml`:
|
|
73
|
+
|
|
74
|
+
```toml
|
|
75
|
+
[tool.poetry.dependencies]
|
|
76
|
+
kgmodule-utils = "^0.2.0"
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
---
|
|
80
|
+
|
|
81
|
+
## Quick Start
|
|
82
|
+
|
|
83
|
+
### Types — Define a KGModule
|
|
84
|
+
|
|
85
|
+
```python
|
|
86
|
+
from kg_utils.types import NodeSpec, EdgeSpec, KGExtractor, KGModule
|
|
87
|
+
|
|
88
|
+
class MyExtractor(KGExtractor):
|
|
89
|
+
def node_kinds(self) -> list[str]:
|
|
90
|
+
return ["module", "function", "class"]
|
|
91
|
+
|
|
92
|
+
def edge_kinds(self) -> list[str]:
|
|
93
|
+
return ["CONTAINS", "CALLS", "IMPORTS"]
|
|
94
|
+
|
|
95
|
+
def extract(self):
|
|
96
|
+
# Yield NodeSpec and EdgeSpec objects from your domain
|
|
97
|
+
yield NodeSpec(
|
|
98
|
+
node_id="fn:main:hello",
|
|
99
|
+
kind="function",
|
|
100
|
+
name="hello",
|
|
101
|
+
qualname="main.hello",
|
|
102
|
+
source_path="main.py",
|
|
103
|
+
docstring="Greet the user.",
|
|
104
|
+
)
|
|
105
|
+
yield EdgeSpec(
|
|
106
|
+
source_id="mod:main",
|
|
107
|
+
target_id="fn:main:hello",
|
|
108
|
+
relation="CONTAINS",
|
|
109
|
+
)
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
### Snapshots — Track metrics over time
|
|
113
|
+
|
|
114
|
+
```python
|
|
115
|
+
from kg_utils.snapshots import SnapshotManager
|
|
116
|
+
|
|
117
|
+
mgr = SnapshotManager(snapshots_dir=".my_kg/snapshots", package_name="my-kg")
|
|
118
|
+
|
|
119
|
+
# Capture a snapshot from current metrics
|
|
120
|
+
snapshot = mgr.capture(metrics={
|
|
121
|
+
"total_nodes": 142,
|
|
122
|
+
"total_edges": 387,
|
|
123
|
+
"coverage": 0.78,
|
|
124
|
+
})
|
|
125
|
+
|
|
126
|
+
# Save with automatic deduplication
|
|
127
|
+
mgr.save_snapshot(snapshot)
|
|
128
|
+
|
|
129
|
+
# List and compare
|
|
130
|
+
snaps = mgr.list_snapshots(limit=5)
|
|
131
|
+
delta = mgr.diff_snapshots(key_a=snaps[0].key, key_b=snaps[-1].key)
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
---
|
|
135
|
+
|
|
136
|
+
## API Reference
|
|
137
|
+
|
|
138
|
+
### `kg_utils.types`
|
|
139
|
+
|
|
140
|
+
| Class | Description |
|
|
141
|
+
|---|---|
|
|
142
|
+
| `NodeSpec` | Dataclass for KG nodes: `node_id`, `kind`, `name`, `qualname`, `source_path`, `docstring` |
|
|
143
|
+
| `EdgeSpec` | Dataclass for KG edges: `source_id`, `target_id`, `relation` |
|
|
144
|
+
| `QueryResult` | Container for query responses with nodes, edges, and metadata |
|
|
145
|
+
| `SnippetPack` | Extended result container with source-code snippets |
|
|
146
|
+
| `KGExtractor` | Abstract base class for domain extractors |
|
|
147
|
+
| `KGModule` | Abstract base class for knowledge-graph modules |
|
|
148
|
+
|
|
149
|
+
### `kg_utils.snapshots`
|
|
150
|
+
|
|
151
|
+
| Class | Description |
|
|
152
|
+
|---|---|
|
|
153
|
+
| `Snapshot` | Temporal snapshot keyed by git tree hash with free-form metrics and deltas |
|
|
154
|
+
| `SnapshotManager` | Capture, persist, load, list, diff, and prune snapshots |
|
|
155
|
+
| `SnapshotManifest` | Index of all snapshots with format versioning and fast lookup |
|
|
156
|
+
| `PruneResult` | Summary of pruning operations: removed, orphaned, broken entries |
|
|
157
|
+
|
|
158
|
+
---
|
|
159
|
+
|
|
160
|
+
## Project Structure
|
|
161
|
+
|
|
162
|
+
```
|
|
163
|
+
KG_utils/
|
|
164
|
+
├── LICENSE
|
|
165
|
+
├── README.md
|
|
166
|
+
├── pyproject.toml
|
|
167
|
+
├── pytest.ini
|
|
168
|
+
├── src/
|
|
169
|
+
│ └── kg_utils/
|
|
170
|
+
│ ├── __init__.py
|
|
171
|
+
│ ├── py.typed # PEP 561 marker
|
|
172
|
+
│ ├── types/
|
|
173
|
+
│ │ ├── __init__.py # Public re-exports
|
|
174
|
+
│ │ ├── specs.py # NodeSpec, EdgeSpec, QueryResult, SnippetPack
|
|
175
|
+
│ │ ├── extractor.py # KGExtractor ABC
|
|
176
|
+
│ │ └── module.py # KGModule ABC
|
|
177
|
+
│ └── snapshots/
|
|
178
|
+
│ ├── __init__.py # Public re-exports
|
|
179
|
+
│ ├── models.py # Snapshot, SnapshotManifest, PruneResult
|
|
180
|
+
│ └── manager.py # SnapshotManager
|
|
181
|
+
└── tests/
|
|
182
|
+
├── __init__.py
|
|
183
|
+
├── test_types.py
|
|
184
|
+
└── test_snapshots.py
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
---
|
|
188
|
+
|
|
189
|
+
## Development
|
|
190
|
+
|
|
191
|
+
```bash
|
|
192
|
+
git clone https://github.com/Flux-Frontiers/KG_utils.git
|
|
193
|
+
cd KG_utils
|
|
194
|
+
poetry install --with dev
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
Run the test suite:
|
|
198
|
+
|
|
199
|
+
```bash
|
|
200
|
+
poetry run pytest
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
---
|
|
204
|
+
|
|
205
|
+
## License
|
|
206
|
+
|
|
207
|
+
[Elastic License 2.0](https://www.elastic.co/licensing/elastic-license) — see [LICENSE](LICENSE).
|
|
208
|
+
|
|
209
|
+
Free to use, modify, and distribute. You may not offer the software as a hosted or managed service to third parties. Commercial use internally is permitted.
|
|
210
|
+
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
kg_utils/__init__.py,sha256=s-W_5K2Z_8H1mTpiCrWrNEHd5P3eAcCWUQdRNgoQ0H0,428
|
|
2
|
+
kg_utils/embed.py,sha256=lIqUdOjnd2TU1-epa7RHoe0y2MYRC1xmhc-5y--wrV8,4969
|
|
3
|
+
kg_utils/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
|
+
kg_utils/snapshots/__init__.py,sha256=IXgvXQ31AChUHR9fhrsMpMV_A_dSjoCly_Ad8rTdG9w,538
|
|
5
|
+
kg_utils/snapshots/manager.py,sha256=DT0NyKLAumiXEUEIg3Igf4MH0BGK8bwm65fRFUFlgrc,19707
|
|
6
|
+
kg_utils/snapshots/models.py,sha256=NxF2pqRU6LHyRujdDh4smyRZ8UbL3FIGANBgCmjPGSw,4576
|
|
7
|
+
kg_utils/types/__init__.py,sha256=Q7q5Bb2F6F6zbjSTpCXErIKKu5QSHg-9YChhbr7AqDQ,372
|
|
8
|
+
kg_utils/types/extractor.py,sha256=EozyNWlFfdzCaskUA6tK5iej_DfO7srAWtbEVPtffFU,2182
|
|
9
|
+
kg_utils/types/module.py,sha256=_MRvVe1pbI_lAdT5VOhADgTbPhBHjdkaJzymigBaFsU,2713
|
|
10
|
+
kg_utils/types/specs.py,sha256=QTXIe_P2TxVGB1LiFJ2-zovv1vBKQX68jcnDRcaQO30,2792
|
|
11
|
+
kgmodule_utils-0.2.0.dist-info/METADATA,sha256=2wivJ8i0iIUCWMnpv3hRueTFHmN63XBgMfdsFgMHhvQ,7138
|
|
12
|
+
kgmodule_utils-0.2.0.dist-info/WHEEL,sha256=kJCRJT_g0adfAJzTx2GUMmS80rTJIVHRCfG0DQgLq3o,88
|
|
13
|
+
kgmodule_utils-0.2.0.dist-info/licenses/LICENSE,sha256=X-B9sT00-P4jkaspEftxUFB_KHtOa8qHiC1pe7yXBjA,3865
|
|
14
|
+
kgmodule_utils-0.2.0.dist-info/RECORD,,
|