semanticembed 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- semanticembed/__init__.py +37 -0
- semanticembed/client.py +240 -0
- semanticembed/exceptions.py +34 -0
- semanticembed/models.py +118 -0
- semanticembed-0.1.0.dist-info/METADATA +257 -0
- semanticembed-0.1.0.dist-info/RECORD +8 -0
- semanticembed-0.1.0.dist-info/WHEEL +4 -0
- semanticembed-0.1.0.dist-info/licenses/LICENSE +18 -0
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
"""SemanticEmbed SDK — 6D structural intelligence for directed graphs."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from .client import encode, encode_file, report, drift
|
|
6
|
+
from .models import SemanticResult, RiskReport, RiskEntry, DIMENSION_NAMES
|
|
7
|
+
from .exceptions import (
|
|
8
|
+
SemanticEmbedError,
|
|
9
|
+
AuthenticationError,
|
|
10
|
+
NodeLimitError,
|
|
11
|
+
APIError,
|
|
12
|
+
SemanticConnectionError,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
__version__ = "0.1.0"
|
|
16
|
+
|
|
17
|
+
# Set this to your license key to unlock unlimited nodes:
|
|
18
|
+
# import semanticembed
|
|
19
|
+
# semanticembed.license_key = "se-xxxxxxxxxxxxxxxxxxxx"
|
|
20
|
+
license_key: str | None = None
|
|
21
|
+
|
|
22
|
+
__all__ = [
|
|
23
|
+
"encode",
|
|
24
|
+
"encode_file",
|
|
25
|
+
"report",
|
|
26
|
+
"drift",
|
|
27
|
+
"SemanticResult",
|
|
28
|
+
"RiskReport",
|
|
29
|
+
"RiskEntry",
|
|
30
|
+
"DIMENSION_NAMES",
|
|
31
|
+
"SemanticEmbedError",
|
|
32
|
+
"AuthenticationError",
|
|
33
|
+
"NodeLimitError",
|
|
34
|
+
"APIError",
|
|
35
|
+
"SemanticConnectionError",
|
|
36
|
+
"license_key",
|
|
37
|
+
]
|
semanticembed/client.py
ADDED
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
"""HTTP client for the SemanticEmbed cloud API."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
import time
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
import httpx
|
|
10
|
+
|
|
11
|
+
from .exceptions import APIError, AuthenticationError, SemanticConnectionError, NodeLimitError
|
|
12
|
+
from .models import DIMENSION_NAMES, RiskEntry, RiskReport, SemanticResult
|
|
13
|
+
|
|
14
|
+
DEFAULT_API_URL = "https://semanticembed-api-production.up.railway.app"
|
|
15
|
+
FREE_TIER_LIMIT = 50
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _resolve_key(license_key: str | None) -> str:
|
|
19
|
+
"""Resolve the API / license key from explicit arg, module-level, env var, or config file."""
|
|
20
|
+
# 1. Explicit argument
|
|
21
|
+
if license_key:
|
|
22
|
+
return license_key
|
|
23
|
+
|
|
24
|
+
# 2. Module-level attribute (set by user: semanticembed.license_key = "...")
|
|
25
|
+
import semanticembed
|
|
26
|
+
if getattr(semanticembed, "license_key", None):
|
|
27
|
+
return semanticembed.license_key
|
|
28
|
+
|
|
29
|
+
# 3. Environment variable
|
|
30
|
+
env_key = os.environ.get("SEMANTICEMBED_LICENSE_KEY") or os.environ.get("SEMANTICEMBED_API_KEY")
|
|
31
|
+
if env_key:
|
|
32
|
+
return env_key
|
|
33
|
+
|
|
34
|
+
# 4. Config file
|
|
35
|
+
config_path = os.path.expanduser("~/.semanticembed/license")
|
|
36
|
+
if os.path.isfile(config_path):
|
|
37
|
+
with open(config_path) as f:
|
|
38
|
+
key = f.read().strip()
|
|
39
|
+
if key:
|
|
40
|
+
return key
|
|
41
|
+
|
|
42
|
+
# 5. No key — free tier (server will enforce 50-node limit)
|
|
43
|
+
return ""
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _normalize_edges(edges: Any) -> list[list[str]]:
|
|
47
|
+
"""Accept tuples, lists, or dicts and normalize to [[source, target], ...]."""
|
|
48
|
+
normalized = []
|
|
49
|
+
for e in edges:
|
|
50
|
+
if isinstance(e, dict):
|
|
51
|
+
src = e.get("source") or e.get("src") or e.get("from")
|
|
52
|
+
tgt = e.get("target") or e.get("tgt") or e.get("to")
|
|
53
|
+
if not src or not tgt:
|
|
54
|
+
raise ValueError(f"Edge dict must have 'source' and 'target' keys: {e}")
|
|
55
|
+
normalized.append([str(src), str(tgt)])
|
|
56
|
+
elif isinstance(e, (list, tuple)) and len(e) >= 2:
|
|
57
|
+
normalized.append([str(e[0]), str(e[1])])
|
|
58
|
+
else:
|
|
59
|
+
raise ValueError(f"Invalid edge format: {e}")
|
|
60
|
+
return normalized
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _parse_response(data: dict, elapsed_ms: float) -> SemanticResult:
    """Convert a decoded API response into a ``SemanticResult``.

    Args:
        data: Decoded JSON body. The keys read are ``"embeddings"``
            (node -> dict or list), ``"risks"`` (list of dicts) and
            ``"metadata"`` (dict); each is optional.
        elapsed_ms: Client-measured request time, stored on the result as
            ``encoding_time_ms``.

    Returns:
        A ``SemanticResult`` in which every node has exactly one value per
        entry of ``DIMENSION_NAMES``.
    """
    n_dims = len(DIMENSION_NAMES)

    vectors: dict[str, list[float]] = {}
    for node, emb in data.get("embeddings", {}).items():
        if isinstance(emb, dict):
            # Named form: pick dimensions in canonical order, 0.0 if absent.
            vectors[node] = [emb.get(d, 0.0) for d in DIMENSION_NAMES]
        elif isinstance(emb, list):
            # Positional form: truncate extras and pad short vectors with
            # 0.0 so downstream code can index every dimension safely,
            # matching how the named form treats missing dimensions.
            head = emb[:n_dims]
            vectors[node] = head + [0.0] * (n_dims - len(head))
        else:
            vectors[node] = [0.0] * n_dims

    risks = [
        RiskEntry(
            node=r.get("node", ""),
            category=r.get("type", ""),  # the API calls the category "type"
            severity=r.get("severity", "info"),
            description=r.get("description", ""),
            value=r.get("value", 0.0),
        )
        for r in data.get("risks", [])
    ]

    metadata = data.get("metadata", {})
    graph_info = {
        # Fall back to the number of embedded nodes when not reported.
        "nodes": metadata.get("n_nodes", len(vectors)),
        "edges": metadata.get("n_edges", 0),
        "max_depth": metadata.get("max_depth", 0),
    }

    return SemanticResult(
        vectors=vectors,
        graph_info=graph_info,
        encoding_time_ms=elapsed_ms,
        risks=risks,
    )
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def encode(
    edges: list,
    *,
    license_key: str | None = None,
    api_url: str | None = None,
    timeout: float = 30.0,
) -> SemanticResult:
    """Encode a directed graph and return 6D structural coordinates.

    Args:
        edges: List of edges as tuples, lists, or dicts.
            Examples: [("A", "B"), ("B", "C")]
                      [{"source": "A", "target": "B"}]
        license_key: Optional API key. If not provided, checks env/config.
        api_url: Override the API endpoint (for testing). Falls back to the
            ``SEMANTICEMBED_API_URL`` environment variable, then the default.
        timeout: Request timeout in seconds.

    Returns:
        SemanticResult with .vectors, .table, .graph_info, .risks

    Raises:
        ValueError: If an edge is malformed or fewer than 2 edges are given.
        SemanticConnectionError: If the request fails at the transport
            level (connection failure, timeout, protocol error).
        AuthenticationError: If the server responds with 401.
        NodeLimitError: If the server responds with 403.
        APIError: If the server responds with any other status >= 400.
    """
    normalized = _normalize_edges(edges)
    if len(normalized) < 2:
        raise ValueError("Graph must have at least 2 edges.")

    key = _resolve_key(license_key)
    url = (api_url or os.environ.get("SEMANTICEMBED_API_URL") or DEFAULT_API_URL).rstrip("/")

    headers: dict[str, str] = {"Content-Type": "application/json"}
    if key:
        headers["X-API-Key"] = key

    payload = {"edges": normalized}

    start = time.perf_counter()
    try:
        with httpx.Client(timeout=timeout) as client:
            resp = client.post(f"{url}/api/v1/encode", headers=headers, json=payload)
    except httpx.TransportError as e:
        # TransportError is the common base of ConnectError, the timeout
        # exceptions and the other network/protocol failures, so none of
        # them escape as raw httpx exceptions.
        raise SemanticConnectionError(f"Could not connect to SemanticEmbed API at {url}: {e}") from e
    elapsed_ms = (time.perf_counter() - start) * 1000

    if resp.status_code == 401:
        raise AuthenticationError("Invalid API key. Check your license key or contact jeffmurr@seas.upenn.edu")
    if resp.status_code == 403:
        detail = resp.text[:300]
        # Prefer the counts quoted in the server's message; otherwise fall
        # back to a local node count and the free-tier limit.
        import re
        match = re.search(r"(\d+) nodes.*limit.*?(\d+)", detail)
        if match:
            n_nodes = int(match.group(1))
            limit = int(match.group(2))
        else:
            n_nodes = len({node for edge in normalized for node in edge})
            limit = FREE_TIER_LIMIT
        raise NodeLimitError(n_nodes, limit)
    if resp.status_code >= 400:
        detail = resp.text[:200]
        raise APIError(resp.status_code, detail)

    return _parse_response(resp.json(), elapsed_ms)
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def report(result: SemanticResult) -> RiskReport:
    """Wrap the risks of an encoding result in a ``RiskReport``.

    Args:
        result: A SemanticResult from encode().

    Returns:
        RiskReport holding ``result.risks``; see its ``by_category()`` and
        ``by_severity()`` filters.
    """
    findings = result.risks
    return RiskReport(risks=findings)
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def encode_file(
    path: str,
    *,
    license_key: str | None = None,
    api_url: str | None = None,
    timeout: float = 30.0,
) -> SemanticResult:
    """Encode a graph from a JSON file.

    The file should contain an object with an "edges" array whose items
    have "source" and "target" fields.

    Args:
        path: Path to a JSON file.
        license_key: Optional API key.
        api_url: Override the API endpoint.
        timeout: Request timeout in seconds.

    Returns:
        SemanticResult

    Raises:
        ValueError: If the file's top level is not an object or it has no
            non-empty "edges" array.
    """
    import json

    # JSON text is UTF-8; do not depend on the platform's locale encoding.
    with open(path, encoding="utf-8") as f:
        data = json.load(f)

    # A non-object top level (e.g. a bare array) has no "edges" member.
    edges = data.get("edges", []) if isinstance(data, dict) else None
    if not edges:
        raise ValueError(f"No 'edges' array found in {path}")

    return encode(edges, license_key=license_key, api_url=api_url, timeout=timeout)
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def drift(
    before: SemanticResult,
    after: SemanticResult,
) -> dict[str, dict[str, float]]:
    """Report how each node's coordinates moved between two encodings.

    A node present in only one of the results is compared against an
    all-zero vector for the other. Deltas whose magnitude is at most 1e-6
    are omitted, and nodes with no remaining deltas are left out entirely.

    Args:
        before: Encoding result from the earlier version.
        after: Encoding result from the later version.

    Returns:
        Dict mapping node names (in sorted order) to dicts of
        ``dimension -> after - before``, rounded to 4 decimal places.
        Positive values mean the dimension increased.
    """
    origin = [0.0] * 6
    changes: dict[str, dict[str, float]] = {}

    for node in sorted(set(before.vectors) | set(after.vectors)):
        old = before.vectors.get(node, origin)
        new = after.vectors.get(node, origin)
        moved = {
            dim: round(new[i] - old[i], 4)
            for i, dim in enumerate(DIMENSION_NAMES)
            if abs(new[i] - old[i]) > 1e-6
        }
        if moved:
            changes[node] = moved

    return changes
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
"""SemanticEmbed SDK exceptions."""
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class SemanticEmbedError(Exception):
    """Root of the SDK's exception hierarchy.

    Catch this type to handle any error raised by the SDK itself.
    """
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class AuthenticationError(SemanticEmbedError):
    """The API key / license key is invalid or was not supplied."""
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class NodeLimitError(SemanticEmbedError):
    """Graph exceeds the node limit for the current plan.

    Attributes:
        n_nodes: Number of nodes in the rejected graph.
        limit: Maximum number of nodes the plan allows.
    """

    def __init__(self, n_nodes: int, limit: int):
        self.n_nodes = n_nodes
        self.limit = limit
        super().__init__(
            f"Graph has {n_nodes} nodes but your plan allows {limit}. "
            f"Contact jeffmurr@seas.upenn.edu for a license key"
        )

    def __reduce__(self):
        # Exception's default reduction rebuilds the object from
        # ``self.args`` (the single formatted message), which does not
        # match this two-argument constructor and makes pickle/copy fail.
        # Rebuild from the original arguments instead.
        return (type(self), (self.n_nodes, self.limit))
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class APIError(SemanticEmbedError):
    """Server returned an error response.

    Attributes:
        status: HTTP status code of the response.
        detail: Text describing the failure, as passed by the caller.
    """

    def __init__(self, status: int, detail: str):
        self.status = status
        self.detail = detail
        super().__init__(f"API error {status}: {detail}")

    def __reduce__(self):
        # Exception's default reduction rebuilds the object from
        # ``self.args`` (the single formatted message), which does not
        # match this two-argument constructor and makes pickle/copy fail.
        # Rebuild from the original arguments instead.
        return (type(self), (self.status, self.detail))
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class SemanticConnectionError(SemanticEmbedError):
    """The SemanticEmbed API could not be reached."""
|
semanticembed/models.py
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
"""Response models for SemanticEmbed SDK."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
DIMENSION_NAMES = ["depth", "independence", "hierarchy", "throughput", "criticality", "fanout"]
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass
class RiskEntry:
    """One structural risk finding attached to a node."""

    node: str         # name of the node the finding is about
    category: str     # risk category label
    severity: str     # severity level label
    description: str  # human-readable explanation
    value: float      # numeric value reported with the finding

    def json(self) -> dict[str, Any]:
        """Return the finding as a plain dict with one key per field."""
        exported = ("node", "category", "severity", "description", "value")
        return {name: getattr(self, name) for name in exported}
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass
class RiskReport:
    """Structural risk report for a graph.

    Holds a list of ``RiskEntry`` findings and offers filtering, JSON
    export and a grouped plain-text rendering via ``str()``.
    """

    risks: list[RiskEntry] = field(default_factory=list)

    def by_category(self, category: str) -> list[RiskEntry]:
        """Filter risks by category name.

        Matching is a case-insensitive substring test in which ``_`` and
        ``-`` are both treated as spaces, on the query and on each entry's
        category alike.
        """
        def _canonical(text: str) -> str:
            return text.lower().replace("_", " ").replace("-", " ")

        wanted = _canonical(category)
        # Both sides go through the same normalisation; otherwise a
        # hyphenated category could never be matched by its own name.
        return [r for r in self.risks if wanted in _canonical(r.category)]

    def by_severity(self, severity: str) -> list[RiskEntry]:
        """Filter risks by severity level (exact, case-sensitive match)."""
        return [r for r in self.risks if r.severity == severity]

    def json(self) -> list[dict[str, Any]]:
        """Return every finding as a plain dict, in stored order."""
        return [r.json() for r in self.risks]

    def __str__(self) -> str:
        """Render the report as text, grouped by category."""
        if not self.risks:
            return "STRUCTURAL RISK REPORT\n======================\n\nNo structural risks detected."

        lines = ["STRUCTURAL RISK REPORT", "=" * 22, ""]

        # Group by category, keeping first-seen category order.
        categories: dict[str, list[RiskEntry]] = {}
        for r in self.risks:
            categories.setdefault(r.category, []).append(r)

        for cat, entries in categories.items():
            label = cat.upper().replace("_", " ")
            lines.append(f"{label}:")
            for r in entries:
                lines.append(f" - {r.node:<30} | {r.description}")
            lines.append("")

        return "\n".join(lines)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
@dataclass
class SemanticResult:
    """Outcome of a 6D structural encoding.

    ``vectors`` maps each node name to its coordinate list, ``graph_info``
    carries graph-level details, ``encoding_time_ms`` is the recorded
    encoding time and ``risks`` lists the accompanying findings.
    """

    vectors: dict[str, list[float]]
    graph_info: dict[str, Any]
    encoding_time_ms: float
    risks: list[RiskEntry] = field(default_factory=list)

    def __getitem__(self, node: str) -> list[float]:
        """Return the coordinate list of ``node`` (``result[node]``)."""
        return self.vectors[node]

    def dimensions(self, node: str) -> dict[str, float]:
        """Return the node's coordinates keyed by dimension name."""
        named: dict[str, float] = {}
        for label, coordinate in zip(DIMENSION_NAMES, self.vectors[node]):
            named[label] = coordinate
        return named

    @property
    def nodes(self) -> list[str]:
        """Names of all encoded nodes, in stored order."""
        return [*self.vectors]

    @property
    def table(self) -> str:
        """Fixed-width text table, highest criticality first."""
        titles = ("Depth", "Indep", "Hier", "Thru", "Crit", "Fan")
        header = f"{'Node':<35} " + " ".join(f"{title:>6}" for title in titles)

        def criticality(item):
            # item is (node, vector); criticality sits at index 4.
            return item[1][4]

        ranked = sorted(self.vectors.items(), key=criticality, reverse=True)

        out = [header, "-" * len(header)]
        for node, v in ranked:
            # Names longer than the 35-character column are shortened to
            # 32 characters plus an ellipsis.
            shown = node[:32] + "..." if len(node) > 35 else node
            cells = " ".join(f"{v[i]:>6.3f}" for i in range(6))
            out.append(f"{shown:<35} {cells}")
        return "\n".join(out)

    def json(self) -> dict[str, Any]:
        """Full result as a JSON-serializable dict."""
        exported: dict[str, Any] = {}
        exported["vectors"] = self.vectors
        exported["graph_info"] = self.graph_info
        exported["encoding_time_ms"] = self.encoding_time_ms
        exported["risks"] = [entry.json() for entry in self.risks]
        return exported
|
|
@@ -0,0 +1,257 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: semanticembed
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: 6D structural intelligence for directed graphs. Six numbers per node. Sub-millisecond.
|
|
5
|
+
Project-URL: Homepage, https://github.com/jmurray10/semanticembed-sdk
|
|
6
|
+
Project-URL: Documentation, https://github.com/jmurray10/semanticembed-sdk
|
|
7
|
+
Project-URL: Repository, https://github.com/jmurray10/semanticembed-sdk
|
|
8
|
+
Author-email: Jeff Murray <jeffmurr@seas.upenn.edu>
|
|
9
|
+
License: Proprietary
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: graph,microservices,observability,risk-detection,structural-analysis
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
20
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
21
|
+
Requires-Python: >=3.9
|
|
22
|
+
Requires-Dist: httpx>=0.24
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
|
|
25
|
+
# SemanticEmbed SDK
|
|
26
|
+
|
|
27
|
+
**Structural intelligence for directed graphs. Six numbers per node. Sub-millisecond.**
|
|
28
|
+
|
|
29
|
+
SemanticEmbed computes a 6-dimensional structural encoding for every node in a directed graph. From a bare edge list -- no runtime telemetry, no historical data, no tuning -- it produces six independent measurements that fully describe each node's structural role.
|
|
30
|
+
|
|
31
|
+
> **Validated against production incidents.** In a blind test against a live production environment (100+ services, 2,500+ incidents over 30 days), the majority of topology-relevant incidents occurred on nodes that 6D structural analysis had flagged as risky -- from the call graph alone, before any incident occurred.
|
|
32
|
+
|
|
33
|
+
---
|
|
34
|
+
|
|
35
|
+
## Why 6D?
|
|
36
|
+
|
|
37
|
+
Observability tools tell you **what broke**. SemanticEmbed tells you **what will break** -- from topology alone.
|
|
38
|
+
|
|
39
|
+
- **No agents, no instrumentation** -- just an edge list
|
|
40
|
+
- **Sub-millisecond** -- encodes 100+ node graphs in <1ms
|
|
41
|
+
- **Works on any directed graph** -- microservices, AI agent pipelines, data workflows, CI/CD
|
|
42
|
+
- **Mathematically independent axes** -- six dimensions, zero redundancy, each captures structural information no other metric provides
|
|
43
|
+
|
|
44
|
+
---
|
|
45
|
+
|
|
46
|
+
## Try It Now
|
|
47
|
+
|
|
48
|
+
**[Open the Interactive Demo in Google Colab](https://colab.research.google.com/github/jmurray10/semanticembed-sdk/blob/main/notebooks/01_quickstart.ipynb)** -- runs in your browser, nothing to install locally.
|
|
49
|
+
|
|
50
|
+
---
|
|
51
|
+
|
|
52
|
+
## Install
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
pip install semanticembed
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
**Free tier:** Up to 50 nodes per graph. No signup required.
|
|
59
|
+
|
|
60
|
+
---
|
|
61
|
+
|
|
62
|
+
## Quick Start
|
|
63
|
+
|
|
64
|
+
```python
|
|
65
|
+
from semanticembed import encode, report
|
|
66
|
+
|
|
67
|
+
# Any directed graph as an edge list
|
|
68
|
+
edges = [
|
|
69
|
+
("frontend", "api-gateway"),
|
|
70
|
+
("api-gateway", "order-service"),
|
|
71
|
+
("api-gateway", "user-service"),
|
|
72
|
+
("order-service", "payment-service"),
|
|
73
|
+
("order-service", "inventory-service"),
|
|
74
|
+
("payment-service", "database"),
|
|
75
|
+
]
|
|
76
|
+
|
|
77
|
+
# Compute the 6D encoding (sub-millisecond)
|
|
78
|
+
result = encode(edges)
|
|
79
|
+
|
|
80
|
+
# Six structural measurements per node
|
|
81
|
+
for node, vector in result.vectors.items():
|
|
82
|
+
print(f"{node}: {vector}")
|
|
83
|
+
|
|
84
|
+
# Structural risk report
|
|
85
|
+
print(report(result))
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
Output:
|
|
89
|
+
|
|
90
|
+
```
|
|
91
|
+
STRUCTURAL RISK REPORT
|
|
92
|
+
======================
|
|
93
|
+
|
|
94
|
+
AMPLIFICATION RISKS (high fanout, high criticality):
|
|
95
|
+
- api-gateway | fanout=0.667 | criticality=0.556
|
|
96
|
+
|
|
97
|
+
CONVERGENCE SINKS (low independence, many upstream callers):
|
|
98
|
+
- database | independence=0.000
|
|
99
|
+
|
|
100
|
+
STRUCTURAL SPOF (low independence, high upstream dependency):
|
|
101
|
+
- api-gateway | independence=0.000 | every request flows through this node
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
---
|
|
105
|
+
|
|
106
|
+
## What It Finds That Other Tools Miss
|
|
107
|
+
|
|
108
|
+
| Your current tools | SemanticEmbed |
|
|
109
|
+
|---|---|
|
|
110
|
+
| This service has high latency | This service is on 89% of all paths (structural SPOF) |
|
|
111
|
+
| This service had 5 errors | This service fans out to 12 downstream services (amplification risk) |
|
|
112
|
+
| This service is healthy | This service has zero lateral redundancy (convergence sink) |
|
|
113
|
+
|
|
114
|
+
Runtime monitoring tells you what is slow **now**. Structural analysis tells you what **will** cause cascading failures regardless of current load.
|
|
115
|
+
|
|
116
|
+
---
|
|
117
|
+
|
|
118
|
+
## The Six Dimensions
|
|
119
|
+
|
|
120
|
+
Every node gets six independent structural measurements:
|
|
121
|
+
|
|
122
|
+
| Dimension | What It Measures | Risk Signal |
|
|
123
|
+
|-----------|-----------------|-------------|
|
|
124
|
+
| **Depth** | Position in the execution pipeline (0.0 = entry, 1.0 = deepest) | Deep nodes accumulate upstream latency |
|
|
125
|
+
| **Independence** | Lateral redundancy at the same pipeline stage | Low independence = structural chokepoint |
|
|
126
|
+
| **Hierarchy** | Module or group membership | Cross-module dependencies = blast radius |
|
|
127
|
+
| **Throughput** | Fraction of total traffic flowing through the node | High throughput + low independence = hidden bottleneck |
|
|
128
|
+
| **Criticality** | Fraction of end-to-end paths depending on this node | High criticality = SPOF |
|
|
129
|
+
| **Fanout** | Broadcaster (1.0) vs aggregator (0.0) | High fanout = amplification risk |
|
|
130
|
+
|
|
131
|
+
These six properties are mathematically independent -- knowing any five tells you nothing about the sixth.
|
|
132
|
+
|
|
133
|
+
See [docs/dimensions.md](docs/dimensions.md) for the full reference.
|
|
134
|
+
|
|
135
|
+
---
|
|
136
|
+
|
|
137
|
+
## Use Cases
|
|
138
|
+
|
|
139
|
+
**Microservice architectures** -- Find SPOFs, amplification cascades, and convergence bottlenecks in any service mesh. Works with Kubernetes, Istio, OTel traces, or static architecture diagrams.
|
|
140
|
+
|
|
141
|
+
**AI agent pipelines** -- Identify vendor concentration risk, gateway bottlenecks, and guardrail single points of failure in LLM orchestration graphs.
|
|
142
|
+
|
|
143
|
+
**CI/CD and data pipelines** -- Detect structural fragility in build graphs, ETL workflows, and deployment pipelines before they cause cascading failures.
|
|
144
|
+
|
|
145
|
+
**Architecture drift monitoring** -- Compare structural fingerprints across releases. Know exactly which services changed structural role and by how much.
|
|
146
|
+
|
|
147
|
+
---
|
|
148
|
+
|
|
149
|
+
## Notebooks
|
|
150
|
+
|
|
151
|
+
Step-by-step Colab notebooks. Click to open, run in your browser.
|
|
152
|
+
|
|
153
|
+
| Notebook | Use Case | What You Learn |
|
|
154
|
+
|----------|----------|---------------|
|
|
155
|
+
| [01 - Quickstart](https://colab.research.google.com/github/jmurray10/semanticembed-sdk/blob/main/notebooks/01_quickstart.ipynb) | Getting started | Install, encode a graph, read the risk report |
|
|
156
|
+
| [02 - Dimensions Deep Dive](https://colab.research.google.com/github/jmurray10/semanticembed-sdk/blob/main/notebooks/02_dimensions.ipynb) | Understanding 6D | What each dimension means, with worked examples |
|
|
157
|
+
| [03 - Drift Detection](https://colab.research.google.com/github/jmurray10/semanticembed-sdk/blob/main/notebooks/03_drift_detection.ipynb) | Architecture drift | Compare graph versions, detect structural changes |
|
|
158
|
+
| [04 - Bring Your Own Graph](https://colab.research.google.com/github/jmurray10/semanticembed-sdk/blob/main/notebooks/04_bring_your_own.ipynb) | Any graph | Load from JSON, OTel traces, or Kubernetes |
|
|
159
|
+
| [05 - AI Agent Pipelines](https://colab.research.google.com/github/jmurray10/semanticembed-sdk/blob/main/notebooks/05_ai_agent_pipelines.ipynb) | AI/LLM agents | Vendor concentration, gateway bottlenecks, guardrail SPOFs |
|
|
160
|
+
| [06 - CI/CD & Data Pipelines](https://colab.research.google.com/github/jmurray10/semanticembed-sdk/blob/main/notebooks/06_cicd_pipelines.ipynb) | CI/CD & ETL | Build graph fragility, pipeline bottlenecks, drift gates |
|
|
161
|
+
| [07 - OpenTelemetry](https://colab.research.google.com/github/jmurray10/semanticembed-sdk/blob/main/notebooks/07_opentelemetry.ipynb) | OTel traces | Extract edges from traces, structural analysis, CI/CD gates |
|
|
162
|
+
|
|
163
|
+
---
|
|
164
|
+
|
|
165
|
+
## Example Graphs
|
|
166
|
+
|
|
167
|
+
The `examples/` directory contains edge lists for well-known architectures:
|
|
168
|
+
|
|
169
|
+
| File | Application | Nodes | Edges |
|
|
170
|
+
|------|------------|-------|-------|
|
|
171
|
+
| [google_online_boutique.json](examples/google_online_boutique.json) | Google Online Boutique (microservices) | 11 | 15 |
|
|
172
|
+
| [weaveworks_sock_shop.json](examples/weaveworks_sock_shop.json) | Weaveworks Sock Shop (microservices) | 15 | 15 |
|
|
173
|
+
| [ai_agent_pipeline.json](examples/ai_agent_pipeline.json) | Multi-agent LLM orchestration | 12 | 15 |
|
|
174
|
+
| [cicd_pipeline.json](examples/cicd_pipeline.json) | CI/CD build pipeline | 13 | 17 |
|
|
175
|
+
|
|
176
|
+
---
|
|
177
|
+
|
|
178
|
+
## React Components
|
|
179
|
+
|
|
180
|
+
Drop-in React components for rendering SDK results. See [examples/react/](examples/react/) for the full source.
|
|
181
|
+
|
|
182
|
+
| Component | What it renders |
|
|
183
|
+
|-----------|----------------|
|
|
184
|
+
| `useSemanticEmbed.ts` | React hook — call `encode()` from your frontend |
|
|
185
|
+
| `RiskTable.tsx` | Sortable risk table with severity badges |
|
|
186
|
+
| `RadarChart.tsx` | 6D radar chart comparing node profiles |
|
|
187
|
+
| `TopologySummary.tsx` | KPI cards + risk breakdown |
|
|
188
|
+
|
|
189
|
+
```tsx
|
|
190
|
+
import { useSemanticEmbed } from './useSemanticEmbed';
|
|
191
|
+
import { RiskTable } from './RiskTable';
|
|
192
|
+
|
|
193
|
+
function App() {
|
|
194
|
+
const { result, loading, encode } = useSemanticEmbed();
|
|
195
|
+
return (
|
|
196
|
+
<>
|
|
197
|
+
<button onClick={() => encode([["A","B"],["B","C"],["C","D"]])}>Analyze</button>
|
|
198
|
+
{result && <RiskTable risks={result.risks} />}
|
|
199
|
+
</>
|
|
200
|
+
);
|
|
201
|
+
}
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
---
|
|
205
|
+
|
|
206
|
+
## Input Format
|
|
207
|
+
|
|
208
|
+
SemanticEmbed accepts any directed graph as an edge list.
|
|
209
|
+
|
|
210
|
+
```python
|
|
211
|
+
# Python tuples
|
|
212
|
+
edges = [("A", "B"), ("B", "C")]
|
|
213
|
+
result = encode(edges)
|
|
214
|
+
|
|
215
|
+
# JSON file
|
|
216
|
+
result = encode_file("my_graph.json")
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
JSON format:
|
|
220
|
+
|
|
221
|
+
```json
|
|
222
|
+
{
|
|
223
|
+
"edges": [
|
|
224
|
+
{"source": "A", "target": "B"},
|
|
225
|
+
{"source": "B", "target": "C"}
|
|
226
|
+
]
|
|
227
|
+
}
|
|
228
|
+
```
|
|
229
|
+
|
|
230
|
+
See [docs/input_format.md](docs/input_format.md) for the full spec.
|
|
231
|
+
|
|
232
|
+
---
|
|
233
|
+
|
|
234
|
+
## Documentation
|
|
235
|
+
|
|
236
|
+
| Document | Description |
|
|
237
|
+
|----------|-------------|
|
|
238
|
+
| [docs/getting_started.md](docs/getting_started.md) | Install, encode, read results, export -- one page |
|
|
239
|
+
| [docs/api_reference.md](docs/api_reference.md) | Every function, class, parameter, and return type |
|
|
240
|
+
| [docs/dimensions.md](docs/dimensions.md) | The six structural dimensions -- full reference |
|
|
241
|
+
| [docs/input_format.md](docs/input_format.md) | Edge list input specification |
|
|
242
|
+
| [docs/output_format.md](docs/output_format.md) | Encoding output and risk report format |
|
|
243
|
+
|
|
244
|
+
---
|
|
245
|
+
|
|
246
|
+
## License
|
|
247
|
+
|
|
248
|
+
SemanticEmbed SDK is proprietary software distributed as a compiled package.
|
|
249
|
+
Free tier available for graphs up to 50 nodes. See [LICENSE](LICENSE) for terms.
|
|
250
|
+
|
|
251
|
+
**Patent pending.** Application #63/994,075.
|
|
252
|
+
|
|
253
|
+
---
|
|
254
|
+
|
|
255
|
+
## Contact
|
|
256
|
+
|
|
257
|
+
Email: jeffmurr@seas.upenn.edu
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
semanticembed/__init__.py,sha256=yUlt-QWKUJGswliwg9_UrOqkDe2mRbkW8A_lgt8osOo,875
|
|
2
|
+
semanticembed/client.py,sha256=pbO3ZK7dZ2kjZM9WWjuBvRpqNtWHj65y68s9KCboMe4,7613
|
|
3
|
+
semanticembed/exceptions.py,sha256=h-AenVWauzIyRZT_17QLpNXLAdroRZ9z0IKN0lPu0H8,965
|
|
4
|
+
semanticembed/models.py,sha256=lP4egjv6qSKK_BsmPF60OAdB0UTfQ1SCwwacdtGRtFk,3661
|
|
5
|
+
semanticembed-0.1.0.dist-info/METADATA,sha256=Dry7UW4kkqXOltONImempie8X2ww9IpvVIj_oorIWj0,10127
|
|
6
|
+
semanticembed-0.1.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
7
|
+
semanticembed-0.1.0.dist-info/licenses/LICENSE,sha256=B64mn9F_MbnqYyNznp6j1vPfGEyrENUDY0gJBAvsdFI,665
|
|
8
|
+
semanticembed-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
SemanticEmbed SDK License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 SemanticEmbed Inc. All rights reserved.
|
|
4
|
+
|
|
5
|
+
This software is proprietary and confidential. Unauthorized copying,
|
|
6
|
+
distribution, modification, or use of this software, via any medium,
|
|
7
|
+
is strictly prohibited.
|
|
8
|
+
|
|
9
|
+
The SemanticEmbed SDK is distributed as a compiled package for the
|
|
10
|
+
sole purpose of interfacing with the SemanticEmbed cloud API. It does
|
|
11
|
+
not contain the encoding algorithm or any proprietary computation logic.
|
|
12
|
+
|
|
13
|
+
Free tier: graphs up to 50 nodes, no signup required.
|
|
14
|
+
Paid tier: unlimited nodes, requires a valid license key.
|
|
15
|
+
|
|
16
|
+
Patent pending. Application #63/994,075.
|
|
17
|
+
|
|
18
|
+
For licensing inquiries: jeffmurr@seas.upenn.edu
|