jsonld-ex 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- jsonld_ex/__init__.py +30 -0
- jsonld_ex/ai_ml.py +154 -0
- jsonld_ex/processor.py +91 -0
- jsonld_ex/security.py +92 -0
- jsonld_ex/validation.py +186 -0
- jsonld_ex/vector.py +76 -0
- jsonld_ex-0.1.0.dist-info/METADATA +26 -0
- jsonld_ex-0.1.0.dist-info/RECORD +10 -0
- jsonld_ex-0.1.0.dist-info/WHEEL +5 -0
- jsonld_ex-0.1.0.dist-info/top_level.txt +1 -0
jsonld_ex/__init__.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
"""
jsonld-ex: JSON-LD 1.2 Extensions for AI/ML, Security, and Validation

Reference implementation of proposed JSON-LD 1.2 extensions.
Wraps PyLD for core JSON-LD processing and adds extension layers.
"""

# Package version; keep in sync with the distribution metadata.
__version__ = "0.1.0"

# Re-export the public API from the extension submodules so callers can
# write `from jsonld_ex import annotate` instead of reaching into modules.
from jsonld_ex.processor import JsonLdEx
from jsonld_ex.ai_ml import annotate, get_confidence, get_provenance, filter_by_confidence
from jsonld_ex.vector import validate_vector, cosine_similarity, vector_term_definition
from jsonld_ex.security import compute_integrity, verify_integrity, is_context_allowed
from jsonld_ex.validation import validate_node, validate_document

# Explicit public surface for `from jsonld_ex import *` and documentation tools.
__all__ = [
    "JsonLdEx",
    "annotate",
    "get_confidence",
    "get_provenance",
    "filter_by_confidence",
    "validate_vector",
    "cosine_similarity",
    "vector_term_definition",
    "compute_integrity",
    "verify_integrity",
    "is_context_allowed",
    "validate_node",
    "validate_document",
]
|
jsonld_ex/ai_ml.py
ADDED
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
"""
|
|
2
|
+
AI/ML Extensions for JSON-LD
|
|
3
|
+
|
|
4
|
+
Provides @confidence, @source, @extractedAt, @method, @humanVerified
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
from dataclasses import dataclass, field
|
|
9
|
+
from typing import Any, Optional, Sequence, Literal
|
|
10
|
+
import math
|
|
11
|
+
|
|
12
|
+
# Base IRI under which the extension keywords expand in expanded-form
# documents (e.g. "...jsonld-ex/confidence" for @confidence).
JSONLD_EX_NAMESPACE = "http://www.w3.org/ns/jsonld-ex/"
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass
class ProvenanceMetadata:
    """AI/ML provenance metadata attached to a value."""
    # Confidence score; expected in [0.0, 1.0], None when not recorded.
    confidence: Optional[float] = None
    # Free-form identifier of the system/model that produced the value.
    source: Optional[str] = None
    # Timestamp string of when the value was extracted (format not enforced here).
    extracted_at: Optional[str] = None
    # Extraction method/technique label.
    method: Optional[str] = None
    # Whether a human reviewed the value; None when unknown.
    human_verified: Optional[bool] = None
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def annotate(
    value: Any,
    confidence: Optional[float] = None,
    source: Optional[str] = None,
    extracted_at: Optional[str] = None,
    method: Optional[str] = None,
    human_verified: Optional[bool] = None,
) -> dict[str, Any]:
    """Create an annotated JSON-LD value with provenance metadata.

    Wraps *value* in an ``{"@value": ...}`` object and attaches any
    supplied provenance keywords. Raises ValueError if *confidence*
    is supplied and out of range.
    """
    if confidence is not None:
        _validate_confidence(confidence)

    annotated: dict[str, Any] = {"@value": value}
    # Keyword order matches the parameter order; None means "not supplied".
    optional_fields = (
        ("@confidence", confidence),
        ("@source", source),
        ("@extractedAt", extracted_at),
        ("@method", method),
        ("@humanVerified", human_verified),
    )
    for keyword, supplied in optional_fields:
        if supplied is not None:
            annotated[keyword] = supplied
    return annotated
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def get_confidence(node: Any) -> Optional[float]:
    """Extract confidence score from a node or annotated value.

    Handles both the compact form (``@confidence`` key survives) and the
    expanded form (keyword expanded to the jsonld-ex namespace IRI).
    Returns None for non-dict inputs or when no confidence is present.
    """
    if not isinstance(node, dict):
        return None

    # Compact form: the keyword is present verbatim.
    try:
        return node["@confidence"]
    except KeyError:
        pass

    # Expanded form: value may be a list of value objects, a single
    # value object, or a bare scalar.
    expanded = node.get(f"{JSONLD_EX_NAMESPACE}confidence")
    if expanded is None:
        return None
    if isinstance(expanded, list) and expanded:
        head = expanded[0]
        if isinstance(head, dict):
            return head.get("@value", head)
        return head
    if isinstance(expanded, dict):
        return expanded.get("@value", expanded)
    return expanded
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def get_provenance(node: Any) -> ProvenanceMetadata:
    """Extract all provenance metadata from a node.

    Non-dict inputs yield an empty ProvenanceMetadata (all fields None).
    Each field is resolved via _extract_field, which tries the keyword,
    compact, and expanded spellings in turn.
    """
    if not isinstance(node, dict):
        return ProvenanceMetadata()

    # dataclass attribute -> (compact name, keyword spelling)
    field_map = {
        "confidence": ("confidence", "@confidence"),
        "source": ("source", "@source"),
        "extracted_at": ("extractedAt", "@extractedAt"),
        "method": ("method", "@method"),
        "human_verified": ("humanVerified", "@humanVerified"),
    }
    extracted = {
        attr: _extract_field(node, compact, keyword)
        for attr, (compact, keyword) in field_map.items()
    }
    return ProvenanceMetadata(**extracted)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def filter_by_confidence(
    graph: Sequence[dict[str, Any]],
    property_name: str,
    min_confidence: float,
) -> list[dict[str, Any]]:
    """Filter graph nodes by minimum confidence on a property.

    A node is kept when at least one value of *property_name* carries a
    confidence >= *min_confidence*. Values without confidence never match.
    Raises ValueError if *min_confidence* is out of range.
    """
    _validate_confidence(min_confidence)

    def qualifies(candidate: Any) -> bool:
        score = get_confidence(candidate)
        return score is not None and score >= min_confidence

    kept: list[dict[str, Any]] = []
    for node in graph:
        prop_value = node.get(property_name)
        if prop_value is None:
            continue
        # Normalize single values to a one-element list.
        candidates = prop_value if isinstance(prop_value, list) else [prop_value]
        if any(qualifies(candidate) for candidate in candidates):
            kept.append(node)
    return kept
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def aggregate_confidence(
    scores: Sequence[float],
    strategy: Literal["mean", "max", "min", "weighted"] = "mean",
    weights: Optional[Sequence[float]] = None,
) -> float:
    """Aggregate multiple confidence scores.

    Parameters:
        scores: individual confidence values, each in [0.0, 1.0].
        strategy: "mean" (default), "max", "min", or "weighted".
        weights: required for "weighted"; must match ``scores`` in length
            and must not sum to zero.

    Returns:
        The aggregated score; 0.0 for an empty ``scores`` sequence.

    Raises:
        ValueError: for an out-of-range score, mismatched weights, or a
            zero total weight (previously this surfaced as an opaque
            ZeroDivisionError).
    """
    if len(scores) == 0:
        return 0.0
    for s in scores:
        _validate_confidence(s)

    if strategy == "max":
        return max(scores)
    if strategy == "min":
        return min(scores)
    if strategy == "weighted":
        if weights is None or len(weights) != len(scores):
            raise ValueError("Weights must match scores length")
        total_weight = sum(weights)
        if total_weight == 0:
            # Explicit, actionable error instead of ZeroDivisionError.
            raise ValueError("Total weight must be non-zero")
        return sum(s * w for s, w in zip(scores, weights)) / total_weight
    # Any other value falls back to the arithmetic mean, matching the
    # original behavior for unknown strategy strings.
    return sum(scores) / len(scores)
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
# ── Internal ───────────────────────────────────────────────────────
|
|
134
|
+
|
|
135
|
+
def _validate_confidence(score: float) -> None:
    """Raise ValueError unless *score* is a number in [0.0, 1.0].

    Uses a chained comparison so NaN is rejected: NaN fails both
    ``0.0 <= score`` and ``score <= 1.0``, whereas the previous
    ``score < 0 or score > 1`` form silently accepted it.
    """
    if not isinstance(score, (int, float)) or not (0.0 <= score <= 1.0):
        raise ValueError(f"@confidence must be between 0.0 and 1.0, got: {score}")
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def _extract_field(node: dict, compact_name: str, keyword: str) -> Any:
    """Resolve a provenance field, trying keyword, compact, then expanded form.

    Expanded-form values may be a list of value objects, a single value
    object, or a bare scalar; the first entry's ``@value`` is unwrapped.
    Returns None when the field is absent in every spelling.
    """
    # Direct spellings win, in keyword-then-compact priority.
    for direct_key in (keyword, compact_name):
        if direct_key in node:
            return node[direct_key]

    value = node.get(f"{JSONLD_EX_NAMESPACE}{compact_name}")
    if value is None:
        return None
    if isinstance(value, list) and value:
        head = value[0]
        return head.get("@value", head) if isinstance(head, dict) else head
    if isinstance(value, dict):
        return value.get("@value", value)
    return value
|
jsonld_ex/processor.py
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
"""
|
|
2
|
+
JsonLdEx — Extended JSON-LD Processor (Python)
|
|
3
|
+
|
|
4
|
+
Wraps PyLD with backward-compatible extensions for AI/ML,
|
|
5
|
+
security, and validation.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
from typing import Any, Optional
|
|
10
|
+
|
|
11
|
+
from pyld import jsonld
|
|
12
|
+
|
|
13
|
+
from jsonld_ex.ai_ml import (
|
|
14
|
+
annotate, get_confidence, get_provenance,
|
|
15
|
+
filter_by_confidence, aggregate_confidence, ProvenanceMetadata,
|
|
16
|
+
)
|
|
17
|
+
from jsonld_ex.vector import (
|
|
18
|
+
vector_term_definition, validate_vector, cosine_similarity,
|
|
19
|
+
extract_vectors, strip_vectors_for_rdf,
|
|
20
|
+
)
|
|
21
|
+
from jsonld_ex.security import (
|
|
22
|
+
compute_integrity, verify_integrity, integrity_context,
|
|
23
|
+
is_context_allowed, enforce_resource_limits, DEFAULT_RESOURCE_LIMITS,
|
|
24
|
+
)
|
|
25
|
+
from jsonld_ex.validation import validate_node, validate_document, ValidationResult
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class JsonLdEx:
    """Extended JSON-LD processor wrapping PyLD.

    Core operations (expand/compact/flatten/to_rdf) enforce the configured
    resource limits before delegating to ``pyld.jsonld``. The extension
    helpers are exposed as static methods so the same API is reachable
    from an instance or the class.
    """

    def __init__(
        self,
        resource_limits: Optional[dict[str, int]] = None,
        context_allowlist: Optional[dict[str, Any]] = None,
    ):
        # Caller-supplied limits override the defaults key-by-key.
        self._limits = {**DEFAULT_RESOURCE_LIMITS, **(resource_limits or {})}
        # NOTE(review): the allowlist is stored but not consulted by any
        # method below — callers must invoke is_context_allowed themselves.
        self._allowlist = context_allowlist

    # ── Core Operations ──────────────────────────────────────────

    def expand(self, doc: dict[str, Any], **kwargs: Any) -> list[dict[str, Any]]:
        """Expand a JSON-LD document with resource limit enforcement."""
        enforce_resource_limits(doc, self._limits)
        # kwargs are forwarded as PyLD's options dict.
        return jsonld.expand(doc, kwargs)

    def compact(self, doc: dict[str, Any], ctx: Any, **kwargs: Any) -> dict[str, Any]:
        """Compact a JSON-LD document."""
        enforce_resource_limits(doc, self._limits)
        return jsonld.compact(doc, ctx, kwargs)

    def flatten(self, doc: dict[str, Any], ctx: Any = None, **kwargs: Any) -> dict[str, Any]:
        """Flatten a JSON-LD document."""
        enforce_resource_limits(doc, self._limits)
        return jsonld.flatten(doc, ctx, kwargs)

    def to_rdf(self, doc: dict[str, Any], **kwargs: Any) -> str:
        """Convert to N-Quads."""
        enforce_resource_limits(doc, self._limits)
        # Force the N-Quads serialization; caller kwargs cannot override it.
        return jsonld.to_rdf(doc, {**kwargs, "format": "application/n-quads"})

    def from_rdf(self, nquads: str, **kwargs: Any) -> list[dict[str, Any]]:
        """Convert N-Quads to JSON-LD."""
        # No limit check here: input is a string produced by an RDF store,
        # and limits are defined over JSON documents.
        return jsonld.from_rdf(nquads, kwargs)

    # ── AI/ML Extensions ─────────────────────────────────────────

    annotate = staticmethod(annotate)
    get_confidence = staticmethod(get_confidence)
    get_provenance = staticmethod(get_provenance)
    filter_by_confidence = staticmethod(filter_by_confidence)
    aggregate_confidence = staticmethod(aggregate_confidence)

    # ── Vector Extensions ────────────────────────────────────────

    vector_term_definition = staticmethod(vector_term_definition)
    validate_vector = staticmethod(validate_vector)
    cosine_similarity = staticmethod(cosine_similarity)
    extract_vectors = staticmethod(extract_vectors)
    strip_vectors_for_rdf = staticmethod(strip_vectors_for_rdf)

    # ── Security Extensions ──────────────────────────────────────

    compute_integrity = staticmethod(compute_integrity)
    verify_integrity = staticmethod(verify_integrity)
    integrity_context = staticmethod(integrity_context)
    is_context_allowed = staticmethod(is_context_allowed)

    # ── Validation Extensions ────────────────────────────────────

    validate_node = staticmethod(validate_node)
    validate_document = staticmethod(validate_document)
|
jsonld_ex/security.py
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
"""Security Extensions for JSON-LD."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
import hashlib
|
|
5
|
+
import base64
|
|
6
|
+
import json
|
|
7
|
+
import re
|
|
8
|
+
from typing import Any, Optional
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
# Defaults merged under caller-supplied limits (see enforce_resource_limits
# and JsonLdEx.__init__).
# NOTE(review): enforce_resource_limits currently checks only
# max_document_size and max_graph_depth; max_context_depth and
# max_expansion_time are defined but not enforced anywhere in this module.
DEFAULT_RESOURCE_LIMITS = {
    "max_context_depth": 10,
    "max_graph_depth": 100,
    "max_document_size": 10 * 1024 * 1024,  # 10 MB
    "max_expansion_time": 30,  # seconds
}

# Hash algorithms accepted in SRI-style "<algorithm>-<base64>" integrity
# strings (compute_integrity / verify_integrity).
SUPPORTED_ALGORITHMS = ("sha256", "sha384", "sha512")
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def compute_integrity(
    context: str | dict | Any, algorithm: str = "sha256"
) -> str:
    """Compute an SRI-style integrity hash ("<algorithm>-<base64>") for a context.

    String contexts are hashed as-is. Non-string contexts are serialized
    to canonical JSON first (sorted keys, compact separators), so two
    semantically identical dicts produce the same hash regardless of key
    insertion order — the previous plain ``json.dumps`` made the hash
    depend on dict ordering.

    Raises:
        ValueError: if *algorithm* is not in SUPPORTED_ALGORITHMS.
    """
    if algorithm not in SUPPORTED_ALGORITHMS:
        raise ValueError(f"Unsupported algorithm: {algorithm}")
    if isinstance(context, str):
        content = context
    else:
        # Canonical serialization: key order must not affect the digest.
        content = json.dumps(context, sort_keys=True, separators=(",", ":"))
    digest = hashlib.new(algorithm, content.encode("utf-8")).digest()
    b64 = base64.b64encode(digest).decode("ascii")
    return f"{algorithm}-{b64}"
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def verify_integrity(context: str | dict | Any, declared: str) -> bool:
    """Verify context content against its declared integrity hash.

    *declared* must look like "<algorithm>-<base64>"; raises ValueError
    for a malformed string or unsupported algorithm. Returns True only
    when the recomputed hash matches exactly.
    """
    algorithm, sep, _digest = declared.partition("-")
    if not sep or algorithm not in SUPPORTED_ALGORITHMS:
        raise ValueError(f"Invalid integrity string: {declared}")
    return compute_integrity(context, algorithm) == declared
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def integrity_context(
    url: str, content: str | dict | Any, algorithm: str = "sha256"
) -> dict[str, str]:
    """Create a context reference with integrity verification.

    Pairs the context URL with the hash of its *content* so consumers
    can detect tampering before use.
    """
    integrity = compute_integrity(content, algorithm)
    return {"@id": url, "@integrity": integrity}
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def is_context_allowed(url: str, config: dict[str, Any]) -> bool:
    """Check if a context URL is permitted by an allowlist configuration.

    Rules, in order: a global block wins; an exact match in "allowed"
    passes; a glob match ("*" / "?") in "patterns" passes; otherwise the
    URL is allowed only when no restriction is configured at all.
    """
    if config.get("block_remote_contexts", False):
        return False

    exact_urls = config.get("allowed", [])
    glob_patterns = config.get("patterns", [])

    if url in exact_urls:
        return True

    for pattern in glob_patterns:
        if not isinstance(pattern, str):
            continue
        # Translate the glob into an anchored regex.
        translated = re.escape(pattern).replace(r"\*", ".*").replace(r"\?", ".")
        if re.match(f"^{translated}$", url):
            return True

    # An empty configuration is permissive; any configured restriction
    # that did not match denies the URL.
    return not (exact_urls or glob_patterns)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def enforce_resource_limits(
    document: str | dict | Any,
    limits: Optional[dict[str, int]] = None,
) -> None:
    """Validate document against resource limits before processing.

    Checks serialized size against "max_document_size" and nesting depth
    against "max_graph_depth"; raises ValueError on the first violation.
    Caller-supplied *limits* override DEFAULT_RESOURCE_LIMITS per key.
    """
    effective = dict(DEFAULT_RESOURCE_LIMITS)
    if limits:
        effective.update(limits)

    serialized = document if isinstance(document, str) else json.dumps(document)
    size = len(serialized)
    if size > effective["max_document_size"]:
        raise ValueError(
            f"Document size {size} exceeds limit {effective['max_document_size']}"
        )

    structure = json.loads(serialized) if isinstance(document, str) else document
    depth = _measure_depth(structure)
    if depth > effective["max_graph_depth"]:
        raise ValueError(
            f"Document depth {depth} exceeds limit {effective['max_graph_depth']}"
        )
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _measure_depth(obj: Any, current: int = 0) -> int:
    """Return the maximum container nesting depth of *obj*, offset by *current*.

    Scalars have depth *current*; each dict/list level adds one.

    Implemented iteratively with an explicit stack: the previous recursive
    version hit Python's recursion limit (~1000) on deeply nested input
    and crashed with RecursionError — defeating the purpose of the depth
    guard in enforce_resource_limits, which exists to reject exactly such
    documents with a clean ValueError.
    """
    max_depth = current
    stack: list[tuple[Any, int]] = [(obj, current)]
    while stack:
        node, depth = stack.pop()
        if depth > max_depth:
            max_depth = depth
        if isinstance(node, dict):
            stack.extend((child, depth + 1) for child in node.values())
        elif isinstance(node, list):
            stack.extend((child, depth + 1) for child in node)
    return max_depth
|
jsonld_ex/validation.py
ADDED
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
"""Validation Extensions for JSON-LD (@shape)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
import re
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
from datetime import datetime
|
|
7
|
+
from typing import Any, Optional, Sequence
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass
class ValidationError:
    # Location of the failure; validate_document prefixes it with the
    # node's @id (or "anonymous").
    path: str
    # Constraint name that failed, e.g. "type", "required", "minimum".
    constraint: str
    # Human-readable description of the failure.
    message: str
    # The offending value, when available.
    value: Any = None
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass
class ValidationWarning:
    # Location of the warning within the validated node.
    path: str
    # Machine-readable warning code.
    code: str
    # Human-readable description.
    message: str
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@dataclass
class ValidationResult:
    # True iff no errors were recorded (warnings do not affect validity).
    valid: bool
    errors: list[ValidationError] = field(default_factory=list)
    warnings: list[ValidationWarning] = field(default_factory=list)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
# XML Schema datatype namespace; "xsd:" prefixes in shape @type values
# are expanded against this (see _validate_type).
XSD = "http://www.w3.org/2001/XMLSchema#"
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def validate_node(node: dict[str, Any], shape: dict[str, Any]) -> ValidationResult:
    """Validate a JSON-LD node against a shape definition.

    The shape may declare an "@type" the node must carry, plus per-property
    constraint dicts supporting @required, @type, @minimum/@maximum,
    @minLength/@maxLength, and @pattern. Only the first value of a
    multi-valued property is checked (via _extract_raw).
    """
    errors: list[ValidationError] = []
    warnings: list[ValidationWarning] = []

    if not isinstance(node, dict):
        errors.append(ValidationError(".", "type", "Node must be a dict"))
        return ValidationResult(False, errors, warnings)

    if "@type" in shape:
        node_types = _get_types(node)
        if shape["@type"] not in node_types:
            errors.append(ValidationError(
                "@type", "type",
                f'Expected type "{shape["@type"]}", found: {node_types}',
                node_types,
            ))

    for prop, constraint in shape.items():
        # Keywords and non-dict entries are not property constraints.
        if prop.startswith("@") or not isinstance(constraint, dict):
            continue

        raw = _extract_raw(node.get(prop))
        if raw is None:
            if constraint.get("@required"):
                errors.append(ValidationError(prop, "required", f'Property "{prop}" is required'))
            continue

        errors.extend(_check_value(prop, raw, constraint))

    return ValidationResult(not errors, errors, warnings)


def _check_value(prop: str, raw: Any, constraint: dict[str, Any]) -> list[ValidationError]:
    """Apply type/range/length/pattern constraints to one extracted value."""
    found: list[ValidationError] = []

    expected_type = constraint.get("@type")
    if expected_type:
        type_err = _validate_type(raw, expected_type)
        if type_err:
            found.append(ValidationError(prop, "type", type_err, raw))

    # Numeric range checks apply only to numbers; others are skipped silently.
    is_number = isinstance(raw, (int, float))
    if is_number and "@minimum" in constraint and raw < constraint["@minimum"]:
        found.append(ValidationError(
            prop, "minimum",
            f"Value {raw} below minimum {constraint['@minimum']}", raw,
        ))
    if is_number and "@maximum" in constraint and raw > constraint["@maximum"]:
        found.append(ValidationError(
            prop, "maximum",
            f"Value {raw} exceeds maximum {constraint['@maximum']}", raw,
        ))

    # String-only checks.
    if isinstance(raw, str):
        if "@minLength" in constraint and len(raw) < constraint["@minLength"]:
            found.append(ValidationError(
                prop, "minLength",
                f"Length {len(raw)} below minimum {constraint['@minLength']}", raw,
            ))
        if "@maxLength" in constraint and len(raw) > constraint["@maxLength"]:
            found.append(ValidationError(
                prop, "maxLength",
                f"Length {len(raw)} exceeds maximum {constraint['@maxLength']}", raw,
            ))
        if "@pattern" in constraint and not re.search(constraint["@pattern"], raw):
            found.append(ValidationError(
                prop, "pattern",
                f'"{raw}" does not match pattern "{constraint["@pattern"]}"', raw,
            ))
    return found
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def validate_document(
    doc: dict[str, Any], shapes: Sequence[dict[str, Any]]
) -> ValidationResult:
    """Validate all matching nodes in a document against shapes.

    A shape applies to a node when the shape's "@type" appears among the
    node's types. Error paths are qualified with the node's @id (or
    "anonymous" when it has none).
    """
    errors: list[ValidationError] = []
    warnings: list[ValidationWarning] = []

    for node in _extract_nodes(doc):
        types = _get_types(node)
        for shape in shapes:
            if shape.get("@type") not in types:
                continue
            outcome = validate_node(node, shape)
            prefix = node.get("@id", "anonymous")
            for err in outcome.errors:
                # Qualify each error path with the node it came from.
                err.path = f"{prefix}/{err.path}"
            errors.extend(outcome.errors)
            warnings.extend(outcome.warnings)

    return ValidationResult(not errors, errors, warnings)
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
# ── Internal ───────────────────────────────────────────────────────
|
|
138
|
+
|
|
139
|
+
def _get_types(node: dict) -> list[str]:
    """Return the node's @type entries as a list (possibly empty)."""
    declared = node.get("@type")
    if declared is None:
        return []
    if isinstance(declared, list):
        return declared
    return [declared]
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def _extract_raw(value: Any) -> Any:
    """Unwrap @value objects and first-of-list containers to a raw value.

    Iterative equivalent of the recursive original: descend into the first
    element of each list until a @value object (unwrapped once) or a
    scalar/other value is reached.
    """
    current = value
    while True:
        if isinstance(current, dict) and "@value" in current:
            return current["@value"]
        if isinstance(current, list) and current:
            current = current[0]
            continue
        return current
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def _extract_nodes(doc: Any) -> list[dict]:
    """Collect typed nodes from a document, descending into lists and @graph.

    Only dicts carrying "@type" are collected; untyped dicts are skipped
    (but their "@graph" contents are still explored).
    """
    if isinstance(doc, list):
        collected: list[dict] = []
        for entry in doc:
            collected.extend(_extract_nodes(entry))
        return collected
    if not isinstance(doc, dict):
        return []
    collected = [doc] if "@type" in doc else []
    if "@graph" in doc:
        collected += _extract_nodes(doc["@graph"])
    return collected
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def _validate_type(value: Any, expected: str) -> Optional[str]:
    """Return an error message if *value* mismatches the XSD type, else None.

    Unknown type IRIs are not checked and pass silently. Booleans are
    excluded from xsd:integer but accepted by xsd:boolean.
    """
    xsd = "http://www.w3.org/2001/XMLSchema#"  # module-level XSD constant, inlined
    full_iri = expected.replace("xsd:", xsd) if expected.startswith("xsd:") else expected

    def is_int(v: Any) -> bool:
        # bool is a subclass of int; reject it for integers.
        return isinstance(v, int) and not isinstance(v, bool)

    def is_num(v: Any) -> bool:
        return isinstance(v, (int, float))

    checks = {
        f"{xsd}string": lambda v: isinstance(v, str),
        f"{xsd}integer": is_int,
        f"{xsd}double": is_num,
        f"{xsd}float": is_num,
        f"{xsd}decimal": is_num,
        f"{xsd}boolean": lambda v: isinstance(v, bool),
    }
    checker = checks.get(full_iri)
    if checker is None or checker(value):
        return None
    # Report the short "xsd:" form when that's how the shape spelled it.
    short = expected if expected.startswith("xsd:") else full_iri
    return f"Expected {short}, got {type(value).__name__}: {value}"
|
jsonld_ex/vector.py
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
"""Vector Embedding Extensions for JSON-LD."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
import math
|
|
5
|
+
from typing import Any, Optional
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def vector_term_definition(
    term_name: str, iri: str, dimensions: Optional[int] = None
) -> dict[str, Any]:
    """Create a context term definition for a vector embedding property.

    Raises ValueError when *dimensions* is supplied but is not a
    positive integer.
    """
    definition: dict[str, Any] = {"@id": iri, "@container": "@vector"}
    if dimensions is None:
        return {term_name: definition}
    if not isinstance(dimensions, int) or dimensions < 1:
        raise ValueError(f"@dimensions must be a positive integer, got: {dimensions}")
    definition["@dimensions"] = dimensions
    return {term_name: definition}
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def validate_vector(
    vector: Any, expected_dimensions: Optional[int] = None
) -> tuple[bool, list[str]]:
    """Validate a vector embedding. Returns (valid, errors).

    The vector must be a non-empty list/tuple of finite numbers; when
    *expected_dimensions* is given, the length must match too.
    """
    problems: list[str] = []
    if not isinstance(vector, (list, tuple)):
        problems.append(f"Vector must be a list, got: {type(vector).__name__}")
        return False, problems
    if not vector:
        problems.append("Vector must not be empty")
        return False, problems

    def finite_number(x: Any) -> bool:
        # isinstance check first, so isnan/isinf never see a non-number.
        return isinstance(x, (int, float)) and not math.isnan(x) and not math.isinf(x)

    problems.extend(
        f"Vector element [{idx}] must be a finite number, got: {element}"
        for idx, element in enumerate(vector)
        if not finite_number(element)
    )
    if expected_dimensions is not None and len(vector) != expected_dimensions:
        problems.append(
            f"Vector dimension mismatch: expected {expected_dimensions}, got {len(vector)}"
        )
    return not problems, problems
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def cosine_similarity(a: list[float], b: list[float]) -> float:
    """Compute cosine similarity between two vectors.

    Raises ValueError on a dimension mismatch; returns 0.0 when either
    vector has zero magnitude.
    """
    if len(a) != len(b):
        raise ValueError(f"Vector dimension mismatch: {len(a)} vs {len(b)}")
    dot_product = 0.0
    sq_a = 0.0
    sq_b = 0.0
    for x, y in zip(a, b):
        dot_product += x * y
        sq_a += x * x
        sq_b += y * y
    magnitude = math.sqrt(sq_a) * math.sqrt(sq_b)
    if magnitude == 0:
        return 0.0
    return dot_product / magnitude
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def extract_vectors(
    node: dict[str, Any], vector_properties: list[str]
) -> dict[str, list[float]]:
    """Extract vector embeddings from a JSON-LD node.

    Only non-empty lists whose first element is numeric are treated as
    vectors; missing or non-vector properties are skipped silently.
    """
    if not isinstance(node, dict):
        return {}
    found: dict[str, list[float]] = {}
    for name in vector_properties:
        candidate = node.get(name)
        looks_numeric = (
            isinstance(candidate, list)
            and bool(candidate)
            and isinstance(candidate[0], (int, float))
        )
        if looks_numeric:
            found[name] = candidate
    return found
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def strip_vectors_for_rdf(doc: Any, vector_properties: list[str]) -> Any:
    """Remove vector embeddings before RDF conversion.

    Recursively rebuilds dicts without the listed property names and maps
    over lists; scalars pass through unchanged.
    """
    if isinstance(doc, dict):
        stripped: dict[Any, Any] = {}
        for key, val in doc.items():
            if key in vector_properties:
                continue
            stripped[key] = strip_vectors_for_rdf(val, vector_properties)
        return stripped
    if isinstance(doc, list):
        return [strip_vectors_for_rdf(entry, vector_properties) for entry in doc]
    return doc
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: jsonld-ex
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: JSON-LD 1.2 extensions for AI/ML data exchange, security hardening, and validation
|
|
5
|
+
Author-email: Muntaser Aljabry <muntaser@example.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Keywords: json-ld,linked-data,semantic-web,ai,ml,confidence,provenance,embeddings,security,validation
|
|
8
|
+
Classifier: Development Status :: 3 - Alpha
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: Intended Audience :: Science/Research
|
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
18
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
19
|
+
Requires-Python: >=3.9
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
Requires-Dist: PyLD>=2.0.4
|
|
22
|
+
Provides-Extra: dev
|
|
23
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
24
|
+
Requires-Dist: pytest-asyncio>=0.23; extra == "dev"
|
|
25
|
+
Requires-Dist: mypy>=1.8; extra == "dev"
|
|
26
|
+
Requires-Dist: ruff>=0.2; extra == "dev"
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
jsonld_ex/__init__.py,sha256=_4A1sW_AzHYtK8d0evFiOtlPyTI1IIsteiRmOf6k9SQ,921
|
|
2
|
+
jsonld_ex/ai_ml.py,sha256=0SvM2RADWLAandxfYTFMMmQUMuTRzqkezpwplpGRP3A,4973
|
|
3
|
+
jsonld_ex/processor.py,sha256=tRZX2aiHwZJa62ex4KT0HCbKYtzAkGV87bDBtXAhbvE,3880
|
|
4
|
+
jsonld_ex/security.py,sha256=PCHZ7NTCy_WeuI_BNTKIMIH00OzLtozu47y5x1E8Fhg,3251
|
|
5
|
+
jsonld_ex/validation.py,sha256=FjnRotqb9irWNARhh_eY6xBxFLCZDHJAczAHc0EUE2Q,6266
|
|
6
|
+
jsonld_ex/vector.py,sha256=SHTHAcU2xa-Gh_Q4YQiOy1QX1_5O_U-vkODpwzj0Irc,2885
|
|
7
|
+
jsonld_ex-0.1.0.dist-info/METADATA,sha256=0_Xm330HBmDg82a6rbk0f3B3msBeYHEo-snle1OjnX4,1167
|
|
8
|
+
jsonld_ex-0.1.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
9
|
+
jsonld_ex-0.1.0.dist-info/top_level.txt,sha256=YMe-47TNES9MWZsVWecMwZUDjGxCeU51ay5fuqidBfk,10
|
|
10
|
+
jsonld_ex-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
jsonld_ex
|