jsonld-ex 0.1.0 (jsonld_ex-0.1.0.tar.gz)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- jsonld_ex-0.1.0/PKG-INFO +26 -0
- jsonld_ex-0.1.0/pyproject.toml +56 -0
- jsonld_ex-0.1.0/setup.cfg +4 -0
- jsonld_ex-0.1.0/src/jsonld_ex/__init__.py +30 -0
- jsonld_ex-0.1.0/src/jsonld_ex/ai_ml.py +154 -0
- jsonld_ex-0.1.0/src/jsonld_ex/processor.py +91 -0
- jsonld_ex-0.1.0/src/jsonld_ex/security.py +92 -0
- jsonld_ex-0.1.0/src/jsonld_ex/validation.py +186 -0
- jsonld_ex-0.1.0/src/jsonld_ex/vector.py +76 -0
- jsonld_ex-0.1.0/src/jsonld_ex.egg-info/PKG-INFO +26 -0
- jsonld_ex-0.1.0/src/jsonld_ex.egg-info/SOURCES.txt +14 -0
- jsonld_ex-0.1.0/src/jsonld_ex.egg-info/dependency_links.txt +1 -0
- jsonld_ex-0.1.0/src/jsonld_ex.egg-info/requires.txt +7 -0
- jsonld_ex-0.1.0/src/jsonld_ex.egg-info/top_level.txt +1 -0
- jsonld_ex-0.1.0/tests/test_ai_ml.py +96 -0
- jsonld_ex-0.1.0/tests/test_validation.py +63 -0
jsonld_ex-0.1.0/PKG-INFO
ADDED
@@ -0,0 +1,26 @@
+Metadata-Version: 2.4
+Name: jsonld-ex
+Version: 0.1.0
+Summary: JSON-LD 1.2 extensions for AI/ML data exchange, security hardening, and validation
+Author-email: Muntaser Aljabry <muntaser@example.com>
+License: MIT
+Keywords: json-ld,linked-data,semantic-web,ai,ml,confidence,provenance,embeddings,security,validation
+Classifier: Development Status :: 3 - Alpha
+Classifier: Intended Audience :: Developers
+Classifier: Intended Audience :: Science/Research
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+Classifier: Topic :: Software Development :: Libraries
+Requires-Python: >=3.9
+Description-Content-Type: text/markdown
+Requires-Dist: PyLD>=2.0.4
+Provides-Extra: dev
+Requires-Dist: pytest>=7.0; extra == "dev"
+Requires-Dist: pytest-asyncio>=0.23; extra == "dev"
+Requires-Dist: mypy>=1.8; extra == "dev"
+Requires-Dist: ruff>=0.2; extra == "dev"

jsonld_ex-0.1.0/pyproject.toml
ADDED
@@ -0,0 +1,56 @@
+[build-system]
+requires = ["setuptools>=68.0", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "jsonld-ex"
+version = "0.1.0"
+description = "JSON-LD 1.2 extensions for AI/ML data exchange, security hardening, and validation"
+readme = "README.md"
+license = {text = "MIT"}
+requires-python = ">=3.9"
+authors = [
+    {name = "Muntaser Aljabry", email = "muntaser@example.com"},
+]
+keywords = [
+    "json-ld", "linked-data", "semantic-web", "ai", "ml",
+    "confidence", "provenance", "embeddings", "security", "validation",
+]
+classifiers = [
+    "Development Status :: 3 - Alpha",
+    "Intended Audience :: Developers",
+    "Intended Audience :: Science/Research",
+    "License :: OSI Approved :: MIT License",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.9",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Topic :: Scientific/Engineering :: Artificial Intelligence",
+    "Topic :: Software Development :: Libraries",
+]
+dependencies = [
+    "PyLD>=2.0.4",
+]
+
+[project.optional-dependencies]
+dev = [
+    "pytest>=7.0",
+    "pytest-asyncio>=0.23",
+    "mypy>=1.8",
+    "ruff>=0.2",
+]
+
+[tool.setuptools.packages.find]
+where = ["src"]
+
+[tool.pytest.ini_options]
+testpaths = ["tests"]
+
+[tool.mypy]
+python_version = "3.9"
+strict = true
+
+[tool.ruff]
+target-version = "py39"
+line-length = 100

jsonld_ex-0.1.0/src/jsonld_ex/__init__.py
ADDED
@@ -0,0 +1,30 @@
+"""
+jsonld-ex: JSON-LD 1.2 Extensions for AI/ML, Security, and Validation
+
+Reference implementation of proposed JSON-LD 1.2 extensions.
+Wraps PyLD for core JSON-LD processing and adds extension layers.
+"""
+
+__version__ = "0.1.0"
+
+from jsonld_ex.processor import JsonLdEx
+from jsonld_ex.ai_ml import annotate, get_confidence, get_provenance, filter_by_confidence
+from jsonld_ex.vector import validate_vector, cosine_similarity, vector_term_definition
+from jsonld_ex.security import compute_integrity, verify_integrity, is_context_allowed
+from jsonld_ex.validation import validate_node, validate_document
+
+__all__ = [
+    "JsonLdEx",
+    "annotate",
+    "get_confidence",
+    "get_provenance",
+    "filter_by_confidence",
+    "validate_vector",
+    "cosine_similarity",
+    "vector_term_definition",
+    "compute_integrity",
+    "verify_integrity",
+    "is_context_allowed",
+    "validate_node",
+    "validate_document",
+]
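
Illustrative only (not part of the released files): given the re-exports above, the whole public API should be importable from the package root. A minimal sketch:

from jsonld_ex import JsonLdEx, annotate, validate_node

proc = JsonLdEx()  # processor with the default resource limits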

jsonld_ex-0.1.0/src/jsonld_ex/ai_ml.py
ADDED
@@ -0,0 +1,154 @@
+"""
+AI/ML Extensions for JSON-LD
+
+Provides @confidence, @source, @extractedAt, @method, @humanVerified
+"""
+
+from __future__ import annotations
+from dataclasses import dataclass, field
+from typing import Any, Optional, Sequence, Literal
+import math
+
+JSONLD_EX_NAMESPACE = "http://www.w3.org/ns/jsonld-ex/"
+
+
+@dataclass
+class ProvenanceMetadata:
+    """AI/ML provenance metadata attached to a value."""
+    confidence: Optional[float] = None
+    source: Optional[str] = None
+    extracted_at: Optional[str] = None
+    method: Optional[str] = None
+    human_verified: Optional[bool] = None
+
+
+def annotate(
+    value: Any,
+    confidence: Optional[float] = None,
+    source: Optional[str] = None,
+    extracted_at: Optional[str] = None,
+    method: Optional[str] = None,
+    human_verified: Optional[bool] = None,
+) -> dict[str, Any]:
+    """Create an annotated JSON-LD value with provenance metadata."""
+    result: dict[str, Any] = {"@value": value}
+
+    if confidence is not None:
+        _validate_confidence(confidence)
+        result["@confidence"] = confidence
+    if source is not None:
+        result["@source"] = source
+    if extracted_at is not None:
+        result["@extractedAt"] = extracted_at
+    if method is not None:
+        result["@method"] = method
+    if human_verified is not None:
+        result["@humanVerified"] = human_verified
+
+    return result
+
+
+def get_confidence(node: Any) -> Optional[float]:
+    """Extract confidence score from a node or annotated value."""
+    if node is None or not isinstance(node, dict):
+        return None
+
+    # Compact form
+    if "@confidence" in node:
+        return node["@confidence"]
+
+    # Expanded form
+    key = f"{JSONLD_EX_NAMESPACE}confidence"
+    if key in node:
+        val = node[key]
+        if isinstance(val, list) and len(val) > 0:
+            item = val[0]
+            return item.get("@value", item) if isinstance(item, dict) else item
+        if isinstance(val, dict):
+            return val.get("@value", val)
+        return val
+
+    return None
+
+
+def get_provenance(node: Any) -> ProvenanceMetadata:
+    """Extract all provenance metadata from a node."""
+    if node is None or not isinstance(node, dict):
+        return ProvenanceMetadata()
+
+    return ProvenanceMetadata(
+        confidence=_extract_field(node, "confidence", "@confidence"),
+        source=_extract_field(node, "source", "@source"),
+        extracted_at=_extract_field(node, "extractedAt", "@extractedAt"),
+        method=_extract_field(node, "method", "@method"),
+        human_verified=_extract_field(node, "humanVerified", "@humanVerified"),
+    )
+
+
+def filter_by_confidence(
+    graph: Sequence[dict[str, Any]],
+    property_name: str,
+    min_confidence: float,
+) -> list[dict[str, Any]]:
+    """Filter graph nodes by minimum confidence on a property."""
+    _validate_confidence(min_confidence)
+    results = []
+    for node in graph:
+        prop_value = node.get(property_name)
+        if prop_value is None:
+            continue
+        values = prop_value if isinstance(prop_value, list) else [prop_value]
+        if any(
+            (c := get_confidence(v)) is not None and c >= min_confidence
+            for v in values
+        ):
+            results.append(node)
+    return results
+
+
+def aggregate_confidence(
+    scores: Sequence[float],
+    strategy: Literal["mean", "max", "min", "weighted"] = "mean",
+    weights: Optional[Sequence[float]] = None,
+) -> float:
+    """Aggregate multiple confidence scores."""
+    if len(scores) == 0:
+        return 0.0
+    for s in scores:
+        _validate_confidence(s)
+
+    if strategy == "max":
+        return max(scores)
+    elif strategy == "min":
+        return min(scores)
+    elif strategy == "weighted":
+        if weights is None or len(weights) != len(scores):
+            raise ValueError("Weights must match scores length")
+        total_weight = sum(weights)
+        return sum(s * w for s, w in zip(scores, weights)) / total_weight
+    else:  # mean
+        return sum(scores) / len(scores)
+
+
+# ── Internal ───────────────────────────────────────────────────────
+
+def _validate_confidence(score: float) -> None:
+    if not isinstance(score, (int, float)) or score < 0 or score > 1:
+        raise ValueError(f"@confidence must be between 0.0 and 1.0, got: {score}")
+
+
+def _extract_field(node: dict, compact_name: str, keyword: str) -> Any:
+    if keyword in node:
+        return node[keyword]
+    if compact_name in node:
+        return node[compact_name]
+    expanded_key = f"{JSONLD_EX_NAMESPACE}{compact_name}"
+    if expanded_key in node:
+        val = node[expanded_key]
+        if isinstance(val, list) and len(val) > 0:
+            item = val[0]
+            return item.get("@value", item) if isinstance(item, dict) else item
+        if isinstance(val, dict):
+            return val.get("@value", val)
+        return val
+    return None
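
Illustrative only (not part of the released files): a minimal sketch of the annotation round trip using the helpers above; the graph and node IDs are made up for the example.

from jsonld_ex.ai_ml import annotate, get_confidence, filter_by_confidence

name = annotate("John Smith", confidence=0.95, method="NER")
# name == {"@value": "John Smith", "@confidence": 0.95, "@method": "NER"}
assert get_confidence(name) == 0.95

graph = [
    {"@id": "#a", "name": annotate("Alice", confidence=0.9)},
    {"@id": "#b", "name": annotate("Bob", confidence=0.4)},
]
# keeps only nodes whose "name" annotation meets the threshold
assert [n["@id"] for n in filter_by_confidence(graph, "name", 0.5)] == ["#a"]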

jsonld_ex-0.1.0/src/jsonld_ex/processor.py
ADDED
@@ -0,0 +1,91 @@
+"""
+JsonLdEx — Extended JSON-LD Processor (Python)
+
+Wraps PyLD with backward-compatible extensions for AI/ML,
+security, and validation.
+"""
+
+from __future__ import annotations
+from typing import Any, Optional
+
+from pyld import jsonld
+
+from jsonld_ex.ai_ml import (
+    annotate, get_confidence, get_provenance,
+    filter_by_confidence, aggregate_confidence, ProvenanceMetadata,
+)
+from jsonld_ex.vector import (
+    vector_term_definition, validate_vector, cosine_similarity,
+    extract_vectors, strip_vectors_for_rdf,
+)
+from jsonld_ex.security import (
+    compute_integrity, verify_integrity, integrity_context,
+    is_context_allowed, enforce_resource_limits, DEFAULT_RESOURCE_LIMITS,
+)
+from jsonld_ex.validation import validate_node, validate_document, ValidationResult
+
+
+class JsonLdEx:
+    """Extended JSON-LD processor wrapping PyLD."""
+
+    def __init__(
+        self,
+        resource_limits: Optional[dict[str, int]] = None,
+        context_allowlist: Optional[dict[str, Any]] = None,
+    ):
+        self._limits = {**DEFAULT_RESOURCE_LIMITS, **(resource_limits or {})}
+        self._allowlist = context_allowlist
+
+    # ── Core Operations ──────────────────────────────────────────
+
+    def expand(self, doc: dict[str, Any], **kwargs: Any) -> list[dict[str, Any]]:
+        """Expand a JSON-LD document with resource limit enforcement."""
+        enforce_resource_limits(doc, self._limits)
+        return jsonld.expand(doc, kwargs)
+
+    def compact(self, doc: dict[str, Any], ctx: Any, **kwargs: Any) -> dict[str, Any]:
+        """Compact a JSON-LD document."""
+        enforce_resource_limits(doc, self._limits)
+        return jsonld.compact(doc, ctx, kwargs)
+
+    def flatten(self, doc: dict[str, Any], ctx: Any = None, **kwargs: Any) -> dict[str, Any]:
+        """Flatten a JSON-LD document."""
+        enforce_resource_limits(doc, self._limits)
+        return jsonld.flatten(doc, ctx, kwargs)
+
+    def to_rdf(self, doc: dict[str, Any], **kwargs: Any) -> str:
+        """Convert to N-Quads."""
+        enforce_resource_limits(doc, self._limits)
+        return jsonld.to_rdf(doc, {**kwargs, "format": "application/n-quads"})
+
+    def from_rdf(self, nquads: str, **kwargs: Any) -> list[dict[str, Any]]:
+        """Convert N-Quads to JSON-LD."""
+        return jsonld.from_rdf(nquads, kwargs)
+
+    # ── AI/ML Extensions ─────────────────────────────────────────
+
+    annotate = staticmethod(annotate)
+    get_confidence = staticmethod(get_confidence)
+    get_provenance = staticmethod(get_provenance)
+    filter_by_confidence = staticmethod(filter_by_confidence)
+    aggregate_confidence = staticmethod(aggregate_confidence)
+
+    # ── Vector Extensions ────────────────────────────────────────
+
+    vector_term_definition = staticmethod(vector_term_definition)
+    validate_vector = staticmethod(validate_vector)
+    cosine_similarity = staticmethod(cosine_similarity)
+    extract_vectors = staticmethod(extract_vectors)
+    strip_vectors_for_rdf = staticmethod(strip_vectors_for_rdf)
+
+    # ── Security Extensions ──────────────────────────────────────
+
+    compute_integrity = staticmethod(compute_integrity)
+    verify_integrity = staticmethod(verify_integrity)
+    integrity_context = staticmethod(integrity_context)
+    is_context_allowed = staticmethod(is_context_allowed)
+
+    # ── Validation Extensions ────────────────────────────────────
+
+    validate_node = staticmethod(validate_node)
+    validate_document = staticmethod(validate_document)
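
Illustrative only (not part of the released files): a sketch of the wrapper in use, assuming PyLD is installed; the inline @context keeps expansion fully offline.

from jsonld_ex import JsonLdEx

proc = JsonLdEx(resource_limits={"max_document_size": 1024 * 1024})
doc = {
    "@context": {"name": "http://schema.org/name"},
    "@id": "http://example.org/alice",
    "name": "Alice",
}
# enforce_resource_limits() runs first, then PyLD's expand
expanded = proc.expand(doc)
# expanded == [{"@id": "http://example.org/alice",
#               "http://schema.org/name": [{"@value": "Alice"}]}]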

jsonld_ex-0.1.0/src/jsonld_ex/security.py
ADDED
@@ -0,0 +1,92 @@
+"""Security Extensions for JSON-LD."""
+
+from __future__ import annotations
+import hashlib
+import base64
+import json
+import re
+from typing import Any, Optional
+
+
+DEFAULT_RESOURCE_LIMITS = {
+    "max_context_depth": 10,
+    "max_graph_depth": 100,
+    "max_document_size": 10 * 1024 * 1024,  # 10 MB
+    "max_expansion_time": 30,  # seconds
+}
+
+SUPPORTED_ALGORITHMS = ("sha256", "sha384", "sha512")
+
+
+def compute_integrity(
+    context: str | dict | Any, algorithm: str = "sha256"
+) -> str:
+    """Compute an integrity hash for a context."""
+    if algorithm not in SUPPORTED_ALGORITHMS:
+        raise ValueError(f"Unsupported algorithm: {algorithm}")
+    content = context if isinstance(context, str) else json.dumps(context)
+    h = hashlib.new(algorithm, content.encode("utf-8")).digest()
+    b64 = base64.b64encode(h).decode("ascii")
+    return f"{algorithm}-{b64}"
+
+
+def verify_integrity(context: str | dict | Any, declared: str) -> bool:
+    """Verify context content against its declared integrity hash."""
+    parts = declared.split("-", 1)
+    if len(parts) != 2 or parts[0] not in SUPPORTED_ALGORITHMS:
+        raise ValueError(f"Invalid integrity string: {declared}")
+    computed = compute_integrity(context, parts[0])
+    return computed == declared
+
+
+def integrity_context(
+    url: str, content: str | dict | Any, algorithm: str = "sha256"
+) -> dict[str, str]:
+    """Create a context reference with integrity verification."""
+    return {"@id": url, "@integrity": compute_integrity(content, algorithm)}
+
+
+def is_context_allowed(url: str, config: dict[str, Any]) -> bool:
+    """Check if a context URL is permitted by an allowlist configuration."""
+    if config.get("block_remote_contexts", False):
+        return False
+    allowed = config.get("allowed", [])
+    if url in allowed:
+        return True
+    for pattern in config.get("patterns", []):
+        if isinstance(pattern, str):
+            regex = "^" + re.escape(pattern).replace(r"\*", ".*").replace(r"\?", ".") + "$"
+            if re.match(regex, url):
+                return True
+    if allowed or config.get("patterns"):
+        return False
+    return True
+
+
+def enforce_resource_limits(
+    document: str | dict | Any,
+    limits: Optional[dict[str, int]] = None,
+) -> None:
+    """Validate document against resource limits before processing."""
+    resolved = {**DEFAULT_RESOURCE_LIMITS, **(limits or {})}
+    content = document if isinstance(document, str) else json.dumps(document)
+    if len(content) > resolved["max_document_size"]:
+        raise ValueError(
+            f"Document size {len(content)} exceeds limit {resolved['max_document_size']}"
+        )
+    parsed = json.loads(content) if isinstance(document, str) else document
+    depth = _measure_depth(parsed)
+    if depth > resolved["max_graph_depth"]:
+        raise ValueError(
+            f"Document depth {depth} exceeds limit {resolved['max_graph_depth']}"
+        )
+
+
+def _measure_depth(obj: Any, current: int = 0) -> int:
+    if obj is None or not isinstance(obj, (dict, list)):
+        return current
+    max_depth = current
+    items = obj if isinstance(obj, list) else obj.values()
+    for item in items:
+        max_depth = max(max_depth, _measure_depth(item, current + 1))
+    return max_depth
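
Illustrative only (not part of the released files): a sketch of the integrity and allowlist helpers; the policy dict and URLs are made-up examples.

from jsonld_ex.security import compute_integrity, verify_integrity, is_context_allowed

ctx = {"@context": {"name": "http://schema.org/name"}}
digest = compute_integrity(ctx)  # e.g. "sha256-<base64 digest>"
assert verify_integrity(ctx, digest)

policy = {"allowed": ["https://schema.org/"], "patterns": ["https://*.example.org/*"]}
assert is_context_allowed("https://schema.org/", policy)        # exact allowlist hit
assert is_context_allowed("https://ctx.example.org/v1", policy)  # glob pattern hit
assert not is_context_allowed("https://evil.test/ctx", policy)   # not listed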

jsonld_ex-0.1.0/src/jsonld_ex/validation.py
ADDED
@@ -0,0 +1,186 @@
+"""Validation Extensions for JSON-LD (@shape)."""
+
+from __future__ import annotations
+import re
+from dataclasses import dataclass, field
+from datetime import datetime
+from typing import Any, Optional, Sequence
+
+
+@dataclass
+class ValidationError:
+    path: str
+    constraint: str
+    message: str
+    value: Any = None
+
+
+@dataclass
+class ValidationWarning:
+    path: str
+    code: str
+    message: str
+
+
+@dataclass
+class ValidationResult:
+    valid: bool
+    errors: list[ValidationError] = field(default_factory=list)
+    warnings: list[ValidationWarning] = field(default_factory=list)
+
+
+XSD = "http://www.w3.org/2001/XMLSchema#"
+
+
+def validate_node(node: dict[str, Any], shape: dict[str, Any]) -> ValidationResult:
+    """Validate a JSON-LD node against a shape definition."""
+    errors: list[ValidationError] = []
+    warnings: list[ValidationWarning] = []
+
+    if not isinstance(node, dict):
+        errors.append(ValidationError(".", "type", "Node must be a dict"))
+        return ValidationResult(False, errors, warnings)
+
+    # Type check
+    if "@type" in shape:
+        node_types = _get_types(node)
+        if shape["@type"] not in node_types:
+            errors.append(ValidationError(
+                "@type", "type",
+                f'Expected type "{shape["@type"]}", found: {node_types}',
+                node_types,
+            ))
+
+    # Property constraints
+    for prop, constraint in shape.items():
+        if prop.startswith("@") or not isinstance(constraint, dict):
+            continue
+
+        value = node.get(prop)
+        raw = _extract_raw(value)
+
+        if constraint.get("@required") and raw is None:
+            errors.append(ValidationError(prop, "required", f'Property "{prop}" is required'))
+            continue
+
+        if raw is None:
+            continue
+
+        # Type check
+        expected_type = constraint.get("@type")
+        if expected_type:
+            type_err = _validate_type(raw, expected_type)
+            if type_err:
+                errors.append(ValidationError(prop, "type", type_err, raw))
+
+        # Numeric
+        if "@minimum" in constraint and isinstance(raw, (int, float)):
+            if raw < constraint["@minimum"]:
+                errors.append(ValidationError(
+                    prop, "minimum",
+                    f"Value {raw} below minimum {constraint['@minimum']}", raw,
+                ))
+
+        if "@maximum" in constraint and isinstance(raw, (int, float)):
+            if raw > constraint["@maximum"]:
+                errors.append(ValidationError(
+                    prop, "maximum",
+                    f"Value {raw} exceeds maximum {constraint['@maximum']}", raw,
+                ))
+
+        # String length
+        if "@minLength" in constraint and isinstance(raw, str):
+            if len(raw) < constraint["@minLength"]:
+                errors.append(ValidationError(
+                    prop, "minLength",
+                    f"Length {len(raw)} below minimum {constraint['@minLength']}", raw,
+                ))
+
+        if "@maxLength" in constraint and isinstance(raw, str):
+            if len(raw) > constraint["@maxLength"]:
+                errors.append(ValidationError(
+                    prop, "maxLength",
+                    f"Length {len(raw)} exceeds maximum {constraint['@maxLength']}", raw,
+                ))
+
+        # Pattern
+        if "@pattern" in constraint and isinstance(raw, str):
+            if not re.search(constraint["@pattern"], raw):
+                errors.append(ValidationError(
+                    prop, "pattern",
+                    f'"{raw}" does not match pattern "{constraint["@pattern"]}"', raw,
+                ))
+
+    return ValidationResult(len(errors) == 0, errors, warnings)
+
+
+def validate_document(
+    doc: dict[str, Any], shapes: Sequence[dict[str, Any]]
+) -> ValidationResult:
+    """Validate all matching nodes in a document against shapes."""
+    all_errors: list[ValidationError] = []
+    all_warnings: list[ValidationWarning] = []
+
+    for node in _extract_nodes(doc):
+        node_types = _get_types(node)
+        for shape in shapes:
+            if shape.get("@type") in node_types:
+                result = validate_node(node, shape)
+                for e in result.errors:
+                    e.path = f"{node.get('@id', 'anonymous')}/{e.path}"
+                all_errors.extend(result.errors)
+                all_warnings.extend(result.warnings)
+
+    return ValidationResult(len(all_errors) == 0, all_errors, all_warnings)
+
+
+# ── Internal ───────────────────────────────────────────────────────
+
+def _get_types(node: dict) -> list[str]:
+    t = node.get("@type")
+    if t is None:
+        return []
+    return t if isinstance(t, list) else [t]
+
+
+def _extract_raw(value: Any) -> Any:
+    if value is None:
+        return None
+    if isinstance(value, dict) and "@value" in value:
+        return value["@value"]
+    if isinstance(value, list) and len(value) > 0:
+        return _extract_raw(value[0])
+    return value
+
+
+def _extract_nodes(doc: Any) -> list[dict]:
+    if isinstance(doc, list):
+        nodes = []
+        for item in doc:
+            nodes.extend(_extract_nodes(item))
+        return nodes
+    if not isinstance(doc, dict):
+        return []
+    nodes = []
+    if "@type" in doc:
+        nodes.append(doc)
+    if "@graph" in doc:
+        nodes.extend(_extract_nodes(doc["@graph"]))
+    return nodes
+
+
+def _validate_type(value: Any, expected: str) -> Optional[str]:
+    xsd_type = expected.replace("xsd:", XSD) if expected.startswith("xsd:") else expected
+    checks = {
+        f"{XSD}string": lambda v: isinstance(v, str),
+        f"{XSD}integer": lambda v: isinstance(v, int) and not isinstance(v, bool),
+        f"{XSD}double": lambda v: isinstance(v, (int, float)),
+        f"{XSD}float": lambda v: isinstance(v, (int, float)),
+        f"{XSD}decimal": lambda v: isinstance(v, (int, float)),
+        f"{XSD}boolean": lambda v: isinstance(v, bool),
+    }
+    checker = checks.get(xsd_type)
+    if checker and not checker(value):
+        short = expected if expected.startswith("xsd:") else xsd_type
+        return f"Expected {short}, got {type(value).__name__}: {value}"
+    return None
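
Illustrative only (not part of the released files): a sketch of shape validation; ARTICLE_SHAPE is a hypothetical shape for the example, not one shipped by the package.

from jsonld_ex.validation import validate_node

ARTICLE_SHAPE = {
    "@type": "Article",
    "headline": {"@required": True, "@type": "xsd:string", "@maxLength": 110},
    "wordCount": {"@type": "xsd:integer", "@minimum": 1},
}

node = {"@type": "Article", "headline": "Hello", "wordCount": 0}
result = validate_node(node, ARTICLE_SHAPE)
assert not result.valid
assert [e.constraint for e in result.errors] == ["minimum"]  # 0 is below @minimum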

jsonld_ex-0.1.0/src/jsonld_ex/vector.py
ADDED
@@ -0,0 +1,76 @@
+"""Vector Embedding Extensions for JSON-LD."""
+
+from __future__ import annotations
+import math
+from typing import Any, Optional
+
+
+def vector_term_definition(
+    term_name: str, iri: str, dimensions: Optional[int] = None
+) -> dict[str, Any]:
+    """Create a context term definition for a vector embedding property."""
+    defn: dict[str, Any] = {"@id": iri, "@container": "@vector"}
+    if dimensions is not None:
+        if not isinstance(dimensions, int) or dimensions < 1:
+            raise ValueError(f"@dimensions must be a positive integer, got: {dimensions}")
+        defn["@dimensions"] = dimensions
+    return {term_name: defn}
+
+
+def validate_vector(
+    vector: Any, expected_dimensions: Optional[int] = None
+) -> tuple[bool, list[str]]:
+    """Validate a vector embedding. Returns (valid, errors)."""
+    errors: list[str] = []
+    if not isinstance(vector, (list, tuple)):
+        errors.append(f"Vector must be a list, got: {type(vector).__name__}")
+        return False, errors
+    if len(vector) == 0:
+        errors.append("Vector must not be empty")
+        return False, errors
+    for i, v in enumerate(vector):
+        if not isinstance(v, (int, float)) or math.isnan(v) or math.isinf(v):
+            errors.append(f"Vector element [{i}] must be a finite number, got: {v}")
+    if expected_dimensions is not None and len(vector) != expected_dimensions:
+        errors.append(
+            f"Vector dimension mismatch: expected {expected_dimensions}, got {len(vector)}"
+        )
+    return len(errors) == 0, errors
+
+
+def cosine_similarity(a: list[float], b: list[float]) -> float:
+    """Compute cosine similarity between two vectors."""
+    if len(a) != len(b):
+        raise ValueError(f"Vector dimension mismatch: {len(a)} vs {len(b)}")
+    dot = sum(x * y for x, y in zip(a, b))
+    norm_a = math.sqrt(sum(x * x for x in a))
+    norm_b = math.sqrt(sum(x * x for x in b))
+    denom = norm_a * norm_b
+    return dot / denom if denom != 0 else 0.0
+
+
+def extract_vectors(
+    node: dict[str, Any], vector_properties: list[str]
+) -> dict[str, list[float]]:
+    """Extract vector embeddings from a JSON-LD node."""
+    vectors: dict[str, list[float]] = {}
+    if not isinstance(node, dict):
+        return vectors
+    for prop in vector_properties:
+        value = node.get(prop)
+        if isinstance(value, list) and len(value) > 0 and isinstance(value[0], (int, float)):
+            vectors[prop] = value
+    return vectors
+
+
+def strip_vectors_for_rdf(doc: Any, vector_properties: list[str]) -> Any:
+    """Remove vector embeddings before RDF conversion."""
+    if isinstance(doc, list):
+        return [strip_vectors_for_rdf(item, vector_properties) for item in doc]
+    if not isinstance(doc, dict):
+        return doc
+    return {
+        k: strip_vectors_for_rdf(v, vector_properties)
+        for k, v in doc.items()
+        if k not in vector_properties
+    }
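
Illustrative only (not part of the released files): a sketch of the vector helpers; the property IRI is made up for the example.

from jsonld_ex.vector import vector_term_definition, validate_vector, cosine_similarity

term = vector_term_definition("embedding", "http://example.org/embedding", dimensions=3)
# term == {"embedding": {"@id": "http://example.org/embedding",
#                        "@container": "@vector", "@dimensions": 3}}

ok, errors = validate_vector([0.1, 0.2, 0.3], expected_dimensions=3)
assert ok and not errors

assert abs(cosine_similarity([1.0, 0.0], [1.0, 0.0]) - 1.0) < 1e-9  # parallel
assert abs(cosine_similarity([1.0, 0.0], [0.0, 1.0])) < 1e-9        # orthogonal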

jsonld_ex-0.1.0/src/jsonld_ex.egg-info/PKG-INFO
ADDED
@@ -0,0 +1,26 @@
+Metadata-Version: 2.4
+Name: jsonld-ex
+Version: 0.1.0
+Summary: JSON-LD 1.2 extensions for AI/ML data exchange, security hardening, and validation
+Author-email: Muntaser Aljabry <muntaser@example.com>
+License: MIT
+Keywords: json-ld,linked-data,semantic-web,ai,ml,confidence,provenance,embeddings,security,validation
+Classifier: Development Status :: 3 - Alpha
+Classifier: Intended Audience :: Developers
+Classifier: Intended Audience :: Science/Research
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+Classifier: Topic :: Software Development :: Libraries
+Requires-Python: >=3.9
+Description-Content-Type: text/markdown
+Requires-Dist: PyLD>=2.0.4
+Provides-Extra: dev
+Requires-Dist: pytest>=7.0; extra == "dev"
+Requires-Dist: pytest-asyncio>=0.23; extra == "dev"
+Requires-Dist: mypy>=1.8; extra == "dev"
+Requires-Dist: ruff>=0.2; extra == "dev"

jsonld_ex-0.1.0/src/jsonld_ex.egg-info/SOURCES.txt
ADDED
@@ -0,0 +1,14 @@
+pyproject.toml
+src/jsonld_ex/__init__.py
+src/jsonld_ex/ai_ml.py
+src/jsonld_ex/processor.py
+src/jsonld_ex/security.py
+src/jsonld_ex/validation.py
+src/jsonld_ex/vector.py
+src/jsonld_ex.egg-info/PKG-INFO
+src/jsonld_ex.egg-info/SOURCES.txt
+src/jsonld_ex.egg-info/dependency_links.txt
+src/jsonld_ex.egg-info/requires.txt
+src/jsonld_ex.egg-info/top_level.txt
+tests/test_ai_ml.py
+tests/test_validation.py

jsonld_ex-0.1.0/src/jsonld_ex.egg-info/dependency_links.txt
ADDED
@@ -0,0 +1 @@
+

jsonld_ex-0.1.0/src/jsonld_ex.egg-info/top_level.txt
ADDED
@@ -0,0 +1 @@
+jsonld_ex

jsonld_ex-0.1.0/tests/test_ai_ml.py
ADDED
@@ -0,0 +1,96 @@
+"""Tests for AI/ML extensions."""
+
+import pytest
+from jsonld_ex.ai_ml import (
+    annotate, get_confidence, get_provenance,
+    filter_by_confidence, aggregate_confidence,
+)
+
+
+class TestAnnotate:
+    def test_basic_confidence(self):
+        result = annotate("John Smith", confidence=0.95)
+        assert result == {"@value": "John Smith", "@confidence": 0.95}
+
+    def test_full_provenance(self):
+        result = annotate(
+            "John Smith",
+            confidence=0.95,
+            source="https://model.example.org/ner-v2",
+            extracted_at="2026-01-15T10:30:00Z",
+            method="NER",
+            human_verified=False,
+        )
+        assert result["@confidence"] == 0.95
+        assert result["@source"] == "https://model.example.org/ner-v2"
+        assert result["@method"] == "NER"
+        assert result["@humanVerified"] is False
+
+    def test_rejects_invalid_confidence(self):
+        with pytest.raises(ValueError):
+            annotate("x", confidence=1.5)
+        with pytest.raises(ValueError):
+            annotate("x", confidence=-0.1)
+
+    def test_numeric_value(self):
+        result = annotate(42, confidence=0.8)
+        assert result["@value"] == 42
+
+
+class TestGetConfidence:
+    def test_compact_form(self):
+        assert get_confidence({"@value": "test", "@confidence": 0.9}) == 0.9
+
+    def test_expanded_form(self):
+        node = {
+            "http://www.w3.org/ns/jsonld-ex/confidence": [{"@value": 0.85}]
+        }
+        assert get_confidence(node) == 0.85
+
+    def test_missing(self):
+        assert get_confidence({"@value": "test"}) is None
+        assert get_confidence(None) is None
+
+
+class TestGetProvenance:
+    def test_extracts_all(self):
+        node = {"@confidence": 0.9, "@source": "https://x.org/v1", "@method": "NER"}
+        prov = get_provenance(node)
+        assert prov.confidence == 0.9
+        assert prov.source == "https://x.org/v1"
+        assert prov.method == "NER"
+
+
+class TestFilterByConfidence:
+    graph = [
+        {"@id": "#a", "name": {"@value": "Alice", "@confidence": 0.95}},
+        {"@id": "#b", "name": {"@value": "Bob", "@confidence": 0.6}},
+        {"@id": "#c", "name": {"@value": "Charlie", "@confidence": 0.3}},
+    ]
+
+    def test_filters_above_threshold(self):
+        result = filter_by_confidence(self.graph, "name", 0.5)
+        assert len(result) == 2
+        assert result[0]["@id"] == "#a"
+
+    def test_high_threshold(self):
+        result = filter_by_confidence(self.graph, "name", 0.99)
+        assert len(result) == 0
+
+
+class TestAggregateConfidence:
+    def test_mean(self):
+        assert abs(aggregate_confidence([0.8, 0.6, 0.4]) - 0.6) < 1e-9
+
+    def test_max(self):
+        assert aggregate_confidence([0.8, 0.6, 0.4], "max") == 0.8
+
+    def test_min(self):
+        assert aggregate_confidence([0.8, 0.6, 0.4], "min") == 0.4
+
+    def test_weighted(self):
+        result = aggregate_confidence([0.9, 0.5], "weighted", [3, 1])
+        assert abs(result - 0.8) < 1e-9
+
+    def test_empty(self):
+        assert aggregate_confidence([]) == 0.0

jsonld_ex-0.1.0/tests/test_validation.py
ADDED
@@ -0,0 +1,63 @@
+"""Tests for validation extensions."""
+
+import pytest
+from jsonld_ex.validation import validate_node, validate_document
+
+
+PERSON_SHAPE = {
+    "@type": "Person",
+    "name": {"@required": True, "@type": "xsd:string", "@minLength": 1},
+    "email": {"@pattern": r"^[^@]+@[^@]+$"},
+    "age": {"@type": "xsd:integer", "@minimum": 0, "@maximum": 150},
+}
+
+
+class TestValidateNode:
+    def test_valid_node(self):
+        node = {"@type": "Person", "name": "John", "email": "j@x.com", "age": 30}
+        result = validate_node(node, PERSON_SHAPE)
+        assert result.valid
+
+    def test_missing_required(self):
+        node = {"@type": "Person", "email": "j@x.com"}
+        result = validate_node(node, PERSON_SHAPE)
+        assert not result.valid
+        assert any(e.constraint == "required" for e in result.errors)
+
+    def test_type_mismatch(self):
+        node = {"@type": "Person", "name": 12345}
+        result = validate_node(node, PERSON_SHAPE)
+        assert not result.valid
+
+    def test_below_minimum(self):
+        node = {"@type": "Person", "name": "Test", "age": -5}
+        result = validate_node(node, PERSON_SHAPE)
+        assert any(e.constraint == "minimum" for e in result.errors)
+
+    def test_above_maximum(self):
+        node = {"@type": "Person", "name": "Test", "age": 200}
+        result = validate_node(node, PERSON_SHAPE)
+        assert any(e.constraint == "maximum" for e in result.errors)
+
+    def test_pattern_mismatch(self):
+        node = {"@type": "Person", "name": "Test", "email": "bad"}
+        result = validate_node(node, PERSON_SHAPE)
+        assert any(e.constraint == "pattern" for e in result.errors)
+
+    def test_optional_absent(self):
+        node = {"@type": "Person", "name": "Test"}
+        result = validate_node(node, PERSON_SHAPE)
+        assert result.valid
+
+
+class TestValidateDocument:
+    def test_validates_graph(self):
+        doc = {
+            "@graph": [
+                {"@type": "Person", "name": "Alice"},
+                {"@type": "Person"},  # missing name
+            ]
+        }
+        result = validate_document(doc, [PERSON_SHAPE])
+        assert not result.valid
+        assert len(result.errors) == 1