jsonld-ex 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,26 @@
1
+ Metadata-Version: 2.4
2
+ Name: jsonld-ex
3
+ Version: 0.1.0
4
+ Summary: JSON-LD 1.2 extensions for AI/ML data exchange, security hardening, and validation
5
+ Author-email: Muntaser Aljabry <muntaser@example.com>
6
+ License: MIT
7
+ Keywords: json-ld,linked-data,semantic-web,ai,ml,confidence,provenance,embeddings,security,validation
8
+ Classifier: Development Status :: 3 - Alpha
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: Intended Audience :: Science/Research
11
+ Classifier: License :: OSI Approved :: MIT License
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.9
14
+ Classifier: Programming Language :: Python :: 3.10
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
18
+ Classifier: Topic :: Software Development :: Libraries
19
+ Requires-Python: >=3.9
20
+ Description-Content-Type: text/markdown
21
+ Requires-Dist: PyLD>=2.0.4
22
+ Provides-Extra: dev
23
+ Requires-Dist: pytest>=7.0; extra == "dev"
24
+ Requires-Dist: pytest-asyncio>=0.23; extra == "dev"
25
+ Requires-Dist: mypy>=1.8; extra == "dev"
26
+ Requires-Dist: ruff>=0.2; extra == "dev"
@@ -0,0 +1,56 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "jsonld-ex"
7
+ version = "0.1.0"
8
+ description = "JSON-LD 1.2 extensions for AI/ML data exchange, security hardening, and validation"
9
+ readme = "README.md"
10
+ license = {text = "MIT"}
11
+ requires-python = ">=3.9"
12
+ authors = [
13
+ {name = "Muntaser Aljabry", email = "muntaser@example.com"},
14
+ ]
15
+ keywords = [
16
+ "json-ld", "linked-data", "semantic-web", "ai", "ml",
17
+ "confidence", "provenance", "embeddings", "security", "validation",
18
+ ]
19
+ classifiers = [
20
+ "Development Status :: 3 - Alpha",
21
+ "Intended Audience :: Developers",
22
+ "Intended Audience :: Science/Research",
23
+ "License :: OSI Approved :: MIT License",
24
+ "Programming Language :: Python :: 3",
25
+ "Programming Language :: Python :: 3.9",
26
+ "Programming Language :: Python :: 3.10",
27
+ "Programming Language :: Python :: 3.11",
28
+ "Programming Language :: Python :: 3.12",
29
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
30
+ "Topic :: Software Development :: Libraries",
31
+ ]
32
+ dependencies = [
33
+ "PyLD>=2.0.4",
34
+ ]
35
+
36
+ [project.optional-dependencies]
37
+ dev = [
38
+ "pytest>=7.0",
39
+ "pytest-asyncio>=0.23",
40
+ "mypy>=1.8",
41
+ "ruff>=0.2",
42
+ ]
43
+
44
+ [tool.setuptools.packages.find]
45
+ where = ["src"]
46
+
47
+ [tool.pytest.ini_options]
48
+ testpaths = ["tests"]
49
+
50
+ [tool.mypy]
51
+ python_version = "3.9"
52
+ strict = true
53
+
54
+ [tool.ruff]
55
+ target-version = "py39"
56
+ line-length = 100
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,30 @@
1
"""
jsonld-ex: JSON-LD 1.2 Extensions for AI/ML, Security, and Validation

Reference implementation of proposed JSON-LD 1.2 extensions.
Wraps PyLD for core JSON-LD processing and adds extension layers.
"""

__version__ = "0.1.0"

# Re-export the package's public API at top level so callers can write
# `from jsonld_ex import annotate` instead of importing submodules.
from jsonld_ex.processor import JsonLdEx
from jsonld_ex.ai_ml import annotate, get_confidence, get_provenance, filter_by_confidence
from jsonld_ex.vector import validate_vector, cosine_similarity, vector_term_definition
from jsonld_ex.security import compute_integrity, verify_integrity, is_context_allowed
from jsonld_ex.validation import validate_node, validate_document

# Explicit star-import surface; mirrors the imports above.
__all__ = [
    "JsonLdEx",
    "annotate",
    "get_confidence",
    "get_provenance",
    "filter_by_confidence",
    "validate_vector",
    "cosine_similarity",
    "vector_term_definition",
    "compute_integrity",
    "verify_integrity",
    "is_context_allowed",
    "validate_node",
    "validate_document",
]
@@ -0,0 +1,154 @@
1
+ """
2
+ AI/ML Extensions for JSON-LD
3
+
4
+ Provides @confidence, @source, @extractedAt, @method, @humanVerified
5
+ """
6
+
7
+ from __future__ import annotations
8
+ from dataclasses import dataclass, field
9
+ from typing import Any, Optional, Sequence, Literal
10
+ import math
11
+
12
+ JSONLD_EX_NAMESPACE = "http://www.w3.org/ns/jsonld-ex/"
13
+
14
+
15
@dataclass
class ProvenanceMetadata:
    """AI/ML provenance metadata attached to a value.

    Every field defaults to None, meaning "not recorded".
    """
    confidence: Optional[float] = None     # extraction confidence in [0.0, 1.0]
    source: Optional[str] = None           # source identifier (tests use a model URL)
    extracted_at: Optional[str] = None     # extraction timestamp string (tests use ISO 8601)
    method: Optional[str] = None           # extraction method label, e.g. "NER"
    human_verified: Optional[bool] = None  # whether a human confirmed the value
23
+
24
+
25
def annotate(
    value: Any,
    confidence: Optional[float] = None,
    source: Optional[str] = None,
    extracted_at: Optional[str] = None,
    method: Optional[str] = None,
    human_verified: Optional[bool] = None,
) -> dict[str, Any]:
    """Wrap *value* in a JSON-LD value object carrying provenance keywords.

    Only the provenance arguments that are not None are attached.
    Raises ValueError when *confidence* is outside [0.0, 1.0].
    """
    annotated: dict[str, Any] = {"@value": value}

    if confidence is not None:
        _validate_confidence(confidence)
        annotated["@confidence"] = confidence

    # Remaining keywords need no validation; attach whichever were given.
    optional_keywords = (
        ("@source", source),
        ("@extractedAt", extracted_at),
        ("@method", method),
        ("@humanVerified", human_verified),
    )
    for keyword, supplied in optional_keywords:
        if supplied is not None:
            annotated[keyword] = supplied

    return annotated
49
+
50
+
51
def get_confidence(node: Any) -> Optional[float]:
    """Extract a confidence score from a node or annotated value.

    Handles both the compact form ("@confidence") and the expanded form
    (the full jsonld-ex IRI); returns None when absent or *node* is not a dict.
    """
    if not isinstance(node, dict):
        return None

    # Compact form wins when present.
    if "@confidence" in node:
        return node["@confidence"]

    # Expanded form: the value may be wrapped in a list and/or a value object.
    expanded_key = JSONLD_EX_NAMESPACE + "confidence"
    if expanded_key not in node:
        return None

    found = node[expanded_key]
    if isinstance(found, list) and found:
        found = found[0]
    if isinstance(found, dict):
        return found.get("@value", found)
    return found
72
+
73
+
74
def get_provenance(node: Any) -> ProvenanceMetadata:
    """Collect every provenance field of *node* into a ProvenanceMetadata.

    Non-dict input yields an empty (all-None) ProvenanceMetadata.
    """
    if not isinstance(node, dict):
        return ProvenanceMetadata()

    # Map dataclass attribute -> (compact name, keyword) lookup pair.
    lookups = {
        "confidence": ("confidence", "@confidence"),
        "source": ("source", "@source"),
        "extracted_at": ("extractedAt", "@extractedAt"),
        "method": ("method", "@method"),
        "human_verified": ("humanVerified", "@humanVerified"),
    }
    extracted = {
        attr: _extract_field(node, compact, keyword)
        for attr, (compact, keyword) in lookups.items()
    }
    return ProvenanceMetadata(**extracted)
86
+
87
+
88
def filter_by_confidence(
    graph: Sequence[dict[str, Any]],
    property_name: str,
    min_confidence: float,
) -> list[dict[str, Any]]:
    """Keep the nodes whose *property_name* carries a confidence >= threshold.

    A node qualifies when at least one value of the property has a
    confidence annotation meeting *min_confidence*.
    Raises ValueError when the threshold itself is out of range.
    """
    _validate_confidence(min_confidence)

    def qualifies(node: dict[str, Any]) -> bool:
        # Properties may be single- or multi-valued; normalize to a list.
        value = node.get(property_name)
        if value is None:
            return False
        candidates = value if isinstance(value, list) else [value]
        for candidate in candidates:
            score = get_confidence(candidate)
            if score is not None and score >= min_confidence:
                return True
        return False

    return [node for node in graph if qualifies(node)]
107
+
108
+
109
def aggregate_confidence(
    scores: Sequence[float],
    strategy: Literal["mean", "max", "min", "weighted"] = "mean",
    weights: Optional[Sequence[float]] = None,
) -> float:
    """Aggregate multiple confidence scores into one.

    Args:
        scores: Individual confidence values, each in [0.0, 1.0].
        strategy: "mean" (default), "max", "min", or "weighted".
        weights: Required for "weighted"; must match *scores* in length
            and must not sum to zero.

    Returns:
        The aggregated score; 0.0 for an empty *scores* sequence.

    Raises:
        ValueError: On an out-of-range score, mismatched weights, or a
            zero total weight.
    """
    if len(scores) == 0:
        return 0.0
    for s in scores:
        _validate_confidence(s)

    if strategy == "max":
        return max(scores)
    if strategy == "min":
        return min(scores)
    if strategy == "weighted":
        if weights is None or len(weights) != len(scores):
            raise ValueError("Weights must match scores length")
        total_weight = sum(weights)
        # Guard the division: an all-zero (or cancelling) weight vector
        # previously surfaced as an opaque ZeroDivisionError.
        if total_weight == 0:
            raise ValueError("Weights must not sum to zero")
        return sum(s * w for s, w in zip(scores, weights)) / total_weight
    # Fallback: arithmetic mean.
    return sum(scores) / len(scores)
131
+
132
+
133
+ # ── Internal ───────────────────────────────────────────────────────
134
+
135
+ def _validate_confidence(score: float) -> None:
136
+ if not isinstance(score, (int, float)) or score < 0 or score > 1:
137
+ raise ValueError(f"@confidence must be between 0.0 and 1.0, got: {score}")
138
+
139
+
140
+ def _extract_field(node: dict, compact_name: str, keyword: str) -> Any:
141
+ if keyword in node:
142
+ return node[keyword]
143
+ if compact_name in node:
144
+ return node[compact_name]
145
+ expanded_key = f"{JSONLD_EX_NAMESPACE}{compact_name}"
146
+ if expanded_key in node:
147
+ val = node[expanded_key]
148
+ if isinstance(val, list) and len(val) > 0:
149
+ item = val[0]
150
+ return item.get("@value", item) if isinstance(item, dict) else item
151
+ if isinstance(val, dict):
152
+ return val.get("@value", val)
153
+ return val
154
+ return None
@@ -0,0 +1,91 @@
1
+ """
2
+ JsonLdEx — Extended JSON-LD Processor (Python)
3
+
4
+ Wraps PyLD with backward-compatible extensions for AI/ML,
5
+ security, and validation.
6
+ """
7
+
8
+ from __future__ import annotations
9
+ from typing import Any, Optional
10
+
11
+ from pyld import jsonld
12
+
13
+ from jsonld_ex.ai_ml import (
14
+ annotate, get_confidence, get_provenance,
15
+ filter_by_confidence, aggregate_confidence, ProvenanceMetadata,
16
+ )
17
+ from jsonld_ex.vector import (
18
+ vector_term_definition, validate_vector, cosine_similarity,
19
+ extract_vectors, strip_vectors_for_rdf,
20
+ )
21
+ from jsonld_ex.security import (
22
+ compute_integrity, verify_integrity, integrity_context,
23
+ is_context_allowed, enforce_resource_limits, DEFAULT_RESOURCE_LIMITS,
24
+ )
25
+ from jsonld_ex.validation import validate_node, validate_document, ValidationResult
26
+
27
+
28
class JsonLdEx:
    """Extended JSON-LD processor wrapping PyLD.

    The core operations (expand/compact/flatten/to_rdf) enforce the
    configured resource limits on the input document, then delegate to
    PyLD.  The AI/ML, vector, security, and validation helpers are
    exposed as static methods for one-stop access.
    """

    def __init__(
        self,
        resource_limits: Optional[dict[str, int]] = None,
        context_allowlist: Optional[dict[str, Any]] = None,
    ):
        # User-supplied limits override the defaults key by key.
        self._limits = {**DEFAULT_RESOURCE_LIMITS, **(resource_limits or {})}
        # NOTE(review): the allowlist is stored but never consulted by any
        # method of this class — presumably callers are expected to use
        # is_context_allowed() themselves; confirm intended wiring.
        self._allowlist = context_allowlist

    # ── Core Operations ──────────────────────────────────────────

    def expand(self, doc: dict[str, Any], **kwargs: Any) -> list[dict[str, Any]]:
        """Expand a JSON-LD document with resource limit enforcement."""
        enforce_resource_limits(doc, self._limits)
        # kwargs is forwarded as PyLD's options dict.
        return jsonld.expand(doc, kwargs)

    def compact(self, doc: dict[str, Any], ctx: Any, **kwargs: Any) -> dict[str, Any]:
        """Compact a JSON-LD document against context *ctx*."""
        enforce_resource_limits(doc, self._limits)
        return jsonld.compact(doc, ctx, kwargs)

    def flatten(self, doc: dict[str, Any], ctx: Any = None, **kwargs: Any) -> dict[str, Any]:
        """Flatten a JSON-LD document (optionally compacting with *ctx*)."""
        enforce_resource_limits(doc, self._limits)
        return jsonld.flatten(doc, ctx, kwargs)

    def to_rdf(self, doc: dict[str, Any], **kwargs: Any) -> str:
        """Convert to N-Quads (forces the application/n-quads output format)."""
        enforce_resource_limits(doc, self._limits)
        return jsonld.to_rdf(doc, {**kwargs, "format": "application/n-quads"})

    def from_rdf(self, nquads: str, **kwargs: Any) -> list[dict[str, Any]]:
        """Convert N-Quads to JSON-LD.  No resource limits apply here."""
        return jsonld.from_rdf(nquads, kwargs)

    # ── AI/ML Extensions ─────────────────────────────────────────

    annotate = staticmethod(annotate)
    get_confidence = staticmethod(get_confidence)
    get_provenance = staticmethod(get_provenance)
    filter_by_confidence = staticmethod(filter_by_confidence)
    aggregate_confidence = staticmethod(aggregate_confidence)

    # ── Vector Extensions ────────────────────────────────────────

    vector_term_definition = staticmethod(vector_term_definition)
    validate_vector = staticmethod(validate_vector)
    cosine_similarity = staticmethod(cosine_similarity)
    extract_vectors = staticmethod(extract_vectors)
    strip_vectors_for_rdf = staticmethod(strip_vectors_for_rdf)

    # ── Security Extensions ──────────────────────────────────────

    compute_integrity = staticmethod(compute_integrity)
    verify_integrity = staticmethod(verify_integrity)
    integrity_context = staticmethod(integrity_context)
    is_context_allowed = staticmethod(is_context_allowed)

    # ── Validation Extensions ────────────────────────────────────

    validate_node = staticmethod(validate_node)
    validate_document = staticmethod(validate_document)
@@ -0,0 +1,92 @@
1
+ """Security Extensions for JSON-LD."""
2
+
3
from __future__ import annotations

import base64
import hashlib
import hmac
import json
import re
from typing import Any, Optional
9
+
10
+
11
# Default ceilings used by enforce_resource_limits(); callers may override
# any key individually.
DEFAULT_RESOURCE_LIMITS = {
    "max_context_depth": 10,    # NOTE(review): not checked anywhere in this module — confirm intended use
    "max_graph_depth": 100,     # maximum nesting depth accepted by enforce_resource_limits
    "max_document_size": 10 * 1024 * 1024,  # 10 MB serialized-size cap
    "max_expansion_time": 30,   # seconds; NOTE(review): not enforced in this module
}

# Hash algorithms accepted by compute_integrity / verify_integrity.
SUPPORTED_ALGORITHMS = ("sha256", "sha384", "sha512")
19
+
20
+
21
def compute_integrity(
    context: str | dict | Any, algorithm: str = "sha256"
) -> str:
    """Compute an SRI-style integrity string ("<alg>-<base64 digest>").

    Raises ValueError for algorithms outside SUPPORTED_ALGORITHMS.

    NOTE(review): non-string contexts are serialized with plain json.dumps,
    which is not canonical — dict key order affects the hash; confirm this
    matches producers of @integrity values.
    """
    if algorithm not in SUPPORTED_ALGORITHMS:
        raise ValueError(f"Unsupported algorithm: {algorithm}")
    serialized = context if isinstance(context, str) else json.dumps(context)
    digest = hashlib.new(algorithm, serialized.encode("utf-8")).digest()
    encoded = base64.b64encode(digest).decode("ascii")
    return f"{algorithm}-{encoded}"
31
+
32
+
33
def verify_integrity(context: str | dict | Any, declared: str) -> bool:
    """Verify context content against its declared integrity hash.

    Args:
        context: The context content (string, or JSON-serializable object).
        declared: Integrity string of the form "<algorithm>-<base64 digest>".

    Returns:
        True when the recomputed hash matches *declared*.

    Raises:
        ValueError: When *declared* is malformed or names an unsupported
            algorithm.
    """
    parts = declared.split("-", 1)
    if len(parts) != 2 or parts[0] not in SUPPORTED_ALGORITHMS:
        raise ValueError(f"Invalid integrity string: {declared}")
    computed = compute_integrity(context, parts[0])
    # Constant-time comparison: avoids leaking the length of the matching
    # prefix through timing when this guards untrusted remote contexts.
    return hmac.compare_digest(computed, declared)
40
+
41
+
42
def integrity_context(
    url: str, content: str | dict | Any, algorithm: str = "sha256"
) -> dict[str, str]:
    """Build a context reference carrying an @integrity hash of *content*."""
    digest = compute_integrity(content, algorithm)
    return {"@id": url, "@integrity": digest}
47
+
48
+
49
def is_context_allowed(url: str, config: dict[str, Any]) -> bool:
    """Check whether a context URL is permitted by an allowlist configuration.

    Config keys: "block_remote_contexts" (bool, denies everything),
    "allowed" (exact URLs), "patterns" (glob-style strings where * and ?
    are wildcards).  An empty configuration allows everything.
    """
    if config.get("block_remote_contexts", False):
        return False

    exact_urls = config.get("allowed", [])
    glob_patterns = config.get("patterns", [])

    if url in exact_urls:
        return True

    for glob in glob_patterns:
        if not isinstance(glob, str):
            continue
        # Translate the glob into an anchored regex: * -> .*, ? -> .
        translated = "^" + re.escape(glob).replace(r"\*", ".*").replace(r"\?", ".") + "$"
        if re.match(translated, url):
            return True

    # With no restriction configured, default-allow; otherwise the URL
    # failed every configured check.
    return not (exact_urls or glob_patterns)
64
+
65
+
66
def enforce_resource_limits(
    document: str | dict | Any,
    limits: Optional[dict[str, int]] = None,
) -> None:
    """Raise ValueError when *document* breaches size or nesting-depth limits.

    *limits* entries override DEFAULT_RESOURCE_LIMITS key by key.
    """
    effective = dict(DEFAULT_RESOURCE_LIMITS)
    effective.update(limits or {})

    # Size is measured on the serialized form.
    serialized = document if isinstance(document, str) else json.dumps(document)
    size_cap = effective["max_document_size"]
    if len(serialized) > size_cap:
        raise ValueError(
            f"Document size {len(serialized)} exceeds limit {size_cap}"
        )

    structured = json.loads(serialized) if isinstance(document, str) else document
    depth = _measure_depth(structured)
    depth_cap = effective["max_graph_depth"]
    if depth > depth_cap:
        raise ValueError(
            f"Document depth {depth} exceeds limit {depth_cap}"
        )
83
+
84
+
85
+ def _measure_depth(obj: Any, current: int = 0) -> int:
86
+ if obj is None or not isinstance(obj, (dict, list)):
87
+ return current
88
+ max_depth = current
89
+ items = obj if isinstance(obj, list) else obj.values()
90
+ for item in items:
91
+ max_depth = max(max_depth, _measure_depth(item, current + 1))
92
+ return max_depth
@@ -0,0 +1,186 @@
1
+ """Validation Extensions for JSON-LD (@shape)."""
2
+
3
+ from __future__ import annotations
4
+ import re
5
+ from dataclasses import dataclass, field
6
+ from datetime import datetime
7
+ from typing import Any, Optional, Sequence
8
+
9
+
10
@dataclass
class ValidationError:
    """A single constraint violation found during shape validation."""
    path: str         # property name; validate_document prefixes the node @id
    constraint: str   # constraint kind: "required", "type", "minimum", ...
    message: str      # human-readable description of the violation
    value: Any = None # the offending value, when available
16
+
17
+
18
@dataclass
class ValidationWarning:
    """A non-fatal finding produced during validation."""
    path: str     # property path the warning refers to
    code: str     # machine-readable warning code
    message: str  # human-readable description
23
+
24
+
25
@dataclass
class ValidationResult:
    """Outcome of validating a node or document: valid iff no errors."""
    valid: bool
    errors: list[ValidationError] = field(default_factory=list)
    warnings: list[ValidationWarning] = field(default_factory=list)
30
+
31
+
32
+ XSD = "http://www.w3.org/2001/XMLSchema#"
33
+
34
+
35
def validate_node(node: dict[str, Any], shape: dict[str, Any]) -> ValidationResult:
    """Validate a JSON-LD node against a shape definition.

    Supported shape keys: "@type" (expected node type) plus per-property
    constraint dicts using "@required", "@type" (xsd: datatypes),
    "@minimum"/"@maximum", "@minLength"/"@maxLength", and "@pattern".
    Returns a ValidationResult; never raises for invalid data.
    """
    errors: list[ValidationError] = []
    warnings: list[ValidationWarning] = []

    if not isinstance(node, dict):
        errors.append(ValidationError(".", "type", "Node must be a dict"))
        return ValidationResult(False, errors, warnings)

    # Type check: the shape's @type must appear among the node's types.
    if "@type" in shape:
        node_types = _get_types(node)
        if shape["@type"] not in node_types:
            errors.append(ValidationError(
                "@type", "type",
                f'Expected type "{shape["@type"]}", found: {node_types}',
                node_types,
            ))

    # Property constraints: every non-keyword shape entry that is a dict.
    for prop, constraint in shape.items():
        if prop.startswith("@") or not isinstance(constraint, dict):
            continue

        value = node.get(prop)
        # NOTE: list-valued properties are checked on their first element only.
        raw = _extract_raw(value)

        if constraint.get("@required") and raw is None:
            errors.append(ValidationError(prop, "required", f'Property "{prop}" is required'))
            continue

        # Absent optional properties are skipped entirely.
        if raw is None:
            continue

        # Datatype check (xsd: prefixed or full IRI).
        expected_type = constraint.get("@type")
        if expected_type:
            type_err = _validate_type(raw, expected_type)
            if type_err:
                errors.append(ValidationError(prop, "type", type_err, raw))

        # Numeric range checks — applied only to numeric values.
        if "@minimum" in constraint and isinstance(raw, (int, float)):
            if raw < constraint["@minimum"]:
                errors.append(ValidationError(
                    prop, "minimum",
                    f"Value {raw} below minimum {constraint['@minimum']}", raw,
                ))

        if "@maximum" in constraint and isinstance(raw, (int, float)):
            if raw > constraint["@maximum"]:
                errors.append(ValidationError(
                    prop, "maximum",
                    f"Value {raw} exceeds maximum {constraint['@maximum']}", raw,
                ))

        # String length checks — applied only to string values.
        if "@minLength" in constraint and isinstance(raw, str):
            if len(raw) < constraint["@minLength"]:
                errors.append(ValidationError(
                    prop, "minLength",
                    f"Length {len(raw)} below minimum {constraint['@minLength']}", raw,
                ))

        if "@maxLength" in constraint and isinstance(raw, str):
            if len(raw) > constraint["@maxLength"]:
                errors.append(ValidationError(
                    prop, "maxLength",
                    f"Length {len(raw)} exceeds maximum {constraint['@maxLength']}", raw,
                ))

        # Regex pattern — re.search semantics: matches anywhere unless anchored.
        if "@pattern" in constraint and isinstance(raw, str):
            if not re.search(constraint["@pattern"], raw):
                errors.append(ValidationError(
                    prop, "pattern",
                    f'"{raw}" does not match pattern "{constraint["@pattern"]}"', raw,
                ))

    return ValidationResult(len(errors) == 0, errors, warnings)
115
+
116
+
117
def validate_document(
    doc: dict[str, Any], shapes: Sequence[dict[str, Any]]
) -> ValidationResult:
    """Validate every typed node in *doc* against each shape matching its @type.

    Error paths are prefixed with the node's @id (or "anonymous") so that
    findings from different nodes remain distinguishable.
    """
    collected_errors: list[ValidationError] = []
    collected_warnings: list[ValidationWarning] = []

    for node in _extract_nodes(doc):
        node_types = _get_types(node)
        for shape in shapes:
            if shape.get("@type") not in node_types:
                continue
            outcome = validate_node(node, shape)
            prefix = node.get("@id", "anonymous")
            for err in outcome.errors:
                err.path = f"{prefix}/{err.path}"
            collected_errors.extend(outcome.errors)
            collected_warnings.extend(outcome.warnings)

    return ValidationResult(not collected_errors, collected_errors, collected_warnings)
135
+
136
+
137
+ # ── Internal ───────────────────────────────────────────────────────
138
+
139
+ def _get_types(node: dict) -> list[str]:
140
+ t = node.get("@type")
141
+ if t is None:
142
+ return []
143
+ return t if isinstance(t, list) else [t]
144
+
145
+
146
+ def _extract_raw(value: Any) -> Any:
147
+ if value is None:
148
+ return None
149
+ if isinstance(value, dict) and "@value" in value:
150
+ return value["@value"]
151
+ if isinstance(value, list) and len(value) > 0:
152
+ return _extract_raw(value[0])
153
+ return value
154
+
155
+
156
+ def _extract_nodes(doc: Any) -> list[dict]:
157
+ if isinstance(doc, list):
158
+ nodes = []
159
+ for item in doc:
160
+ nodes.extend(_extract_nodes(item))
161
+ return nodes
162
+ if not isinstance(doc, dict):
163
+ return []
164
+ nodes = []
165
+ if "@type" in doc:
166
+ nodes.append(doc)
167
+ if "@graph" in doc:
168
+ nodes.extend(_extract_nodes(doc["@graph"]))
169
+ return nodes
170
+
171
+
172
def _validate_type(value: Any, expected: str) -> Optional[str]:
    """Return an error string when *value* fails the expected XSD datatype.

    Unknown datatypes are not checked (returns None).  Note: the numeric
    types accept bool (a subclass of int), matching the original checks.
    """
    xsd_type = expected.replace("xsd:", XSD) if expected.startswith("xsd:") else expected

    if xsd_type == f"{XSD}string":
        matches = isinstance(value, str)
    elif xsd_type == f"{XSD}integer":
        matches = isinstance(value, int) and not isinstance(value, bool)
    elif xsd_type in (f"{XSD}double", f"{XSD}float", f"{XSD}decimal"):
        matches = isinstance(value, (int, float))
    elif xsd_type == f"{XSD}boolean":
        matches = isinstance(value, bool)
    else:
        return None

    if matches:
        return None
    short = expected if expected.startswith("xsd:") else xsd_type
    return f"Expected {short}, got {type(value).__name__}: {value}"
@@ -0,0 +1,76 @@
1
+ """Vector Embedding Extensions for JSON-LD."""
2
+
3
+ from __future__ import annotations
4
+ import math
5
+ from typing import Any, Optional
6
+
7
+
8
def vector_term_definition(
    term_name: str, iri: str, dimensions: Optional[int] = None
) -> dict[str, Any]:
    """Create a context term definition for a vector embedding property.

    Args:
        term_name: The context term to define.
        iri: The property IRI the term maps to.
        dimensions: Optional fixed dimensionality (positive integer).

    Returns:
        A one-entry context fragment {term_name: definition}.

    Raises:
        ValueError: When *dimensions* is not a positive integer.
    """
    defn: dict[str, Any] = {"@id": iri, "@container": "@vector"}
    if dimensions is not None:
        # bool is a subclass of int, so True/False must be rejected explicitly.
        if isinstance(dimensions, bool) or not isinstance(dimensions, int) or dimensions < 1:
            raise ValueError(f"@dimensions must be a positive integer, got: {dimensions}")
        defn["@dimensions"] = dimensions
    return {term_name: defn}
18
+
19
+
20
def validate_vector(
    vector: Any, expected_dimensions: Optional[int] = None
) -> tuple[bool, list[str]]:
    """Validate a vector embedding.

    Args:
        vector: Candidate embedding; must be a non-empty list/tuple of
            finite numbers.
        expected_dimensions: Optional required length.

    Returns:
        (valid, errors) — errors is empty when valid.
    """
    errors: list[str] = []
    if not isinstance(vector, (list, tuple)):
        errors.append(f"Vector must be a list, got: {type(vector).__name__}")
        return False, errors
    if len(vector) == 0:
        errors.append("Vector must not be empty")
        return False, errors
    for i, v in enumerate(vector):
        # bool is a subclass of int, so True/False must be rejected explicitly.
        if (
            isinstance(v, bool)
            or not isinstance(v, (int, float))
            or math.isnan(v)
            or math.isinf(v)
        ):
            errors.append(f"Vector element [{i}] must be a finite number, got: {v}")
    if expected_dimensions is not None and len(vector) != expected_dimensions:
        errors.append(
            f"Vector dimension mismatch: expected {expected_dimensions}, got {len(vector)}"
        )
    return len(errors) == 0, errors
39
+
40
+
41
def cosine_similarity(a: list[float], b: list[float]) -> float:
    """Compute cosine similarity between two equal-length vectors.

    Returns 0.0 when either vector has zero magnitude; raises ValueError
    on a dimension mismatch.
    """
    if len(a) != len(b):
        raise ValueError(f"Vector dimension mismatch: {len(a)} vs {len(b)}")
    dot = 0.0
    sq_a = 0.0
    sq_b = 0.0
    for x, y in zip(a, b):
        dot += x * y
        sq_a += x * x
        sq_b += y * y
    denom = math.sqrt(sq_a) * math.sqrt(sq_b)
    if denom == 0:
        return 0.0
    return dot / denom
50
+
51
+
52
def extract_vectors(
    node: dict[str, Any], vector_properties: list[str]
) -> dict[str, list[float]]:
    """Extract vector embeddings from a JSON-LD node.

    A property counts as a vector when its value is a non-empty list
    whose first element is numeric (only the first element is inspected).
    """
    if not isinstance(node, dict):
        return {}
    found: dict[str, list[float]] = {}
    for name in vector_properties:
        candidate = node.get(name)
        if (
            isinstance(candidate, list)
            and candidate
            and isinstance(candidate[0], (int, float))
        ):
            found[name] = candidate
    return found
64
+
65
+
66
def strip_vectors_for_rdf(doc: Any, vector_properties: list[str]) -> Any:
    """Return a copy of *doc* with the listed vector properties removed.

    Recurses through lists and dicts; scalars are returned unchanged.
    Intended to drop embeddings that have no RDF representation.
    """
    if isinstance(doc, list):
        return [strip_vectors_for_rdf(entry, vector_properties) for entry in doc]
    if isinstance(doc, dict):
        cleaned: dict[Any, Any] = {}
        for key, val in doc.items():
            if key in vector_properties:
                continue
            cleaned[key] = strip_vectors_for_rdf(val, vector_properties)
        return cleaned
    return doc
@@ -0,0 +1,26 @@
1
+ Metadata-Version: 2.4
2
+ Name: jsonld-ex
3
+ Version: 0.1.0
4
+ Summary: JSON-LD 1.2 extensions for AI/ML data exchange, security hardening, and validation
5
+ Author-email: Muntaser Aljabry <muntaser@example.com>
6
+ License: MIT
7
+ Keywords: json-ld,linked-data,semantic-web,ai,ml,confidence,provenance,embeddings,security,validation
8
+ Classifier: Development Status :: 3 - Alpha
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: Intended Audience :: Science/Research
11
+ Classifier: License :: OSI Approved :: MIT License
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.9
14
+ Classifier: Programming Language :: Python :: 3.10
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
18
+ Classifier: Topic :: Software Development :: Libraries
19
+ Requires-Python: >=3.9
20
+ Description-Content-Type: text/markdown
21
+ Requires-Dist: PyLD>=2.0.4
22
+ Provides-Extra: dev
23
+ Requires-Dist: pytest>=7.0; extra == "dev"
24
+ Requires-Dist: pytest-asyncio>=0.23; extra == "dev"
25
+ Requires-Dist: mypy>=1.8; extra == "dev"
26
+ Requires-Dist: ruff>=0.2; extra == "dev"
@@ -0,0 +1,14 @@
1
+ pyproject.toml
2
+ src/jsonld_ex/__init__.py
3
+ src/jsonld_ex/ai_ml.py
4
+ src/jsonld_ex/processor.py
5
+ src/jsonld_ex/security.py
6
+ src/jsonld_ex/validation.py
7
+ src/jsonld_ex/vector.py
8
+ src/jsonld_ex.egg-info/PKG-INFO
9
+ src/jsonld_ex.egg-info/SOURCES.txt
10
+ src/jsonld_ex.egg-info/dependency_links.txt
11
+ src/jsonld_ex.egg-info/requires.txt
12
+ src/jsonld_ex.egg-info/top_level.txt
13
+ tests/test_ai_ml.py
14
+ tests/test_validation.py
@@ -0,0 +1,7 @@
1
+ PyLD>=2.0.4
2
+
3
+ [dev]
4
+ pytest>=7.0
5
+ pytest-asyncio>=0.23
6
+ mypy>=1.8
7
+ ruff>=0.2
@@ -0,0 +1 @@
1
+ jsonld_ex
@@ -0,0 +1,96 @@
1
+ """Tests for AI/ML extensions."""
2
+
3
+ import pytest
4
+ from jsonld_ex.ai_ml import (
5
+ annotate, get_confidence, get_provenance,
6
+ filter_by_confidence, aggregate_confidence,
7
+ )
8
+
9
+
10
class TestAnnotate:
    """Behavior of annotate(): value wrapping and provenance keywords."""

    def test_basic_confidence(self):
        result = annotate("John Smith", confidence=0.95)
        assert result == {"@value": "John Smith", "@confidence": 0.95}

    def test_full_provenance(self):
        # All five provenance keywords attached at once.
        result = annotate(
            "John Smith",
            confidence=0.95,
            source="https://model.example.org/ner-v2",
            extracted_at="2026-01-15T10:30:00Z",
            method="NER",
            human_verified=False,
        )
        assert result["@confidence"] == 0.95
        assert result["@source"] == "https://model.example.org/ner-v2"
        assert result["@method"] == "NER"
        assert result["@humanVerified"] is False

    def test_rejects_invalid_confidence(self):
        # Confidence must lie within [0.0, 1.0].
        with pytest.raises(ValueError):
            annotate("x", confidence=1.5)
        with pytest.raises(ValueError):
            annotate("x", confidence=-0.1)

    def test_numeric_value(self):
        result = annotate(42, confidence=0.8)
        assert result["@value"] == 42
38
+
39
+
40
class TestGetConfidence:
    """get_confidence() must handle compact, expanded, and absent forms."""

    def test_compact_form(self):
        assert get_confidence({"@value": "test", "@confidence": 0.9}) == 0.9

    def test_expanded_form(self):
        # Expanded JSON-LD wraps the score in a list of value objects.
        node = {
            "http://www.w3.org/ns/jsonld-ex/confidence": [{"@value": 0.85}]
        }
        assert get_confidence(node) == 0.85

    def test_missing(self):
        assert get_confidence({"@value": "test"}) is None
        assert get_confidence(None) is None
53
+
54
+
55
class TestGetProvenance:
    """get_provenance() collects all compact provenance keywords."""

    def test_extracts_all(self):
        node = {"@confidence": 0.9, "@source": "https://x.org/v1", "@method": "NER"}
        prov = get_provenance(node)
        assert prov.confidence == 0.9
        assert prov.source == "https://x.org/v1"
        assert prov.method == "NER"
62
+
63
+
64
class TestFilterByConfidence:
    """filter_by_confidence() keeps nodes meeting the threshold."""

    # Shared fixture: three nodes with descending name-confidence.
    graph = [
        {"@id": "#a", "name": {"@value": "Alice", "@confidence": 0.95}},
        {"@id": "#b", "name": {"@value": "Bob", "@confidence": 0.6}},
        {"@id": "#c", "name": {"@value": "Charlie", "@confidence": 0.3}},
    ]

    def test_filters_above_threshold(self):
        result = filter_by_confidence(self.graph, "name", 0.5)
        assert len(result) == 2
        assert result[0]["@id"] == "#a"

    def test_high_threshold(self):
        result = filter_by_confidence(self.graph, "name", 0.99)
        assert len(result) == 0
79
+
80
+
81
class TestAggregateConfidence:
    """aggregate_confidence() strategies: mean, max, min, weighted, empty."""

    def test_mean(self):
        assert abs(aggregate_confidence([0.8, 0.6, 0.4]) - 0.6) < 1e-9

    def test_max(self):
        assert aggregate_confidence([0.8, 0.6, 0.4], "max") == 0.8

    def test_min(self):
        assert aggregate_confidence([0.8, 0.6, 0.4], "min") == 0.4

    def test_weighted(self):
        # (0.9*3 + 0.5*1) / 4 == 0.8
        result = aggregate_confidence([0.9, 0.5], "weighted", [3, 1])
        assert abs(result - 0.8) < 1e-9

    def test_empty(self):
        # Empty input short-circuits to 0.0 with no validation.
        assert aggregate_confidence([]) == 0.0
@@ -0,0 +1,63 @@
1
+ """Tests for validation extensions."""
2
+
3
+ import pytest
4
+ from jsonld_ex.validation import validate_node, validate_document
5
+
6
+
7
# Shape fixture: a Person requires a non-empty string name; email and age
# are optional but constrained when present.
PERSON_SHAPE = {
    "@type": "Person",
    "name": {"@required": True, "@type": "xsd:string", "@minLength": 1},
    "email": {"@pattern": r"^[^@]+@[^@]+$"},
    "age": {"@type": "xsd:integer", "@minimum": 0, "@maximum": 150},
}
13
+
14
+
15
class TestValidateNode:
    """validate_node() against PERSON_SHAPE: each constraint kind once."""

    def test_valid_node(self):
        node = {"@type": "Person", "name": "John", "email": "j@x.com", "age": 30}
        result = validate_node(node, PERSON_SHAPE)
        assert result.valid

    def test_missing_required(self):
        node = {"@type": "Person", "email": "j@x.com"}
        result = validate_node(node, PERSON_SHAPE)
        assert not result.valid
        assert any(e.constraint == "required" for e in result.errors)

    def test_type_mismatch(self):
        # name must be xsd:string, not an integer.
        node = {"@type": "Person", "name": 12345}
        result = validate_node(node, PERSON_SHAPE)
        assert not result.valid

    def test_below_minimum(self):
        node = {"@type": "Person", "name": "Test", "age": -5}
        result = validate_node(node, PERSON_SHAPE)
        assert any(e.constraint == "minimum" for e in result.errors)

    def test_above_maximum(self):
        node = {"@type": "Person", "name": "Test", "age": 200}
        result = validate_node(node, PERSON_SHAPE)
        assert any(e.constraint == "maximum" for e in result.errors)

    def test_pattern_mismatch(self):
        node = {"@type": "Person", "name": "Test", "email": "bad"}
        result = validate_node(node, PERSON_SHAPE)
        assert any(e.constraint == "pattern" for e in result.errors)

    def test_optional_absent(self):
        # Absent optional properties (email, age) are simply skipped.
        node = {"@type": "Person", "name": "Test"}
        result = validate_node(node, PERSON_SHAPE)
        assert result.valid
51
+
52
+
53
class TestValidateDocument:
    """validate_document() walks @graph and aggregates per-node errors."""

    def test_validates_graph(self):
        doc = {
            "@graph": [
                {"@type": "Person", "name": "Alice"},
                {"@type": "Person"},  # missing name
            ]
        }
        result = validate_document(doc, [PERSON_SHAPE])
        assert not result.valid
        # Only the second node violates the shape (required name).
        assert len(result.errors) == 1