jsonld-ex 0.1.3__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- jsonld_ex/__init__.py +85 -1
- jsonld_ex/cbor_ld.py +233 -0
- jsonld_ex/inference.py +537 -0
- jsonld_ex/merge.py +433 -0
- jsonld_ex/mqtt.py +248 -0
- jsonld_ex/owl_interop.py +938 -0
- jsonld_ex/temporal.py +295 -0
- {jsonld_ex-0.1.3.dist-info → jsonld_ex-0.2.0.dist-info}/METADATA +8 -1
- jsonld_ex-0.2.0.dist-info/RECORD +16 -0
- jsonld_ex-0.1.3.dist-info/RECORD +0 -10
- {jsonld_ex-0.1.3.dist-info → jsonld_ex-0.2.0.dist-info}/WHEEL +0 -0
- {jsonld_ex-0.1.3.dist-info → jsonld_ex-0.2.0.dist-info}/top_level.txt +0 -0
jsonld_ex/__init__.py
CHANGED
|
@@ -5,26 +5,110 @@ Reference implementation of proposed JSON-LD 1.2 extensions.
|
|
|
5
5
|
Wraps PyLD for core JSON-LD processing and adds extension layers.
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
|
-
__version__ = "0.
|
|
8
|
+
__version__ = "0.2.0"
|
|
9
9
|
|
|
10
10
|
from jsonld_ex.processor import JsonLdEx
|
|
11
11
|
from jsonld_ex.ai_ml import annotate, get_confidence, get_provenance, filter_by_confidence
|
|
12
12
|
from jsonld_ex.vector import validate_vector, cosine_similarity, vector_term_definition
|
|
13
13
|
from jsonld_ex.security import compute_integrity, verify_integrity, is_context_allowed
|
|
14
14
|
from jsonld_ex.validation import validate_node, validate_document
|
|
15
|
+
from jsonld_ex.owl_interop import (
|
|
16
|
+
to_prov_o,
|
|
17
|
+
from_prov_o,
|
|
18
|
+
shape_to_shacl,
|
|
19
|
+
shacl_to_shape,
|
|
20
|
+
shape_to_owl_restrictions,
|
|
21
|
+
to_rdf_star_ntriples,
|
|
22
|
+
compare_with_prov_o,
|
|
23
|
+
compare_with_shacl,
|
|
24
|
+
)
|
|
25
|
+
from jsonld_ex.inference import (
|
|
26
|
+
propagate_confidence,
|
|
27
|
+
combine_sources,
|
|
28
|
+
resolve_conflict,
|
|
29
|
+
propagate_graph_confidence,
|
|
30
|
+
PropagationResult,
|
|
31
|
+
ConflictReport,
|
|
32
|
+
)
|
|
33
|
+
from jsonld_ex.merge import (
|
|
34
|
+
merge_graphs,
|
|
35
|
+
diff_graphs,
|
|
36
|
+
MergeReport,
|
|
37
|
+
MergeConflict,
|
|
38
|
+
)
|
|
39
|
+
from jsonld_ex.temporal import (
|
|
40
|
+
add_temporal,
|
|
41
|
+
query_at_time,
|
|
42
|
+
temporal_diff,
|
|
43
|
+
TemporalDiffResult,
|
|
44
|
+
)
|
|
45
|
+
|
|
46
|
+
# Core public API — these names are always imported above, so they are
# always safe to export.
__all__ = [
    "JsonLdEx",
    # AI/ML annotations
    "annotate",
    "get_confidence",
    "get_provenance",
    "filter_by_confidence",
    # Vector operations
    "validate_vector",
    "cosine_similarity",
    "vector_term_definition",
    # Security
    "compute_integrity",
    "verify_integrity",
    "is_context_allowed",
    # Validation
    "validate_node",
    "validate_document",
    # OWL/RDF interoperability
    "to_prov_o",
    "from_prov_o",
    "shape_to_shacl",
    "shacl_to_shape",
    "shape_to_owl_restrictions",
    "to_rdf_star_ntriples",
    "compare_with_prov_o",
    "compare_with_shacl",
    # Confidence propagation & inference
    "propagate_confidence",
    "combine_sources",
    "resolve_conflict",
    "propagate_graph_confidence",
    "PropagationResult",
    "ConflictReport",
    # Graph merging
    "merge_graphs",
    "diff_graphs",
    "MergeReport",
    "MergeConflict",
    # Temporal extensions
    "add_temporal",
    "query_at_time",
    "temporal_diff",
    "TemporalDiffResult",
]

# Optional modules — import only if dependencies are available.
#
# The optional names are added to ``__all__`` only when their import
# succeeded.  Listing them unconditionally would make
# ``from jsonld_ex import *`` raise AttributeError whenever one of the
# imports below was skipped.
try:
    from jsonld_ex.cbor_ld import to_cbor, from_cbor, payload_stats, PayloadStats
except ImportError:
    pass
else:
    # CBOR-LD serialization (requires cbor2)
    __all__ += [
        "to_cbor",
        "from_cbor",
        "payload_stats",
        "PayloadStats",
    ]

try:
    from jsonld_ex.mqtt import (
        to_mqtt_payload, from_mqtt_payload,
        derive_mqtt_topic, derive_mqtt_qos,
    )
except ImportError:
    pass
else:
    # MQTT transport (requires cbor2)
    __all__ += [
        "to_mqtt_payload",
        "from_mqtt_payload",
        "derive_mqtt_topic",
        "derive_mqtt_qos",
    ]
|
jsonld_ex/cbor_ld.py
ADDED
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
"""
|
|
2
|
+
CBOR-LD Serialization for JSON-LD-Ex.
|
|
3
|
+
|
|
4
|
+
Binary-efficient serialization of JSON-LD documents using CBOR
|
|
5
|
+
(RFC 8949). Provides significant payload reduction over JSON,
|
|
6
|
+
especially for IoT and bandwidth-constrained environments.
|
|
7
|
+
|
|
8
|
+
Supports context compression via a registry that maps full context
|
|
9
|
+
URLs to short integer IDs, mirroring the CBOR-LD specification's
|
|
10
|
+
approach to reducing repetitive context references.
|
|
11
|
+
|
|
12
|
+
Requires the ``cbor2`` package::
|
|
13
|
+
|
|
14
|
+
pip install jsonld-ex[iot]
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
import gzip
|
|
20
|
+
import json
|
|
21
|
+
from dataclasses import dataclass
|
|
22
|
+
from typing import Any, Optional
|
|
23
|
+
|
|
24
|
+
try:
|
|
25
|
+
import cbor2
|
|
26
|
+
|
|
27
|
+
_HAS_CBOR2 = True
|
|
28
|
+
except ImportError:
|
|
29
|
+
_HAS_CBOR2 = False
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _require_cbor2() -> None:
    """Fail fast when the optional ``cbor2`` dependency is missing.

    Raises:
        ImportError: If the module-level ``cbor2`` import did not succeed.
            The message includes the install command.
    """
    if _HAS_CBOR2:
        return
    raise ImportError(
        "cbor2 is required for CBOR-LD serialization. "
        "Install it with: pip install jsonld-ex[iot]"
    )
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
# ── Default context registry ──────────────────────────────────────
|
|
41
|
+
|
|
42
|
+
# Maps well-known context URLs to compact integer IDs.
|
|
43
|
+
# Users can extend this with custom registries.
|
|
44
|
+
DEFAULT_CONTEXT_REGISTRY: dict[str, int] = {
    # Both schema.org spellings intentionally share one ID.
    "http://schema.org/": 1,
    "https://schema.org/": 1,
    "https://www.w3.org/ns/activitystreams": 2,
    "https://w3id.org/security/v2": 3,
    "https://www.w3.org/2018/credentials/v1": 4,
    "http://www.w3.org/ns/prov#": 5,
}

# Inverse lookup (ID → URL).  When several URLs share an ID, the entry that
# appears last in the registry is the one kept (``https://schema.org/`` for 1).
_REVERSE_DEFAULT: dict[int, str] = dict(
    (context_id, url) for url, context_id in DEFAULT_CONTEXT_REGISTRY.items()
)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
# ── Data Structures ────────────────────────────────────────────────
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
@dataclass
class PayloadStats:
    """Byte sizes of one document under four encodings, plus derived ratios."""

    # Size of the JSON encoding, in bytes.
    json_bytes: int
    # Size of the CBOR encoding, in bytes.
    cbor_bytes: int
    # Size of the gzip-compressed JSON encoding, in bytes.
    gzip_json_bytes: int
    # Size of the gzip-compressed CBOR encoding, in bytes.
    gzip_cbor_bytes: int

    @property
    def cbor_ratio(self) -> float:
        """Return CBOR size divided by JSON size (lower = better).

        Yields ``0.0`` when the JSON size is zero, avoiding a division by zero.
        """
        return self.cbor_bytes / self.json_bytes if self.json_bytes else 0.0

    @property
    def gzip_cbor_ratio(self) -> float:
        """Return gzipped-CBOR size divided by plain JSON size.

        Yields ``0.0`` when the JSON size is zero, avoiding a division by zero.
        """
        return self.gzip_cbor_bytes / self.json_bytes if self.json_bytes else 0.0
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
# ═══════════════════════════════════════════════════════════════════
|
|
84
|
+
# SERIALIZATION
|
|
85
|
+
# ═══════════════════════════════════════════════════════════════════
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def to_cbor(
    doc: dict[str, Any],
    context_registry: Optional[dict[str, int]] = None,
) -> bytes:
    """Serialize a JSON-LD document to CBOR with context compression.

    Context URLs found in the registry are replaced with their compact
    integer IDs.  Everything outside ``@context`` values is passed through
    to the CBOR encoder unchanged.

    Args:
        doc: JSON-LD document (Python dict).
        context_registry: Mapping of context URL → integer ID.
            Defaults to :data:`DEFAULT_CONTEXT_REGISTRY` when ``None``.
            An explicitly passed empty mapping disables compression.

    Returns:
        CBOR-encoded bytes.

    Raises:
        ImportError: If ``cbor2`` is not installed.
    """
    _require_cbor2()
    # Compare against None rather than relying on truthiness: an empty
    # registry is a valid request for "no compression" and must not be
    # silently replaced by the default registry.
    registry = (
        DEFAULT_CONTEXT_REGISTRY if context_registry is None else context_registry
    )
    compressed = _compress_contexts(doc, registry)
    return cbor2.dumps(compressed)
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def from_cbor(
    data: bytes,
    context_registry: Optional[dict[str, int]] = None,
) -> dict[str, Any]:
    """Deserialize CBOR bytes back to a JSON-LD document.

    Restores compressed context integer IDs to their full URLs using
    the provided (or default) registry.

    Args:
        data: CBOR-encoded bytes.
        context_registry: Same registry used during serialization.
            Defaults to :data:`DEFAULT_CONTEXT_REGISTRY` when ``None``.
            An explicitly passed empty mapping restores nothing.

    Returns:
        Restored JSON-LD document.

    Raises:
        ImportError: If ``cbor2`` is not installed.
    """
    _require_cbor2()
    # Compare against None rather than relying on truthiness so that an
    # explicitly empty registry is honoured instead of being replaced by
    # the default one.
    registry = (
        DEFAULT_CONTEXT_REGISTRY if context_registry is None else context_registry
    )
    # Invert URL → ID into ID → URL.  If several URLs share an ID, the one
    # that comes last in the registry's iteration order wins.
    reverse = {v: k for k, v in registry.items()}
    decoded = cbor2.loads(data)
    return _decompress_contexts(decoded, reverse)
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
# ═══════════════════════════════════════════════════════════════════
|
|
141
|
+
# PAYLOAD STATISTICS
|
|
142
|
+
# ═══════════════════════════════════════════════════════════════════
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def payload_stats(
    doc: dict[str, Any],
    context_registry: Optional[dict[str, int]] = None,
) -> PayloadStats:
    """Measure how large a document is under each supported encoding.

    The document is encoded as compact JSON (UTF-8) and as CBOR, and each
    encoding is additionally gzip-compressed.  Handy for benchmarking
    payload reduction.

    Args:
        doc: JSON-LD document.
        context_registry: Optional context registry forwarded to
            :func:`to_cbor` for CBOR compression.

    Returns:
        PayloadStats holding the four byte counts (ratios are derived
        properties on the result).
    """
    _require_cbor2()

    # Compact separators keep whitespace out of the JSON size.
    raw_json = json.dumps(doc, separators=(",", ":")).encode("utf-8")
    raw_cbor = to_cbor(doc, context_registry)
    packed_json = gzip.compress(raw_json)
    packed_cbor = gzip.compress(raw_cbor)

    sizes = {
        "json_bytes": len(raw_json),
        "cbor_bytes": len(raw_cbor),
        "gzip_json_bytes": len(packed_json),
        "gzip_cbor_bytes": len(packed_cbor),
    }
    return PayloadStats(**sizes)
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
# ═══════════════════════════════════════════════════════════════════
|
|
176
|
+
# INTERNAL HELPERS
|
|
177
|
+
# ═══════════════════════════════════════════════════════════════════
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def _compress_contexts(obj: Any, registry: dict[str, int]) -> Any:
|
|
181
|
+
"""Recursively replace context URLs with registry IDs."""
|
|
182
|
+
if isinstance(obj, dict):
|
|
183
|
+
result = {}
|
|
184
|
+
for k, v in obj.items():
|
|
185
|
+
if k == "@context":
|
|
186
|
+
result[k] = _compress_context_value(v, registry)
|
|
187
|
+
else:
|
|
188
|
+
result[k] = _compress_contexts(v, registry)
|
|
189
|
+
return result
|
|
190
|
+
if isinstance(obj, list):
|
|
191
|
+
return [_compress_contexts(item, registry) for item in obj]
|
|
192
|
+
return obj
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def _compress_context_value(ctx: Any, registry: dict[str, int]) -> Any:
|
|
196
|
+
"""Compress a single @context value."""
|
|
197
|
+
if isinstance(ctx, str):
|
|
198
|
+
return registry.get(ctx, ctx)
|
|
199
|
+
if isinstance(ctx, list):
|
|
200
|
+
return [_compress_context_value(item, registry) for item in ctx]
|
|
201
|
+
if isinstance(ctx, dict):
|
|
202
|
+
# Inline context definition — don't compress, but recurse
|
|
203
|
+
return {k: _compress_context_value(v, registry) if k == "@import" else v
|
|
204
|
+
for k, v in ctx.items()}
|
|
205
|
+
return ctx
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def _decompress_contexts(obj: Any, reverse: dict[int, str]) -> Any:
|
|
209
|
+
"""Recursively restore context URLs from registry IDs."""
|
|
210
|
+
if isinstance(obj, dict):
|
|
211
|
+
result = {}
|
|
212
|
+
for k, v in obj.items():
|
|
213
|
+
if k == "@context":
|
|
214
|
+
result[k] = _decompress_context_value(v, reverse)
|
|
215
|
+
else:
|
|
216
|
+
result[k] = _decompress_contexts(v, reverse)
|
|
217
|
+
return result
|
|
218
|
+
if isinstance(obj, list):
|
|
219
|
+
return [_decompress_contexts(item, reverse) for item in obj]
|
|
220
|
+
return obj
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
def _decompress_context_value(ctx: Any, reverse: dict[int, str]) -> Any:
|
|
224
|
+
"""Restore a single @context value."""
|
|
225
|
+
if isinstance(ctx, int):
|
|
226
|
+
return reverse.get(ctx, ctx)
|
|
227
|
+
if isinstance(ctx, str):
|
|
228
|
+
return ctx
|
|
229
|
+
if isinstance(ctx, list):
|
|
230
|
+
return [_decompress_context_value(item, reverse) for item in ctx]
|
|
231
|
+
if isinstance(ctx, dict):
|
|
232
|
+
return ctx
|
|
233
|
+
return ctx
|