jsonld-ex 0.1.3__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
jsonld_ex/__init__.py CHANGED
@@ -5,26 +5,110 @@ Reference implementation of proposed JSON-LD 1.2 extensions.
5
5
  Wraps PyLD for core JSON-LD processing and adds extension layers.
6
6
  """
7
7
 
8
- __version__ = "0.1.3"
8
+ __version__ = "0.2.0"
9
9
 
10
10
  from jsonld_ex.processor import JsonLdEx
11
11
  from jsonld_ex.ai_ml import annotate, get_confidence, get_provenance, filter_by_confidence
12
12
  from jsonld_ex.vector import validate_vector, cosine_similarity, vector_term_definition
13
13
  from jsonld_ex.security import compute_integrity, verify_integrity, is_context_allowed
14
14
  from jsonld_ex.validation import validate_node, validate_document
15
+ from jsonld_ex.owl_interop import (
16
+ to_prov_o,
17
+ from_prov_o,
18
+ shape_to_shacl,
19
+ shacl_to_shape,
20
+ shape_to_owl_restrictions,
21
+ to_rdf_star_ntriples,
22
+ compare_with_prov_o,
23
+ compare_with_shacl,
24
+ )
25
+ from jsonld_ex.inference import (
26
+ propagate_confidence,
27
+ combine_sources,
28
+ resolve_conflict,
29
+ propagate_graph_confidence,
30
+ PropagationResult,
31
+ ConflictReport,
32
+ )
33
+ from jsonld_ex.merge import (
34
+ merge_graphs,
35
+ diff_graphs,
36
+ MergeReport,
37
+ MergeConflict,
38
+ )
39
+ from jsonld_ex.temporal import (
40
+ add_temporal,
41
+ query_at_time,
42
+ temporal_diff,
43
+ TemporalDiffResult,
44
+ )
45
+
46
# Optional modules — import only if dependencies are available.
# Names that were actually imported are collected here so that ``__all__``
# only advertises attributes that exist on the package; listing a missing
# name would make ``from jsonld_ex import *`` raise AttributeError.
_optional_exports = []

try:
    from jsonld_ex.cbor_ld import to_cbor, from_cbor, payload_stats, PayloadStats
except ImportError:
    pass
else:
    # CBOR-LD serialization (requires cbor2)
    _optional_exports += [
        "to_cbor",
        "from_cbor",
        "payload_stats",
        "PayloadStats",
    ]

try:
    from jsonld_ex.mqtt import (
        to_mqtt_payload, from_mqtt_payload,
        derive_mqtt_topic, derive_mqtt_qos,
    )
except ImportError:
    pass
else:
    # MQTT transport (requires cbor2)
    _optional_exports += [
        "to_mqtt_payload",
        "from_mqtt_payload",
        "derive_mqtt_topic",
        "derive_mqtt_qos",
    ]

__all__ = [
    "JsonLdEx",
    # AI/ML annotations
    "annotate",
    "get_confidence",
    "get_provenance",
    "filter_by_confidence",
    # Vector operations
    "validate_vector",
    "cosine_similarity",
    "vector_term_definition",
    # Security
    "compute_integrity",
    "verify_integrity",
    "is_context_allowed",
    # Validation
    "validate_node",
    "validate_document",
    # OWL/RDF interoperability
    "to_prov_o",
    "from_prov_o",
    "shape_to_shacl",
    "shacl_to_shape",
    "shape_to_owl_restrictions",
    "to_rdf_star_ntriples",
    "compare_with_prov_o",
    "compare_with_shacl",
    # Confidence propagation & inference
    "propagate_confidence",
    "combine_sources",
    "resolve_conflict",
    "propagate_graph_confidence",
    "PropagationResult",
    "ConflictReport",
    # Graph merging
    "merge_graphs",
    "diff_graphs",
    "MergeReport",
    "MergeConflict",
    # Temporal extensions
    "add_temporal",
    "query_at_time",
    "temporal_diff",
    "TemporalDiffResult",
]
# Optional names are appended only when their import above succeeded.
__all__ += _optional_exports
jsonld_ex/cbor_ld.py ADDED
@@ -0,0 +1,233 @@
1
+ """
2
+ CBOR-LD Serialization for JSON-LD-Ex.
3
+
4
+ Binary-efficient serialization of JSON-LD documents using CBOR
5
+ (RFC 8949). Provides significant payload reduction over JSON,
6
+ especially for IoT and bandwidth-constrained environments.
7
+
8
+ Supports context compression via a registry that maps full context
9
+ URLs to short integer IDs, mirroring the CBOR-LD specification's
10
+ approach to reducing repetitive context references.
11
+
12
+ Requires the ``cbor2`` package::
13
+
14
+ pip install jsonld-ex[iot]
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ import gzip
20
+ import json
21
+ from dataclasses import dataclass
22
+ from typing import Any, Optional
23
+
24
+ try:
25
+ import cbor2
26
+
27
+ _HAS_CBOR2 = True
28
+ except ImportError:
29
+ _HAS_CBOR2 = False
30
+
31
+
32
def _require_cbor2() -> None:
    """Raise ImportError with install instructions when cbor2 is unavailable.

    Returns silently when the module-level ``_HAS_CBOR2`` flag is true.

    Raises:
        ImportError: If the ``cbor2`` import at module load time failed.
    """
    if _HAS_CBOR2:
        return
    message = (
        "cbor2 is required for CBOR-LD serialization. "
        "Install it with: pip install jsonld-ex[iot]"
    )
    raise ImportError(message)
38
+
39
+
40
+ # ── Default context registry ──────────────────────────────────────
41
+
42
# Well-known context URLs and the compact integer ID each is encoded as.
# Callers can supply their own mapping instead of this default.
# Note: the http and https spellings of schema.org both map to ID 1.
DEFAULT_CONTEXT_REGISTRY: dict[str, int] = {
    "http://schema.org/": 1,
    "https://schema.org/": 1,
    "https://www.w3.org/ns/activitystreams": 2,
    "https://w3id.org/security/v2": 3,
    "https://www.w3.org/2018/credentials/v1": 4,
    "http://www.w3.org/ns/prov#": 5,
}

# ID -> URL view of the default registry. Where several URLs share an ID,
# the URL listed last in the registry is the one kept (https for ID 1).
_REVERSE_DEFAULT: dict[int, str] = dict(
    zip(DEFAULT_CONTEXT_REGISTRY.values(), DEFAULT_CONTEXT_REGISTRY)
)
54
+
55
+
56
+ # ── Data Structures ────────────────────────────────────────────────
57
+
58
+
59
@dataclass
class PayloadStats:
    """Byte counts of one document under four encodings, plus derived ratios.

    Both ratio properties are expressed relative to ``json_bytes`` and
    return 0.0 when ``json_bytes`` is zero, to avoid dividing by zero.
    """

    # Size of the JSON serialization, in bytes.
    json_bytes: int
    # Size of the CBOR serialization, in bytes.
    cbor_bytes: int
    # Size of the gzip-compressed JSON serialization, in bytes.
    gzip_json_bytes: int
    # Size of the gzip-compressed CBOR serialization, in bytes.
    gzip_cbor_bytes: int

    @property
    def cbor_ratio(self) -> float:
        """Return ``cbor_bytes / json_bytes`` (smaller means more compact)."""
        return self.cbor_bytes / self.json_bytes if self.json_bytes != 0 else 0.0

    @property
    def gzip_cbor_ratio(self) -> float:
        """Return ``gzip_cbor_bytes / json_bytes`` (the headline number)."""
        return (
            self.gzip_cbor_bytes / self.json_bytes if self.json_bytes != 0 else 0.0
        )
81
+
82
+
83
+ # ═══════════════════════════════════════════════════════════════════
84
+ # SERIALIZATION
85
+ # ═══════════════════════════════════════════════════════════════════
86
+
87
+
88
def to_cbor(
    doc: dict[str, Any],
    context_registry: Optional[dict[str, int]] = None,
) -> bytes:
    """Serialize a JSON-LD document to CBOR with context compression.

    Context URLs found in the registry are replaced with their compact
    integer IDs before encoding. Keys other than ``@context`` are passed
    through unchanged.

    Args:
        doc: JSON-LD document (Python dict).
        context_registry: Mapping of context URL → integer ID. When
            omitted (``None``), :data:`DEFAULT_CONTEXT_REGISTRY` is used.
            An explicitly passed empty mapping disables compression.

    Returns:
        CBOR-encoded bytes.

    Raises:
        ImportError: If ``cbor2`` is not installed.
    """
    _require_cbor2()
    # Test for None instead of using ``or``: an empty registry is a valid,
    # deliberate argument and must not be swapped for the default.
    registry = (
        DEFAULT_CONTEXT_REGISTRY if context_registry is None else context_registry
    )
    compressed = _compress_contexts(doc, registry)
    return cbor2.dumps(compressed)


def from_cbor(
    data: bytes,
    context_registry: Optional[dict[str, int]] = None,
) -> dict[str, Any]:
    """Deserialize CBOR bytes back to a JSON-LD document.

    Integer context IDs are mapped back to URLs using the inverse of the
    registry. If several URLs share one ID, the URL that appears last in
    the registry is the one restored. IDs missing from the registry are
    left as integers.

    Args:
        data: CBOR-encoded bytes.
        context_registry: Same registry used during serialization. When
            omitted (``None``), :data:`DEFAULT_CONTEXT_REGISTRY` is used.
            An explicitly passed empty mapping restores nothing.

    Returns:
        Restored JSON-LD document.

    Raises:
        ImportError: If ``cbor2`` is not installed.
    """
    _require_cbor2()
    # Same None test as to_cbor() so both directions agree on which
    # registry an empty mapping selects.
    registry = (
        DEFAULT_CONTEXT_REGISTRY if context_registry is None else context_registry
    )
    reverse = {v: k for k, v in registry.items()}
    decoded = cbor2.loads(data)
    return _decompress_contexts(decoded, reverse)
138
+
139
+
140
+ # ═══════════════════════════════════════════════════════════════════
141
+ # PAYLOAD STATISTICS
142
+ # ═══════════════════════════════════════════════════════════════════
143
+
144
+
145
def payload_stats(
    doc: dict[str, Any],
    context_registry: Optional[dict[str, int]] = None,
) -> PayloadStats:
    """Measure how large a document is under each supported encoding.

    The document is serialized as compact JSON (no whitespace between
    separators, UTF-8 encoded) and as CBOR via :func:`to_cbor`; each result
    is then gzip-compressed, and the four byte counts are reported.

    Args:
        doc: JSON-LD document.
        context_registry: Optional context registry, forwarded to
            :func:`to_cbor`.

    Returns:
        PayloadStats holding the four sizes (ratios are derived from them).

    Raises:
        ImportError: If ``cbor2`` is not installed.
    """
    # Fail fast on a missing cbor2 before doing any serialization work.
    _require_cbor2()
    raw_json = json.dumps(doc, separators=(",", ":")).encode("utf-8")
    raw_cbor = to_cbor(doc, context_registry)
    sizes = {
        "json_bytes": len(raw_json),
        "cbor_bytes": len(raw_cbor),
        "gzip_json_bytes": len(gzip.compress(raw_json)),
        "gzip_cbor_bytes": len(gzip.compress(raw_cbor)),
    }
    return PayloadStats(**sizes)
173
+
174
+
175
+ # ═══════════════════════════════════════════════════════════════════
176
+ # INTERNAL HELPERS
177
+ # ═══════════════════════════════════════════════════════════════════
178
+
179
+
180
def _compress_contexts(obj: Any, registry: dict[str, int]) -> Any:
    """Return a copy of *obj* with every ``@context`` value compressed.

    Walks dicts and lists recursively. The value under each ``@context``
    key is handed to ``_compress_context_value``; any other value is
    walked in turn. Values that are neither dict nor list are returned
    as they are.
    """
    if isinstance(obj, list):
        return [_compress_contexts(element, registry) for element in obj]
    if not isinstance(obj, dict):
        return obj
    return {
        key: (
            _compress_context_value(value, registry)
            if key == "@context"
            else _compress_contexts(value, registry)
        )
        for key, value in obj.items()
    }
193
+
194
+
195
def _compress_context_value(ctx: Any, registry: dict[str, int]) -> Any:
    """Compress one ``@context`` value.

    * A string is replaced by its registry ID when present, else kept.
    * A list is compressed element by element.
    * A dict (inline context definition) is copied; only the value under
      ``"@import"`` is compressed, all other entries are kept as they are.
    * Anything else is returned unchanged.
    """
    if isinstance(ctx, str):
        return registry.get(ctx, ctx)
    if isinstance(ctx, list):
        return [_compress_context_value(entry, registry) for entry in ctx]
    if isinstance(ctx, dict):
        compressed = {}
        for key, value in ctx.items():
            if key == "@import":
                value = _compress_context_value(value, registry)
            compressed[key] = value
        return compressed
    return ctx
206
+
207
+
208
def _decompress_contexts(obj: Any, reverse: dict[int, str]) -> Any:
    """Return a copy of *obj* with every ``@context`` value restored.

    Walks dicts and lists recursively. The value under each ``@context``
    key is handed to ``_decompress_context_value``; any other value is
    walked in turn. Values that are neither dict nor list are returned
    as they are.
    """
    if isinstance(obj, list):
        return [_decompress_contexts(element, reverse) for element in obj]
    if not isinstance(obj, dict):
        return obj
    return {
        key: (
            _decompress_context_value(value, reverse)
            if key == "@context"
            else _decompress_contexts(value, reverse)
        )
        for key, value in obj.items()
    }
221
+
222
+
223
def _decompress_context_value(ctx: Any, reverse: dict[int, str]) -> Any:
    """Restore one ``@context`` value from its compressed form.

    * An integer is replaced by the URL registered for that ID; unknown
      IDs are left as integers.
    * A list is restored element by element.
    * A dict (inline context definition) is copied; only the value under
      ``"@import"`` is restored, matching what ``_compress_context_value``
      compresses. All other entries are kept as they are.
    * Strings, booleans and anything else are returned unchanged.

    Args:
        ctx: The (possibly compressed) ``@context`` value.
        reverse: Mapping of integer ID → context URL.

    Returns:
        The value with registry IDs replaced by URLs.
    """
    # bool is a subclass of int, so True/False would otherwise be looked
    # up as IDs 1/0 and could be turned into a URL.
    if isinstance(ctx, bool):
        return ctx
    if isinstance(ctx, int):
        return reverse.get(ctx, ctx)
    if isinstance(ctx, list):
        return [_decompress_context_value(item, reverse) for item in ctx]
    if isinstance(ctx, dict):
        # The compressor rewrites "@import" inside inline contexts, so it
        # has to be undone here or the round trip leaves an integer behind.
        return {
            k: _decompress_context_value(v, reverse) if k == "@import" else v
            for k, v in ctx.items()
        }
    return ctx