jsonld-ex 0.1.0__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
jsonld_ex/__init__.py CHANGED
@@ -5,7 +5,7 @@ Reference implementation of proposed JSON-LD 1.2 extensions.
5
5
  Wraps PyLD for core JSON-LD processing and adds extension layers.
6
6
  """
7
7
 
8
- __version__ = "0.1.0"
8
+ __version__ = "0.1.3"
9
9
 
10
10
  from jsonld_ex.processor import JsonLdEx
11
11
  from jsonld_ex.ai_ml import annotate, get_confidence, get_provenance, filter_by_confidence
jsonld_ex/ai_ml.py CHANGED
@@ -124,7 +124,14 @@ def aggregate_confidence(
124
124
  elif strategy == "weighted":
125
125
  if weights is None or len(weights) != len(scores):
126
126
  raise ValueError("Weights must match scores length")
127
+ for w in weights:
128
+ if not isinstance(w, (int, float)) or isinstance(w, bool):
129
+ raise TypeError(f"Weight must be a number, got: {type(w).__name__}")
130
+ if w < 0:
131
+ raise ValueError(f"Weight must be non-negative, got: {w}")
127
132
  total_weight = sum(weights)
133
+ if total_weight == 0:
134
+ raise ValueError("Total weight must be greater than zero")
128
135
  return sum(s * w for s, w in zip(scores, weights)) / total_weight
129
136
  else: # mean
130
137
  return sum(scores) / len(scores)
@@ -133,7 +140,11 @@ def aggregate_confidence(
133
140
  # ── Internal ───────────────────────────────────────────────────────
134
141
 
135
142
  def _validate_confidence(score: float) -> None:
136
- if not isinstance(score, (int, float)) or score < 0 or score > 1:
143
+ if not isinstance(score, (int, float)) or isinstance(score, bool):
144
+ raise TypeError(f"@confidence must be a number, got: {type(score).__name__}")
145
+ if math.isnan(score) or math.isinf(score):
146
+ raise ValueError(f"@confidence must be finite, got: {score}")
147
+ if score < 0 or score > 1:
137
148
  raise ValueError(f"@confidence must be between 0.0 and 1.0, got: {score}")
138
149
 
139
150
 
jsonld_ex/security.py CHANGED
@@ -22,9 +22,14 @@ def compute_integrity(
22
22
  context: str | dict | Any, algorithm: str = "sha256"
23
23
  ) -> str:
24
24
  """Compute an integrity hash for a context."""
25
+ if context is None:
26
+ raise TypeError("Context must not be None")
25
27
  if algorithm not in SUPPORTED_ALGORITHMS:
26
28
  raise ValueError(f"Unsupported algorithm: {algorithm}")
27
- content = context if isinstance(context, str) else json.dumps(context)
29
+ try:
30
+ content = context if isinstance(context, str) else json.dumps(context, sort_keys=True)
31
+ except (TypeError, ValueError) as exc:
32
+ raise TypeError(f"Context is not JSON-serializable: {exc}") from exc
28
33
  h = hashlib.new(algorithm, content.encode("utf-8")).digest()
29
34
  b64 = base64.b64encode(h).decode("ascii")
30
35
  return f"{algorithm}-{b64}"
@@ -63,18 +68,39 @@ def is_context_allowed(url: str, config: dict[str, Any]) -> bool:
63
68
  return True
64
69
 
65
70
 
71
+ _MAX_RECURSION_DEPTH = 500 # Safety cap for _measure_depth
72
+
73
+
66
74
  def enforce_resource_limits(
67
75
  document: str | dict | Any,
68
76
  limits: Optional[dict[str, int]] = None,
69
77
  ) -> None:
70
78
  """Validate document against resource limits before processing."""
79
+ if document is None:
80
+ raise TypeError("Document must not be None")
71
81
  resolved = {**DEFAULT_RESOURCE_LIMITS, **(limits or {})}
72
- content = document if isinstance(document, str) else json.dumps(document)
73
- if len(content) > resolved["max_document_size"]:
74
- raise ValueError(
75
- f"Document size {len(content)} exceeds limit {resolved['max_document_size']}"
76
- )
77
- parsed = json.loads(content) if isinstance(document, str) else document
82
+ if isinstance(document, str):
83
+ content = document
84
+ if len(content) > resolved["max_document_size"]:
85
+ raise ValueError(
86
+ f"Document size {len(content)} exceeds limit {resolved['max_document_size']}"
87
+ )
88
+ try:
89
+ parsed = json.loads(content)
90
+ except json.JSONDecodeError as exc:
91
+ raise ValueError(f"Document is not valid JSON: {exc}") from exc
92
+ elif isinstance(document, (dict, list)):
93
+ try:
94
+ content = json.dumps(document)
95
+ except (TypeError, ValueError) as exc:
96
+ raise TypeError(f"Document is not JSON-serializable: {exc}") from exc
97
+ if len(content) > resolved["max_document_size"]:
98
+ raise ValueError(
99
+ f"Document size {len(content)} exceeds limit {resolved['max_document_size']}"
100
+ )
101
+ parsed = document
102
+ else:
103
+ raise TypeError(f"Document must be a str, dict, or list, got: {type(document).__name__}")
78
104
  depth = _measure_depth(parsed)
79
105
  if depth > resolved["max_graph_depth"]:
80
106
  raise ValueError(
@@ -83,6 +109,8 @@ def enforce_resource_limits(
83
109
 
84
110
 
85
111
  def _measure_depth(obj: Any, current: int = 0) -> int:
112
+ if current > _MAX_RECURSION_DEPTH:
113
+ return current # Safety cap to prevent stack overflow
86
114
  if obj is None or not isinstance(obj, (dict, list)):
87
115
  return current
88
116
  max_depth = current
jsonld_ex/validation.py CHANGED
@@ -73,15 +73,15 @@ def validate_node(node: dict[str, Any], shape: dict[str, Any]) -> ValidationResu
73
73
  if type_err:
74
74
  errors.append(ValidationError(prop, "type", type_err, raw))
75
75
 
76
- # Numeric
77
- if "@minimum" in constraint and isinstance(raw, (int, float)):
76
+ # Numeric (exclude booleans — they are int subclass in Python)
77
+ if "@minimum" in constraint and isinstance(raw, (int, float)) and not isinstance(raw, bool):
78
78
  if raw < constraint["@minimum"]:
79
79
  errors.append(ValidationError(
80
80
  prop, "minimum",
81
81
  f"Value {raw} below minimum {constraint['@minimum']}", raw,
82
82
  ))
83
83
 
84
- if "@maximum" in constraint and isinstance(raw, (int, float)):
84
+ if "@maximum" in constraint and isinstance(raw, (int, float)) and not isinstance(raw, bool):
85
85
  if raw > constraint["@maximum"]:
86
86
  errors.append(ValidationError(
87
87
  prop, "maximum",
@@ -105,10 +105,16 @@ def validate_node(node: dict[str, Any], shape: dict[str, Any]) -> ValidationResu
105
105
 
106
106
  # Pattern
107
107
  if "@pattern" in constraint and isinstance(raw, str):
108
- if not re.search(constraint["@pattern"], raw):
108
+ try:
109
+ if not re.search(constraint["@pattern"], raw):
110
+ errors.append(ValidationError(
111
+ prop, "pattern",
112
+ f'"{raw}" does not match pattern "{constraint["@pattern"]}"', raw,
113
+ ))
114
+ except re.error as exc:
109
115
  errors.append(ValidationError(
110
116
  prop, "pattern",
111
- f'"{raw}" does not match pattern "{constraint["@pattern"]}"', raw,
117
+ f'Invalid regex pattern "{constraint["@pattern"]}": {exc}', raw,
112
118
  ))
113
119
 
114
120
  return ValidationResult(len(errors) == 0, errors, warnings)
@@ -148,8 +154,12 @@ def _extract_raw(value: Any) -> Any:
148
154
  return None
149
155
  if isinstance(value, dict) and "@value" in value:
150
156
  return value["@value"]
157
+ if isinstance(value, dict) and not any(k.startswith("@") for k in value):
158
+ return None # Plain dict without JSON-LD keywords — treat as absent
151
159
  if isinstance(value, list) and len(value) > 0:
152
160
  return _extract_raw(value[0])
161
+ if isinstance(value, list) and len(value) == 0:
162
+ return None
153
163
  return value
154
164
 
155
165
 
@@ -174,9 +184,9 @@ def _validate_type(value: Any, expected: str) -> Optional[str]:
174
184
  checks = {
175
185
  f"{XSD}string": lambda v: isinstance(v, str),
176
186
  f"{XSD}integer": lambda v: isinstance(v, int) and not isinstance(v, bool),
177
- f"{XSD}double": lambda v: isinstance(v, (int, float)),
178
- f"{XSD}float": lambda v: isinstance(v, (int, float)),
179
- f"{XSD}decimal": lambda v: isinstance(v, (int, float)),
187
+ f"{XSD}double": lambda v: isinstance(v, (int, float)) and not isinstance(v, bool),
188
+ f"{XSD}float": lambda v: isinstance(v, (int, float)) and not isinstance(v, bool),
189
+ f"{XSD}decimal": lambda v: isinstance(v, (int, float)) and not isinstance(v, bool),
180
190
  f"{XSD}boolean": lambda v: isinstance(v, bool),
181
191
  }
182
192
  checker = checks.get(xsd_type)
jsonld_ex/vector.py CHANGED
@@ -11,7 +11,7 @@ def vector_term_definition(
11
11
  """Create a context term definition for a vector embedding property."""
12
12
  defn: dict[str, Any] = {"@id": iri, "@container": "@vector"}
13
13
  if dimensions is not None:
14
- if not isinstance(dimensions, int) or dimensions < 1:
14
+ if not isinstance(dimensions, int) or isinstance(dimensions, bool) or dimensions < 1:
15
15
  raise ValueError(f"@dimensions must be a positive integer, got: {dimensions}")
16
16
  defn["@dimensions"] = dimensions
17
17
  return {term_name: defn}
@@ -29,7 +29,7 @@ def validate_vector(
29
29
  errors.append("Vector must not be empty")
30
30
  return False, errors
31
31
  for i, v in enumerate(vector):
32
- if not isinstance(v, (int, float)) or math.isnan(v) or math.isinf(v):
32
+ if isinstance(v, bool) or not isinstance(v, (int, float)) or math.isnan(v) or math.isinf(v):
33
33
  errors.append(f"Vector element [{i}] must be a finite number, got: {v}")
34
34
  if expected_dimensions is not None and len(vector) != expected_dimensions:
35
35
  errors.append(
@@ -39,9 +39,20 @@ def validate_vector(
39
39
 
40
40
 
41
41
  def cosine_similarity(a: list[float], b: list[float]) -> float:
42
- """Compute cosine similarity between two vectors."""
42
+ """Compute cosine similarity between two vectors.
43
+
44
+ Returns 0.0 when either vector is a zero vector (norm == 0).
45
+ """
43
46
  if len(a) != len(b):
44
47
  raise ValueError(f"Vector dimension mismatch: {len(a)} vs {len(b)}")
48
+ if len(a) == 0:
49
+ raise ValueError("Vectors must not be empty")
50
+ for i, (x, y) in enumerate(zip(a, b)):
51
+ for label, v in (("a", x), ("b", y)):
52
+ if isinstance(v, bool) or not isinstance(v, (int, float)):
53
+ raise TypeError(f"Vector {label}[{i}] must be a number, got: {type(v).__name__}")
54
+ if math.isnan(v) or math.isinf(v):
55
+ raise ValueError(f"Vector {label}[{i}] must be finite, got: {v}")
45
56
  dot = sum(x * y for x, y in zip(a, b))
46
57
  norm_a = math.sqrt(sum(x * x for x in a))
47
58
  norm_b = math.sqrt(sum(x * x for x in b))
@@ -0,0 +1,80 @@
1
+ Metadata-Version: 2.4
2
+ Name: jsonld-ex
3
+ Version: 0.1.3
4
+ Summary: JSON-LD 1.2 extensions for AI/ML data exchange, security hardening, and validation
5
+ Author-email: Muntaser Syed <jemsbhai@gmail.com>
6
+ License: MIT
7
+ Keywords: json-ld,linked-data,semantic-web,ai,ml,confidence,provenance,embeddings,security,validation
8
+ Classifier: Development Status :: 3 - Alpha
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: Intended Audience :: Science/Research
11
+ Classifier: License :: OSI Approved :: MIT License
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.9
14
+ Classifier: Programming Language :: Python :: 3.10
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
18
+ Classifier: Topic :: Software Development :: Libraries
19
+ Requires-Python: >=3.9
20
+ Description-Content-Type: text/markdown
21
+ Requires-Dist: PyLD>=2.0.4
22
+ Provides-Extra: dev
23
+ Requires-Dist: pytest>=7.0; extra == "dev"
24
+ Requires-Dist: pytest-asyncio>=0.23; extra == "dev"
25
+ Requires-Dist: mypy>=1.8; extra == "dev"
26
+ Requires-Dist: ruff>=0.2; extra == "dev"
27
+
28
+ # jsonld-ex
29
+
30
+ **JSON-LD 1.2 Extensions for AI/ML Data Exchange, Security, and Validation**
31
+
32
+ Reference implementation of proposed JSON-LD 1.2 extensions. Wraps [PyLD](https://github.com/digitalbazaar/pyld) for core processing and adds extension layers.
33
+
34
+ ## Install
35
+
36
+ ```bash
37
+ pip install jsonld-ex
38
+ ```
39
+
40
+ ## Quick Start
41
+
42
+ ```python
43
+ from jsonld_ex import JsonLdEx, annotate
44
+
45
+ # Annotate a value with AI/ML provenance
46
+ name = annotate(
47
+ "John Smith",
48
+ confidence=0.95,
49
+ source="https://ml-model.example.org/ner-v2",
50
+ method="NER",
51
+ )
52
+ # {'@value': 'John Smith', '@confidence': 0.95, '@source': '...', '@method': 'NER'}
53
+
54
+ # Validate against a shape
55
+ from jsonld_ex import validate_node
56
+
57
+ shape = {
58
+ "@type": "Person",
59
+ "name": {"@required": True, "@type": "xsd:string"},
60
+ "age": {"@type": "xsd:integer", "@minimum": 0, "@maximum": 150},
61
+ }
62
+
63
+ result = validate_node({"@type": "Person", "name": "John", "age": 30}, shape)
64
+ assert result.valid
65
+ ```
66
+
67
+ ## Features
68
+
69
+ - **AI/ML Extensions**: `@confidence`, `@source`, `@extractedAt`, `@method`, `@humanVerified`
70
+ - **Vector Embeddings**: `@vector` container type with dimension validation
71
+ - **Security**: `@integrity` context verification, allowlists, resource limits
72
+ - **Validation**: `@shape` native validation framework
73
+
74
+ ## Documentation
75
+
76
+ Full documentation and specifications: [github.com/jemsbhai/jsonld-ex](https://github.com/jemsbhai/jsonld-ex)
77
+
78
+ ## License
79
+
80
+ MIT
@@ -0,0 +1,10 @@
1
+ jsonld_ex/__init__.py,sha256=vO9XRRvsON6-wD2QqC8JIrEYqamJgdUqJebJxuuIQek,921
2
+ jsonld_ex/ai_ml.py,sha256=P5UWGuFaVXcsxIQNmLZyjRlo6Q6xVKYvHGVBaPjRD14,5588
3
+ jsonld_ex/processor.py,sha256=tRZX2aiHwZJa62ex4KT0HCbKYtzAkGV87bDBtXAhbvE,3880
4
+ jsonld_ex/security.py,sha256=MDKW3cZREaNhYKelWI5SnnkwNGSSC0UIsc8zBMjBDpk,4374
5
+ jsonld_ex/validation.py,sha256=kLCzaPNTlFRboFKBtkpLvaF5RYPSiUoTxnHtthD2xH4,6955
6
+ jsonld_ex/vector.py,sha256=uJ1a7_Ds8HNmKO0ySKyh0lpNDGCafsUP1HsmSN9vJsw,3472
7
+ jsonld_ex-0.1.3.dist-info/METADATA,sha256=4Bjwcm-gm5dkKlQiXD2y330pX6mUWqDh2PrE4seGsUQ,2573
8
+ jsonld_ex-0.1.3.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
9
+ jsonld_ex-0.1.3.dist-info/top_level.txt,sha256=YMe-47TNES9MWZsVWecMwZUDjGxCeU51ay5fuqidBfk,10
10
+ jsonld_ex-0.1.3.dist-info/RECORD,,
@@ -1,26 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: jsonld-ex
3
- Version: 0.1.0
4
- Summary: JSON-LD 1.2 extensions for AI/ML data exchange, security hardening, and validation
5
- Author-email: Muntaser Aljabry <muntaser@example.com>
6
- License: MIT
7
- Keywords: json-ld,linked-data,semantic-web,ai,ml,confidence,provenance,embeddings,security,validation
8
- Classifier: Development Status :: 3 - Alpha
9
- Classifier: Intended Audience :: Developers
10
- Classifier: Intended Audience :: Science/Research
11
- Classifier: License :: OSI Approved :: MIT License
12
- Classifier: Programming Language :: Python :: 3
13
- Classifier: Programming Language :: Python :: 3.9
14
- Classifier: Programming Language :: Python :: 3.10
15
- Classifier: Programming Language :: Python :: 3.11
16
- Classifier: Programming Language :: Python :: 3.12
17
- Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
18
- Classifier: Topic :: Software Development :: Libraries
19
- Requires-Python: >=3.9
20
- Description-Content-Type: text/markdown
21
- Requires-Dist: PyLD>=2.0.4
22
- Provides-Extra: dev
23
- Requires-Dist: pytest>=7.0; extra == "dev"
24
- Requires-Dist: pytest-asyncio>=0.23; extra == "dev"
25
- Requires-Dist: mypy>=1.8; extra == "dev"
26
- Requires-Dist: ruff>=0.2; extra == "dev"
@@ -1,10 +0,0 @@
1
- jsonld_ex/__init__.py,sha256=_4A1sW_AzHYtK8d0evFiOtlPyTI1IIsteiRmOf6k9SQ,921
2
- jsonld_ex/ai_ml.py,sha256=0SvM2RADWLAandxfYTFMMmQUMuTRzqkezpwplpGRP3A,4973
3
- jsonld_ex/processor.py,sha256=tRZX2aiHwZJa62ex4KT0HCbKYtzAkGV87bDBtXAhbvE,3880
4
- jsonld_ex/security.py,sha256=PCHZ7NTCy_WeuI_BNTKIMIH00OzLtozu47y5x1E8Fhg,3251
5
- jsonld_ex/validation.py,sha256=FjnRotqb9irWNARhh_eY6xBxFLCZDHJAczAHc0EUE2Q,6266
6
- jsonld_ex/vector.py,sha256=SHTHAcU2xa-Gh_Q4YQiOy1QX1_5O_U-vkODpwzj0Irc,2885
7
- jsonld_ex-0.1.0.dist-info/METADATA,sha256=0_Xm330HBmDg82a6rbk0f3B3msBeYHEo-snle1OjnX4,1167
8
- jsonld_ex-0.1.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
9
- jsonld_ex-0.1.0.dist-info/top_level.txt,sha256=YMe-47TNES9MWZsVWecMwZUDjGxCeU51ay5fuqidBfk,10
10
- jsonld_ex-0.1.0.dist-info/RECORD,,