jsonstat-validator 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- jsonstat_validator/__init__.py +34 -0
- jsonstat_validator/tests/__init__.py +1 -0
- jsonstat_validator/tests/conftest.py +10 -0
- jsonstat_validator/tests/custom_tests.py +411 -0
- jsonstat_validator/tests/samples/areas/canada.json +16 -0
- jsonstat_validator/tests/samples/areas/galicia.json +16 -0
- jsonstat_validator/tests/samples/areas/index.json +31 -0
- jsonstat_validator/tests/samples/areas/oecd.json +16 -0
- jsonstat_validator/tests/samples/areas/us.json +26 -0
- jsonstat_validator/tests/samples/canada.json +131 -0
- jsonstat_validator/tests/samples/datasets/index.json +51 -0
- jsonstat_validator/tests/samples/galicia.json +124 -0
- jsonstat_validator/tests/samples/hierarchy.json +200 -0
- jsonstat_validator/tests/samples/index.json +36 -0
- jsonstat_validator/tests/samples/metrics/gdp/gsp.json +16 -0
- jsonstat_validator/tests/samples/metrics/gdp/gsppc.json +16 -0
- jsonstat_validator/tests/samples/metrics/gdp/gspw.json +16 -0
- jsonstat_validator/tests/samples/metrics/gdp/index.json +26 -0
- jsonstat_validator/tests/samples/metrics/index.json +26 -0
- jsonstat_validator/tests/samples/metrics/lfs/employed.json +16 -0
- jsonstat_validator/tests/samples/metrics/lfs/index.json +31 -0
- jsonstat_validator/tests/samples/metrics/lfs/lf.json +16 -0
- jsonstat_validator/tests/samples/metrics/lfs/unemployed.json +16 -0
- jsonstat_validator/tests/samples/metrics/lfs/unr.json +21 -0
- jsonstat_validator/tests/samples/metrics/pop/index.json +21 -0
- jsonstat_validator/tests/samples/metrics/pop/pop.json +26 -0
- jsonstat_validator/tests/samples/metrics/pop/popw.json +16 -0
- jsonstat_validator/tests/samples/oecd.json +170 -0
- jsonstat_validator/tests/samples/order.json +38 -0
- jsonstat_validator/tests/samples/sources/bls.json +21 -0
- jsonstat_validator/tests/samples/sources/ige.json +16 -0
- jsonstat_validator/tests/samples/sources/index.json +41 -0
- jsonstat_validator/tests/samples/sources/jsonstat.json +21 -0
- jsonstat_validator/tests/samples/sources/oecd.json +16 -0
- jsonstat_validator/tests/samples/sources/statcan.json +16 -0
- jsonstat_validator/tests/samples/sources/wikipedia.json +16 -0
- jsonstat_validator/tests/samples/topics/accounts.json +16 -0
- jsonstat_validator/tests/samples/topics/demos.json +21 -0
- jsonstat_validator/tests/samples/topics/index.json +31 -0
- jsonstat_validator/tests/samples/topics/labor.json +26 -0
- jsonstat_validator/tests/samples/topics/population.json +26 -0
- jsonstat_validator/tests/samples/us-gsp.json +230 -0
- jsonstat_validator/tests/samples/us-labor.json +6509 -0
- jsonstat_validator/tests/samples/us-unr.json +3269 -0
- jsonstat_validator/tests/test_official_samples.py +51 -0
- jsonstat_validator/validator.py +608 -0
- jsonstat_validator-0.1.0.dist-info/LICENSE +21 -0
- jsonstat_validator-0.1.0.dist-info/METADATA +196 -0
- jsonstat_validator-0.1.0.dist-info/RECORD +51 -0
- jsonstat_validator-0.1.0.dist-info/WHEEL +5 -0
- jsonstat_validator-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,51 @@
|
|
1
|
+
"""
|
2
|
+
Test the official JSON-stat samples and collections.
|
3
|
+
|
4
|
+
This test suite validates the official JSON-stat samples provided by the JSON-stat
|
5
|
+
website: https://json-stat.org/samples/. For efficiency purposes, all sample-files
|
6
|
+
are downloaded and saved in the jsonstat_validator/tests/samples directory.
|
7
|
+
"""
|
8
|
+
|
9
|
+
import glob
|
10
|
+
import json
|
11
|
+
from pathlib import Path
|
12
|
+
|
13
|
+
import pytest
|
14
|
+
|
15
|
+
from jsonstat_validator import validate_jsonstat
|
16
|
+
|
17
|
+
# Directory that holds this test module; the bundled samples live beside it.
TESTS_DIR = Path(__file__).parent
SAMPLES_DIR = TESTS_DIR / "samples"

# Recursively collect every JSON file below the samples directory.
# glob returns matches in arbitrary, filesystem-dependent order, so the list
# is sorted to keep the parametrized test order reproducible between runs.
official_samples_files = sorted(
    glob.glob(str(SAMPLES_DIR / "**" / "*.json"), recursive=True)
)
|
23
|
+
|
24
|
+
|
25
|
+
def load_json_file(file_path):
    """Open ``file_path`` as UTF-8 text and return the decoded JSON content."""
    with open(file_path, "r", encoding="utf-8") as handle:
        parsed = json.load(handle)
    return parsed
|
29
|
+
|
30
|
+
|
31
|
+
@pytest.mark.parametrize("sample_path", official_samples_files)
def test_official_sample(sample_path):
    """Check that a single bundled JSON-stat sample passes validation.

    The file is loaded and handed to ``validate_jsonstat``; the call must
    return ``True``. Any ``ValueError`` raised while loading or validating
    is reported as a test failure that names the offending sample.
    """
    try:
        document = load_json_file(sample_path)
        outcome = validate_jsonstat(document)
        assert outcome is True, f"Failed to validate {sample_path}"
    except ValueError as error:
        pytest.fail(f"Failed to validate {sample_path}: {error}")
|
43
|
+
|
44
|
+
|
45
|
+
if __name__ == "__main__":
    # Allows running the tests directly with `python <this file>`.
    # pytest is invoked with:
    #   -v: verbose output (show test names)
    #   -s: don't capture stdout (allow print statements to be shown)
    #   __file__: run only the tests in this file
    # pytest.main returns the run's exit code; it is passed to SystemExit so
    # that a failing run gives the process a non-zero exit status.
    raise SystemExit(pytest.main(["-vs", __file__]))
|
@@ -0,0 +1,608 @@
|
|
1
|
+
"""
|
2
|
+
JSON-stat validator.
|
3
|
+
|
4
|
+
Validates JSON-stat data against the specification:
|
5
|
+
https://json-stat.org/full/
|
6
|
+
"""
|
7
|
+
|
8
|
+
from __future__ import annotations
|
9
|
+
|
10
|
+
from collections import Counter
|
11
|
+
from datetime import datetime
|
12
|
+
from typing import Any, Dict, List, Literal, Optional, Union
|
13
|
+
|
14
|
+
from pydantic import (
|
15
|
+
AnyUrl,
|
16
|
+
BaseModel,
|
17
|
+
Field,
|
18
|
+
RootModel,
|
19
|
+
ValidationError,
|
20
|
+
field_validator,
|
21
|
+
model_validator,
|
22
|
+
)
|
23
|
+
|
24
|
+
|
25
|
+
def is_valid_iso_date(date_string: str) -> bool:
    """Report whether ``date_string`` parses as an ISO 8601 date or date-time.

    Every "Z" in the input is rewritten to "+00:00" before the text is handed
    to ``datetime.fromisoformat``; the string is considered valid exactly when
    that call does not raise ``ValueError``.
    """
    normalized = date_string.replace("Z", "+00:00")
    try:
        datetime.fromisoformat(normalized)
    except ValueError:
        return False
    return True
|
32
|
+
|
33
|
+
|
34
|
+
class Unit(BaseModel):
    """Unit-of-measure metadata for a category of a metric dimension.

    The properties decimals, label, symbol and position are currently closed.
    The properties base, type, multiplier and adjustment are currently open:
    data providers are free to use them on their own terms, although it is
    safer to do it under extension.
    """

    # Closed properties.
    label: Optional[str] = Field(None)
    # The only required property: no default is declared for it.
    decimals: int = Field(
        description=(
            "It contains the number of unit decimals (integer). "
            "If unit is present, decimals is required."
        )
    )
    symbol: Optional[str] = Field(
        None,
        description=(
            "It contains a possible unit symbol to add to the value "
            "when it is displayed (like '€', '$' or '%')."
        ),
    )
    position: Optional[Literal["start", "end"]] = Field(
        None,
        description=(
            "where the unit symbol should be written "
            "(before or after the value). "
            "Default is end."
        ),
    )

    # Open properties.
    base: Optional[str] = Field(
        None,
        description="It is the base unit (person, gram, euro, etc.).",
    )
    type: Optional[str] = Field(
        None,
        description=(
            "This property should probably help deriving new data from the data. "
            "It should probably help answering questions like: "
            "does it make sense to add two different cell values? "
            "Some possible values of this property could be count or ratio. "
            "Some might also consider as possible values things like "
            "currency, mass, length, time, etc."
        ),
    )
    multiplier: Optional[Union[int, float]] = Field(
        None,
        description=(
            "It is the unit multiplier. "
            "It should help comparing data with the same base unit "
            "but different multiplier. "
            "If a decimal system is used, it can be expressed as powers of 10 "
            "(0=1, 1=10, -1=0.1, etc.)."
        ),
    )
    adjustment: Optional[str] = Field(
        None,
        description=(
            "A code to express the time series adjustment "
            "(for example, seasonally adjusted or adjusted by working days) "
            "or indices adjustment (for example, chain-linked indices)."
        ),
    )
|
99
|
+
|
100
|
+
|
101
|
+
class Category(BaseModel):
    """Category of a dimension.

    It is used to describe the possible values of a dimension. Category IDs
    are declared through `index`, `label` or both; `child`, `coordinates`
    and `unit` attach extra information to those IDs.
    """

    index: Optional[Union[List[str], Dict[str, int]]] = Field(
        default=None,
        description=(
            "It is used to order the possible values (categories) of a dimension. "
            "The order of the categories and the order of the dimensions themselves "
            "determine the order of the data in the value array. While the dimensions "
            "order has only this functional role (and therefore any order chosen by "
            "the provider is valid), the categories order has also a presentation "
            "role: it is assumed that the categories are sorted in a meaningful order "
            "and that the consumer can rely on it when displaying the information. "
            "- index is required unless the dimension is a constant dimension "
            "(dimension with a single category). When a dimension has only one "
            "category, the index property is indeed unnecessary. In the case that "
            "a category index is not provided, a category label must be included."
        ),
    )
    label: Optional[Dict[str, str]] = Field(
        default=None,
        description=(
            "It is used to assign a very short (one line) descriptive text to IDs "
            "at different levels of the response tree. It is language-dependent."
        ),
    )
    child: Optional[Dict[str, List[str]]] = Field(
        default=None,
        description=(
            "It is used to describe the hierarchical relationship between different "
            "categories. It takes the form of an object where the key is the ID of "
            "the parent category and the value is an array of the IDs of the child "
            "categories. It is also a way of exposing a certain category as a total."
        ),
    )
    coordinates: Optional[Dict[str, List[float]]] = Field(
        default=None,
        description=(
            "It can be used to assign longitude/latitude geographic coordinates "
            "to the categories of a dimension with a geo role. It takes the form "
            "of an object where keys are category IDs and values are an array of "
            "two numbers (longitude, latitude)."
        ),
    )
    unit: Optional[Dict[str, Unit]] = Field(
        default=None,
        description=(
            "It can be used to assign unit of measure metadata to the categories "
            "of a dimension with a metric role."
        ),
    )

    @model_validator(mode="after")
    def validate_category(self):
        """Run the category-wide consistency checks.

        Returns:
            The validated instance itself.

        Raises:
            ValueError: If both `index` and `label` are missing or empty, if a
                key of `coordinates`, `child` or `unit` is defined neither in
                `index` nor in `label`, or if a coordinates entry is not a
                list of exactly two items.
        """
        # Ensure at least one of index or label fields is provided.
        if not self.index and not self.label:
            raise ValueError("At least one of `index` or `label` is required.")

        # A category ID is known when EITHER property declares it. Checking
        # the union (rather than each property separately) keeps IDs that are
        # listed in `index` but have no entry in a partial `label` map valid,
        # which is what the error messages below describe.
        # Iterating a dict-shaped `index` yields its keys, i.e. the IDs.
        known_ids = set(self.index or ()) | set(self.label or ())

        # Coordinates: keys must be known category IDs and each value must be
        # a two-item list (longitude, latitude).
        for key, value in (self.coordinates or {}).items():
            if key not in known_ids:
                raise ValueError(
                    f"Trying to set coordinates for category ID: {key} "
                    "but it is not defined neither in `index` nor in `label`."
                )
            if not isinstance(value, list) or len(value) != 2:
                raise ValueError(
                    f"Coordinates for category {key} must be a list of two numbers."
                )

        # Child: every parent key must be a known category ID.
        for parent in self.child or {}:
            if parent not in known_ids:
                raise ValueError(
                    f"Invalid parent: {parent} in the `child` field. "
                    "It is not defined neither in `index` nor in `label`."
                )

        # Unit: every key must be a known category ID.
        for key in self.unit or {}:
            if key not in known_ids:
                raise ValueError(
                    f"Invalid unit: {key} in the `unit` field. "
                    "It is not defined neither in `index` nor in `label`."
                )
        return self
|
203
|
+
|
204
|
+
|
205
|
+
class Link(BaseModel):
    """Model for a link.

    It is used to provide a list of links related to a dataset or a dimension,
    sorted by relation.
    """

    # `class_` is read from the JSON key "class" (see the alias below).
    # populate_by_name keeps the attribute name `class_` accepted as well, so
    # code that builds Link(class_=...) directly keeps working.
    model_config = {"populate_by_name": True}

    type: Optional[str] = Field(
        default=None,
        description=(
            "It describes the media type of the accompanying href. "
            "Not required when the resource referenced in the link "
            "is a JSON-stat resource."
        ),
    )
    href: Optional[AnyUrl] = Field(default=None, description="It specifies a URL.")
    # Without the alias the JSON key "class" would not map to this field, so
    # its value would never be checked against the allowed literals.
    class_: Optional[Literal["dataset", "dimension", "collection"]] = Field(
        default=None,
        alias="class",
        description=(
            "It describes the class of the resource referenced "
            "in the link. Not required when the resource referenced "
            "in the link is a JSON-stat resource."
        ),
    )
    label: Optional[str] = Field(
        default=None,
        description=(
            "It provides a human-readable label for the link. "
            "Not required when the resource referenced in the link "
            "is a JSON-stat resource."
        ),
    )
|
237
|
+
|
238
|
+
|
239
|
+
class Dimension(BaseModel):
    """JSON-stat dimension.

    Implements the dimension class of the JSON-stat 2.0 specification:
    https://json-stat.org/full/#dimension.
    """

    version: str = Field(
        "2.0",
        description=(
            "It declares the JSON-stat version of the response. "
            "The goal of this property is to help clients parsing "
            "that particular response."
        ),
    )
    # Populated from the JSON key "class".
    class_: Literal["dimension"] = Field(
        "dimension",
        alias="class",
        description=(
            "JSON-stat supports several classes of responses. "
            "Possible values of class are: dataset, dimension and collection."
        ),
    )
    label: Optional[str] = Field(
        None,
        description=(
            "It is used to assign a very short (one line) descriptive text "
            "to IDs at different levels of the response tree. "
            "It is language-dependent."
        ),
    )
    # Required: no default is declared for it.
    category: Category = Field(
        description=(
            "It is used to describe the possible values of a dimension. "
            "It is language-dependent."
        )
    )
    href: Optional[AnyUrl] = Field(
        None,
        description=(
            "It specifies a URL. "
            "Providers can use this property to avoid sending information "
            "that is shared between different requests "
            "(for example, dimensions)."
        ),
    )
    link: Optional[Dict[str, List[Union[Link, JSONStatSchema]]]] = Field(
        None,
        description=(
            "It is used to provide a list of links related to a dataset "
            "or a dimension, sorted by relation (see relation ID)."
        ),
    )
    note: Optional[List[str]] = Field(
        None,
        description=(
            "Note allows to assign annotations to datasets (array), "
            "dimensions (array) and categories (object)."
        ),
    )
    updated: Optional[str] = Field(
        None,
        description=(
            "It contains the update time of the dataset. "
            "It is a string representing a date in an ISO 8601 format "
            "recognized by the Javascript Date.parse method "
            "(see ECMA-262 Date Time String Format: "
            "https://262.ecma-international.org/6.0/#sec-date-time-string-format)."
        ),
    )
    source: Optional[str] = Field(
        None,
        description=(
            "It contains a language-dependent short text "
            "describing the source of the dataset."
        ),
    )
    extension: Optional[Dict[str, Any]] = Field(
        None,
        description=(
            "Extension allows JSON-stat to be extended for particular needs. "
            "Providers are free to define where they include this property "
            "and what children are allowed in each case."
        ),
    )

    @field_validator("updated", mode="after")
    @classmethod
    def validate_updated_date(cls, v: Optional[str]):
        """Reject a non-empty `updated` value that is not a valid ISO 8601 date.

        Missing or empty values are returned unchanged without being checked.
        """
        if not v or is_valid_iso_date(v):
            return v
        raise ValueError(f"Updated date: '{v}' is an invalid ISO 8601 format.")
|
328
|
+
|
329
|
+
|
330
|
+
class DatasetRole(BaseModel):
    """Special roles (time, geo, metric) assigned to dataset dimensions.

    Each role is an optional list of dimension IDs.
    """

    time: Optional[List[str]] = Field(
        None,
        description=(
            "It can be used to assign a time role to one or more dimensions. "
            "It takes the form of an array of dimension IDs "
            "in which order does not have a special meaning."
        ),
    )
    geo: Optional[List[str]] = Field(
        None,
        description=(
            "It can be used to assign a spatial role to one or more dimensions. "
            "It takes the form of an array of dimension IDs "
            "in which order does not have a special meaning."
        ),
    )
    metric: Optional[List[str]] = Field(
        None,
        description=(
            "It can be used to assign a metric role to one or more dimensions. "
            "It takes the form of an array of dimension IDs "
            "in which order does not have a special meaning."
        ),
    )
|
357
|
+
|
358
|
+
|
359
|
+
class Dataset(BaseModel):
    """JSON-stat dataset.

    Besides the per-field types, the model checks that `updated` is an
    ISO 8601 date, that no dimension ID is listed under more than one role,
    that `size` and `id` have the same length, that a list-shaped `status`
    has either one entry or as many entries as `value`, and that every ID in
    `id` has a definition in `dimension`.
    """

    version: str = Field(
        "2.0",
        description=(
            "It declares the JSON-stat version of the response. "
            "The goal of this property is to help clients parsing "
            "that particular response."
        ),
    )
    # Populated from the JSON key "class".
    class_: Literal["dataset"] = Field(
        "dataset",
        alias="class",
        description=(
            "JSON-stat supports several classes of responses. "
            "Possible values of class are: dataset, dimension and collection."
        ),
    )
    href: Optional[AnyUrl] = Field(
        None,
        description=(
            "It specifies a URL. "
            "Providers can use this property to avoid sending information "
            "that is shared between different requests "
            "(for example, dimensions)."
        ),
    )
    label: Optional[str] = Field(
        None,
        description=(
            "It is used to assign a very short (one line) descriptive text "
            "to IDs at different levels of the response tree. "
            "It is language-dependent."
        ),
    )
    source: Optional[str] = Field(
        None,
        description=(
            "It contains a language-dependent short text "
            "describing the source of the dataset."
        ),
    )
    updated: Optional[str] = Field(
        None,
        description=(
            "It contains the update time of the dataset. "
            "It is a string representing a date in an ISO 8601 format "
            "recognized by the Javascript Date.parse method "
            "(see ECMA-262 Date Time String Format: "
            "https://262.ecma-international.org/6.0/#sec-date-time-string-format)."
        ),
    )
    # Required properties: id, size, value and dimension declare no default.
    id: List[str] = Field(description="It contains an ordered list of dimension IDs.")
    size: List[int] = Field(
        description=(
            "It contains the number (integer) of categories (possible values) "
            "of each dimension in the dataset. "
            "It has the same number of elements and in the same order as in id."
        )
    )
    role: Optional[DatasetRole] = Field(
        None,
        description=(
            "It can be used to assign special roles to dimensions. "
            "At this moment, possible roles are: time, geo and metric. "
            "A role can be shared by several dimensions."
        ),
    )
    value: Union[
        List[Union[float, int, str, None]], Dict[str, Union[float, int, str, None]]
    ] = Field(
        description=(
            "It contains the data sorted according to the dataset dimensions. "
            "It usually takes the form of an array "
            "where missing values are expressed as nulls."
        )
    )
    status: Optional[Union[str, List[str], Dict[str, str]]] = Field(
        None,
        description=(
            "It contains metadata at the observation level. "
            "When it takes an array form of the same size of value, "
            "it assigns a status to each data by position. "
            "When it takes a dictionary form, "
            "it assigns a status to each data by key."
        ),
    )

    dimension: Dict[str, Dimension] = Field(
        description=(
            "The dimension property contains information about the dimensions "
            "of the dataset. "
            "dimension must have properties (see dimension ID) "
            "with the same names of each element in the id array."
        )
    )
    note: Optional[List[str]] = Field(
        None,
        description=(
            "Note allows to assign annotations to datasets (array), "
            "dimensions (array) and categories (object)."
        ),
    )
    extension: Optional[Dict[str, Any]] = Field(
        None,
        description=(
            "Extension allows JSON-stat to be extended for particular needs. "
            "Providers are free to define where they include this property "
            "and what children are allowed in each case."
        ),
    )
    link: Optional[Dict[str, List[Union[Link, JSONStatSchema]]]] = Field(
        None,
        description=(
            "It is used to provide a list of links related to a dataset "
            "or a dimension, sorted by relation (see relation ID)."
        ),
    )

    @field_validator("updated", mode="after")
    @classmethod
    def validate_updated_date(cls, v: Optional[str]):
        """Reject a non-empty `updated` value that is not a valid ISO 8601 date.

        Missing or empty values are returned unchanged without being checked.
        """
        if not v or is_valid_iso_date(v):
            return v
        raise ValueError(f"Updated date: '{v}' is an invalid ISO 8601 format.")

    @field_validator("role", mode="after")
    @classmethod
    def validate_role(cls, v: Optional[DatasetRole]):
        """Reject a role object that lists the same dimension ID more than once.

        The IDs of all roles that are set are counted together, so an ID
        repeated within one role or across several roles is reported.
        """
        if not v:
            return v

        # Tally every dimension ID across the roles that are actually set.
        tally = Counter()
        for dimension_ids in v.model_dump().values():
            if dimension_ids is not None:
                tally.update(dimension_ids)

        duplicates = [dim_id for dim_id, seen in tally.items() if seen > 1]
        if duplicates:
            raise ValueError(
                f"Dimension(s): {', '.join(duplicates)} referenced in multiple "
                "roles. Each dimension can only be referenced in one role."
            )
        return v

    @model_validator(mode="after")
    def validate_dataset(self):
        """Run the dataset-wide consistency checks and return the instance.

        Raises:
            ValueError: If `size` and `id` differ in length, if a list-shaped
                `status` has neither one entry nor as many entries as `value`,
                or if an ID in `id` has no entry in `dimension`.
        """
        # `size` must carry one entry per dimension ID.
        size_count = len(self.size)
        id_count = len(self.id)
        if size_count != id_count:
            raise ValueError(
                f"Size array length ({size_count}) "
                f"must match ID array length ({id_count})"
            )

        # Only the list form of `status` is length-checked.
        if isinstance(self.status, list):
            value_count = len(self.value)
            status_count = len(self.status)
            if status_count != value_count and status_count != 1:
                raise ValueError(
                    "Status list must match value length "
                    f"({value_count}) or be single value"
                )

        # Every declared dimension ID needs a definition.
        missing_dims = [
            declared for declared in self.id if declared not in self.dimension
        ]
        if missing_dims:
            raise ValueError(
                f"Missing dimension definitions: {', '.join(missing_dims)}"
            )
        return self
|
527
|
+
|
528
|
+
|
529
|
+
class Collection(BaseModel):
    """JSON-stat collection.

    Besides the per-field types, the model checks that `updated` is an
    ISO 8601 date and that a non-empty `link` object has an "item" relation.
    """

    version: str = Field(
        "2.0",
        description=(
            "It declares the JSON-stat version of the response. "
            "The goal of this property is to help clients parsing "
            "that particular response."
        ),
    )

    # Populated from the JSON key "class".
    class_: Literal["collection"] = Field(
        "collection",
        alias="class",
        description="It declares the class of the response.",
    )
    label: Optional[str] = Field(
        None,
        description="It provides a human-readable label for the collection.",
    )
    href: Optional[AnyUrl] = Field(None, description="It specifies a URL.")
    updated: Optional[str] = Field(
        None,
        description="It contains the update time of the collection.",
    )
    link: Optional[Dict[str, List[Union[Link, JSONStatSchema]]]] = Field(
        None,
        description=(
            "The items of the collection can be of any class "
            "(datasets, dimensions, collections)."
        ),
    )

    @field_validator("updated", mode="after")
    @classmethod
    def validate_updated_date(cls, v: Optional[str]):
        """Reject a non-empty `updated` value that is not a valid ISO 8601 date.

        Missing or empty values are returned unchanged without being checked.
        """
        if not v or is_valid_iso_date(v):
            return v
        raise ValueError(f"Updated date: '{v}' is an invalid ISO 8601 format.")

    @model_validator(mode="after")
    def validate_collection(self):
        """Require the "item" relation whenever `link` is present and non-empty."""
        if not self.link or "item" in self.link:
            return self
        raise ValueError("Collection links must use 'item' relation type")
|
580
|
+
|
581
|
+
|
582
|
+
class JSONStatSchema(RootModel):
    """JSON-stat response: a dataset, a dimension or a collection.

    The concrete model is selected through the `class_` discriminator field.
    """

    # Required root value; no default is declared.
    root: Union[Dataset, Dimension, Collection] = Field(discriminator="class_")
|
589
|
+
|
590
|
+
|
591
|
+
def validate_jsonstat(data: Dict[str, Any]) -> bool:
    """
    Check a JSON-stat 2.0 object against the ``JSONStatSchema`` model.

    Args:
        data: A dictionary containing JSON-stat data

    Returns:
        bool: Always True when validation succeeds

    Raises:
        ValueError: If model validation fails; the original pydantic
            ``ValidationError`` is chained as the cause
    """
    try:
        JSONStatSchema.model_validate(data)
    except ValidationError as exc:
        raise ValueError(f"Validation error: {exc}") from exc
    return True
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2025 JSON-stat Validator Contributors
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|