jsonstat-validator 0.1.6__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- jsonstat_validator/__init__.py +3 -16
- jsonstat_validator/models.py +775 -0
- jsonstat_validator/validator.py +38 -737
- {jsonstat_validator-0.1.6.dist-info → jsonstat_validator-0.2.0.dist-info}/METADATA +3 -3
- jsonstat_validator-0.2.0.dist-info/RECORD +8 -0
- jsonstat_validator-0.1.6.dist-info/RECORD +0 -7
- {jsonstat_validator-0.1.6.dist-info → jsonstat_validator-0.2.0.dist-info}/WHEEL +0 -0
- {jsonstat_validator-0.1.6.dist-info → jsonstat_validator-0.2.0.dist-info}/licenses/LICENSE +0 -0
- {jsonstat_validator-0.1.6.dist-info → jsonstat_validator-0.2.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,775 @@
|
|
1
|
+
"""
|
2
|
+
JSON-stat validator.
|
3
|
+
|
4
|
+
Validates JSON-stat data against the specification:
|
5
|
+
https://json-stat.org/full/
|
6
|
+
"""
|
7
|
+
|
8
|
+
from __future__ import annotations
|
9
|
+
|
10
|
+
from collections import Counter
|
11
|
+
from datetime import datetime
|
12
|
+
from typing import Any, Dict, List, Literal, Optional, Union
|
13
|
+
|
14
|
+
from pydantic import (
|
15
|
+
AnyUrl,
|
16
|
+
BaseModel,
|
17
|
+
ConfigDict,
|
18
|
+
Field,
|
19
|
+
RootModel,
|
20
|
+
field_serializer,
|
21
|
+
field_validator,
|
22
|
+
model_validator,
|
23
|
+
)
|
24
|
+
|
25
|
+
# pylint: disable=[useless-parent-delegation]
|
26
|
+
|
27
|
+
|
28
|
+
def is_valid_iso_date(date_string: str) -> bool:
    """Check if a date string is in ISO 8601 format.

    Every ``Z`` (UTC designator) is rewritten to ``+00:00`` before the string
    is handed to ``datetime.fromisoformat``, so that ``...Z`` timestamps are
    accepted regardless of whether the running interpreter parses ``Z``
    natively.

    Args:
        date_string: Candidate date or date-time string.

    Returns:
        True when ``datetime.fromisoformat`` accepts the normalised string,
        False otherwise. Non-string input is reported as invalid instead of
        raising.
    """
    # Guard first: ``.replace`` on a non-string would raise AttributeError,
    # which callers of a boolean predicate do not expect.
    if not isinstance(date_string, str):
        return False
    try:
        datetime.fromisoformat(date_string.replace("Z", "+00:00"))
    except ValueError:
        return False
    return True
|
35
|
+
|
36
|
+
|
37
|
+
class JSONStatBaseModel(BaseModel):
    """Base model for all JSON-stat models with common configuration.

    Configuration: unknown properties are forbidden (``extra="forbid"``) and
    serialization uses field aliases (``serialize_by_alias=True``).
    """

    model_config = ConfigDict(extra="forbid", serialize_by_alias=True)

    def model_dump(self, *, exclude_none: bool = True, by_alias: bool = True, **kwargs):
        """Dump the model, excluding ``None`` values and using aliases by default.

        Args:
            exclude_none: Drop fields whose value is ``None`` (default True).
            by_alias: Use field aliases as keys (default True).
            **kwargs: Forwarded unchanged to ``BaseModel.model_dump``.

        Returns:
            Whatever ``BaseModel.model_dump`` returns for these options.
        """
        return super().model_dump(exclude_none=exclude_none, by_alias=by_alias, **kwargs)

    # ``check_fields=False`` because not every subclass declares ``href``.
    # The declared return type is Optional[str] to match what the function
    # actually returns: ``None`` is produced when no href is set.
    @field_serializer("href", check_fields=False, return_type=Optional[str])
    def serialize_any_url(self, href: Optional[AnyUrl]) -> Optional[str]:
        """Convert AnyUrl to string, if it exists.

        Args:
            href: The URL value of the ``href`` field, or ``None``.

        Returns:
            ``str(href)`` when a URL is present, otherwise ``None``.
        """
        return str(href) if href else None
|
50
|
+
|
51
|
+
|
52
|
+
class Unit(JSONStatBaseModel):
    """Unit of measurement attached to the categories of a metric dimension.

    ``decimals``, ``label``, ``symbol`` and ``position`` are the closed
    properties of this object. ``base``, ``type``, ``multiplier`` and
    ``adjustment`` are open properties that providers may use on their own
    terms (doing so under ``extension`` is safer).
    """

    # --- closed properties -------------------------------------------------
    label: Optional[str] = Field(None)
    decimals: Optional[int] = Field(
        None,
        description="It contains the number of unit decimals (integer). "
        "If unit is present, decimals is required.",
    )
    symbol: Optional[str] = Field(
        None,
        description="It contains a possible unit symbol to add to the value "
        "when it is displayed (like '€', '$' or '%').",
    )
    position: Optional[Literal["start", "end"]] = Field(
        None,
        description="where the unit symbol should be written (before or after the value). "
        "Default is end.",
    )

    # --- open properties ---------------------------------------------------
    base: Optional[str] = Field(
        None,
        description="It is the base unit (person, gram, euro, etc.).",
    )
    type: Optional[str] = Field(
        None,
        description="This property should probably help deriving new data from the data. "
        "It should probably help answering questions like: does it make sense "
        "to add two different cell values? Some possible values of this "
        "property could be count or ratio. Some might also consider as "
        "possible values things like currency, mass, length, time, etc.",
    )
    multiplier: Optional[Union[int, float]] = Field(
        None,
        description="It is the unit multiplier. It should help comparing data with the "
        "same base unit but different multiplier. If a decimal system is used, "
        "it can be expressed as powers of 10 (0=1, 1=10, -1=0.1, etc.).",
    )
    adjustment: Optional[str] = Field(
        None,
        description="A code to express the time series adjustment (for example, "
        "seasonally adjusted or adjusted by working days) or indices "
        "adjustment (for example, chain-linked indices).",
    )
|
118
|
+
|
119
|
+
|
120
|
+
class Category(JSONStatBaseModel):
    """Category of a dimension.

    It is used to describe the possible values of a dimension. Cross-field
    consistency (IDs used by ``coordinates``, ``child`` and ``unit`` must be
    declared in ``index``/``label``) is enforced by ``validate_category``.
    """

    index: Optional[Union[List[str], Dict[str, int]]] = Field(
        default=None,
        description=(
            "It is used to order the possible values (categories) of a dimension. "
            "The order of the categories and the order of the dimensions themselves "
            "determine the order of the data in the value array. While the dimensions "
            "order has only this functional role (and therefore any order chosen by "
            "the provider is valid), the categories order has also a presentation "
            "role: it is assumed that the categories are sorted in a meaningful order "
            "and that the consumer can rely on it when displaying the information. "
            "- index is required unless the dimension is a constant dimension "
            "(dimension with a single category). When a dimension has only one "
            "category, the index property is indeed unnecessary. In the case that "
            "a category index is not provided, a category label must be included."
        ),
    )
    label: Optional[Dict[str, str]] = Field(
        default=None,
        description=(
            "It is used to assign a very short (one line) descriptive text to IDs "
            "at different levels of the response tree. It is language-dependent."
        ),
    )
    child: Optional[Dict[str, List[str]]] = Field(
        default=None,
        description=(
            "It is used to describe the hierarchical relationship between different "
            "categories. It takes the form of an object where the key is the ID of "
            "the parent category and the value is an array of the IDs of the child "
            "categories. It is also a way of exposing a certain category as a total."
        ),
    )
    coordinates: Optional[Dict[str, List[float]]] = Field(
        default=None,
        description=(
            "It can be used to assign longitude/latitude geographic coordinates "
            "to the categories of a dimension with a geo role. It takes the form "
            "of an object where keys are category IDs and values are an array of "
            "two numbers (longitude, latitude)."
        ),
    )
    unit: Optional[Dict[str, Unit]] = Field(
        default=None,
        description=(
            "It can be used to assign unit of measure metadata to the categories "
            "of a dimension with a metric role."
        ),
    )
    note: Optional[Dict[str, List[str]]] = Field(
        default=None,
        description=(
            "note allows to assign annotations to datasets (array), dimensions "
            "(array) and categories (object). To assign annotations to individual "
            "data, use status: https://json-stat.org/full/#status."
        ),
    )

    @model_validator(mode="after")
    def validate_category(self):
        """Category-wide validation checks.

        Returns:
            The validated instance itself.

        Raises:
            ValueError: If neither ``index`` nor ``label`` is provided, if both
                are provided with different key sets, if ``coordinates``,
                ``child`` or ``unit`` reference an undeclared category ID, or
                if a coordinates entry is not a list of two items.
        """
        # Ensure at least one of index or label fields is provided
        if not self.index and not self.label:
            raise ValueError("At least one of `index` or `label` is required.")

        # Ensure index and label describe the same category IDs. ``set()`` of
        # a list yields its items and of a dict its keys, so one expression
        # covers both accepted shapes of ``index``.
        if self.index and self.label and set(self.index) != set(self.label):
            raise ValueError(
                "Validation error: `index` and `label` must have the same keys."
            )

        # After the check above, whichever of index/label is present holds the
        # full set of declared category IDs (identical when both are present).
        known_ids = set(self.index or self.label)

        # Ensure coordinates are a dictionary where keys are category IDs
        # and values are an array of two numbers (longitude, latitude).
        if self.coordinates:
            for key, value in self.coordinates.items():
                if key not in known_ids:
                    raise ValueError(
                        f"Trying to set coordinates for category ID: {key} "
                        "but it is not defined neither in `index` nor in `label`."
                    )
                if not isinstance(value, list) or len(value) != 2:
                    raise ValueError(
                        f"Coordinates for category {key} must be a list of two numbers."
                    )

        # Ensure child references an existing parent
        if self.child:
            for parent in self.child:
                if parent not in known_ids:
                    raise ValueError(
                        f"Invalid parent: {parent} in the `child` field. "
                        "It is not defined neither in `index` nor in `label`."
                    )

        # Ensure unit references an existing category
        if self.unit:
            for key in self.unit:
                if key not in known_ids:
                    raise ValueError(
                        f"Invalid unit: {key} in the `unit` field. "
                        "It is not defined neither in `index` nor in `label`."
                    )
        return self
|
243
|
+
|
244
|
+
|
245
|
+
class Link(JSONStatBaseModel):
    """A single link entry.

    Links are grouped by relation and attached to a dataset or a dimension
    to point at related resources.
    """

    type: Optional[str] = Field(
        None,
        description="It describes the media type of the accompanying href. "
        "Not required when the resource referenced in the link "
        "is a JSON-stat resource.",
    )
    href: Optional[AnyUrl] = Field(None, description="It specifies a URL.")
    # Trailing underscore avoids the ``class`` keyword; the alias restores it.
    class_: Optional[Literal["dataset", "dimension", "collection"]] = Field(
        None,
        alias="class",
        description="It describes the class of the resource referenced "
        "in the link. Not required when the resource referenced "
        "in the link is a JSON-stat resource.",
    )
    label: Optional[str] = Field(
        None,
        description="It provides a human-readable label for the link. "
        "Not required when the resource referenced in the link "
        "is a JSON-stat resource.",
    )
|
278
|
+
|
279
|
+
|
280
|
+
class Dimension(JSONStatBaseModel):
    """Root-level JSON-stat dimension response.

    Models the dimension class of the JSON-stat 2.0 specification
    (https://json-stat.org/full/#dimension). ``category`` is mandatory here.
    """

    version: str = Field(
        "2.0",
        description="It declares the JSON-stat version of the response. The goal "
        "of this property is to help clients parsing that particular response.",
    )
    # Trailing underscore avoids the ``class`` keyword; the alias restores it.
    class_: Literal["dimension"] = Field(
        "dimension",
        alias="class",
        description="JSON-stat supports several classes of responses. "
        "Possible values of class are: dataset, dimension and collection.",
    )
    label: Optional[str] = Field(
        None,
        description="It is used to assign a very short (one line) descriptive text to IDs "
        "at different levels of the response tree. It is language-dependent.",
    )
    category: Category = Field(
        description="It is used to describe the possible values of a dimension. "
        "It is language-dependent.",
    )
    href: Optional[AnyUrl] = Field(
        None,
        description="It specifies a URL. Providers can use this property to avoid "
        "sending information that is shared between different requests "
        "(for example, dimensions).",
    )
    link: Optional[Dict[str, List[Union[Link, JSONStatSchema]]]] = Field(
        None,
        description="It is used to provide a list of links related to a dataset or a dimension, "
        "sorted by relation (see https://json-stat.org/full/#relationid).",
    )
    note: Optional[List[str]] = Field(
        None,
        description="note allows to assign annotations to datasets (array), dimensions "
        "(array) and categories (object). To assign annotations to individual "
        "data, use status: https://json-stat.org/full/#status.",
    )
    updated: Optional[str] = Field(
        None,
        description="It contains the update time of the dataset. It is a string representing "
        "a date in an ISO 8601 format recognized by the Javascript Date.parse "
        "method (see ECMA-262 Date Time String Format: "
        "https://262.ecma-international.org/6.0/#sec-date-time-string-format).",
    )
    source: Optional[str] = Field(
        None,
        description="It contains a language-dependent short text describing the source "
        "of the dataset.",
    )
    extension: Optional[Dict[str, Any]] = Field(
        None,
        description="Extension allows JSON-stat to be extended for particular needs. "
        "Providers are free to define where they include this property and "
        "what children are allowed in each case.",
    )

    @field_validator("updated", mode="after")
    @classmethod
    def validate_updated_date(cls, v: Optional[str]):
        """Reject an ``updated`` value that is not an ISO 8601 date string.

        Empty or missing values are passed through unchanged.
        """
        if not v or is_valid_iso_date(v):
            return v
        raise ValueError(f"Updated date: '{v}' is an invalid ISO 8601 format.")
|
370
|
+
|
371
|
+
|
372
|
+
class DatasetDimension(JSONStatBaseModel):
    """Dataset dimension.

    A dimension model for when the dimension is a child of a Dataset
    as it has different validation rules than a root Dimension: ``version``
    and ``category`` are optional, but a dimension without ``category`` must
    provide ``href``.
    """

    version: Optional[str] = Field(
        default=None,
        description=(
            "It declares the JSON-stat version of the response. The goal "
            "of this property is to help clients parsing that particular response."
        ),
    )
    # Internal marker: excluded from dumps, not part of init, and frozen.
    class_: Optional[str] = Field(
        default="dataset_dimension",
        alias="class",
        description=(
            "JSON-stat supports several classes of responses. "
            "Possible values of class are: dataset, dimension and collection. "
            "This is an addition to the standard JSON-stat classes to allow for "
            "different validation rules for dataset dimensions."
        ),
        exclude=True,
        init=False,
        frozen=True,
    )
    label: Optional[str] = Field(
        default=None,
        description=(
            "It is used to assign a very short (one line) descriptive text to IDs "
            "at different levels of the response tree. It is language-dependent."
        ),
    )
    category: Optional[Category] = Field(
        default=None,
        description=(
            "It is used to describe the possible values of a dimension. "
            "It is language-dependent."
        ),
    )
    href: Optional[AnyUrl] = Field(
        default=None,
        description=(
            "It specifies a URL. Providers can use this property to avoid "
            "sending information that is shared between different requests "
            "(for example, dimensions)."
        ),
    )
    link: Optional[Dict[str, List[Union[Link, JSONStatSchema]]]] = Field(
        default=None,
        description=(
            "It is used to provide a list of links related to a dataset or a dimension, "
            "sorted by relation (see https://json-stat.org/full/#relationid)."
        ),
    )
    note: Optional[List[str]] = Field(
        default=None,
        description=(
            "note allows to assign annotations to datasets (array), dimensions "
            "(array) and categories (object). To assign annotations to individual "
            "data, use status: https://json-stat.org/full/#status."
        ),
    )
    updated: Optional[str] = Field(
        default=None,
        description=(
            "It contains the update time of the dataset. It is a string representing "
            "a date in an ISO 8601 format recognized by the Javascript Date.parse "
            "method (see ECMA-262 Date Time String Format: "
            "https://262.ecma-international.org/6.0/#sec-date-time-string-format)."
        ),
    )
    source: Optional[str] = Field(
        default=None,
        description=(
            "It contains a language-dependent short text describing the source "
            "of the dataset."
        ),
    )
    extension: Optional[Dict[str, Any]] = Field(
        default=None,
        description=(
            "Extension allows JSON-stat to be extended for particular needs. "
            "Providers are free to define where they include this property and "
            "what children are allowed in each case."
        ),
    )

    @field_validator("updated", mode="after")
    @classmethod
    def validate_updated_date(cls, v: Optional[str]):
        """Validates the updated date is in ISO 8601 format.

        Args:
            v: The ``updated`` value; empty or ``None`` is accepted as-is.

        Returns:
            The unchanged value.

        Raises:
            ValueError: If a non-empty value is not a valid ISO 8601 string.
        """
        if v and not is_valid_iso_date(v):
            raise ValueError(f"Updated date: '{v}' is an invalid ISO 8601 format.")
        return v

    @model_validator(mode="after")
    def validate_dataset_dimension(self):
        """Dataset dimension-wide validation checks.

        Returns:
            The validated instance itself.

        Raises:
            ValueError: If neither ``category`` nor ``href`` is provided.
        """
        if not self.category and not self.href:
            # The first literal ends with a space so the two sentences do not
            # run together in the rendered message.
            raise ValueError(
                "A category is required if a reference (href) is not provided. "
                "For an example, see: https://json-stat.org/full/#href"
            )
        return self
|
478
|
+
|
479
|
+
|
480
|
+
class DatasetRole(JSONStatBaseModel):
    """Special roles (time, geo, metric) assigned to dataset dimensions.

    Each role holds a list of dimension IDs; at least one role must be
    non-empty.
    """

    time: Optional[List[str]] = Field(
        None,
        description="It can be used to assign a time role to one or more dimensions. "
        "It takes the form of an array of dimension IDs in which order does "
        "not have a special meaning.",
    )
    geo: Optional[List[str]] = Field(
        None,
        description="It can be used to assign a spatial role to one or more dimensions. "
        "It takes the form of an array of dimension IDs in which order does "
        "not have a special meaning.",
    )
    metric: Optional[List[str]] = Field(
        None,
        description="It can be used to assign a metric role to one or more dimensions. "
        "It takes the form of an array of dimension IDs in which order does "
        "not have a special meaning.",
    )

    @model_validator(mode="after")
    def validate_dataset_role(self):
        """Require that at least one of the role lists is non-empty."""
        if self.time or self.geo or self.metric:
            return self
        raise ValueError("At least one role must be provided.")
|
514
|
+
|
515
|
+
|
516
|
+
class Dataset(JSONStatBaseModel):
    """JSON-stat dataset.

    Besides per-field typing, the model checks that ``updated`` is an ISO 8601
    string, that no dimension is listed under more than one role, that ``size``
    and ``id`` have the same length, that a list-form ``status`` has either one
    element or as many as ``value``, and that every ID in ``id`` has an entry
    in ``dimension``.
    """

    version: str = Field(
        default="2.0",
        description=(
            "It declares the JSON-stat version of the response. The goal "
            "of this property is to help clients parsing that particular response."
        ),
    )
    class_: Literal["dataset"] = Field(
        default="dataset",
        alias="class",
        description=(
            "JSON-stat supports several classes of responses. "
            "Possible values of class are: dataset, dimension and collection."
        ),
    )
    href: Optional[AnyUrl] = Field(
        default=None,
        description=(
            "It specifies a URL. Providers can use this property to avoid "
            "sending information that is shared between different requests "
            "(for example, dimensions)."
        ),
    )
    label: Optional[str] = Field(
        default=None,
        description=(
            "It is used to assign a very short (one line) descriptive text to IDs "
            "at different levels of the response tree. It is language-dependent."
        ),
    )
    source: Optional[str] = Field(
        default=None,
        description=(
            "It contains a language-dependent short text describing the source "
            "of the dataset."
        ),
    )
    updated: Optional[str] = Field(
        default=None,
        description=(
            "It contains the update time of the dataset. It is a string representing "
            "a date in an ISO 8601 format recognized by the Javascript Date.parse "
            "method (see ECMA-262 Date Time String Format: "
            "https://262.ecma-international.org/6.0/#sec-date-time-string-format)."
        ),
    )
    id: List[str] = Field(description="It contains an ordered list of dimension IDs.")
    size: List[int] = Field(
        description=(
            "It contains the number (integer) of categories (possible values) "
            "of each dimension in the dataset. It has the same number of elements "
            "and in the same order as in id."
        ),
    )
    # NOTE(review): the description states that role is required, yet
    # ``default=None`` lets a dataset without ``role`` pass validation.
    # Behaviour is kept as-is here; confirm which of the two is intended.
    role: DatasetRole = Field(
        default=None,
        description=(
            "It can be used to assign special roles to dimensions. "
            "At this moment, possible roles are: time, geo and metric. "
            "A role can be shared by several dimensions. "
            "We differ from the specification in that the role is required, not optional"
        ),
    )
    value: Union[
        List[Union[float, int, str, None]], Dict[str, Union[float, int, str, None]]
    ] = Field(
        description=(
            "It contains the data sorted according to the dataset dimensions. "
            "It usually takes the form of an array where missing values are "
            "expressed as nulls."
        ),
    )
    status: Optional[Union[str, List[str], Dict[str, str]]] = Field(
        default=None,
        description=(
            "It contains metadata at the observation level. When it takes an "
            "array form of the same size of value, it assigns a status to each "
            "data by position. When it takes a dictionary form, it assigns a "
            "status to each data by key."
        ),
    )

    dimension: Dict[str, DatasetDimension] = Field(
        description=(
            "The dimension property contains information about the dimensions of "
            "the dataset. dimension must have properties "
            "(see https://json-stat.org/full/#dimensionid) with "
            "the same names of each element in the id array."
        ),
    )
    note: Optional[List[str]] = Field(
        default=None,
        description=(
            "note allows to assign annotations to datasets (array), dimensions "
            "(array) and categories (object). To assign annotations to individual "
            "data, use status: https://json-stat.org/full/#status."
        ),
    )
    extension: Optional[Dict[str, Any]] = Field(
        default=None,
        description=(
            "Extension allows JSON-stat to be extended for particular needs. "
            "Providers are free to define where they include this property and "
            "what children are allowed in each case."
        ),
    )
    link: Optional[Dict[str, List[Union[Link, JSONStatSchema]]]] = Field(
        default=None,
        description=(
            "It is used to provide a list of links related to a dataset or a dimension, "
            "sorted by relation (see https://json-stat.org/full/#relationid)."
        ),
    )

    @field_validator("updated", mode="after")
    @classmethod
    def validate_updated_date(cls, v: Optional[str]):
        """Validates the updated date is in ISO 8601 format.

        Args:
            v: The ``updated`` value; empty or ``None`` is accepted as-is.

        Returns:
            The unchanged value.

        Raises:
            ValueError: If a non-empty value is not a valid ISO 8601 string.
        """
        if v and not is_valid_iso_date(v):
            raise ValueError(f"Updated date: '{v}' is an invalid ISO 8601 format.")
        return v

    @field_validator("role", mode="after")
    @classmethod
    def validate_role(cls, v: Optional[DatasetRole]):
        """Validate that no dimension ID appears in more than one role entry.

        Args:
            v: The parsed role object, or ``None``.

        Returns:
            The unchanged value.

        Raises:
            ValueError: If a dimension ID occurs more than once across the
                role lists.
        """
        if v:
            # Flatten every role list into one sequence of dimension IDs.
            all_values = [
                value
                for values in v.model_dump().values()
                if values is not None
                for value in values
            ]
            duplicates = [
                item for item, count in Counter(all_values).items() if count > 1
            ]
            if duplicates:
                raise ValueError(
                    f"Dimension(s): {', '.join(duplicates)} referenced in multiple "
                    "roles. Each dimension can only be referenced in one role."
                )
        return v

    @model_validator(mode="after")
    def validate_dataset(self):
        """Dataset-wide validation checks.

        Returns:
            The validated instance itself.

        Raises:
            ValueError: If ``size`` and ``id`` differ in length, if a list-form
                ``status`` has neither one element nor ``len(value)`` elements,
                or if an ID in ``id`` is missing from ``dimension``.
        """
        # Validate size matches id length
        if len(self.size) != len(self.id):
            raise ValueError(
                f"Size array length ({len(self.size)}) "
                f"must match ID array length ({len(self.id)})"
            )

        # Validate status format
        # NOTE(review): when ``value`` is a dict, ``len(self.value)`` is the
        # number of keys provided - confirm that is the intended comparison.
        if isinstance(self.status, list):
            if len(self.status) not in (len(self.value), 1):
                raise ValueError(
                    "Status list must match value length "
                    f"({len(self.value)}) or be single value"
                )

        # Check all dimensions are defined
        missing_dims = [dim_id for dim_id in self.id if dim_id not in self.dimension]
        if missing_dims:
            raise ValueError(f"Missing dimension definitions: {', '.join(missing_dims)}")
        return self
|
686
|
+
|
687
|
+
|
688
|
+
class Collection(JSONStatBaseModel):
    """Root-level JSON-stat collection response.

    When links are present they must include the ``item`` relation.
    """

    version: str = Field(
        "2.0",
        description=(
            "It declares the JSON-stat version of the response. The goal "
            "of this property is to help clients parsing that particular response."
        ),
    )

    # Trailing underscore avoids the ``class`` keyword; the alias restores it.
    class_: Literal["collection"] = Field(
        "collection",
        alias="class",
        description="It declares the class of the response.",
    )
    label: Optional[str] = Field(
        None,
        description="It provides a human-readable label for the collection.",
    )
    href: Optional[AnyUrl] = Field(None, description="It specifies a URL.")
    updated: Optional[str] = Field(
        None,
        description="It contains the update time of the collection.",
    )
    link: Optional[Dict[str, List[Union[Link, JSONStatSchema]]]] = Field(
        None,
        description=(
            "The items of the collection can be of any class "
            "(datasets, dimensions, collections)."
        ),
    )
    source: Optional[str] = Field(
        None,
        description=(
            "It contains a language-dependent short text describing the source "
            "of the collection."
        ),
    )
    note: Optional[List[str]] = Field(
        None,
        description=(
            "note allows to assign annotations to datasets (array), dimensions "
            "(array) and categories (object). To assign annotations to individual "
            "data, use status: https://json-stat.org/full/#status."
        ),
    )
    extension: Optional[Dict[str, Any]] = Field(
        None,
        description=(
            "Extension allows JSON-stat to be extended for particular needs. "
            "Providers are free to define where they include this property and "
            "what children are allowed in each case."
        ),
    )

    @field_validator("updated", mode="after")
    @classmethod
    def validate_updated_date(cls, v: Optional[str]):
        """Reject an ``updated`` value that is not an ISO 8601 date string.

        Empty or missing values are passed through unchanged.
        """
        if not v or is_valid_iso_date(v):
            return v
        raise ValueError(f"Updated date: '{v}' is an invalid ISO 8601 format.")

    @model_validator(mode="after")
    def validate_collection(self):
        """Check that a non-empty ``link`` mapping contains the ``item`` relation."""
        links = self.link
        # Nothing to check without links; otherwise ``item`` must be a key.
        if not links or "item" in links:
            return self
        raise ValueError("Collection links must use 'item' relation type")
|
758
|
+
|
759
|
+
|
760
|
+
class JSONStatSchema(RootModel):
    """Top-level JSON-stat response: a dataset, a dimension or a collection.

    The concrete model is selected through the ``class_`` discriminator.
    """

    root: Union[Dataset, Dimension, Collection] = Field(discriminator="class_")

    def model_dump(self, *, exclude_none: bool = True, by_alias: bool = True, **kwargs):
        """Dump the model, excluding ``None`` values and using aliases by default."""
        options = dict(kwargs, exclude_none=exclude_none, by_alias=by_alias)
        return super().model_dump(**options)

    @field_serializer("href", check_fields=False, return_type=str)
    def serialize_any_url(self, href: Optional[AnyUrl]) -> Optional[str]:
        """Return the URL as a plain string, or ``None`` when it is absent."""
        if href:
            return str(href)
        return None
|