dapla-toolbelt-metadata 0.6.5__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


@@ -7,7 +7,7 @@ warnings.filterwarnings(
 message="As the c extension couldn't be imported, `google-crc32c` is using a pure python implementation that is significantly slower.",
 )

- import datadoc_model.model as datadoc_model
+ import datadoc_model.all_optional.model as datadoc_model

 from . import dapla
 from . import datasets
@@ -9,9 +9,11 @@ import warnings
 from concurrent.futures import ThreadPoolExecutor
 from pathlib import Path
 from typing import TYPE_CHECKING
+ from typing import cast

- from datadoc_model import model
- from datadoc_model.model import DataSetStatus
+ import datadoc_model.all_optional.model as all_optional_model
+ import datadoc_model.required.model as required_model
+ from datadoc_model.all_optional.model import DataSetStatus

 from dapla_metadata._shared import config
 from dapla_metadata.dapla import user_info
@@ -30,6 +32,8 @@ from dapla_metadata.datasets.utility.constants import INCONSISTENCIES_MESSAGE
 from dapla_metadata.datasets.utility.constants import METADATA_DOCUMENT_FILE_SUFFIX
 from dapla_metadata.datasets.utility.constants import NUM_OBLIGATORY_DATASET_FIELDS
 from dapla_metadata.datasets.utility.constants import NUM_OBLIGATORY_VARIABLES_FIELDS
+ from dapla_metadata.datasets.utility.utils import ExistingPseudonymizationMetadataType
+ from dapla_metadata.datasets.utility.utils import OptionalDatadocMetadataType
 from dapla_metadata.datasets.utility.utils import calculate_percentage
 from dapla_metadata.datasets.utility.utils import derive_assessment_from_state
 from dapla_metadata.datasets.utility.utils import get_timestamp_now
@@ -84,8 +88,8 @@ class Datadoc:
 dataset_path: str | None = None,
 metadata_document_path: str | None = None,
 statistic_subject_mapping: StatisticSubjectMapping | None = None,
- *,
 errors_as_warnings: bool = False,
+ validate_required_fields_on_existing_metadata: bool = False,
 ) -> None:
 """Initialize the Datadoc instance.

@@ -101,17 +105,23 @@ class Datadoc:
 Defaults to None
 errors_as_warnings: Disable raising exceptions if inconsistencies
 are found between existing and extracted metadata.
+ validate_required_fields_on_existing_metadata: Use a Pydantic model
+ which validates whether required fields are present when reading
+ in an existing metadata file.
 """
 self._statistic_subject_mapping = statistic_subject_mapping
 self.errors_as_warnings = errors_as_warnings
+ self.validate_required_fields_on_existing_metadata = (
+ validate_required_fields_on_existing_metadata
+ )
 self.metadata_document: pathlib.Path | CloudPath | None = None
- self.container: model.MetadataContainer | None = None
+ self.container: all_optional_model.MetadataContainer | None = None
 self.dataset_path: pathlib.Path | CloudPath | None = None
- self.dataset = model.Dataset()
+ self.dataset = all_optional_model.Dataset()
 self.variables: list = []
- self.pseudo_variables: list[model.PseudoVariable] = []
- self.variables_lookup: dict[str, model.Variable] = {}
- self.pseudo_variables_lookup: dict[str, model.PseudoVariable] = {}
+ self.pseudo_variables: list[all_optional_model.PseudoVariable] = []
+ self.variables_lookup: dict[str, all_optional_model.Variable] = {}
+ self.pseudo_variables_lookup: dict[str, all_optional_model.PseudoVariable] = {}
 self.explicitly_defined_metadata_document = False
 self.dataset_consistency_status: list = []
 if metadata_document_path:
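The new constructor flag above can be exercised as follows (a minimal sketch, not taken from the package itself; the import path and the GCS paths are assumptions for illustration):

    from pydantic import ValidationError

    from dapla_metadata.datasets.core import Datadoc

    try:
        datadoc = Datadoc(
            # Hypothetical dataset/metadata document pair.
            dataset_path="gs://my-bucket/produkt/person_data_p2021_v1.parquet",
            metadata_document_path="gs://my-bucket/produkt/person_data_p2021_v1__DOC.json",
            validate_required_fields_on_existing_metadata=True,
        )
    except ValidationError as error:
        # With the flag enabled the existing document is parsed with the
        # required-fields model, so missing obligatory fields surface here
        # instead of being silently accepted.
        print(error)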
@@ -149,9 +159,9 @@ class Datadoc:
 - The 'contains_personal_data' attribute is set to False if not specified.
 - A lookup dictionary for variables is created based on their short names.
 """
- extracted_metadata: model.DatadocMetadata | None = None
- existing_metadata: model.DatadocMetadata | None = None
- existing_pseudonymization: model.PseudonymizationMetadata | None = None
+ extracted_metadata: all_optional_model.DatadocMetadata | None = None
+ existing_metadata: OptionalDatadocMetadataType = None
+ existing_pseudonymization: ExistingPseudonymizationMetadataType = None

 if self.metadata_document and self.metadata_document.exists():
 existing_metadata = self._extract_metadata_from_existing_document(
@@ -166,11 +176,26 @@

 if (
 self.dataset_path is not None
- and self.dataset == model.Dataset()
+ and self.dataset == all_optional_model.Dataset()
 and len(self.variables) == 0
 ):
 extracted_metadata = self._extract_metadata_from_dataset(self.dataset_path)

+ if extracted_metadata is not None:
+ existing_file_path = self._get_existing_file_path(extracted_metadata)
+ if (
+ self.dataset_path
+ and existing_file_path is not None
+ and extracted_metadata is not None
+ and existing_metadata is not None
+ ):
+ self.dataset_consistency_status = self._check_dataset_consistency(
+ self.dataset_path,
+ Path(existing_file_path),
+ extracted_metadata,
+ existing_metadata,
+ )
+
 if (
 self.dataset_path
 and self.explicitly_defined_metadata_document
@@ -179,13 +204,6 @@ class Datadoc:
 and extracted_metadata is not None
 and existing_metadata is not None
 ):
- existing_file_path = self._get_existing_file_path(extracted_metadata)
- self.dataset_consistency_status = self._check_dataset_consistency(
- self.dataset_path,
- Path(existing_file_path),
- extracted_metadata,
- existing_metadata,
- )
 self._check_ready_to_merge(
 self.dataset_consistency_status,
 errors_as_warnings=self.errors_as_warnings,
@@ -207,14 +225,14 @@ class Datadoc:
 self._set_pseudonymization_metadata(existing_pseudonymization)

 set_default_values_variables(self.variables)
- set_default_values_dataset(self.dataset)
+ set_default_values_dataset(cast("all_optional_model.Dataset", self.dataset))
 set_dataset_owner(self.dataset)
 self._create_variables_lookup()
 self._create_pseudo_variables_lookup()

 def _get_existing_file_path(
 self,
- extracted_metadata: model.DatadocMetadata | None,
+ extracted_metadata: all_optional_model.DatadocMetadata | None,
 ) -> str:
 if (
 extracted_metadata is not None
@@ -227,19 +245,19 @@ class Datadoc:

 def _set_metadata(
 self,
- merged_metadata: model.DatadocMetadata | None,
+ merged_metadata: OptionalDatadocMetadataType,
 ) -> None:
 if not merged_metadata or not (
 merged_metadata.dataset and merged_metadata.variables
 ):
 msg = "Could not read metadata"
 raise ValueError(msg)
- self.dataset = merged_metadata.dataset
+ self.dataset = cast("all_optional_model.Dataset", merged_metadata.dataset)
 self.variables = merged_metadata.variables

 def _set_pseudonymization_metadata(
 self,
- existing_pseudonymization: model.PseudonymizationMetadata | None,
+ existing_pseudonymization: ExistingPseudonymizationMetadataType,
 ) -> None:
 if not existing_pseudonymization or not (
 existing_pseudonymization.pseudo_variables is not None
@@ -247,7 +265,10 @@ class Datadoc:
 msg = "Error reading pseudonymization metadata"
 logger.error(msg)
 return
- self.pseudo_variables = existing_pseudonymization.pseudo_variables
+ self.pseudo_variables = cast(
+ "list[all_optional_model.PseudoVariable]",
+ existing_pseudonymization.pseudo_variables,
+ )

 def _create_variables_lookup(self) -> None:
 self.variables_lookup = {
@@ -264,8 +285,8 @@ class Datadoc:
 def _check_dataset_consistency(
 new_dataset_path: Path | CloudPath,
 existing_dataset_path: Path,
- extracted_metadata: model.DatadocMetadata,
- existing_metadata: model.DatadocMetadata,
+ extracted_metadata: all_optional_model.DatadocMetadata,
+ existing_metadata: OptionalDatadocMetadataType,
 ) -> list[dict[str, object]]:
 """Run consistency tests.

@@ -312,14 +333,16 @@ class Datadoc:
 {
 "name": "Variable names",
 "success": (
- {v.short_name for v in extracted_metadata.variables or []}
+ existing_metadata is not None
+ and {v.short_name for v in extracted_metadata.variables or []}
 == {v.short_name for v in existing_metadata.variables or []}
 ),
 },
 {
 "name": "Variable datatypes",
 "success": (
- [v.data_type for v in extracted_metadata.variables or []]
+ existing_metadata is not None
+ and [v.data_type for v in extracted_metadata.variables or []]
 == [v.data_type for v in existing_metadata.variables or []]
 ),
 },
@@ -353,27 +376,29 @@ class Datadoc:

 @staticmethod
 def _merge_metadata(
- extracted_metadata: model.DatadocMetadata | None,
- existing_metadata: model.DatadocMetadata | None,
- ) -> model.DatadocMetadata:
+ extracted_metadata: all_optional_model.DatadocMetadata | None,
+ existing_metadata: OptionalDatadocMetadataType,
+ ) -> all_optional_model.DatadocMetadata:
 if not existing_metadata:
 logger.warning(
 "No existing metadata found, no merge to perform. Continuing with extracted metadata.",
 )
- return extracted_metadata or model.DatadocMetadata()
+ return extracted_metadata or all_optional_model.DatadocMetadata()

 if not extracted_metadata:
- return existing_metadata
+ return cast("all_optional_model.DatadocMetadata", existing_metadata)

 # Use the extracted metadata as a base
- merged_metadata = model.DatadocMetadata(
+ merged_metadata = all_optional_model.DatadocMetadata(
 dataset=copy.deepcopy(extracted_metadata.dataset),
 variables=[],
 )

 override_dataset_fields(
 merged_metadata=merged_metadata,
- existing_metadata=existing_metadata,
+ existing_metadata=cast(
+ "all_optional_model.DatadocMetadata", existing_metadata
+ ),
 )

 # Merge variables.
@@ -387,7 +412,7 @@ class Datadoc:
 def _extract_metadata_from_existing_document(
 self,
 document: pathlib.Path | CloudPath,
- ) -> model.DatadocMetadata | None:
+ ) -> OptionalDatadocMetadataType:
 """Read metadata from an existing metadata document.

 If an existing metadata document is available, this method reads and
@@ -402,7 +427,13 @@ class Datadoc:

 Raises:
 json.JSONDecodeError: If the metadata document cannot be parsed.
+ pydantic.ValidationError: If the data does not successfully validate.
 """
+ metadata_model = (
+ required_model
+ if self.validate_required_fields_on_existing_metadata
+ else all_optional_model
+ )
 fresh_metadata = {}
 try:
 with document.open(mode="r", encoding="utf-8") as file:
@@ -412,7 +443,7 @@ class Datadoc:
 fresh_metadata,
 )
 if is_metadata_in_container_structure(fresh_metadata):
- self.container = model.MetadataContainer.model_validate_json(
+ self.container = metadata_model.MetadataContainer.model_validate_json(
 json.dumps(fresh_metadata),
 )
 datadoc_metadata = fresh_metadata["datadoc"]
@@ -420,7 +451,7 @@ class Datadoc:
 datadoc_metadata = fresh_metadata
 if datadoc_metadata is None:
 return None
- return model.DatadocMetadata.model_validate_json(
+ return metadata_model.DatadocMetadata.model_validate_json(
 json.dumps(datadoc_metadata),
 )
 except json.JSONDecodeError:
@@ -435,7 +466,11 @@ class Datadoc:
 def _extract_pseudonymization_from_existing_document(
 self,
 document: pathlib.Path | CloudPath,
- ) -> model.PseudonymizationMetadata | None:
+ ) -> (
+ all_optional_model.PseudonymizationMetadata
+ | required_model.PseudonymizationMetadata
+ | None
+ ):
 """Read pseudo metadata from an existing metadata document.

 If there is pseudo metadata in the document supplied, the method validates and returns the pseudonymization structure.
@@ -445,7 +480,14 @@ class Datadoc:

 Raises:
 json.JSONDecodeError: If the metadata document cannot be parsed.
+ pydantic.ValidationError: If the data does not successfully validate.
 """
+ metadata_model = (
+ required_model
+ if self.validate_required_fields_on_existing_metadata
+ else all_optional_model
+ )
+
 try:
 with document.open(mode="r", encoding="utf-8") as file:
 fresh_metadata = json.load(file)
@@ -464,7 +506,7 @@ class Datadoc:
 if pseudonymization_metadata is None:
 return None

- return model.PseudonymizationMetadata.model_validate_json(
+ return metadata_model.PseudonymizationMetadata.model_validate_json(
 json.dumps(pseudonymization_metadata),
 )

@@ -500,7 +542,7 @@ class Datadoc:
 def _extract_metadata_from_dataset(
 self,
 dataset: pathlib.Path | CloudPath,
- ) -> model.DatadocMetadata:
+ ) -> all_optional_model.DatadocMetadata:
 """Obtain what metadata we can from the dataset itself.

 This makes it easier for the user by 'pre-filling' certain fields.
@@ -520,9 +562,9 @@ class Datadoc:
 - variables: A list of fields extracted from the dataset schema.
 """
 dapla_dataset_path_info = DaplaDatasetPathInfo(dataset)
- metadata = model.DatadocMetadata()
+ metadata = all_optional_model.DatadocMetadata()

- metadata.dataset = model.Dataset(
+ metadata.dataset = all_optional_model.Dataset(
 short_name=dapla_dataset_path_info.dataset_short_name,
 dataset_state=dapla_dataset_path_info.dataset_state,
 dataset_status=DataSetStatus.DRAFT,
@@ -586,12 +628,14 @@ class Datadoc:
 if self.container:
 self.container.datadoc = datadoc
 if not self.container.pseudonymization:
- self.container.pseudonymization = model.PseudonymizationMetadata(
- pseudo_dataset=model.PseudoDataset()
+ self.container.pseudonymization = (
+ all_optional_model.PseudonymizationMetadata(
+ pseudo_dataset=all_optional_model.PseudoDataset()
+ )
 )
 self.container.pseudonymization.pseudo_variables = self.pseudo_variables
 else:
- self.container = model.MetadataContainer(datadoc=datadoc)
+ self.container = all_optional_model.MetadataContainer(datadoc=datadoc)
 if self.metadata_document:
 content = self.container.model_dump_json(indent=4)
 self.metadata_document.write_text(content)
@@ -623,12 +667,14 @@ class Datadoc:
 def add_pseudo_variable(self, variable_short_name: str) -> None:
 """Adds a new pseudo variable to the list of pseudonymized variables."""
 if self.variables_lookup[variable_short_name] is not None:
- pseudo_variable = model.PseudoVariable(short_name=variable_short_name)
+ pseudo_variable = all_optional_model.PseudoVariable(
+ short_name=variable_short_name
+ )
 self.pseudo_variables.append(pseudo_variable)
 self.pseudo_variables_lookup[variable_short_name] = pseudo_variable

 def get_pseudo_variable(
 self, variable_short_name: str
- ) -> model.PseudoVariable | None:
+ ) -> all_optional_model.PseudoVariable | None:
 """Finds a pseudo variable by shortname."""
 return self.pseudo_variables_lookup.get(variable_short_name)
@@ -14,7 +14,7 @@ from typing import Literal

 import arrow
 from cloudpathlib import GSPath
- from datadoc_model.model import DataSetState
+ from datadoc_model.all_optional.model import DataSetState

 if TYPE_CHECKING:
 import datetime
@@ -12,10 +12,10 @@ from abc import abstractmethod
 from typing import TYPE_CHECKING

 import pandas as pd
- from datadoc_model.model import DataType
- from datadoc_model.model import LanguageStringType
- from datadoc_model.model import LanguageStringTypeItem
- from datadoc_model.model import Variable
+ from datadoc_model.all_optional.model import DataType
+ from datadoc_model.all_optional.model import LanguageStringType
+ from datadoc_model.all_optional.model import LanguageStringTypeItem
+ from datadoc_model.all_optional.model import Variable
 from pyarrow import parquet as pq

 from dapla_metadata.datasets.utility.enums import SupportedLanguages
@@ -140,7 +140,11 @@ class StatisticSubjectMapping(GetExternalSource):
 SecondarySubject(
 self._extract_titles(s.titler),
 s["emnekode"],
- [statistikk["kortnavn"] for statistikk in s.find_all("Statistikk")],
+ [
+ statistikk["kortnavn"]
+ for statistikk in s.find_all("Statistikk")
+ if statistikk["isPrimaerPlassering"] == "true"
+ ],
 )
 for s in p.find_all("delemne")
 ]
@@ -1,7 +1,7 @@
 """Repository for constant values in Datadoc backend."""

- from datadoc_model.model import LanguageStringType
- from datadoc_model.model import LanguageStringTypeItem
+ from datadoc_model.all_optional.model import LanguageStringType
+ from datadoc_model.all_optional.model import LanguageStringTypeItem

 VALIDATION_ERROR = "Validation error: "

@@ -4,15 +4,19 @@ import datetime # import is needed in xdoctest
 import logging
 import pathlib
 import uuid
+ from typing import cast

+ import datadoc_model
+ import datadoc_model.all_optional.model as all_optional_model
+ import datadoc_model.required.model as required_model
 import google.auth
 from cloudpathlib import CloudPath
 from cloudpathlib import GSClient
 from cloudpathlib import GSPath
 from datadoc_model import model
- from datadoc_model.model import Assessment
- from datadoc_model.model import DataSetState
- from datadoc_model.model import VariableRole
+ from datadoc_model.all_optional.model import Assessment
+ from datadoc_model.all_optional.model import DataSetState
+ from datadoc_model.all_optional.model import VariableRole

 from dapla_metadata.dapla import user_info
 from dapla_metadata.datasets.utility.constants import (
@@ -34,6 +38,17 @@ from dapla_metadata.datasets.utility.constants import (

 logger = logging.getLogger(__name__)

+ DatadocMetadataType = (
+ all_optional_model.DatadocMetadata | required_model.DatadocMetadata
+ )
+ DatasetType = all_optional_model.Dataset | required_model.Dataset
+ OptionalDatadocMetadataType = DatadocMetadataType | None
+ ExistingPseudonymizationMetadataType = (
+ all_optional_model.PseudonymizationMetadata
+ | required_model.PseudonymizationMetadata
+ | None
+ )
+

 def get_timestamp_now() -> datetime.datetime:
 """Return a timestamp for the current moment."""
@@ -119,7 +134,9 @@ def set_default_values_variables(variables: list) -> None:
 v.variable_role = VariableRole.MEASURE


- def set_default_values_dataset(dataset: model.Dataset) -> None:
+ def set_default_values_dataset(
+ dataset: DatasetType,
+ ) -> None:
 """Set default values on dataset.

 Args:
@@ -140,7 +157,9 @@ def set_default_values_dataset(dataset: model.Dataset) -> None:
 dataset.contains_personal_data = False


- def set_dataset_owner(dataset: model.Dataset) -> None:
+ def set_dataset_owner(
+ dataset: DatasetType,
+ ) -> None:
 """Sets the owner of the dataset from the DAPLA_GROUP_CONTEXT enviornment variable.

 Args:
@@ -153,7 +172,7 @@ def set_dataset_owner(dataset: model.Dataset) -> None:


 def set_variables_inherit_from_dataset(
- dataset: model.Dataset,
+ dataset: DatasetType,
 variables: list,
 ) -> None:
 """Set specific dataset values on a list of variable objects.
@@ -283,7 +302,9 @@ def _is_missing_metadata(
 )


- def num_obligatory_dataset_fields_completed(dataset: model.Dataset) -> int:
+ def num_obligatory_dataset_fields_completed(
+ dataset: DatasetType,
+ ) -> int:
 """Count the number of completed obligatory dataset fields.

 This function returns the total count of obligatory fields in the dataset that
@@ -345,7 +366,9 @@ def num_obligatory_variable_fields_completed(variable: model.Variable) -> int:
 return NUM_OBLIGATORY_VARIABLES_FIELDS - len(missing_metadata)


- def get_missing_obligatory_dataset_fields(dataset: model.Dataset) -> list:
+ def get_missing_obligatory_dataset_fields(
+ dataset: DatasetType,
+ ) -> list:
 """Identify all obligatory dataset fields that are missing values.

 This function checks for obligatory fields that are either directly missing
@@ -422,8 +445,9 @@ def running_in_notebook() -> bool:


 def override_dataset_fields(
- merged_metadata: model.DatadocMetadata,
- existing_metadata: model.DatadocMetadata,
+ merged_metadata: all_optional_model.DatadocMetadata,
+ existing_metadata: all_optional_model.DatadocMetadata
+ | required_model.DatadocMetadata,
 ) -> None:
 """Overrides specific fields in the dataset of `merged_metadata` with values from the dataset of `existing_metadata`.

@@ -449,10 +473,10 @@ def override_dataset_fields(


 def merge_variables(
- existing_metadata: model.DatadocMetadata,
- extracted_metadata: model.DatadocMetadata,
- merged_metadata: model.DatadocMetadata,
- ) -> model.DatadocMetadata:
+ existing_metadata: OptionalDatadocMetadataType,
+ extracted_metadata: all_optional_model.DatadocMetadata,
+ merged_metadata: all_optional_model.DatadocMetadata,
+ ) -> all_optional_model.DatadocMetadata:
 """Merges variables from the extracted metadata into the existing metadata and updates the merged metadata.

 This function compares the variables from `extracted_metadata` with those in `existing_metadata`.
@@ -466,11 +490,12 @@ def merge_variables(
 merged_metadata: The metadata object that will contain the result of the merge.

 Returns:
- model.DatadocMetadata: The `merged_metadata` object containing variables from both `existing_metadata`
+ all_optional_model.DatadocMetadata: The `merged_metadata` object containing variables from both `existing_metadata`
 and `extracted_metadata`.
 """
 if (
- existing_metadata.variables is not None
+ existing_metadata is not None
+ and existing_metadata.variables is not None
 and extracted_metadata is not None
 and extracted_metadata.variables is not None
 and merged_metadata.variables is not None
@@ -494,7 +519,9 @@ def merge_variables(
 existing.contains_data_until = (
 extracted.contains_data_until or existing.contains_data_until
 )
- merged_metadata.variables.append(existing)
+ merged_metadata.variables.append(
+ cast("datadoc_model.all_optional.model.Variable", existing)
+ )
 else:
 # If there is no existing metadata for this variable, we just use what we have extracted
 merged_metadata.variables.append(extracted)
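The union aliases added in this release (DatadocMetadataType, DatasetType, OptionalDatadocMetadataType, ExistingPseudonymizationMetadataType) are what let the helpers above accept either the all-optional or the required model variant. A small illustrative sketch, not code from the package:

    import datadoc_model.all_optional.model as all_optional_model

    from dapla_metadata.datasets.utility.utils import OptionalDatadocMetadataType

    def count_variables(existing_metadata: OptionalDatadocMetadataType) -> int:
        # Both model variants expose `.variables`, so one code path serves
        # whichever flavour was used to read the existing document.
        if existing_metadata is None or existing_metadata.variables is None:
            return 0
        return len(existing_metadata.variables)

    print(count_variables(all_optional_model.DatadocMetadata()))  # prints 0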
@@ -9,24 +9,6 @@ from dapla_metadata.variable_definitions._generated.vardef_client.configuration

 VARDEF_HOST_TEST = "https://metadata.intern.test.ssb.no"
 WORKSPACE_DIR = "WORKSPACE_DIR"
- VARDEF_DESCRIPTIONS_FILE_PATH = "VARDEF_DESCRIPTIONS_FILE_PATH"
- VARDEF_DEFAULT_DESCRIPTION_PATH = (
- "variable_definitions/resources/vardef_model_descriptions_nb.yaml"
- )
-
-
- def get_descriptions_path() -> str:
- """Get the relative file path from the repo root to the Norwegian descriptions.
-
- First checks the `VARDEF_DESCRIPTIONS_FILE_PATH` environment variable; if not set, returns a default path.
-
- Returns:
- str: The file path to the descriptions.
- """
- return (
- get_config_item(VARDEF_DESCRIPTIONS_FILE_PATH)
- or VARDEF_DEFAULT_DESCRIPTION_PATH
- )


 def get_workspace_dir() -> str | None:
@@ -24,8 +24,6 @@ TEMPLATE_SECTION_HEADER_MACHINE_GENERATED_EN = (
 "\n--- Machine generated fields. Do not edit ---\n"
 )

- NORWEGIAN_DESCRIPTIONS = "norwegian_description"
-
 DEFAULT_DATE = date(1000, 1, 1)

 MACHINE_GENERATED_FIELDS = [
@@ -3,12 +3,8 @@
 import logging
 from datetime import datetime
 from pathlib import Path
- from typing import TYPE_CHECKING
- from typing import Any
- from typing import cast

 import pytz
- from pydantic.config import JsonDict
 from ruamel.yaml import YAML
 from ruamel.yaml import CommentedMap
 from ruamel.yaml import RoundTripRepresenter
@@ -27,10 +23,7 @@ from dapla_metadata.variable_definitions._utils.constants import DOUBLE_QUOTE_FI
 from dapla_metadata.variable_definitions._utils.constants import (
 MACHINE_GENERATED_FIELDS,
 )
- from dapla_metadata.variable_definitions._utils.constants import NORWEGIAN_DESCRIPTIONS
- from dapla_metadata.variable_definitions._utils.constants import OPTIONAL_FIELD
 from dapla_metadata.variable_definitions._utils.constants import OWNER_FIELD_NAME
- from dapla_metadata.variable_definitions._utils.constants import REQUIRED_FIELD
 from dapla_metadata.variable_definitions._utils.constants import (
 TEMPLATE_SECTION_HEADER_MACHINE_GENERATED,
 )
@@ -47,14 +40,8 @@ from dapla_metadata.variable_definitions._utils.constants import (
 VARIABLE_STATUS_FIELD_NAME,
 )
 from dapla_metadata.variable_definitions._utils.constants import YAML_STR_TAG
- from dapla_metadata.variable_definitions._utils.descriptions import (
- apply_norwegian_descriptions_to_model,
- )
 from dapla_metadata.variable_definitions.exceptions import VardefFileError

- if TYPE_CHECKING:
- from pydantic import JsonValue
-
 logger = logging.getLogger(__name__)

@@ -119,41 +106,6 @@ def _get_variable_definitions_dir():
 return folder_path


- def _set_field_requirement(field_name: str, field: Any) -> str | None:
- """Determine the field requirement status."""
- if field_name not in MACHINE_GENERATED_FIELDS:
- if field.is_required() or field_name == VARIABLE_STATUS_FIELD_NAME:
- return REQUIRED_FIELD
- return OPTIONAL_FIELD
- return None
-
-
- def _populate_commented_map(
- field_name: str,
- value: str,
- commented_map: CommentedMap,
- model_instance: CompleteResponse,
- ) -> None:
- """Add data to a CommentedMap."""
- commented_map[field_name] = value
- field = type(model_instance).model_fields[field_name]
- description: JsonValue = cast(
- JsonDict,
- field.json_schema_extra,
- )[NORWEGIAN_DESCRIPTIONS]
- field_requirement: str | None = _set_field_requirement(field_name, field)
- if description is not None:
- new_description = (
- ("\n" + field_requirement + "\n" + str(description))
- if field_requirement
- else ("\n" + str(description))
- )
- commented_map.yaml_set_comment_before_after_key(
- field_name,
- before=new_description,
- )
-
-
 def _validate_and_create_directory(custom_directory: Path) -> Path:
 """Ensure that the given path is a valid directory, creating it if necessary.

@@ -290,9 +242,9 @@ def _model_to_yaml_with_comments(
 start_comment: str,
 custom_directory: Path | None = None,
 ) -> Path:
- """Convert a model instance to a structured YAML file with Norwegian descriptions as comments.
+ """Convert a model instance to a structured YAML file.

- Adds Norwegian descriptions to the model, organizes fields into sections, and saves
+ Organizes fields into sections with headers and saves
 the YAML file with a structured format and timestamped filename.

 Args:
@@ -307,13 +259,6 @@ def _model_to_yaml_with_comments(
 yaml = YAML()
 configure_yaml(yaml)

- from dapla_metadata.variable_definitions.variable_definition import (
- VariableDefinition,
- )
-
- # Apply new fields to model
- apply_norwegian_descriptions_to_model(VariableDefinition)
-
 # Convert Pydantic model instance to dictionary
 data = model_instance.model_dump(
 serialize_as_any=True,
@@ -326,21 +271,16 @@ def _model_to_yaml_with_comments(
 status_map = CommentedMap()
 owner_map = CommentedMap()

- # Loop through all fields in the model and populate the commented maps
+ # Loop through all fields in the model and assigne to commented maps
 for field_name, value in data.items():
 if field_name == VARIABLE_STATUS_FIELD_NAME:
- _populate_commented_map(field_name, value, status_map, model_instance)
+ status_map[field_name] = value
 elif field_name == OWNER_FIELD_NAME:
- _populate_commented_map(field_name, value, owner_map, model_instance)
+ owner_map[field_name] = value
 elif field_name in MACHINE_GENERATED_FIELDS:
- _populate_commented_map(
- field_name,
- value,
- machine_generated_map,
- model_instance,
- )
- elif field_name not in {VARIABLE_STATUS_FIELD_NAME, OWNER_FIELD_NAME}:
- _populate_commented_map(field_name, value, commented_map, model_instance)
+ machine_generated_map[field_name] = value
+ else:
+ commented_map[field_name] = value

 base_path = (
 _get_variable_definitions_dir()
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: dapla-toolbelt-metadata
- Version: 0.6.5
+ Version: 0.7.0
 Summary: Dapla Toolbelt Metadata
 License: MIT
 Author: Team Metadata
@@ -24,7 +24,7 @@ Requires-Dist: pyjwt (>=2.8.0)
 Requires-Dist: python-dotenv (>=1.0.1)
 Requires-Dist: requests (>=2.31.0)
 Requires-Dist: ruamel-yaml (>=0.18.10)
- Requires-Dist: ssb-datadoc-model (==6.0.0)
+ Requires-Dist: ssb-datadoc-model (==6.1.0)
 Requires-Dist: ssb-klass-python (>=1.0.1)
 Requires-Dist: typing-extensions (>=4.12.2)
 Project-URL: Changelog, https://github.com/statisticsnorway/dapla-toolbelt-metadata/releases
@@ -1,4 +1,4 @@
- dapla_metadata/__init__.py,sha256=LI-qV1Vq1nKw8KkO0uppNwjOoXwd8niQFCx9jECn6Aw,415
+ dapla_metadata/__init__.py,sha256=37yh9XWYQoLIVIS_fDdwNN8OXzbYY-6kMYwvjQrLMJQ,428
 dapla_metadata/_shared/__init__.py,sha256=qUFgnVhBVlPRQP0ePmY76c8FvWRrJ-9c5GvzibwERnQ,103
 dapla_metadata/_shared/config.py,sha256=QqXcmP66AfXF8wi6FMsa7et7kH2k4EJPOF4IELKuQig,3213
 dapla_metadata/_shared/enums.py,sha256=WHkH1d8xw41gOly6au_izZB1_-6XTcKu5rhBWUImjp8,509
@@ -7,19 +7,19 @@ dapla_metadata/dapla/__init__.py,sha256=tkapF-YwmruPPrKvN3pEoCZqb7xvJx_ogBM8XyGM
 dapla_metadata/dapla/user_info.py,sha256=bENez-ICt9ySR8orYebO68Q3_2LkIW9QTL58DTctmEQ,4833
 dapla_metadata/datasets/__init__.py,sha256=TvzskpdFC6hGcC9_55URT5jr5wNAPzXuISd2UjJWM_8,280
 dapla_metadata/datasets/code_list.py,sha256=kp1O6sUiUAP9WKlWY8IgHWx_1IOzJA63WveHqolgKmg,9082
- dapla_metadata/datasets/core.py,sha256=AGnGTYF7JV8hySu-_Cd4PvxOMlgX1ICpkiWi9GGLlRg,25778
- dapla_metadata/datasets/dapla_dataset_path_info.py,sha256=zdkVjxlqXMBe7eTAneUrTDP0_fx7JsEQ_0JrKjREhfU,26854
- dapla_metadata/datasets/dataset_parser.py,sha256=bc3KOIDQGgdZMPh3XVHhiKMsY6FxIY9glvGlwTM4g7I,8233
+ dapla_metadata/datasets/core.py,sha256=Kc248-U1XoyjFgGo2uOAiOrHrCIo-2-4P53SM8FDKHo,28090
+ dapla_metadata/datasets/dapla_dataset_path_info.py,sha256=WPeV_mwKk2B9sXd14SaP-kTb1bOQ_8W2KtrqOG7sJIY,26867
+ dapla_metadata/datasets/dataset_parser.py,sha256=3dtRXNy1C8SfG8zTYWdY26nV4l-dG25IC_0J5t2bYwI,8285
 dapla_metadata/datasets/external_sources/__init__.py,sha256=qvIdXwqyEmXNUCB94ZtZXRzifdW4hiXASFFPtC70f6E,83
 dapla_metadata/datasets/external_sources/external_sources.py,sha256=9eIcOIUbaodNX1w9Tj2wl4U4wUmr5kF1R0i01fKUzGs,2974
 dapla_metadata/datasets/model_backwards_compatibility.py,sha256=RKhi6cjqmPKW8lTYQ0mIXTAwhMGo_X-QMad4Y5tvq_0,19136
 dapla_metadata/datasets/model_validation.py,sha256=pGT-jqaQQY4z7jz-7UQd0BQoTWDxDWPYAnDoRC2vd_c,6818
 dapla_metadata/datasets/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- dapla_metadata/datasets/statistic_subject_mapping.py,sha256=QdC22DUBOdRgsfmTTEUr9CKCE8fKjMq6-Ezdr6Eof_A,6254
+ dapla_metadata/datasets/statistic_subject_mapping.py,sha256=ovT-bZv6eGPD3L0UIs5nIw4AjJrfZn0hyWyD72JBmhs,6395
 dapla_metadata/datasets/utility/__init__.py,sha256=pp6tUcgUbo8iq9OPtFKQrTbLuI3uY7NHptwWSTpasOU,33
- dapla_metadata/datasets/utility/constants.py,sha256=SqZMc1v8rO2b_nRFJR7frVd0TAGvvxzIPEIzkqOuBSw,2444
+ dapla_metadata/datasets/utility/constants.py,sha256=YEs2ECLNJMM1SSORPTDnzNep_Qut5YbJ5JJx_oP3ios,2470
 dapla_metadata/datasets/utility/enums.py,sha256=SpV4xlmP1YMaJPbmX03hqRLHUOhXIk5gquTeJ8G_5OE,432
- dapla_metadata/datasets/utility/utils.py,sha256=fAevz9X0PHw-JL0_4V0geTkoBV31qoO7-dVLFJaIfbo,18370
+ dapla_metadata/datasets/utility/utils.py,sha256=JpJuvYEXmNUXTgaxPhUg24aiiZS201wRNnAOWyH_DO0,19210
 dapla_metadata/standards/__init__.py,sha256=n8jnMrudLuScSdfQ4UMJorc-Ptg3Y1-ilT8zAaQnM70,179
 dapla_metadata/standards/name_validator.py,sha256=6-DQE_EKVd6UjL--EXpFcZDQtusVbSFaWaUY-CfOV2c,9184
 dapla_metadata/standards/standard_validators.py,sha256=tcCiCI76wUVtMzXA2oCgdauZc0uGgUi11FKu-t7KGwQ,3767
@@ -73,17 +73,15 @@ dapla_metadata/variable_definitions/_generated/vardef_client/py.typed,sha256=47D
 dapla_metadata/variable_definitions/_generated/vardef_client/rest.py,sha256=x4PWmg3IYQBr8OgnrWr3l4Ke2rElHP3zAEVxk2U-mOc,12022
 dapla_metadata/variable_definitions/_utils/__init__.py,sha256=qAhRLJoTBqtR3f9xRXTRhD7-5Xg0Opk1Ks5F4AUYnpA,45
 dapla_metadata/variable_definitions/_utils/_client.py,sha256=v1-9VjrdPI6-sroam5vXMPEV1dQMPsYk7KyGd48HjYw,971
- dapla_metadata/variable_definitions/_utils/config.py,sha256=BpLrnuqgtqz_kxBc_Kd-I1QNL7y2RxRXgX-IVbMIclQ,2416
- dapla_metadata/variable_definitions/_utils/constants.py,sha256=BGITkRNYtRDySM-anDMQDvO2JrXm3lDjw7ZmYfhFlXU,1884
- dapla_metadata/variable_definitions/_utils/descriptions.py,sha256=bB5QHNc4eOhmpLQHCty-CP5_aA82chkICifXw430suI,2746
- dapla_metadata/variable_definitions/_utils/files.py,sha256=qdO9D0l-6FnSGZImTtyMsrFfauFqvQyCWz0knLSklbo,13193
+ dapla_metadata/variable_definitions/_utils/config.py,sha256=h5MtmueCdAgg82c5upvQUC9QSzK0TOs40KwQj5mTrE8,1822
+ dapla_metadata/variable_definitions/_utils/constants.py,sha256=Jy9xFa4ZpTUxpDZ_vdUaFlB-cPnQpFArwS9VtEIG0SY,1834
+ dapla_metadata/variable_definitions/_utils/files.py,sha256=JbPgPNQ7iA38juMqGEdcg5OjZZUwCb6NQtPL0AEspD0,10933
 dapla_metadata/variable_definitions/_utils/template_files.py,sha256=-PgYs4TG4vrXLQgk47pow9ZsqlZqhtO755LnEmvN4MA,3405
 dapla_metadata/variable_definitions/_utils/variable_definition_files.py,sha256=PbqsFdHxsq0EWBg9s2Y57LqVP7aPmGD5-FZfnzuOw2Q,4078
 dapla_metadata/variable_definitions/exceptions.py,sha256=z6Gtd84FboDu7vWjC3wathIF7I0gF0imtRhwMkr16lY,7851
- dapla_metadata/variable_definitions/resources/vardef_model_descriptions_nb.yaml,sha256=VNglLU6jBLbfoM12fc2fiby_pi2GAgA-4t30yKypeuY,5474
 dapla_metadata/variable_definitions/vardef.py,sha256=KYd31nCGhxuzC0hpKR6foQjO39Tlb3vu9IDqUoMvTeY,11352
 dapla_metadata/variable_definitions/variable_definition.py,sha256=sj49uot0e4UJW4QJ3dEJGgjY4yfCHOkxS2NdD2t60b8,14883
- dapla_toolbelt_metadata-0.6.5.dist-info/LICENSE,sha256=np3IfD5m0ZUofn_kVzDZqliozuiO6wrktw3LRPjyEiI,1073
- dapla_toolbelt_metadata-0.6.5.dist-info/METADATA,sha256=nCYM7_zMNUogDtfXGQirVfghQwc4KDnE1GRNQqO-Grw,4905
- dapla_toolbelt_metadata-0.6.5.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
- dapla_toolbelt_metadata-0.6.5.dist-info/RECORD,,
+ dapla_toolbelt_metadata-0.7.0.dist-info/LICENSE,sha256=np3IfD5m0ZUofn_kVzDZqliozuiO6wrktw3LRPjyEiI,1073
+ dapla_toolbelt_metadata-0.7.0.dist-info/METADATA,sha256=BvUM_PyKUxWsZcS1wBYh1n6aC0nEA9A14NQUbphZwIw,4905
+ dapla_toolbelt_metadata-0.7.0.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
+ dapla_toolbelt_metadata-0.7.0.dist-info/RECORD,,
@@ -1,89 +0,0 @@
- """Utilities for dynamically adding extra fields to Pydantic models, specifically Norwegian descriptions."""
-
- import logging
- from pathlib import Path
- from typing import TYPE_CHECKING
- from typing import cast
-
- import ruamel.yaml
- from pydantic import BaseModel
- from pydantic import Field
-
- from dapla_metadata.variable_definitions._utils.config import get_descriptions_path
-
- if TYPE_CHECKING:
- from pydantic.config import JsonDict
-
- logger = logging.getLogger(__name__)
-
-
- def get_package_root() -> Path:
- """Get an absolute Path to the root of the package (dapla_metadata)."""
- number_of_directories_up_from_descriptions_file = 2
- return (
- Path(__file__)
- .resolve()
- .parents[number_of_directories_up_from_descriptions_file]
- )
-
-
- def load_descriptions(file_path: Path) -> dict:
- """Load and return the contents of a YAML file as a dictionary.
-
- Args:
- file_path (Path): Path to the YAML file.
-
- Returns:
- dict: Parsed contents of the YAML file.
- """
- with Path.open(file_path, encoding="utf-8") as f:
- return ruamel.yaml.YAML().load(f)
-
-
- def apply_norwegian_descriptions_to_model(
- model: type[BaseModel],
- ) -> None:
- """Add Norwegian descriptions to the fields of a Pydantic model.
-
- This function globally modifies the model fields by inserting a Norwegian description
- from a predefined dictionary. If a field does not have a corresponding
- Norwegian description, a default message is used.
-
- Args:
- model (BaseModel): A Pydantic model instance to be updated.
-
- Returns:
- None: The function modifies the model in place.
- """
- new_fields = {}
-
- descriptions = load_descriptions(
- get_package_root() / get_descriptions_path(),
- )
-
- for field_name, field_info in model.model_fields.items():
- new_description: str = descriptions.get(
- field_name,
- f"No description in norwegian found for {field_name}",
- )
- if "No description in norwegian found" in new_description:
- logger.warning("Missing description for %s", field_name)
- else:
- logger.debug("Field %s: %s", field_name, new_description)
-
- new_fields[field_name] = Field( # type: ignore[call-overload]
- default=field_info.default,
- alias=field_info.alias,
- title=field_info.title,
- description=field_info.description,
- json_schema_extra=cast(
- "JsonDict",
- {
- "norwegian_description": new_description,
- "annotation": field_info.annotation,
- },
- ),
- )
-
- model.model_fields.update(new_fields) # Apply changes
- model.model_rebuild()
@@ -1,109 +0,0 @@
- # --- Variabel definisjoner ---
- # ref: https://statistics-norway.atlassian.net/wiki/spaces/MPD/pages/3009839199/VarDef+-+Krav+til+dokumentasjon+av+variabler
- name: |
- Variabelens navn. Dette skal ikke være en mer “teknisk” forkortelse, men et navn som er forståelig for mennesker.
- -------------------------
- >>> EKSEMPEL:
- name:
- nb: |-
- Lønnsinntekter
- short_name: |
- Dette er variabelens kortnavn, som kan være en mer “teknisk” forkortelse, f.eks. wlonn (kortnavnet til Lønnsinntekter). Kortnavnet til en variabel i Vardef skal være unikt.
- Kravet til kortnavnet er at det kan inneholde a-z (kun små bokstaver), 0-9 og _ (understrek). Minimumslengden på kortnavnet er 2 tegn.
- Bokstavene “æ”, “ø” og “å” kan ikke brukes. Disse anbefales erstattet med hhv. “ae”, “oe” og “aa"
- definition: |
- En definisjon skal beskrive hva variabelen betyr og være så kort og presis som mulig. Mer utfyllende opplysninger kan legges i Merknad-feltet.
- -------------------------
- >>> EKSEMPEL:
- definition:
- nb: |-
- Yrkesinntekter, kapitalinntekter, skattepliktige og skattefrie overføringer, i løpet av kalenderåret.
- classification_reference: |
- ID av en klassifikasjon eller kodeliste fra KLASS som beskriver verdiene variabelen kan anta.
- For eksempel vil variabelen 'Sivilstand' ha klassifikasjon 'Standard for sivilstand' (kan vises på https://www.ssb.no/klass/klassifikasjoner/19 ) som har ID 19.
- -------------------------
- >>> EKSEMPEL:
- classification_reference: "19"
- unit_types: |
- Enhetstyper - enhetene som beskrives av denne variabelen. Variabelen “sivilstand” vil f.eks. ha enhetstypen person,
- mens f.eks. “Produsentpris for tjenester” vil ha både foretak og bedrift som enhetstyper siden variabelen kan beskrive begge.
- Verdier skal være koder fra: https://www.ssb.no/klass/klassifikasjoner/702.
- -------------------------
- >>> EKSEMPEL:
- unit_types:
- - "20"
- subject_fields: |
- Statistikkområder som variabelen brukes innenfor. For eksempel tilhører variabelen “Sivilstand” statistikkområdet “Befolkning”.
- Verdier skal være koder fra https://www.ssb.no/klass/klassifikasjoner/618.
- -------------------------
- >>> EKSEMPEL:
- subject_fields:
- - "bf"
- - "be"
- contains_special_categories_of_personal_data: |
- Viser om variabelen inneholder spesielt sensitive personopplysninger.
- -------------------------
- >>> EKSEMPEL:
- contains_special_categories_of_personal_data: true
- measurement_type: |
- Måletype som en kvantitativ variabelen tilhører, f.eks. valuta, areal osv.
- Verdien skal være en kode fra: https://www.ssb.no/klass/klassifikasjoner/303
- -------------------------
- >>> EKSEMPEL:
- measurement_type: "03"
- valid_from: |
- Datoen variabeldefinisjonen er gyldig f.o.m.
- -------------------------
- >>> EKSEMPEL:
- valid_from: 1999-01-30
- valid_until: |
- Datoen variabeldefinisjonens var gyldig t.o.m. Settes hvis definisjonen skal erstattet av en ny definisjon (med en ny gyldighetsperiode), eller variabelen ikke lenger skal brukes.
- -------------------------
- >>> EKSEMPEL:
- valid_until: 2024-10-23
- external_reference_uri: |
- En peker (URI) til ekstern definisjon/dokumentasjon, f.eks. ei webside som er relevant for variabelen.
- -----------------------------------------------------
- >>> EKSEMPEL:
- external_reference_uri: "https://www.landbruksdirektoratet.com"
- comment: |
- Her kan en sette inn eventuelle tilleggsopplysninger som ikke hører hjemme i selve definisjonen.
- Variabelen “Landbakgrunn” har f.eks. merknaden “Fra og med 1.1.2003 ble definisjon endret til også å trekke inn besteforeldrenes fødeland”.
- -----------------------------------------------------------------------------------------------
- >>> EKSEMPEL:
- comment:
- nb: |-
- Fra og med 1.1.2003 ble definisjon endret til også å trekke inn besteforeldrenes fødeland.
- related_variable_definition_uris: |
- Her kan en legge inn URIer til andre variabler som er relevante. Eksempelvis er variabelen “Inntekt etter skatt” en beregnet variabel der “Yrkesinntekter” og “Kapitalinntekter” inngår i beregningen.
- En kan da legge inn deres URI-er i dette feltet.
- -------------------------
- >>> EKSEMPEL:
- related_variable_definition_uris:
- - "https://example.com/"
- contact: |
- Her dokumenterer en navn og epost for person eller gruppe som kan svare på spørsmål.
- -------------------------
- >>> EKSEMPEL:
- contact:
- title:
- nb: |-
- Seksjonsleder
- email: leder@ssb.no
- variable_status: |
- Livssyklus for variabelen.
- id: |
- Unik SSB identifikator for variabeldefinisjonen. Denne blir maskingenerert.
- Variabeldefinisjoner med ulike gyldighetsperioder har samme ID (og samme kortnavn).
- patch_id: |
- Løpenummer som identifiserer en patch, endring, for en variabeldefinisjon.
- owner: |
- Eier av variabelen dvs. ansvarlig Dapla-team (statistikk-team) og informasjon om tilgangsstyringsgrupper. Team-tilhørighet settes automatisk til det samme som teamtilhørigheten til den som oppretter variabelen.
- created_at: |
- Tidsstempelet da variabelen ble opprettet. Denne er maskingenerert.
- created_by: |
- Personen som har opprettet variabelen. Dette er maskingenerert.
- last_updated_at: |
- Tidsstempelet da variabelen sist ble oppdatert. Denne er maskingenerert.
- last_updated_by: |
- Personen som sist utførte en endring i variabelen. Denne er maskingenerert.