dapla-toolbelt-metadata 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dapla-toolbelt-metadata might be problematic. Click here for more details.

@@ -0,0 +1,685 @@
1
+ """Extract info from a path following SSB's dataset naming convention."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ import pathlib
7
+ import re
8
+ from abc import ABC
9
+ from abc import abstractmethod
10
+ from dataclasses import dataclass
11
+ from typing import TYPE_CHECKING
12
+ from typing import Final
13
+ from typing import Literal
14
+
15
+ import arrow
16
+ from cloudpathlib import GSPath
17
+ from datadoc_model.model import DataSetState
18
+
19
+ if TYPE_CHECKING:
20
+ import datetime
21
+ import os
22
+ from datetime import date
23
+
24
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# GCS scheme prefix with a single slash. DaplaDatasetPathInfo.bucket_name checks
# for it in addition to the regular "gs://" prefix, because a "gs://..." string
# that has been passed through pathlib.Path stringifies with one slash only.
GS_PREFIX_FROM_PATHLIB = "gs:/"
27
+
28
+
29
@dataclass
class DateFormat(ABC):
    """Common base for the date formats a period string can be written in.

    Attributes:
        name: Identifier of the format, e.g. ``"ISO_YEAR"``.
        regex_pattern: Regular expression a period string has to match to be
            recognised as this format.
        arrow_pattern: Format string the subclasses hand to ``arrow.get`` when
            parsing a period string.
        timeframe: Granularity the subclasses pass to arrow's ``floor`` and
            ``ceil`` to find the first and last date of the period.
    """

    name: str
    regex_pattern: str
    arrow_pattern: str
    timeframe: Literal["year", "month", "day", "week"]

    @abstractmethod
    def get_floor(self, period_string: str) -> date | None:
        """Return the first date of the period; implemented by subclasses.

        Args:
            period_string: A string representing the timeframe period.
        """

    @abstractmethod
    def get_ceil(self, period_string: str) -> date | None:
        """Return the last date of the period; implemented by subclasses.

        Args:
            period_string: A string representing the timeframe period.
        """
57
+
58
+
59
@dataclass
class IsoDateFormat(DateFormat):
    """A subclass of DateFormat for period strings written as ISO dates.

    The period string is parsed with ``arrow.get`` using ``arrow_pattern`` and
    then snapped to the start or end of ``timeframe``.
    """

    def get_floor(self, period_string: str) -> date | None:
        """Return the first date of a period given in ISO date format.

        Args:
            period_string: The period, e.g. ``"1980-08"`` or ``"2021"``.

        Returns:
            The first date of the period.

        Examples:
            >>> ISO_YEAR_MONTH.get_floor("1980-08")
            datetime.date(1980, 8, 1)

            >>> ISO_YEAR.get_floor("2021")
            datetime.date(2021, 1, 1)
        """
        parsed = arrow.get(period_string, self.arrow_pattern)
        period_start = parsed.floor(self.timeframe)
        return period_start.date()

    def get_ceil(self, period_string: str) -> date | None:
        """Return the last date of a period given in ISO date format.

        Args:
            period_string: The period, e.g. ``"1921"`` or ``"2021-05"``.

        Returns:
            The last date of the period.

        Examples:
            >>> ISO_YEAR.get_ceil("1921")
            datetime.date(1921, 12, 31)

            >>> ISO_YEAR_MONTH.get_ceil("2021-05")
            datetime.date(2021, 5, 31)
        """
        parsed = arrow.get(period_string, self.arrow_pattern)
        period_end = parsed.ceil(self.timeframe)
        return period_end.date()
86
+
87
+
88
# Four-digit year, e.g. "2021"; floor/ceil span the whole year.
ISO_YEAR = IsoDateFormat(
    name="ISO_YEAR",
    regex_pattern=r"^\d{4}$",
    arrow_pattern="YYYY",
    timeframe="year",
)
# Year and two-digit month separated by a hyphen, e.g. "2021-05".
ISO_YEAR_MONTH = IsoDateFormat(
    name="ISO_YEAR_MONTH",
    regex_pattern=r"^\d{4}\-\d{2}$",
    arrow_pattern="YYYY-MM",
    timeframe="month",
)
# Full calendar date, e.g. "2021-05-17"; floor and ceil are that same day.
ISO_YEAR_MONTH_DAY = IsoDateFormat(
    name="ISO_YEAR_MONTH_DAY",
    regex_pattern=r"^\d{4}\-\d{2}\-\d{2}$",
    arrow_pattern="YYYY-MM-DD",
    timeframe="day",
)
# Year and week number; the hyphen before "W" is optional in the regex, so
# both "2022-W01" and "2022W01" match.
# NOTE(review): "W" is presumably arrow's ISO week-date token - confirm against
# the arrow documentation.
ISO_YEAR_WEEK = IsoDateFormat(
    name="ISO_YEAR_WEEK",
    regex_pattern=r"^\d{4}\-{0,1}W\d{2}$",
    arrow_pattern="W",
    timeframe="week",
)
112
+
113
+
114
@dataclass
class SsbDateFormat(DateFormat):
    """A subclass of DateFormat with relevant patterns for SSB unique dates.

    An SSB period string is a four-digit year followed by a two-character
    period code (e.g. ``"2024H1"``). The code is translated to a month through
    ``ssb_dates`` and the resulting year-month string is parsed with arrow.

    Attributes:
        ssb_dates: Maps each two-character period code (e.g. ``"Q1"``) to a
            dictionary with the keys ``"start"`` and ``"end"``, each holding a
            two-digit month string.
    """

    ssb_dates: dict[str, dict[str, str]]

    def _parse_boundary(
        self,
        period_string: str,
        boundary: Literal["start", "end"],
    ) -> arrow.Arrow | None:
        """Translate an SSB period string to the arrow value of one boundary month.

        Args:
            period_string: A string representing the timeframe period in SSB
                format.
            boundary: Which month of the period to look up in ``ssb_dates``.

        Returns:
            The parsed year-month, or None when the period code is unknown or
            the year part cannot be parsed.
        """
        try:
            month = self.ssb_dates[period_string[-2:]][boundary]
            return arrow.get(period_string[:4] + month, self.arrow_pattern)
        except (KeyError, ValueError):
            # KeyError: period code missing from ssb_dates.
            # ValueError: arrow's parser errors derive from it (malformed year).
            logger.exception("Error while converting to SSB date format")
            return None

    def get_floor(self, period_string: str) -> date | None:
        """Return first date of the timeframe period defined in SSB date format.

        Convert SSB format to date-string and return the first date.

        Args:
            period_string: A string representing the timeframe period in
                SSB format.

        Returns:
            The first date of the period if the period_string is a valid
            SSB format, otherwise None (the failure is logged).

        Examples:
            >>> SSB_BIMESTER.get_floor("2003B8") is None
            True

            >>> SSB_BIMESTER.get_floor("2003B4")
            datetime.date(2003, 7, 1)
        """
        parsed = self._parse_boundary(period_string, "start")
        if parsed is None:
            return None
        return parsed.floor(self.timeframe).date()

    def get_ceil(self, period_string: str) -> date | None:
        """Return last date of the timeframe period defined in SSB date format.

        Convert SSB format to date-string and return the last date.

        Args:
            period_string: A string representing the timeframe period in SSB
                format.

        Returns:
            The last date of the period if the period_string is a valid SSB
            format, otherwise None (the failure is logged).

        Examples:
            >>> SSB_TRIANNUAL.get_ceil("1999T11") is None
            True

            >>> SSB_HALF_YEAR.get_ceil("2024H1")
            datetime.date(2024, 6, 30)
        """
        parsed = self._parse_boundary(period_string, "end")
        if parsed is None:
            return None
        return parsed.ceil(self.timeframe).date()
181
+
182
+
183
# Two-month periods written as "<year>B<digit>", e.g. "2003B4".
# The regex accepts any single digit; codes that are missing from ssb_dates
# (such as "B8") make SsbDateFormat.get_floor/get_ceil return None.
SSB_BIMESTER = SsbDateFormat(
    name="SSB_BIMESTER",
    regex_pattern=r"^\d{4}[B]\d{1}$",
    arrow_pattern="YYYYMM",
    timeframe="month",
    ssb_dates={
        "B1": {
            "start": "01",
            "end": "02",
        },
        "B2": {
            "start": "03",
            "end": "04",
        },
        "B3": {
            "start": "05",
            "end": "06",
        },
        "B4": {
            "start": "07",
            "end": "08",
        },
        "B5": {
            "start": "09",
            "end": "10",
        },
        "B6": {
            "start": "11",
            "end": "12",
        },
    },
)

# Three-month periods written as "<year>Q<digit>", e.g. "1990Q1".
SSB_QUARTERLY = SsbDateFormat(
    name="SSB_QUARTERLY",
    regex_pattern=r"^\d{4}[Q]\d{1}$",
    arrow_pattern="YYYYMM",
    timeframe="month",
    ssb_dates={
        "Q1": {
            "start": "01",
            "end": "03",
        },
        "Q2": {
            "start": "04",
            "end": "06",
        },
        "Q3": {
            "start": "07",
            "end": "09",
        },
        "Q4": {
            "start": "10",
            "end": "12",
        },
    },
)

# Four-month periods written as "<year>T<digit>", e.g. "1954T2".
SSB_TRIANNUAL = SsbDateFormat(
    name="SSB_TRIANNUAL",
    regex_pattern=r"^\d{4}[T]\d{1}$",
    arrow_pattern="YYYYMM",
    timeframe="month",
    ssb_dates={
        "T1": {
            "start": "01",
            "end": "04",
        },
        "T2": {
            "start": "05",
            "end": "08",
        },
        "T3": {
            "start": "09",
            "end": "12",
        },
    },
)
# Six-month periods written as "<year>H<digit>", e.g. "2024H1".
SSB_HALF_YEAR = SsbDateFormat(
    name="SSB_HALF_YEAR",
    regex_pattern=r"^\d{4}[H]\d{1}$",
    arrow_pattern="YYYYMM",
    timeframe="month",
    ssb_dates={
        "H1": {
            "start": "01",
            "end": "06",
        },
        "H2": {
            "start": "07",
            "end": "12",
        },
    },
)
277
+
278
# Every format a period string is checked against. categorize_period_string
# returns the first entry whose regex matches, and
# DaplaDatasetPathInfo._get_period_string_indices uses the same regexes (with a
# "p" prefix inserted) to find the period sections of a dataset file name.
SUPPORTED_DATE_FORMATS: list[IsoDateFormat | SsbDateFormat] = [
    ISO_YEAR,
    ISO_YEAR_MONTH,
    ISO_YEAR_MONTH_DAY,
    ISO_YEAR_WEEK,
    SSB_BIMESTER,
    SSB_QUARTERLY,
    SSB_TRIANNUAL,
    SSB_HALF_YEAR,
]
288
+
289
+
290
def categorize_period_string(period: str) -> IsoDateFormat | SsbDateFormat:
    """Find the supported date format that a period string is written in.

    The formats in SUPPORTED_DATE_FORMATS are tried in order and the first one
    whose regex matches the period is returned.

    Args:
        period: A string representing the period to be categorized.

    Returns:
        The matching IsoDateFormat or SsbDateFormat instance.

    Raises:
        NotImplementedError: If the period string matches none of the
            supported ISO or SSB date formats.

    Examples:
        >>> date_format = categorize_period_string('2022-W01')
        >>> date_format.name
        'ISO_YEAR_WEEK'

        >>> date_format = categorize_period_string('1954T2')
        >>> date_format.name
        'SSB_TRIANNUAL'

        >>> categorize_period_string('unknown format')
        Traceback (most recent call last):
        ...
        NotImplementedError: Period format unknown format is not supported
    """
    matching_format = next(
        (
            candidate
            for candidate in SUPPORTED_DATE_FORMATS
            if re.match(candidate.regex_pattern, period)
        ),
        None,
    )
    if matching_format is None:
        msg = f"Period format {period} is not supported"
        raise NotImplementedError(msg)
    return matching_format
326
+
327
+
328
class DaplaDatasetPathInfo:
    """Extract info from a path following SSB's dataset naming convention.

    The file stem is split on underscores into sections. Sections that consist
    of a ``p`` followed by a supported period string are treated as period
    information, and a trailing ``v<digits>`` section as the dataset version.
    """

    def __init__(self, dataset_path: str | os.PathLike[str]) -> None:
        """Digest the path so that it's ready for further parsing.

        Args:
            dataset_path: Path to the dataset file.
        """
        self.dataset_string = str(dataset_path)
        self.dataset_path = pathlib.Path(dataset_path)
        self.dataset_name_sections = self.dataset_path.stem.split("_")
        self._period_strings = self._extract_period_strings(self.dataset_name_sections)

    @staticmethod
    def _get_period_string_indices(dataset_name_sections: list[str]) -> list[int]:
        """Get all the indices at which period strings are found in list.

        Args:
            dataset_name_sections: A list of strings representing sections of a
                dataset name.

        Returns:
            A list of indices where period strings are found within the
            dataset_name_sections.

        Examples:
            >>> DaplaDatasetPathInfo._get_period_string_indices(['kommune', 'p2022', 'v1'])
            [1]

            >>> DaplaDatasetPathInfo._get_period_string_indices(['kommune', 'p2022-01', 'p2023-06', 'v1'])
            [1, 2]

            >>> DaplaDatasetPathInfo._get_period_string_indices(['kommune', 'p1990Q1', 'v1'])
            [1]

            >>> DaplaDatasetPathInfo._get_period_string_indices(['varehandel','v1'])
            []
        """

        def insert_p(regex: str) -> str:
            r"""Insert a 'p' as the second character.

            Args:
                regex: A string representing the regular expression pattern to be
                    modified.

            Returns:
                The modified regular expression pattern with 'p' inserted as the
                second character.

            Examples:
                >>> insert_p(r"^\d{4}[H]\d{1}$")
                '^p\\d{4}[H]\\d{1}$'
            """
            return regex[:1] + "p" + regex[1:]

        # Build the "p"-prefixed patterns once instead of once per section.
        period_patterns = [
            insert_p(date_format.regex_pattern)
            for date_format in SUPPORTED_DATE_FORMATS
        ]
        return [
            i
            for i, section in enumerate(dataset_name_sections)
            if any(re.match(pattern, section) for pattern in period_patterns)
        ]

    @staticmethod
    def _extract_period_strings(dataset_name_sections: list[str]) -> list[str]:
        """Extract period strings from dataset name sections.

        Returns every section that is recognised as a period (a ``p`` followed
        by one of the supported date formats) with the leading ``p`` removed,
        in the order the sections appear.

        Args:
            dataset_name_sections: A list of strings representing sections of a
                dataset name.

        Returns:
            A list of extracted period strings, with the first character stripped
            from each match.

        Examples:
            >>> DaplaDatasetPathInfo._extract_period_strings(['p2022', 'kommune', 'v1'])
            ['2022']

            >>> DaplaDatasetPathInfo._extract_period_strings(['p2022-01', 'p2023-06', 'kommune', 'v1'])
            ['2022-01', '2023-06']

            >>> DaplaDatasetPathInfo._extract_period_strings(['p1990Q1', 'kommune', 'v1'])
            ['1990Q1']

            >>> DaplaDatasetPathInfo._extract_period_strings(['varehandel','v1'])
            []
        """
        return [
            dataset_name_sections[i][1:]
            for i in DaplaDatasetPathInfo._get_period_string_indices(
                dataset_name_sections,
            )
        ]

    def _extract_period_string_from_index(self, index: int) -> str | None:
        """Extract a period string by its index from the list of period strings.

        Args:
            index: The index of the period string to extract.

        Returns:
            The extracted period string if it exists, otherwise None.
        """
        try:
            return self._period_strings[index]
        except IndexError:
            return None

    def _extract_norwegian_dataset_state_path_part(
        self,
        dataset_state: DataSetState,
    ) -> set[str]:
        """Extract the Norwegian dataset state path part.

        Args:
            dataset_state: The dataset state; only its ``name`` is used.

        Returns:
            The Norwegian directory name for the state, spelled both with
            hyphens and with underscores, or an empty set when the state has
            no Norwegian mapping.
        """
        norwegian_mappings = {
            "SOURCE_DATA": "kildedata",
            "INPUT_DATA": "inndata",
            "PROCESSED_DATA": "klargjorte_data",
            "STATISTICS": "statistikk",
            "OUTPUT_DATA": "utdata",
        }
        norwegian_state = norwegian_mappings.get(dataset_state.name)
        if norwegian_state:
            state_name = norwegian_state.lower().replace("_", " ")
            return {state_name.replace(" ", "-"), state_name.replace(" ", "_")}
        return set()

    @property
    def bucket_name(
        self,
    ) -> str | None:
        """Extract the bucket name from the dataset path.

        Returns:
            The bucket name, or None if the dataset path is not a GCS path or
            nothing follows the GCS prefix.

        Examples:
            >>> DaplaDatasetPathInfo('gs://ssb-staging-dapla-felles-data-delt/datadoc/utdata/person_data_p2021_v2.parquet').bucket_name
            'ssb-staging-dapla-felles-data-delt'

            >>> DaplaDatasetPathInfo(pathlib.Path('gs://ssb-staging-dapla-felles-data-delt/datadoc/utdata/person_data_p2021_v2.parquet')).bucket_name
            'ssb-staging-dapla-felles-data-delt'

            >>> DaplaDatasetPathInfo('gs:/ssb-staging-dapla-felles-data-delt/datadoc/utdata/person_data_p2021_v2.parquet').bucket_name
            'ssb-staging-dapla-felles-data-delt'

            >>> DaplaDatasetPathInfo('ssb-staging-dapla-felles-data-delt/datadoc/utdata/person_data_p2021_v2.parquet').bucket_name is None
            True
        """
        prefix: str | None = None
        # Check the full "gs://" prefix first; "gs:/" is a prefix of it.
        if self.dataset_string.startswith(GSPath.cloud_prefix):
            prefix = GSPath.cloud_prefix
        elif self.dataset_string.startswith(GS_PREFIX_FROM_PATHLIB):
            prefix = GS_PREFIX_FROM_PATHLIB
        else:
            return None

        remaining_parts = pathlib.Path(
            self.dataset_string.removeprefix(prefix),
        ).parts
        # A bare prefix such as "gs://" leaves no parts to take a bucket from.
        return remaining_parts[0] if remaining_parts else None

    @property
    def dataset_short_name(
        self,
    ) -> str | None:
        """Extract the dataset short name from the filepath.

        The dataset short name is defined as the first section of the stem, up to
        the period information or the version information if no period information
        is present.

        Returns:
            The extracted dataset short name if it can be determined, otherwise
            None.

        Examples:
            >>> DaplaDatasetPathInfo('prosjekt/befolkning/klargjorte_data/person_data_v1.parquet').dataset_short_name
            'person_data'

            >>> DaplaDatasetPathInfo('befolkning/inndata/sykepenger_p2022Q1_p2022Q2_v23.parquet').dataset_short_name
            'sykepenger'

            >>> DaplaDatasetPathInfo('my_data/simple_dataset_name.parquet').dataset_short_name
            'simple_dataset_name'
        """
        if self.contains_data_from or self.contains_data_until:
            # Everything before the first period section is the short name.
            short_name_sections = self.dataset_name_sections[
                : min(
                    DaplaDatasetPathInfo._get_period_string_indices(
                        self.dataset_name_sections,
                    ),
                )
            ]
        elif self.dataset_version:
            short_name_sections = self.dataset_name_sections[:-1]
        else:
            short_name_sections = self.dataset_name_sections

        return "_".join(short_name_sections)

    @property
    def contains_data_from(self) -> datetime.date | None:
        """The earliest date from which data in the dataset is relevant for.

        Returns:
            The earliest relevant date for the dataset if available, otherwise
            None. None is also returned when two periods are present and the
            first one sorts after the second (string comparison).
        """
        period_string = self._extract_period_string_from_index(0)
        if not period_string or (
            len(self._period_strings) > 1 and period_string > self._period_strings[1]
        ):
            return None
        date_format = categorize_period_string(period_string)
        return date_format.get_floor(period_string)

    @property
    def contains_data_until(self) -> datetime.date | None:
        """The latest date until which data in the dataset is relevant for.

        The second period string is used when present, otherwise the first.

        Returns:
            The latest relevant date for the dataset if available, otherwise
            None. None is also returned when the second period sorts before
            the first one (string comparison).
        """
        first_period_string = self._extract_period_string_from_index(0)
        second_period_string = self._extract_period_string_from_index(1)
        period_string = second_period_string or first_period_string
        if not period_string or (
            second_period_string
            and first_period_string is not None
            and second_period_string < first_period_string
        ):
            return None
        date_format = categorize_period_string(period_string)
        return date_format.get_ceil(period_string)

    @property
    def dataset_state(
        self,
    ) -> DataSetState | None:
        """Extract the dataset state from the path.

        We assume that files are saved in the Norwegian language as specified by
        SSB.

        Returns:
            The extracted dataset state if it can be determined from the path,
            otherwise None.

        Examples:
            >>> DaplaDatasetPathInfo('klargjorte_data/person_data_v1.parquet').dataset_state
            <DataSetState.PROCESSED_DATA: 'PROCESSED_DATA'>

            >>> DaplaDatasetPathInfo('utdata/min_statistikk/person_data_v1.parquet').dataset_state
            <DataSetState.OUTPUT_DATA: 'OUTPUT_DATA'>

            >>> DaplaDatasetPathInfo('my_special_data/person_data_v1.parquet').dataset_state is None
            True
        """
        dataset_path_parts = set(self.dataset_path.parts)
        for state in DataSetState:
            norwegian_variations = self._extract_norwegian_dataset_state_path_part(
                state,
            )
            if norwegian_variations.intersection(dataset_path_parts):
                return state
        return None

    @property
    def dataset_version(
        self,
    ) -> str | None:
        """Extract version information if exists in filename.

        The version is the last underscore-separated section of the stem when
        it is a ``v`` followed by digits and the stem has at least two sections.

        Returns:
            The extracted version information if available in the filename,
            otherwise None.

        Examples:
            >>> DaplaDatasetPathInfo('person_data_v1.parquet').dataset_version
            '1'

            >>> DaplaDatasetPathInfo('person_data_v20.parquet').dataset_version
            '20'

            >>> DaplaDatasetPathInfo('person_data.parquet').dataset_version is None
            True
        """
        minimum_elements_in_file_name: Final[int] = 2
        minimum_characters_in_version_string: Final[int] = 2
        if len(self.dataset_name_sections) >= minimum_elements_in_file_name:
            last_filename_element = str(self.dataset_name_sections[-1])
            if (
                len(last_filename_element) >= minimum_characters_in_version_string
                and last_filename_element[0:1] == "v"
                and last_filename_element[1:].isdigit()
            ):
                return last_filename_element[1:]
        return None

    @property
    def statistic_short_name(
        self,
    ) -> str | None:
        """Extract the statistical short name from the filepath.

        Extract the statistical short name from the filepath right before the
        dataset state based on the Dapla filepath naming convention.

        Returns:
            The extracted statistical short name if it can be determined,
            otherwise None.

        Examples:
            >>> DaplaDatasetPathInfo('prosjekt/befolkning/klargjorte_data/person_data_v1.parquet').statistic_short_name
            'befolkning'

            >>> DaplaDatasetPathInfo('befolkning/inndata/person_data_v1.parquet').statistic_short_name
            'befolkning'

            >>> DaplaDatasetPathInfo('befolkning/person_data.parquet').statistic_short_name is None
            True
        """
        dataset_state = self.dataset_state
        if dataset_state is None:
            return None

        dataset_state_names = self._extract_norwegian_dataset_state_path_part(
            dataset_state,
        )
        dataset_path_parts = list(self.dataset_path.parts)
        # Take the first occurrence of each spelling and walk them in path
        # order, so the result does not depend on set iteration order when
        # both spellings are present in the path.
        state_positions = sorted(
            dataset_path_parts.index(state_name)
            for state_name in dataset_state_names
            if state_name in dataset_path_parts
        )
        for position in state_positions:
            # A state directory at the very start has nothing in front of it.
            if position != 0:
                return dataset_path_parts[position - 1]
        return None

    def path_complies_with_naming_standard(self) -> bool:
        """Check if path is valid according to SSB standard.

        Read more about SSB naming convention in the Dapla manual:
        https://manual.dapla.ssb.no/statistikkere/navnestandard.html

        Returns:
            True if the dataset state, statistic short name, both period
            boundaries and the version can all be extracted, otherwise False.
        """
        return bool(
            self.dataset_state
            and self.statistic_short_name
            and self.contains_data_from
            and self.contains_data_until
            and self.dataset_version,
        )