nominal 1.100.0__py3-none-any.whl → 1.102.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
nominal/core/client.py CHANGED
@@ -7,7 +7,7 @@ from dataclasses import dataclass, field
  from datetime import datetime, timedelta
  from io import TextIOBase
  from pathlib import Path
- from typing import BinaryIO, Iterable, Mapping, Sequence
+ from typing import BinaryIO, Iterable, Mapping, Sequence, overload
 
  import certifi
  import conjure_python_client
@@ -16,7 +16,6 @@ from nominal_api import (
  api,
  attachments_api,
  authentication_api,
- event,
  ingest_api,
  scout_asset_api,
  scout_catalog,
@@ -24,7 +23,6 @@ from nominal_api import (
  scout_datasource_connection_api,
  scout_layout_api,
  scout_notebook_api,
- scout_run_api,
  scout_template_api,
  scout_video_api,
  scout_workbookcommon_api,
@@ -34,18 +32,18 @@ from nominal_api import (
  from typing_extensions import Self, deprecated
 
  from nominal import ts
+ from nominal._utils.deprecation_tools import warn_on_deprecated_argument
  from nominal.config import NominalConfig, _config
  from nominal.core._clientsbunch import ClientsBunch
  from nominal.core._constants import DEFAULT_API_BASE_URL
+ from nominal.core._event_types import EventType
  from nominal.core._utils.api_tools import (
  Link,
  LinkDict,
  construct_user_agent_string,
- create_links,
  rid_from_instance_or_string,
  )
  from nominal.core._utils.multipart import (
- path_upload_name,
  upload_multipart_io,
  )
  from nominal.core._utils.pagination_tools import (
@@ -55,7 +53,6 @@ from nominal.core._utils.pagination_tools import (
  search_checklists_paginated,
  search_data_reviews_paginated,
  search_datasets_paginated,
- search_events_paginated,
  search_runs_by_asset_paginated,
  search_runs_paginated,
  search_secrets_paginated,
@@ -69,7 +66,6 @@ from nominal.core._utils.query_tools import (
  create_search_checklists_query,
  create_search_containerized_extractors_query,
  create_search_datasets_query,
- create_search_events_query,
  create_search_runs_query,
  create_search_secrets_query,
  create_search_users_query,
@@ -95,10 +91,10 @@ from nominal.core.dataset import (
  _get_datasets,
  )
  from nominal.core.datasource import DataSource
- from nominal.core.event import Event, EventType
- from nominal.core.exceptions import NominalConfigError, NominalError, NominalIngestError, NominalMethodRemovedError
+ from nominal.core.event import Event, _create_event, _search_events
+ from nominal.core.exceptions import NominalConfigError, NominalError, NominalMethodRemovedError
  from nominal.core.filetype import FileType, FileTypes
- from nominal.core.run import Run
+ from nominal.core.run import Run, _create_run
  from nominal.core.secret import Secret
  from nominal.core.unit import Unit, _available_units
  from nominal.core.user import User
@@ -109,8 +105,6 @@ from nominal.core.workspace import Workspace
  from nominal.ts import (
  IntegralNanosecondsDuration,
  IntegralNanosecondsUTC,
- _SecondsNanos,
- _to_api_duration,
  _to_typed_timestamp_type,
  )
 
@@ -492,6 +486,7 @@ class NominalClient:
  )
  return list(self._iter_search_videos(query))
 
+ @overload
  def create_run(
  self,
  name: str,
@@ -503,24 +498,96 @@ class NominalClient:
  labels: Sequence[str] = (),
  links: Sequence[str | Link | LinkDict] = (),
  attachments: Iterable[Attachment] | Iterable[str] = (),
+ ) -> Run: ...
+ @overload
+ def create_run(
+ self,
+ name: str,
+ start: datetime | IntegralNanosecondsUTC,
+ end: datetime | IntegralNanosecondsUTC | None,
+ description: str | None = None,
+ *,
+ properties: Mapping[str, str] | None = None,
+ labels: Sequence[str] = (),
+ links: Sequence[str | Link | LinkDict] = (),
+ attachments: Iterable[Attachment] | Iterable[str] = (),
+ asset: Asset | str,
+ ) -> Run: ...
+ @overload
+ def create_run(
+ self,
+ name: str,
+ start: datetime | IntegralNanosecondsUTC,
+ end: datetime | IntegralNanosecondsUTC | None,
+ description: str | None = None,
+ *,
+ properties: Mapping[str, str] | None = None,
+ labels: Sequence[str] = (),
+ links: Sequence[str | Link | LinkDict] = (),
+ attachments: Iterable[Attachment] | Iterable[str] = (),
+ assets: Sequence[Asset | str],
+ ) -> Run: ...
+ @warn_on_deprecated_argument(
+ "asset", "The 'asset' parameter is deprecated and will be removed in a future release. Use 'assets' instead."
+ )
+ def create_run(
+ self,
+ name: str,
+ start: datetime | IntegralNanosecondsUTC,
+ end: datetime | IntegralNanosecondsUTC | None,
+ description: str | None = None,
+ *,
+ properties: Mapping[str, str] | None = None,
+ labels: Sequence[str] | None = None,
+ links: Sequence[str | Link | LinkDict] | None = None,
+ attachments: Iterable[Attachment] | Iterable[str] | None = None,
  asset: Asset | str | None = None,
+ assets: Sequence[Asset | str] | None = None,
  ) -> Run:
- """Create a run."""
- request = scout_run_api.CreateRunRequest(
- attachments=[rid_from_instance_or_string(a) for a in attachments],
- data_sources={},
- description=description or "",
- labels=list(labels),
- links=create_links(links),
- properties={} if properties is None else dict(properties),
- start_time=_SecondsNanos.from_flexible(start).to_scout_run_api(),
- title=name,
- end_time=None if end is None else _SecondsNanos.from_flexible(end).to_scout_run_api(),
- assets=[] if asset is None else [rid_from_instance_or_string(asset)],
- workspace=self._clients.workspace_rid,
+ """Create a run, which is effectively a slice of time across a collection of assets and datasources.
+
+ Args:
+ name: Name of the run to create
+ start: Starting timestamp of the run to create
+ end: Ending timestamp of the run to create, or None for an unbounded run.
+ description: Optional description of the run to create
+ properties: Optional key-value pairs to use as properties on the created run
+ labels: Optional sequence of labels for the created run
+ links: Link metadata to add to the created run
+ attachments: Attachments to associate with the created run
+ asset: Singular asset to associate with the run
+ NOTE: mutually exclusive with `assets`
+ NOTE: deprecated-- use `assets` instead.
+ assets: Sequence of assets to associate with the run
+ NOTE: mutually exclusive with `asset`
+
+ Returns:
+ Reference to the created run object
+
+ Raises:
+ ValueError: both `asset` and `assets` provided
+ ConjureHTTPError: error making request
+
+ """
+ if asset and assets:
+ raise ValueError("Only one of 'asset' and 'assets' may be provided")
+ elif asset:
+ assets = [asset]
+ elif assets is None:
+ assets = []
+
+ return _create_run(
+ self._clients,
+ name=name,
+ start=start,
+ end=end,
+ description=description,
+ properties=properties,
+ labels=labels,
+ links=links,
+ attachments=attachments,
+ asset_rids=[rid_from_instance_or_string(asset) for asset in assets],
  )
- response = self._clients.run.create_run(self._clients.auth_header, request)
- return Run._from_conjure(self._clients, response)
 
  def get_run(self, rid: str) -> Run:
  """Retrieve a run by its RID."""
@@ -665,7 +732,7 @@ class NominalClient:
 
  return dataset
 
- def create_empty_video(
+ def create_video(
  self,
  name: str,
  *,
@@ -695,6 +762,8 @@ class NominalClient:
  )
  return Video._from_conjure(self._clients, response)
 
+ create_empty_video = create_video
+
  def get_video(self, rid: str) -> Video:
  """Retrieve a video by its RID."""
  response = self._clients.video.get(self._clients.auth_header, rid)
@@ -890,6 +959,10 @@ class NominalClient:
  response = self._clients.connection.get_connection(self._clients.auth_header, rid)
  return Connection._from_conjure(self._clients, response)
 
+ @deprecated(
+ "`create_video_from_mcap` is deprecated and will be removed in a future version. "
+ "Create a new video with `create_video` and then `add_mcap` to upload a file to the video."
+ )
  def create_video_from_mcap(
  self,
  path: Path | str,
@@ -910,18 +983,14 @@ class NominalClient:
  if name is None:
  name = path.name
 
- with path.open("rb") as data_file:
- return self.create_video_from_mcap_io(
- data_file,
- name=name,
- topic=topic,
- file_type=FileTypes.MCAP,
- description=description,
- labels=labels,
- properties=properties,
- file_name=path_upload_name(path, FileTypes.MCAP),
- )
+ video = self.create_video(name, description=description, labels=labels, properties=properties)
+ video.add_mcap(path, topic, description)
+ return video
 
+ @deprecated(
+ "`create_video_from_mcap_io` is deprecated and will be removed in a future version. "
+ "Create a new video with `create_video` and then `add_mcap_from_io` to upload a file to the video."
+ )
  def create_video_from_mcap_io(
  self,
  mcap: BinaryIO,
@@ -940,40 +1009,9 @@ class NominalClient:
 
  If name is None, the name of the file will be used.
  """
- if isinstance(mcap, TextIOBase):
- raise TypeError(f"dataset {mcap} must be open in binary mode, rather than text mode")
-
- if file_name is None:
- file_name = name
-
- file_type = FileType(*file_type)
- s3_path = upload_multipart_io(
- self._clients.auth_header, self._clients.workspace_rid, mcap, file_name, file_type, self._clients.upload
- )
- request = ingest_api.IngestRequest(
- options=ingest_api.IngestOptions(
- video=ingest_api.VideoOpts(
- source=ingest_api.IngestSource(s3=ingest_api.S3IngestSource(s3_path)),
- target=ingest_api.VideoIngestTarget(
- new=ingest_api.NewVideoIngestDestination(
- title=name,
- description=description,
- properties={} if properties is None else dict(properties),
- labels=list(labels),
- workspace=self._clients.workspace_rid,
- marking_rids=[],
- )
- ),
- timestamp_manifest=scout_video_api.VideoFileTimestampManifest(
- mcap=scout_video_api.McapTimestampManifest(api.McapChannelLocator(topic=topic))
- )
- )
- )
- )
- response = self._clients.ingest.ingest(self._clients.auth_header, request)
- if response.details.video is None:
- raise NominalIngestError("error ingesting mcap video: no video created")
- return self.get_video(response.details.video.video_rid)
+ video = self.create_video(name, description=description, labels=labels, properties=properties)
+ video.add_mcap_from_io(mcap, file_name or name, topic, description, file_type)
+ return video
 
  def create_streaming_connection(
  self,
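
Both MCAP video helpers are now thin deprecated shims over the two-step flow that their warnings describe: create the video, then attach the MCAP file. A sketch of that flow, assuming the same placeholder `client` plus a made-up file path and topic:

```python
# `client` is an assumed NominalClient; the name, path, and topic are placeholders.
video = client.create_video(
    "front-camera",
    description="Front camera capture from flight 7",
)
# add_mcap(path, topic, description) mirrors the call the deprecated shim now makes.
video.add_mcap("flight7.mcap", "/camera/front/compressed", "Front camera capture from flight 7")
```

Existing callers of `create_empty_video` keep working unchanged, since it is now bound as an alias of `create_video`.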
@@ -1158,19 +1196,17 @@ class NominalClient:
  properties: Mapping[str, str] | None = None,
  labels: Iterable[str] = (),
  ) -> Event:
- request = event.CreateEvent(
+ return _create_event(
+ clients=self._clients,
  name=name,
+ type=type,
+ start=start,
+ duration=duration,
  description=description,
- asset_rids=[rid_from_instance_or_string(asset) for asset in assets],
- timestamp=_SecondsNanos.from_flexible(start).to_api(),
- duration=_to_api_duration(duration),
- origins=[],
- properties=dict(properties) if properties else {},
- labels=list(labels),
- type=type._to_api_event_type(),
+ assets=assets,
+ properties=properties,
+ labels=labels,
  )
- response = self._clients.event.create_event(self._clients.auth_header, request)
- return Event._from_conjure(self._clients, response)
 
  def get_event(self, rid: str) -> Event:
  events = self.get_events([rid])
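
`create_event` now builds the event through `nominal.core.event._create_event` rather than constructing `event.CreateEvent` inline. A hedged sketch of a call site, assuming `client` is a NominalClient, with a placeholder `flag_type` standing in for an `EventType` value (its members are not shown in this diff), and assuming `duration` accepts a `timedelta`:

```python
from datetime import datetime, timedelta, timezone

# `client`, `flag_type`, and the asset RID are placeholders for illustration only.
event = client.create_event(
    name="overcurrent detected",
    type=flag_type,  # an EventType value from nominal.core._event_types
    start=datetime(2025, 1, 1, 12, 5, tzinfo=timezone.utc),
    duration=timedelta(seconds=3),
    assets=["ri.asset.example-1"],
    labels=["flight-7"],
)
```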
@@ -1205,10 +1241,6 @@ class NominalClient:
  # TODO (drake-nominal): Expose checklist_refs to users
  return list(self._iter_search_data_reviews(assets, runs))
 
- def _iter_search_events(self, query: event.SearchQuery) -> Iterable[Event]:
- for e in search_events_paginated(self._clients.event, self._clients.auth_header, query):
- yield Event._from_conjure(self._clients, e)
-
  def search_events(
  self,
  *,
@@ -1251,21 +1283,21 @@ class NominalClient:
  Returns:
  All events which match all of the provided conditions
  """
- query = create_search_events_query(
+ return _search_events(
+ clients=self._clients,
  search_text=search_text,
  after=after,
  before=before,
- assets=None if assets is None else [rid_from_instance_or_string(asset) for asset in assets],
+ asset_rids=[rid_from_instance_or_string(asset) for asset in assets] if assets else None,
  labels=labels,
  properties=properties,
- created_by=rid_from_instance_or_string(created_by) if created_by else None,
- workbook=rid_from_instance_or_string(workbook) if workbook else None,
- data_review=rid_from_instance_or_string(data_review) if data_review else None,
- assignee=rid_from_instance_or_string(assignee) if assignee else None,
+ created_by_rid=rid_from_instance_or_string(created_by) if created_by else None,
+ workbook_rid=rid_from_instance_or_string(workbook) if workbook else None,
+ data_review_rid=rid_from_instance_or_string(data_review) if data_review else None,
+ assignee_rid=rid_from_instance_or_string(assignee) if assignee else None,
  event_type=event_type,
  workspace_rid=self._workspace_rid_for_search(workspace or WorkspaceSearchType.ALL),
  )
- return list(self._iter_search_events(query))
 
  def get_containerized_extractor(self, rid: str) -> ContainerizedExtractor:
  return ContainerizedExtractor._from_conjure(
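
`search_events` keeps its public signature but now returns the result of `nominal.core.event._search_events` directly instead of paginating through the removed `_iter_search_events` helper. A usage sketch, assuming the keyword-only filters remain optional as in prior releases and using placeholder filter values:

```python
# `client` is an assumed NominalClient; the filter values are placeholders.
events = client.search_events(
    search_text="overcurrent",  # free-text filter
    labels=["flight-7"],        # label filter
)
```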
@@ -1453,7 +1485,7 @@ class NominalClient:
  properties: A mapping of key-value pairs that must ALL be present on a workbook to be included.
  created_by: Searches for workbook templates with the given creator's rid
  archived: Searches for workbook templates that are archived if true
- published: Searches f8or workbook templates that have been published if true
+ published: Searches for workbook templates that have been published if true
 
  Returns:
  All workbook templates which match all of the provided conditions
nominal/core/dataset.py CHANGED
@@ -1,5 +1,6 @@
  from __future__ import annotations
 
+ import abc
  import logging
  from dataclasses import dataclass
  from datetime import timedelta
@@ -8,7 +9,7 @@ from pathlib import Path
  from types import MappingProxyType
  from typing import BinaryIO, Iterable, Mapping, Sequence, TypeAlias, overload
 
- from nominal_api import api, ingest_api, scout_catalog
+ from nominal_api import api, ingest_api, scout_asset_api, scout_catalog
  from typing_extensions import Self, deprecated
 
  from nominal.core._stream.batch_processor import process_log_batch
@@ -646,6 +647,288 @@ class Dataset(DataSource, RefreshableMixin[scout_catalog.EnrichedDataset]):
  )
 
 
+ def _unify_tags(datascope_tags: Mapping[str, str], provided_tags: Mapping[str, str] | None) -> Mapping[str, str]:
+ return {**datascope_tags, **(provided_tags or {})}
+
+
+ class _DatasetWrapper(abc.ABC):
+ """A lightweight façade over `nominal.core.Dataset` that routes ingest calls through a *data scope*.
+
+ `_DatasetWrapper` resolves `data_scope_name` to a backing `nominal.core.Dataset` and then delegates to the
+ corresponding `Dataset` method.
+
+ How this differs from `Dataset`
+ -------------------------------
+ - All "add data" methods take an extra first argument, `data_scope_name`, which selects the target dataset.
+ - For methods that accept `tags`, this wrapper merges the scope's required tags into the provided tags.
+ User-provided tags take precedence on key collisions.
+ - Some formats cannot be safely tagged with scope tags; those wrapper methods raise `RuntimeError` when the selected
+ scope requires tags.
+
+ Subclasses must implement `_list_dataset_scopes`, which is used to resolve scopes.
+ """
+
+ # static typing for required field
+ _clients: Dataset._Clients
+
+ @abc.abstractmethod
+ def _list_dataset_scopes(self) -> Sequence[scout_asset_api.DataScope]:
+ """Return the data scopes available to this wrapper.
+
+ Subclasses provide the authoritative list of `scout_asset_api.DataScope` objects used to
+ resolve `data_scope_name` in wrapper methods.
+ """
+
+ def _get_dataset_scope(self, data_scope_name: str) -> tuple[Dataset, Mapping[str, str]]:
+ """Resolve a data scope name to its backing dataset and required series tags.
+
+ Returns:
+ A tuple of the resolved `Dataset` and the scope's required `series_tags`.
+
+ Raises:
+ ValueError: If no scope exists with the given `data_scope_name`, or if the scope is not backed by a dataset.
+ """
+ dataset_scopes = {scope.data_scope_name: scope for scope in self._list_dataset_scopes()}
+ data_scope = dataset_scopes.get(data_scope_name)
+ if data_scope is None:
+ raise ValueError(f"No such data scope found with data_scope_name {data_scope_name}")
+ elif data_scope.data_source.dataset is None:
+ raise ValueError(f"Datascope {data_scope_name} is not a dataset!")
+
+ dataset = Dataset._from_conjure(
+ self._clients,
+ _get_dataset(self._clients.auth_header, self._clients.catalog, data_scope.data_source.dataset),
+ )
+ return dataset, data_scope.series_tags
+
+ ################
+ # Add Data API #
+ ################
+
+ def add_tabular_data(
+ self,
+ data_scope_name: str,
+ path: Path | str,
+ *,
+ timestamp_column: str,
+ timestamp_type: _AnyTimestampType,
+ tag_columns: Mapping[str, str] | None = None,
+ tags: Mapping[str, str] | None = None,
+ ) -> DatasetFile:
+ """Append tabular data on-disk to the dataset selected by `data_scope_name`.
+
+ This method behaves like `nominal.core.Dataset.add_tabular_data`, except that the data scope's required
+ tags are merged into `tags` before ingest (with user-provided tags taking precedence on key collisions).
+
+ For supported file types, argument semantics, and return value details, see
+ `nominal.core.Dataset.add_tabular_data`.
+ """
+ dataset, scope_tags = self._get_dataset_scope(data_scope_name)
+ return dataset.add_tabular_data(
+ path,
+ timestamp_column=timestamp_column,
+ timestamp_type=timestamp_type,
+ tag_columns=tag_columns,
+ tags=_unify_tags(scope_tags, tags),
+ )
+
+ def add_avro_stream(
+ self,
+ data_scope_name: str,
+ path: Path | str,
+ ) -> DatasetFile:
+ """Upload an avro stream file to the dataset selected by `data_scope_name`.
+
+ This method behaves like `nominal.core.Dataset.add_avro_stream`, with one important difference:
+ avro stream ingestion does not support applying scope tags. If the selected scope requires tags, this method
+ raises `RuntimeError` rather than ingesting (potentially) untagged data. This file may still be ingested
+ directly on the dataset itself if it is known to contain the correct set of tags.
+
+ For schema requirements and return value details, see
+ `nominal.core.Dataset.add_avro_stream`.
+ """
+ dataset, scope_tags = self._get_dataset_scope(data_scope_name)
+
+ # TODO(drake): remove once avro stream supports ingest with tags
+ if scope_tags:
+ raise RuntimeError(
+ f"Cannot add avro files to datascope {data_scope_name}-- data would not get "
+ f"tagged with required tags: {scope_tags}"
+ )
+
+ return dataset.add_avro_stream(path)
+
+ def add_journal_json(
+ self,
+ data_scope_name: str,
+ path: Path | str,
+ ) -> DatasetFile:
+ """Add a journald json file to the dataset selected by `data_scope_name`.
+
+ This method behaves like `nominal.core.Dataset.add_journal_json`, with one important difference:
+ journal json ingestion does not support applying scope tags as args. If the selected scope requires tags,
+ this method raises `RuntimeError` rather than potentially ingesting untagged data. This file may still be
+ ingested directly on the dataset itself if it is known to contain the correct set of args.
+
+ For file expectations and return value details, see
+ `nominal.core.Dataset.add_journal_json`.
+ """
+ dataset, scope_tags = self._get_dataset_scope(data_scope_name)
+
+ # TODO(drake): remove once journal json supports ingest with tags
+ if scope_tags:
+ raise RuntimeError(
+ f"Cannot add journal json files to datascope {data_scope_name}-- data would not get "
+ f"tagged with required arguments: {scope_tags}"
+ )
+
+ return dataset.add_journal_json(path)
+
+ def add_mcap(
+ self,
+ data_scope_name: str,
+ path: Path | str,
+ *,
+ include_topics: Iterable[str] | None = None,
+ exclude_topics: Iterable[str] | None = None,
+ ) -> DatasetFile:
+ """Add an MCAP file to the dataset selected by `data_scope_name`.
+
+ This method behaves like `nominal.core.Dataset.add_mcap`, with one important difference:
+ MCAP ingestion does not support applying scope tags. If the selected scope requires tags, this method raises
+ `RuntimeError` rather than ingesting untagged data.
+
+ For topic-filtering semantics and return value details, see
+ `nominal.core.Dataset.add_mcap`.
+ """
+ dataset, scope_tags = self._get_dataset_scope(data_scope_name)
+
+ # TODO(drake): remove once MCAP supports ingest with tags
+ if scope_tags:
+ raise RuntimeError(
+ f"Cannot add mcap files to datascope {data_scope_name}-- data would not get "
+ f"tagged with required tags: {scope_tags}"
+ )
+
+ return dataset.add_mcap(path, include_topics=include_topics, exclude_topics=exclude_topics)
+
+ def add_ardupilot_dataflash(
+ self,
+ data_scope_name: str,
+ path: Path | str,
+ tags: Mapping[str, str] | None = None,
+ ) -> DatasetFile:
+ """Add a Dataflash file to the dataset selected by `data_scope_name`.
+
+ This method behaves like `nominal.core.Dataset.add_ardupilot_dataflash`, except that the data scope's
+ required tags are merged into `tags` before ingest (with user-provided tags taking precedence on key
+ collisions).
+
+ For file expectations and return value details, see
+ `nominal.core.Dataset.add_ardupilot_dataflash`.
+ """
+ dataset, scope_tags = self._get_dataset_scope(data_scope_name)
+ return dataset.add_ardupilot_dataflash(path, tags=_unify_tags(scope_tags, tags))
+
+ @overload
+ def add_containerized(
+ self,
+ data_scope_name: str,
+ extractor: str | ContainerizedExtractor,
+ sources: Mapping[str, Path | str],
+ *,
+ tag: str | None = None,
+ tags: Mapping[str, str] | None = None,
+ ) -> DatasetFile: ...
+ @overload
+ def add_containerized(
+ self,
+ data_scope_name: str,
+ extractor: str | ContainerizedExtractor,
+ sources: Mapping[str, Path | str],
+ *,
+ tag: str | None = None,
+ tags: Mapping[str, str] | None = None,
+ timestamp_column: str,
+ timestamp_type: _AnyTimestampType,
+ ) -> DatasetFile: ...
+ def add_containerized(
+ self,
+ data_scope_name: str,
+ extractor: str | ContainerizedExtractor,
+ sources: Mapping[str, Path | str],
+ *,
+ tag: str | None = None,
+ tags: Mapping[str, str] | None = None,
+ timestamp_column: str | None = None,
+ timestamp_type: _AnyTimestampType | None = None,
+ ) -> DatasetFile:
+ """Add data from proprietary formats using a pre-registered custom extractor.
+
+ This method behaves like `nominal.core.Dataset.add_containerized`, except that the data scope's required
+ tags are merged into `tags` before ingest (with user-provided tags taking precedence on key collisions).
+
+ This wrapper also enforces that `timestamp_column` and `timestamp_type` are provided together (or omitted
+ together) before delegating.
+
+ For extractor inputs, tagging semantics, timestamp metadata behavior, and return value details, see
+ `nominal.core.Dataset.add_containerized`.
+ """
+ dataset, scope_tags = self._get_dataset_scope(data_scope_name)
+ if timestamp_column is None and timestamp_type is None:
+ return dataset.add_containerized(
+ extractor,
+ sources,
+ tag=tag,
+ tags=_unify_tags(scope_tags, tags),
+ )
+ elif timestamp_column is not None and timestamp_type is not None:
+ return dataset.add_containerized(
+ extractor,
+ sources,
+ tag=tag,
+ tags=_unify_tags(scope_tags, tags),
+ timestamp_column=timestamp_column,
+ timestamp_type=timestamp_type,
+ )
+ else:
+ raise ValueError(
+ "Only one of `timestamp_column` and `timestamp_type` were provided to `add_containerized`, "
+ "either both must or neither must be provided."
+ )
+
+ def add_from_io(
+ self,
+ data_scope_name: str,
+ data_stream: BinaryIO,
+ file_type: tuple[str, str] | FileType,
+ *,
+ timestamp_column: str,
+ timestamp_type: _AnyTimestampType,
+ file_name: str | None = None,
+ tag_columns: Mapping[str, str] | None = None,
+ tags: Mapping[str, str] | None = None,
+ ) -> DatasetFile:
+ """Append to the dataset selected by `data_scope_name` from a file-like object.
+
+ This method behaves like `nominal.core.Dataset.add_from_io`, except that the data scope's required tags
+ are merged into `tags` before ingest (with user-provided tags taking precedence on key collisions).
+
+ For stream requirements, supported file types, argument semantics, and return value details, see
+ `nominal.core.Dataset.add_from_io`.
+ """
+ dataset, scope_tags = self._get_dataset_scope(data_scope_name)
+ return dataset.add_from_io(
+ data_stream,
+ timestamp_column=timestamp_column,
+ timestamp_type=timestamp_type,
+ file_type=file_type,
+ file_name=file_name,
+ tag_columns=tag_columns,
+ tags=_unify_tags(scope_tags, tags),
+ )
+
+
  @deprecated(
  "poll_until_ingestion_completed() is deprecated and will be removed in a future release. "
  "Instead, call poll_until_ingestion_completed() on individual DatasetFiles."