nominal 1.97.0__py3-none-any.whl → 1.99.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
CHANGELOG.md CHANGED
@@ -1,5 +1,19 @@
1
1
  # Changelog
2
2
 
3
+ ## [1.99.0](https://github.com/nominal-io/nominal-client/compare/v1.98.0...v1.99.0) (2025-12-04)
4
+
5
+
6
+ ### Features
7
+
8
+ * allow ingesting .avro files ([#544](https://github.com/nominal-io/nominal-client/issues/544)) ([f5c4561](https://github.com/nominal-io/nominal-client/commit/f5c4561e1db6174a56d6b32b388ed7ad94679fdf))
9
+
10
+ ## [1.98.0](https://github.com/nominal-io/nominal-client/compare/v1.97.0...v1.98.0) (2025-12-04)
11
+
12
+
13
+ ### Features
14
+
15
+ * as_files_ingested iterator for dataset files ([#533](https://github.com/nominal-io/nominal-client/issues/533)) ([7afc0f6](https://github.com/nominal-io/nominal-client/commit/7afc0f61d24f930d37d55c8e0840ebd18b8711b6))
16
+
3
17
  ## [1.97.0](https://github.com/nominal-io/nominal-client/compare/v1.96.0...v1.97.0) (2025-12-04)
4
18
 
5
19
 
nominal/core/__init__.py CHANGED
@@ -17,7 +17,7 @@ from nominal.core.containerized_extractors import (
17
17
  )
18
18
  from nominal.core.data_review import CheckViolation, DataReview, DataReviewBuilder
19
19
  from nominal.core.dataset import Dataset, poll_until_ingestion_completed
20
- from nominal.core.dataset_file import DatasetFile
20
+ from nominal.core.dataset_file import DatasetFile, IngestWaitType, as_files_ingested, wait_for_files_to_ingest
21
21
  from nominal.core.datasource import DataSource
22
22
  from nominal.core.event import Event, EventType
23
23
  from nominal.core.filetype import FileType, FileTypes
@@ -33,6 +33,7 @@ from nominal.core.workbook_template import WorkbookTemplate
33
33
  from nominal.core.workspace import Workspace
34
34
 
35
35
  __all__ = [
36
+ "as_files_ingested",
36
37
  "Asset",
37
38
  "Attachment",
38
39
  "Bounds",
@@ -53,6 +54,7 @@ __all__ = [
53
54
  "FileExtractionInput",
54
55
  "FileType",
55
56
  "FileTypes",
57
+ "IngestWaitType",
56
58
  "LinkDict",
57
59
  "LogPoint",
58
60
  "NominalClient",
@@ -67,6 +69,7 @@ __all__ = [
67
69
  "UserPassAuth",
68
70
  "Video",
69
71
  "VideoFile",
72
+ "wait_for_files_to_ingest",
70
73
  "Workbook",
71
74
  "WorkbookTemplate",
72
75
  "WorkbookType",
nominal/core/dataset.py CHANGED
@@ -203,6 +203,78 @@ class Dataset(DataSource, RefreshableMixin[scout_catalog.EnrichedDataset]):
203
203
  # Backward compatibility
204
204
  add_to_dataset_from_io = add_from_io
205
205
 
206
def add_avro_stream(
    self,
    path: Path | str,
) -> DatasetFile:
    """Upload an Avro stream file and request its ingestion into this dataset.

    Avro streams are a "stream-like" file format intended for data that does not fit a
    columnar/tabular layout. The format closely matches Nominal's streaming API, which makes
    it useful as a backup file when the network connection drops while streaming.

    The file must use the schema below; any other schema results in a failed ingestion.
    {
        "type": "record",
        "name": "AvroStream",
        "namespace": "io.nominal.ingest",
        "fields": [
            {
                "name": "channel",
                "type": "string",
                "doc": "Channel/series name (e.g., 'vehicle_id', 'col_1', 'temperature')",
            },
            {
                "name": "timestamps",
                "type": {"type": "array", "items": "long"},
                "doc": "Array of Unix timestamps in nanoseconds",
            },
            {
                "name": "values",
                "type": {"type": "array", "items": ["double", "string"]},
                "doc": "Array of values. Can either be doubles or strings",
            },
            {
                "name": "tags",
                "type": {"type": "map", "values": "string"},
                "default": {},
                "doc": "Key-value metadata tags",
            },
        ],
    }

    Args:
        path: Path to the .avro file to upload

    Returns:
        Reference to the ingesting DatasetFile

    """
    # Step 1: push the local file to object storage via the multipart uploader.
    uploaded_s3_path = upload_multipart_file(
        self._clients.auth_header,
        self._clients.workspace_rid,
        Path(path),
        self._clients.upload,
        file_type=FileTypes.AVRO_STREAM,
    )

    # Step 2: describe the ingest -- read from the uploaded object, write into this dataset.
    destination = ingest_api.DatasetIngestTarget(
        existing=ingest_api.ExistingDatasetIngestDestination(dataset_rid=self.rid)
    )
    stream_opts = ingest_api.AvroStreamOpts(
        source=ingest_api.IngestSource(s3=ingest_api.S3IngestSource(uploaded_s3_path)),
        target=destination,
    )
    request = ingest_api.IngestRequest(options=ingest_api.IngestOptions(avro_stream=stream_opts))

    # Step 3: submit the request and convert the response into a DatasetFile handle.
    response = self._clients.ingest.ingest(self._clients.auth_header, request)
    return self._handle_ingest_response(response)
277
+
206
278
  def add_journal_json(
207
279
  self,
208
280
  path: Path | str,
@@ -6,7 +6,7 @@ import pathlib
6
6
  import time
7
7
  from dataclasses import dataclass, field
8
8
  from enum import Enum
9
- from typing import Mapping, Protocol, Sequence
9
+ from typing import Iterable, Mapping, Protocol, Sequence
10
10
  from urllib.parse import unquote, urlparse
11
11
 
12
12
  from nominal_api import api, ingest_api, scout_catalog
@@ -287,3 +287,115 @@ class IngestStatus(Enum):
287
287
  elif status.error is not None:
288
288
  return cls.FAILED
289
289
  raise ValueError(f"Unknown ingest status: {status.type}")
290
+
291
+
292
class IngestWaitType(Enum):
    """Exit conditions accepted by `wait_for_files_to_ingest` through `return_when`.

    The member names follow the `return_when` constants of `concurrent.futures.wait`.
    """

    # Stop waiting once at least one file has finished ingesting (successfully or not).
    FIRST_COMPLETED = "FIRST_COMPLETED"
    # Stop waiting once at least one file has failed to ingest.
    FIRST_EXCEPTION = "FIRST_EXCEPTION"
    # Keep waiting until no file is still ingesting.
    ALL_COMPLETED = "ALL_COMPLETED"
296
+
297
+
298
+ def wait_for_files_to_ingest(
299
+ files: Sequence[DatasetFile],
300
+ *,
301
+ poll_interval: datetime.timedelta = datetime.timedelta(seconds=1),
302
+ timeout: datetime.timedelta | None = None,
303
+ return_when: IngestWaitType = IngestWaitType.ALL_COMPLETED,
304
+ ) -> tuple[Sequence[DatasetFile], Sequence[DatasetFile]]:
305
+ """Blocks until all of the dataset files have completed their ingestion (or other specified conditions)
306
+ in a similar fashion to `concurrent.futures.wait`.
307
+
308
+ Any files that are already ingested (successfully or with errors) will be returned as "done", whereas any
309
+ files still ingesting by the time of this function's exit will be returned as "not done".
310
+
311
+ Args:
312
+ files: Dataset files to monitor for ingestion completion.
313
+ poll_interval: Interval to sleep between polling the remaining files under watch.
314
+ timeout: If given, the maximum time to wait before returning
315
+ return_when: Condition for this function to exit. By default, this function will block until all files
316
+ have completed their ingestion (successfully or unsuccessfully), but this can be changed to return
317
+ upon the first completed or first failing ingest. This behavior mirrors that of
318
+ `concurrent.futures.wait`.
319
+
320
+ Returns:
321
+ Returns a tuple of (done, not done) dataset files.
322
+ """
323
+ start_time = datetime.datetime.now()
324
+ done: list[DatasetFile] = []
325
+ not_done: list[DatasetFile] = [*files]
326
+ has_failed = False
327
+
328
+ while not_done and (timeout is None or datetime.datetime.now() - start_time < timeout):
329
+ logger.info("Polling for ingestion completion for %d files (%d total)", len(not_done), len(files))
330
+
331
+ next_not_done = []
332
+ for file in not_done:
333
+ latest_api = file._get_latest_api()
334
+ latest_file = file._refresh_from_api(latest_api)
335
+ match file.ingest_status:
336
+ case IngestStatus.SUCCESS:
337
+ done.append(latest_file)
338
+ case IngestStatus.FAILED:
339
+ logger.warning(
340
+ "Dataset file %s from dataset %s failed to ingest! Error message: %s",
341
+ latest_file.id,
342
+ latest_file.dataset_rid,
343
+ latest_api.ingest_status.error.message if latest_api.ingest_status.error else "",
344
+ )
345
+ done.append(latest_file)
346
+ has_failed = True
347
+ case IngestStatus.IN_PROGRESS:
348
+ next_not_done.append(latest_file)
349
+
350
+ not_done = next_not_done
351
+
352
+ if has_failed and return_when is IngestWaitType.FIRST_EXCEPTION:
353
+ break
354
+ elif done and return_when is IngestWaitType.FIRST_COMPLETED:
355
+ break
356
+ elif not not_done:
357
+ break
358
+
359
+ if timeout is not None and datetime.datetime.now() - start_time < timeout:
360
+ logger.info(
361
+ "Sleeping for %f seconds while awaiting ingestion for %d files (%d total)... ",
362
+ len(not_done),
363
+ len(files),
364
+ poll_interval.total_seconds(),
365
+ )
366
+ time.sleep(poll_interval.total_seconds())
367
+
368
+ return done, not_done
369
+
370
+
371
def as_files_ingested(
    files: Sequence[DatasetFile],
    *,
    poll_interval: datetime.timedelta = datetime.timedelta(seconds=1),
) -> Iterable[DatasetFile]:
    """Yield DatasetFiles as their ingestion finishes, in the spirit of `concurrent.futures.as_completed`.

    Files whose ingestion has already finished (successfully or with errors) are yielded right away.

    Args:
        files: Dataset files to monitor for ingestion completion.
        poll_interval: Interval to sleep between polling the remaining files under watch.

    Yields:
        DatasetFiles as they finish ingesting. Because completion is detected by polling, files are not
        yielded in strict order of completion time. Check the `ingest_status` of each yielded file if the
        outcome matters, since failed files are yielded as well.
    """
    remaining: Sequence[DatasetFile] = [*files]
    while remaining:
        logger.info("Awaiting ingestion for %d files (%d total)", len(remaining), len(files))

        # Block until at least one of the remaining files has finished, then hand those out.
        finished, remaining = wait_for_files_to_ingest(
            remaining,
            poll_interval=poll_interval,
            return_when=IngestWaitType.FIRST_COMPLETED,
        )
        for ingested_file in finished:
            yield ingested_file

        if not remaining:
            break

        # Pause before starting the next round of polling for the files still ingesting.
        time.sleep(poll_interval.total_seconds())
nominal/core/filetype.py CHANGED
@@ -111,6 +111,7 @@ class FileType(NamedTuple):
111
111
 
112
112
 
113
113
  class FileTypes:
114
+ AVRO_STREAM: FileType = FileType(".avro", "application/avro")
114
115
  BINARY: FileType = FileType("", "application/octet-stream")
115
116
  CSV: FileType = FileType(".csv", "text/csv")
116
117
  CSV_GZ: FileType = FileType(".csv.gz", "text/csv")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nominal
3
- Version: 1.97.0
3
+ Version: 1.99.0
4
4
  Summary: Automate Nominal workflows in Python
5
5
  Project-URL: Homepage, https://nominal.io
6
6
  Project-URL: Documentation, https://docs.nominal.io
@@ -1,4 +1,4 @@
1
- CHANGELOG.md,sha256=6LrMT1H8kFV_1cH8Bu_s8Itk_yzsIcAd0UfnMWuHdAU,80317
1
+ CHANGELOG.md,sha256=52CVvxyESwAGvmBLo5soPa95wuhlqckBIYFLtsSI9zE,80966
2
2
  LICENSE,sha256=zEGHG9mjDjaIS3I79O8mweQo-yiTbqx8jJvUPppVAwk,1067
3
3
  README.md,sha256=KKe0dxh_pHXCtB7I9G4qWGQYvot_BZU8yW6MJyuyUHM,311
4
4
  nominal/__init__.py,sha256=rbraORnXUrNn1hywLXM0XwSQCd9UmQt20PDYlsBalfE,2167
@@ -27,7 +27,7 @@ nominal/cli/util/global_decorators.py,sha256=SBxhz4KbMlWDcCV08feouftd3HLnBNR-JJt
27
27
  nominal/cli/util/verify_connection.py,sha256=KU17ejaDfKBLmLiZ3MZSVLyfrqNE7c6mFBvskhqQLCo,1902
28
28
  nominal/config/__init__.py,sha256=wV8cq8X3J4NTJ5H_uR5THaMT_NQpWQO5qCUGEb-rPnM,3157
29
29
  nominal/config/_config.py,sha256=yKq_H1iYJDoxRfLz2iXLbbVdoL0MTEY0FS4eVL12w0g,2004
30
- nominal/core/__init__.py,sha256=N2N_kesiFkvfD6eVmGus2gwAfCsZbwR9KzYw4Wtlvc8,2193
30
+ nominal/core/__init__.py,sha256=5eC2J0lzpV7JcuKDUimJCfgXuVL7HNgHrLhqxcy5NCc,2333
31
31
  nominal/core/_clientsbunch.py,sha256=YwciugX7rQ9AOPHyvKuavG7b9SlX1PURRquP37nvLqE,8458
32
32
  nominal/core/_constants.py,sha256=SrxgaSqAEB1MvTSrorgGam3eO29iCmRr6VIdajxX3gI,56
33
33
  nominal/core/asset.py,sha256=Kq3RvdFSdAK-ViACpd_-H30fz1lnOaGU3zgFt13ag20,16674
@@ -39,12 +39,12 @@ nominal/core/client.py,sha256=L6IQVEPTiKbOjtbn4G0_R90jbVeOOxpHBHmminGQ3FE,67403
39
39
  nominal/core/connection.py,sha256=ySbPN_a2takVa8wIU9mK4fB6vYLyZnN-qSmXVkLUxAY,5157
40
40
  nominal/core/containerized_extractors.py,sha256=HrcMJzdE-hH66AgYIA0LTeFELsBHa0Sm0vlsKMiIzDU,9501
41
41
  nominal/core/data_review.py,sha256=bEnRsd8LI4x9YOBPcF2H3h5-e12A7Gh8gQfsNUAZmPQ,7922
42
- nominal/core/dataset.py,sha256=gbQYtAYx-fHaewOZUSC7P9CHlMfmdchtTv1XUuNKg3Y,29933
43
- nominal/core/dataset_file.py,sha256=OhkRsI4F3bz9YLF_lQklFfvi6Crq1r0zMMRGkWQEbeQ,11709
42
+ nominal/core/dataset.py,sha256=q8V5ULrGO-wHBbhm7_Qd_nXzp-D5FTQGfyYbIxhIvjI,32567
43
+ nominal/core/dataset_file.py,sha256=oENANJ17A4K63cZ8Fr7lUm_kVPyA4fL2rUsZ3oXXk2U,16396
44
44
  nominal/core/datasource.py,sha256=D9jHirAzUZ0pc3nW1XIURpw1UqQoA2E-nUUylZR1jbE,16707
45
45
  nominal/core/event.py,sha256=D8qIX_dTjfSHN7jFW8vV-9htbQTaqk9VvRfK7t-sbbw,5891
46
46
  nominal/core/exceptions.py,sha256=GUpwXRgdYamLl6684FE8ttCRHkBx6WEhOZ3NPE-ybD4,2671
47
- nominal/core/filetype.py,sha256=OgUWR9Ebr-KGVNoXZCnxlHZh9Tp3g9kY1VeTv4TULPQ,5399
47
+ nominal/core/filetype.py,sha256=jAPe6F7pDT8ixsD2-Y8eJdHOxgimdEQte4RQybWwsos,5465
48
48
  nominal/core/log.py,sha256=z3hI3CIEyMwpUSWjwBsJ6a3JNGzBbsmrVusSU6uI7CY,3885
49
49
  nominal/core/run.py,sha256=IvKyvyQ9sOefQQorDbOo1KHSGNtNN_9OnsJajlgoRSg,14803
50
50
  nominal/core/secret.py,sha256=Ckq48m60i7rktxL9GY-nxHU5v8gHv9F1-JN7_MSf4bM,2863
@@ -102,8 +102,8 @@ nominal/thirdparty/polars/polars_export_handler.py,sha256=hGCSwXX9dC4MG01CmmjlTb
102
102
  nominal/thirdparty/tdms/__init__.py,sha256=6n2ImFr2Wiil6JM1P5Q7Mpr0VzLcnDkmup_ftNpPq-s,142
103
103
  nominal/thirdparty/tdms/_tdms.py,sha256=eiHFTUviyDPDClckNldjs_jTTSH_sdmboKDq0oIGChQ,8711
104
104
  nominal/ts/__init__.py,sha256=hmd0ENvDhxRnzDKGLxIub6QG8LpcxCgcyAct029CaEs,21442
105
- nominal-1.97.0.dist-info/METADATA,sha256=f_-Nkovbw63L8TqEce4QVrxF6xRqpH8lOJwSBt97-9g,1946
106
- nominal-1.97.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
107
- nominal-1.97.0.dist-info/entry_points.txt,sha256=-mCLhxgg9R_lm5efT7vW9wuBH12izvY322R0a3TYxbE,66
108
- nominal-1.97.0.dist-info/licenses/LICENSE,sha256=zEGHG9mjDjaIS3I79O8mweQo-yiTbqx8jJvUPppVAwk,1067
109
- nominal-1.97.0.dist-info/RECORD,,
105
+ nominal-1.99.0.dist-info/METADATA,sha256=PEAgXJMK7XkQoxWyji_iTfWdSiZ31DcdzRBjVqF6Ixc,1946
106
+ nominal-1.99.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
107
+ nominal-1.99.0.dist-info/entry_points.txt,sha256=-mCLhxgg9R_lm5efT7vW9wuBH12izvY322R0a3TYxbE,66
108
+ nominal-1.99.0.dist-info/licenses/LICENSE,sha256=zEGHG9mjDjaIS3I79O8mweQo-yiTbqx8jJvUPppVAwk,1067
109
+ nominal-1.99.0.dist-info/RECORD,,