nominal 1.97.0__tar.gz → 1.99.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {nominal-1.97.0 → nominal-1.99.0}/CHANGELOG.md +14 -0
- {nominal-1.97.0 → nominal-1.99.0}/PKG-INFO +1 -1
- {nominal-1.97.0 → nominal-1.99.0}/nominal/core/__init__.py +4 -1
- {nominal-1.97.0 → nominal-1.99.0}/nominal/core/dataset.py +72 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/core/dataset_file.py +113 -1
- {nominal-1.97.0 → nominal-1.99.0}/nominal/core/filetype.py +1 -0
- {nominal-1.97.0 → nominal-1.99.0}/pyproject.toml +1 -1
- {nominal-1.97.0 → nominal-1.99.0}/.gitignore +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/LICENSE +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/README.md +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/__init__.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/__main__.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/_utils/README.md +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/_utils/__init__.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/_utils/dataclass_tools.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/_utils/deprecation_tools.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/_utils/iterator_tools.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/_utils/streaming_tools.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/_utils/timing_tools.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/cli/__init__.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/cli/__main__.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/cli/attachment.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/cli/auth.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/cli/config.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/cli/dataset.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/cli/download.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/cli/mis.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/cli/run.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/cli/util/__init__.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/cli/util/click_log_handler.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/cli/util/global_decorators.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/cli/util/verify_connection.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/config/__init__.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/config/_config.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/core/_clientsbunch.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/core/_constants.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/core/_stream/__init__.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/core/_stream/batch_processor.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/core/_stream/batch_processor_proto.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/core/_stream/write_stream.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/core/_stream/write_stream_base.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/core/_utils/README.md +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/core/_utils/__init__.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/core/_utils/api_tools.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/core/_utils/multipart.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/core/_utils/multipart_downloader.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/core/_utils/networking.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/core/_utils/pagination_tools.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/core/_utils/query_tools.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/core/_utils/queueing.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/core/asset.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/core/attachment.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/core/bounds.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/core/channel.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/core/checklist.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/core/client.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/core/connection.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/core/containerized_extractors.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/core/data_review.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/core/datasource.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/core/event.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/core/exceptions.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/core/log.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/core/run.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/core/secret.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/core/unit.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/core/user.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/core/video.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/core/video_file.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/core/workbook.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/core/workbook_template.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/core/workspace.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/exceptions/__init__.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/experimental/__init__.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/experimental/compute/README.md +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/experimental/compute/__init__.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/experimental/compute/_buckets.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/experimental/compute/dsl/__init__.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/experimental/compute/dsl/_enum_expr_impls.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/experimental/compute/dsl/_numeric_expr_impls.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/experimental/compute/dsl/_range_expr_impls.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/experimental/compute/dsl/exprs.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/experimental/compute/dsl/params.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/experimental/logging/__init__.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/experimental/logging/click_log_handler.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/experimental/logging/nominal_log_handler.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/experimental/logging/rich_log_handler.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/experimental/rust_streaming/__init__.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/experimental/rust_streaming/rust_write_stream.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/experimental/stream_v2/__init__.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/experimental/stream_v2/_serializer.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/experimental/stream_v2/_write_stream.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/experimental/video_processing/__init__.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/experimental/video_processing/resolution.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/experimental/video_processing/video_conversion.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/nominal.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/py.typed +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/thirdparty/__init__.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/thirdparty/matlab/__init__.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/thirdparty/matlab/_matlab.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/thirdparty/pandas/__init__.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/thirdparty/pandas/_pandas.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/thirdparty/polars/__init__.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/thirdparty/polars/polars_export_handler.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/thirdparty/tdms/__init__.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/thirdparty/tdms/_tdms.py +0 -0
- {nominal-1.97.0 → nominal-1.99.0}/nominal/ts/__init__.py +0 -0
|
@@ -1,5 +1,19 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [1.99.0](https://github.com/nominal-io/nominal-client/compare/v1.98.0...v1.99.0) (2025-12-04)
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
### Features
|
|
7
|
+
|
|
8
|
+
* allow ingesting .avro files ([#544](https://github.com/nominal-io/nominal-client/issues/544)) ([f5c4561](https://github.com/nominal-io/nominal-client/commit/f5c4561e1db6174a56d6b32b388ed7ad94679fdf))
|
|
9
|
+
|
|
10
|
+
## [1.98.0](https://github.com/nominal-io/nominal-client/compare/v1.97.0...v1.98.0) (2025-12-04)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
### Features
|
|
14
|
+
|
|
15
|
+
* as_files_ingested iterator for dataset files ([#533](https://github.com/nominal-io/nominal-client/issues/533)) ([7afc0f6](https://github.com/nominal-io/nominal-client/commit/7afc0f61d24f930d37d55c8e0840ebd18b8711b6))
|
|
16
|
+
|
|
3
17
|
## [1.97.0](https://github.com/nominal-io/nominal-client/compare/v1.96.0...v1.97.0) (2025-12-04)
|
|
4
18
|
|
|
5
19
|
|
|
@@ -17,7 +17,7 @@ from nominal.core.containerized_extractors import (
|
|
|
17
17
|
)
|
|
18
18
|
from nominal.core.data_review import CheckViolation, DataReview, DataReviewBuilder
|
|
19
19
|
from nominal.core.dataset import Dataset, poll_until_ingestion_completed
|
|
20
|
-
from nominal.core.dataset_file import DatasetFile
|
|
20
|
+
from nominal.core.dataset_file import DatasetFile, IngestWaitType, as_files_ingested, wait_for_files_to_ingest
|
|
21
21
|
from nominal.core.datasource import DataSource
|
|
22
22
|
from nominal.core.event import Event, EventType
|
|
23
23
|
from nominal.core.filetype import FileType, FileTypes
|
|
@@ -33,6 +33,7 @@ from nominal.core.workbook_template import WorkbookTemplate
|
|
|
33
33
|
from nominal.core.workspace import Workspace
|
|
34
34
|
|
|
35
35
|
__all__ = [
|
|
36
|
+
"as_files_ingested",
|
|
36
37
|
"Asset",
|
|
37
38
|
"Attachment",
|
|
38
39
|
"Bounds",
|
|
@@ -53,6 +54,7 @@ __all__ = [
|
|
|
53
54
|
"FileExtractionInput",
|
|
54
55
|
"FileType",
|
|
55
56
|
"FileTypes",
|
|
57
|
+
"IngestWaitType",
|
|
56
58
|
"LinkDict",
|
|
57
59
|
"LogPoint",
|
|
58
60
|
"NominalClient",
|
|
@@ -67,6 +69,7 @@ __all__ = [
|
|
|
67
69
|
"UserPassAuth",
|
|
68
70
|
"Video",
|
|
69
71
|
"VideoFile",
|
|
72
|
+
"wait_for_files_to_ingest",
|
|
70
73
|
"Workbook",
|
|
71
74
|
"WorkbookTemplate",
|
|
72
75
|
"WorkbookType",
|
|
@@ -203,6 +203,78 @@ class Dataset(DataSource, RefreshableMixin[scout_catalog.EnrichedDataset]):
|
|
|
203
203
|
# Backward compatibility
|
|
204
204
|
add_to_dataset_from_io = add_from_io
|
|
205
205
|
|
|
206
|
+
def add_avro_stream(self, path: Path | str) -> DatasetFile:
    """Upload an avro stream file and start ingesting it into this dataset.

    The avro stream format is a "stream-like" alternative for data that does not fit a
    columnar/tabular layout. It closely matches Nominal's streaming API, which makes it
    useful as a backup file format when the network connection drops during streaming.

    The file must use exactly the schema below; any other schema results in a failed
    ingestion.
    {
        "type": "record",
        "name": "AvroStream",
        "namespace": "io.nominal.ingest",
        "fields": [
            {
                "name": "channel",
                "type": "string",
                "doc": "Channel/series name (e.g., 'vehicle_id', 'col_1', 'temperature')",
            },
            {
                "name": "timestamps",
                "type": {"type": "array", "items": "long"},
                "doc": "Array of Unix timestamps in nanoseconds",
            },
            {
                "name": "values",
                "type": {"type": "array", "items": ["double", "string"]},
                "doc": "Array of values. Can either be doubles or strings",
            },
            {
                "name": "tags",
                "type": {"type": "map", "values": "string"},
                "default": {},
                "doc": "Key-value metadata tags",
            },
        ],
    }

    Args:
        path: Location of the .avro file to upload.

    Returns:
        Reference to the ingesting DatasetFile.

    """
    # Step 1: push the local file through the multipart uploader; the result is the
    # S3 path that the ingest service reads from.
    uploaded_s3_path = upload_multipart_file(
        self._clients.auth_header,
        self._clients.workspace_rid,
        Path(path),
        self._clients.upload,
        file_type=FileTypes.AVRO_STREAM,
    )

    # Step 2: describe the ingest -- source is the uploaded object, destination is
    # this (already existing) dataset.
    destination = ingest_api.ExistingDatasetIngestDestination(dataset_rid=self.rid)
    avro_opts = ingest_api.AvroStreamOpts(
        source=ingest_api.IngestSource(s3=ingest_api.S3IngestSource(uploaded_s3_path)),
        target=ingest_api.DatasetIngestTarget(existing=destination),
    )
    request = ingest_api.IngestRequest(options=ingest_api.IngestOptions(avro_stream=avro_opts))

    # Step 3: submit the request and convert the response into a DatasetFile handle.
    response = self._clients.ingest.ingest(self._clients.auth_header, request)
    return self._handle_ingest_response(response)
|
|
277
|
+
|
|
206
278
|
def add_journal_json(
|
|
207
279
|
self,
|
|
208
280
|
path: Path | str,
|
|
@@ -6,7 +6,7 @@ import pathlib
|
|
|
6
6
|
import time
|
|
7
7
|
from dataclasses import dataclass, field
|
|
8
8
|
from enum import Enum
|
|
9
|
-
from typing import Mapping, Protocol, Sequence
|
|
9
|
+
from typing import Iterable, Mapping, Protocol, Sequence
|
|
10
10
|
from urllib.parse import unquote, urlparse
|
|
11
11
|
|
|
12
12
|
from nominal_api import api, ingest_api, scout_catalog
|
|
@@ -287,3 +287,115 @@ class IngestStatus(Enum):
|
|
|
287
287
|
elif status.error is not None:
|
|
288
288
|
return cls.FAILED
|
|
289
289
|
raise ValueError(f"Unknown ingest status: {status.type}")
|
|
290
|
+
|
|
291
|
+
|
|
292
|
+
class IngestWaitType(Enum):
    """Exit condition accepted by `wait_for_files_to_ingest` via its `return_when` argument.

    The member names follow the `return_when` constants of `concurrent.futures.wait`.
    """

    # Stop waiting as soon as at least one file has finished ingesting (success or failure).
    FIRST_COMPLETED = "FIRST_COMPLETED"

    # Stop waiting as soon as at least one file has failed to ingest.
    FIRST_EXCEPTION = "FIRST_EXCEPTION"

    # Keep waiting until no file is still ingesting.
    ALL_COMPLETED = "ALL_COMPLETED"
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
def wait_for_files_to_ingest(
|
|
299
|
+
files: Sequence[DatasetFile],
|
|
300
|
+
*,
|
|
301
|
+
poll_interval: datetime.timedelta = datetime.timedelta(seconds=1),
|
|
302
|
+
timeout: datetime.timedelta | None = None,
|
|
303
|
+
return_when: IngestWaitType = IngestWaitType.ALL_COMPLETED,
|
|
304
|
+
) -> tuple[Sequence[DatasetFile], Sequence[DatasetFile]]:
|
|
305
|
+
"""Blocks until all of the dataset files have completed their ingestion (or other specified conditions)
|
|
306
|
+
in a similar fashion to `concurrent.futures.wait`.
|
|
307
|
+
|
|
308
|
+
Any files that are already ingested (successfully or with errors) will be returned as "done", whereas any
|
|
309
|
+
files still ingesting by the time of this function's exit will be returned as "not done".
|
|
310
|
+
|
|
311
|
+
Args:
|
|
312
|
+
files: Dataset files to monitor for ingestion completion.
|
|
313
|
+
poll_interval: Interval to sleep between polling the remaining files under watch.
|
|
314
|
+
timeout: If given, the maximum time to wait before returning
|
|
315
|
+
return_when: Condition for this function to exit. By default, this function will block until all files
|
|
316
|
+
have completed their ingestion (successfully or unsuccessfully), but this can be changed to return
|
|
317
|
+
upon the first completed or first failing ingest. This behavior mirrors that of
|
|
318
|
+
`concurrent.futures.wait`.
|
|
319
|
+
|
|
320
|
+
Returns:
|
|
321
|
+
Returns a tuple of (done, not done) dataset files.
|
|
322
|
+
"""
|
|
323
|
+
start_time = datetime.datetime.now()
|
|
324
|
+
done: list[DatasetFile] = []
|
|
325
|
+
not_done: list[DatasetFile] = [*files]
|
|
326
|
+
has_failed = False
|
|
327
|
+
|
|
328
|
+
while not_done and (timeout is None or datetime.datetime.now() - start_time < timeout):
|
|
329
|
+
logger.info("Polling for ingestion completion for %d files (%d total)", len(not_done), len(files))
|
|
330
|
+
|
|
331
|
+
next_not_done = []
|
|
332
|
+
for file in not_done:
|
|
333
|
+
latest_api = file._get_latest_api()
|
|
334
|
+
latest_file = file._refresh_from_api(latest_api)
|
|
335
|
+
match file.ingest_status:
|
|
336
|
+
case IngestStatus.SUCCESS:
|
|
337
|
+
done.append(latest_file)
|
|
338
|
+
case IngestStatus.FAILED:
|
|
339
|
+
logger.warning(
|
|
340
|
+
"Dataset file %s from dataset %s failed to ingest! Error message: %s",
|
|
341
|
+
latest_file.id,
|
|
342
|
+
latest_file.dataset_rid,
|
|
343
|
+
latest_api.ingest_status.error.message if latest_api.ingest_status.error else "",
|
|
344
|
+
)
|
|
345
|
+
done.append(latest_file)
|
|
346
|
+
has_failed = True
|
|
347
|
+
case IngestStatus.IN_PROGRESS:
|
|
348
|
+
next_not_done.append(latest_file)
|
|
349
|
+
|
|
350
|
+
not_done = next_not_done
|
|
351
|
+
|
|
352
|
+
if has_failed and return_when is IngestWaitType.FIRST_EXCEPTION:
|
|
353
|
+
break
|
|
354
|
+
elif done and return_when is IngestWaitType.FIRST_COMPLETED:
|
|
355
|
+
break
|
|
356
|
+
elif not not_done:
|
|
357
|
+
break
|
|
358
|
+
|
|
359
|
+
if timeout is not None and datetime.datetime.now() - start_time < timeout:
|
|
360
|
+
logger.info(
|
|
361
|
+
"Sleeping for %f seconds while awaiting ingestion for %d files (%d total)... ",
|
|
362
|
+
len(not_done),
|
|
363
|
+
len(files),
|
|
364
|
+
poll_interval.total_seconds(),
|
|
365
|
+
)
|
|
366
|
+
time.sleep(poll_interval.total_seconds())
|
|
367
|
+
|
|
368
|
+
return done, not_done
|
|
369
|
+
|
|
370
|
+
|
|
371
|
+
def as_files_ingested(
    files: Sequence[DatasetFile],
    *,
    poll_interval: datetime.timedelta = datetime.timedelta(seconds=1),
) -> Iterable[DatasetFile]:
    """Yield DatasetFiles as their ingestion finishes, analogous to `concurrent.futures.as_completed`.

    Files whose ingestion has already finished (successfully or with errors) are yielded right away.

    Args:
        files: Dataset files to monitor for ingestion completion.
        poll_interval: Interval to sleep between polling the remaining files under watch.

    Yields:
        Each DatasetFile once its ingestion has finished. Because completion is detected by polling,
        the yield order is not strictly sorted by completion time. Check the `ingest_status` of each
        yielded file if the success/failure outcome matters.
    """
    pending: Sequence[DatasetFile] = list(files)
    while pending:
        logger.info("Awaiting ingestion for %d files (%d total)", len(pending), len(files))

        # One waiting pass: returns as soon as at least one watched file has finished.
        finished, pending = wait_for_files_to_ingest(
            pending,
            poll_interval=poll_interval,
            return_when=IngestWaitType.FIRST_COMPLETED,
        )
        yield from finished

        if not pending:
            break

        # Pause before starting the next waiting pass over the files still ingesting.
        time.sleep(poll_interval.total_seconds())
|
|
@@ -111,6 +111,7 @@ class FileType(NamedTuple):
|
|
|
111
111
|
|
|
112
112
|
|
|
113
113
|
class FileTypes:
|
|
114
|
+
AVRO_STREAM: FileType = FileType(".avro", "application/avro")
|
|
114
115
|
BINARY: FileType = FileType("", "application/octet-stream")
|
|
115
116
|
CSV: FileType = FileType(".csv", "text/csv")
|
|
116
117
|
CSV_GZ: FileType = FileType(".csv.gz", "text/csv")
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|