nominal 0.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 nominal-io
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md ADDED
@@ -0,0 +1,44 @@
1
+ # ⬖ Nominal
2
+ Python client for Nominal test data, storage, & compute
3
+
4
+ 🚧 WIP - API and syntax subject to change
5
+
6
+ ## Install
7
+
8
+ > pip3 install nominal
9
+
10
+ ## Usage
11
+
12
+ ### Set your API key
13
+
14
+ Retrieve your API key from /sandbox on your Nominal tenant
15
+
16
+ ```py
17
+ import nominal as nm
18
+
19
+ nm.set_token(...)
20
+ ```
21
+
22
+ ### Upload a Dataset (3 lines)
23
+
24
+ ```py
25
+ from nominal import Ingest
26
+
27
+ dataset = Ingest().read_csv('../data/penguins.csv')
28
+
29
+ dataset.upload()
30
+ ```
31
+
32
+ ### Upload a Run (3 lines)
33
+
34
+ ```py
35
+ from nominal import Run
36
+
37
+ r = Run(path='../data/penguins.csv')
38
+
39
+ r.upload()
40
+ ```
41
+
42
+ ### Apply a Check to a Run
43
+
44
+ TODO
nominal/__init__.py ADDED
@@ -0,0 +1,8 @@
1
"""Public entry points of the Nominal Python client."""

import os

from .nominal import (
    Dataset as Dataset,
    Ingest as Ingest,
    Run as Run,
    get_base_url as get_base_url,
    set_base_url as set_base_url,
    set_token as set_token,
)

# Default to the staging API, but keep a base URL that was already exported
# before the package was imported instead of silently overwriting it.
os.environ.setdefault("NOMINAL_BASE_URL", "https://api-staging.gov.nominal.io/api")
nominal/nominal.py ADDED
@@ -0,0 +1,573 @@
1
import copy
import io
import os
from datetime import datetime
from math import floor

import jsondiff as jd
import keyring as kr
import polars as pl
import requests
from dateutil import parser
from jsondiff import diff
from rich import print

# `utils` is a sibling module inside this package, so it must be imported
# relatively; a bare `from utils import ...` only resolves when the current
# working directory happens to be the package directory.
from .utils import default_filename, PayloadFactory


# URL templates for the REST endpoints. The first placeholder is always the
# API base URL (see get_base_url); the second, where present, is a file name
# or a run RID.
ENDPOINTS = dict(
    file_upload='{}/upload/v1/upload-file?fileName={}',
    dataset_upload='{}/ingest/v1/trigger-ingest-v2',
    run_upload='{}/ingest/v1/ingest-run',
    run_retrieve='{}/scout/v1/run/{}',  # GET
    run_update='{}/scout/v1/run/{}',  # PUT
)

# Aliases accepted by set_base_url()
BASE_URLS = dict(
    STAGING="https://api-staging.gov.nominal.io/api",
    PROD="https://api.gov.nominal.io/api",
)
28
+
29
+
30
def set_base_url(base_url: str = "STAGING"):
    """
    Store the API base URL in the NOMINAL_BASE_URL environment variable.

    Usage:
        import nominal as nm
        nm.set_base_url('PROD')

    Parameters
    ----------
    base_url : str, optional
        Either a key of BASE_URLS, which is translated to its URL, or any
        other string, which is stored verbatim.

    TODO
    ----
    Default is staging. Change to prod after beta period.
    """
    # Known aliases map to their URL; everything else is taken literally
    resolved = BASE_URLS.get(base_url, base_url)
    os.environ["NOMINAL_BASE_URL"] = resolved
44
+
45
+
46
def get_base_url():
    """
    Return the API base URL stored in the NOMINAL_BASE_URL environment variable.

    When the variable is absent, set_base_url() is called with its default
    argument first so that a value is always available.
    """
    env = os.environ
    if "NOMINAL_BASE_URL" in env:
        return env["NOMINAL_BASE_URL"]
    set_base_url()  # set to default
    return env["NOMINAL_BASE_URL"]
50
+
51
+
52
def get_app_base_url():
    """
    Derive the web-app URL from the API base URL.

    eg, https://api-staging.gov.nominal.io/api -> https://app-staging.gov.nominal.io

    TODO
    ----
    This won't work for custom domains
    """
    # str.rstrip("/api") would strip any trailing run of the characters
    # '/', 'a', 'p', 'i' (e.g. "https://api.example.ai/api" -> "https://api.example."),
    # so remove the literal "/api" suffix instead.
    api_url = get_base_url().rstrip("/").removesuffix("/api")
    # Only the leading "api" host label is meant to change; an unbounded
    # replace would also rewrite later occurrences of "api" in the host.
    return api_url.replace("api", "app", 1)
61
+
62
+
63
def set_token(token):
    """
    Save the Nominal API access token in the system keyring.

    The token is stored under service "Nominal API" and user "python-client",
    which is where the request helpers of Dataset and Run read it back from.

    Parameters
    ----------
    token : str or None
        The access token. When None, a hint on where to obtain a token is
        printed and nothing is stored.
    """
    if token is None:
        print("Retrieve your access token from [link]{0}/sandbox[/link]".format(get_base_url()))
        # There is no token to store, so leave the keyring untouched
        return
    kr.set_password("Nominal API", "python-client", token)
67
+
68
+
69
class Dataset(pl.DataFrame):
    """
    Dataset inherits from Polars DataFrame for its rich display, ingestion, and wrangling capabilities.

    Parameters
    ----------
    data : various, optional
        The input data for the dataset. This can be in any format supported by Polars DataFrame.
    filename : str, optional
        The name of the dataset file. Default is None, in which case a default name is generated on upload.
    rid : str, optional
        The RID (Resource ID) of the dataset, if already known. Default is None.
    properties : dict, optional
        A dictionary of additional properties associated with the dataset. Default is an empty dictionary.
    description : str, optional
        A brief description of the dataset. Default is an empty string.

    Attributes
    ----------
    s3_path : str or None
        The S3 path where the dataset is stored after upload. Initially None.
    filename : str
        The name of the dataset file.
    properties : dict
        A dictionary of additional properties associated with the dataset.
    description : str
        A brief description of the dataset.
    rid : str or None
        The dataset's RID (Resource ID) after registration on the Nominal platform. Initially None.
    dataset_link : str
        A URL link to the dataset on the Nominal platform. Initially an empty string.

    Methods
    -------
    upload(overwrite=False)
        Uploads and registers the dataset on the Nominal platform.
    """

    def __init__(
        self, data: any = None, filename: str = None, rid: str = None, properties: dict = None, description: str = ""
    ):
        super().__init__(data)

        self.s3_path = None
        self.filename = filename
        # Build a fresh dict per instance; a dict created in the signature
        # would be shared by every Dataset constructed without properties.
        self.properties = {} if properties is None else properties
        self.description = description
        self.rid = rid
        self.dataset_link = ""

    def __get_headers(self, content_type: str = 'json') -> dict:
        """Build the request headers, reading the bearer token from the keyring."""
        TOKEN = kr.get_password('Nominal API', 'python-client')
        return {
            "Authorization": "Bearer {}".format(TOKEN),
            "Content-Type": "application/{0}".format(content_type),
        }

    @staticmethod
    def __response_body(resp):
        """Return the decoded JSON body of a response, or its raw text when it is not JSON."""
        try:
            return resp.json()
        except ValueError:
            # Error responses are not guaranteed to carry a JSON body
            return resp.text

    def __upload_file(self, overwrite: bool) -> requests.Response:
        """
        Uploads dataframe to S3 as a file.

        Parameters
        ----------
        overwrite : bool
            Upload again even when `s3_path` is already set.

        Returns:
            Response object from the REST call, or None when the upload was
            skipped because the Dataset is already on S3 and `overwrite` is False.
        """

        if self.s3_path is not None and not overwrite:
            print(
                "\nThis Dataset is already uploaded to an S3 bucket:\n{0}\nTry [code]upload(overwrite = True)[/code] to overwrite it.".format(
                    self.s3_path
                )
            )
            return None

        # Create a default dataset name
        if self.filename is None:
            self.filename = default_filename("DATASET")

        # Serialise the frame to CSV in memory
        csv_file_buffer = io.BytesIO()
        self.write_csv(csv_file_buffer)
        csv_bytes = csv_file_buffer.getvalue()
        csv_buffer_size_bytes = len(csv_bytes)

        print(
            "\nUploading: [bold green]{0}[/bold green]\nto {1}\n = {2} bytes".format(
                self.filename, get_base_url(), csv_buffer_size_bytes
            )
        )

        # Make POST request to upload data file to S3
        resp = requests.post(
            url=ENDPOINTS["file_upload"].format(get_base_url(), self.filename),
            data=csv_bytes,
            params={"sizeBytes": csv_buffer_size_bytes},
            headers=self.__get_headers(content_type="octet-stream"),
        )

        if resp.status_code == 200:
            # The path comes back as a quoted string
            self.s3_path = resp.text.strip('"')
            print("\nUpload to S3 successful.\nS3 bucket:\n", self.s3_path)
        else:
            print("\n{0} error during upload to S3:\n".format(resp.status_code), self.__response_body(resp))

        return resp

    def upload(self, overwrite: bool = False):
        """
        Uploads the Dataset to S3 (unless already there) and registers it on the Nominal platform.

        Endpoint:
            /ingest/v1/trigger-ingest-v2

        Parameters
        ----------
        overwrite : bool, optional
            Upload the file again even if it already has an S3 path. Default is False.

        Returns:
            Response object from the registration REST call, or None when
            registration was aborted.
        """

        s3_upload_resp = self.__upload_file(overwrite)

        # None means the upload was skipped because an S3 copy already exists;
        # any real response must be a 200 for registration to make sense.
        if s3_upload_resp is not None and s3_upload_resp.status_code != 200:
            print("Aborting Dataset registration")
            return

        if self.s3_path is None:
            print("Cannnot register Dataset on Nominal - Dataset.s3_path is not set")
            return

        print("\nRegistering [bold green]{0}[/bold green] on {1}".format(self.filename, get_base_url()))

        resp = requests.post(
            url=ENDPOINTS["dataset_upload"].format(get_base_url()),
            json=PayloadFactory.dataset_trigger_ingest(self),
            headers=self.__get_headers(),
        )

        if resp.status_code == 200:
            self.rid = resp.json()["datasetRid"]
            self.dataset_link = "{0}/data-sources/{1}".format(get_app_base_url(), self.rid)
            print("\nDataset RID: ", self.rid)
            print("\nDataset Link: ", "[link={0}]{0}[/link]\n".format(self.dataset_link))
        else:
            print("\n{0} error registering Dataset on Nominal:\n".format(resp.status_code), self.__response_body(resp))

        return resp
217
+
218
+
219
class Ingest:
    """
    Handles ingestion of various tabular and video file formats.

    This class provides static and instance methods for ingesting data from various formats, such as CSV and Parquet files,
    and for setting a timestamp index column in the ingested data. The ingested data is returned as a `Dataset` object.

    Methods
    -------
    set_ts_index(df, ts_col)
        Sets a timestamp index for the provided DataFrame. This method adds internal columns for the datetime in Python
        format ('_python_datetime') and Unix timestamp format ('_unix_timestamp').

    read_csv(path, ts_col=None)
        Reads a CSV file from the specified path and returns a `Dataset` object with a timestamp index set.

    read_parquet(path, ts_col=None)
        Reads a Parquet file from the specified path and returns a `Dataset` object with a timestamp index set.

    Notes
    -----
    TODO: Consider using Ibis for database source connectivity.
    TODO: Implement video ingest functionality.
    """

    @staticmethod
    def set_ts_index(df: pl.DataFrame, ts_col: str = None) -> pl.DataFrame:
        """
        Sets a timestamp index for the provided DataFrame.

        This method attempts to infer the timestamp column if one is not specified. It adds internal columns to the
        DataFrame: '_python_datetime' and '_unix_timestamp'. The DataFrame is then sorted by the '_python_datetime' column.

        Parameters
        ----------
        df : polars.DataFrame
            The DataFrame for which the timestamp index will be set.
        ts_col : str, optional
            The name of the column to use as the timestamp. If None, the first column whose first value is a
            datetime or a string that dateutil can parse is used.

        Returns
        -------
        polars.DataFrame
            The modified DataFrame with the timestamp index set, or the input unchanged when no timestamp
            column could be determined.
        """
        internal_cols = ("_python_datetime", "_unix_timestamp")

        def to_datetime(value):
            # Columns that already hold datetimes (typical for Parquet) are
            # used as-is; everything else goes through dateutil.
            return value if isinstance(value, datetime) else parser.parse(value)

        if ts_col is None:
            # Infer timestamp column
            for col in df.columns:
                if col in internal_cols:
                    # Never infer from a column this method is about to replace
                    continue
                try:
                    to_datetime(df[col][0])
                except Exception:
                    # Deliberately broad best-effort probe: a non-string value,
                    # an unparseable string or an empty column all just mean
                    # "this is not the timestamp column".
                    continue
                ts_col = col
                break

        if ts_col is None:
            print(
                "A Dataset must have at least one column that is a timestamp. Please specify which column is a date or datetime with the `ts_col` parameter."
            )
            return df

        # Parse before dropping anything so the source column is still available
        parsed = [to_datetime(value) for value in df[ts_col]]

        # Drop stale internal columns independently; either may be absent
        for col in internal_cols:
            if col in df.columns:
                df.drop_in_place(col)

        datetime_series = pl.Series("_python_datetime", parsed)
        unix_series = pl.Series("_unix_timestamp", [dt.timestamp() for dt in datetime_series])
        df.insert_column(-1, datetime_series)
        df.insert_column(-1, unix_series)
        return df.sort("_python_datetime")  # Datasets must be sorted in order to upload to Nominal

    def read_csv(self, path: str, ts_col: str = None) -> Dataset:
        """Read the CSV file at `path` into a Dataset with its timestamp index set; the Dataset is named after the file."""
        dfc = pl.read_csv(path)
        dft = self.set_ts_index(dfc, ts_col)
        return Dataset(dft, filename=os.path.basename(path))

    def read_parquet(self, path: str, ts_col: str = None) -> Dataset:
        """Read the Parquet file at `path` into a Dataset with its timestamp index set; the Dataset is named after the file."""
        dfp = pl.read_parquet(path)
        dft = self.set_ts_index(dfp, ts_col)
        return Dataset(dft, filename=os.path.basename(path))
302
+
303
+
304
class Run:
    '''
    Python representation of a Nominal Run.

    Parameters
    ----------
    rid : str, optional
        Resource ID of an existing Run. If provided, the Run is downloaded into `cloud`. Default is None.
    path : str, optional
        A single file path to a dataset. If provided, it is used instead of `paths`. Default is None.
    paths : list of str, optional
        A list of CSV file paths to datasets. Default is an empty list.
    datasets : list of Dataset, optional
        A list of `Dataset` objects to be included in the run; only used when no paths are given.
        Default is an empty list.
    properties : dict, optional
        A dict of properties associated with the run. Default is an empty dict.
    title : str, optional
        The title of the run. Default is None, which will generate a default filename.
    description : str, optional
        A brief description of the run. Default is an empty string.
    start : str or datetime, optional
        The start time for the run. Can be a string or a datetime object. Default is None,
        which uses the earliest timestamp of the datasets.
    end : str or datetime, optional
        The end time for the run. Can be a string or a datetime object. Default is None,
        which uses the latest timestamp of the datasets.
    cloud : dict, optional
        Accepted for backward compatibility; currently ignored.

    Attributes
    ----------
    title : str
        The title of the run. Defaults to a timestamped, autogenerated filename if not provided.
    description : str
        A brief description of the run.
    properties : dict
        A dict of properties associated with the run.
    datasets : list of Dataset
        A list of `Dataset` objects associated with the run.
    rid : str or None
        The Run's resource ID, once known.
    cloud : dict or None
        The Run as last returned by the Nominal API, or None if nothing was downloaded.
    datasets_domain : dict
        A dictionary holding the overall 'START' and 'END' domain from the datasets.

    Methods
    -------
    upload()
        Uploads the run and its datasets to Nominal.
    diff()
        Compares the local Run with the downloaded one.
    update()
        Sends local changes of a downloaded Run to Nominal.
    '''

    def __print_human_readable_endpoint(self, endpoint):
        '''
        Print the Run datetime endpoints in a human-readable form
        '''
        print('Run {} time:'.format(endpoint))
        # NANOS holds nanoseconds, so scale by 1e-9 to add them to whole seconds
        unix_seconds = self._domain[endpoint]['SECONDS'] + self._domain[endpoint]['NANOS'] * 1e-9
        print('Unix: ', unix_seconds)
        datetime_endpoint = datetime.fromtimestamp(unix_seconds)
        print('Datetime: ', datetime_endpoint)

    def __setattr__(self, k: str, v) -> None:
        '''
        Convenience method to allow setting Run endpoints as human-readable strings
        (datetime objects are accepted as well)
        '''
        if k in ['start', 'end']:
            endpoint = k.upper()
            self._domain[endpoint]['DATETIME'] = v if isinstance(v, datetime) else parser.parse(v)
            self.__set_run_unix_timestamp_domain([endpoint])
            self.__print_human_readable_endpoint(endpoint)
        else:
            super().__setattr__(k, v)

    def __getattr__(self, k: str) -> None:
        '''
        Print the start/end endpoint when it is read; any other missing attribute raises AttributeError
        '''
        if k in ['start', 'end']:
            self.__print_human_readable_endpoint(k.upper())
        else:
            # object defines no __getattr__ to delegate to, so raise directly
            raise AttributeError("'{0}' object has no attribute '{1}'".format(type(self).__name__, k))

    def __init__(self,
                 rid: str = None,
                 path: str = None,
                 paths: "list[str] | None" = None,
                 datasets: "list[Dataset] | None" = None,
                 properties: dict = None,
                 title: str = None,
                 description: str = '',
                 start: str = None,
                 end: str = None,
                 cloud: dict = None):

        self.title = default_filename('RUN') if title is None else title
        self.description = description
        # Fresh containers per instance instead of shared mutable defaults
        self.properties = {} if properties is None else properties
        self._domain = {'START': {}, 'END': {}}
        # Always define these so diff()/update()/upload() can test them safely
        self.rid = rid
        self.cloud = None
        self.datasets = []

        if rid is not None:
            # Attempt to retrieve run by its resource ID (rid)
            resp = requests.get(
                headers=self.__get_headers(),
                url=ENDPOINTS['run_retrieve'].format(get_base_url(), rid),
            )
            if resp.status_code != 200:
                print('There was an error retrieving Run with rid = {0}'.format(rid))
                print('Make sure that your rid is correct and from [link]{0}[/link]'.format(get_app_base_url()))
                print(self.__response_body(resp))
                return

            self.cloud = resp.json()
            print('Cloud response:')
            print(self.cloud)
            print('... Downloaded to Run.cloud')

            # Assign Run metadata to local Run object metadata
            local_metadata = ['rid', 'description', 'title', 'start', 'end', 'properties', 'labels']
            for md_key in local_metadata:
                if md_key in self.cloud:
                    # Override local value with cloud value
                    setattr(self, md_key, self.cloud[md_key])
                elif md_key == 'start':
                    self._domain['START']['SECONDS'] = self.cloud['startTime']['secondsSinceEpoch']
                    self._domain['START']['NANOS'] = self.cloud['startTime']['offsetNanoseconds']
                elif md_key == 'end':
                    self._domain['END']['SECONDS'] = self.cloud['endTime']['secondsSinceEpoch']
                    self._domain['END']['NANOS'] = self.cloud['endTime']['offsetNanoseconds']

        if path is not None:
            paths = [path]
        if paths is None:
            paths = []
        if datasets is None:
            datasets = []

        if len(paths) == 0 and len(datasets) == 0:
            # A Run downloaded by rid is complete without local data sources
            if self.cloud is None:
                print("Please provide a list of Datasets or list of paths for this Run")
            return

        if len(paths) > 0:
            self.datasets = [Ingest().read_csv(fp) for fp in paths]
        else:
            self.datasets = datasets

        # Overall time span covered by the datasets
        mins = [ds["_python_datetime"].min() for ds in self.datasets]
        maxs = [ds["_python_datetime"].max() for ds in self.datasets]
        self.datasets_domain = dict(START=min(mins), END=max(maxs))

        self.__set_run_datetime_boundary('START', start)
        self.__set_run_datetime_boundary('END', end)
        self.__set_run_unix_timestamp_domain()

    @staticmethod
    def __response_body(resp):
        '''
        Return the decoded JSON body of a response, or its raw text when it is not JSON
        '''
        try:
            return resp.json()
        except ValueError:
            # Error responses are not guaranteed to carry a JSON body
            return resp.text

    def __set_run_datetime_boundary(self, key: str, str_datetime: any):
        '''
        Set the 'START' or 'END' datetime of the Run.
        None falls back to the datasets' domain; a str is parsed with dateutil.
        '''
        if str_datetime is None:
            self._domain[key]['DATETIME'] = self.datasets_domain[key]
        elif isinstance(str_datetime, datetime):
            self._domain[key]['DATETIME'] = str_datetime
        elif isinstance(str_datetime, str):
            self._domain[key]['DATETIME'] = parser.parse(str_datetime)
        else:
            # Fail here rather than with a KeyError on 'DATETIME' later on
            raise TypeError(
                'Run {0} must be None, a str or a datetime, not {1}'.format(key, type(str_datetime).__name__)
            )

    def __set_run_unix_timestamp_domain(self, endpoints=('START', 'END')):
        '''
        Derive whole epoch seconds and the nanosecond offset from each endpoint's datetime
        '''
        for key in endpoints:
            dt = self._domain[key]['DATETIME']
            # Drop the sub-second part first so the float timestamp is an exact integer
            self._domain[key]['SECONDS'] = floor(dt.replace(microsecond=0).timestamp())
            # Microseconds -> nanoseconds, without float rounding
            self._domain[key]['NANOS'] = dt.microsecond * 1000

    def __get_headers(self, content_type: str = "json") -> dict:
        '''
        Build the request headers, reading the bearer token from the keyring
        '''
        TOKEN = kr.get_password("Nominal API", "python-client")
        return {
            "Authorization": "Bearer {}".format(TOKEN),
            "Content-Type": "application/{0}".format(content_type),
        }

    def diff(self):
        '''
        Compare local and cloud Run instances.
        Prints the explicit diff and returns the compact one, or None if no Run was downloaded.
        '''
        if self.cloud is None:
            print('No Run instance has been downloaded from the cloud')
            print('Download a run with [code]r = Run(rid = RID)[/code]')
            return

        local_copy = PayloadFactory.run_upload(self)
        cloud_copy = copy.deepcopy(self.cloud)

        # rm datasources - we're not comparing those
        cloud_copy.pop('dataSources', None)
        local_copy.pop('dataSources', None)

        def rm_deletions(rd):
            # Keys that exist only in the cloud copy are not local changes
            if jd.delete in rd:
                del rd[jd.delete]

        run_diff_labeled = diff(cloud_copy, local_copy, syntax='explicit')
        rm_deletions(run_diff_labeled)
        print(run_diff_labeled)

        run_diff_unlabeled = diff(cloud_copy, local_copy)
        rm_deletions(run_diff_unlabeled)
        return run_diff_unlabeled

    def update(self):
        '''
        Updating run metadata is done in 4 steps:
        1. Download a Run: r = Run(rid = RID)
        2. Update something about the Run: r.title = 'Runs with Friends'
        3. [Optional] Inspect a diff between the cloud and local versions: r.diff()
        4. r.update()
        By design, no changes are synced with the cloud without an explicit call to update()
        At the moment, only Run start, end, and metadata can be updated (not datasources)
        '''

        if self.rid is None or self.cloud is None:
            print('No Run instance has been downloaded from the cloud')
            print('Download a run with [code]r = Run(rid = RID)[/code]')
            return

        rd = self.diff()  # rd = "run diff"
        if not rd:
            print('No difference between Run.cloud and the local Run instance')
            return

        # Make PUT request to update Run
        resp = requests.put(
            url=ENDPOINTS['run_update'].format(get_base_url(), self.rid),
            json=rd,
            headers=self.__get_headers(),
        )

        if resp.status_code == 200:
            self.cloud = resp.json()
            print('\nUpdated Run on Nominal:')
            print('[link]{0}/runs/{1}[/link]'.format(get_app_base_url(), self.cloud['runNumber']))
        else:
            print('\n{0} error updating Run on Nominal:\n'.format(resp.status_code), self.__response_body(resp))

    def upload(self) -> "requests.Response":
        """
        Uploads the run and its datasets to Nominal.

        Returns
        -------
        requests.Response
            The response object from the REST call.
        """
        datasets_payload = dict()

        for ds in self.datasets:
            # First, check if Run Datasets have been uploaded to S3
            if ds.s3_path is None:
                ds.upload()
            datasets_payload[ds.filename] = PayloadFactory.create_unix_datasource(ds)

        run_payload = PayloadFactory.run_upload(self, datasets_payload)

        # Make POST request to register Run and Datasets on Nominal
        resp = requests.post(
            url=ENDPOINTS['run_upload'].format(get_base_url()),
            json=run_payload,
            headers=self.__get_headers(),
        )

        # Kept for inspection after the call
        self.last_upload_payload = run_payload

        if resp.status_code == 200:
            self.rid = resp.json()["runRid"]
            print("\nRun RID: ", self.rid)
        else:
            print("\n{0} error registering Run on Nominal:\n".format(resp.status_code), self.__response_body(resp))

        return resp
nominal/utils.py ADDED
@@ -0,0 +1,90 @@
1
+ """
2
+ Internal utility functions for Nominal Python client
3
+ """
4
+
5
+ import random
6
+ import string
7
+ from datetime import datetime
8
+
9
+
10
def default_filename(nominal_file_class):
    """
    Generate a default name of the form '<CLASS>_<YYYY-MM-DD>_<4 random chars>'.

    Parameters
    ----------
    nominal_file_class : str
        Either "DATASET" or "RUN".

    Returns
    -------
    str
        The class name, today's date and four random uppercase letters or digits, joined by underscores.

    Raises
    ------
    ValueError
        If `nominal_file_class` is not one of the recognized classes.
    """
    if nominal_file_class not in ("DATASET", "RUN"):
        # ValueError is still an Exception, so existing handlers keep working
        raise ValueError("Unrecognized Nominal class", nominal_file_class)
    rand_str = "".join(random.choices(string.ascii_uppercase + string.digits, k=4))
    ts = datetime.today().strftime("%Y-%m-%d")
    return "_".join([nominal_file_class, ts, rand_str])
16
+
17
+
18
class PayloadFactory:
    """
    Given a Nominal Python object, generate JSON payload
    for REST API to instantiate on Nominal platform.
    """

    @staticmethod
    def dataset_trigger_ingest(ds) -> dict:
        """
        Build the payload that registers an uploaded Dataset.

        Reads `s3_path`, `properties`, `filename` and `description` from `ds`
        and declares '_python_datetime' as the timestamp series.
        """
        return {
            "source": {
                "type": "s3",
                "s3": {
                    "path": ds.s3_path,
                },
            },
            "properties": ds.properties,
            "datasetName": ds.filename,
            "datasetDescription": ds.description,
            "timestampMetadata": {
                "seriesName": "_python_datetime",
                "timestampType": {
                    "type": "absolute",
                    "absolute": {
                        "type": "customFormat",
                        "customFormat": {"format": "yyyy-MM-dd['T']HH:mm:ss.SSSSSS", "defaultYear": 0},
                    },
                },
            },
        }

    @staticmethod
    def create_unix_datasource(ds) -> dict:
        """
        Build the data-source entry of a Run payload for one Dataset.

        Reads `s3_path` and `filename` from `ds` and declares the epoch-seconds
        column as the time column.
        """
        return {
            "dataSource": {  # IngestRunDataSource
                "type": "newDataSourceV2",
                "newDataSourceV2": {
                    "source": {"type": "s3", "s3": {"path": ds.s3_path}},
                    "name": ds.filename,
                    "properties": {},
                    "timeColumnSpec": {
                        # Must name the epoch-seconds column that
                        # Ingest.set_ts_index writes into the uploaded file
                        "seriesName": "_unix_timestamp",
                        "timestampType": {
                            "type": "absolute",
                            "absolute": {
                                "type": "epochOfTimeUnit",
                                "epochOfTimeUnit": {"timeUnit": "SECONDS"},
                            },
                        },
                    },
                },
            },
            "timeOffsetSpec": {"type": "nanos", "nanos": {"seconds": 0, "nanos": 0}},
        }

    @staticmethod
    def run_upload(r, datasets_payload=None) -> dict:
        """
        Build the payload that creates (or is diffed against) a Run.

        Parameters
        ----------
        r : Run
            Supplies `title`, `description`, `properties` and the
            'SECONDS'/'NANOS' values of `_domain['START']` and `_domain['END']`.
        datasets_payload : dict, optional
            Data-source entries keyed by name. Default is an empty dict.
        """
        return {
            "title": r.title,
            "description": r.description,
            "startTime": {
                "secondsSinceEpoch": r._domain["START"]["SECONDS"],
                "offsetNanoseconds": r._domain["START"]["NANOS"],
            },
            "endTime": {
                "secondsSinceEpoch": r._domain["END"]["SECONDS"],
                "offsetNanoseconds": r._domain["END"]["NANOS"],
            },
            # A new dict per call: the payload is handed to callers, who must
            # not be able to mutate a shared default
            "dataSources": {} if datasets_payload is None else datasets_payload,
            # Send the run's own properties instead of always dropping them
            "properties": dict(getattr(r, "properties", None) or {}),
        }
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 nominal-io
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,62 @@
1
+ Metadata-Version: 2.1
2
+ Name: nominal
3
+ Version: 0.0.2
4
+ Summary: Automate Nominal workflows in Python
5
+ Author: JP
6
+ Author-email: jack@nominal.io
7
+ Requires-Python: >=3.12,<4.0
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Programming Language :: Python :: 3.12
10
+ Requires-Dist: jsondiff (>=2.2.0,<3.0.0)
11
+ Requires-Dist: keyring (>=25.3.0,<26.0.0)
12
+ Requires-Dist: polars (>=1.4.1,<2.0.0)
13
+ Requires-Dist: python-dateutil (>=2.9.0.post0,<3.0.0)
14
+ Requires-Dist: requests (>=2.32.3,<3.0.0)
15
+ Requires-Dist: rich (>=13.7.1,<14.0.0)
16
+ Description-Content-Type: text/markdown
17
+
18
+ # ⬖ Nominal
19
+ Python client for Nominal test data, storage, & compute
20
+
21
+ 🚧 WIP - API and syntax subject to change
22
+
23
+ ## Install
24
+
25
+ > pip3 install nominal
26
+
27
+ ## Usage
28
+
29
+ ### Set your API key
30
+
31
+ Retrieve your API key from /sandbox on your Nominal tenant
32
+
33
+ ```py
34
+ import nominal as nm
35
+
36
+ nm.set_token(...)
37
+ ```
38
+
39
+ ### Upload a Dataset (3 lines)
40
+
41
+ ```py
42
+ from nominal import Ingest
43
+
44
+ dataset = Ingest().read_csv('../data/penguins.csv')
45
+
46
+ dataset.upload()
47
+ ```
48
+
49
+ ### Upload a Run (3 lines)
50
+
51
+ ```py
52
+ from nominal import Run
53
+
54
+ r = Run(path='../data/penguins.csv')
55
+
56
+ r.upload()
57
+ ```
58
+
59
+ ### Apply a Check to a Run
60
+
61
+ TODO
62
+
@@ -0,0 +1,9 @@
1
+ LICENSE,sha256=zEGHG9mjDjaIS3I79O8mweQo-yiTbqx8jJvUPppVAwk,1067
2
+ README.md,sha256=BjXBdaad_vNmGf4tqP55cIZrgVxdjzoK83at0cc7-Sg,588
3
+ nominal/__init__.py,sha256=MRTsIi6-1gIKPjjvlJloySWaYUoraHcrTKb0bJSKUeM,172
4
+ nominal/nominal.py,sha256=I-5PXrMOjEQoGQqbTzcigT_TwbSlGEoGAXIiNwV65eo,21112
5
+ nominal/utils.py,sha256=QJet87l6DT8EJCC06_r71izXuLqfXapytfq9Vpyibew,3068
6
+ nominal-0.0.2.dist-info/LICENSE,sha256=zEGHG9mjDjaIS3I79O8mweQo-yiTbqx8jJvUPppVAwk,1067
7
+ nominal-0.0.2.dist-info/METADATA,sha256=h9G063K31hYeyUEgRcMgVgMH9ocOQ-eHeirKrUedS4U,1153
8
+ nominal-0.0.2.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
9
+ nominal-0.0.2.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: poetry-core 1.9.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any