nominal 0.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- LICENSE +21 -0
- README.md +44 -0
- nominal/__init__.py +8 -0
- nominal/nominal.py +573 -0
- nominal/utils.py +90 -0
- nominal-0.0.2.dist-info/LICENSE +21 -0
- nominal-0.0.2.dist-info/METADATA +62 -0
- nominal-0.0.2.dist-info/RECORD +9 -0
- nominal-0.0.2.dist-info/WHEEL +4 -0
LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 nominal-io
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
README.md
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# ⬖ Nominal
|
|
2
|
+
Python client for Nominal test data, storage, & compute
|
|
3
|
+
|
|
4
|
+
🚧 WIP - API and syntax subject to change
|
|
5
|
+
|
|
6
|
+
## Install
|
|
7
|
+
|
|
8
|
+
> pip3 install nominal
|
|
9
|
+
|
|
10
|
+
## Usage
|
|
11
|
+
|
|
12
|
+
### Set your API key
|
|
13
|
+
|
|
14
|
+
Retrieve your API key from /sandbox on your Nominal tenant
|
|
15
|
+
|
|
16
|
+
```py
|
|
17
|
+
import nominal as nm
|
|
18
|
+
|
|
19
|
+
nm.set_token(...)
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
### Upload a Dataset (3 lines)
|
|
23
|
+
|
|
24
|
+
```py
|
|
25
|
+
from nominal import Ingest
|
|
26
|
+
|
|
27
|
+
dataset = Ingest().read_csv('../data/penguins.csv')
|
|
28
|
+
|
|
29
|
+
dataset.upload()
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
### Upload a Run (3 lines)
|
|
33
|
+
|
|
34
|
+
```py
|
|
35
|
+
from nominal import Run
|
|
36
|
+
|
|
37
|
+
r = Run(path='../data/penguins.csv')
|
|
38
|
+
|
|
39
|
+
r.upload()
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
### Apply a Check to a Run
|
|
43
|
+
|
|
44
|
+
TODO
|
nominal/__init__.py
ADDED
nominal/nominal.py
ADDED
|
@@ -0,0 +1,573 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import io
|
|
3
|
+
import copy
|
|
4
|
+
import requests
|
|
5
|
+
from dateutil import parser
|
|
6
|
+
import polars as pl
|
|
7
|
+
import keyring as kr
|
|
8
|
+
from datetime import datetime
|
|
9
|
+
import jsondiff as jd
|
|
10
|
+
from jsondiff import diff
|
|
11
|
+
from math import floor
|
|
12
|
+
from rich import print
|
|
13
|
+
from utils import default_filename, PayloadFactory
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# REST endpoint templates. The first placeholder takes the API base URL; the
# second, where present, takes a file name or a Run RID.
ENDPOINTS = {
    "file_upload": "{}/upload/v1/upload-file?fileName={}",
    "dataset_upload": "{}/ingest/v1/trigger-ingest-v2",
    "run_upload": "{}/ingest/v1/ingest-run",
    "run_retrieve": "{}/scout/v1/run/{}",  # GET
    "run_update": "{}/scout/v1/run/{}",  # PUT
}

# Short environment names mapped to their API base URLs.
BASE_URLS = {
    "STAGING": "https://api-staging.gov.nominal.io/api",
    "PROD": "https://api.gov.nominal.io/api",
}
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def set_base_url(base_url: str = "STAGING"):
    """
    Store the Nominal API base URL in the NOMINAL_BASE_URL environment variable.

    Usage:
        import nominal as nm
        nm.set_base_url('PROD')

    Parameters
    ----------
    base_url : str, optional
        Either a key of BASE_URLS (for example 'STAGING' or 'PROD'), which is
        translated to the matching URL, or any other string, which is stored
        exactly as given.

    TODO
    ----
    Default is staging. Change to prod after beta period.
    """
    # Known short names resolve to their URL; anything else passes through.
    os.environ["NOMINAL_BASE_URL"] = BASE_URLS.get(base_url, base_url)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def get_base_url():
    """
    Return the API base URL held in the NOMINAL_BASE_URL environment variable.

    When the variable is not set yet, set_base_url() is called with its
    default argument first so that a value is always available.
    """
    try:
        return os.environ["NOMINAL_BASE_URL"]
    except KeyError:
        set_base_url()  # set to default
        return os.environ["NOMINAL_BASE_URL"]
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def get_app_base_url():
    """
    Derive the web-app base URL from the API base URL.

    eg, https://api-staging.gov.nominal.io/api -> https://app-staging.gov.nominal.io

    The trailing "/api" path segment is removed and every remaining
    occurrence of "api" is replaced by "app".

    TODO
    ----
    This won't work for custom domains
    """
    api_url = get_base_url()
    # removesuffix() drops the literal "/api" suffix. str.rstrip("/api") would
    # instead strip any trailing run of the characters '/', 'a', 'p', 'i' and
    # could eat the end of the host name (e.g. a ".pi" or ".app" domain).
    without_api_path = api_url.removesuffix("/").removesuffix("/api")
    return without_api_path.replace("api", "app")
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def set_token(token):
    """
    Save the Nominal API access token in the system keyring.

    The token is stored under service "Nominal API" and user name
    "python-client", which is where the request helpers read it back from.

    Parameters
    ----------
    token : str or None
        The access token. When None, a hint on where to obtain a token is
        printed and nothing is stored.
    """
    if token is None:
        print("Retrieve your access token from [link]{0}/sandbox[/link]".format(get_base_url()))
        # Nothing to store: do not hand None to the keyring backend.
        return
    kr.set_password("Nominal API", "python-client", token)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
class Dataset(pl.DataFrame):
    """
    Polars DataFrame that can be uploaded to and registered on the Nominal platform.

    Dataset inherits from Polars DataFrame for its rich display, ingestion, and wrangling capabilities.

    Parameters
    ----------
    data : various, optional
        The input data for the dataset. Passed unchanged to the Polars DataFrame constructor.
    filename : str, optional
        The name of the dataset file. Default is None; a name is then generated on first upload.
    rid : str, optional
        RID (Resource ID) of the dataset on the Nominal platform. Default is None.
    properties : dict, optional
        A dictionary of additional properties associated with the dataset.
        Default is None, which gives every instance its own empty dictionary.
    description : str, optional
        A brief description of the dataset. Default is an empty string.

    Attributes
    ----------
    s3_path : str or None
        The S3 path where the dataset is stored after upload. Initially None.
    filename : str
        The name of the dataset file.
    properties : dict
        A dictionary of additional properties associated with the dataset.
    description : str
        A brief description of the dataset.
    rid : str or None
        The dataset's RID (Resource ID) after registration on the Nominal platform.
    dataset_link : str
        A URL link to the dataset on the Nominal platform. Initially an empty string.

    Methods
    -------
    upload(overwrite=False)
        Uploads and registers the dataset on the Nominal platform.
    """

    def __init__(
        self,
        data: any = None,
        filename: str = None,
        rid: str = None,
        properties: dict = None,
        description: str = "",
    ):
        super().__init__(data)

        self.s3_path = None
        self.filename = filename
        # A dict default in the signature would be shared by every Dataset
        # created without `properties`; build a fresh one per instance.
        self.properties = {} if properties is None else properties
        self.description = description
        self.rid = rid
        self.dataset_link = ""

    def __get_headers(self, content_type: str = "json") -> dict:
        """Build request headers carrying the bearer token read from the keyring."""
        TOKEN = kr.get_password("Nominal API", "python-client")
        return {
            "Authorization": "Bearer {}".format(TOKEN),
            "Content-Type": "application/{0}".format(content_type),
        }

    def __upload_file(self, overwrite: bool) -> requests.Response:
        """
        Uploads dataframe to S3 as a CSV file.

        Parameters
        ----------
        overwrite : bool
            When False and `s3_path` is already set, the upload is skipped.

        Returns:
            Response object from the REST call, or None when the upload was skipped.
        """

        if self.s3_path is not None and not overwrite:
            print(
                "\nThis Dataset is already uploaded to an S3 bucket:\n{0}\nTry [code]upload(overwrite = True)[/code] to overwrite it.".format(
                    self.s3_path
                )
            )
            return None

        # Create a default dataset name
        if self.filename is None:
            self.filename = default_filename("DATASET")

        csv_file_buffer = io.BytesIO()
        self.write_csv(csv_file_buffer)

        # Get the size of the buffer in bytes, then rewind for reading
        csv_file_buffer.seek(0, os.SEEK_END)
        csv_buffer_size_bytes = csv_file_buffer.tell()
        csv_file_buffer.seek(0)

        print(
            "\nUploading: [bold green]{0}[/bold green]\nto {1}\n = {2} bytes".format(
                self.filename, get_base_url(), csv_buffer_size_bytes
            )
        )

        # Make POST request to upload data file to S3
        resp = requests.post(
            url=ENDPOINTS["file_upload"].format(get_base_url(), self.filename),
            data=csv_file_buffer.read(),
            params={"sizeBytes": csv_buffer_size_bytes},
            headers=self.__get_headers(content_type="octet-stream"),
        )

        if resp.status_code == 200:
            # The response body is the S3 path wrapped in double quotes.
            self.s3_path = resp.text.strip('"')
            print("\nUpload to S3 successful.\nS3 bucket:\n", self.s3_path)
        else:
            print("\n{0} error during upload to S3:\n".format(resp.status_code), resp.json())

        return resp

    def upload(self, overwrite: bool = False):
        """
        Uploads the Dataset to S3 and registers it on the Nominal platform.

        Endpoint:
            /ingest/v1/trigger-ingest-v2

        Parameters
        ----------
        overwrite : bool, optional
            Upload the file again even if `s3_path` is already set. Default is False.

        Returns:
            Response object from the registration REST call, or None when
            registration was aborted.
        """

        s3_upload_resp = self.__upload_file(overwrite)

        # __upload_file returns None when it skipped the upload. A real
        # response that is not 200 means the file did not reach S3, so a
        # possibly stale s3_path must not be registered.
        if s3_upload_resp is not None and s3_upload_resp.status_code != 200:
            print("Aborting Dataset registration")
            return

        if self.s3_path is None:
            print("Cannnot register Dataset on Nominal - Dataset.s3_path is not set")
            return

        print("\nRegistering [bold green]{0}[/bold green] on {1}".format(self.filename, get_base_url()))

        resp = requests.post(
            url=ENDPOINTS["dataset_upload"].format(get_base_url()),
            json=PayloadFactory.dataset_trigger_ingest(self),
            headers=self.__get_headers(),
        )

        if resp.status_code == 200:
            self.rid = resp.json()["datasetRid"]
            self.dataset_link = "{0}/data-sources/{1}".format(get_app_base_url(), self.rid)
            print("\nDataset RID: ", self.rid)
            print("\nDataset Link: ", "[link={0}]{0}[/link]\n".format(self.dataset_link))
        else:
            print("\n{0} error registering Dataset on Nominal:\n".format(resp.status_code), resp.json())

        return resp
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
class Ingest:
    """
    Handles ingestion of tabular file formats.

    This class provides methods for reading CSV and Parquet files and for setting a timestamp
    index column in the ingested data. The ingested data is returned as a `Dataset` object.

    Methods
    -------
    set_ts_index(df, ts_col)
        Sets a timestamp index for the provided DataFrame. This method adds internal columns for the
        datetime in Python format ('_python_datetime') and Unix timestamp format ('_unix_timestamp').

    read_csv(path, ts_col=None)
        Reads a CSV file from the specified path and returns a `Dataset` object with a timestamp index set.

    read_parquet(path, ts_col=None)
        Reads a Parquet file from the specified path and returns a `Dataset` object with a timestamp index set.

    Notes
    -----
    TODO: Consider using Ibis for database source connectivity.
    TODO: Implement video ingest functionality.
    """

    @staticmethod
    def set_ts_index(df: pl.DataFrame, ts_col: str = None) -> pl.DataFrame:
        """
        Sets a timestamp index for the provided DataFrame.

        This method attempts to infer the timestamp column if one is not specified. It adds internal columns to the
        DataFrame: '_python_datetime' and '_unix_timestamp'. The DataFrame is then sorted by the '_python_datetime' column.
        The internal columns are inserted into `df` in place; the sorted result is a new DataFrame.

        Parameters
        ----------
        df : polars.DataFrame
            The DataFrame for which the timestamp index will be set.
        ts_col : str, optional
            The name of the column to use as the timestamp. If None, the first column whose first value is a
            datetime, or parses as one, is used.

        Returns
        -------
        polars.DataFrame
            The sorted DataFrame with the timestamp index set, or `df` unchanged when no timestamp
            column was given or could be inferred (a message is printed in that case).
        """
        internal_cols = ("_python_datetime", "_unix_timestamp")

        def to_datetime(value):
            # Values that are already datetime objects (e.g. read from a
            # datetime-typed column) cannot go through parser.parse, which
            # only accepts strings/streams.
            return value if isinstance(value, datetime) else parser.parse(value)

        if ts_col is None:
            # Infer timestamp column
            for col in df.columns:
                if col in internal_cols:
                    # Columns generated by a previous call are never the source.
                    continue
                try:
                    to_datetime(df[col][0])
                except Exception:
                    # Deliberately broad: this is a best-effort probe, and any
                    # failure (empty column, non-string value, unparseable
                    # text) just means "not this column".
                    continue
                ts_col = col
                break

        if ts_col is None:
            print(
                "A Dataset must have at least one column that is a timestamp. Please specify which column is a date or datetime with the `ts_col` parameter."
            )
            return df

        # Parse before touching df so a parse error leaves df unmodified.
        parsed = [to_datetime(value) for value in df[ts_col]]

        # Drop stale internal columns one by one: with both drops in a single
        # try block, a missing first column would leave the second in place.
        for name in internal_cols:
            if name in df.columns:
                df.drop_in_place(name)

        datetime_series = pl.Series("_python_datetime", parsed)
        unix_series = pl.Series("_unix_timestamp", [dt.timestamp() for dt in datetime_series])
        df.insert_column(-1, datetime_series)
        df.insert_column(-1, unix_series)

        # Datasets must be sorted in order to upload to Nominal
        return df.sort("_python_datetime")

    def read_csv(self, path: str, ts_col: str = None) -> Dataset:
        """Read the CSV file at `path` into a Dataset named after the file, with the timestamp index set."""
        dfc = pl.read_csv(path)
        dft = self.set_ts_index(dfc, ts_col)
        return Dataset(dft, filename=os.path.basename(path))

    def read_parquet(self, path: str, ts_col: str = None) -> Dataset:
        """Read the Parquet file at `path` into a Dataset named after the file, with the timestamp index set."""
        dfp = pl.read_parquet(path)
        dft = self.set_ts_index(dfp, ts_col)
        return Dataset(dft, filename=os.path.basename(path))
|
|
302
|
+
|
|
303
|
+
|
|
304
|
+
class Run:
    '''
    Python representation of a Nominal Run.

    Parameters
    ----------
    rid : str, optional
        Resource ID of an existing Run. If provided, the Run is retrieved from Nominal and its
        JSON representation is stored in `cloud`. Default is None.
    path : str, optional
        A single file path to a dataset. If provided, it replaces `paths`. Default is None.
    paths : list of str, optional
        A list of CSV file paths to datasets. Default is None (no paths).
    datasets : list of Dataset, optional
        A list of `Dataset` objects to be included in the run. Only used when no paths are given.
        Default is None (no datasets).
    properties : dict, optional
        Properties associated with the run. Default is None, which gives a new empty dict.
    title : str, optional
        The title of the run. Default is None, which will generate a default filename.
    description : str, optional
        A brief description of the run. Default is an empty string.
    start : str or datetime, optional
        The start time for the run. Default is None, which uses the earliest dataset timestamp.
    end : str or datetime, optional
        The end time for the run. Default is None, which uses the latest dataset timestamp.
    cloud : dict, optional
        Currently unused by the constructor; `Run.cloud` is only populated by retrieving a Run via `rid`
        or by `update()`.

    Attributes
    ----------
    title : str
        The title of the run. Defaults to a timestamped, autogenerated filename if not provided.
    description : str
        A brief description of the run.
    properties : dict
        A dict of properties associated with the run.
    datasets : list of Dataset
        A list of `Dataset` objects associated with the run.
    rid : str or None
        Resource ID of the run once it was retrieved or uploaded.
    cloud : dict or None
        JSON representation of the run as last received from Nominal.
    datasets_domain : dict
        A dictionary holding the overall 'START' and 'END' domain from the datasets.

    Assigning to `start` or `end` parses the value and updates the run's time domain;
    reading `start` or `end` prints the endpoint in human-readable form.

    Methods
    -------
    upload()
        Uploads the run and its datasets to Nominal.
    diff()
        Prints and returns the difference between the cloud and local versions of the run.
    update()
        Sends the difference between the cloud and local versions to Nominal.
    '''

    def __print_human_readable_endpoint(self, endpoint):
        '''
        Print the Run datetime endpoints in a human-readable form
        '''
        print('Run {} time:'.format(endpoint))
        # NANOS is a sub-second offset in nanoseconds, so it is divided (not
        # multiplied) to convert it to fractional seconds.
        unix_seconds = self._domain[endpoint]['SECONDS'] + self._domain[endpoint]['NANOS'] / 1e9
        print('Unix: ', unix_seconds)
        datetime_endpoint = datetime.fromtimestamp(unix_seconds)
        print('Datetime: ', datetime_endpoint)

    def __setattr__(self, k: str, v) -> None:
        '''
        Convenience method to allow setting Run endpoints as human-readable strings

        Assigning `start` or `end` (string or datetime) updates the matching endpoint of the
        time domain and prints it; every other attribute is stored normally.
        '''
        if k in ['start', 'end']:
            endpoint = k.upper()
            # parser.parse only accepts strings; pass datetimes through as-is.
            self._domain[endpoint]['DATETIME'] = v if isinstance(v, datetime) else parser.parse(v)
            self.__set_run_unix_timestamp_domain([endpoint])
            self.__print_human_readable_endpoint(endpoint)
        else:
            super().__setattr__(k, v)

    def __getattr__(self, k: str) -> None:
        '''
        Print the endpoint when `start` or `end` is read; any other missing attribute
        raises AttributeError. Only called when normal attribute lookup fails.
        '''
        if k in ['start', 'end']:
            self.__print_human_readable_endpoint(k.upper())
        else:
            # `object` defines no __getattr__ to delegate to, so raise the
            # standard error directly with the real attribute name.
            raise AttributeError("'{0}' object has no attribute '{1}'".format(type(self).__name__, k))

    def __init__(self,
                 rid: str = None,
                 path: str = None,
                 paths: "list[str]" = None,
                 datasets: "list[Dataset]" = None,
                 properties: dict = None,
                 title: str = None,
                 description: str = '',
                 start: str = None,
                 end: str = None,
                 cloud: dict = None):

        self.title = default_filename('RUN') if title is None else title
        self.description = description
        # None defaults instead of [] / {} in the signature: mutable defaults
        # would be shared between all Run instances.
        self.properties = {} if properties is None else properties
        self._domain = {'START': {}, 'END': {}}
        # Defined up front so diff(), update() and upload() can test them
        # instead of failing on a missing attribute.
        self.rid = None
        self.cloud = None
        self.datasets = []

        paths = [] if paths is None else paths
        datasets = [] if datasets is None else datasets

        if rid is not None:
            # Attempt to retrieve run by its resource ID (rid)
            resp = requests.get(
                headers=self.__get_headers(),
                url=ENDPOINTS['run_retrieve'].format(get_base_url(), rid)
            )
            if resp.status_code != 200:
                print('There was an error retrieving Run with rid = {0}'.format(rid))
                print('Make sure that your rid is correct and from [link]{0}[/link]'.format(get_app_base_url()))
                print(resp.json())
                return

            self.cloud = resp.json()
            self.rid = rid
            print('Cloud response:')
            print(self.cloud)
            print('... Downloaded to Run.cloud')

            # Assign Run metadata to local Run object metadata
            local_metadata = ['rid', 'description', 'title', 'start', 'end', 'properties', 'labels']
            for md_key in local_metadata:
                if md_key in self.cloud:
                    # Override local value with cloud value
                    setattr(self, md_key, self.cloud[md_key])
                elif md_key == 'start':
                    self._domain['START']['SECONDS'] = self.cloud['startTime']['secondsSinceEpoch']
                    self._domain['START']['NANOS'] = self.cloud['startTime']['offsetNanoseconds']
                elif md_key == 'end':
                    self._domain['END']['SECONDS'] = self.cloud['endTime']['secondsSinceEpoch']
                    self._domain['END']['NANOS'] = self.cloud['endTime']['offsetNanoseconds']

        if path is not None:
            paths = [path]

        if len(paths) == 0 and len(datasets) == 0:
            # A Run retrieved by rid legitimately has no local datasets.
            if self.cloud is None:
                print("Please provide a list of Datasets or list of paths for this Run")
            return

        if len(paths) > 0:
            self.datasets = [Ingest().read_csv(fp) for fp in paths]
        else:
            self.datasets = datasets

        # Overall time span covered by the datasets.
        self.datasets_domain = dict(
            START=min(ds["_python_datetime"].min() for ds in self.datasets),
            END=max(ds["_python_datetime"].max() for ds in self.datasets),
        )

        self.__set_run_datetime_boundary('START', start)
        self.__set_run_datetime_boundary('END', end)
        self.__set_run_unix_timestamp_domain()

    def __set_run_datetime_boundary(self, key: str, str_datetime: any):
        '''
        Set the 'DATETIME' of one Run boundary ('START' or 'END').

        None falls back to the datasets' domain, a datetime is used as-is and a
        string is parsed. Any other type raises TypeError.
        '''
        if str_datetime is None:
            self._domain[key]['DATETIME'] = self.datasets_domain[key]
        elif isinstance(str_datetime, datetime):
            self._domain[key]['DATETIME'] = str_datetime
        elif isinstance(str_datetime, str):
            self._domain[key]['DATETIME'] = parser.parse(str_datetime)
        else:
            # Silently ignoring the value would only surface later as a
            # KeyError on the missing 'DATETIME' entry.
            raise TypeError(
                'Run {0} must be a str, a datetime or None, not {1}'.format(
                    key.lower(), type(str_datetime).__name__
                )
            )

    def __set_run_unix_timestamp_domain(self, endpoints=('START', 'END')):
        '''
        Derive 'SECONDS' (whole seconds since the epoch) and 'NANOS' (sub-second
        offset in nanoseconds) from the 'DATETIME' of each given endpoint.
        '''
        for key in endpoints:
            dt = self._domain[key]['DATETIME']
            self._domain[key]['SECONDS'] = floor(dt.timestamp())
            # Take the fraction from the microsecond field: it is exact,
            # whereas subtracting floats loses precision at epoch magnitudes.
            self._domain[key]['NANOS'] = dt.microsecond * 1000

    def __get_headers(self, content_type: str = "json") -> dict:
        '''
        Build request headers carrying the bearer token read from the keyring.
        '''
        TOKEN = kr.get_password("Nominal API", "python-client")
        return {
            "Authorization": "Bearer {}".format(TOKEN),
            "Content-Type": "application/{0}".format(content_type),
        }

    def diff(self):
        '''
        Compare local and cloud Run instances

        Prints the diff in explicit (labeled) syntax and returns it in the default
        jsondiff syntax. Data sources and top-level deletions are left out of the
        comparison. Returns None when no Run was downloaded from the cloud.
        '''
        if self.cloud is None:
            print('No Run instance has been downloaded from the cloud')
            print('Download a run with [code]r = Run(rid = RID)[/code]')
            return

        local_copy = PayloadFactory.run_upload(self)
        cloud_copy = copy.deepcopy(self.cloud)

        # rm datasources - we're not comparing those
        # (pop with a default: the key may be absent from either side)
        cloud_copy.pop('dataSources', None)
        local_copy.pop('dataSources', None)

        def rm_deletions_and_datasources(rd):
            # Keys that exist only in the cloud copy are not changes made locally.
            if jd.delete in rd:
                del rd[jd.delete]

        run_diff_labeled = diff(cloud_copy, local_copy, syntax='explicit')
        rm_deletions_and_datasources(run_diff_labeled)
        print(run_diff_labeled)

        run_diff_unlabeled = diff(cloud_copy, local_copy)
        rm_deletions_and_datasources(run_diff_unlabeled)
        return run_diff_unlabeled

    def update(self):
        '''
        Updating run metadata is done in 4 steps:
            1. Download a Run: r = Run(rid = RID)
            2. Update something about the Run: r.title = 'Runs with Friends'
            3. [Optional] Inspect a diff between the cloud and local versions: r.diff()
            4. r.update()
        By design, no changes are synced with the cloud without an explicit call to update()
        At the moment, only Run start, end, and metadata can be updated (not datasources)
        '''

        if self.rid is None or self.cloud is None:
            print('No Run instance has been downloaded from the cloud')
            print('Download a run with [code]r = Run(rid = RID)[/code]')
            # Without a cloud copy there is nothing to diff or update.
            return

        rd = self.diff()  # rd = "run diff"
        if len(rd) == 0:
            print('No difference between Run.cloud and the local Run instance')
            return

        # Make PUT request to update Run
        resp = requests.put(
            url=ENDPOINTS['run_update'].format(get_base_url(), self.rid),
            json=rd,
            headers=self.__get_headers(),
        )

        if resp.status_code == 200:
            self.cloud = resp.json()
            print('\nUpdated Run on Nominal:')
            print('[link]{0}/runs/{1}[/link]'.format(get_app_base_url(), self.cloud['runNumber']))
        else:
            print('\n{0} error updating Run on Nominal:\n'.format(resp.status_code), resp.json())

    def upload(self) -> "requests.Response":
        """
        Uploads the run and its datasets to Nominal.

        Datasets without an `s3_path` are uploaded first. The payload that was
        sent is kept in `last_upload_payload`.

        Returns
        -------
        requests.Response
            The response object from the REST call.
        """
        datasets_payload = dict()

        for ds in self.datasets:
            # First, check if Run Datasets have been uploaded to S3
            if ds.s3_path is None:
                ds.upload()
            datasets_payload[ds.filename] = PayloadFactory.create_unix_datasource(ds)

        run_payload = PayloadFactory.run_upload(self, datasets_payload)

        # Make POST request to register Run and Datasets on Nominal
        resp = requests.post(
            url=ENDPOINTS['run_upload'].format(get_base_url()),
            json=run_payload,
            headers=self.__get_headers(),
        )

        self.last_upload_payload = run_payload

        if resp.status_code == 200:
            self.rid = resp.json()["runRid"]
            print("\nRun RID: ", self.rid)
        else:
            print("\n{0} error registering Run on Nominal:\n".format(resp.status_code), resp.json())

        return resp
|
nominal/utils.py
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Internal utility functions for Nominal Python client
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import random
|
|
6
|
+
import string
|
|
7
|
+
from datetime import datetime
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def default_filename(nominal_file_class):
    """
    Generate a default name of the form ``<CLASS>_<YYYY-MM-DD>_<XXXX>``.

    ``<CLASS>`` is `nominal_file_class` ("DATASET" or "RUN"), the date is
    today's date and ``<XXXX>`` is four random uppercase letters or digits.
    Any other `nominal_file_class` raises an Exception.
    """
    if nominal_file_class not in ("DATASET", "RUN"):
        raise Exception("Unrecognized Nominal class", nominal_file_class)

    alphabet = string.ascii_uppercase + string.digits
    suffix_chars = []
    for _ in range(4):
        suffix_chars.append(random.choice(alphabet))
    suffix = "".join(suffix_chars)

    date_stamp = datetime.today().strftime("%Y-%m-%d")
    return "_".join((nominal_file_class, date_stamp, suffix))
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class PayloadFactory:
    """
    Given a Nominal Python object, generate JSON payload
    for REST API to instantiate on Nominal platform.
    """

    @staticmethod
    def dataset_trigger_ingest(ds) -> dict:
        """
        Build the payload that registers an uploaded dataset.

        Reads `s3_path`, `properties`, `filename` and `description` from `ds`.
        The '_python_datetime' column is declared as the timestamp series,
        described with a custom format rather than the iso8601 type.
        """
        return {
            "source": {
                "type": "s3",
                "s3": {
                    "path": ds.s3_path,
                },
            },
            "properties": ds.properties,
            "datasetName": ds.filename,
            "datasetDescription": ds.description,
            "timestampMetadata": {
                "seriesName": "_python_datetime",
                "timestampType": {
                    "type": "absolute",
                    "absolute": {
                        "type": "customFormat",
                        "customFormat": {"format": "yyyy-MM-dd['T']HH:mm:ss.SSSSSS", "defaultYear": 0},
                    },
                },
            },
        }

    @staticmethod
    def create_unix_datasource(ds) -> dict:
        """
        Build the data source entry for one dataset of a Run upload.

        Reads `s3_path` and `filename` from `ds`. The time column is declared
        as epoch seconds.
        """
        return {
            "dataSource": {  # IngestRunDataSource
                "type": "newDataSourceV2",
                "newDataSourceV2": {
                    "source": {"type": "s3", "s3": {"path": ds.s3_path}},
                    "name": ds.filename,
                    "properties": {},
                    "timeColumnSpec": {
                        # Must match the epoch-seconds column that
                        # Ingest.set_ts_index adds to every Dataset.
                        "seriesName": "_unix_timestamp",
                        "timestampType": {
                            "type": "absolute",
                            "absolute": {
                                "type": "epochOfTimeUnit",
                                "epochOfTimeUnit": {"timeUnit": "SECONDS"},
                            },
                        },
                    },
                },
            },
            "timeOffsetSpec": {"type": "nanos", "nanos": {"seconds": 0, "nanos": 0}},
        }

    @staticmethod
    def run_upload(r, datasets_payload=None) -> dict:
        """
        Build the payload describing a Run.

        Parameters
        ----------
        r : Run
            Object providing `title`, `description`, `properties` and a
            `_domain` dict with 'SECONDS' and 'NANOS' for 'START' and 'END'.
        datasets_payload : dict, optional
            Data source entries keyed by name. Default is None, which yields
            a new empty dict in the payload.
        """
        return {
            "title": r.title,
            "description": r.description,
            "startTime": {
                "secondsSinceEpoch": r._domain["START"]["SECONDS"],
                "offsetNanoseconds": r._domain["START"]["NANOS"],
            },
            "endTime": {
                "secondsSinceEpoch": r._domain["END"]["SECONDS"],
                "offsetNanoseconds": r._domain["END"]["NANOS"],
            },
            # A fresh dict per call: callers may mutate the returned payload.
            "dataSources": {} if datasets_payload is None else datasets_payload,
            # Forward the Run's own properties; a hard-coded {} would drop
            # them on upload and show up as a spurious change in Run.diff().
            "properties": r.properties,
        }
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 nominal-io
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: nominal
|
|
3
|
+
Version: 0.0.2
|
|
4
|
+
Summary: Automate Nominal workflows in Python
|
|
5
|
+
Author: JP
|
|
6
|
+
Author-email: jack@nominal.io
|
|
7
|
+
Requires-Python: >=3.12,<4.0
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
10
|
+
Requires-Dist: jsondiff (>=2.2.0,<3.0.0)
|
|
11
|
+
Requires-Dist: keyring (>=25.3.0,<26.0.0)
|
|
12
|
+
Requires-Dist: polars (>=1.4.1,<2.0.0)
|
|
13
|
+
Requires-Dist: python-dateutil (>=2.9.0.post0,<3.0.0)
|
|
14
|
+
Requires-Dist: requests (>=2.32.3,<3.0.0)
|
|
15
|
+
Requires-Dist: rich (>=13.7.1,<14.0.0)
|
|
16
|
+
Description-Content-Type: text/markdown
|
|
17
|
+
|
|
18
|
+
# ⬖ Nominal
|
|
19
|
+
Python client for Nominal test data, storage, & compute
|
|
20
|
+
|
|
21
|
+
🚧 WIP - API and syntax subject to change
|
|
22
|
+
|
|
23
|
+
## Install
|
|
24
|
+
|
|
25
|
+
> pip3 install nominal
|
|
26
|
+
|
|
27
|
+
## Usage
|
|
28
|
+
|
|
29
|
+
### Set your API key
|
|
30
|
+
|
|
31
|
+
Retrieve your API key from /sandbox on your Nominal tenant
|
|
32
|
+
|
|
33
|
+
```py
|
|
34
|
+
import nominal as nm
|
|
35
|
+
|
|
36
|
+
nm.set_token(...)
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
### Upload a Dataset (3 lines)
|
|
40
|
+
|
|
41
|
+
```py
|
|
42
|
+
from nominal import Ingest
|
|
43
|
+
|
|
44
|
+
dataset = Ingest().read_csv('../data/penguins.csv')
|
|
45
|
+
|
|
46
|
+
dataset.upload()
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
### Upload a Run (3 lines)
|
|
50
|
+
|
|
51
|
+
```py
|
|
52
|
+
from nominal import Run
|
|
53
|
+
|
|
54
|
+
r = Run(path='../data/penguins.csv')
|
|
55
|
+
|
|
56
|
+
r.upload()
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
### Apply a Check to a Run
|
|
60
|
+
|
|
61
|
+
TODO
|
|
62
|
+
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
LICENSE,sha256=zEGHG9mjDjaIS3I79O8mweQo-yiTbqx8jJvUPppVAwk,1067
|
|
2
|
+
README.md,sha256=BjXBdaad_vNmGf4tqP55cIZrgVxdjzoK83at0cc7-Sg,588
|
|
3
|
+
nominal/__init__.py,sha256=MRTsIi6-1gIKPjjvlJloySWaYUoraHcrTKb0bJSKUeM,172
|
|
4
|
+
nominal/nominal.py,sha256=I-5PXrMOjEQoGQqbTzcigT_TwbSlGEoGAXIiNwV65eo,21112
|
|
5
|
+
nominal/utils.py,sha256=QJet87l6DT8EJCC06_r71izXuLqfXapytfq9Vpyibew,3068
|
|
6
|
+
nominal-0.0.2.dist-info/LICENSE,sha256=zEGHG9mjDjaIS3I79O8mweQo-yiTbqx8jJvUPppVAwk,1067
|
|
7
|
+
nominal-0.0.2.dist-info/METADATA,sha256=h9G063K31hYeyUEgRcMgVgMH9ocOQ-eHeirKrUedS4U,1153
|
|
8
|
+
nominal-0.0.2.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
|
9
|
+
nominal-0.0.2.dist-info/RECORD,,
|