deriva-ml 1.17.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. deriva_ml/.DS_Store +0 -0
  2. deriva_ml/__init__.py +79 -0
  3. deriva_ml/bump_version.py +142 -0
  4. deriva_ml/core/__init__.py +39 -0
  5. deriva_ml/core/base.py +1527 -0
  6. deriva_ml/core/config.py +69 -0
  7. deriva_ml/core/constants.py +36 -0
  8. deriva_ml/core/definitions.py +74 -0
  9. deriva_ml/core/enums.py +222 -0
  10. deriva_ml/core/ermrest.py +288 -0
  11. deriva_ml/core/exceptions.py +28 -0
  12. deriva_ml/core/filespec.py +116 -0
  13. deriva_ml/dataset/__init__.py +12 -0
  14. deriva_ml/dataset/aux_classes.py +225 -0
  15. deriva_ml/dataset/dataset.py +1519 -0
  16. deriva_ml/dataset/dataset_bag.py +450 -0
  17. deriva_ml/dataset/history.py +109 -0
  18. deriva_ml/dataset/upload.py +439 -0
  19. deriva_ml/demo_catalog.py +495 -0
  20. deriva_ml/execution/__init__.py +26 -0
  21. deriva_ml/execution/environment.py +290 -0
  22. deriva_ml/execution/execution.py +1180 -0
  23. deriva_ml/execution/execution_configuration.py +147 -0
  24. deriva_ml/execution/workflow.py +413 -0
  25. deriva_ml/feature.py +228 -0
  26. deriva_ml/install_kernel.py +71 -0
  27. deriva_ml/model/__init__.py +0 -0
  28. deriva_ml/model/catalog.py +485 -0
  29. deriva_ml/model/database.py +719 -0
  30. deriva_ml/protocols/dataset.py +19 -0
  31. deriva_ml/run_notebook.py +228 -0
  32. deriva_ml/schema/__init__.py +3 -0
  33. deriva_ml/schema/annotations.py +473 -0
  34. deriva_ml/schema/check_schema.py +104 -0
  35. deriva_ml/schema/create_schema.py +393 -0
  36. deriva_ml/schema/deriva-ml-reference.json +8525 -0
  37. deriva_ml/schema/policy.json +81 -0
  38. deriva_ml/schema/table_comments_utils.py +57 -0
  39. deriva_ml/test.py +94 -0
  40. deriva_ml-1.17.10.dist-info/METADATA +38 -0
  41. deriva_ml-1.17.10.dist-info/RECORD +45 -0
  42. deriva_ml-1.17.10.dist-info/WHEEL +5 -0
  43. deriva_ml-1.17.10.dist-info/entry_points.txt +9 -0
  44. deriva_ml-1.17.10.dist-info/licenses/LICENSE +201 -0
  45. deriva_ml-1.17.10.dist-info/top_level.txt +1 -0
deriva_ml/dataset/dataset_bag.py
@@ -0,0 +1,450 @@
"""
This module implements the SQLite interface to a set of directories representing a dataset bag.
"""

from __future__ import annotations

# Standard library imports
from collections import defaultdict
from copy import copy
from typing import TYPE_CHECKING, Any, Generator, Iterable, cast

# Third-party imports
import pandas as pd
from pydantic import ConfigDict, validate_call
from sqlalchemy import CompoundSelect, Engine, RowMapping, Select, and_, inspect, select, union
from sqlalchemy.orm import RelationshipProperty, Session
from sqlalchemy.orm.util import AliasedClass

# Deriva imports
import deriva.core.datapath as datapath
from deriva.core.ermrest_model import Table

# Local imports
from deriva_ml.core.definitions import RID, VocabularyTerm
from deriva_ml.core.exceptions import DerivaMLException, DerivaMLInvalidTerm
from deriva_ml.feature import Feature

if TYPE_CHECKING:
    from deriva_ml.model.database import DatabaseModel

try:
    from icecream import ic
except ImportError:  # Graceful fallback if IceCream isn't installed.
    ic = lambda *a: None if not a else (a[0] if len(a) == 1 else a)  # noqa

class DatasetBag:
    """
    DatasetBag is a class that manages a materialized bag. It is created from a locally materialized
    BDBag for a dataset_table, which is created either by DerivaML.create_execution or directly by
    calling DerivaML.download_dataset.

    In general, a bag may contain multiple datasets if the dataset is nested. A DatasetBag instance
    represents only one of the datasets in the bag.

    All the metadata associated with the dataset is stored in a SQLite database that can be queried using SQL.

    Attributes:
        dataset_rid (RID): RID for the specified dataset.
        version: The version of the dataset.
        model (DatabaseModel): The database model that holds all the catalog metadata associated with this dataset.
        engine (Engine): SQLAlchemy engine for the SQLite database holding the table values.
        domain_schema (str): Name of the domain schema.
    """
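
A minimal usage sketch for the class described above (not part of the packaged file). The docstring names DerivaML.create_execution and DerivaML.download_dataset as the usual entry points; the host name, catalog id, dataset RID, the constructor arguments, and the assumption that download_dataset returns a DatasetBag are illustrative only.

    from deriva_ml import DerivaML

    ml = DerivaML("example.derivacloud.org", catalog_id="1")   # hypothetical host and catalog
    bag = ml.download_dataset("1-ABCD")                        # hypothetical RID; assumed to return a DatasetBag

    print(bag.dataset_rid, bag.version)
    print(bag.list_tables())
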
    def __init__(self, database_model: DatabaseModel, dataset_rid: RID | None = None) -> None:
        """
        Initialize a DatasetBag instance.

        Args:
            database_model: Database version of the bag.
            dataset_rid: Optional RID for the dataset.
        """
        self.model = database_model
        self.engine = cast(Engine, self.model.engine)
        self.metadata = self.model.metadata

        self.dataset_rid = dataset_rid or self.model.dataset_rid
        if not self.dataset_rid:
            raise DerivaMLException("No dataset RID provided")

        self.model.rid_lookup(self.dataset_rid)  # Check to make sure that this dataset is in the bag.

        self.version = self.model.dataset_version(self.dataset_rid)
        self._dataset_table = self.model.dataset_table

    def __repr__(self) -> str:
        return f"<deriva_ml.DatasetBag object {self.dataset_rid} at {hex(id(self))}>"

    def list_tables(self) -> list[str]:
        """List the names of the tables in the catalog.

        Returns:
            A list of table names. These names are all qualified with the Deriva schema name.
        """
        return self.model.list_tables()

    @staticmethod
    def _find_relationship_attr(source, target):
        """
        Return the relationship attribute (InstrumentedAttribute) on `source` that points to `target`.
        Works with classes or AliasedClass. Raises LookupError if not found.
        """
        src_mapper = inspect(source).mapper
        tgt_mapper = inspect(target).mapper

        # Collect relationships on the *class* mapper (not on the alias).
        candidates: list[RelationshipProperty] = [rel for rel in src_mapper.relationships if rel.mapper is tgt_mapper]

        if not candidates:
            raise LookupError(f"No relationship from {src_mapper.class_.__name__} → {tgt_mapper.class_.__name__}")

        # Prefer MANYTOONE when multiple paths exist (often best for joins).
        candidates.sort(key=lambda r: r.direction.name != "MANYTOONE")
        rel = candidates[0]

        # Bind to the actual source (alias or class).
        return getattr(source, rel.key) if isinstance(source, AliasedClass) else rel.class_attribute

    def _dataset_table_view(self, table: str) -> CompoundSelect[Any]:
        """Return a SQL statement that selects all of the rows in the specified table that are associated with
        dataset_rid."""
        table_class = self.model.get_orm_class_by_name(table)
        dataset_table_class = self.model.get_orm_class_by_name(self._dataset_table.name)
        dataset_rids = [self.dataset_rid] + [c.dataset_rid for c in self.list_dataset_children(recurse=True)]

        paths = [[t.name for t in p] for p in self.model._schema_to_paths() if p[-1].name == table]
        sql_cmds = []
        for path in paths:
            path_sql = select(table_class)
            last_class = self.model.get_orm_class_by_name(path[0])
            for t in path[1:]:
                t_class = self.model.get_orm_class_by_name(t)
                path_sql = path_sql.join(self._find_relationship_attr(last_class, t_class))
                last_class = t_class
            path_sql = path_sql.where(dataset_table_class.RID.in_(dataset_rids))
            sql_cmds.append(path_sql)
        return union(*sql_cmds)

    def get_table(self, table: str) -> Generator[tuple, None, None]:
        """Retrieve the contents of the specified table. If the schema is not provided as part of the table name,
        the method will attempt to locate the schema for the table.

        Args:
            table: Table to retrieve data from.

        Returns:
            A generator that yields tuples of column values.
        """
        with Session(self.engine) as session:
            result = session.execute(self._dataset_table_view(table))
            for row in result:
                yield row

    def get_table_as_dataframe(self, table: str) -> pd.DataFrame:
        """Retrieve the contents of the specified table as a dataframe.

        If the schema is not provided as part of the table name, the method will attempt to locate the schema
        for the table.

        Args:
            table: Table to retrieve data from.

        Returns:
            A dataframe containing the contents of the specified table.
        """
        return pd.read_sql(self._dataset_table_view(table), self.engine)
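
A short sketch of the two accessors above; the table name "Image" is a hypothetical domain table and `bag` is a DatasetBag as in the earlier sketch.

    # Stream raw rows, or pull the whole table into pandas.
    for row in bag.get_table("Image"):
        print(row)

    df = bag.get_table_as_dataframe("Image")
    print(df.head())
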
    def get_table_as_dict(self, table: str) -> Generator[dict[str, Any], None, None]:
        """Retrieve the contents of the specified table as dictionaries.

        Args:
            table: Table to retrieve data from. If the schema is not provided as part of the table name,
                the method will attempt to locate the schema for the table.

        Returns:
            A generator producing dictionaries containing the contents of the specified table as name/value pairs.
        """
        with Session(self.engine) as session:
            result = session.execute(self._dataset_table_view(table))
            for row in result.mappings():
                yield row

    # @validate_call
    def list_dataset_members(self, recurse: bool = False) -> dict[str, list[dict[str, Any]]]:
        """Return a list of entities associated with a specific dataset.

        Args:
            recurse: Whether to include nested datasets.

        Returns:
            Dictionary of entities associated with the dataset.
        """
        # Look at each of the element types that might be in the dataset table and get the list of RIDs for them
        # from the appropriate association table.
        members = defaultdict(list)

        dataset_class = self.model.get_orm_class_for_table(self._dataset_table)
        for element_table in self.model.list_dataset_element_types():
            element_class = self.model.get_orm_class_for_table(element_table)

            assoc_class, dataset_rel, element_rel = self.model.get_orm_association_class(dataset_class, element_class)

            element_table = inspect(element_class).mapped_table
            if element_table.schema != self.model.domain_schema and element_table.name not in ["Dataset", "File"]:
                # Only look at domain tables, files, and nested datasets.
                continue
            # Get the names of the columns that we are going to need for linking.
            with Session(self.engine) as session:
                sql_cmd = (
                    select(element_class)
                    .join(element_rel)
                    .where(self.dataset_rid == assoc_class.__table__.c["Dataset"])
                )
                # Get back the list of ORM entities and convert them to dictionaries.
                element_entities = session.scalars(sql_cmd).all()
                element_rows = [{c.key: getattr(obj, c.key) for c in obj.__table__.columns} for obj in element_entities]
            members[element_table.name].extend(element_rows)
            if recurse and (element_table.name == self._dataset_table.name):
                # Get the members for all the nested datasets and add them to the member list.
                nested_datasets = [d["RID"] for d in element_rows]
                for ds in nested_datasets:
                    nested_dataset = self.model.get_dataset(ds)
                    for k, v in nested_dataset.list_dataset_members(recurse=recurse).items():
                        members[k].extend(v)
        return dict(members)
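
A sketch of how the membership listing above might be consumed: keys are table names, values are lists of row dictionaries. The `bag` variable is assumed from the earlier sketch.

    members = bag.list_dataset_members(recurse=True)
    for table_name, rows in members.items():
        print(f"{table_name}: {len(rows)} member(s)")
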
    def find_features(self, table: str | Table) -> Iterable[Feature]:
        """Find features for a table.

        Args:
            table: The table to find features for.

        Returns:
            An iterable of Feature instances.
        """
        return self.model.find_features(table)

    def list_feature_values(self, table: Table | str, feature_name: str) -> datapath._ResultSet:
        """Return feature values for a table.

        Args:
            table: The table to get feature values for.
            feature_name: Name of the feature.

        Returns:
            Feature values.
        """
        feature = self.model.lookup_feature(table, feature_name)
        feature_class = self.model.get_orm_class_for_table(feature.feature_table)
        with Session(self.engine) as session:
            sql_cmd = select(feature_class)
            return cast(datapath._ResultSet, [row for row in session.execute(sql_cmd).mappings()])
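
A sketch combining the two feature calls above; the table name "Image" and feature name "Quality" are hypothetical.

    for feature in bag.find_features("Image"):
        print(feature)

    for value in bag.list_feature_values("Image", "Quality"):
        print(value)
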
    def list_dataset_element_types(self) -> list[Table]:
        """
        List the element types contained within a dataset.

        This method analyzes the dataset and identifies the tables whose members can appear in it. It is useful
        for understanding the structure and content of the dataset and allows for better manipulation and usage
        of its data.

        Returns:
            list[Table]: A list of tables, one for each element type found in the dataset.
        """
        return self.model.list_dataset_element_types()

    def list_dataset_children(self, recurse: bool = False) -> list[DatasetBag]:
        """Get nested datasets.

        Args:
            recurse: Whether to include children of children.

        Returns:
            List of child dataset bags.
        """
        ds_table = self.model.get_orm_class_by_name(f"{self.model.ml_schema}.Dataset")
        nds_table = self.model.get_orm_class_by_name(f"{self.model.ml_schema}.Dataset_Dataset")
        dv_table = self.model.get_orm_class_by_name(f"{self.model.ml_schema}.Dataset_Version")

        with Session(self.engine) as session:
            sql_cmd = (
                select(nds_table.Nested_Dataset, dv_table.Version)
                .join_from(ds_table, nds_table, onclause=ds_table.RID == nds_table.Nested_Dataset)
                .join_from(ds_table, dv_table, onclause=ds_table.Version == dv_table.RID)
                .where(nds_table.Dataset == self.dataset_rid)
            )
            nested = [DatasetBag(self.model, r[0]) for r in session.execute(sql_cmd).all()]

        result = copy(nested)
        if recurse:
            for child in nested:
                result.extend(child.list_dataset_children(recurse))
        return result

    @validate_call(config=ConfigDict(arbitrary_types_allowed=True))
    def lookup_term(self, table: str | Table, term_name: str) -> VocabularyTerm:
        """Find a term in a vocabulary table.

        Searches for a term in the specified vocabulary table, matching either the primary name
        or any of its synonyms.

        Args:
            table: Vocabulary table to search in (name or Table object).
            term_name: Name or synonym of the term to find.

        Returns:
            VocabularyTerm: The matching vocabulary term.

        Raises:
            DerivaMLException: If the table is not a vocabulary table.
            DerivaMLInvalidTerm: If the term is not found.

        Examples:
            Look up by primary name:
            >>> term = ml.lookup_term("tissue_types", "epithelial")
            >>> print(term.description)

            Look up by synonym:
            >>> term = ml.lookup_term("tissue_types", "epithelium")
        """
        # Get and validate the vocabulary table reference.
        if not self.model.is_vocabulary(table):
            raise DerivaMLException(f"The table {table} is not a controlled vocabulary")

        # Search for the term by name or synonym.
        for term in self.get_table_as_dict(table):
            if term_name == term["Name"] or (term["Synonyms"] and term_name in term["Synonyms"]):
                term["Synonyms"] = list(term["Synonyms"])
                return VocabularyTerm.model_validate(term)

        # Term not found.
        raise DerivaMLInvalidTerm(table, term_name)
    def _denormalize(self, include_tables: list[str]) -> Select:
        """
        Generate a SQL statement that denormalizes the dataset based on the tables to include. Processes cycles
        in graph relationships, ensures proper join order, and generates the selected columns for denormalization.

        Args:
            include_tables (list[str] | None): List of table names to include in the denormalized dataset.
                If None, all tables from the dataset will be included.

        Returns:
            A SQLAlchemy select statement that performs the denormalization.
        """
        # Skip over tables that we don't want to include in the denormalized dataset.
        # Also, strip off the Dataset/Dataset_X part of the path so we don't include dataset columns in the
        # denormalized table.

        def find_relationship(table, join_condition):
            side1 = (join_condition[0].table.name, join_condition[0].name)
            side2 = (join_condition[1].table.name, join_condition[1].name)

            for relationship in inspect(table).relationships:
                local_columns = list(relationship.local_columns)[0].table.name, list(relationship.local_columns)[0].name
                remote_side = list(relationship.remote_side)[0].table.name, list(relationship.remote_side)[0].name
                if (local_columns == side1 and remote_side == side2) or (local_columns == side2 and remote_side == side1):
                    return relationship
            return None

        join_tables, denormalized_columns = self.model._prepare_wide_table(self, self.dataset_rid, include_tables)

        denormalized_columns = [
            self.model.get_orm_class_by_name(table_name)
            .__table__.columns[column_name]
            .label(f"{table_name}.{column_name}")
            for table_name, column_name in denormalized_columns
        ]
        sql_statements = []
        for key, (path, join_conditions) in join_tables.items():
            sql_statement = select(*denormalized_columns).select_from(
                self.model.get_orm_class_for_table(self._dataset_table)
            )
            for table_name in path[1:]:  # Skip over the dataset table.
                table_class = self.model.get_orm_class_by_name(table_name)
                on_clause = [
                    getattr(table_class, r.key)
                    for on_condition in join_conditions[table_name]
                    if (r := find_relationship(table_class, on_condition))
                ]
                sql_statement = sql_statement.join(table_class, onclause=and_(*on_clause))
            dataset_rid_list = [self.dataset_rid] + [d.dataset_rid for d in self.list_dataset_children(recurse=True)]
            dataset_class = self.model.get_orm_class_by_name(self._dataset_table.name)
            sql_statement = sql_statement.where(dataset_class.RID.in_(dataset_rid_list))
            sql_statements.append(sql_statement)
        return union(*sql_statements)

    def denormalize_as_dataframe(self, include_tables: list[str]) -> pd.DataFrame:
        """
        Denormalize the dataset and return the result as a dataframe.

        This routine will examine the domain schema for the dataset, determine which tables to include, and
        denormalize the dataset values into a single wide table. The result is returned as a pandas dataframe
        with one row for each row of the denormalized wide table.

        The optional argument include_tables can be used to specify a subset of tables to include in the
        denormalized view. The tables in this argument can appear anywhere in the dataset schema. The method
        will determine which additional tables are required to complete the denormalization process. If
        include_tables is not specified, all of the tables in the schema will be included.

        The resulting wide table will include a column for every table needed to complete the denormalization
        process.

        Args:
            include_tables: List of table names to include in the denormalized dataset.

        Returns:
            Dataframe containing the denormalized dataset.
        """
        return pd.read_sql(self._denormalize(include_tables=include_tables), self.engine)
    def denormalize_as_dict(self, include_tables: list[str]) -> Generator[RowMapping, None, None]:
        """
        Denormalize the dataset and return the result as a sequence of dictionaries.

        This routine will examine the domain schema for the dataset, determine which tables to include, and
        denormalize the dataset values into a single wide table. The result is returned as a generator that
        yields a dictionary for each row in the denormalized wide table.

        The optional argument include_tables can be used to specify a subset of tables to include in the
        denormalized view. The tables in this argument can appear anywhere in the dataset schema. The method
        will determine which additional tables are required to complete the denormalization process. If
        include_tables is not specified, all of the tables in the schema will be included.

        The resulting wide table will include only those columns for the tables listed in include_tables.

        Args:
            include_tables: List of table names to include in the denormalized dataset. If None, the entire
                schema is used.

        Returns:
            A generator that yields a dictionary representation of each row in the denormalized dataset.
        """
        with Session(self.engine) as session:
            cursor = session.execute(self._denormalize(include_tables=include_tables))
            yield from cursor.mappings()
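
A short sketch of the two denormalization entry points above; the table names "Subject" and "Image" are hypothetical placeholders for domain tables, and `bag` is a DatasetBag as in the earlier sketches.

    # Build a wide table joining the dataset to Subject and Image; any intermediate tables
    # needed to complete the joins are added automatically.
    wide = bag.denormalize_as_dataframe(include_tables=["Subject", "Image"])
    print(wide.columns.tolist())

    # The generator variant avoids holding the whole wide table in memory.
    for row in bag.denormalize_as_dict(include_tables=["Subject", "Image"]):
        print(dict(row))
        break
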
# Add annotations after the class definition to deal with forward-reference issues in pydantic.
DatasetBag.list_dataset_children = validate_call(
    config=ConfigDict(arbitrary_types_allowed=True),
    validate_return=True,
)(DatasetBag.list_dataset_children)
deriva_ml/dataset/history.py
@@ -0,0 +1,109 @@
import base64
import struct
from datetime import datetime

from dateutil.parser import isoparse
from deriva.core import urlquote


# -- ==============================================================================================
def get_record_history(server, cid, sname, tname, kvals, kcols=["RID"], snap=None):
    """Get the history of a record from the catalog.

    Args:
        server: The server instance.
        cid: The catalog ID.
        sname: The schema name.
        tname: The table name.
        kvals: The key values to look up.
        kcols: The key columns. Defaults to ["RID"].
        snap: Optional snapshot ID.

    Returns:
        The history data for the record, as a mapping from snapshot ID to row.

    Raises:
        ValueError: If more than one row is returned.
    """
    parts = {
        "cid": urlquote(cid),
        "sname": urlquote(sname),
        "tname": urlquote(tname),
        "filter": ",".join(
            [
                "%s=%s" % (urlquote(kcol), urlquote(kval))
                for kcol, kval in zip(kcols, kvals)
            ]
        ),
    }

    if snap is None:
        # Determine the starting (latest) snapshot.
        r = server.get("/ermrest/catalog/%(cid)s" % parts)
        snap = r.json()["snaptime"]
    parts["snap"] = snap

    path = "/ermrest/catalog/%(cid)s@%(snap)s/entity/%(sname)s:%(tname)s/%(filter)s"

    rows_found = []
    snap2rows = {}
    while True:
        url = path % parts
        # sys.stderr.write("%s\n" % url)
        response_data = server.get(url).json()
        if len(response_data) > 1:
            raise ValueError("got more than one row for %r" % url)
        if len(response_data) == 0:
            # sys.stderr.write("ERROR: %s: No record found \n" % (url))
            break
        row = response_data[0]
        snap2rows[parts["snap"]] = row
        rows_found.append(row)
        rmt = datetime.fromisoformat(row["RMT"])
        # Find the snapshot ID just prior to this row version's birth time.
        parts["snap"] = urlb32_encode(datetime_epoch_us(rmt) - 1)

    return snap2rows
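
A hedged sketch of calling get_record_history. It assumes a deriva.core DerivaServer-style object whose get() method returns a requests-like response, which is what the loop above relies on; the host, catalog id, schema, table, and RID values are placeholders.

    from deriva.core import DerivaServer, get_credential

    host = "example.derivacloud.org"
    server = DerivaServer("https", host, credentials=get_credential(host))

    # Walk back through the versions of a single record, newest snapshot first.
    history = get_record_history(server, "1", "deriva-ml", "Dataset", ["1-ABCD"])
    for snap, row in history.items():
        print(snap, row["RMT"])
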
# -- --------------------------------------------------------------------------------------
def datetime_epoch_us(dt):
    """Convert a datetime to epoch microseconds.

    Args:
        dt: The datetime object to convert.

    Returns:
        The epoch time in microseconds.
    """
    return int(dt.timestamp() * 1000000)


# -- --------------------------------------------------------------------------------------
# Take the ISO format string (same as RMT) and return the version number.
def iso_to_snap(iso_datetime):
    """Convert an ISO datetime string to snapshot format.

    Args:
        iso_datetime: The ISO datetime string.

    Returns:
        The snapshot timestamp.
    """
    return datetime_epoch_us(isoparse(iso_datetime))


# -- --------------------------------------------------------------------------------------
def urlb32_encode(i):
    """Encode an integer to URL-safe base32.

    Args:
        i: The integer to encode.

    Returns:
        The URL-safe base32 encoded string.
    """
    return base64.urlsafe_b64encode(struct.pack(">Q", i)).decode("ascii").rstrip("=")
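
A small worked example of the snapshot arithmetic used above: an RMT-style timestamp is converted to epoch microseconds, decremented by one microsecond, and encoded to name the snapshot just before that row version was created. The timestamp value is arbitrary.

    rmt = "2024-05-01T12:00:00.000000+00:00"

    micros = iso_to_snap(rmt)                 # 1714564800000000
    snap_before = urlb32_encode(micros - 1)
    print(micros, snap_before)
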