deriva-ml 1.8.10__py3-none-any.whl → 1.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,12 +1,15 @@
-"""Ths module constains the definition of the DatabaseModel class. The role of this class is to provide an nterface between the BDBag representation
+"""Ths module contains the definition of the DatabaseModel class. The role of this class is to provide an nterface between the BDBag representation
 of a dataset and a sqllite database in which the contents of the bag are stored.
 """
+
+from __future__ import annotations
+
 import logging
 import sqlite3
 
 from csv import reader
 from pathlib import Path
-from typing import Any, Optional
+from typing import Any, Optional, Generator
 from urllib.parse import urlparse
 
 from deriva.core.ermrest_model import Model
@@ -20,7 +23,7 @@ from .dataset_bag import DatasetBag
 class DatabaseModelMeta(type):
     """Use metaclass to ensure that there is onl one instance per path"""
 
-    _paths_loaded: dict[Path:"DatabaseModel"] = {}
+    _paths_loaded: dict[Path, "DatabaseModel"] = {}
 
     def __call__(cls, *args, **kwargs):
         logger = logging.getLogger("deriva_ml")
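
The annotation fix above replaces a slice expression with a proper type-parameter pair. A quick illustration of the difference, runnable in plain Python (the string is a stand-in for the not-yet-defined class):

    from pathlib import Path

    # Inside subscript brackets, "Path: X" builds a slice object, so the old
    # annotation produced dict[slice(Path, 'DatabaseModel', None)] rather than
    # a mapping type; the comma form is the valid generic parameterization.
    bad = dict[Path:"DatabaseModel"]
    good = dict[Path, "DatabaseModel"]
    print(bad)   # dict[slice(<class 'pathlib.Path'>, 'DatabaseModel', None)]
    print(good)  # dict[pathlib.Path, 'DatabaseModel']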
@@ -47,7 +50,7 @@ class DatabaseModel(DerivaModel, metaclass=DatabaseModelMeta):
     Because of nested datasets, it's possible that more than one dataset rid is in a bag, or that a dataset rid might
     appear in more than one database. To help manage this, a global list of all the datasets that have been loaded
     into DatabaseModels, is kept in the class variable `_rid_map`.
-
+
     Because you can load diffent versions of a dataset simultaniously, the dataset RID and version number are tracked, and a new
     sqllite instance is created for every new dataset version present.
 
@@ -315,6 +318,26 @@ class DatabaseModel(DerivaModel, metaclass=DatabaseModelMeta):
         )
         return datasets
 
+    def get_table_as_dict(self, table: str) -> Generator[dict[str, Any], None, None]:
+        """Retrieve the contents of the specified table as a dictionary.
+
+        Args:
+            table: Table to retrieve data from. f schema is not provided as part of the table name,
+                the method will attempt to locate the schema for the table.
+
+        Returns:
+            A generator producing dictionaries containing the contents of the specified table as name/value pairs.
+        """
+        table_name = self.normalize_table_name(table)
+        with self.dbase as dbase:
+            col_names = [
+                c[1]
+                for c in dbase.execute(f'PRAGMA table_info("{table_name}")').fetchall()
+            ]
+            result = self.dbase.execute(f'SELECT * FROM "{table_name}"')
+            while row := result.fetchone():
+                yield dict(zip(col_names, row))
+
     def normalize_table_name(self, table: str) -> str:
         """Attempt to insert the schema into a table name if it's not provided.
 
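A sketch of how the new get_table_as_dict generator might be consumed; the db_model instance and the "Subject" table name are hypothetical placeholders:

    # db_model would normally be obtained by loading a downloaded dataset bag;
    # the table name "Subject" is purely illustrative.
    for row in db_model.get_table_as_dict("Subject"):
        print(row["RID"], row)  # each yielded row maps column names to values

Because rows are yielded one at a time from the sqlite cursor, large tables can be scanned without materializing the whole result in memory.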
deriva_ml/dataset.py CHANGED
@@ -92,7 +92,7 @@ class Dataset:
         dataset_list: list[DatasetSpec],
         description: Optional[str] = "",
         execution_rid: Optional[RID] = None,
-    ) -> RID:
+    ) -> list[dict[str, Any]]:
         schema_path = self._model.catalog.getPathBuilder().schemas[self._ml_schema]
 
         # Construct version records for insert
@@ -245,7 +245,7 @@
             DerivaMLException: if provided RID is not to a dataset_table.
         """
 
-        # Find all of the datasets that are reachable from this dataset and determine their new version numbers.
+        # Find all the datasets that are reachable from this dataset and determine their new version numbers.
         related_datasets = list(self._build_dataset_graph(dataset_rid=dataset_rid))
         version_update_list = [
             DatasetSpec(
@@ -254,7 +254,7 @@
             )
             for ds_rid in related_datasets
         ]
-        updated_versions = self._insert_dataset_versions(
+        self._insert_dataset_versions(
             version_update_list, description=description, execution_rid=execution_rid
         )
         return [d.version for d in version_update_list if d.rid == dataset_rid][0]
@@ -751,9 +751,10 @@
         ]
 
     def _table_paths(
-        self, dataset: DatasetSpec = None, snapshot_catalog: Optional[DerivaML] = None
+        self,
+        dataset: Optional[DatasetSpec] = None,
+        snapshot_catalog: Optional[DerivaML] = None,
     ) -> Iterator[tuple[str, str, Table]]:
-
         paths = self._collect_paths(dataset and dataset.rid, snapshot_catalog)
 
         def source_path(path: tuple[Table, ...]):
@@ -779,17 +780,20 @@
     def _collect_paths(
         self,
         dataset_rid: Optional[RID] = None,
-        snapshot_catalog: Optional[DerivaML] = None,
+        snapshot: Optional[Dataset] = None,
         dataset_nesting_depth: Optional[int] = None,
     ) -> set[tuple[Table, ...]]:
 
-        snapshot_catalog = snapshot_catalog or self
+        snapshot_catalog = snapshot if snapshot else self
+
         dataset_table = snapshot_catalog._model.schemas[self._ml_schema].tables[
             "Dataset"
         ]
         dataset_dataset = snapshot_catalog._model.schemas[self._ml_schema].tables[
             "Dataset_Dataset"
         ]
+
+        # Figure out what types of elements the dataset contains.
         dataset_associations = [
             a
             for a in self.dataset_table.find_associations()
@@ -812,7 +816,8 @@
             ]
         else:
             included_associations = dataset_associations
-        # Get the paths through the schema and filter out all of dataset paths not used by this dataset.
+
+        # Get the paths through the schema and filter out all the dataset paths not used by this dataset.
         paths = {
             tuple(p)
             for p in snapshot_catalog._model._schema_to_paths()
@@ -827,7 +832,7 @@
         if dataset_rid:
             for c in snapshot_catalog.list_dataset_children(dataset_rid=dataset_rid):
                 nested_paths |= self._collect_paths(
-                    c, snapshot_catalog=snapshot_catalog
+                    c, snapshot=snapshot_catalog
                 )
         else:
             # Initialize nesting depth if not already provided.
deriva_ml/dataset_bag.py CHANGED
@@ -109,7 +109,7 @@ class DatasetBag:
         for ts, on in paths:
             tables = " JOIN ".join(ts)
             on_expression = " and ".join(
-                [f"{column_name(l)}={column_name(r)}" for l, r in on]
+                [f"{column_name(left)}={column_name(right)}" for left, right in on]
             )
             sql.append(
                 f"SELECT {select_args} FROM {tables} ON {on_expression} WHERE {dataset_table_name}.RID IN ({datasets})"
deriva_ml/demo_catalog.py CHANGED
@@ -5,6 +5,7 @@ import logging
 from random import random, randint
 import tempfile
 from tempfile import TemporaryDirectory
+from typing import Optional
 import itertools
 
 from deriva.config.acl_config import AclConfig
@@ -18,7 +19,6 @@ from requests import HTTPError
 from deriva_ml import (
     DerivaML,
     ExecutionConfiguration,
-    Workflow,
     MLVocab,
     BuiltinTypes,
     ColumnDefinition,
@@ -169,12 +169,9 @@ def create_demo_features(ml_instance):
         description="Model for our API workflow",
     )
 
-    api_workflow = ml_instance.add_workflow(
-        Workflow(
-            name="API Workflow",
-            url="https://github.com/informatics-isi-edu/deriva-ml/blob/main/pyproject.toml",
-            workflow_type="API Workflow",
-        )
+    api_workflow = ml_instance.create_workflow(
+        name="API Workflow",
+        workflow_type="API Workflow",
     )
 
     api_execution = ml_instance.create_execution(
@@ -322,7 +319,11 @@ def create_demo_catalog(
 
 class DemoML(DerivaML):
     def __init__(
-        self, hostname, catalog_id, cache_dir: str = None, working_dir: str = None
+        self,
+        hostname,
+        catalog_id,
+        cache_dir: Optional[str] = None,
+        working_dir: Optional[str] = None,
     ):
         super().__init__(
             hostname=hostname,
@@ -8,7 +8,7 @@ from enum import Enum
 from typing import Any, Iterable, Optional, Annotated
 
 import deriva.core.ermrest_model as em
-from urllib.parse import urlparse, urljoin
+from urllib.parse import urlparse
 from deriva.core.ermrest_model import builtin_types
 from pydantic import (
     BaseModel,
@@ -139,13 +139,18 @@ class FileSpec(BaseModel):
         if url_parts.scheme == "tag":
             return v
         elif not url_parts.scheme:
-            return f'tag://{gethostname()},{date.today()}:file://{v}'
+            return f"tag://{gethostname()},{date.today()}:file://{v}"
         else:
             raise ValidationError("url is not a file URL")
 
     @model_serializer()
     def serialize_filespec(self):
-        return {'URL': self.url, 'Description': self.description, 'MD5': self.md5, 'Length': self.length}
+        return {
+            "URL": self.url,
+            "Description": self.description,
+            "MD5": self.md5,
+            "Length": self.length,
+        }
 
 
 class VocabularyTerm(BaseModel):
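
For context, a hedged sketch of what the reformatted serializer emits, assuming a FileSpec constructed from a bare file path (the constructor keywords and field values are invented for illustration, and the hostname and date in the tag URL depend on the machine and day):

    spec = FileSpec(url="/data/image.png", description="demo", md5="abc123", length=1024)
    # The url validator rewrites a bare path as a tag URL, so model_dump()
    # yields roughly:
    # {"URL": "tag://myhost,2025-01-01:file:///data/image.png",
    #  "Description": "demo", "MD5": "abc123", "Length": 1024}
    print(spec.model_dump())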
@@ -19,7 +19,6 @@ import setuptools_scm
 from pathlib import Path
 import requests
 import subprocess
-import shutil
 from typing import Optional, Any, Iterable, TYPE_CHECKING
 from deriva.core import (
     get_credential,
@@ -33,7 +32,9 @@ from deriva.core.deriva_server import DerivaServer
 from deriva.core.ermrest_catalog import ResolveRidResult
 from deriva.core.ermrest_model import Key, Table
 from deriva.core.hatrac_store import HatracStore
+from deriva.core.utils.globus_auth_utils import GlobusNativeLogin
 from pydantic import validate_call, ConfigDict
+from requests import RequestException
 
 from .execution_configuration import ExecutionConfiguration, Workflow
 from .feature import Feature, FeatureRecord
@@ -70,7 +71,28 @@ except ImportError: # Graceful fallback if IceCream isn't installed.
 try:
     from IPython import get_ipython
 except ImportError: # Graceful fallback if IPython isn't installed.
-    get_ipython = lambda: None
+
+    def get_ipython():
+        """Dummy routine in case you are not running in IPython."""
+        return None
+
+
+try:
+    from jupyter_server.serverapp import list_running_servers
+except ImportError:
+
+    def list_running_servers():
+        """Dummy routine in case you are not running in Jupyter."""
+        return []
+
+
+try:
+    from ipykernel import get_connection_file
+except ImportError:
+
+    def get_connection_file():
+        """Dummy routine in case you are not running in Jupyter."""
+        return ""
 
 
 if TYPE_CHECKING:
@@ -93,8 +115,8 @@ class DerivaML(Dataset):
         self,
         hostname: str,
         catalog_id: str | int,
-        domain_schema: str = None,
-        project_name: str = None,
+        domain_schema: Optional[str] = None,
+        project_name: Optional[str] = None,
         cache_dir: Optional[str] = None,
         working_dir: Optional[str] = None,
         model_version: str = "1",
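
Several hunks in this release make the same signature change. PEP 484 no longer accepts the implicit-Optional shorthand `arg: str = None`, and strict type checkers flag it; the explicit form below is equivalent at runtime:

    from typing import Optional

    def connect(hostname: str = None): ...               # flagged by strict type checkers
    def connect_ok(hostname: Optional[str] = None): ...  # explicit, same runtime behavior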
@@ -151,8 +173,7 @@
         self.version = model_version
         self.configuration = None
         self._execution: Optional[Execution] = None
-        self._script_path, self._is_notebook = self._get_python_script()
-        self._notebook = self._get_python_notebook()
+        self.executable_path, self._is_notebook = self._get_python_script()
         self.domain_schema = self.model.domain_schema
         self.project_name = project_name or self.domain_schema
         self.start_time = datetime.now()
@@ -179,38 +200,76 @@
         except (AttributeError, requests.HTTPError):
             pass
 
-    def _get_python_notebook(self) -> Path | None:
+    def _check_nbstrip_status(self) -> None:
         """Figure out if you are running in a Jupyter notebook
 
         Returns:
             A Path to the notebook file that is currently being executed.
         """
-        notebook = None
         try:
-            ipython = get_ipython()
-            # Check if running in Jupyter's ZMQ kernel (used by notebooks)
-            if ipython is not None and "IPKernelApp" in ipython.config:
-                notebook = Path(ipython.user_ns.get("__session__"))
-                # Check if running in Jupyter's ZMQ kernel (used by notebooks)
+            if subprocess.run(
+                ["nbstripout", "--is-installed"],
+                check=False,
+                capture_output=True,
+            ).returncode:
+                self._logger.warning(
+                    "nbstripout is not installed in repository. Please run nbstripout --install"
+                )
+        except subprocess.CalledProcessError:
+            self._logger.error("nbstripout is not found.")
+
+    @staticmethod
+    def _get_notebook_session() -> tuple[dict[str, Any] | None, dict[str, Any] | None]:
+        """Return the absolute path of the current notebook."""
+        # Get the kernel's connection file and extract the kernel ID
+        try:
+            if not (connection_file := Path(get_connection_file()).name):
+                return None, None
+        except RuntimeError:
+            return None, None
+
+        kernel_id = connection_file.split("-", 1)[1].split(".")[0]
+
+        # Look through the running server sessions to find the matching kernel ID
+        for server in list_running_servers():
+            try:
+                # If a token is required for authentication, include it in headers
+                token = server.get("token", "")
+                headers = {}
+                if token:
+                    headers["Authorization"] = f"token {token}"
+
                 try:
-                    if subprocess.run(
-                        [shutil.which("nbstripout"), "--is-installed"],
-                        check=False,
-                        capture_output=True,
-                    ).returncode:
-                        self._logger.warning(
-                            "nbstripout is not installed in repository. Please run nbstripout --install"
-                        )
-                except subprocess.CalledProcessError:
-                    self._logger.error("nbstripout is not found.")
-        except (ImportError, AttributeError):
-            pass
-        return notebook
+                    sessions_url = server["url"] + "api/sessions"
+                    response = requests.get(sessions_url, headers=headers)
+                    response.raise_for_status()
+                    sessions = response.json()
+                except RequestException as e:
+                    raise e
+                for sess in sessions:
+                    if sess["kernel"]["id"] == kernel_id:
+                        return server, sess
+            except Exception as _e:
+                # Ignore servers we can't connect to.
+                pass
+        return None, None
+
+    def _get_notebook_path(self) -> Path | None:
+        """Return the absolute path of the current notebook."""
+
+        server, session = self._get_notebook_session()
+        if server and session:
+            self._check_nbstrip_status()
+            relative_path = session["notebook"]["path"]
+            # Join the notebook directory with the relative path
+            return Path(server["root_dir"]) / relative_path
+        else:
+            return None
 
     def _get_python_script(self) -> tuple[Path, bool]:
         """Return the path to the currently executing script"""
         is_notebook = False
-        if filename := self._get_python_notebook():
+        if filename := self._get_notebook_path():
             is_notebook = True
         else:
             stack = inspect.stack()
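
The session lookup above keys off the kernel ID embedded in the ipykernel connection-file name. An illustration of that parsing with a typical file name (the ID shown is made up):

    connection_file = "kernel-4f2a1b3c-aaaa-bbbb-cccc-1234567890ab.json"
    kernel_id = connection_file.split("-", 1)[1].split(".")[0]
    print(kernel_id)  # 4f2a1b3c-aaaa-bbbb-cccc-1234567890ab

Each running Jupyter server's /api/sessions endpoint is then queried until a session with a matching kernel ID is found.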
@@ -220,7 +279,7 @@
                 )  # Get the caller's filename, which is two up the stack from here.
             else:
                 raise DerivaMLException(
-                    f"Looking for caller failed"
+                    "Looking for caller failed"
                 )  # Stack is too shallow
         return filename, is_notebook
 
@@ -228,11 +287,11 @@
         try:
             result = subprocess.run(
                 ["git", "rev-parse", "--show-toplevel"],
-                cwd=self._script_path.parent,
+                cwd=self.executable_path.parent,
                 stdout=subprocess.PIPE,
                 stderr=subprocess.DEVNULL,
                 text=True,
-                check=True
+                check=True,
             )
             return result.stdout.strip()
         except subprocess.CalledProcessError:
@@ -262,6 +321,7 @@
         return self.catalog.getPathBuilder()
 
     def get_version(self) -> str:
+        """Return the version number of the executable"""
         return setuptools_scm.get_version(root=self._get_git_root())
 
     @property
@@ -287,7 +347,7 @@
         )
 
     def asset_dir(
-        self, table: str | Table, prefix: str | Path = None
+        self, table: str | Table, prefix: Optional[str | Path] = None
     ) -> UploadAssetDirectory:
         """Return a local file path in which to place a files for an asset table. T
 
@@ -321,6 +381,29 @@ class DerivaML(Dataset):
         """
         return self.cache_dir if cached else self.working_dir
 
+    @staticmethod
+    def globus_login(host: str) -> None:
+        """Log into the specified host using Globus.
+
+        Args:
+            host:
+
+        Returns:
+
+        """
+        gnl = GlobusNativeLogin(host=host)
+        if gnl.is_logged_in([host]):
+            print("You are already logged in.")
+        else:
+            gnl.login(
+                [host],
+                no_local_server=True,
+                no_browser=True,
+                refresh_tokens=True,
+                update_bdbag_keychain=True,
+            )
+            print("Login Successful")
+
     def chaise_url(self, table: RID | Table) -> str:
         """Return a Chaise URL to the specified table.
 
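The new static method wraps deriva's GlobusNativeLogin client. A minimal usage sketch; the hostname is a placeholder:

    from deriva_ml import DerivaML

    # Starts a browser-less Globus device-login flow if not already logged in.
    DerivaML.globus_login("deriva.example.org")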
@@ -331,15 +414,15 @@
         Returns:
             URL to the table in Chaise format.
         """
+        table_obj = self.model.name_to_table(table)
         try:
-            table = self.model.name_to_table(table)
             uri = self.catalog.get_server_uri().replace(
                 "ermrest/catalog/", "chaise/recordset/#"
             )
         except DerivaMLException:
             # Perhaps we have a RID....
             uri = self.cite(table)
-        return f"{uri}/{urlquote(table.schema.name)}:{urlquote(table.name)}"
+        return f"{uri}/{urlquote(table_obj.schema.name)}:{urlquote(table_obj.name)}"
 
     def cite(self, entity: dict | str) -> str:
         """Return a citation URL for the provided entity.
@@ -353,7 +436,9 @@
         Raises:
             DerivaMLException: if provided RID does not exist.
         """
-        if entity.startswith(f"https://{self.host_name}/id/{self.catalog_id}/"):
+        if isinstance(entity, str) and entity.startswith(
+            f"https://{self.host_name}/id/{self.catalog_id}/"
+        ):
             # Already got a citation...
             return entity
         try:
@@ -450,9 +535,9 @@
     def create_asset(
         self,
         asset_name: str,
-        column_defs: Iterable[ColumnDefinition] = None,
+        column_defs: Optional[Iterable[ColumnDefinition]] = None,
         comment: str = "",
-        schema: str = None,
+        schema: Optional[str] = None,
     ) -> Table:
         """Create an asset table with the given asset name.
 
@@ -484,9 +569,9 @@
         self,
         target_table: Table | str,
         feature_name: str,
-        terms: list[Table | str] = None,
-        assets: list[Table | str] = None,
-        metadata: Iterable[ColumnDefinition | Table | Key | str] = None,
+        terms: Optional[list[Table | str]] = None,
+        assets: Optional[list[Table | str]] = None,
+        metadata: Optional[Iterable[ColumnDefinition | Table | Key | str]] = None,
         optional: Optional[list[str]] = None,
         comment: str = "",
     ) -> type[FeatureRecord]:
@@ -851,6 +936,7 @@
         """
 
         def path_to_asset(path: str) -> str:
+            """Pull the asset name out of a path to that asset in the filesystem"""
            components = path.split("/")
            return components[
                components.index("asset") + 2
@@ -915,6 +1001,7 @@
         )
 
         def check_file_type(dtype: str) -> bool:
+            """Make sure that the specified string is either the name or synonym for a file type term."""
            for term in defined_types:
                if dtype == term.name or (term.synonyms and file_type in term.synonyms):
                    return True
@@ -1040,6 +1127,7 @@
         return workflow_rid
 
     def lookup_workflow(self, url: str) -> Optional[RID]:
+        """Given a URL, look in the workflow table to find a matching workflow."""
         workflow_path = self.pathBuilder.schemas[self.ml_schema].Workflow
         try:
             url_column = workflow_path.URL
@@ -1049,7 +1137,7 @@
 
     def create_workflow(
         self, name: str, workflow_type: str, description: str = "", create: bool = True
-    ) -> RID:
+    ) -> RID | None:
         """Identify current executing program and return a workflow RID for it
 
         Determine the notebook or script that is currently being executed. Assume that this is
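
As the demo_catalog.py hunk above shows, callers no longer construct a Workflow record by hand; create_workflow now derives the URL and checksum from the currently executing script or notebook. A usage sketch, assuming ml_instance is a connected DerivaML object and the name values are illustrative:

    workflow_rid = ml_instance.create_workflow(
        name="API Workflow",
        workflow_type="API Workflow",
        description="Workflow derived from the currently executing script",
    )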
@@ -1069,20 +1157,21 @@
 
         if is_dirty:
             self._logger.warning(
-                f"File {self._script_path} has been modified since last commit. Consider commiting before executing"
+                f"File {self.executable_path} has been modified since last commit. Consider commiting before executing"
             )
 
         # If you are in a notebook, strip out the outputs before computing the checksum.
         cmd = (
-            f"nbstripout {self._script_path} | git hash-object --stdin"
+            f"nbstripout {self.executable_path} | git hash-object --stdin"
             if self._is_notebook
-            else f"git hash-object {self._script_path}"
+            else f"git hash-object {self.executable_path}"
         )
         checksum = subprocess.run(
             cmd,
             capture_output=True,
             text=True,
             check=True,
+            shell=True,
         ).stdout.strip()
 
         workflow = Workflow(
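
The added shell=True is what makes the piped command string work: without it, subprocess.run would treat the entire string as a single program name. The same pattern in isolation (POSIX shell assumed):

    import subprocess

    # The pipe requires a shell; with shell=False this string is not an executable.
    out = subprocess.run(
        "echo hello | tr a-z A-Z",
        capture_output=True, text=True, check=True, shell=True,
    ).stdout.strip()
    print(out)  # HELLO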
@@ -1109,12 +1198,14 @@
         # Get repo URL from local github repo.
         try:
             result = subprocess.run(
-                ["git", "remote", "get-url", "origin"], capture_output=True, text=True,
-                cwd=self._script_path.parent,
+                ["git", "remote", "get-url", "origin"],
+                capture_output=True,
+                text=True,
+                cwd=self.executable_path.parent,
             )
             github_url = result.stdout.strip().removesuffix(".git")
         except subprocess.CalledProcessError:
-            raise DerivaMLException(f"No GIT remote found")
+            raise DerivaMLException("No GIT remote found")
 
         # Find the root directory for the repository
         repo_root = self._get_git_root()
@@ -1123,7 +1214,7 @@
         try:
             result = subprocess.run(
                 ["git", "status", "--porcelain"],
-                cwd=self._script_path.parent,
+                cwd=self.executable_path.parent,
                 capture_output=True,
                 text=True,
                 check=True,
@@ -1136,14 +1227,14 @@
 
         """Get SHA-1 hash of latest commit of the file in the repository"""
         result = subprocess.run(
-            ["git", "log", "-n", "1", "--pretty=format:%H" "--", self._script_path],
-            cwd=self._script_path.parent,
+            ["git", "log", "-n", "1", "--pretty=format:%H--", self.executable_path],
+            cwd=self.executable_path.parent,
             capture_output=True,
             text=True,
             check=True,
         )
         sha = result.stdout.strip()
-        url = f"{github_url}/blob/{sha}/{self._script_path.relative_to(repo_root)}"
+        url = f"{github_url}/blob/{sha}/{self.executable_path.relative_to(repo_root)}"
         return url, is_dirty
 
     # @validate_call
@@ -1174,6 +1265,7 @@
 
     # @validate_call
     def restore_execution(self, execution_rid: Optional[RID] = None) -> "Execution":
+        """Return an Execution object for a previously started execution with the specified RID."""
         from .execution import Execution
 
         # Find path to execution
deriva_ml/deriva_model.py CHANGED
@@ -21,7 +21,7 @@ from .deriva_definitions import (
 
 from collections import Counter
 from pydantic import validate_call, ConfigDict
-from typing import Iterable
+from typing import Iterable, Optional
 
 
 class DerivaModel:
@@ -267,7 +267,7 @@
     def _schema_to_paths(
         self,
         root: Table = None,
-        path: list[Table] = None,
+        path: Optional[list[Table]] = None,
     ) -> list[list[Table]]:
         """Recursively walk over the domain schema graph and extend the current path.