deriva-ml 1.8.10__py3-none-any.whl → 1.8.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -19,7 +19,6 @@ import setuptools_scm
19
19
  from pathlib import Path
20
20
  import requests
21
21
  import subprocess
22
- import shutil
23
22
  from typing import Optional, Any, Iterable, TYPE_CHECKING
24
23
  from deriva.core import (
25
24
  get_credential,
@@ -34,6 +33,7 @@ from deriva.core.ermrest_catalog import ResolveRidResult
34
33
  from deriva.core.ermrest_model import Key, Table
35
34
  from deriva.core.hatrac_store import HatracStore
36
35
  from pydantic import validate_call, ConfigDict
36
+ from requests import RequestException
37
37
 
38
38
  from .execution_configuration import ExecutionConfiguration, Workflow
39
39
  from .feature import Feature, FeatureRecord
@@ -72,6 +72,15 @@ try:
72
72
  except ImportError: # Graceful fallback if IPython isn't installed.
73
73
  get_ipython = lambda: None
74
74
 
75
+ try:
76
+ from jupyter_server.serverapp import list_running_servers
77
+ except ImportError:
78
+ list_running_servers = lambda: []
79
+
80
+ try:
81
+ from ipykernel import get_connection_file
82
+ except ImportError:
83
+ get_connection_file = lambda: ""
75
84
 
76
85
  if TYPE_CHECKING:
77
86
  from .execution import Execution
@@ -151,8 +160,7 @@ class DerivaML(Dataset):
151
160
  self.version = model_version
152
161
  self.configuration = None
153
162
  self._execution: Optional[Execution] = None
154
- self._script_path, self._is_notebook = self._get_python_script()
155
- self._notebook = self._get_python_notebook()
163
+ self.executable_path, self._is_notebook = self._get_python_script()
156
164
  self.domain_schema = self.model.domain_schema
157
165
  self.project_name = project_name or self.domain_schema
158
166
  self.start_time = datetime.now()
@@ -179,38 +187,77 @@ class DerivaML(Dataset):
179
187
  except (AttributeError, requests.HTTPError):
180
188
  pass
181
189
 
182
- def _get_python_notebook(self) -> Path | None:
190
+ def _check_nbstrip_status(self) -> None:
183
191
  """Log a warning if nbstripout is not installed in the current git repository.
184
192
 
185
193
  Returns:
186
194
  None; the outcome is only reported through the logger.
187
195
  """
188
- notebook = None
189
196
  try:
190
- ipython = get_ipython()
191
- # Check if running in Jupyter's ZMQ kernel (used by notebooks)
192
- if ipython is not None and "IPKernelApp" in ipython.config:
193
- notebook = Path(ipython.user_ns.get("__session__"))
194
- # Check if running in Jupyter's ZMQ kernel (used by notebooks)
197
+ if subprocess.run(
198
+ ["nbstripout", "--is-installed"],
199
+ check=False,
200
+ capture_output=True,
201
+ ).returncode:
202
+ self._logger.warning(
203
+ "nbstripout is not installed in repository. Please run nbstripout --install"
204
+ )
205
+ except subprocess.CalledProcessError:
206
+ self._logger.error("nbstripout is not found.")
207
+
208
+ def _get_notebook_session(
209
+ self,
210
+ ) -> tuple[dict[str, Any] | None, dict[str, Any] | None]:
211
+ """Return the (server, session) records of the running Jupyter session whose kernel matches this process, or (None, None) if no match is found."""
212
+ # Get the kernel's connection file and extract the kernel ID
213
+ try:
214
+ if not (connection_file := Path(get_connection_file()).name):
215
+ return None, None
216
+ except RuntimeError:
217
+ return None, None
218
+
219
+ kernel_id = connection_file.split("-", 1)[1].split(".")[0]
220
+
221
+ # Look through the running server sessions to find the matching kernel ID
222
+ for server in list_running_servers():
223
+ try:
224
+ # If a token is required for authentication, include it in headers
225
+ token = server.get("token", "")
226
+ headers = {}
227
+ if token:
228
+ headers["Authorization"] = f"token {token}"
229
+
195
230
  try:
196
- if subprocess.run(
197
- [shutil.which("nbstripout"), "--is-installed"],
198
- check=False,
199
- capture_output=True,
200
- ).returncode:
201
- self._logger.warning(
202
- "nbstripout is not installed in repository. Please run nbstripout --install"
203
- )
204
- except subprocess.CalledProcessError:
205
- self._logger.error("nbstripout is not found.")
206
- except (ImportError, AttributeError):
207
- pass
208
- return notebook
231
+ sessions_url = server["url"] + "api/sessions"
232
+ response = requests.get(sessions_url, headers=headers)
233
+ response.raise_for_status()
234
+ sessions = response.json()
235
+ except RequestException as e:
236
+ raise e
237
+ for sess in sessions:
238
+ if sess["kernel"]["id"] == kernel_id:
239
+ return server, sess
240
+ except Exception as _e:
241
+ # Ignore servers we can't connect to.
242
+ pass
243
+ return None, None
244
+
245
+ def _get_notebook_path(self) -> Path | None:
246
+ """Return the absolute path of the current notebook."""
247
+
248
+ server, session = self._get_notebook_session()
249
+ if server and session:
250
+ self._check_nbstrip_status()
251
+ relative_path = session["notebook"]["path"]
252
+ # Join the notebook directory with the relative path
253
+ return Path(server["root_dir"]) / relative_path
254
+ else:
255
+ return None
209
256
 
210
257
  def _get_python_script(self) -> tuple[Path, bool]:
211
258
  """Return the path to the currently executing script"""
212
259
  is_notebook = False
213
- if filename := self._get_python_notebook():
260
+ if filename := self._get_notebook_path():
214
261
  is_notebook = True
215
262
  else:
216
263
  stack = inspect.stack()
@@ -228,11 +275,11 @@ class DerivaML(Dataset):
228
275
  try:
229
276
  result = subprocess.run(
230
277
  ["git", "rev-parse", "--show-toplevel"],
231
- cwd=self._script_path.parent,
278
+ cwd=self.executable_path.parent,
232
279
  stdout=subprocess.PIPE,
233
280
  stderr=subprocess.DEVNULL,
234
281
  text=True,
235
- check=True
282
+ check=True,
236
283
  )
237
284
  return result.stdout.strip()
238
285
  except subprocess.CalledProcessError:
@@ -262,6 +309,7 @@ class DerivaML(Dataset):
262
309
  return self.catalog.getPathBuilder()
263
310
 
264
311
  def get_version(self) -> str:
312
+ """Return the version number of the executable"""
265
313
  return setuptools_scm.get_version(root=self._get_git_root())
266
314
 
267
315
  @property
@@ -1040,6 +1088,7 @@ class DerivaML(Dataset):
1040
1088
  return workflow_rid
1041
1089
 
1042
1090
  def lookup_workflow(self, url: str) -> Optional[RID]:
1091
+ """Given a URL, look in the workflow table to find a matching workflow."""
1043
1092
  workflow_path = self.pathBuilder.schemas[self.ml_schema].Workflow
1044
1093
  try:
1045
1094
  url_column = workflow_path.URL
@@ -1069,20 +1118,21 @@ class DerivaML(Dataset):
1069
1118
 
1070
1119
  if is_dirty:
1071
1120
  self._logger.warning(
1072
- f"File {self._script_path} has been modified since last commit. Consider commiting before executing"
1121
+ f"File {self.executable_path} has been modified since last commit. Consider commiting before executing"
1073
1122
  )
1074
1123
 
1075
1124
  # If you are in a notebook, strip out the outputs before computing the checksum.
1076
1125
  cmd = (
1077
- f"nbstripout {self._script_path} | git hash-object --stdin"
1126
+ f"nbstripout {self.executable_path} | git hash-object --stdin"
1078
1127
  if self._is_notebook
1079
- else f"git hash-object {self._script_path}"
1128
+ else f"git hash-object {self.executable_path}"
1080
1129
  )
1081
1130
  checksum = subprocess.run(
1082
1131
  cmd,
1083
1132
  capture_output=True,
1084
1133
  text=True,
1085
1134
  check=True,
1135
+ shell=True,
1086
1136
  ).stdout.strip()
1087
1137
 
1088
1138
  workflow = Workflow(
@@ -1109,8 +1159,10 @@ class DerivaML(Dataset):
1109
1159
  # Get repo URL from local github repo.
1110
1160
  try:
1111
1161
  result = subprocess.run(
1112
- ["git", "remote", "get-url", "origin"], capture_output=True, text=True,
1113
- cwd=self._script_path.parent,
1162
+ ["git", "remote", "get-url", "origin"],
1163
+ capture_output=True,
1164
+ text=True,
1165
+ cwd=self.executable_path.parent,
1114
1166
  )
1115
1167
  github_url = result.stdout.strip().removesuffix(".git")
1116
1168
  except subprocess.CalledProcessError:
@@ -1123,7 +1175,7 @@ class DerivaML(Dataset):
1123
1175
  try:
1124
1176
  result = subprocess.run(
1125
1177
  ["git", "status", "--porcelain"],
1126
- cwd=self._script_path.parent,
1178
+ cwd=self.executable_path.parent,
1127
1179
  capture_output=True,
1128
1180
  text=True,
1129
1181
  check=True,
@@ -1136,14 +1188,14 @@ class DerivaML(Dataset):
1136
1188
 
1137
1189
  """Get SHA-1 hash of latest commit of the file in the repository"""
1138
1190
  result = subprocess.run(
1139
- ["git", "log", "-n", "1", "--pretty=format:%H" "--", self._script_path],
1140
- cwd=self._script_path.parent,
1191
+ ["git", "log", "-n", "1", "--pretty=format:%H" "--", self.executable_path],
1192
+ cwd=self.executable_path.parent,
1141
1193
  capture_output=True,
1142
1194
  text=True,
1143
1195
  check=True,
1144
1196
  )
1145
1197
  sha = result.stdout.strip()
1146
- url = f"{github_url}/blob/{sha}/{self._script_path.relative_to(repo_root)}"
1198
+ url = f"{github_url}/blob/{sha}/{self.executable_path.relative_to(repo_root)}"
1147
1199
  return url, is_dirty
1148
1200
 
1149
1201
  # @validate_call
@@ -1174,6 +1226,7 @@ class DerivaML(Dataset):
1174
1226
 
1175
1227
  # @validate_call
1176
1228
  def restore_execution(self, execution_rid: Optional[RID] = None) -> "Execution":
1229
+ """Return an Execution object for a previously started execution with the specified RID."""
1177
1230
  from .execution import Execution
1178
1231
 
1179
1232
  # Find path to execution
deriva_ml/execution.py CHANGED
@@ -253,17 +253,9 @@ class Execution:
253
253
 
254
254
  def _create_notebook_checkpoint(self):
255
255
  """Trigger a checkpoint creation using Jupyter's API."""
256
- notebook_name = self._ml_object._notebook
257
-
258
- # Look for the server running this notebook.
259
- root = Path("").absolute().parent.as_posix()
260
- servers = list(list_running_servers())
261
- # Jupyterhub seems to handle root_dir differently then server case.
262
- server = (
263
- servers
264
- if len(servers) == 1
265
- else [s for s in servers if s["root_dir"] == root]
266
- )[0]
256
+
257
+ server, session = self._ml_object._get_notebook_session()
258
+ notebook_name = session["notebook"]["path"]
267
259
  notebook_url = f"{server['url']}api/contents/{notebook_name}"
268
260
 
269
261
  # Get notebook content
@@ -275,7 +267,7 @@ class Execution:
275
267
  # Execution metadata cannot be in a directory, so map path into filename.
276
268
  checkpoint_path = (
277
269
  self.execution_metadata_path(ExecMetadataVocab.runtime_env.value)
278
- / f"{notebook_name.as_posix().replace('/','_')}.checkpoint"
270
+ / f"{notebook_name.replace('/','_')}.checkpoint"
279
271
  )
280
272
  with open(checkpoint_path, "w", encoding="utf-8") as f:
281
273
  json.dump(notebook_content, f)
@@ -295,7 +287,7 @@ class Execution:
295
287
  minutes, seconds = divmod(remainder, 60)
296
288
  duration = f"{round(hours, 0)}H {round(minutes, 0)}min {round(seconds, 4)}sec"
297
289
 
298
- if self._ml_object._notebook:
290
+ if self._ml_object._is_notebook:
299
291
  self._create_notebook_checkpoint()
300
292
 
301
293
  self.update_status(Status.completed, "Algorithm execution ended.")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: deriva-ml
3
- Version: 1.8.10
3
+ Version: 1.8.11
4
4
  Summary: Utilities to simplify use of Dervia and Pandas to create reproducable ML pipelines
5
5
  Author-email: ISRD <isrd-dev@isi.edu>
6
6
  Requires-Python: >=3.10
@@ -5,9 +5,9 @@ deriva_ml/dataset_aux_classes.py,sha256=YxjQnu2kS9kK_f8bGqhmgE6ty9GNeitCxfvReT9v
5
5
  deriva_ml/dataset_bag.py,sha256=e6IHv3saZUnZRfl0EjfnlV2NnmPeOagYYv3PuZqS1l0,11501
6
6
  deriva_ml/demo_catalog.py,sha256=xQPhFlflqwJskNQrQ-jdBSnGzBm2-aONBgcRxfsdNKM,11045
7
7
  deriva_ml/deriva_definitions.py,sha256=pZLPoUxiuJ-uGglmQ6sF9oVXsSUuOnPEqywoec78XNM,8893
8
- deriva_ml/deriva_ml_base.py,sha256=aVyGsFERZtpjNxfaVYzvKa7J0Ma-U3DEibfjnbr7lFQ,43817
8
+ deriva_ml/deriva_ml_base.py,sha256=KbmJ0-mGuZn7-wuzQemzof8o8mA_3-UVlQCrnDsKod0,45741
9
9
  deriva_ml/deriva_model.py,sha256=LV3FjIhIlz13ckZSmu0aOJhT9EVE0-M9oVMudfkxb0g,12004
10
- deriva_ml/execution.py,sha256=VlapQGPDQI2MOmYnA5-hpf-XM6Fu4hPLpFjNN5q9Udo,29889
10
+ deriva_ml/execution.py,sha256=uDblqngcldgR7X4W1PfMV4iPWkxwQYSr9CBmXNlIv1E,29572
11
11
  deriva_ml/execution_configuration.py,sha256=bjnZwXN6M7YPy5dFQwoGEBU8YjhQRSe1FW0rL0V9TaM,3422
12
12
  deriva_ml/execution_environment.py,sha256=bCRKrCELDbGQDo7_FKfw7e8iMzVjSRZK3baKkqH5-_0,3264
13
13
  deriva_ml/feature.py,sha256=7e8WYPCfJSrGxJh9oUTduYSnB5ekybRhXa_0HIigS_w,5459
@@ -25,9 +25,9 @@ deriva_ml/schema_setup/annotations.py,sha256=Uogm9YkRtoKSdgfQlICqRywbCATppwBO-Xr
25
25
  deriva_ml/schema_setup/create_schema.py,sha256=jwziMWJPbjRgjiRBT-KtidnXI8YNEFO74A9fwfptjHY,10626
26
26
  deriva_ml/schema_setup/policy.json,sha256=77sf0Imy6CAQV0_VwwbA56_KROJ05WXsvT-Wjtkk538,1633
27
27
  deriva_ml/schema_setup/table_comments_utils.py,sha256=-2_ubEpoH7ViLVb-ZfW9wZbQ26DTKNgjkCABMzGu4i4,2140
28
- deriva_ml-1.8.10.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
29
- deriva_ml-1.8.10.dist-info/METADATA,sha256=Mhx0joyR1gPEX8G6ZoEpvxNVW4sUG9C_S5TIA6ueZKk,670
30
- deriva_ml-1.8.10.dist-info/WHEEL,sha256=1tXe9gY0PYatrMPMDd6jXqjfpz_B-Wqm32CPfRC58XU,91
31
- deriva_ml-1.8.10.dist-info/entry_points.txt,sha256=ZiOvrYj022x544TQwi018ujeHRRDahNmwJnzn5ThacM,242
32
- deriva_ml-1.8.10.dist-info/top_level.txt,sha256=I1Q1dkH96cRghdsFRVqwpa2M7IqJpR2QPUNNc5-Bnpw,10
33
- deriva_ml-1.8.10.dist-info/RECORD,,
28
+ deriva_ml-1.8.11.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
29
+ deriva_ml-1.8.11.dist-info/METADATA,sha256=RyttxTkz_MCnWX1hZK2g7ffPyd54txE6AAZ2GMSpQ54,670
30
+ deriva_ml-1.8.11.dist-info/WHEEL,sha256=1tXe9gY0PYatrMPMDd6jXqjfpz_B-Wqm32CPfRC58XU,91
31
+ deriva_ml-1.8.11.dist-info/entry_points.txt,sha256=ZiOvrYj022x544TQwi018ujeHRRDahNmwJnzn5ThacM,242
32
+ deriva_ml-1.8.11.dist-info/top_level.txt,sha256=I1Q1dkH96cRghdsFRVqwpa2M7IqJpR2QPUNNc5-Bnpw,10
33
+ deriva_ml-1.8.11.dist-info/RECORD,,