deriva-ml 1.8.5__py3-none-any.whl → 1.8.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
deriva_ml/__init__.py CHANGED
@@ -41,3 +41,12 @@ from .execution_configuration import (
41
41
  Workflow,
42
42
  )
43
43
  from .execution import Execution
44
+
45
+ from importlib.metadata import version, PackageNotFoundError
46
+
47
# Expose the installed distribution's version as ``__version__``.
try:
    __version__ = version("deriva_ml")
except PackageNotFoundError:
    # Package is not installed (e.g. imported straight from a source tree).
    # Still bind ``__version__`` so that ``deriva_ml.__version__`` never raises
    # AttributeError; the value is a PEP 440 local-version placeholder.
    __version__ = "0.0.0+unknown"
52
+
@@ -13,17 +13,15 @@ from __future__ import annotations
13
13
  import getpass
14
14
  import logging
15
15
  from datetime import datetime
16
- import hashlib
17
16
  from itertools import chain
18
17
  import inspect
18
+ import setuptools_scm
19
19
  from pathlib import Path
20
20
  import requests
21
- from setuptools_git_versioning import get_latest_file_commit
22
21
  import subprocess
23
22
  import shutil
24
23
  from typing import Optional, Any, Iterable, TYPE_CHECKING
25
24
  from deriva.core import (
26
- ErmrestCatalog,
27
25
  get_credential,
28
26
  urlquote,
29
27
  DEFAULT_SESSION_CONFIG,
@@ -144,6 +142,8 @@ class DerivaML(Dataset):
144
142
 
145
143
  # Initialize dataset class.
146
144
  super().__init__(self.model, self.cache_dir)
145
+ self._logger = logging.getLogger("deriva_ml")
146
+ self._logger.setLevel(logging_level)
147
147
 
148
148
  self.host_name = hostname
149
149
  self.catalog_id = catalog_id
@@ -151,37 +151,12 @@ class DerivaML(Dataset):
151
151
  self.version = model_version
152
152
  self.configuration = None
153
153
  self._execution: Optional[Execution] = None
154
- self._notebook = None
155
- try:
156
- from IPython import get_ipython
157
-
158
- ipython = get_ipython()
159
- # Check if running in Jupyter's ZMQ kernel (used by notebooks)
160
- if ipython is not None and "IPKernelApp" in ipython.config:
161
- self._notebook = Path(ipython.user_ns.get("__session__"))
162
- # Check if running in Jupyter's ZMQ kernel (used by notebooks)
163
- try:
164
- if subprocess.run(
165
- [shutil.which("nbstripout"), "--is-installed"],
166
- check=False,
167
- capture_output=True,
168
- ).returncode:
169
- self._logger.warn(
170
- "nbstripout is not installed in repository. Please run nbstripout --install"
171
- )
172
- except subprocess.CalledProcessError:
173
- self._logger.error("nbstripout is not found.")
174
-
175
- except (ImportError, AttributeError):
176
- pass
177
-
154
+ self._script_path, self._is_notebook = self._get_python_script()
155
+ self._notebook = self._get_python_notebook()
178
156
  self.domain_schema = self.model.domain_schema
179
157
  self.project_name = project_name or self.domain_schema
180
-
181
158
  self.start_time = datetime.now()
182
159
  self.status = Status.pending.value
183
- self._logger = logging.getLogger("deriva_ml")
184
- self._logger.setLevel(logging_level)
185
160
 
186
161
  logging.basicConfig(
187
162
  level=logging_level,
@@ -204,6 +179,65 @@ class DerivaML(Dataset):
204
179
  except (AttributeError, requests.HTTPError):
205
180
  pass
206
181
 
182
+ def _get_python_notebook(self) -> Path | None:
183
+ """Figure out if you are running in a Jupyter notebook
184
+
185
+ Returns:
186
+ A Path to the notebook file that is currently being executed.
187
+ """
188
+ notebook = None
189
+ try:
190
+ ipython = get_ipython()
191
+ # Check if running in Jupyter's ZMQ kernel (used by notebooks)
192
+ if ipython is not None and "IPKernelApp" in ipython.config:
193
+ notebook = Path(ipython.user_ns.get("__session__"))
194
+ # Check if running in Jupyter's ZMQ kernel (used by notebooks)
195
+ try:
196
+ if subprocess.run(
197
+ [shutil.which("nbstripout"), "--is-installed"],
198
+ check=False,
199
+ capture_output=True,
200
+ ).returncode:
201
+ self._logger.warning(
202
+ "nbstripout is not installed in repository. Please run nbstripout --install"
203
+ )
204
+ except subprocess.CalledProcessError:
205
+ self._logger.error("nbstripout is not found.")
206
+ except (ImportError, AttributeError):
207
+ pass
208
+ return notebook
209
+
210
def _get_python_script(self) -> tuple[Path, bool]:
    """Return the path to the currently executing script or notebook.

    Returns:
        A tuple ``(path, is_notebook)``. If ``_get_python_notebook`` returns a
        path, that path is returned with ``is_notebook`` set to True.
        Otherwise the filename of the frame two levels up the call stack is
        returned with ``is_notebook`` set to False.

    Raises:
        DerivaMLException: If the call stack is too shallow to contain the
            caller's frame.
    """
    if notebook := self._get_python_notebook():
        return notebook, True

    stack = inspect.stack()
    # The frame of interest is stack[2], so at least three frames must exist;
    # checking for fewer would let the index below raise IndexError instead.
    if len(stack) <= 2:
        raise DerivaMLException("Looking for caller failed")  # Stack is too shallow
    # Get the caller's filename, which is two up the stack from here.
    return Path(stack[2].filename), False
226
+
227
+ def _get_git_root(self):
228
+ try:
229
+ result = subprocess.run(
230
+ ["git", "rev-parse", "--show-toplevel"],
231
+ cwd=self._script_path.parent,
232
+ stdout=subprocess.PIPE,
233
+ stderr=subprocess.DEVNULL,
234
+ text=True,
235
+ check=True
236
+ )
237
+ return result.stdout.strip()
238
+ except subprocess.CalledProcessError:
239
+ return None # Not in a git repository
240
+
207
241
  @staticmethod
208
242
  def _get_session_config():
209
243
  """ """
@@ -227,6 +261,9 @@ class DerivaML(Dataset):
227
261
  """Get a new instance of a pathBuilder object."""
228
262
  return self.catalog.getPathBuilder()
229
263
 
264
def get_version(self) -> str:
    """Return the version that setuptools_scm computes for the script's git repository."""
    repo_root = self._get_git_root()
    return setuptools_scm.get_version(root=repo_root)
266
+
230
267
  @property
231
268
  def domain_path(self):
232
269
  """Get a new instance of a pathBuilder object to the domain schema"""
@@ -1028,27 +1065,25 @@ class DerivaML(Dataset):
1028
1065
  """
1029
1066
  # Make sure type is correct.
1030
1067
  self.lookup_term(MLVocab.workflow_type, workflow_type)
1031
- filename, github_url, is_dirty = self._github_url()
1068
+ github_url, is_dirty = self._github_url()
1032
1069
 
1033
1070
  if is_dirty:
1034
1071
  self._logger.warning(
1035
- f"File {filename} has been modified since last commit. Consider commiting before executing"
1072
+ f"File {self._script_path} has been modified since last commit. Consider commiting before executing"
1036
1073
  )
1037
1074
 
1038
- sha256_hash = hashlib.sha256()
1039
- if self._notebook:
1040
- # If you are in a notebook, strip out the outputs before computing the checksum.
1041
- result = subprocess.run(
1042
- ["nbstripout", "-t", filename],
1043
- capture_output=True,
1044
- text=False,
1045
- check=True,
1046
- )
1047
- sha256_hash.update(result.stdout)
1048
- else:
1049
- with open(filename, "rb") as f:
1050
- sha256_hash.update(f.read())
1051
- checksum = "SHA-256:" + sha256_hash.hexdigest()
1075
+ # If you are in a notebook, strip out the outputs before computing the checksum.
1076
+ cmd = (
1077
+ f"nbstripout {self._script_path} | git hash-object --stdin"
1078
+ if self._is_notebook
1079
+ else f"git hash-object {self._script_path}"
1080
+ )
1081
+ checksum = subprocess.run(
1082
+ cmd,
1083
+ capture_output=True,
1084
+ text=True,
1085
+ check=True,
1086
+ ).stdout.strip()
1052
1087
 
1053
1088
  workflow = Workflow(
1054
1089
  name=name,
@@ -1059,54 +1094,36 @@ class DerivaML(Dataset):
1059
1094
  )
1060
1095
  return self.add_workflow(workflow) if create else None
1061
1096
 
1062
- def _github_url(self) -> tuple[Path, str, bool]:
1097
+ def _github_url(self) -> tuple[str, bool]:
1063
1098
  """Return a GitHUB URL for the latest commit of the script from which this routine is called.
1064
1099
 
1065
1100
  This routine is used to be called from a script or notebook (e.g. python -m file). It assumes that
1066
1101
  the file is in a gitHUB repository and commited. It returns a URL to the last commited version of this
1067
1102
  file in GitHUB.
1068
1103
 
1069
- Returns: A tuple with the filename, gethub_url and a boolean to indicated if uncommited changes
1104
+ Returns: A tuple with the gethub_url and a boolean to indicated if uncommited changes
1070
1105
  have been made to the file.
1071
1106
 
1072
1107
  """
1073
1108
 
1074
- # Get the name of the script that is calling this function.
1075
- if self._notebook:
1076
- # Try to get the __session__ variable from the user namespace.
1077
- filename = Path("").absolute().parent / self._notebook
1078
- else:
1079
- stack = inspect.stack()
1080
- if len(stack) > 1:
1081
- filename = Path(
1082
- stack[2].filename
1083
- ) # Get the caller's filename, which is two up the stack from here.
1084
- else:
1085
- raise DerivaMLException(
1086
- f"Looking for caller failed"
1087
- ) # Stack is too shallow
1088
-
1089
1109
  # Get repo URL from local github repo.
1090
1110
  try:
1091
1111
  result = subprocess.run(
1092
- ["git", "remote", "get-url", "origin"], capture_output=True, text=True
1112
+ ["git", "remote", "get-url", "origin"], capture_output=True, text=True,
1113
+ cwd=self._script_path.parent,
1093
1114
  )
1094
1115
  github_url = result.stdout.strip().removesuffix(".git")
1095
1116
  except subprocess.CalledProcessError:
1096
1117
  raise DerivaMLException(f"No GIT remote found")
1097
1118
 
1098
1119
  # Find the root directory for the repository
1099
- repo_root = filename
1100
- while repo_root != repo_root.root:
1101
- if (repo_root / ".git").exists():
1102
- break
1103
- else:
1104
- repo_root = repo_root.parent
1120
+ repo_root = self._get_git_root()
1105
1121
 
1106
1122
  # Now check to see if file has been modified since the last commit.
1107
1123
  try:
1108
1124
  result = subprocess.run(
1109
1125
  ["git", "status", "--porcelain"],
1126
+ cwd=self._script_path.parent,
1110
1127
  capture_output=True,
1111
1128
  text=True,
1112
1129
  check=True,
@@ -1117,9 +1134,17 @@ class DerivaML(Dataset):
1117
1134
  except subprocess.CalledProcessError:
1118
1135
  is_dirty = False # If Git command fails, assume no changes
1119
1136
 
1120
- sha = get_latest_file_commit(filename)
1121
- url = f"{github_url}/blob/{sha}/{filename.relative_to(repo_root)}"
1122
- return filename, url, is_dirty
1137
+ """Get SHA-1 hash of latest commit of the file in the repository"""
1138
+ result = subprocess.run(
1139
+ ["git", "log", "-n", "1", "--pretty=format:%H" "--", self._script_path],
1140
+ cwd=self._script_path.parent,
1141
+ capture_output=True,
1142
+ text=True,
1143
+ check=True,
1144
+ )
1145
+ sha = result.stdout.strip()
1146
+ url = f"{github_url}/blob/{sha}/{self._script_path.relative_to(repo_root)}"
1147
+ return url, is_dirty
1123
1148
 
1124
1149
  # @validate_call
1125
1150
  def create_execution(self, configuration: ExecutionConfiguration) -> "Execution":
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: deriva-ml
3
- Version: 1.8.5
3
+ Version: 1.8.10
4
4
  Summary: Utilities to simplify use of Dervia and Pandas to create reproducable ML pipelines
5
5
  Author-email: ISRD <isrd-dev@isi.edu>
6
6
  Requires-Python: >=3.10
@@ -11,7 +11,8 @@ Requires-Dist: pandas
11
11
  Requires-Dist: regex~=2024.7.24
12
12
  Requires-Dist: pydantic>=2.10.6
13
13
  Requires-Dist: semver>3.0.0
14
- Requires-Dist: setuptools-git-versioning<3,>=2.0
14
+ Requires-Dist: setuptools>=64
15
+ Requires-Dist: setuptools-scm<=6.0
15
16
  Requires-Dist: nbstripout
16
17
  Dynamic: license-file
17
18
 
@@ -1,12 +1,11 @@
1
- deriva_ml/VERSION.py,sha256=Dtbi4ISKI_kkTsaWuM-q8NfE3DySQu91TTP04Yhd8d8,22
2
- deriva_ml/__init__.py,sha256=DyHiqklSer7q7oPGAemkzg5Qcq2swMZf9ALwJhGf6Jo,905
1
+ deriva_ml/__init__.py,sha256=r1Z9N5vtZkAET7emqhpAx2bf_xJUp5wHOc4_DIplsG8,1082
3
2
  deriva_ml/database_model.py,sha256=uhoyVyd8MQmY8J9ovCH8fjxhZDxxXNkdJyYdeyEGPXA,13898
4
3
  deriva_ml/dataset.py,sha256=xC6QPUp4MZcJiEnOEU3NnzoLBL9RcJWtPTyzIQP0Ivw,60666
5
4
  deriva_ml/dataset_aux_classes.py,sha256=YxjQnu2kS9kK_f8bGqhmgE6ty9GNeitCxfvReT9vaM0,6537
6
5
  deriva_ml/dataset_bag.py,sha256=e6IHv3saZUnZRfl0EjfnlV2NnmPeOagYYv3PuZqS1l0,11501
7
6
  deriva_ml/demo_catalog.py,sha256=xQPhFlflqwJskNQrQ-jdBSnGzBm2-aONBgcRxfsdNKM,11045
8
7
  deriva_ml/deriva_definitions.py,sha256=pZLPoUxiuJ-uGglmQ6sF9oVXsSUuOnPEqywoec78XNM,8893
9
- deriva_ml/deriva_ml_base.py,sha256=e2UtT3TlDpFQrG6z0DaB2iV22wmi4TLP7qXF3hvb8to,42868
8
+ deriva_ml/deriva_ml_base.py,sha256=aVyGsFERZtpjNxfaVYzvKa7J0Ma-U3DEibfjnbr7lFQ,43817
10
9
  deriva_ml/deriva_model.py,sha256=LV3FjIhIlz13ckZSmu0aOJhT9EVE0-M9oVMudfkxb0g,12004
11
10
  deriva_ml/execution.py,sha256=VlapQGPDQI2MOmYnA5-hpf-XM6Fu4hPLpFjNN5q9Udo,29889
12
11
  deriva_ml/execution_configuration.py,sha256=bjnZwXN6M7YPy5dFQwoGEBU8YjhQRSe1FW0rL0V9TaM,3422
@@ -26,9 +25,9 @@ deriva_ml/schema_setup/annotations.py,sha256=Uogm9YkRtoKSdgfQlICqRywbCATppwBO-Xr
26
25
  deriva_ml/schema_setup/create_schema.py,sha256=jwziMWJPbjRgjiRBT-KtidnXI8YNEFO74A9fwfptjHY,10626
27
26
  deriva_ml/schema_setup/policy.json,sha256=77sf0Imy6CAQV0_VwwbA56_KROJ05WXsvT-Wjtkk538,1633
28
27
  deriva_ml/schema_setup/table_comments_utils.py,sha256=-2_ubEpoH7ViLVb-ZfW9wZbQ26DTKNgjkCABMzGu4i4,2140
29
- deriva_ml-1.8.5.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
30
- deriva_ml-1.8.5.dist-info/METADATA,sha256=uuAq67MUZyY2LP8NbZ8RMJ5q-aX3pJV5ioYZqqLbuFA,653
31
- deriva_ml-1.8.5.dist-info/WHEEL,sha256=tTnHoFhvKQHCh4jz3yCn0WPTYIy7wXx3CJtJ7SJGV7c,91
32
- deriva_ml-1.8.5.dist-info/entry_points.txt,sha256=ZiOvrYj022x544TQwi018ujeHRRDahNmwJnzn5ThacM,242
33
- deriva_ml-1.8.5.dist-info/top_level.txt,sha256=I1Q1dkH96cRghdsFRVqwpa2M7IqJpR2QPUNNc5-Bnpw,10
34
- deriva_ml-1.8.5.dist-info/RECORD,,
28
+ deriva_ml-1.8.10.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
29
+ deriva_ml-1.8.10.dist-info/METADATA,sha256=Mhx0joyR1gPEX8G6ZoEpvxNVW4sUG9C_S5TIA6ueZKk,670
30
+ deriva_ml-1.8.10.dist-info/WHEEL,sha256=1tXe9gY0PYatrMPMDd6jXqjfpz_B-Wqm32CPfRC58XU,91
31
+ deriva_ml-1.8.10.dist-info/entry_points.txt,sha256=ZiOvrYj022x544TQwi018ujeHRRDahNmwJnzn5ThacM,242
32
+ deriva_ml-1.8.10.dist-info/top_level.txt,sha256=I1Q1dkH96cRghdsFRVqwpa2M7IqJpR2QPUNNc5-Bnpw,10
33
+ deriva_ml-1.8.10.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (77.0.1)
2
+ Generator: setuptools (77.0.3)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
deriva_ml/VERSION.py DELETED
@@ -1 +0,0 @@
1
- __version__ = "1.8.5"