deriva-ml 1.8.10__tar.gz → 1.9.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. {deriva_ml-1.8.10/src/deriva_ml.egg-info → deriva_ml-1.9.0}/PKG-INFO +1 -1
  2. {deriva_ml-1.8.10 → deriva_ml-1.9.0}/docs/Notebooks/DerivaML Execution.ipynb +3 -11
  3. deriva_ml-1.9.0/docs/user-guide/execution-configuration.md +26 -0
  4. {deriva_ml-1.8.10 → deriva_ml-1.9.0}/release.sh +2 -1
  5. {deriva_ml-1.8.10 → deriva_ml-1.9.0}/src/deriva_ml/database_model.py +27 -4
  6. {deriva_ml-1.8.10 → deriva_ml-1.9.0}/src/deriva_ml/dataset.py +14 -9
  7. {deriva_ml-1.8.10 → deriva_ml-1.9.0}/src/deriva_ml/dataset_bag.py +1 -1
  8. {deriva_ml-1.8.10 → deriva_ml-1.9.0}/src/deriva_ml/demo_catalog.py +9 -8
  9. {deriva_ml-1.8.10 → deriva_ml-1.9.0}/src/deriva_ml/deriva_definitions.py +8 -3
  10. {deriva_ml-1.8.10 → deriva_ml-1.9.0}/src/deriva_ml/deriva_ml_base.py +142 -50
  11. {deriva_ml-1.8.10 → deriva_ml-1.9.0}/src/deriva_ml/deriva_model.py +2 -2
  12. {deriva_ml-1.8.10 → deriva_ml-1.9.0}/src/deriva_ml/execution.py +9 -16
  13. {deriva_ml-1.8.10 → deriva_ml-1.9.0}/src/deriva_ml/execution_configuration.py +20 -23
  14. {deriva_ml-1.8.10 → deriva_ml-1.9.0}/src/deriva_ml/schema_setup/annotations.py +1 -1
  15. {deriva_ml-1.8.10 → deriva_ml-1.9.0}/src/deriva_ml/schema_setup/create_schema.py +3 -2
  16. {deriva_ml-1.8.10 → deriva_ml-1.9.0}/src/deriva_ml/upload.py +1 -1
  17. {deriva_ml-1.8.10 → deriva_ml-1.9.0/src/deriva_ml.egg-info}/PKG-INFO +1 -1
  18. {deriva_ml-1.8.10 → deriva_ml-1.9.0}/src/deriva_ml.egg-info/SOURCES.txt +0 -6
  19. {deriva_ml-1.8.10 → deriva_ml-1.9.0}/tests/derivaml_test.py +1 -0
  20. {deriva_ml-1.8.10 → deriva_ml-1.9.0}/tests/test_dataset.py +8 -38
  21. {deriva_ml-1.8.10 → deriva_ml-1.9.0}/tests/test_execution.py +9 -15
  22. {deriva_ml-1.8.10 → deriva_ml-1.9.0}/tests/test_upload.py +5 -7
  23. deriva_ml-1.8.10/docs/user-guide/execution-configuration.md +0 -14
  24. deriva_ml-1.8.10/src/deriva_ml/build/lib/schema_setup/alter_annotation.py +0 -36
  25. deriva_ml-1.8.10/src/deriva_ml/build/lib/schema_setup/annotation_temp.py +0 -255
  26. deriva_ml-1.8.10/src/deriva_ml/build/lib/schema_setup/create_schema.py +0 -165
  27. deriva_ml-1.8.10/src/deriva_ml/schema_setup/alter_annotation.py +0 -55
  28. deriva_ml-1.8.10/src/deriva_ml/schema_setup/table_comments_utils.py +0 -56
  29. deriva_ml-1.8.10/tests/__init__.py +0 -0
  30. {deriva_ml-1.8.10 → deriva_ml-1.9.0}/.github/workflows/publish-docs.yml +0 -0
  31. {deriva_ml-1.8.10 → deriva_ml-1.9.0}/.gitignore +0 -0
  32. {deriva_ml-1.8.10 → deriva_ml-1.9.0}/LICENSE +0 -0
  33. {deriva_ml-1.8.10 → deriva_ml-1.9.0}/README.md +0 -0
  34. {deriva_ml-1.8.10 → deriva_ml-1.9.0}/docs/.DS_Store +0 -0
  35. {deriva_ml-1.8.10 → deriva_ml-1.9.0}/docs/Notebooks/DerivaML Create Notes.ipynb +0 -0
  36. {deriva_ml-1.8.10 → deriva_ml-1.9.0}/docs/Notebooks/DerivaML Dataset.ipynb +0 -0
  37. {deriva_ml-1.8.10 → deriva_ml-1.9.0}/docs/Notebooks/DerivaML Features.ipynb +0 -0
  38. {deriva_ml-1.8.10 → deriva_ml-1.9.0}/docs/Notebooks/DerivaML Vocabulary.ipynb +0 -0
  39. {deriva_ml-1.8.10 → deriva_ml-1.9.0}/docs/assets/ERD.png +0 -0
  40. {deriva_ml-1.8.10 → deriva_ml-1.9.0}/docs/assets/Launcher.png +0 -0
  41. {deriva_ml-1.8.10 → deriva_ml-1.9.0}/docs/assets/copy_minid.png +0 -0
  42. {deriva_ml-1.8.10 → deriva_ml-1.9.0}/docs/assets/deriva-logo.png +0 -0
  43. {deriva_ml-1.8.10 → deriva_ml-1.9.0}/docs/assets/deriva-ml.pdf +0 -0
  44. {deriva_ml-1.8.10 → deriva_ml-1.9.0}/docs/assets/sharing-at-home.pdf +0 -0
  45. {deriva_ml-1.8.10 → deriva_ml-1.9.0}/docs/code-docs/dataset.md +0 -0
  46. {deriva_ml-1.8.10 → deriva_ml-1.9.0}/docs/code-docs/dataset_aux_classes.md +0 -0
  47. {deriva_ml-1.8.10 → deriva_ml-1.9.0}/docs/code-docs/dataset_bag.md +0 -0
  48. {deriva_ml-1.8.10 → deriva_ml-1.9.0}/docs/code-docs/deriva_ml_base.md +0 -0
  49. {deriva_ml-1.8.10 → deriva_ml-1.9.0}/docs/code-docs/deriva_model.md +0 -0
  50. {deriva_ml-1.8.10 → deriva_ml-1.9.0}/docs/code-docs/execution.md +0 -0
  51. {deriva_ml-1.8.10 → deriva_ml-1.9.0}/docs/code-docs/execution_configuration.md +0 -0
  52. {deriva_ml-1.8.10 → deriva_ml-1.9.0}/docs/code-docs/feature.md +0 -0
  53. {deriva_ml-1.8.10 → deriva_ml-1.9.0}/docs/code-docs/upload.md +0 -0
  54. {deriva_ml-1.8.10 → deriva_ml-1.9.0}/docs/deriva_ml_structure.md +0 -0
  55. {deriva_ml-1.8.10 → deriva_ml-1.9.0}/docs/index.md +0 -0
  56. {deriva_ml-1.8.10 → deriva_ml-1.9.0}/docs/release-notes.md +0 -0
  57. {deriva_ml-1.8.10 → deriva_ml-1.9.0}/docs/user-guide/datasets.md +0 -0
  58. {deriva_ml-1.8.10 → deriva_ml-1.9.0}/docs/user-guide/identifiers.md +0 -0
  59. {deriva_ml-1.8.10 → deriva_ml-1.9.0}/docs/user-guide/install.md +0 -0
  60. {deriva_ml-1.8.10 → deriva_ml-1.9.0}/docs/user-guide/ml_workflow_instruction.md +0 -0
  61. {deriva_ml-1.8.10 → deriva_ml-1.9.0}/mkdocs.yml +0 -0
  62. {deriva_ml-1.8.10 → deriva_ml-1.9.0}/pyproject.toml +0 -0
  63. {deriva_ml-1.8.10 → deriva_ml-1.9.0}/setup.cfg +0 -0
  64. {deriva_ml-1.8.10 → deriva_ml-1.9.0}/src/deriva_ml/__init__.py +0 -0
  65. {deriva_ml-1.8.10 → deriva_ml-1.9.0}/src/deriva_ml/dataset_aux_classes.py +0 -0
  66. {deriva_ml-1.8.10 → deriva_ml-1.9.0}/src/deriva_ml/execution_environment.py +0 -0
  67. {deriva_ml-1.8.10 → deriva_ml-1.9.0}/src/deriva_ml/feature.py +0 -0
  68. {deriva_ml-1.8.10 → deriva_ml-1.9.0}/src/deriva_ml/history.py +0 -0
  69. {deriva_ml-1.8.10/src/deriva_ml/build/lib → deriva_ml-1.9.0/src/deriva_ml}/schema_setup/__init__.py +0 -0
  70. {deriva_ml-1.8.10 → deriva_ml-1.9.0}/src/deriva_ml/schema_setup/policy.json +0 -0
  71. {deriva_ml-1.8.10/src/deriva_ml/build/lib → deriva_ml-1.9.0/src/deriva_ml}/schema_setup/table_comments_utils.py +0 -0
  72. {deriva_ml-1.8.10 → deriva_ml-1.9.0}/src/deriva_ml/test_functions.py +0 -0
  73. {deriva_ml-1.8.10 → deriva_ml-1.9.0}/src/deriva_ml.egg-info/dependency_links.txt +0 -0
  74. {deriva_ml-1.8.10 → deriva_ml-1.9.0}/src/deriva_ml.egg-info/entry_points.txt +0 -0
  75. {deriva_ml-1.8.10 → deriva_ml-1.9.0}/src/deriva_ml.egg-info/requires.txt +0 -0
  76. {deriva_ml-1.8.10 → deriva_ml-1.9.0}/src/deriva_ml.egg-info/top_level.txt +0 -0
  77. {deriva_ml-1.8.10/src/deriva_ml/schema_setup → deriva_ml-1.9.0/tests}/__init__.py +0 -0
  78. {deriva_ml-1.8.10 → deriva_ml-1.9.0}/tests/runner.py +0 -0
  79. {deriva_ml-1.8.10 → deriva_ml-1.9.0}/tests/test_basic_tables.py +0 -0
  80. {deriva_ml-1.8.10 → deriva_ml-1.9.0}/tests/test_download.py +0 -0
  81. {deriva_ml-1.8.10 → deriva_ml-1.9.0}/tests/test_features.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: deriva-ml
3
- Version: 1.8.10
3
+ Version: 1.9.0
4
4
  Summary: Utilities to simplify use of Deriva and Pandas to create reproducible ML pipelines
5
5
  Author-email: ISRD <isrd-dev@isi.edu>
6
6
  Requires-Python: >=3.10
@@ -28,7 +28,7 @@
28
28
  "source": [
29
29
  "import builtins\n",
30
30
  "from deriva.core.utils.globus_auth_utils import GlobusNativeLogin\n",
31
- "from deriva_ml import ExecutionConfiguration, Workflow, MLVocab, DerivaSystemColumns\n",
31
+ "from deriva_ml import ExecutionConfiguration, MLVocab, DerivaSystemColumns\n",
32
32
  "from deriva_ml.demo_catalog import create_demo_catalog, DemoML\n",
33
33
  "from IPython.display import display, Markdown, JSON\n",
34
34
  "import itertools\n",
@@ -166,12 +166,11 @@
166
166
  "metadata": {},
167
167
  "cell_type": "code",
168
168
  "source": [
169
- "ml_instance.add_term(MLVocab.workflow_type, \"Manual Workflow\", description=\"Inital setup of Model File\")\n",
169
+ "ml_instance.add_term(MLVocab.workflow_type, \"Manual Workflow\", description=\"Initial setup of Model File\")\n",
170
170
  "ml_instance.add_term(MLVocab.execution_asset_type, \"API_Model\", description=\"Model for our API workflow\")\n",
171
171
  "\n",
172
- "api_workflow = Workflow(\n",
172
+ "api_workflow = ml_instance.create_workflow(\n",
173
173
  " name=\"Manual Workflow\",\n",
174
- " url='https://github.com/informatics-isi-edu/deriva-ml/blob/main/docs/Notebooks/DerivaML%20Execution.ipynb',\n",
175
174
  " workflow_type=\"Manual Workflow\",\n",
176
175
  " description=\"A manual operation\"\n",
177
176
  ")\n",
@@ -207,13 +206,6 @@
207
206
  "source": [
208
207
  "ml_instance.add_term(MLVocab.workflow_type, \"ML Demo\", description=\"A ML Workflow that uses Deriva ML API\")\n",
209
208
  "\n",
210
- "api_workflow = Workflow(\n",
211
- " name=\"ML Demo\",\n",
212
- " url=\"https://github.com/informatics-isi-edu/deriva-ml/blob/main/pyproject.toml\",\n",
213
- " workflow_type=\"ML Demo\",\n",
214
- " description=\"A workflow that uses Deriva ML\"\n",
215
- ")\n",
216
- "\n",
217
209
  "config = ExecutionConfiguration(\n",
218
210
  " datasets=[training_dataset_rid, {'rid':testing_dataset_rid, 'materialize':False}],\n",
219
211
  " assets = [training_model_rid],\n",
@@ -0,0 +1,26 @@
1
+ # Configuring an execution
2
+
3
+ One of the essential functions of DerivaML is to help keep track of how ML model results are created so that they can be shared and reproduced.
4
+ Every execution in DerivaML is represented by an Execution object, which keeps track of all of the parameters associated with an execution and
5
+ provides a number of functions that enable a program to help keep track of the configuration and results of a model execution.
6
+
7
+ The first step in creating a DerivaML execution is to create an `ExecutionConfiguration`.
8
+ The `ExecutionConfiguration` class is used to specify the inputs that are to be used by an Execution.
9
+ These inputs include
10
+ * A list of datasets that are used
11
+ * A list of other files (assets) that are to be used. This can include existing models, or any other information that the execution might need.
12
+ * The actual code that is being executed.
13
+
14
+ [`ExecutionConfiguration`][deriva_ml.execution_configuration.ExecutionConfiguration] is a Pydantic dataclass.
15
+ As part of initializing an execution, the assets and datasets in the configuration object are downloaded and cached.
16
+ The datasets are provided as a list of `DatasetSpec` objects, which have the form:
17
+ ```DatasetSpec(dataset_rid:RID, version:DatasetVersion, materialize:bool)```
18
+
19
+ It will be common to just want to use the latest version of the dataset, in which case you would use:
20
+ ```
21
+ deriva_ml = DerivaML(...)
22
+ dataset_rid = ...
23
+ datasets = [DatasetSpec(dataset_rid, version=deriva_ml.dataset_version(dataset_rid))]
24
+ ```
25
+
26
+ If a dataset is large, downloading from the catalog might take a significant amount of time.
@@ -13,7 +13,7 @@ echo "Bumping version: $VERSION_TYPE"
13
13
 
14
14
  # Bump the version using bump-my-version.
15
15
  # This command should update version files, commit the changes, and create a Git tag.
16
- bump-my-version bump $VERSION_TYPE --verbose
16
+ bump-my-version bump "$VERSION_TYPE" --verbose
17
17
 
18
18
  # Push commits and tags to the remote repository.
19
19
  echo "Pushing changes to remote repository..."
@@ -32,5 +32,6 @@ python -m build
32
32
  NEW_TAG=$(git describe --tags --abbrev=0)
33
33
  echo "New version tag: $NEW_TAG"
34
34
 
35
+ twine upload "dist/*${NEW_TAG/v/}"
35
36
 
36
37
  echo "Release process complete!"
@@ -1,12 +1,15 @@
1
- """Ths module constains the definition of the DatabaseModel class. The role of this class is to provide an nterface between the BDBag representation
1
+ """Ths module contains the definition of the DatabaseModel class. The role of this class is to provide an nterface between the BDBag representation
2
2
  of a dataset and a SQLite database in which the contents of the bag are stored.
3
3
  """
4
+
5
+ from __future__ import annotations
6
+
4
7
  import logging
5
8
  import sqlite3
6
9
 
7
10
  from csv import reader
8
11
  from pathlib import Path
9
- from typing import Any, Optional
12
+ from typing import Any, Optional, Generator
10
13
  from urllib.parse import urlparse
11
14
 
12
15
  from deriva.core.ermrest_model import Model
@@ -20,7 +23,7 @@ from .dataset_bag import DatasetBag
20
23
  class DatabaseModelMeta(type):
21
24
  """Use metaclass to ensure that there is onl one instance per path"""
22
25
 
23
- _paths_loaded: dict[Path:"DatabaseModel"] = {}
26
+ _paths_loaded: dict[Path, "DatabaseModel"] = {}
24
27
 
25
28
  def __call__(cls, *args, **kwargs):
26
29
  logger = logging.getLogger("deriva_ml")
@@ -47,7 +50,7 @@ class DatabaseModel(DerivaModel, metaclass=DatabaseModelMeta):
47
50
  Because of nested datasets, it's possible that more than one dataset rid is in a bag, or that a dataset rid might
48
51
  appear in more than one database. To help manage this, a global list of all the datasets that have been loaded
49
52
  into DatabaseModels, is kept in the class variable `_rid_map`.
50
-
53
+
51
54
  Because you can load different versions of a dataset simultaneously, the dataset RID and version number are tracked, and a new
52
55
  SQLite instance is created for every new dataset version present.
53
56
 
@@ -315,6 +318,26 @@ class DatabaseModel(DerivaModel, metaclass=DatabaseModelMeta):
315
318
  )
316
319
  return datasets
317
320
 
321
+ def get_table_as_dict(self, table: str) -> Generator[dict[str, Any], None, None]:
322
+ """Retrieve the contents of the specified table as a dictionary.
323
+
324
+ Args:
325
+ table: Table to retrieve data from. If schema is not provided as part of the table name,
326
+ the method will attempt to locate the schema for the table.
327
+
328
+ Returns:
329
+ A generator producing dictionaries containing the contents of the specified table as name/value pairs.
330
+ """
331
+ table_name = self.normalize_table_name(table)
332
+ with self.dbase as dbase:
333
+ col_names = [
334
+ c[1]
335
+ for c in dbase.execute(f'PRAGMA table_info("{table_name}")').fetchall()
336
+ ]
337
+ result = self.dbase.execute(f'SELECT * FROM "{table_name}"')
338
+ while row := result.fetchone():
339
+ yield dict(zip(col_names, row))
340
+
318
341
  def normalize_table_name(self, table: str) -> str:
319
342
  """Attempt to insert the schema into a table name if it's not provided.
320
343
 
@@ -92,7 +92,7 @@ class Dataset:
92
92
  dataset_list: list[DatasetSpec],
93
93
  description: Optional[str] = "",
94
94
  execution_rid: Optional[RID] = None,
95
- ) -> RID:
95
+ ) -> list[dict[str, Any]]:
96
96
  schema_path = self._model.catalog.getPathBuilder().schemas[self._ml_schema]
97
97
 
98
98
  # Construct version records for insert
@@ -245,7 +245,7 @@ class Dataset:
245
245
  DerivaMLException: if provided RID is not to a dataset_table.
246
246
  """
247
247
 
248
- # Find all of the datasets that are reachable from this dataset and determine their new version numbers.
248
+ # Find all the datasets that are reachable from this dataset and determine their new version numbers.
249
249
  related_datasets = list(self._build_dataset_graph(dataset_rid=dataset_rid))
250
250
  version_update_list = [
251
251
  DatasetSpec(
@@ -254,7 +254,7 @@ class Dataset:
254
254
  )
255
255
  for ds_rid in related_datasets
256
256
  ]
257
- updated_versions = self._insert_dataset_versions(
257
+ self._insert_dataset_versions(
258
258
  version_update_list, description=description, execution_rid=execution_rid
259
259
  )
260
260
  return [d.version for d in version_update_list if d.rid == dataset_rid][0]
@@ -751,9 +751,10 @@ class Dataset:
751
751
  ]
752
752
 
753
753
  def _table_paths(
754
- self, dataset: DatasetSpec = None, snapshot_catalog: Optional[DerivaML] = None
754
+ self,
755
+ dataset: Optional[DatasetSpec] = None,
756
+ snapshot_catalog: Optional[DerivaML] = None,
755
757
  ) -> Iterator[tuple[str, str, Table]]:
756
-
757
758
  paths = self._collect_paths(dataset and dataset.rid, snapshot_catalog)
758
759
 
759
760
  def source_path(path: tuple[Table, ...]):
@@ -779,17 +780,20 @@ class Dataset:
779
780
  def _collect_paths(
780
781
  self,
781
782
  dataset_rid: Optional[RID] = None,
782
- snapshot_catalog: Optional[DerivaML] = None,
783
+ snapshot: Optional[Dataset] = None,
783
784
  dataset_nesting_depth: Optional[int] = None,
784
785
  ) -> set[tuple[Table, ...]]:
785
786
 
786
- snapshot_catalog = snapshot_catalog or self
787
+ snapshot_catalog = snapshot if snapshot else self
788
+
787
789
  dataset_table = snapshot_catalog._model.schemas[self._ml_schema].tables[
788
790
  "Dataset"
789
791
  ]
790
792
  dataset_dataset = snapshot_catalog._model.schemas[self._ml_schema].tables[
791
793
  "Dataset_Dataset"
792
794
  ]
795
+
796
+ # Figure out what types of elements the dataset contains.
793
797
  dataset_associations = [
794
798
  a
795
799
  for a in self.dataset_table.find_associations()
@@ -812,7 +816,8 @@ class Dataset:
812
816
  ]
813
817
  else:
814
818
  included_associations = dataset_associations
815
- # Get the paths through the schema and filter out all of dataset paths not used by this dataset.
819
+
820
+ # Get the paths through the schema and filter out all the dataset paths not used by this dataset.
816
821
  paths = {
817
822
  tuple(p)
818
823
  for p in snapshot_catalog._model._schema_to_paths()
@@ -827,7 +832,7 @@ class Dataset:
827
832
  if dataset_rid:
828
833
  for c in snapshot_catalog.list_dataset_children(dataset_rid=dataset_rid):
829
834
  nested_paths |= self._collect_paths(
830
- c, snapshot_catalog=snapshot_catalog
835
+ c, snapshot=snapshot_catalog
831
836
  )
832
837
  else:
833
838
  # Initialize nesting depth if not already provided.
@@ -109,7 +109,7 @@ class DatasetBag:
109
109
  for ts, on in paths:
110
110
  tables = " JOIN ".join(ts)
111
111
  on_expression = " and ".join(
112
- [f"{column_name(l)}={column_name(r)}" for l, r in on]
112
+ [f"{column_name(left)}={column_name(right)}" for left, right in on]
113
113
  )
114
114
  sql.append(
115
115
  f"SELECT {select_args} FROM {tables} ON {on_expression} WHERE {dataset_table_name}.RID IN ({datasets})"
@@ -5,6 +5,7 @@ import logging
5
5
  from random import random, randint
6
6
  import tempfile
7
7
  from tempfile import TemporaryDirectory
8
+ from typing import Optional
8
9
  import itertools
9
10
 
10
11
  from deriva.config.acl_config import AclConfig
@@ -18,7 +19,6 @@ from requests import HTTPError
18
19
  from deriva_ml import (
19
20
  DerivaML,
20
21
  ExecutionConfiguration,
21
- Workflow,
22
22
  MLVocab,
23
23
  BuiltinTypes,
24
24
  ColumnDefinition,
@@ -169,12 +169,9 @@ def create_demo_features(ml_instance):
169
169
  description="Model for our API workflow",
170
170
  )
171
171
 
172
- api_workflow = ml_instance.add_workflow(
173
- Workflow(
174
- name="API Workflow",
175
- url="https://github.com/informatics-isi-edu/deriva-ml/blob/main/pyproject.toml",
176
- workflow_type="API Workflow",
177
- )
172
+ api_workflow = ml_instance.create_workflow(
173
+ name="API Workflow",
174
+ workflow_type="API Workflow",
178
175
  )
179
176
 
180
177
  api_execution = ml_instance.create_execution(
@@ -322,7 +319,11 @@ def create_demo_catalog(
322
319
 
323
320
  class DemoML(DerivaML):
324
321
  def __init__(
325
- self, hostname, catalog_id, cache_dir: str = None, working_dir: str = None
322
+ self,
323
+ hostname,
324
+ catalog_id,
325
+ cache_dir: Optional[str] = None,
326
+ working_dir: Optional[str] = None,
326
327
  ):
327
328
  super().__init__(
328
329
  hostname=hostname,
@@ -8,7 +8,7 @@ from enum import Enum
8
8
  from typing import Any, Iterable, Optional, Annotated
9
9
 
10
10
  import deriva.core.ermrest_model as em
11
- from urllib.parse import urlparse, urljoin
11
+ from urllib.parse import urlparse
12
12
  from deriva.core.ermrest_model import builtin_types
13
13
  from pydantic import (
14
14
  BaseModel,
@@ -139,13 +139,18 @@ class FileSpec(BaseModel):
139
139
  if url_parts.scheme == "tag":
140
140
  return v
141
141
  elif not url_parts.scheme:
142
- return f'tag://{gethostname()},{date.today()}:file://{v}'
142
+ return f"tag://{gethostname()},{date.today()}:file://{v}"
143
143
  else:
144
144
  raise ValidationError("url is not a file URL")
145
145
 
146
146
  @model_serializer()
147
147
  def serialize_filespec(self):
148
- return {'URL': self.url, 'Description': self.description, 'MD5': self.md5, 'Length': self.length}
148
+ return {
149
+ "URL": self.url,
150
+ "Description": self.description,
151
+ "MD5": self.md5,
152
+ "Length": self.length,
153
+ }
149
154
 
150
155
 
151
156
  class VocabularyTerm(BaseModel):