deriva-ml 1.8.10__tar.gz → 1.9.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {deriva_ml-1.8.10/src/deriva_ml.egg-info → deriva_ml-1.9.0}/PKG-INFO +1 -1
- {deriva_ml-1.8.10 → deriva_ml-1.9.0}/docs/Notebooks/DerivaML Execution.ipynb +3 -11
- deriva_ml-1.9.0/docs/user-guide/execution-configuration.md +26 -0
- {deriva_ml-1.8.10 → deriva_ml-1.9.0}/release.sh +2 -1
- {deriva_ml-1.8.10 → deriva_ml-1.9.0}/src/deriva_ml/database_model.py +27 -4
- {deriva_ml-1.8.10 → deriva_ml-1.9.0}/src/deriva_ml/dataset.py +14 -9
- {deriva_ml-1.8.10 → deriva_ml-1.9.0}/src/deriva_ml/dataset_bag.py +1 -1
- {deriva_ml-1.8.10 → deriva_ml-1.9.0}/src/deriva_ml/demo_catalog.py +9 -8
- {deriva_ml-1.8.10 → deriva_ml-1.9.0}/src/deriva_ml/deriva_definitions.py +8 -3
- {deriva_ml-1.8.10 → deriva_ml-1.9.0}/src/deriva_ml/deriva_ml_base.py +142 -50
- {deriva_ml-1.8.10 → deriva_ml-1.9.0}/src/deriva_ml/deriva_model.py +2 -2
- {deriva_ml-1.8.10 → deriva_ml-1.9.0}/src/deriva_ml/execution.py +9 -16
- {deriva_ml-1.8.10 → deriva_ml-1.9.0}/src/deriva_ml/execution_configuration.py +20 -23
- {deriva_ml-1.8.10 → deriva_ml-1.9.0}/src/deriva_ml/schema_setup/annotations.py +1 -1
- {deriva_ml-1.8.10 → deriva_ml-1.9.0}/src/deriva_ml/schema_setup/create_schema.py +3 -2
- {deriva_ml-1.8.10 → deriva_ml-1.9.0}/src/deriva_ml/upload.py +1 -1
- {deriva_ml-1.8.10 → deriva_ml-1.9.0/src/deriva_ml.egg-info}/PKG-INFO +1 -1
- {deriva_ml-1.8.10 → deriva_ml-1.9.0}/src/deriva_ml.egg-info/SOURCES.txt +0 -6
- {deriva_ml-1.8.10 → deriva_ml-1.9.0}/tests/derivaml_test.py +1 -0
- {deriva_ml-1.8.10 → deriva_ml-1.9.0}/tests/test_dataset.py +8 -38
- {deriva_ml-1.8.10 → deriva_ml-1.9.0}/tests/test_execution.py +9 -15
- {deriva_ml-1.8.10 → deriva_ml-1.9.0}/tests/test_upload.py +5 -7
- deriva_ml-1.8.10/docs/user-guide/execution-configuration.md +0 -14
- deriva_ml-1.8.10/src/deriva_ml/build/lib/schema_setup/alter_annotation.py +0 -36
- deriva_ml-1.8.10/src/deriva_ml/build/lib/schema_setup/annotation_temp.py +0 -255
- deriva_ml-1.8.10/src/deriva_ml/build/lib/schema_setup/create_schema.py +0 -165
- deriva_ml-1.8.10/src/deriva_ml/schema_setup/alter_annotation.py +0 -55
- deriva_ml-1.8.10/src/deriva_ml/schema_setup/table_comments_utils.py +0 -56
- deriva_ml-1.8.10/tests/__init__.py +0 -0
- {deriva_ml-1.8.10 → deriva_ml-1.9.0}/.github/workflows/publish-docs.yml +0 -0
- {deriva_ml-1.8.10 → deriva_ml-1.9.0}/.gitignore +0 -0
- {deriva_ml-1.8.10 → deriva_ml-1.9.0}/LICENSE +0 -0
- {deriva_ml-1.8.10 → deriva_ml-1.9.0}/README.md +0 -0
- {deriva_ml-1.8.10 → deriva_ml-1.9.0}/docs/.DS_Store +0 -0
- {deriva_ml-1.8.10 → deriva_ml-1.9.0}/docs/Notebooks/DerivaML Create Notes.ipynb +0 -0
- {deriva_ml-1.8.10 → deriva_ml-1.9.0}/docs/Notebooks/DerivaML Dataset.ipynb +0 -0
- {deriva_ml-1.8.10 → deriva_ml-1.9.0}/docs/Notebooks/DerivaML Features.ipynb +0 -0
- {deriva_ml-1.8.10 → deriva_ml-1.9.0}/docs/Notebooks/DerivaML Vocabulary.ipynb +0 -0
- {deriva_ml-1.8.10 → deriva_ml-1.9.0}/docs/assets/ERD.png +0 -0
- {deriva_ml-1.8.10 → deriva_ml-1.9.0}/docs/assets/Launcher.png +0 -0
- {deriva_ml-1.8.10 → deriva_ml-1.9.0}/docs/assets/copy_minid.png +0 -0
- {deriva_ml-1.8.10 → deriva_ml-1.9.0}/docs/assets/deriva-logo.png +0 -0
- {deriva_ml-1.8.10 → deriva_ml-1.9.0}/docs/assets/deriva-ml.pdf +0 -0
- {deriva_ml-1.8.10 → deriva_ml-1.9.0}/docs/assets/sharing-at-home.pdf +0 -0
- {deriva_ml-1.8.10 → deriva_ml-1.9.0}/docs/code-docs/dataset.md +0 -0
- {deriva_ml-1.8.10 → deriva_ml-1.9.0}/docs/code-docs/dataset_aux_classes.md +0 -0
- {deriva_ml-1.8.10 → deriva_ml-1.9.0}/docs/code-docs/dataset_bag.md +0 -0
- {deriva_ml-1.8.10 → deriva_ml-1.9.0}/docs/code-docs/deriva_ml_base.md +0 -0
- {deriva_ml-1.8.10 → deriva_ml-1.9.0}/docs/code-docs/deriva_model.md +0 -0
- {deriva_ml-1.8.10 → deriva_ml-1.9.0}/docs/code-docs/execution.md +0 -0
- {deriva_ml-1.8.10 → deriva_ml-1.9.0}/docs/code-docs/execution_configuration.md +0 -0
- {deriva_ml-1.8.10 → deriva_ml-1.9.0}/docs/code-docs/feature.md +0 -0
- {deriva_ml-1.8.10 → deriva_ml-1.9.0}/docs/code-docs/upload.md +0 -0
- {deriva_ml-1.8.10 → deriva_ml-1.9.0}/docs/deriva_ml_structure.md +0 -0
- {deriva_ml-1.8.10 → deriva_ml-1.9.0}/docs/index.md +0 -0
- {deriva_ml-1.8.10 → deriva_ml-1.9.0}/docs/release-notes.md +0 -0
- {deriva_ml-1.8.10 → deriva_ml-1.9.0}/docs/user-guide/datasets.md +0 -0
- {deriva_ml-1.8.10 → deriva_ml-1.9.0}/docs/user-guide/identifiers.md +0 -0
- {deriva_ml-1.8.10 → deriva_ml-1.9.0}/docs/user-guide/install.md +0 -0
- {deriva_ml-1.8.10 → deriva_ml-1.9.0}/docs/user-guide/ml_workflow_instruction.md +0 -0
- {deriva_ml-1.8.10 → deriva_ml-1.9.0}/mkdocs.yml +0 -0
- {deriva_ml-1.8.10 → deriva_ml-1.9.0}/pyproject.toml +0 -0
- {deriva_ml-1.8.10 → deriva_ml-1.9.0}/setup.cfg +0 -0
- {deriva_ml-1.8.10 → deriva_ml-1.9.0}/src/deriva_ml/__init__.py +0 -0
- {deriva_ml-1.8.10 → deriva_ml-1.9.0}/src/deriva_ml/dataset_aux_classes.py +0 -0
- {deriva_ml-1.8.10 → deriva_ml-1.9.0}/src/deriva_ml/execution_environment.py +0 -0
- {deriva_ml-1.8.10 → deriva_ml-1.9.0}/src/deriva_ml/feature.py +0 -0
- {deriva_ml-1.8.10 → deriva_ml-1.9.0}/src/deriva_ml/history.py +0 -0
- {deriva_ml-1.8.10/src/deriva_ml/build/lib → deriva_ml-1.9.0/src/deriva_ml}/schema_setup/__init__.py +0 -0
- {deriva_ml-1.8.10 → deriva_ml-1.9.0}/src/deriva_ml/schema_setup/policy.json +0 -0
- {deriva_ml-1.8.10/src/deriva_ml/build/lib → deriva_ml-1.9.0/src/deriva_ml}/schema_setup/table_comments_utils.py +0 -0
- {deriva_ml-1.8.10 → deriva_ml-1.9.0}/src/deriva_ml/test_functions.py +0 -0
- {deriva_ml-1.8.10 → deriva_ml-1.9.0}/src/deriva_ml.egg-info/dependency_links.txt +0 -0
- {deriva_ml-1.8.10 → deriva_ml-1.9.0}/src/deriva_ml.egg-info/entry_points.txt +0 -0
- {deriva_ml-1.8.10 → deriva_ml-1.9.0}/src/deriva_ml.egg-info/requires.txt +0 -0
- {deriva_ml-1.8.10 → deriva_ml-1.9.0}/src/deriva_ml.egg-info/top_level.txt +0 -0
- {deriva_ml-1.8.10/src/deriva_ml/schema_setup → deriva_ml-1.9.0/tests}/__init__.py +0 -0
- {deriva_ml-1.8.10 → deriva_ml-1.9.0}/tests/runner.py +0 -0
- {deriva_ml-1.8.10 → deriva_ml-1.9.0}/tests/test_basic_tables.py +0 -0
- {deriva_ml-1.8.10 → deriva_ml-1.9.0}/tests/test_download.py +0 -0
- {deriva_ml-1.8.10 → deriva_ml-1.9.0}/tests/test_features.py +0 -0
|
@@ -28,7 +28,7 @@
|
|
|
28
28
|
"source": [
|
|
29
29
|
"import builtins\n",
|
|
30
30
|
"from deriva.core.utils.globus_auth_utils import GlobusNativeLogin\n",
|
|
31
|
-
"from deriva_ml import ExecutionConfiguration,
|
|
31
|
+
"from deriva_ml import ExecutionConfiguration, MLVocab, DerivaSystemColumns\n",
|
|
32
32
|
"from deriva_ml.demo_catalog import create_demo_catalog, DemoML\n",
|
|
33
33
|
"from IPython.display import display, Markdown, JSON\n",
|
|
34
34
|
"import itertools\n",
|
|
@@ -166,12 +166,11 @@
|
|
|
166
166
|
"metadata": {},
|
|
167
167
|
"cell_type": "code",
|
|
168
168
|
"source": [
|
|
169
|
-
"ml_instance.add_term(MLVocab.workflow_type, \"Manual Workflow\", description=\"
|
|
169
|
+
"ml_instance.add_term(MLVocab.workflow_type, \"Manual Workflow\", description=\"Initial setup of Model File\")\n",
|
|
170
170
|
"ml_instance.add_term(MLVocab.execution_asset_type, \"API_Model\", description=\"Model for our API workflow\")\n",
|
|
171
171
|
"\n",
|
|
172
|
-
"api_workflow =
|
|
172
|
+
"api_workflow = ml_instance.create_workflow(\n",
|
|
173
173
|
" name=\"Manual Workflow\",\n",
|
|
174
|
-
" url='https://github.com/informatics-isi-edu/deriva-ml/blob/main/docs/Notebooks/DerivaML%20Execution.ipynb',\n",
|
|
175
174
|
" workflow_type=\"Manual Workflow\",\n",
|
|
176
175
|
" description=\"A manual operation\"\n",
|
|
177
176
|
")\n",
|
|
@@ -207,13 +206,6 @@
|
|
|
207
206
|
"source": [
|
|
208
207
|
"ml_instance.add_term(MLVocab.workflow_type, \"ML Demo\", description=\"A ML Workflow that uses Deriva ML API\")\n",
|
|
209
208
|
"\n",
|
|
210
|
-
"api_workflow = Workflow(\n",
|
|
211
|
-
" name=\"ML Demo\",\n",
|
|
212
|
-
" url=\"https://github.com/informatics-isi-edu/deriva-ml/blob/main/pyproject.toml\",\n",
|
|
213
|
-
" workflow_type=\"ML Demo\",\n",
|
|
214
|
-
" description=\"A workflow that uses Deriva ML\"\n",
|
|
215
|
-
")\n",
|
|
216
|
-
"\n",
|
|
217
209
|
"config = ExecutionConfiguration(\n",
|
|
218
210
|
" datasets=[training_dataset_rid, {'rid':testing_dataset_rid, 'materialize':False}],\n",
|
|
219
211
|
" assets = [training_model_rid],\n",
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# Configuring an execution
|
|
2
|
+
|
|
3
|
+
One of the essential functions of DerivaML is to help keep track of how ML model results are created so that they can be shared and reproduced.
|
|
4
|
+
Every execution in DerivaML is represented by an Execution object, which keeps track of all of the parameters associated with an execution and
|
|
5
|
+
provides a number of functions that enable a program to help keep track of the configuration and results of a model execution.
|
|
6
|
+
|
|
7
|
+
The first step in creating a DerivaML execution is to create an `ExecutionConfiguration`.
|
|
8
|
+
The `ExecutionConfiguration` class is used to specify the inputs that are to be used by an Execution.
|
|
9
|
+
These inputs include
|
|
10
|
+
* A list of datasets that are used
|
|
11
|
+
* A list of other files (assets) that are to be used. This can include existing models, or any other information that the execution might need.
|
|
12
|
+
* The actual code that is being executed.
|
|
13
|
+
|
|
14
|
+
[`ExecutionConfiguration`][deriva_ml.execution_configuration.ExecutionConfiguration] is a Pydantic dataclass.
|
|
15
|
+
As part of initializing an execution, the assets and datasets in the configuration object are downloaded and cached.
|
|
16
|
+
The datasets are provided as a list of `DatasetSpec` objects, which have the form:
|
|
17
|
+
```DatasetSpec(dataset_rid:RID, version:DatasetVersion, materialize:bool)```
|
|
18
|
+
|
|
19
|
+
It will be common to just want to use the latest version of the dataset, in which case you would use:
|
|
20
|
+
```
|
|
21
|
+
deriva_ml = DerivaML(...)
|
|
22
|
+
dataset_rid = ...
|
|
23
|
+
datasets = [DatasetSpec(dataset_rid, version=deriva_ml.dataset_version(dataset_rid))]
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
If a dataset is large, downloading from the catalog might take a significant amount of time.
|
|
@@ -13,7 +13,7 @@ echo "Bumping version: $VERSION_TYPE"
|
|
|
13
13
|
|
|
14
14
|
# Bump the version using bump-my-version.
|
|
15
15
|
# This command should update version files, commit the changes, and create a Git tag.
|
|
16
|
-
bump-my-version bump $VERSION_TYPE --verbose
|
|
16
|
+
bump-my-version bump "$VERSION_TYPE" --verbose
|
|
17
17
|
|
|
18
18
|
# Push commits and tags to the remote repository.
|
|
19
19
|
echo "Pushing changes to remote repository..."
|
|
@@ -32,5 +32,6 @@ python -m build
|
|
|
32
32
|
NEW_TAG=$(git describe --tags --abbrev=0)
|
|
33
33
|
echo "New version tag: $NEW_TAG"
|
|
34
34
|
|
|
35
|
+
twine upload "dist/*${NEW_TAG/v/}"
|
|
35
36
|
|
|
36
37
|
echo "Release process complete!"
|
|
@@ -1,12 +1,15 @@
|
|
|
1
|
-
"""Ths module
|
|
1
|
+
"""This module contains the definition of the DatabaseModel class. The role of this class is to provide an interface between the BDBag representation
|
|
2
2
|
of a dataset and a SQLite database in which the contents of the bag are stored.
|
|
3
3
|
"""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
4
7
|
import logging
|
|
5
8
|
import sqlite3
|
|
6
9
|
|
|
7
10
|
from csv import reader
|
|
8
11
|
from pathlib import Path
|
|
9
|
-
from typing import Any, Optional
|
|
12
|
+
from typing import Any, Optional, Generator
|
|
10
13
|
from urllib.parse import urlparse
|
|
11
14
|
|
|
12
15
|
from deriva.core.ermrest_model import Model
|
|
@@ -20,7 +23,7 @@ from .dataset_bag import DatasetBag
|
|
|
20
23
|
class DatabaseModelMeta(type):
|
|
21
24
|
"""Use metaclass to ensure that there is only one instance per path"""
|
|
22
25
|
|
|
23
|
-
_paths_loaded: dict[Path
|
|
26
|
+
_paths_loaded: dict[Path, "DatabaseModel"] = {}
|
|
24
27
|
|
|
25
28
|
def __call__(cls, *args, **kwargs):
|
|
26
29
|
logger = logging.getLogger("deriva_ml")
|
|
@@ -47,7 +50,7 @@ class DatabaseModel(DerivaModel, metaclass=DatabaseModelMeta):
|
|
|
47
50
|
Because of nested datasets, it's possible that more than one dataset rid is in a bag, or that a dataset rid might
|
|
48
51
|
appear in more than one database. To help manage this, a global list of all the datasets that have been loaded
|
|
49
52
|
into DatabaseModels, is kept in the class variable `_rid_map`.
|
|
50
|
-
|
|
53
|
+
|
|
51
54
|
Because you can load different versions of a dataset simultaneously, the dataset RID and version number are tracked, and a new
|
|
52
55
|
SQLite instance is created for every new dataset version present.
|
|
53
56
|
|
|
@@ -315,6 +318,26 @@ class DatabaseModel(DerivaModel, metaclass=DatabaseModelMeta):
|
|
|
315
318
|
)
|
|
316
319
|
return datasets
|
|
317
320
|
|
|
321
|
+
def get_table_as_dict(self, table: str) -> Generator[dict[str, Any], None, None]:
|
|
322
|
+
"""Retrieve the contents of the specified table as a dictionary.
|
|
323
|
+
|
|
324
|
+
Args:
|
|
325
|
+
table: Table to retrieve data from. If the schema is not provided as part of the table name,
|
|
326
|
+
the method will attempt to locate the schema for the table.
|
|
327
|
+
|
|
328
|
+
Returns:
|
|
329
|
+
A generator producing dictionaries containing the contents of the specified table as name/value pairs.
|
|
330
|
+
"""
|
|
331
|
+
table_name = self.normalize_table_name(table)
|
|
332
|
+
with self.dbase as dbase:
|
|
333
|
+
col_names = [
|
|
334
|
+
c[1]
|
|
335
|
+
for c in dbase.execute(f'PRAGMA table_info("{table_name}")').fetchall()
|
|
336
|
+
]
|
|
337
|
+
result = self.dbase.execute(f'SELECT * FROM "{table_name}"')
|
|
338
|
+
while row := result.fetchone():
|
|
339
|
+
yield dict(zip(col_names, row))
|
|
340
|
+
|
|
318
341
|
def normalize_table_name(self, table: str) -> str:
|
|
319
342
|
"""Attempt to insert the schema into a table name if it's not provided.
|
|
320
343
|
|
|
@@ -92,7 +92,7 @@ class Dataset:
|
|
|
92
92
|
dataset_list: list[DatasetSpec],
|
|
93
93
|
description: Optional[str] = "",
|
|
94
94
|
execution_rid: Optional[RID] = None,
|
|
95
|
-
) ->
|
|
95
|
+
) -> list[dict[str, Any]]:
|
|
96
96
|
schema_path = self._model.catalog.getPathBuilder().schemas[self._ml_schema]
|
|
97
97
|
|
|
98
98
|
# Construct version records for insert
|
|
@@ -245,7 +245,7 @@ class Dataset:
|
|
|
245
245
|
DerivaMLException: if provided RID is not to a dataset_table.
|
|
246
246
|
"""
|
|
247
247
|
|
|
248
|
-
# Find all
|
|
248
|
+
# Find all the datasets that are reachable from this dataset and determine their new version numbers.
|
|
249
249
|
related_datasets = list(self._build_dataset_graph(dataset_rid=dataset_rid))
|
|
250
250
|
version_update_list = [
|
|
251
251
|
DatasetSpec(
|
|
@@ -254,7 +254,7 @@ class Dataset:
|
|
|
254
254
|
)
|
|
255
255
|
for ds_rid in related_datasets
|
|
256
256
|
]
|
|
257
|
-
|
|
257
|
+
self._insert_dataset_versions(
|
|
258
258
|
version_update_list, description=description, execution_rid=execution_rid
|
|
259
259
|
)
|
|
260
260
|
return [d.version for d in version_update_list if d.rid == dataset_rid][0]
|
|
@@ -751,9 +751,10 @@ class Dataset:
|
|
|
751
751
|
]
|
|
752
752
|
|
|
753
753
|
def _table_paths(
|
|
754
|
-
self,
|
|
754
|
+
self,
|
|
755
|
+
dataset: Optional[DatasetSpec] = None,
|
|
756
|
+
snapshot_catalog: Optional[DerivaML] = None,
|
|
755
757
|
) -> Iterator[tuple[str, str, Table]]:
|
|
756
|
-
|
|
757
758
|
paths = self._collect_paths(dataset and dataset.rid, snapshot_catalog)
|
|
758
759
|
|
|
759
760
|
def source_path(path: tuple[Table, ...]):
|
|
@@ -779,17 +780,20 @@ class Dataset:
|
|
|
779
780
|
def _collect_paths(
|
|
780
781
|
self,
|
|
781
782
|
dataset_rid: Optional[RID] = None,
|
|
782
|
-
|
|
783
|
+
snapshot: Optional[Dataset] = None,
|
|
783
784
|
dataset_nesting_depth: Optional[int] = None,
|
|
784
785
|
) -> set[tuple[Table, ...]]:
|
|
785
786
|
|
|
786
|
-
snapshot_catalog =
|
|
787
|
+
snapshot_catalog = snapshot if snapshot else self
|
|
788
|
+
|
|
787
789
|
dataset_table = snapshot_catalog._model.schemas[self._ml_schema].tables[
|
|
788
790
|
"Dataset"
|
|
789
791
|
]
|
|
790
792
|
dataset_dataset = snapshot_catalog._model.schemas[self._ml_schema].tables[
|
|
791
793
|
"Dataset_Dataset"
|
|
792
794
|
]
|
|
795
|
+
|
|
796
|
+
# Figure out what types of elements the dataset contains.
|
|
793
797
|
dataset_associations = [
|
|
794
798
|
a
|
|
795
799
|
for a in self.dataset_table.find_associations()
|
|
@@ -812,7 +816,8 @@ class Dataset:
|
|
|
812
816
|
]
|
|
813
817
|
else:
|
|
814
818
|
included_associations = dataset_associations
|
|
815
|
-
|
|
819
|
+
|
|
820
|
+
# Get the paths through the schema and filter out all the dataset paths not used by this dataset.
|
|
816
821
|
paths = {
|
|
817
822
|
tuple(p)
|
|
818
823
|
for p in snapshot_catalog._model._schema_to_paths()
|
|
@@ -827,7 +832,7 @@ class Dataset:
|
|
|
827
832
|
if dataset_rid:
|
|
828
833
|
for c in snapshot_catalog.list_dataset_children(dataset_rid=dataset_rid):
|
|
829
834
|
nested_paths |= self._collect_paths(
|
|
830
|
-
c,
|
|
835
|
+
c, snapshot=snapshot_catalog
|
|
831
836
|
)
|
|
832
837
|
else:
|
|
833
838
|
# Initialize nesting depth if not already provided.
|
|
@@ -109,7 +109,7 @@ class DatasetBag:
|
|
|
109
109
|
for ts, on in paths:
|
|
110
110
|
tables = " JOIN ".join(ts)
|
|
111
111
|
on_expression = " and ".join(
|
|
112
|
-
[f"{column_name(
|
|
112
|
+
[f"{column_name(left)}={column_name(right)}" for left, right in on]
|
|
113
113
|
)
|
|
114
114
|
sql.append(
|
|
115
115
|
f"SELECT {select_args} FROM {tables} ON {on_expression} WHERE {dataset_table_name}.RID IN ({datasets})"
|
|
@@ -5,6 +5,7 @@ import logging
|
|
|
5
5
|
from random import random, randint
|
|
6
6
|
import tempfile
|
|
7
7
|
from tempfile import TemporaryDirectory
|
|
8
|
+
from typing import Optional
|
|
8
9
|
import itertools
|
|
9
10
|
|
|
10
11
|
from deriva.config.acl_config import AclConfig
|
|
@@ -18,7 +19,6 @@ from requests import HTTPError
|
|
|
18
19
|
from deriva_ml import (
|
|
19
20
|
DerivaML,
|
|
20
21
|
ExecutionConfiguration,
|
|
21
|
-
Workflow,
|
|
22
22
|
MLVocab,
|
|
23
23
|
BuiltinTypes,
|
|
24
24
|
ColumnDefinition,
|
|
@@ -169,12 +169,9 @@ def create_demo_features(ml_instance):
|
|
|
169
169
|
description="Model for our API workflow",
|
|
170
170
|
)
|
|
171
171
|
|
|
172
|
-
api_workflow = ml_instance.
|
|
173
|
-
Workflow
|
|
174
|
-
|
|
175
|
-
url="https://github.com/informatics-isi-edu/deriva-ml/blob/main/pyproject.toml",
|
|
176
|
-
workflow_type="API Workflow",
|
|
177
|
-
)
|
|
172
|
+
api_workflow = ml_instance.create_workflow(
|
|
173
|
+
name="API Workflow",
|
|
174
|
+
workflow_type="API Workflow",
|
|
178
175
|
)
|
|
179
176
|
|
|
180
177
|
api_execution = ml_instance.create_execution(
|
|
@@ -322,7 +319,11 @@ def create_demo_catalog(
|
|
|
322
319
|
|
|
323
320
|
class DemoML(DerivaML):
|
|
324
321
|
def __init__(
|
|
325
|
-
self,
|
|
322
|
+
self,
|
|
323
|
+
hostname,
|
|
324
|
+
catalog_id,
|
|
325
|
+
cache_dir: Optional[str] = None,
|
|
326
|
+
working_dir: Optional[str] = None,
|
|
326
327
|
):
|
|
327
328
|
super().__init__(
|
|
328
329
|
hostname=hostname,
|
|
@@ -8,7 +8,7 @@ from enum import Enum
|
|
|
8
8
|
from typing import Any, Iterable, Optional, Annotated
|
|
9
9
|
|
|
10
10
|
import deriva.core.ermrest_model as em
|
|
11
|
-
from urllib.parse import urlparse
|
|
11
|
+
from urllib.parse import urlparse
|
|
12
12
|
from deriva.core.ermrest_model import builtin_types
|
|
13
13
|
from pydantic import (
|
|
14
14
|
BaseModel,
|
|
@@ -139,13 +139,18 @@ class FileSpec(BaseModel):
|
|
|
139
139
|
if url_parts.scheme == "tag":
|
|
140
140
|
return v
|
|
141
141
|
elif not url_parts.scheme:
|
|
142
|
-
return f
|
|
142
|
+
return f"tag://{gethostname()},{date.today()}:file://{v}"
|
|
143
143
|
else:
|
|
144
144
|
raise ValidationError("url is not a file URL")
|
|
145
145
|
|
|
146
146
|
@model_serializer()
|
|
147
147
|
def serialize_filespec(self):
|
|
148
|
-
return {
|
|
148
|
+
return {
|
|
149
|
+
"URL": self.url,
|
|
150
|
+
"Description": self.description,
|
|
151
|
+
"MD5": self.md5,
|
|
152
|
+
"Length": self.length,
|
|
153
|
+
}
|
|
149
154
|
|
|
150
155
|
|
|
151
156
|
class VocabularyTerm(BaseModel):
|