deriva-ml 1.17.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. deriva_ml/.DS_Store +0 -0
  2. deriva_ml/__init__.py +79 -0
  3. deriva_ml/bump_version.py +142 -0
  4. deriva_ml/core/__init__.py +39 -0
  5. deriva_ml/core/base.py +1527 -0
  6. deriva_ml/core/config.py +69 -0
  7. deriva_ml/core/constants.py +36 -0
  8. deriva_ml/core/definitions.py +74 -0
  9. deriva_ml/core/enums.py +222 -0
  10. deriva_ml/core/ermrest.py +288 -0
  11. deriva_ml/core/exceptions.py +28 -0
  12. deriva_ml/core/filespec.py +116 -0
  13. deriva_ml/dataset/__init__.py +12 -0
  14. deriva_ml/dataset/aux_classes.py +225 -0
  15. deriva_ml/dataset/dataset.py +1519 -0
  16. deriva_ml/dataset/dataset_bag.py +450 -0
  17. deriva_ml/dataset/history.py +109 -0
  18. deriva_ml/dataset/upload.py +439 -0
  19. deriva_ml/demo_catalog.py +495 -0
  20. deriva_ml/execution/__init__.py +26 -0
  21. deriva_ml/execution/environment.py +290 -0
  22. deriva_ml/execution/execution.py +1180 -0
  23. deriva_ml/execution/execution_configuration.py +147 -0
  24. deriva_ml/execution/workflow.py +413 -0
  25. deriva_ml/feature.py +228 -0
  26. deriva_ml/install_kernel.py +71 -0
  27. deriva_ml/model/__init__.py +0 -0
  28. deriva_ml/model/catalog.py +485 -0
  29. deriva_ml/model/database.py +719 -0
  30. deriva_ml/protocols/dataset.py +19 -0
  31. deriva_ml/run_notebook.py +228 -0
  32. deriva_ml/schema/__init__.py +3 -0
  33. deriva_ml/schema/annotations.py +473 -0
  34. deriva_ml/schema/check_schema.py +104 -0
  35. deriva_ml/schema/create_schema.py +393 -0
  36. deriva_ml/schema/deriva-ml-reference.json +8525 -0
  37. deriva_ml/schema/policy.json +81 -0
  38. deriva_ml/schema/table_comments_utils.py +57 -0
  39. deriva_ml/test.py +94 -0
  40. deriva_ml-1.17.10.dist-info/METADATA +38 -0
  41. deriva_ml-1.17.10.dist-info/RECORD +45 -0
  42. deriva_ml-1.17.10.dist-info/WHEEL +5 -0
  43. deriva_ml-1.17.10.dist-info/entry_points.txt +9 -0
  44. deriva_ml-1.17.10.dist-info/licenses/LICENSE +201 -0
  45. deriva_ml-1.17.10.dist-info/top_level.txt +1 -0
deriva_ml/feature.py ADDED
@@ -0,0 +1,228 @@
1
+ """Feature implementation for deriva-ml.
2
+
3
+ This module provides classes for defining and managing features in deriva-ml. Features represent measurable
4
+ properties or characteristics that can be associated with records in a table. The module includes:
5
+
6
+ - Feature: Main class for defining and managing features
7
+ - FeatureRecord: Base class for feature records using pydantic models
8
+
9
+ Typical usage example:
10
+ >>> feature = Feature(association_result, model)
11
+ >>> FeatureClass = feature.feature_record_class()
12
+ >>> record = FeatureClass(value="high", confidence=0.95)
13
+ """
14
+
15
+ from pathlib import Path
16
+ from types import UnionType
17
+ from typing import TYPE_CHECKING, ClassVar, Optional, Type
18
+
19
+ from deriva.core.ermrest_model import Column, FindAssociationResult
20
+ from pydantic import BaseModel, create_model
21
+
22
+ if TYPE_CHECKING:
23
+ from model.catalog import DerivaModel
24
+
25
+
26
class FeatureRecord(BaseModel):
    """Common pydantic base for the record classes generated per feature.

    A concrete subclass is produced by ``Feature.feature_record_class()``, which
    adds one field per feature column and binds the ``feature`` class variable to
    the ``Feature`` that generated it. On this base class ``feature`` is ``None``,
    so the column accessors below are only usable on generated subclasses
    (calling them here raises ``AttributeError``).

    Attributes:
        Execution (Optional[str]): RID of the execution that created this feature record.
        Feature_Name (str): Name of the feature this record belongs to.
        feature (ClassVar[Optional[Feature]]): The Feature object that generated this record class.

    Example:
        >>> class GeneFeature(FeatureRecord):
        ...     value: str
        ...     confidence: float
        >>> record = GeneFeature(
        ...     Feature_Name="expression",
        ...     value="high",
        ...     confidence=0.95
        ... )
    """

    # The output of model_dump() is meant to line up with the feature table columns.
    Execution: Optional[str] = None
    Feature_Name: str
    feature: ClassVar[Optional["Feature"]] = None

    class Config:
        arbitrary_types_allowed = True
        extra = "forbid"

    @classmethod
    def feature_columns(cls) -> set[Column]:
        """Return every column that is specific to the bound feature.

        Returns:
            set[Column]: The ``feature_columns`` set of the Feature bound to this class.
        """
        bound_feature = cls.feature
        return bound_feature.feature_columns

    @classmethod
    def asset_columns(cls) -> set[Column]:
        """Return the feature columns that reference asset tables.

        Returns:
            set[Column]: The ``asset_columns`` set of the Feature bound to this class.
        """
        bound_feature = cls.feature
        return bound_feature.asset_columns

    @classmethod
    def term_columns(cls) -> set[Column]:
        """Return the feature columns that reference controlled vocabulary terms.

        Returns:
            set[Column]: The ``term_columns`` set of the Feature bound to this class.
        """
        bound_feature = cls.feature
        return bound_feature.term_columns

    @classmethod
    def value_columns(cls) -> set[Column]:
        """Return the feature columns that hold direct values.

        Returns:
            set[Column]: The ``value_columns`` set of the Feature bound to this class
                (feature columns that are neither asset nor term references).
        """
        bound_feature = cls.feature
        return bound_feature.value_columns
92
+
93
+
94
+ class Feature:
95
+ """Manages feature definitions and their relationships in the catalog.
96
+
97
+ A Feature represents a measurable property or characteristic that can be associated with records in a table.
98
+ Features can include asset references, controlled vocabulary terms, and custom metadata fields.
99
+
100
+ Attributes:
101
+ feature_table: Table containing the feature implementation.
102
+ target_table: Table that the feature is associated with.
103
+ feature_name: Name of the feature (from Feature_Name column default).
104
+ feature_columns: Set of columns specific to this feature.
105
+ asset_columns: Set of columns referencing asset tables.
106
+ term_columns: Set of columns referencing vocabulary tables.
107
+ value_columns: Set of columns containing direct values.
108
+
109
+ Example:
110
+ >>> feature = Feature(association_result, model)
111
+ >>> print(f"Feature {feature.feature_name} on {feature.target_table.name}")
112
+ >>> print("Asset columns:", [c.name for c in feature.asset_columns])
113
+ """
114
+
115
+ def __init__(self, atable: FindAssociationResult, model: "DerivaModel") -> None:
116
+ self.feature_table = atable.table
117
+ self.target_table = atable.self_fkey.pk_table
118
+ self.feature_name = atable.table.columns["Feature_Name"].default
119
+ self._model = model
120
+
121
+ skip_columns = {
122
+ "RID",
123
+ "RMB",
124
+ "RCB",
125
+ "RCT",
126
+ "RMT",
127
+ "Feature_Name",
128
+ self.target_table.name,
129
+ "Execution",
130
+ }
131
+ self.feature_columns = {c for c in self.feature_table.columns if c.name not in skip_columns}
132
+
133
+ assoc_fkeys = {atable.self_fkey} | atable.other_fkeys
134
+
135
+ # Determine the role of each column in the feature outside the FK columns.
136
+ self.asset_columns = {
137
+ fk.foreign_key_columns[0]
138
+ for fk in self.feature_table.foreign_keys
139
+ if fk not in assoc_fkeys and self._model.is_asset(fk.pk_table)
140
+ }
141
+
142
+ self.term_columns = {
143
+ fk.foreign_key_columns[0]
144
+ for fk in self.feature_table.foreign_keys
145
+ if fk not in assoc_fkeys and self._model.is_vocabulary(fk.pk_table)
146
+ }
147
+
148
+ self.value_columns = self.feature_columns - (self.asset_columns | self.term_columns)
149
+
150
+ def feature_record_class(self) -> type[FeatureRecord]:
151
+ """Create a pydantic model for entries into the specified feature table
152
+
153
+ Returns:
154
+ A Feature class that can be used to create instances of the feature.
155
+ """
156
+
157
+ def map_type(c: Column) -> UnionType | Type[str] | Type[int] | Type[float]:
158
+ """Maps a Deriva column type to a Python/pydantic type.
159
+
160
+ Converts ERMrest column types to appropriate Python types for use in pydantic models.
161
+ Special handling is provided for asset columns which can accept either strings or Path objects.
162
+
163
+ Args:
164
+ c: ERMrest column to map to a Python type.
165
+
166
+ Returns:
167
+ UnionType | Type[str] | Type[int] | Type[float]: Appropriate Python type for the column:
168
+ - str | Path for asset columns
169
+ - str for text columns
170
+ - int for integer columns
171
+ - float for floating point columns
172
+ - str for all other types
173
+
174
+ Example:
175
+ >>> col = Column(name="score", type="float4")
176
+ >>> typ = map_type(col) # Returns float
177
+ """
178
+ if c.name in {c.name for c in self.asset_columns}:
179
+ return str | Path
180
+
181
+ match c.type.typename:
182
+ case "text":
183
+ return str
184
+ case "int2" | "int4" | "int8":
185
+ return int
186
+ case "float4" | "float8":
187
+ return float
188
+ case _:
189
+ return str
190
+
191
+ featureclass_name = f"{self.target_table.name}Feature{self.feature_name}"
192
+
193
+ # Create feature class. To do this, we must determine the python type for each column and also if the
194
+ # column is optional or not based on its nullability.
195
+ feature_columns = {
196
+ c.name: (
197
+ Optional[map_type(c)] if c.nullok else map_type(c),
198
+ c.default or None,
199
+ )
200
+ for c in self.feature_columns
201
+ } | {
202
+ "Feature_Name": (
203
+ str,
204
+ self.feature_name,
205
+ ), # Set default value for Feature_Name
206
+ self.target_table.name: (str, ...),
207
+ }
208
+ docstring = (
209
+ f"Class to capture fields in a feature {self.feature_name} on table {self.target_table}. "
210
+ "Feature columns include:\n"
211
+ )
212
+ docstring += "\n".join([f" {c.name}" for c in self.feature_columns])
213
+
214
+ model = create_model(
215
+ featureclass_name,
216
+ __base__=FeatureRecord,
217
+ __doc__=docstring,
218
+ **feature_columns,
219
+ )
220
+ model.feature = self # Set value of class variable within the feature class definition.
221
+
222
+ return model
223
+
224
+ def __repr__(self) -> str:
225
+ return (
226
+ f"Feature(target_table={self.target_table.name}, feature_name={self.feature_name}, "
227
+ f"feature_table={self.feature_table.name})"
228
+ )
@@ -0,0 +1,71 @@
1
+ import re
2
+ import sys
3
+ from argparse import ArgumentParser
4
+ from importlib import metadata
5
+ from pathlib import Path
6
+
7
+ from ipykernel.kernelspec import install as install_kernel
8
+
9
+
10
+ def _dist_name_for_this_package() -> str:
11
+ """
12
+ Try to resolve the distribution name that provides this package.
13
+ Works in editable installs and wheels.
14
+ """
15
+ # Top-level package name of this module (your_pkg)
16
+ top_pkg = __name__.split(".")[0]
17
+
18
+ # Map top-level packages -> distributions
19
+ pkg_to_dists = metadata.packages_distributions()
20
+ dists = pkg_to_dists.get(top_pkg) or []
21
+
22
+ # Fall back to project name in METADATA when mapping isn't available
23
+ dist_name = dists[0] if dists else metadata.metadata(top_pkg).get("Name", top_pkg)
24
+ return dist_name
25
+
26
+
27
def _normalize_kernel_name(name: str) -> str:
    """Turn an arbitrary name into a simple Jupyter kernel directory name.

    Surrounding whitespace is removed, the text is lower-cased, and every run of
    characters other than ``a-z``, ``0-9``, ``.``, ``_`` and ``-`` is collapsed
    into a single ``-``.
    """
    lowered = name.strip().lower()
    return re.sub(r"[^a-z0-9._-]+", "-", lowered)
34
+
35
+
36
def _name_for_this_venv() -> str:
    """Return the prompt name recorded in the active environment's ``pyvenv.cfg``.

    The file is read from ``sys.prefix``. Only a line that *starts* with
    ``prompt`` is considered, so other entries that merely contain the text
    ``prompt=`` (such as a recorded ``--prompt=...`` command line) are ignored.
    Surrounding whitespace and one pair of matching quotes are removed, because
    some tools store the prompt quoted (``prompt = 'name'``) and others bare.

    Returns:
        The prompt name, or an empty string when the file has no prompt entry.

    Raises:
        OSError: If ``pyvenv.cfg`` cannot be opened (e.g. not running in a venv).
    """
    config_path = Path(sys.prefix) / "pyvenv.cfg"
    with config_path.open() as f:
        m = re.search(r"^prompt *= *(?P<prompt>.*)$", f.read(), re.MULTILINE)
    if m is None:
        return ""

    prompt = m["prompt"].strip()
    # Drop one pair of matching quotes around the value, if present.
    if len(prompt) >= 2 and prompt[0] == prompt[-1] and prompt[0] in "'\"":
        prompt = prompt[1:-1]
    return prompt
41
+
42
+
43
def main() -> None:
    """Install a Jupyter kernel named after the active virtual environment.

    The kernel name and display name come from the venv prompt recorded in
    ``pyvenv.cfg``; when no prompt is recorded, the name of the ``sys.prefix``
    directory is used so the kernel never ends up with an empty name.

    By default the kernel spec is written to the per-user kernel directory.
    With ``--install-local`` it is written under ``sys.prefix`` instead.
    """
    parser = ArgumentParser()
    parser.add_argument(
        "--install-local",
        action="store_true",
        help="Create kernel in the local venv directory (sys.prefix) instead of the per-user kernel directory.",
    )
    # Unknown arguments are tolerated: earlier versions never parsed argv at all,
    # so callers may invoke this with unrelated arguments present.
    args, _unknown = parser.parse_known_args()

    dist_name = _name_for_this_venv() or Path(sys.prefix).name  # e.g., "deriva-model-template"
    kernel_name = _normalize_kernel_name(dist_name)  # e.g., "deriva-model-template"
    display_name = f"Python ({dist_name})"

    # A user install and a prefix install are alternatives, so exactly one is requested.
    if args.install_local:
        # Install into the current environment's prefix (e.g., .venv/share/jupyter/kernels/..)
        location_args = {"user": False, "prefix": sys.prefix}
    else:
        location_args = {"user": True}

    destination = install_kernel(
        kernel_name=kernel_name,
        display_name=display_name,
        **location_args,
    )
    # Report where the spec was actually written rather than always claiming sys.prefix.
    print(f"Installed Jupyter kernel '{kernel_name}' with display name '{display_name}' under {destination!s}")


if __name__ == "__main__":
    main()
File without changes