deriva-ml 1.17.10__py3-none-any.whl → 1.17.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. deriva_ml/__init__.py +43 -1
  2. deriva_ml/asset/__init__.py +17 -0
  3. deriva_ml/asset/asset.py +357 -0
  4. deriva_ml/asset/aux_classes.py +100 -0
  5. deriva_ml/bump_version.py +254 -11
  6. deriva_ml/catalog/__init__.py +21 -0
  7. deriva_ml/catalog/clone.py +1199 -0
  8. deriva_ml/catalog/localize.py +426 -0
  9. deriva_ml/core/__init__.py +29 -0
  10. deriva_ml/core/base.py +817 -1067
  11. deriva_ml/core/config.py +169 -21
  12. deriva_ml/core/constants.py +120 -19
  13. deriva_ml/core/definitions.py +123 -13
  14. deriva_ml/core/enums.py +47 -73
  15. deriva_ml/core/ermrest.py +226 -193
  16. deriva_ml/core/exceptions.py +297 -14
  17. deriva_ml/core/filespec.py +99 -28
  18. deriva_ml/core/logging_config.py +225 -0
  19. deriva_ml/core/mixins/__init__.py +42 -0
  20. deriva_ml/core/mixins/annotation.py +915 -0
  21. deriva_ml/core/mixins/asset.py +384 -0
  22. deriva_ml/core/mixins/dataset.py +237 -0
  23. deriva_ml/core/mixins/execution.py +408 -0
  24. deriva_ml/core/mixins/feature.py +365 -0
  25. deriva_ml/core/mixins/file.py +263 -0
  26. deriva_ml/core/mixins/path_builder.py +145 -0
  27. deriva_ml/core/mixins/rid_resolution.py +204 -0
  28. deriva_ml/core/mixins/vocabulary.py +400 -0
  29. deriva_ml/core/mixins/workflow.py +322 -0
  30. deriva_ml/core/validation.py +389 -0
  31. deriva_ml/dataset/__init__.py +2 -1
  32. deriva_ml/dataset/aux_classes.py +20 -4
  33. deriva_ml/dataset/catalog_graph.py +575 -0
  34. deriva_ml/dataset/dataset.py +1242 -1008
  35. deriva_ml/dataset/dataset_bag.py +1311 -182
  36. deriva_ml/dataset/history.py +27 -14
  37. deriva_ml/dataset/upload.py +225 -38
  38. deriva_ml/demo_catalog.py +126 -110
  39. deriva_ml/execution/__init__.py +46 -2
  40. deriva_ml/execution/base_config.py +639 -0
  41. deriva_ml/execution/execution.py +543 -242
  42. deriva_ml/execution/execution_configuration.py +26 -11
  43. deriva_ml/execution/execution_record.py +592 -0
  44. deriva_ml/execution/find_caller.py +298 -0
  45. deriva_ml/execution/model_protocol.py +175 -0
  46. deriva_ml/execution/multirun_config.py +153 -0
  47. deriva_ml/execution/runner.py +595 -0
  48. deriva_ml/execution/workflow.py +223 -34
  49. deriva_ml/experiment/__init__.py +8 -0
  50. deriva_ml/experiment/experiment.py +411 -0
  51. deriva_ml/feature.py +6 -1
  52. deriva_ml/install_kernel.py +143 -6
  53. deriva_ml/interfaces.py +862 -0
  54. deriva_ml/model/__init__.py +99 -0
  55. deriva_ml/model/annotations.py +1278 -0
  56. deriva_ml/model/catalog.py +286 -60
  57. deriva_ml/model/database.py +144 -649
  58. deriva_ml/model/deriva_ml_database.py +308 -0
  59. deriva_ml/model/handles.py +14 -0
  60. deriva_ml/run_model.py +319 -0
  61. deriva_ml/run_notebook.py +507 -38
  62. deriva_ml/schema/__init__.py +18 -2
  63. deriva_ml/schema/annotations.py +62 -33
  64. deriva_ml/schema/create_schema.py +169 -69
  65. deriva_ml/schema/validation.py +601 -0
  66. {deriva_ml-1.17.10.dist-info → deriva_ml-1.17.11.dist-info}/METADATA +4 -4
  67. deriva_ml-1.17.11.dist-info/RECORD +77 -0
  68. {deriva_ml-1.17.10.dist-info → deriva_ml-1.17.11.dist-info}/WHEEL +1 -1
  69. {deriva_ml-1.17.10.dist-info → deriva_ml-1.17.11.dist-info}/entry_points.txt +1 -0
  70. deriva_ml/protocols/dataset.py +0 -19
  71. deriva_ml/test.py +0 -94
  72. deriva_ml-1.17.10.dist-info/RECORD +0 -45
  73. {deriva_ml-1.17.10.dist-info → deriva_ml-1.17.11.dist-info}/licenses/LICENSE +0 -0
  74. {deriva_ml-1.17.10.dist-info → deriva_ml-1.17.11.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,411 @@
1
+ """Experiment analysis for DerivaML.
2
+
3
+ This module provides the Experiment class for analyzing completed executions.
4
+ An Experiment wraps an execution RID and provides helper methods for extracting
5
+ configuration details, model parameters, and experiment metadata.
6
+
7
+ Typical usage example:
8
+ >>> from deriva_ml import DerivaML
9
+ >>> from deriva_ml.experiment.experiment import Experiment
10
+ >>>
11
+ >>> ml = DerivaML("localhost", 45)
12
+ >>> exp = Experiment(ml, "47BE")
13
+ >>> print(exp.name) # e.g., "cifar10_quick"
14
+ >>> print(exp.config_choices) # Hydra config names used
15
+ >>> print(exp.model_config) # Model hyperparameters
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import tempfile
21
+ from dataclasses import dataclass, field
22
+ from pathlib import Path
23
+ from typing import TYPE_CHECKING, Any
24
+
25
+ import yaml
26
+ from deriva.core.hatrac_store import HatracStore
27
+
28
+ if TYPE_CHECKING:
29
+ from deriva_ml.core.base import DerivaML
30
+ from deriva_ml.execution.execution_record import ExecutionRecord
31
+ from deriva_ml.asset.asset import Asset
32
+ from deriva_ml.dataset.dataset import Dataset
33
+
34
+
35
@dataclass
class Experiment:
    """Wraps an execution for experiment analysis.

    Provides convenient access to execution metadata, configuration choices,
    model parameters, inputs, and outputs. Useful for comparing experiments
    and generating analysis reports.

    All catalog-backed values (``execution``, ``hydra_config`` and the values
    derived from it) are loaded lazily on first access and then cached on the
    instance.

    Attributes:
        ml: DerivaML instance for catalog access.
        execution_rid: RID of the execution to analyze.
        execution: The underlying ExecutionRecord (lazy-loaded).
        name: Experiment name from config_choices["model_config"], falling
            back to the execution RID.
        config_choices: Dictionary of Hydra config names used.
        model_config: Dictionary of model hyperparameters.
        description: Execution description.
        status: Execution status (e.g., "Completed").

    Example:
        >>> exp = Experiment(ml, "47BE")
        >>> print(f"Experiment: {exp.name}")
        >>> print(f"Config: {exp.config_choices}")
        >>> for ds in exp.input_datasets:
        ...     print(f"  Input: {ds.dataset_rid}")
    """

    ml: "DerivaML"
    execution_rid: str
    # Lazily populated caches; ``None`` means "not loaded yet".
    _execution: "ExecutionRecord | None" = field(default=None, repr=False)
    _hydra_config: "dict | None" = field(default=None, repr=False)
    _config_choices: "dict | None" = field(default=None, repr=False)
    _model_config: "dict | None" = field(default=None, repr=False)
    _name: "str | None" = field(default=None, repr=False)

    @property
    def execution(self) -> "ExecutionRecord":
        """Get the underlying ExecutionRecord (lazy-loaded)."""
        if self._execution is None:
            self._execution = self.ml.lookup_execution(self.execution_rid)
        return self._execution

    @property
    def hydra_config(self) -> dict:
        """Get the full Hydra configuration from execution metadata.

        Downloads and parses the Hydra YAML files from the execution's
        metadata assets on first access; later accesses reuse the cached dict.

        Returns:
            Dictionary containing the full Hydra configuration, or empty dict
            if no config file is found.
        """
        if self._hydra_config is None:
            self._hydra_config = self._load_hydra_config()
        return self._hydra_config

    @staticmethod
    def _extract_config_choices(hydra_data: Any) -> dict:
        """Return ``hydra.runtime.choices`` without Hydra-internal entries.

        Tolerates missing or null intermediate keys and non-mapping documents;
        any of those yields an empty dict instead of raising.

        Args:
            hydra_data: Parsed contents of a ``*-hydra.yaml`` file.

        Returns:
            Mapping of config group name to chosen config name, excluding
            keys that start with ``"hydra/"``.
        """
        node = hydra_data
        for key in ("hydra", "runtime", "choices"):
            if not isinstance(node, dict):
                return {}
            node = node.get(key)
        if not isinstance(node, dict):
            return {}
        return {k: v for k, v in node.items() if not str(k).startswith("hydra/")}

    @staticmethod
    def _fetch_yaml(store: Any, filename: str, url: str) -> Any:
        """Download one object from ``store`` and parse it as YAML.

        Args:
            store: Object exposing ``get_obj(url, destfilename=...)``.
            filename: Name recorded for the asset; only its final path
                component is used for the temporary file.
            url: Location of the object to download.

        Returns:
            The parsed document (``{}`` for an empty document), or ``None``
            if the download produced no file.
        """
        with tempfile.TemporaryDirectory() as tmpdir:
            # Use only the basename so a catalog-supplied name containing
            # separators (or an absolute path) cannot escape the temp dir.
            dest = Path(tmpdir) / (Path(filename).name or "metadata.yaml")
            store.get_obj(url, destfilename=str(dest))
            if not dest.exists():
                return None
            with open(dest, encoding="utf-8") as f:
                return yaml.safe_load(f) or {}

    @classmethod
    def _load_first_yaml(cls, store: Any, metadata_files: dict, suffix: str) -> Any:
        """Parse the first downloadable metadata file whose name ends with ``suffix``.

        Entries without a ``URL`` are skipped so a later matching entry can
        still be used.

        Returns:
            The parsed document, or ``None`` if no matching file could be
            downloaded.
        """
        for filename, meta in metadata_files.items():
            if not filename.endswith(suffix):
                continue
            url = meta.get("URL")
            if url:
                return cls._fetch_yaml(store, filename, url)
        return None

    def _load_hydra_config(self) -> dict:
        """Load Hydra configuration from execution metadata assets.

        Loads both the ``*-config.yaml`` (model parameters) and the
        ``*-hydra.yaml`` (choices) metadata files and merges them into a
        single dictionary containing:
            - the full config.yaml contents (including ``model_config``)
            - ``config_choices``: from hydra.yaml ``hydra.runtime.choices``

        Returns:
            The merged dictionary; empty if the execution has no usable
            metadata files.
        """
        pb = self.ml.pathBuilder()
        ml_tables = pb.schemas[self.ml.ml_schema]
        meta_exec = ml_tables.Execution_Metadata_Execution
        metadata_table = ml_tables.Execution_Metadata

        # Metadata assets linked to this execution with role "Output".
        query = meta_exec.filter(meta_exec.Execution == self.execution_rid)
        query = query.filter(meta_exec.Asset_Role == "Output")

        # Map filename -> metadata record for every linked asset with a name.
        metadata_files: dict[str, dict] = {}
        for record in query.entities().fetch():
            metadata_rid = record.get("Execution_Metadata")
            if not metadata_rid:
                continue
            meta_records = list(
                metadata_table.filter(metadata_table.RID == metadata_rid)
                .entities()
                .fetch()
            )
            if not meta_records:
                continue
            meta = meta_records[0]
            filename = meta.get("Filename", "")
            if filename:
                metadata_files[filename] = meta

        if not metadata_files:
            # Nothing to download; skip creating an object-store client.
            return {}

        hs = HatracStore(
            "https",
            self.ml.host_name,
            self.ml.credential,
        )

        # config.yaml supplies model_config and the full configuration.
        config_data = self._load_first_yaml(hs, metadata_files, "-config.yaml")
        # A non-mapping document cannot carry named sections; treat as empty
        # so the item assignment below cannot fail.
        result: dict = dict(config_data) if isinstance(config_data, dict) else {}

        # hydra.yaml supplies config_choices (hydra.runtime.choices).
        hydra_data = self._load_first_yaml(hs, metadata_files, "-hydra.yaml")
        if hydra_data is not None:
            result["config_choices"] = self._extract_config_choices(hydra_data)

        return result

    @property
    def config_choices(self) -> dict[str, str]:
        """Get the Hydra configuration choices (config names used).

        Returns:
            Dictionary mapping config group names to the selected config names,
            e.g., {"model_config": "cifar10_quick", "datasets": "cifar10_labeled_split"}
        """
        if self._config_choices is None:
            # ``or {}`` also covers an explicit null value in the config.
            self._config_choices = self.hydra_config.get("config_choices") or {}
        return self._config_choices

    @property
    def model_config(self) -> dict[str, Any]:
        """Get the model configuration parameters.

        Returns:
            Dictionary of model hyperparameters from the Hydra config,
            e.g., {"epochs": 3, "learning_rate": 0.001, "batch_size": 128}
        """
        if self._model_config is None:
            # ``or {}`` also covers an explicit null value in the config.
            self._model_config = self.hydra_config.get("model_config") or {}
        return self._model_config

    @property
    def name(self) -> str:
        """Get the experiment name.

        Returns the model_config name from config_choices if available,
        otherwise returns the execution RID.

        Returns:
            Experiment name string.
        """
        if self._name is None:
            self._name = self.config_choices.get("model_config", self.execution_rid)
        return self._name

    @property
    def description(self) -> str:
        """Get the execution description (empty string if unset)."""
        return self.execution.description or ""

    @property
    def status(self) -> str:
        """Get the execution status value (empty string if unset)."""
        if self.execution.status:
            return self.execution.status.value
        return ""

    @property
    def input_datasets(self) -> list["Dataset"]:
        """Get the input datasets for this experiment.

        Returns:
            List of Dataset objects used as inputs.
        """
        return self.execution.list_input_datasets()

    @property
    def input_assets(self) -> list["Asset"]:
        """Get the input assets for this experiment.

        Returns:
            List of Asset objects used as inputs.
        """
        return self.execution.list_assets(asset_role="Input")

    @property
    def output_assets(self) -> list["Asset"]:
        """Get the output assets from this experiment.

        Returns:
            List of Asset objects produced as outputs.
        """
        return self.execution.list_assets(asset_role="Output")

    def get_chaise_url(self) -> str:
        """Get the Chaise URL for viewing this execution in the browser.

        The schema component comes from ``self.ml.ml_schema``, the same schema
        this class queries for execution metadata.

        Returns:
            URL string for the execution record in Chaise.
        """
        return (
            f"https://{self.ml.host_name}/chaise/record/#{self.ml.catalog_id}/"
            f"{self.ml.ml_schema}:Execution/RID={self.execution_rid}"
        )

    def summary(self) -> dict[str, Any]:
        """Get a summary dictionary of the experiment.

        Returns:
            Dictionary with experiment metadata suitable for display or analysis.
            Includes:
            - name, execution_rid, description, status
            - config_choices: Hydra config names used
            - model_config: Model hyperparameters (keys starting with "_" omitted)
            - input_datasets: List of input dataset info
            - input_assets: List of input asset info (non-metadata)
            - output_assets: List of output asset info (non-metadata)
            - metadata_assets: List of execution metadata assets (config files, etc.)
            - url: Chaise URL to view execution
        """

        def asset_summary(asset: "Asset") -> dict[str, Any]:
            """Create a summary dict for an asset."""
            return {
                "asset_rid": asset.asset_rid,
                "asset_table": asset.asset_table,
                "filename": asset.filename,
                "description": asset.description,
                "asset_types": asset.asset_types,
                "url": asset.url,
            }

        # Separate metadata assets from other assets.
        input_assets = []
        output_assets = []
        metadata_assets = []
        input_metadata_rids = set()

        for asset in self.input_assets:
            if asset.asset_table == "Execution_Metadata":
                metadata_assets.append(asset_summary(asset))
                input_metadata_rids.add(asset.asset_rid)
            else:
                input_assets.append(asset_summary(asset))

        output_metadata_rids = set()
        for asset in self.output_assets:
            if asset.asset_table != "Execution_Metadata":
                output_assets.append(asset_summary(asset))
                continue
            # Metadata is typically an output; skip any asset already listed.
            if asset.asset_rid in input_metadata_rids or asset.asset_rid in output_metadata_rids:
                continue
            metadata_assets.append(asset_summary(asset))
            output_metadata_rids.add(asset.asset_rid)

        return {
            "name": self.name,
            "execution_rid": self.execution_rid,
            "description": self.description,
            "status": self.status,
            "config_choices": self.config_choices,
            "model_config": {
                k: v for k, v in self.model_config.items() if not k.startswith("_")
            },
            "input_datasets": [
                {
                    "dataset_rid": ds.dataset_rid,
                    "description": ds.description,
                    "version": str(ds.current_version) if ds.current_version else None,
                    "dataset_types": ds.dataset_types,
                }
                for ds in self.input_datasets
            ],
            "input_assets": input_assets,
            "output_assets": output_assets,
            "metadata_assets": metadata_assets,
            "url": self.get_chaise_url(),
        }

    def to_markdown(self, show_datasets: bool = True, show_assets: bool = True) -> str:
        """Generate a markdown summary of this experiment.

        Returns a formatted markdown string with clickable links, configuration
        details, and optionally input datasets and assets.

        Args:
            show_datasets: If True, include input datasets with nested children.
            show_assets: If True, include input assets.

        Returns:
            Markdown-formatted string.

        Example:
            >>> exp = ml.lookup_experiment("47BE")
            >>> print(exp.to_markdown())
        """
        # Header with execution link.
        lines = [f"### {self.name} ([{self.execution_rid}]({self.get_chaise_url()}))"]

        description = self.description
        if description:
            lines.append(f"**Description:** {description}")

        config_choices = self.config_choices
        if config_choices:
            choices_str = ", ".join(
                f"`{k}={v}`" for k, v in sorted(config_choices.items())
            )
            lines.append(f"**Configuration Choices:** {choices_str}")

        # Model configuration (filter internal fields).
        model_cfg = {
            k: v for k, v in self.model_config.items() if not k.startswith("_")
        }
        if model_cfg:
            lines.append("**Model Configuration:**")
            lines.extend(f"- **{k}**: {v}" for k, v in sorted(model_cfg.items()))

        # Each property access is a catalog query, so fetch once and reuse.
        datasets = self.input_datasets if show_datasets else []
        if datasets:
            lines.append("**Input Datasets:**")
            lines.extend(
                ds.to_markdown(show_children=True, indent=0) for ds in datasets
            )

        assets = self.input_assets if show_assets else []
        if assets:
            lines.append("**Input Assets:**")
            lines.extend(
                f"- [{asset.asset_rid}]({asset.get_chaise_url()}): {asset.filename}"
                for asset in assets
            )

        return "\n".join(lines)

    def display_markdown(self, show_datasets: bool = True, show_assets: bool = True) -> None:
        """Display a formatted markdown summary of this experiment in Jupyter.

        Convenience method that calls to_markdown() and displays the result
        using IPython.display.Markdown.

        Args:
            show_datasets: If True, display input datasets with nested children.
            show_assets: If True, display input assets.

        Example:
            >>> exp = ml.lookup_experiment("47BE")
            >>> exp.display_markdown()
        """
        # Imported lazily so IPython is only required when actually displaying.
        from IPython.display import display, Markdown

        display(Markdown(self.to_markdown(show_datasets, show_assets)))

    def __repr__(self) -> str:
        return f"Experiment(name={self.name!r}, rid={self.execution_rid!r})"
deriva_ml/feature.py CHANGED
@@ -16,7 +16,12 @@ from pathlib import Path
16
16
  from types import UnionType
17
17
  from typing import TYPE_CHECKING, ClassVar, Optional, Type
18
18
 
19
- from deriva.core.ermrest_model import Column, FindAssociationResult
19
+ # Deriva imports - use importlib to avoid shadowing by local 'deriva.py' files
20
+ import importlib
21
+ _ermrest_model = importlib.import_module("deriva.core.ermrest_model")
22
+ Column = _ermrest_model.Column
23
+ FindAssociationResult = _ermrest_model.FindAssociationResult
24
+
20
25
  from pydantic import BaseModel, create_model
21
26
 
22
27
  if TYPE_CHECKING:
@@ -1,3 +1,78 @@
1
+ """Jupyter kernel installation utility for DerivaML virtual environments.
2
+
3
+ This module provides a command-line tool for installing a Jupyter kernel that
4
+ points to the current Python virtual environment. This allows Jupyter notebooks
5
+ to use the DerivaML environment with all its dependencies.
6
+
7
+ Why Install a Kernel?
8
+ ---------------------
9
+ When working with Jupyter notebooks, the kernel determines which Python
10
+ environment executes the code. By default, Jupyter may not see packages
11
+ installed in your virtual environment. Installing a kernel creates a
12
+ link so Jupyter can find and use your DerivaML environment.
13
+
14
+ How It Works
15
+ ------------
16
+ 1. Detects the current virtual environment name from ``pyvenv.cfg``
17
+ 2. Normalizes the name to be Jupyter-compatible (lowercase, alphanumeric)
18
+ 3. Registers the kernel with Jupyter using ipykernel's install mechanism
19
+ 4. The kernel appears in Jupyter's kernel selector with a friendly display name
20
+
21
+ The kernel is installed in the user's Jupyter data directory by default,
22
+ making it available across all Jupyter instances for that user.
23
+
24
+ Usage
25
+ -----
26
+ Command line (after activating your virtual environment)::
27
+
28
+ # Install kernel for current virtual environment
29
+ deriva-ml-install-kernel
30
+
31
+ # Or run as a module
32
+ python -m deriva_ml.install_kernel
33
+
34
+ As a module::
35
+
36
+ from deriva_ml.install_kernel import main
37
+ main()
38
+
39
+ After installation, the kernel will appear in Jupyter with a name like
40
+ "Python (deriva-ml)" or "Python (my-project)" depending on your venv name.
41
+
42
+ Example Workflow
43
+ ----------------
44
+ Setting up a new DerivaML project with Jupyter support::
45
+
46
+ # Create and activate virtual environment
47
+ $ uv venv --prompt my-ml-project
48
+ $ source .venv/bin/activate
49
+
50
+ # Install DerivaML
51
+ $ uv pip install deriva-ml
52
+
53
+ # Install Jupyter kernel
54
+ $ deriva-ml-install-kernel
55
+ Installed Jupyter kernel 'my-ml-project' with display name 'Python (my-ml-project)'
56
+
57
+ # Start Jupyter and select the new kernel
58
+ $ jupyter lab
59
+
60
+ Kernel Location
61
+ ---------------
62
+ Kernels are installed to the user's Jupyter data directory:
63
+
64
+ - **Linux/macOS**: ``~/.local/share/jupyter/kernels/``
65
+ - **Windows**: ``%APPDATA%\\jupyter\\kernels\\``
66
+
67
+ Each kernel is a directory containing a ``kernel.json`` file that specifies
68
+ the Python executable path and display name.
69
+
70
+ See Also
71
+ --------
72
+ - Jupyter kernels documentation: https://jupyter-client.readthedocs.io/en/latest/kernels.html
73
+ - ipykernel: https://github.com/ipython/ipykernel
74
+ """
75
+
1
76
  import re
2
77
  import sys
3
78
  from argparse import ArgumentParser
@@ -8,9 +83,18 @@ from ipykernel.kernelspec import install as install_kernel
8
83
 
9
84
 
10
85
  def _dist_name_for_this_package() -> str:
11
- """
12
- Try to resolve the distribution name that provides this package.
13
- Works in editable installs and wheels.
86
+ """Resolve the distribution name that provides this package.
87
+
88
+ Works in both editable installs and wheels by using importlib.metadata
89
+ to map the top-level package name to its distribution.
90
+
91
+ Returns:
92
+ The distribution name (e.g., "deriva-ml").
93
+
94
+ Example:
95
+ >>> name = _dist_name_for_this_package()
96
+ >>> print(name)
97
+ deriva-ml
14
98
  """
15
99
  # Top-level package name of this module (your_pkg)
16
100
  top_pkg = __name__.split(".")[0]
@@ -25,8 +109,23 @@ def _dist_name_for_this_package() -> str:
25
109
 
26
110
 
27
111
  def _normalize_kernel_name(name: str) -> str:
28
- """
29
- Jupyter kernel directory names should be simple: lowercase, [-a-z0-9_].
112
+ """Normalize a name to be valid as a Jupyter kernel directory name.
113
+
114
+ Jupyter kernel directory names should be simple: lowercase letters,
115
+ digits, hyphens, underscores, and dots only. This function converts
116
+ any input string to a valid kernel name.
117
+
118
+ Args:
119
+ name: The input name to normalize (e.g., "My Project 2.0").
120
+
121
+ Returns:
122
+ A normalized kernel name (e.g., "my-project-2.0").
123
+
124
+ Example:
125
+ >>> _normalize_kernel_name("My ML Project!")
126
+ 'my-ml-project-'
127
+ >>> _normalize_kernel_name("deriva-ml")
128
+ 'deriva-ml'
30
129
  """
31
130
  name = name.strip().lower()
32
131
  name = re.sub(r"[^a-z0-9._-]+", "-", name)
@@ -34,6 +133,23 @@ def _normalize_kernel_name(name: str) -> str:
34
133
 
35
134
 
36
135
  def _name_for_this_venv() -> str:
136
+ """Extract the virtual environment name from pyvenv.cfg.
137
+
138
+ Reads the ``prompt`` setting from the current environment's pyvenv.cfg
139
+ file. This is set when creating a venv with ``--prompt`` flag, or
140
+ defaults to the directory name.
141
+
142
+ Returns:
143
+ The virtual environment prompt/name, or empty string if not found.
144
+
145
+ Raises:
146
+ FileNotFoundError: If not running in a virtual environment (no pyvenv.cfg).
147
+
148
+ Example:
149
+ >>> # In a venv created with: uv venv --prompt my-project
150
+ >>> _name_for_this_venv()
151
+ 'my-project'
152
+ """
37
153
  config_path = Path(sys.prefix) / "pyvenv.cfg"
38
154
  with config_path.open() as f:
39
155
  m = re.search("prompt *= *(?P<prompt>.*)", f.read())
@@ -41,7 +157,28 @@ def _name_for_this_venv() -> str:
41
157
 
42
158
 
43
159
  def main() -> None:
44
- parser = ArgumentParser()
160
+ """Main entry point for the kernel installation tool.
161
+
162
+ Installs a Jupyter kernel for the current virtual environment. The kernel
163
+ name and display name are derived from the virtual environment's prompt
164
+ setting in pyvenv.cfg.
165
+
166
+ The kernel is installed to the user's Jupyter data directory, making it
167
+ available for all Jupyter instances run by that user.
168
+
169
+ Command-line Arguments:
170
+ --install-local: Install kernel to the venv's prefix directory instead
171
+ of the user's Jupyter data directory. (Currently not fully implemented)
172
+
173
+ Example:
174
+ >>> # Typically called via command line:
175
+ >>> # $ deriva-ml-install-kernel
176
+ >>> main()
177
+ Installed Jupyter kernel 'my-project' with display name 'Python (my-project)'
178
+ """
179
+ parser = ArgumentParser(
180
+ description="Install a Jupyter kernel for the current virtual environment."
181
+ )
45
182
  parser.add_argument(
46
183
  "--install-local",
47
184
  action="store_true",