deriva-ml 1.17.10__py3-none-any.whl → 1.17.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74):
  1. deriva_ml/__init__.py +43 -1
  2. deriva_ml/asset/__init__.py +17 -0
  3. deriva_ml/asset/asset.py +357 -0
  4. deriva_ml/asset/aux_classes.py +100 -0
  5. deriva_ml/bump_version.py +254 -11
  6. deriva_ml/catalog/__init__.py +21 -0
  7. deriva_ml/catalog/clone.py +1199 -0
  8. deriva_ml/catalog/localize.py +426 -0
  9. deriva_ml/core/__init__.py +29 -0
  10. deriva_ml/core/base.py +817 -1067
  11. deriva_ml/core/config.py +169 -21
  12. deriva_ml/core/constants.py +120 -19
  13. deriva_ml/core/definitions.py +123 -13
  14. deriva_ml/core/enums.py +47 -73
  15. deriva_ml/core/ermrest.py +226 -193
  16. deriva_ml/core/exceptions.py +297 -14
  17. deriva_ml/core/filespec.py +99 -28
  18. deriva_ml/core/logging_config.py +225 -0
  19. deriva_ml/core/mixins/__init__.py +42 -0
  20. deriva_ml/core/mixins/annotation.py +915 -0
  21. deriva_ml/core/mixins/asset.py +384 -0
  22. deriva_ml/core/mixins/dataset.py +237 -0
  23. deriva_ml/core/mixins/execution.py +408 -0
  24. deriva_ml/core/mixins/feature.py +365 -0
  25. deriva_ml/core/mixins/file.py +263 -0
  26. deriva_ml/core/mixins/path_builder.py +145 -0
  27. deriva_ml/core/mixins/rid_resolution.py +204 -0
  28. deriva_ml/core/mixins/vocabulary.py +400 -0
  29. deriva_ml/core/mixins/workflow.py +322 -0
  30. deriva_ml/core/validation.py +389 -0
  31. deriva_ml/dataset/__init__.py +2 -1
  32. deriva_ml/dataset/aux_classes.py +20 -4
  33. deriva_ml/dataset/catalog_graph.py +575 -0
  34. deriva_ml/dataset/dataset.py +1242 -1008
  35. deriva_ml/dataset/dataset_bag.py +1311 -182
  36. deriva_ml/dataset/history.py +27 -14
  37. deriva_ml/dataset/upload.py +225 -38
  38. deriva_ml/demo_catalog.py +126 -110
  39. deriva_ml/execution/__init__.py +46 -2
  40. deriva_ml/execution/base_config.py +639 -0
  41. deriva_ml/execution/execution.py +543 -242
  42. deriva_ml/execution/execution_configuration.py +26 -11
  43. deriva_ml/execution/execution_record.py +592 -0
  44. deriva_ml/execution/find_caller.py +298 -0
  45. deriva_ml/execution/model_protocol.py +175 -0
  46. deriva_ml/execution/multirun_config.py +153 -0
  47. deriva_ml/execution/runner.py +595 -0
  48. deriva_ml/execution/workflow.py +223 -34
  49. deriva_ml/experiment/__init__.py +8 -0
  50. deriva_ml/experiment/experiment.py +411 -0
  51. deriva_ml/feature.py +6 -1
  52. deriva_ml/install_kernel.py +143 -6
  53. deriva_ml/interfaces.py +862 -0
  54. deriva_ml/model/__init__.py +99 -0
  55. deriva_ml/model/annotations.py +1278 -0
  56. deriva_ml/model/catalog.py +286 -60
  57. deriva_ml/model/database.py +144 -649
  58. deriva_ml/model/deriva_ml_database.py +308 -0
  59. deriva_ml/model/handles.py +14 -0
  60. deriva_ml/run_model.py +319 -0
  61. deriva_ml/run_notebook.py +507 -38
  62. deriva_ml/schema/__init__.py +18 -2
  63. deriva_ml/schema/annotations.py +62 -33
  64. deriva_ml/schema/create_schema.py +169 -69
  65. deriva_ml/schema/validation.py +601 -0
  66. {deriva_ml-1.17.10.dist-info → deriva_ml-1.17.11.dist-info}/METADATA +4 -4
  67. deriva_ml-1.17.11.dist-info/RECORD +77 -0
  68. {deriva_ml-1.17.10.dist-info → deriva_ml-1.17.11.dist-info}/WHEEL +1 -1
  69. {deriva_ml-1.17.10.dist-info → deriva_ml-1.17.11.dist-info}/entry_points.txt +1 -0
  70. deriva_ml/protocols/dataset.py +0 -19
  71. deriva_ml/test.py +0 -94
  72. deriva_ml-1.17.10.dist-info/RECORD +0 -45
  73. {deriva_ml-1.17.10.dist-info → deriva_ml-1.17.11.dist-info}/licenses/LICENSE +0 -0
  74. {deriva_ml-1.17.10.dist-info → deriva_ml-1.17.11.dist-info}/top_level.txt +0 -0
@@ -12,10 +12,17 @@ from collections import Counter, defaultdict
12
12
  from graphlib import CycleError, TopologicalSorter
13
13
  from typing import Any, Callable, Final, Iterable, NewType, TypeAlias
14
14
 
15
- from deriva.core.ermrest_catalog import ErmrestCatalog
16
-
17
- # Deriva imports
18
- from deriva.core.ermrest_model import Column, FindAssociationResult, Model, Schema, Table
15
+ # Deriva imports - use importlib to avoid shadowing by local 'deriva.py' files
16
+ import importlib
17
+ _ermrest_catalog = importlib.import_module("deriva.core.ermrest_catalog")
18
+ _ermrest_model = importlib.import_module("deriva.core.ermrest_model")
19
+
20
+ ErmrestCatalog = _ermrest_catalog.ErmrestCatalog
21
+ Column = _ermrest_model.Column
22
+ FindAssociationResult = _ermrest_model.FindAssociationResult
23
+ Model = _ermrest_model.Model
24
+ Schema = _ermrest_model.Schema
25
+ Table = _ermrest_model.Table
19
26
 
20
27
  # Third-party imports
21
28
  from pydantic import ConfigDict, validate_call
@@ -23,14 +30,16 @@ from pydantic import ConfigDict, validate_call
23
30
  from deriva_ml.core.definitions import (
24
31
  ML_SCHEMA,
25
32
  RID,
33
+ SYSTEM_SCHEMAS,
26
34
  DerivaAssetColumns,
27
35
  TableDefinition,
36
+ get_domain_schemas,
37
+ is_system_schema,
28
38
  )
29
39
  from deriva_ml.core.exceptions import DerivaMLException, DerivaMLTableTypeError
30
40
 
31
41
  # Local imports
32
42
  from deriva_ml.feature import Feature
33
- from deriva_ml.protocols.dataset import DatasetLike
34
43
 
35
44
  try:
36
45
  from icecream import ic
@@ -61,12 +70,12 @@ class DerivaModel:
61
70
  This class provides a number of DerivaML specific methods that augment the interface in the deriva model class.
62
71
 
63
72
  Attributes:
64
- domain_schema: Schema name for domain-specific tables and relationships.
65
73
  model: ERMRest model for the catalog.
66
- catalog: ERMRest catalog for the model
67
- hostname: ERMRest catalog for the model
68
- ml_schema: The ML schema for the catalog.
69
- domain_schema: The domain schema for the catalog.
74
+ catalog: ERMRest catalog for the model.
75
+ hostname: Hostname of the ERMRest server.
76
+ ml_schema: The ML schema name for the catalog.
77
+ domain_schemas: Frozenset of all domain schema names in the catalog.
78
+ default_schema: The default schema for table creation operations.
70
79
 
71
80
  """
72
81
 
@@ -74,17 +83,22 @@ class DerivaModel:
74
83
  self,
75
84
  model: Model,
76
85
  ml_schema: str = ML_SCHEMA,
77
- domain_schema: str | None = None,
86
+ domain_schemas: set[str] | None = None,
87
+ default_schema: str | None = None,
78
88
  ):
79
- """Create and initialize a DerivaML instance.
89
+ """Create and initialize a DerivaModel instance.
80
90
 
81
- This method will connect to a catalog, and initialize local configuration for the ML execution.
91
+ This method will connect to a catalog and initialize schema configuration.
82
92
  This class is intended to be used as a base class on which domain-specific interfaces are built.
83
93
 
84
94
  Args:
85
95
  model: The ERMRest model for the catalog.
86
96
  ml_schema: The ML schema name.
87
- domain_schema: The domain schema name.
97
+ domain_schemas: Optional explicit set of domain schema names. If None,
98
+ auto-detects all non-system schemas.
99
+ default_schema: The default schema for table creation operations. If None
100
+ and there is exactly one domain schema, that schema is used as default.
101
+ If there are multiple domain schemas, default_schema must be specified.
88
102
  """
89
103
  self.model = model
90
104
  self.configuration = None
@@ -92,27 +106,182 @@ class DerivaModel:
92
106
  self.hostname = self.catalog.deriva_server.server if isinstance(self.catalog, ErmrestCatalog) else "localhost"
93
107
 
94
108
  self.ml_schema = ml_schema
95
- builtin_schemas = ("public", self.ml_schema, "www", "WWW")
96
- if domain_schema:
97
- self.domain_schema = domain_schema
109
+ self._system_schemas = frozenset(SYSTEM_SCHEMAS | {ml_schema})
110
+
111
+ # Determine domain schemas
112
+ if domain_schemas is not None:
113
+ self.domain_schemas = frozenset(domain_schemas)
98
114
  else:
99
- if len(user_schemas := {k for k in self.model.schemas.keys()} - set(builtin_schemas)) == 1:
100
- self.domain_schema = user_schemas.pop()
101
- else:
102
- raise DerivaMLException(f"Ambiguous domain schema: {user_schemas}")
115
+ # Auto-detect all domain schemas
116
+ self.domain_schemas = get_domain_schemas(self.model.schemas.keys(), ml_schema)
117
+
118
+ # Determine default schema for table creation
119
+ if default_schema is not None:
120
+ if default_schema not in self.domain_schemas:
121
+ raise DerivaMLException(
122
+ f"default_schema '{default_schema}' is not in domain_schemas: {self.domain_schemas}"
123
+ )
124
+ self.default_schema = default_schema
125
+ elif len(self.domain_schemas) == 1:
126
+ # Single domain schema - use it as default
127
+ self.default_schema = next(iter(self.domain_schemas))
128
+ elif len(self.domain_schemas) == 0:
129
+ # No domain schemas - default_schema will be None
130
+ self.default_schema = None
131
+ else:
132
+ # Multiple domain schemas, no explicit default
133
+ self.default_schema = None
134
+
135
+ def is_system_schema(self, schema_name: str) -> bool:
136
+ """Check if a schema is a system or ML schema.
137
+
138
+ Args:
139
+ schema_name: Name of the schema to check.
140
+
141
+ Returns:
142
+ True if the schema is a system or ML schema.
143
+ """
144
+ return is_system_schema(schema_name, self.ml_schema)
145
+
146
+ def is_domain_schema(self, schema_name: str) -> bool:
147
+ """Check if a schema is a domain schema.
148
+
149
+ Args:
150
+ schema_name: Name of the schema to check.
151
+
152
+ Returns:
153
+ True if the schema is a domain schema.
154
+ """
155
+ return schema_name in self.domain_schemas
156
+
157
+ def _require_default_schema(self) -> str:
158
+ """Get default schema, raising an error if not set.
159
+
160
+ Returns:
161
+ The default schema name.
162
+
163
+ Raises:
164
+ DerivaMLException: If default_schema is not set.
165
+ """
166
+ if self.default_schema is None:
167
+ raise DerivaMLException(
168
+ f"No default_schema set. With multiple domain schemas {self.domain_schemas}, "
169
+ "you must either specify a default_schema when creating DerivaML or "
170
+ "pass an explicit schema parameter to this method."
171
+ )
172
+ return self.default_schema
103
173
 
104
174
  def refresh_model(self) -> None:
105
175
  self.model = self.catalog.getCatalogModel()
106
176
 
107
- @property
108
- def schemas(self) -> dict[str, Schema]:
109
- return self.model.schemas
110
-
111
177
  @property
112
178
  def chaise_config(self) -> dict[str, Any]:
113
179
  """Return the chaise configuration."""
114
180
  return self.model.chaise_config
115
181
 
182
+ def get_schema_description(self, include_system_columns: bool = False) -> dict[str, Any]:
183
+ """Return a JSON description of the catalog schema structure.
184
+
185
+ Provides a structured representation of the domain and ML schemas including
186
+ tables, columns, foreign keys, and relationships. Useful for understanding
187
+ the data model structure programmatically.
188
+
189
+ Args:
190
+ include_system_columns: If True, include RID, RCT, RMT, RCB, RMB columns.
191
+ Default False to reduce output size.
192
+
193
+ Returns:
194
+ Dictionary with schema structure:
195
+ {
196
+ "domain_schemas": ["schema_name1", "schema_name2"],
197
+ "default_schema": "schema_name1",
198
+ "ml_schema": "deriva-ml",
199
+ "schemas": {
200
+ "schema_name": {
201
+ "tables": {
202
+ "TableName": {
203
+ "comment": "description",
204
+ "is_vocabulary": bool,
205
+ "is_asset": bool,
206
+ "is_association": bool,
207
+ "columns": [...],
208
+ "foreign_keys": [...],
209
+ "features": [...]
210
+ }
211
+ }
212
+ }
213
+ }
214
+ }
215
+ """
216
+ system_columns = {"RID", "RCT", "RMT", "RCB", "RMB"}
217
+ result = {
218
+ "domain_schemas": sorted(self.domain_schemas),
219
+ "default_schema": self.default_schema,
220
+ "ml_schema": self.ml_schema,
221
+ "schemas": {},
222
+ }
223
+
224
+ # Include all domain schemas and the ML schema
225
+ for schema_name in [*self.domain_schemas, self.ml_schema]:
226
+ schema = self.model.schemas.get(schema_name)
227
+ if not schema:
228
+ continue
229
+
230
+ schema_info = {"tables": {}}
231
+
232
+ for table_name, table in schema.tables.items():
233
+ # Get columns
234
+ columns = []
235
+ for col in table.columns:
236
+ if not include_system_columns and col.name in system_columns:
237
+ continue
238
+ columns.append({
239
+ "name": col.name,
240
+ "type": str(col.type.typename),
241
+ "nullok": col.nullok,
242
+ "comment": col.comment or "",
243
+ })
244
+
245
+ # Get foreign keys
246
+ foreign_keys = []
247
+ for fk in table.foreign_keys:
248
+ fk_cols = [c.name for c in fk.foreign_key_columns]
249
+ ref_cols = [c.name for c in fk.referenced_columns]
250
+ foreign_keys.append({
251
+ "columns": fk_cols,
252
+ "referenced_table": f"{fk.pk_table.schema.name}.{fk.pk_table.name}",
253
+ "referenced_columns": ref_cols,
254
+ })
255
+
256
+ # Get features if this is a domain table
257
+ features = []
258
+ if self.is_domain_schema(schema_name):
259
+ try:
260
+ for f in self.find_features(table):
261
+ features.append({
262
+ "name": f.feature_name,
263
+ "feature_table": f.feature_table.name,
264
+ })
265
+ except Exception:
266
+ pass # Table may not support features
267
+
268
+ table_info = {
269
+ "comment": table.comment or "",
270
+ "is_vocabulary": self.is_vocabulary(table),
271
+ "is_asset": self.is_asset(table),
272
+ "is_association": bool(self.is_association(table)),
273
+ "columns": columns,
274
+ "foreign_keys": foreign_keys,
275
+ }
276
+ if features:
277
+ table_info["features"] = features
278
+
279
+ schema_info["tables"][table_name] = table_info
280
+
281
+ result["schemas"][schema_name] = schema_info
282
+
283
+ return result
284
+
116
285
  def __getattr__(self, name: str) -> Any:
117
286
  # Called only if `name` is not found in Manager. Delegate attributes to model class.
118
287
  return getattr(self.model, name)
@@ -120,20 +289,28 @@ class DerivaModel:
120
289
  def name_to_table(self, table: TableInput) -> Table:
121
290
  """Return the table object corresponding to the given table name.
122
291
 
123
- If the table name appears in more than one schema, return the first one you find.
292
+ Searches domain schemas first (in sorted order), then ML schema, then WWW.
293
+ If the table name appears in more than one schema, returns the first match.
124
294
 
125
295
  Args:
126
296
  table: A ERMRest table object or a string that is the name of the table.
127
297
 
128
298
  Returns:
129
299
  Table object.
300
+
301
+ Raises:
302
+ DerivaMLException: If the table doesn't exist in any searchable schema.
130
303
  """
131
304
  if isinstance(table, Table):
132
305
  return table
133
- if table in (s := self.model.schemas[self.domain_schema].tables):
134
- return s[table]
135
- for s in [self.model.schemas[sname] for sname in [self.domain_schema, self.ml_schema, "WWW"]]:
136
- if table in s.tables.keys():
306
+
307
+ # Search domain schemas (sorted for deterministic order), then ML schema, then WWW
308
+ search_order = [*sorted(self.domain_schemas), self.ml_schema, "WWW"]
309
+ for sname in search_order:
310
+ if sname not in self.model.schemas:
311
+ continue
312
+ s = self.model.schemas[sname]
313
+ if table in s.tables:
137
314
  return s.tables[table]
138
315
  raise DerivaMLException(f"The table {table} doesn't exist.")
139
316
 
@@ -220,21 +397,28 @@ class DerivaModel:
220
397
  return [t for s in self.model.schemas.values() for t in s.tables.values() if self.is_asset(t)]
221
398
 
222
399
  def find_vocabularies(self) -> list[Table]:
223
- """Return a list of all the controlled vocabulary tables in the domain schema."""
224
- return [t for s in self.model.schemas.values() for t in s.tables.values() if self.is_vocabulary(t)]
400
+ """Return a list of all controlled vocabulary tables in domain and ML schemas."""
401
+ tables = []
402
+ for schema_name in [*self.domain_schemas, self.ml_schema]:
403
+ schema = self.model.schemas.get(schema_name)
404
+ if schema:
405
+ tables.extend(t for t in schema.tables.values() if self.is_vocabulary(t))
406
+ return tables
225
407
 
226
408
  @validate_call(config=ConfigDict(arbitrary_types_allowed=True))
227
- def find_features(self, table: TableInput) -> Iterable[Feature]:
228
- """List the names of the features in the specified table.
409
+ def find_features(self, table: TableInput | None = None) -> Iterable[Feature]:
410
+ """List features in the catalog.
411
+
412
+ If a table is specified, returns only features for that table.
413
+ If no table is specified, returns all features across all tables in the catalog.
229
414
 
230
415
  Args:
231
- table: The table to find features for.
232
- table: Table | str:
416
+ table: Optional table to find features for. If None, returns all features
417
+ in the catalog.
233
418
 
234
419
  Returns:
235
- An iterable of FeatureResult instances that describe the current features in the table.
420
+ An iterable of Feature instances describing the features.
236
421
  """
237
- table = self.name_to_table(table)
238
422
 
239
423
  def is_feature(a: FindAssociationResult) -> bool:
240
424
  """Check if association represents a feature.
@@ -250,9 +434,24 @@ class DerivaModel:
250
434
  a.self_fkey.foreign_key_columns[0].name,
251
435
  }.issubset({c.name for c in a.table.columns})
252
436
 
253
- return [
254
- Feature(a, self) for a in table.find_associations(min_arity=3, max_arity=3, pure=False) if is_feature(a)
255
- ]
437
+ def find_table_features(t: Table) -> list[Feature]:
438
+ """Find all features for a single table."""
439
+ return [
440
+ Feature(a, self) for a in t.find_associations(min_arity=3, max_arity=3, pure=False) if is_feature(a)
441
+ ]
442
+
443
+ if table is not None:
444
+ # Find features for a specific table
445
+ return find_table_features(self.name_to_table(table))
446
+ else:
447
+ # Find all features across all domain and ML schema tables
448
+ features: list[Feature] = []
449
+ for schema_name in [*self.domain_schemas, self.ml_schema]:
450
+ schema = self.model.schemas.get(schema_name)
451
+ if schema:
452
+ for t in schema.tables.values():
453
+ features.extend(find_table_features(t))
454
+ return features
256
455
 
257
456
  def lookup_feature(self, table: TableInput, feature_name: str) -> Feature:
258
457
  """Lookup the named feature associated with the provided table.
@@ -290,6 +489,20 @@ class DerivaModel:
290
489
  else:
291
490
  self.model.apply()
292
491
 
492
+ def is_dataset_rid(self, rid: RID, deleted: bool = False) -> bool:
493
+ """Check if a given RID is a dataset RID."""
494
+ try:
495
+ rid_info = self.model.catalog.resolve_rid(rid, self.model)
496
+ except KeyError as _e:
497
+ raise DerivaMLException(f"Invalid RID {rid}")
498
+ if rid_info.table.name != "Dataset":
499
+ return False
500
+ elif deleted:
501
+ # Got a dataset rid. Now check to see if its deleted or not.
502
+ return True
503
+ else:
504
+ return not list(rid_info.datapath.entities().fetch())[0]["Deleted"]
505
+
293
506
  def list_dataset_element_types(self) -> list[Table]:
294
507
  """
295
508
  Lists the data types of elements contained within a dataset.
@@ -307,15 +520,14 @@ class DerivaModel:
307
520
 
308
521
  dataset_table = self.name_to_table("Dataset")
309
522
 
310
- def domain_table(table: Table) -> bool:
311
- return table.schema.name == self.domain_schema or table.name == dataset_table.name
523
+ def is_domain_or_dataset_table(table: Table) -> bool:
524
+ return self.is_domain_schema(table.schema.name) or table.name == dataset_table.name
312
525
 
313
- return [t for a in dataset_table.find_associations() if domain_table(t := a.other_fkeys.pop().pk_table)]
526
+ return [t for a in dataset_table.find_associations() if is_domain_or_dataset_table(t := a.other_fkeys.pop().pk_table)]
314
527
 
315
- def _prepare_wide_table(self,
316
- dataset,
317
- dataset_rid: RID,
318
- include_tables: list[str]) -> tuple[dict[str, Any], list[tuple]]:
528
+ def _prepare_wide_table(
529
+ self, dataset, dataset_rid: RID, include_tables: list[str]
530
+ ) -> tuple[dict[str, Any], list[tuple]]:
319
531
  """
320
532
  Generates details of a wide table from the model
321
533
 
@@ -344,11 +556,6 @@ class DerivaModel:
344
556
  for p in table_paths:
345
557
  paths_by_element[p[2].name].append(p)
346
558
 
347
- # Get the names of all of the tables that can be dataset elements.
348
- dataset_element_tables = {
349
- e.name for e in self.list_dataset_element_types() if e.schema.name == self.domain_schema
350
- }
351
-
352
559
  skip_columns = {"RCT", "RMT", "RCB", "RMB"}
353
560
  element_tables = {}
354
561
  for element_table, paths in paths_by_element.items():
@@ -446,9 +653,11 @@ class DerivaModel:
446
653
 
447
654
  def find_arcs(table: Table) -> set[Table]:
448
655
  """Given a path through the model, return the FKs that link the tables"""
656
+ # Valid schemas for traversal: all domain schemas + ML schema
657
+ valid_schemas = self.domain_schemas | {self.ml_schema}
449
658
  arc_list = [fk.pk_table for fk in table.foreign_keys] + [fk.table for fk in table.referenced_by]
450
- arc_list = [t for t in arc_list if t.schema.name in {self.domain_schema, self.ml_schema}]
451
- domain_tables = [t for t in arc_list if t.schema.name == self.domain_schema]
659
+ arc_list = [t for t in arc_list if t.schema.name in valid_schemas]
660
+ domain_tables = [t for t in arc_list if self.is_domain_schema(t.schema.name)]
452
661
  if multiple_columns := [c for c, cnt in Counter(domain_tables).items() if cnt > 1]:
453
662
  raise DerivaMLException(f"Ambiguous relationship in {table.name} {multiple_columns}")
454
663
  return set(arc_list)
@@ -466,7 +675,8 @@ class DerivaModel:
466
675
  return paths
467
676
 
468
677
  for child in find_arcs(root):
469
- if child.name in {"Dataset_Execution", "Dataset_Dataset", "Execution"}:
678
+ # if child.name in {"Dataset_Execution", "Dataset_Dataset", "Execution"}:
679
+ if child.name in {"Dataset_Dataset", "Execution"}:
470
680
  continue
471
681
  if child == parent:
472
682
  # Don't loop back via referred_by
@@ -479,7 +689,23 @@ class DerivaModel:
479
689
  paths.extend(self._schema_to_paths(child, path))
480
690
  return paths
481
691
 
482
- @validate_call(config=ConfigDict(arbitrary_types_allowed=True))
483
- def create_table(self, table_def: TableDefinition) -> Table:
484
- """Create a new table from TableDefinition."""
485
- return self.model.schemas[self.domain_schema].create_table(table_def.model_dump())
692
+ def create_table(self, table_def: TableDefinition, schema: str | None = None) -> Table:
693
+ """Create a new table from TableDefinition.
694
+
695
+ Args:
696
+ table_def: Table definition (dataclass or dict).
697
+ schema: Schema to create the table in. If None, uses default_schema.
698
+
699
+ Returns:
700
+ The newly created Table.
701
+
702
+ Raises:
703
+ DerivaMLException: If no schema specified and default_schema is not set.
704
+
705
+ Note: @validate_call removed because TableDefinition is now a dataclass from
706
+ deriva.core.typed and Pydantic validation doesn't work well with dataclass fields.
707
+ """
708
+ schema = schema or self._require_default_schema()
709
+ # Handle both TableDefinition (dataclass with to_dict) and plain dicts
710
+ table_dict = table_def.to_dict() if hasattr(table_def, 'to_dict') else table_def
711
+ return self.model.schemas[schema].create_table(table_dict)