deriva-ml 1.13.1__py3-none-any.whl → 1.13.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,8 +2,8 @@
 THis module defines the DataSet class with is used to manipulate n
 """
 
-from datetime import datetime
 from .deriva_definitions import RID
+
 from enum import Enum
 from pydantic import (
     BaseModel,
@@ -98,7 +98,7 @@ class DatasetHistory(BaseModel):
         version_rid (RID): The RID of the version record for the dataset in the Dataset_Version table.
         minid (str): The URL that represents the handle of the dataset bag. This will be None if a MINID has not
             been created yet.
-        timestamp (datetime): The timestamp of when the dataset was created.
+        snapshot (str): Catalog snapshot ID of when the version record was created.
     """
 
     dataset_version: DatasetVersion
@@ -107,7 +107,7 @@ class DatasetHistory(BaseModel):
     execution_rid: Optional[RID] = None
     description: str = ""
     minid: Optional[str] = None
-    timestamp: Optional[datetime] = None
+    snapshot: Optional[str] = None
 
     model_config = ConfigDict(arbitrary_types_allowed=True)
 
@@ -128,12 +128,12 @@ class DatasetMinid(BaseModel):
     """
 
     dataset_version: DatasetVersion
-    metadata: dict[str, str | int]
-    minid: str = Field(alias="compact_uri")
+    metadata: dict[str, str | int] = {}
+    minid: str = Field(alias="compact_uri", default=None)
     bag_url: str = Field(alias="location")
-    identifier: str
-    landing_page: str
-    version_rid: RID = Field(alias="Dataset_RID")
+    identifier: Optional[str] = None
+    landing_page: Optional[str] = None
+    version_rid: RID = Field(alias="RID")
     checksum: str = Field(alias="checksums", default="")
 
     @computed_field
@@ -156,8 +156,8 @@ class DatasetMinid(BaseModel):
 
     @field_validator("bag_url", mode="before")
     @classmethod
-    def convert_location_to_str(cls, value: list[str]) -> str:
-        return value[0]
+    def convert_location_to_str(cls, value: list[str] | str) -> str:
+        return value[0] if isinstance(value, list) else value
 
     @field_validator("checksum", mode="before")
     @classmethod
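Taken together, the DatasetHistory and DatasetMinid changes above relax the models so they can be populated either from a full MINID service response or from a bare catalog row: most fields now carry defaults, and the bag_url validator accepts a plain string as well as the list form that the MINID service returns. A minimal sketch of that validator pattern (the BagRecord model and field names are illustrative, not part of deriva-ml):

    from pydantic import BaseModel, Field, field_validator

    class BagRecord(BaseModel):
        # Populated from the wire field "location".
        bag_url: str = Field(alias="location")

        @field_validator("bag_url", mode="before")
        @classmethod
        def first_if_list(cls, value: list[str] | str) -> str:
            # MINID responses wrap the location in a list; catalog rows do not.
            return value[0] if isinstance(value, list) else value

    BagRecord(location=["https://example.org/bag.zip"]).bag_url  # "https://example.org/bag.zip"
    BagRecord(location="https://example.org/bag.zip").bag_url    # same result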
deriva_ml/demo_catalog.py CHANGED
@@ -1,18 +1,19 @@
 import atexit
-from importlib.metadata import version
 from importlib.resources import files
+import itertools
 import logging
 from random import randint, random
 from typing import Optional
-import itertools
+from tempfile import TemporaryDirectory
 
-from deriva.config.acl_config import AclConfig
-from deriva.core import DerivaServer
-from deriva.core import ErmrestCatalog, get_credential
+from deriva.core import DerivaServer, get_credential
+from deriva.core import ErmrestCatalog
 from deriva.core.datapath import DataPathException
 from deriva.core.ermrest_model import builtin_types, Schema, Table, Column
 from requests import HTTPError
+import subprocess
 
+from .schema_setup.annotations import catalog_annotation
 from deriva_ml import (
     DerivaML,
     ExecutionConfiguration,
@@ -23,8 +24,10 @@ from deriva_ml import (
     RID,
 )
 
-from deriva_ml.schema_setup.create_schema import initialize_ml_schema, create_ml_schema
-from deriva_ml.dataset import Dataset
+from deriva_ml.schema_setup.create_schema import (
+    initialize_ml_schema,
+    create_ml_schema,
+)
 
 TEST_DATASET_SIZE = 4
 
@@ -85,7 +88,7 @@ def create_demo_datasets(ml_instance: DerivaML) -> tuple[RID, list[RID], list[RI
 
     type_rid = ml_instance.add_term("Dataset_Type", "TestSet", description="A test")
     training_rid = ml_instance.add_term(
-        "Dataset_Type", "Training", description="A traing set"
+        "Dataset_Type", "Training", description="A training set"
     )
     testing_rid = ml_instance.add_term(
         "Dataset_Type", "Testing", description="A testing set"
@@ -98,32 +101,46 @@ def create_demo_datasets(ml_instance: DerivaML) -> tuple[RID, list[RID], list[RI
     )
     subject_rids = [i["RID"] for i in table_path.entities().fetch()]
 
-    dataset_rids = []
-    for r in subject_rids[0:4]:
-        d = ml_instance.create_dataset(
-            type=[type_rid.name, "Testing"],
-            description=f"Dataset {r}",
-            version=DatasetVersion(1, 0, 0),
-        )
-        ml_instance.add_dataset_members(d, [r])
-        dataset_rids.append(d)
-
-    nested_datasets = []
-    for i in range(0, 4, 2):
-        nested_dataset = ml_instance.create_dataset(
-            type=[type_rid.name, "Training"],
-            description=f"Nested Dataset {i}",
-            version=DatasetVersion(1, 0, 0),
-        )
-        ml_instance.add_dataset_members(nested_dataset, dataset_rids[i : i + 2])
-        nested_datasets.append(nested_dataset)
+    ml_instance.add_term(
+        MLVocab.workflow_type,
+        "Create Dataset Workflow",
+        description="A Workflow that creates a new dataset.",
+    )
+    dataset_workflow = ml_instance.create_workflow(
+        name="API Workflow", workflow_type="Create Dataset Workflow"
+    )
 
-    double_nested_dataset = ml_instance.create_dataset(
-        type_rid.name,
-        description="Double nested dataset",
-        version=DatasetVersion(1, 0, 0),
+    dataset_execution = ml_instance.create_execution(
+        ExecutionConfiguration(workflow=dataset_workflow, description="Create Dataset")
     )
-    ml_instance.add_dataset_members(double_nested_dataset, nested_datasets)
+
+    with dataset_execution.execute() as exe:
+        dataset_rids = []
+        for r in subject_rids[0:4]:
+            d = exe.create_dataset(
+                dataset_types=[type_rid.name, "Testing"],
+                description=f"Dataset {r}",
+                version=DatasetVersion(1, 0, 0),
+            )
+            ml_instance.add_dataset_members(d, [r])
+            dataset_rids.append(d)
+
+        nested_datasets = []
+        for i in range(0, 4, 2):
+            nested_dataset = exe.create_dataset(
+                dataset_types=[type_rid.name, "Training"],
+                description=f"Nested Dataset {i}",
+                version=DatasetVersion(1, 0, 0),
+            )
+            exe.add_dataset_members(nested_dataset, dataset_rids[i : i + 2])
+            nested_datasets.append(nested_dataset)
+
+        double_nested_dataset = exe.create_dataset(
+            dataset_types=type_rid.name,
+            description="Double nested dataset",
+            version=DatasetVersion(1, 0, 0),
+        )
+        exe.add_dataset_members(double_nested_dataset, nested_datasets)
     return double_nested_dataset, nested_datasets, dataset_rids
 
 
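The rewritten create_demo_datasets builds its datasets inside an execution context, so each demo dataset is linked to a workflow and execution for provenance. A condensed sketch of that pattern, using only calls that appear in this diff (ml_instance is assumed to be an already-connected DerivaML object):

    from deriva_ml import DatasetVersion, ExecutionConfiguration, MLVocab

    # Register a workflow type and a workflow, then create datasets under an execution.
    ml_instance.add_term(
        MLVocab.workflow_type,
        "Create Dataset Workflow",
        description="A Workflow that creates a new dataset.",
    )
    workflow = ml_instance.create_workflow(
        name="API Workflow", workflow_type="Create Dataset Workflow"
    )
    execution = ml_instance.create_execution(
        ExecutionConfiguration(workflow=workflow, description="Create Dataset")
    )
    with execution.execute() as exe:
        rid = exe.create_dataset(
            dataset_types=["Training"],
            description="Example dataset",
            version=DatasetVersion(1, 0, 0),
        )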
@@ -251,14 +268,13 @@ def create_domain_schema(ml_instance: DerivaML, sname: str) -> None:
     :return:
     """
 
-    # Make sure that we have a ml schema
     _ = ml_instance.model.schemas["deriva-ml"]
 
     if ml_instance.model.schemas.get(sname):
         # Clean out any old junk....
         ml_instance.model.schemas[sname].drop()
 
-    domain_schema = ml_instance.model.model.create_schema(
+    domain_schema = ml_instance.model.create_schema(
         Schema.define(sname, annotations={"name_style": {"underline_space": True}})
     )
     subject_table = domain_schema.create_table(
@@ -266,6 +282,8 @@ def create_domain_schema(ml_instance: DerivaML, sname: str) -> None:
     )
     ml_instance.create_asset("Image", referenced_tables=[subject_table])
 
+    catalog_annotation(ml_instance.model)
+
 
 def destroy_demo_catalog(catalog):
     catalog.delete_ermrest_catalog(really=True)
@@ -280,43 +298,47 @@ def create_demo_catalog(
     create_datasets=False,
     on_exit_delete=True,
 ) -> ErmrestCatalog:
-    credentials = get_credential(hostname)
-    server = DerivaServer("https", hostname, credentials=credentials)
+    credential = get_credential(hostname)
+
+    server = DerivaServer("https", hostname, credentials=credential)
     test_catalog = server.create_ermrest_catalog()
+    model = test_catalog.getCatalogModel()
+    model.configure_baseline_catalog()
+    policy_file = files("deriva_ml.schema_setup").joinpath("policy.json")
+    subprocess.run(
+        [
+            "deriva-acl-config",
+            "--host",
+            test_catalog.deriva_server.server,
+            "--config-file",
+            policy_file,
+            test_catalog.catalog_id,
+        ]
+    )
+
     if on_exit_delete:
         atexit.register(destroy_demo_catalog, test_catalog)
-    model = test_catalog.getCatalogModel()
 
     try:
-        create_ml_schema(model, project_name=project_name)
-        deriva_ml = DerivaML(
-            hostname=hostname,
-            catalog_id=test_catalog.catalog_id,
-            project_name=project_name,
-            domain_schema=domain_schema,
-            logging_level=logging.WARN,
-        )
-        create_domain_schema(deriva_ml, domain_schema)
-        working_dir = deriva_ml.working_dir
-        dataset_table = deriva_ml.dataset_table
-        dataset_table.annotations.update(
-            Dataset(
-                deriva_ml.model,
-                cache_dir=deriva_ml.cache_dir,
-                working_dir=deriva_ml.working_dir,
-            )._generate_dataset_annotations()
-        )
-        deriva_ml.model.apply()
-        policy_file = files("deriva_ml.schema_setup").joinpath("policy.json")
-        AclConfig(
-            hostname, test_catalog.catalog_id, policy_file, credentials=credentials
-        )
-        if populate or create_features or create_datasets:
-            populate_demo_catalog(deriva_ml, domain_schema)
-            if create_features:
-                create_demo_features(deriva_ml)
-            if create_datasets:
-                create_demo_datasets(deriva_ml)
+        with TemporaryDirectory() as tmpdir:
+            create_ml_schema(test_catalog, project_name=project_name)
+            deriva_ml = DerivaML(
+                hostname=hostname,
+                catalog_id=test_catalog.catalog_id,
+                project_name=project_name,
+                domain_schema=domain_schema,
+                logging_level=logging.WARN,
+                working_dir=tmpdir,
+                credential=credential,
+            )
+            create_domain_schema(deriva_ml, domain_schema)
+
+            if populate or create_features or create_datasets:
+                populate_demo_catalog(deriva_ml, domain_schema)
+                if create_features:
+                    create_demo_features(deriva_ml)
+                if create_datasets:
+                    create_demo_datasets(deriva_ml)
 
     except Exception:
         # on failure, delete catalog and re-raise exception
@@ -332,6 +354,7 @@ class DemoML(DerivaML):
         catalog_id,
         cache_dir: Optional[str] = None,
         working_dir: Optional[str] = None,
+        use_minid=True,
     ):
         super().__init__(
             hostname=hostname,
@@ -339,5 +362,5 @@ class DemoML(DerivaML):
             project_name="ml-test",
             cache_dir=cache_dir,
             working_dir=working_dir,
-            model_version=version(__name__.split(".")[0]),
+            use_minid=use_minid,
         )
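DemoML no longer derives a model_version from the installed package; instead it forwards a use_minid flag to DerivaML. A hedged usage sketch (the hostname is a placeholder):

    # Spin up a populated demo catalog and connect to it without MINID resolution.
    catalog = create_demo_catalog("demo.example.org", populate=True)
    ml = DemoML("demo.example.org", catalog.catalog_id, use_minid=False)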
@@ -2,12 +2,16 @@
 Shared definitions that are used in different DerivaML modules.
 """
 
+from __future__ import annotations
+
 import warnings
 from datetime import date
 from enum import Enum
-from typing import Any, Iterable, Optional, Annotated
+from pathlib import Path
+from typing import Any, Iterable, Optional, Annotated, Generator
 
 import deriva.core.ermrest_model as em
+import deriva.core.utils.hash_utils as hash_utils
 from urllib.parse import urlparse
 from deriva.core.ermrest_model import builtin_types
 from pydantic import (
@@ -136,11 +140,14 @@ class FileSpec(BaseModel):
     @field_validator("url")
     @classmethod
     def validate_file_url(cls, v):
+        """Examine the provided URL. If it's a local path, convert it into a tag URL."""
         url_parts = urlparse(v)
         if url_parts.scheme == "tag":
+            # Already a tag URL, so just return it.
             return v
-        elif not url_parts.scheme:
-            return f"tag://{gethostname()},{date.today()}:file://{v}"
+        elif (not url_parts.scheme) or url_parts.scheme == "file":
+            # There is no scheme part tof the URL, or it is a file URL, so it is a local file path, so convert to a tag URL.
+            return f"tag://{gethostname()},{date.today()}:file://{url_parts.path}"
         else:
             raise ValidationError("url is not a file URL")
 
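With this change, validate_file_url treats both bare paths and file:// URLs as local files and rewrites them as tag URLs built from the hostname, today's date, and the path. A self-contained sketch of that normalization (output varies by machine and date):

    from datetime import date
    from socket import gethostname
    from urllib.parse import urlparse

    # Both spellings of a local file normalize to the same tag URL.
    for raw in ("/data/img_01.png", "file:///data/img_01.png"):
        parts = urlparse(raw)
        print(f"tag://{gethostname()},{date.today()}:file://{parts.path}")
    # e.g. tag://myhost,2025-06-01:file:///data/img_01.png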
@@ -153,6 +160,38 @@ class FileSpec(BaseModel):
             "Length": self.length,
         }
 
+    @staticmethod
+    def create_filespecs(
+        path: Path | str, description: str
+    ) -> Generator["FileSpec", None, None]:
+        """Given a file or directory, generate the sequence of corresponding FileSpecs sutable to create a File table
+
+        Arguments:
+            path: Path to the file or directory.
+            description: The description of the file(s)
+
+        Returns:
+            An iterable of FileSpecs for each file in the directory.
+        """
+        path = Path(path)
+
+        def list_all_files(p) -> list[Path]:
+            return (
+                (f for f in Path(p).rglob("*") if f.is_file()) if path.is_dir() else [p]
+            )
+
+        def create_spec(p: Path, description: str) -> FileSpec:
+            hashes = hash_utils.compute_file_hashes(p, hashes=["md5", "sha256"])
+            md5 = hashes["md5"][0]
+            return FileSpec(
+                length=path.stat().st_size,
+                md5=md5,
+                description=description,
+                url=p.as_posix(),
+            )
+
+        return (create_spec(file, description) for file in list_all_files(path))
+
 
 class VocabularyTerm(BaseModel):
     """An entry in a vocabulary table.
@@ -162,7 +201,7 @@ class VocabularyTerm(BaseModel):
         synonyms: List of alternative names for the term
         id: CURI identifier for the term
         uri: Unique URI for the term.
-        description: A description of the meaning of the term
+        description: A description of the term meaning
         rid: Resource identifier assigned to the term
 
     Args:
@@ -51,6 +51,7 @@ from .deriva_definitions import (
     FileSpec,
     TableDefinition,
 )
+from .schema_setup.annotations import asset_annotation
 
 try:
     from icecream import ic
@@ -82,9 +83,10 @@ class DerivaML(Dataset):
         project_name: Optional[str] = None,
         cache_dir: Optional[str] = None,
         working_dir: Optional[str] = None,
-        model_version: str = "1",
         ml_schema: str = ML_SCHEMA,
         logging_level=logging.INFO,
+        credential=None,
+        use_minid=True,
     ):
         """Create and initialize a DerivaML instance.
 
@@ -93,13 +95,14 @@ class DerivaML(Dataset):
 
         Args:
             hostname: Hostname of the Deriva server.
-            catalog_id: Catalog ID. Either and identifier, or a catalog name.
-            domain_schema: Schema name for domain specific tables and relationships.
+            catalog_id: Catalog ID. Either an identifier or a catalog name.
+            domain_schema: Schema name for domain-specific tables and relationships.
+            project_name: Project name. Defaults to name of domain schema.
            cache_dir: Directory path for caching data downloaded from the Deriva server as bdbag.
             working_dir: Directory path for storing data used by or generated by any computations.
-            model_version: A string that indicates the version model. Typically passed in via
+            use_minid: Use the MINID serice when downloading dataset bags.
         """
-        self.credential = get_credential(hostname)
+        self.credential = credential or get_credential(hostname)
         server = DerivaServer(
             "https",
             hostname,
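DerivaML now accepts an injected credential (falling back to get_credential when none is given) and a use_minid flag that is threaded through to the Dataset base class. A usage sketch with placeholder host and catalog values:

    from deriva.core import get_credential
    from deriva_ml import DerivaML

    credential = get_credential("deriva.example.org")
    ml = DerivaML(
        hostname="deriva.example.org",
        catalog_id="42",
        credential=credential,  # new in 1.13.3; defaults to get_credential(hostname)
        use_minid=False,        # skip the MINID service when fetching dataset bags
    )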
@@ -119,21 +122,20 @@ class DerivaML(Dataset):
         ) / default_workdir
 
         self.working_dir.mkdir(parents=True, exist_ok=True)
-        self.cache_dir = (
-            Path(cache_dir) if cache_dir else Path.home() / "deriva-ml" / "cache"
-        )
+        self.cache_dir = Path(cache_dir) if cache_dir else self.working_dir / "cache"
 
         self.cache_dir.mkdir(parents=True, exist_ok=True)
 
         # Initialize dataset class.
-        super().__init__(self.model, self.cache_dir, self.working_dir)
+        super().__init__(
+            self.model, self.cache_dir, self.working_dir, use_minid=use_minid
+        )
         self._logger = logging.getLogger("deriva_ml")
         self._logger.setLevel(logging_level)
 
         self.host_name = hostname
         self.catalog_id = catalog_id
         self.ml_schema = ml_schema
-        self.version = model_version
         self.configuration = None
         self._execution: Optional[Execution] = None
         self.domain_schema = self.model.domain_schema
@@ -150,11 +152,6 @@ class DerivaML(Dataset):
         deriva_logger = logging.getLogger("deriva")
         deriva_logger.setLevel(logging_level)
 
-        if "dirty" in self.version:
-            logging.info(
-                f"Loading dirty model. Consider commiting and tagging: {self.version}"
-            )
-
     def __del__(self):
         try:
             if self._execution and self._execution.status != Status.completed:
@@ -438,6 +435,8 @@ class DerivaML(Dataset):
             )
         )
         atable.create_reference(self.model.name_to_table("Asset_Role"))
+
+        asset_annotation(asset_table)
         return asset_table
 
     # @validate_call(config=ConfigDict(arbitrary_types_allowed=True))
@@ -820,6 +819,8 @@
     ) -> Iterable[RID]:
         """Add a new file to the File table in the catalog.
 
+        The input is an iterator of FileSpec objects which provide the MD5 checksum, length, and URL.
+
         Args:
             file_types: One or more file types. Must be a term from the File_Type controlled vocabulary.
             files: A sequence of file specifications that describe the files to add.
@@ -841,7 +842,6 @@
                 return True
             return False
 
-        # Create the entry for the new dataset_table and get its RID.
         file_types = [file_types] if isinstance(file_types, str) else file_types
         pb = self._model.catalog.getPathBuilder()
         for file_type in file_types:
@@ -868,18 +868,12 @@
 
         if execution_rid:
             # Get the name of the association table between file_table and execution.
-            exec_table = next(
-                self._model.schemas[self._ml_schema]
-                .tables["Execution"]
-                .find_associations()
-            ).name
-            pb.schemas[self._ml_schema].tables[exec_table].insert(
+            pb.schemas[self._ml_schema].File_Execution.insert(
                 [
                     {"File": file_rid, "Execution": execution_rid}
                     for file_rid in file_rids
                 ]
             )
-
         return file_rids
 
     def list_files(
@@ -890,9 +884,10 @@
         file_path = ml_path.File
         type_path = ml_path.File_File_Type
 
-        # Get a list of all the dataset_type values associated with this dataset_table.
-        path = file_path.link(type_path)
-        path = path.attributes(
+        path = file_path.link(
+            type_path, on=file_path.RID == type_path.File, join_type="left"
+        )
+        path = path.File.attributes(
             path.File.RID,
             path.File.URL,
             path.File.MD5,
@@ -902,9 +897,9 @@
         )
         file_map = {}
         for f in path.fetch():
-            file_map.setdefault(f["RID"], f | {"File_Types": []})["File_Types"].append(
-                f["File_Type"]
-            )
+            entry = file_map.setdefault(f["RID"], {**f, "File_Types": []})
+            if ft := f.get("File_Type"):  # assign-and-test in one go
+                entry["File_Types"].append(ft)
 
         # Now get rid of the File_Type key and return the result
         return [(f, f.pop("File_Type"))[0] for f in file_map.values()]
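Because the File/File_Type join is now a left outer join, untyped files come back with a File_Type of None; the rewritten grouping uses the walrus operator so those nulls never reach the File_Types list. A self-contained sketch of the same logic over literal rows:

    # Rows as a left join would return them; "File_Type": None marks an untyped file.
    rows = [
        {"RID": "1-abc", "URL": "u1", "File_Type": "CSV"},
        {"RID": "1-abc", "URL": "u1", "File_Type": "Table"},
        {"RID": "2-def", "URL": "u2", "File_Type": None},
    ]
    file_map = {}
    for f in rows:
        entry = file_map.setdefault(f["RID"], {**f, "File_Types": []})
        if ft := f.get("File_Type"):  # skips None and missing values
            entry["File_Types"].append(ft)
    assert file_map["1-abc"]["File_Types"] == ["CSV", "Table"]
    assert file_map["2-def"]["File_Types"] == []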
@@ -974,7 +969,7 @@
     ) -> Workflow:
         """Identify current executing program and return a workflow RID for it
 
-        Determine the notebook or script that is currently being executed. Assume that this is 
+        Determine the notebook or script that is currently being executed. Assume that this is
         being executed from a cloned GitHub repository. Determine the remote repository name for
         this object. Then either retrieve an existing workflow for this executable or create
         a new one.
983
978
  name: The name of the workflow.
984
979
  workflow_type: The type of the workflow.
985
980
  description: The description of the workflow.
981
+
982
+ Returns:
983
+ A workflow object.
986
984
  """
987
985
  # Make sure type is correct.
988
986
  self.lookup_term(MLVocab.workflow_type, workflow_type)
@@ -1001,6 +999,9 @@
         1. The datasets specified in the configuration are downloaded and placed in the cache-dir. If a version is
         not specified in the configuration, then a new minor version number is created for the dataset and downloaded.
 
+        2. If any execution assets are provided in the configuration, they are downloaded and placed in the working directory.
+
+
         Args:
             configuration: ExecutionConfiguration:
             dry_run: Do not create an execution record or upload results.
deriva_ml/deriva_model.py CHANGED
@@ -21,7 +21,7 @@ from .deriva_definitions import (
 
 from collections import Counter
 from pydantic import validate_call, ConfigDict
-from typing import Iterable, Optional
+from typing import Iterable, Optional, Any
 
 
 class DerivaModel:
  class DerivaModel:
@@ -61,7 +61,7 @@ class DerivaModel:
61
61
  self.schemas = self.model.schemas
62
62
 
63
63
  self.ml_schema = ml_schema
64
- builtin_schemas = ["public", self.ml_schema, "www"]
64
+ builtin_schemas = ["public", self.ml_schema, "www", "WWW"]
65
65
  try:
66
66
  self.domain_schema = (
67
67
  domain_schema
@@ -73,6 +73,11 @@ class DerivaModel:
             # No domain schema defined.
             self.domain_schema = domain_schema
 
+    @property
+    def chaise_config(self) -> dict[str, Any]:
+        """Return the chaise configuration."""
+        return self.model.chaise_config
+
     def __getattr__(self, name):
         # Called only if `name` is not found in Manager. Delegate attributes to model class.
         return getattr(self.model, name)
@@ -115,7 +120,12 @@
         return vocab_columns.issubset({c.name.upper() for c in table.columns})
 
     def is_association(
-        self, table_name: str | Table, unqualified: bool = True, pure: bool = True
+        self,
+        table_name: str | Table,
+        unqualified: bool = True,
+        pure: bool = True,
+        min_arity: int = 2,
+        max_arity: int = 2,
     ) -> bool | set | int:
         """Check the specified table to see if it is an association table.
 
@@ -130,7 +140,9 @@
 
         """
         table = self.name_to_table(table_name)
-        return table.is_association(unqualified=unqualified, pure=pure)
+        return table.is_association(
+            unqualified=unqualified, pure=pure, min_arity=min_arity, max_arity=max_arity
+        )
 
     def find_association(self, table1: Table | str, table2: Table | str) -> Table:
         """Given two tables, return an association table that connects the two.
@@ -302,7 +314,7 @@
     ) -> list[list[Table]]:
         """Recursively walk over the domain schema graph and extend the current path.
 
-        Walk a schema graph and return a list all the paths through the graph. 
+        Walk a schema graph and return a list all the paths through the graph.
 
         Args:
             path: Source path so far