kalbio 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,474 @@
1
+ """
2
+ Module for managing entity fields in Kaleidoscope.
3
+
4
+ This module provides classes and services for working with entity fields, which are the
5
+ schema definitions for data stored in the Kaleidoscope system. It includes:
6
+
7
+ - DataFieldTypeEnum: An enumeration of all supported field types
8
+ - EntityField: A model representing a field definition
9
+ - EntityFieldsService: A service class for retrieving and creating entity fields
10
+
11
+ Entity fields can be of two types:
12
+
13
+ - Key fields: Used to uniquely identify entities
14
+ - Data fields: Used to store additional information about entities
15
+
16
+ The service provides caching mechanisms to minimize API calls and includes error handling
17
+ for all network operations.
18
+
19
+ Classes:
20
+ DataFieldTypeEnum: An enumeration of all supported field types
21
+ EntityField: A model representing a field definition
22
+ EntityFieldsService: A service class for retrieving and creating entity fields
23
+
24
+ Example:
25
+ ```python
26
+ # Get all key fields
27
+ key_fields = client.entity_fields.get_key_fields()
28
+
29
+ # Create or get a data field
30
+ field = client.entity_fields.get_or_create_data_field(
31
+ field_name="temperature",
32
+ field_type=DataFieldTypeEnum.NUMBER
33
+ )
34
+ ```
35
+ """
36
+
37
+ import logging
38
+ from datetime import datetime
39
+ from enum import Enum
40
+ from functools import lru_cache
41
+ from kalbio._kaleidoscope_model import _KaleidoscopeBaseModel
42
+ from kalbio.client import KaleidoscopeClient
43
+ from pydantic import TypeAdapter
44
+ from typing import List, Optional, Union
45
+
46
+ _logger = logging.getLogger(__name__)
47
+
48
+
49
class DataFieldTypeEnum(str, Enum):
    """Enumeration of data field types supported by the system.

    Each member's value is the string used for that field type when a field
    is created through the API (see ``field_type.value`` in
    ``EntityFieldsService.get_or_create_data_field``). Because the enum also
    derives from ``str``, members compare equal to their string values.

    Attributes:
        TEXT: Plain text field.
        NUMBER: Numeric field for storing numbers.
        QUALIFIED_NUMBER: Numeric field with additional qualifiers or units.
        SMILES_STRING: Field for storing SMILES (Simplified Molecular Input Line Entry System) notation.
        SELECT: Single selection field from predefined options.
        MULTISELECT: Multiple selection field from predefined options.
        MOLFILE: Field for storing molecular structure files.
        RECORD_REFERENCE: Reference to another record by record_id.
        FILE: Generic file attachment field.
        IMAGE: Image file field.
        DATE: Date field.
        URL: Web URL field.
        BOOLEAN: Boolean (true/false) field.
        EMAIL: Email address field.
        PHONE: Phone number field.
        FORMULA: Field for storing formulas or calculated expressions.
        PEOPLE: Field for referencing people/users.
        VOTES: Field for storing vote counts or voting data.
        XY_ARRAY: Field for storing XY coordinate arrays.
        DNA_OLIGO: Field for storing DNA oligonucleotide sequences.
        RNA_OLIGO: Field for storing RNA oligonucleotide sequences.
        PEPTID: Field for storing peptide sequences (original, misspelled name).
        PEPTIDE: Correctly spelled alias of ``PEPTID``; both names refer to
            the same member.
        PLASMID: Field for storing plasmid information.
        GOOGLE_DRIVE: Field for Google Drive file references.
        S3_FILE: Field for AWS S3 file references.
        SNOWFLAKE_QUERY: Field for Snowflake database query references.
    """

    TEXT = "text"
    NUMBER = "number"
    QUALIFIED_NUMBER = "qualified-number"

    SMILES_STRING = "smiles-string"
    SELECT = "select"
    MULTISELECT = "multiselect"
    MOLFILE = "molfile"
    RECORD_REFERENCE = "record-reference"  # value is a record_id
    FILE = "file"
    IMAGE = "image"
    DATE = "date"
    # NOTE(review): the only upper-case value in this enum; presumably it
    # mirrors the API's own spelling -- confirm before normalizing.
    URL = "URL"
    BOOLEAN = "boolean"
    EMAIL = "email"
    PHONE = "phone"
    FORMULA = "formula"
    PEOPLE = "people"
    VOTES = "votes"
    XY_ARRAY = "xy-array"
    DNA_OLIGO = "dna-oligo"
    RNA_OLIGO = "rna-oligo"
    PEPTID = "peptide"
    # Enum alias: a second name bound to an existing value resolves to the
    # first member, so PEPTIDE is PEPTID and iteration/lookup are unchanged.
    PEPTIDE = "peptide"
    PLASMID = "plasmid"
    GOOGLE_DRIVE = "google-drive-file"
    S3_FILE = "s3-file"
    SNOWFLAKE_QUERY = "snowflake-query"
111
+
112
+
113
class EntityField(_KaleidoscopeBaseModel):
    """Schema definition of a single field attached to entities.

    A field is either a key field (``is_key`` is true) or a data field, and
    carries its name, its declared type and an optional slice reference.

    Attributes:
        id (str): The UUID of the field. Not declared in this class;
            presumably inherited from ``_KaleidoscopeBaseModel``.
        created_at (datetime): Timestamp when the field was created.
        is_key (bool): Whether this field is a key field.
        field_name (str): The name of the field.
        field_type (DataFieldTypeEnum): The data type of the field.
        ref_slice_id (Optional[str]): Optional reference to a slice ID for relational fields.

    Example:
        ```python
        from kalbio.entity_fields import EntityField, DataFieldTypeEnum
        from datetime import datetime, timezone

        ef = EntityField(
            id="field_uuid",
            created_at=datetime.now(timezone.utc),
            is_key=True,
            field_name="sample_id",
            field_type=DataFieldTypeEnum.TEXT,
            ref_slice_id=None,
        )
        print(str(ef))  # sample_id
        ```
    """

    created_at: datetime
    is_key: bool
    field_name: str
    field_type: DataFieldTypeEnum
    ref_slice_id: Optional[str]

    def __str__(self) -> str:
        # A field is displayed simply as its name.
        return str(self.field_name)
152
+
153
+
154
# Alias accepted wherever a field has to be identified. The resolver methods
# of EntityFieldsService handle an EntityField instance first, then try the
# string as a UUID, and finally as a field name.
type EntityFieldIdentifier = Union[EntityField, str]
"""An Identifier Type for Entity Fields.

An EntityField should be able to be identified by:

* EntityField (object instance)
* UUID (str)
* field_name (str)
"""
163
+
164
+
165
class EntityFieldsService:
    """Service class for managing key fields and data fields in Kaleidoscope.

    Entity fields can be of two types:

    - Key fields: Used to uniquely identify entities
    - Data fields: Used to store additional information about entities

    Caching:
        Field lists and the lookup maps derived from them are cached per
        service instance. Only successful fetches are cached: when a request
        fails, an empty result is returned for that call and the next call
        retries the API.

    Example:
        ```python
        key_fields = client.entity_fields.get_key_fields()
        temperature = client.entity_fields.get_or_create_data_field(
            field_name="temperature",
            field_type=DataFieldTypeEnum.NUMBER,
        )
        ```
    """

    def __init__(self, client: KaleidoscopeClient):
        self._client = client
        # Per-instance cache. A missing key means "not loaded yet"; failed
        # fetches never write here, so errors are not remembered.
        self._cache: dict = {}

    #########################
    #    Public  Methods    #
    #########################

    ##### for Key Fields #####

    def get_key_fields(self) -> List[EntityField]:
        """Retrieve key fields and cache the result.

        Returns:
            Key field definitions for the workspace.

        Notes:
            On error, the key field caches are cleared and an empty list is
            returned. The failure itself is not cached, so a later call
            queries the API again.

        Example:
            ```python
            key_fields = client.entity_fields.get_key_fields()
            ```
        """
        cached = self._cache.get("key_fields")
        if cached is not None:
            return cached

        try:
            resp = self._client._get("/key_fields")
            fields = TypeAdapter(List[EntityField]).validate_python(resp)
        except Exception as e:
            _logger.error(f"Error fetching key fields: {e}")
            self._clear_key_field_caches()
            return []

        self._cache["key_fields"] = fields
        return fields

    def get_key_field_by_id(
        self, identifier: EntityFieldIdentifier
    ) -> EntityField | None:
        """Get a key field by an identifier.

        Args:
            identifier: Key field identifier. Data field identifiers will return None.

                This method will accept and resolve any type of EntityFieldIdentifier.

        Returns:
            Matching key field if found. If not, returns None.

        Example:
            ```python
            key_field = client.entity_fields.get_key_field_by_id("sample_id")
            ```
        """
        id_map = self._get_key_field_id_map()
        field_id = self._resolve_key_field_id(identifier)

        if field_id:
            return id_map.get(field_id, None)
        return None

    def get_or_create_key_field(self, field_name: str) -> EntityField | None:
        """Retrieve an existing key field by name or create it.

        Args:
            field_name: Name of the key field to fetch or create.

        Returns:
            Existing or newly created key field, or None on error.

        Example:
            ```python
            key_field = client.entity_fields.get_or_create_key_field("sample_id")
            ```
        """
        field = self.get_key_field_by_id(field_name)
        if field is not None:
            return field

        # The set of key fields is about to change; drop the cached view so
        # the next lookup reloads it.
        self._clear_key_field_caches()

        try:
            data = {"field_name": field_name}
            resp = self._client._post("/key_fields/", data)
            return EntityField.model_validate(resp)
        except Exception as e:
            _logger.error(f"Error getting or creating key field: {e}")
            return None

    ##### for Data Fields #####

    def get_data_fields(self) -> List[EntityField]:
        """Retrieve data fields and cache the result.

        Returns:
            Data field definitions for the workspace.

        Notes:
            On error, the data field caches are cleared and an empty list is
            returned. The failure itself is not cached, so a later call
            queries the API again.

        Example:
            ```python
            data_fields = client.entity_fields.get_data_fields()
            ```
        """
        cached = self._cache.get("data_fields")
        if cached is not None:
            return cached

        try:
            resp = self._client._get("/data_fields")
            fields = TypeAdapter(List[EntityField]).validate_python(resp)
        except Exception as e:
            _logger.error(f"Error fetching data fields: {e}")
            self._clear_data_field_caches()
            return []

        self._cache["data_fields"] = fields
        return fields

    def get_data_field_by_id(
        self, identifier: EntityFieldIdentifier
    ) -> EntityField | None:
        """Get a data field by identifier.

        Args:
            identifier: Identifier for a data field. Key field identifiers return None.

                This method will accept and resolve any type of EntityFieldIdentifier.

        Returns:
            Matching data field if found. If not, returns None.

        Example:
            ```python
            data_field = client.entity_fields.get_data_field_by_id("temperature")
            ```
        """
        id_map = self._get_data_field_id_map()
        field_id = self._resolve_data_field_id(identifier)

        if field_id:
            return id_map.get(field_id, None)
        return None

    def get_or_create_data_field(
        self, field_name: str, field_type: DataFieldTypeEnum
    ) -> EntityField | None:
        """Create a data field or return the existing one.

        Args:
            field_name: Name of the data field to create or retrieve.
            field_type: Data field type.

        Returns:
            Existing or newly created data field, or None on error.

        Example:
            ```python
            concentration = client.entity_fields.get_or_create_data_field(
                field_name="concentration",
                field_type=DataFieldTypeEnum.NUMBER,
            )
            ```
        """
        field = self.get_data_field_by_id(field_name)
        if field is not None:
            return field

        # The set of data fields is about to change; drop the cached view so
        # the next lookup reloads it.
        self._clear_data_field_caches()

        try:
            data: dict = {
                "field_name": field_name,
                "field_type": field_type.value,
                "attrs": {},
            }
            resp = self._client._post("/data_fields/", data)
            return EntityField.model_validate(resp)
        except Exception as e:
            _logger.error(f"Error getting or creating data field: {e}")
            return None

    #########################
    #    Private Methods    #
    #########################

    ##### for Key Fields #####

    def _get_key_field_id_map(self) -> dict[str, EntityField]:
        """Map key field UUIDs to their entities.

        Returns:
            UUID-to-EntityField mapping for key fields. Empty when the key
            fields could not be fetched (that empty result is not cached).
        """
        id_map = self._cache.get("key_field_id_map")
        if id_map is None:
            id_map = {field.id: field for field in self.get_key_fields()}
            # Memoize only if the underlying list was fetched successfully.
            if "key_fields" in self._cache:
                self._cache["key_field_id_map"] = id_map
        return id_map

    def _get_key_field_name_map(self) -> dict[str, EntityField]:
        """Map key field names to their entities.

        Returns:
            field_name-to-EntityField mapping for key fields. Empty when the
            key fields could not be fetched (that empty result is not cached).
        """
        name_map = self._cache.get("key_field_name_map")
        if name_map is None:
            name_map = {field.field_name: field for field in self.get_key_fields()}
            # Memoize only if the underlying list was fetched successfully.
            if "key_fields" in self._cache:
                self._cache["key_field_name_map"] = name_map
        return name_map

    def _resolve_key_field_id(self, identifier: EntityFieldIdentifier) -> str | None:
        """Resolve a key field identifier to its ID.

        Args:
            identifier: Key field object, UUID, or field name.

        Returns:
            Field ID if resolved; otherwise None.
        """
        if isinstance(identifier, EntityField):
            if identifier.is_key:
                return identifier.id
            # A data field object can never identify a key field.
            _logger.error(f"Key field with identifier '{identifier}' not found.")
            return None

        if identifier in self._get_key_field_id_map():  # try to find by uuid
            return identifier

        key_field = self._get_key_field_name_map().get(identifier, None)
        if key_field:  # try to find by name
            return key_field.id

        _logger.error(f"Key field with identifier '{identifier}' not found.")
        return None

    def _clear_key_field_caches(self) -> None:
        """Clear caches for key fields.

        Call when a key field is added, removed, or changed.
        """
        for cache_key in ("key_fields", "key_field_id_map", "key_field_name_map"):
            self._cache.pop(cache_key, None)

    ##### for Data Fields #####

    def _get_data_field_id_map(self) -> dict[str, EntityField]:
        """Map data field UUIDs to their entities.

        Returns:
            UUID-to-EntityField mapping for data fields. Empty when the data
            fields could not be fetched (that empty result is not cached).
        """
        id_map = self._cache.get("data_field_id_map")
        if id_map is None:
            id_map = {field.id: field for field in self.get_data_fields()}
            # Memoize only if the underlying list was fetched successfully.
            if "data_fields" in self._cache:
                self._cache["data_field_id_map"] = id_map
        return id_map

    def _get_data_field_name_map(self) -> dict[str, EntityField]:
        """Map data field names to their entities.

        Returns:
            field_name-to-EntityField mapping for data fields. Empty when the
            data fields could not be fetched (that empty result is not cached).
        """
        name_map = self._cache.get("data_field_name_map")
        if name_map is None:
            name_map = {field.field_name: field for field in self.get_data_fields()}
            # Memoize only if the underlying list was fetched successfully.
            if "data_fields" in self._cache:
                self._cache["data_field_name_map"] = name_map
        return name_map

    def _resolve_data_field_id(self, identifier: EntityFieldIdentifier) -> str | None:
        """Resolve a data field identifier to its ID.

        Args:
            identifier: Data field object, UUID, or field name.

        Returns:
            Field ID if resolved; otherwise None.
        """
        if isinstance(identifier, EntityField):
            if not identifier.is_key:
                return identifier.id
            # A key field object can never identify a data field.
            _logger.error(f"Data field with identifier '{identifier}' not found.")
            return None

        # Check if it's already an ID
        if identifier in self._get_data_field_id_map():
            return identifier

        # Try to find by name
        data_field = self._get_data_field_name_map().get(identifier, None)
        if data_field:
            return data_field.id

        _logger.error(f"Data field with identifier '{identifier}' not found.")
        return None

    def _clear_data_field_caches(self) -> None:
        """Clear caches for data fields.

        Call when a data field is added, removed, or changed.
        """
        for cache_key in ("data_fields", "data_field_id_map", "data_field_name_map"):
            self._cache.pop(cache_key, None)
kalbio/entity_types.py ADDED
@@ -0,0 +1,188 @@
1
+ """
2
+ Entity type management module for the Kaleidoscope system.
3
+
4
+ This module provides classes and services for working with entity types in Kaleidoscope.
5
+ Entity types define classifications of entities with associated key fields and slice names
6
+ for data organization and retrieval.
7
+
8
+ Classes:
9
+ EntityType: Represents a single entity type with its configuration and key fields.
10
+ EntityTypesService: Service class for managing and querying entity types.
11
+
12
+ Example:
13
+ ```python
14
+ # get all entity types
15
+ all_types = client.entity_types.get_types()
16
+
17
+ # get a specific type by name
18
+ specific_type = client.entity_types.get_type_by_name("my_entity")
19
+ ```
20
+ """
21
+
22
+ import logging
23
+ from functools import lru_cache
24
+ from kalbio._kaleidoscope_model import _KaleidoscopeBaseModel
25
+ from kalbio.client import KaleidoscopeClient
26
+ from pydantic import TypeAdapter
27
+ from typing import List
28
+
29
+ _logger = logging.getLogger(__name__)
30
+
31
+
32
class EntityType(_KaleidoscopeBaseModel):
    """A classification of entities in the Kaleidoscope system.

    Each entity type names an entity slice and lists the key fields that
    belong to it.

    Attributes:
        id (str): UUID of the entity type. Not declared in this class;
            presumably inherited from ``_KaleidoscopeBaseModel``.
        key_field_ids (List[str]): List of field IDs that serve as key fields for this entity type.
        slice_name (str): Name of the entity slice associated with this type.
    """

    key_field_ids: List[str]
    slice_name: str

    def __str__(self) -> str:
        # An entity type is displayed as its slice name.
        return str(self.slice_name)

    def get_record_ids(self) -> List[str]:
        """Fetch the record IDs that belong to this entity slice.

        Returns:
            List[str]: The response of the record search for this slice,
            returned as-is (expected to be a list of record IDs).

        Note:
            Any exception raised while building or sending the request is
            logged, and an empty list is returned instead.
        """
        try:
            endpoint = "/records/search?entity_slice_id=" + self.id
            record_ids = self._client._get(endpoint)
        except Exception as e:
            _logger.error(f"Error fetching record_ids of this entity type: {e}")
            return []
        return record_ids
66
+
67
+
68
class EntityTypesService:
    """Service class for managing and retrieving entity types from the Kaleidoscope API.

    This service provides methods to fetch, filter, and search entity types based on
    various criteria such as name and key field IDs. It handles the conversion of raw
    API responses into validated EntityType objects.

    Caching:
        The list of entity types is cached per service instance after the
        first successful fetch. A failed fetch is not cached, so the next
        call queries the API again.

    Example:
        ```python
        # get all entity types
        all_types = client.entity_types.get_types()

        # get a specific type by name
        specific_type = client.entity_types.get_type_by_name("my_entity")
        ```
    """

    def __init__(self, client: KaleidoscopeClient):
        self._client = client
        # None means "not loaded yet"; only successful fetches are stored.
        self._types_cache: List[EntityType] | None = None

    def _create_entity_type(self, data: dict) -> EntityType:
        """Create an EntityType instance from the provided data dictionary.

        Args:
            data (dict): A dictionary containing the data required to instantiate an EntityType.

        Returns:
            EntityType: The validated and initialized EntityType instance.

        Raises:
            ValidationError: If the data could not be validated as an EntityType.
        """
        entity_type = TypeAdapter(EntityType).validate_python(data)
        entity_type._set_client(self._client)
        return entity_type

    def _create_entity_type_list(self, data: list[dict]) -> List[EntityType]:
        """Convert a list of entity type data dictionaries into a list of EntityType objects.

        Args:
            data (list[dict]): The input data representing entity types.

        Returns:
            List[EntityType]: A list of EntityType instances with the client set.

        Raises:
            ValidationError: If the data could not be validated as a list of EntityType objects.
        """
        entity_types = TypeAdapter(List[EntityType]).validate_python(data)

        for entity_type in entity_types:
            entity_type._set_client(self._client)

        return entity_types

    def get_types(self) -> List[EntityType]:
        """Retrieve a list of entity types from the client.

        The result of the first successful request is cached on this service
        instance and reused by later calls.

        Returns:
            List[EntityType]: A list of EntityType objects created from the response.

        Note:
            If an exception occurs during the API request, it logs the error
            and returns an empty list. The failure is not cached, so the next
            call retries the request.
        """
        if self._types_cache is not None:
            return self._types_cache

        try:
            resp = self._client._get("/entity_slices")
            entity_types = self._create_entity_type_list(resp)
        except Exception as e:
            _logger.error(f"Error fetching entity types: {e}")
            self._types_cache = None
            return []

        self._types_cache = entity_types
        return entity_types

    def get_type_by_name(self, name: str) -> EntityType | None:
        """Retrieve an EntityType object from the list of entity types by its name.

        Args:
            name (str): The name of the entity type to search for.

        Returns:
            (EntityType | None): The EntityType object with the matching name if found, otherwise None.
        """
        return next(
            (et for et in self.get_types() if et.slice_name == name),
            None,
        )

    def get_types_with_key_fields(self, key_field_ids: List[str]) -> List[EntityType]:
        """Return a list of EntityType objects that contain all the specified key field IDs.

        Args:
            key_field_ids (List[str]): A list of key field IDs to filter entity types.
                An empty list matches every entity type.

        Returns:
            List[EntityType]: A list of EntityType instances where each entity type includes all the given key field IDs.
        """
        # Build the set once instead of re-scanning the argument per type.
        required = set(key_field_ids)
        return [et for et in self.get_types() if required.issubset(et.key_field_ids)]

    def get_type_exact_keys(self, key_field_ids: List[str]) -> EntityType | None:
        """Retrieve an EntityType object whose key_field_ids exactly match the provided list.

        The comparison ignores order and duplicates (both sides are compared
        as sets).

        Args:
            key_field_ids (List[str]): A list of key field IDs to match against entity types.

        Returns:
            (EntityType | None): The matching EntityType object if found; otherwise, None.
        """
        wanted = set(key_field_ids)
        # Use this service's own cached list, like the sibling lookups do.
        return next(
            (et for et in self.get_types() if set(et.key_field_ids) == wanted),
            None,
        )