albert 1.10.0rc2__py3-none-any.whl → 1.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- albert/__init__.py +1 -1
- albert/client.py +5 -0
- albert/collections/custom_templates.py +3 -0
- albert/collections/data_templates.py +118 -264
- albert/collections/entity_types.py +19 -3
- albert/collections/inventory.py +1 -1
- albert/collections/notebooks.py +154 -26
- albert/collections/parameters.py +1 -0
- albert/collections/property_data.py +384 -280
- albert/collections/reports.py +4 -0
- albert/collections/synthesis.py +292 -0
- albert/collections/tasks.py +2 -1
- albert/collections/worksheets.py +3 -0
- albert/core/shared/models/base.py +3 -1
- albert/core/shared/models/patch.py +1 -1
- albert/resources/batch_data.py +4 -2
- albert/resources/cas.py +3 -1
- albert/resources/custom_fields.py +3 -1
- albert/resources/data_templates.py +60 -12
- albert/resources/inventory.py +6 -4
- albert/resources/lists.py +3 -1
- albert/resources/notebooks.py +12 -7
- albert/resources/parameter_groups.py +3 -1
- albert/resources/property_data.py +64 -5
- albert/resources/sheets.py +16 -14
- albert/resources/synthesis.py +61 -0
- albert/resources/tags.py +3 -1
- albert/resources/tasks.py +4 -7
- albert/resources/workflows.py +4 -2
- albert/utils/data_template.py +392 -37
- albert/utils/property_data.py +638 -0
- albert/utils/tasks.py +3 -3
- {albert-1.10.0rc2.dist-info → albert-1.11.0.dist-info}/METADATA +1 -1
- {albert-1.10.0rc2.dist-info → albert-1.11.0.dist-info}/RECORD +36 -33
- {albert-1.10.0rc2.dist-info → albert-1.11.0.dist-info}/WHEEL +0 -0
- {albert-1.10.0rc2.dist-info → albert-1.11.0.dist-info}/licenses/LICENSE +0 -0
albert/utils/data_template.py
CHANGED
@@ -1,24 +1,33 @@
 """Utilities for working with data templates."""
 
+from __future__ import annotations
+
 import uuid
 from pathlib import Path
 from typing import TYPE_CHECKING
 
 from tenacity import retry, stop_after_attempt, wait_exponential
 
+from albert.collections.attachments import AttachmentCollection
+from albert.collections.files import FileCollection
 from albert.core.logging import logger
 from albert.core.shared.identifiers import AttachmentId, DataColumnId, DataTemplateId
 from albert.core.shared.models.patch import (
     GeneralPatchDatum,
     GeneralPatchPayload,
+    PatchDatum,
     PatchOperation,
-    PGPatchDatum,
 )
 from albert.exceptions import AlbertHTTPError
 from albert.resources.attachments import Attachment, AttachmentCategory
-from albert.resources.data_templates import DataColumnValue, DataTemplate
+from albert.resources.data_templates import DataColumnValue, DataTemplate, ImportMode
 from albert.resources.files import FileNamespace
-from albert.resources.parameter_groups import
+from albert.resources.parameter_groups import (
+    DataType,
+    EnumValidationValue,
+    ParameterValue,
+    ValueValidation,
+)
 from albert.resources.tasks import CsvCurveInput, CsvCurveResponse, TaskMetadata
 from albert.resources.worker_jobs import (
     WORKER_JOB_PENDING_STATES,
@@ -28,21 +37,41 @@ from albert.resources.worker_jobs import (
     WorkerJobState,
 )
 from albert.utils.tasks import (
+    CSV_EXTENSIONS,
     determine_extension,
     extract_extensions_from_attachment,
+    fetch_csv_table_rows,
     map_csv_headers_to_columns,
     resolve_attachment,
 )
 
 if TYPE_CHECKING:
-    from albert.collections.attachments import AttachmentCollection
-    from albert.collections.files import FileCollection
     from albert.core.session import AlbertSession
+    from albert.resources.data_templates import CurveExample, ImageExample
 
 
 _CURVE_JOB_POLL_INTERVAL = 2.0
 _CURVE_JOB_MAX_ATTEMPTS = 20
 _CURVE_JOB_MAX_WAIT = 10.0
+SUPPORTED_IMAGE_EXTENSIONS = [
+    ".png",
+    ".jpg",
+    ".jpeg",
+    ".jfif",
+    ".pjpeg",
+    ".pjp",
+    ".svg",
+    ".gif",
+    ".apng",
+    ".avif",
+    ".webp",
+    ".bmp",
+    ".ico",
+    ".cur",
+    ".tif",
+    ".tiff",
+    ".heic",
+]
 
 
 def get_target_data_column(
@@ -104,7 +133,7 @@ def validate_data_column_type(*, target_column: DataColumnValue) -> None:
 
 def get_script_attachment(
     *,
-    attachment_collection:
+    attachment_collection: AttachmentCollection,
     data_template_id: DataTemplateId,
     column_id: DataColumnId,
 ) -> tuple[Attachment, set[str]]:
@@ -144,15 +173,22 @@ def get_script_attachment(
 
 def prepare_curve_input_attachment(
     *,
-    attachment_collection:
+    attachment_collection: AttachmentCollection,
     data_template_id: DataTemplateId,
     column_id: DataColumnId,
     allowed_extensions: set[str] | None,
     file_path: str | Path | None,
     attachment_id: AttachmentId | None,
     require_signed_url: bool,
+    parent_id: str | None = None,
+    upload_key: str | None = None,
+    auto_upload_key: bool = True,
 ) -> Attachment:
-    """Resolve the input attachment, uploading a file when required, and validate it.
+    """Resolve the input attachment, uploading a file when required, and validate it.
+
+    When ``parent_id`` is provided, the attachment is created under that parent.
+    Set ``auto_upload_key=False`` to skip curve-input key generation.
+    """
 
     if (attachment_id is None) == (file_path is None):
         raise ValueError("Provide exactly one of 'attachment_id' or 'file_path'.")
@@ -161,7 +197,6 @@ def prepare_curve_input_attachment(
     normalized_extensions = {ext.lower().lstrip(".") for ext in allowed_extensions if ext}
     display_extensions = sorted(allowed_extensions) if allowed_extensions else []
 
-    upload_key: str | None = None
     resolved_path: Path | None = None
     if file_path is not None:
         resolved_path = Path(file_path)
@@ -169,12 +204,15 @@ def prepare_curve_input_attachment(
         if not suffix:
             derived_extension = determine_extension(filename=resolved_path.name)
             suffix = f".{derived_extension}" if derived_extension else ""
-        upload_key
+        if auto_upload_key and upload_key is None:
+            upload_key = (
+                f"curve-input/{data_template_id}/{column_id}/{uuid.uuid4().hex[:10]}{suffix}"
+            )
 
     resolved_attachment_id = AttachmentId(
         resolve_attachment(
             attachment_collection=attachment_collection,
-            task_id=data_template_id,
+            task_id=parent_id or data_template_id,
             file_path=resolved_path or file_path,
             attachment_id=str(attachment_id) if attachment_id else None,
             allowed_extensions=normalized_extensions,
@@ -206,13 +244,14 @@ def prepare_curve_input_attachment(
 
 def exec_curve_script(
     *,
-    session:
-    api_version: str,
+    session: AlbertSession,
     data_template_id: DataTemplateId,
     column_id: DataColumnId,
     raw_attachment: Attachment,
-    file_collection:
+    file_collection: FileCollection,
     script_attachment_signed_url: str,
+    task_id: str | None = None,
+    block_id: str | None = None,
 ) -> tuple[str, dict[str, str]]:
     """Execute the curve preprocessing script and return the processed key and column headers."""
 
@@ -220,14 +259,24 @@ def exec_curve_script(
     if not raw_signed_url:
         raise ValueError("Curve input attachment does not include a signed URL.")
 
-
+    if task_id and block_id:
+        processed_input_key = (
+            f"curve-input/{task_id}/{block_id}/{data_template_id}/"
+            f"{column_id}/{uuid.uuid4().hex[:10]}.csv"
+        )
+    else:
+        processed_input_key = f"curve-input/{data_template_id}/{column_id}/{uuid.uuid4().hex}.csv"
     content_type = raw_attachment.mime_type or "text/csv"
     upload_url = file_collection.get_signed_upload_url(
         name=processed_input_key,
         namespace=FileNamespace.RESULT,
         content_type=content_type,
     )
-    metadata_payload = TaskMetadata(
+    metadata_payload = TaskMetadata(
+        filename=raw_attachment.name or "",
+        task_id=task_id or data_template_id,
+        block_id=block_id,
+    )
     csv_payload = CsvCurveInput(
         script_s3_url=script_attachment_signed_url,
         data_s3_url=raw_signed_url,
@@ -235,7 +284,7 @@ def exec_curve_script(
         task_metadata=metadata_payload,
     )
     response = session.post(
-
+        "/api/v3/proxy/csvtable/curve",
         json=csv_payload.model_dump(by_alias=True, mode="json", exclude_none=True),
     )
     curve_response = CsvCurveResponse.model_validate(response.json())
@@ -294,19 +343,30 @@ def derive_curve_csv_mapping(
 
 def create_curve_import_job(
     *,
-    session:
+    session: AlbertSession,
     data_template_id: DataTemplateId,
     column_id: DataColumnId,
     csv_mapping: dict[str, str],
     raw_attachment: Attachment,
     processed_input_key: str,
+    task_id: str | None = None,
+    block_id: str | None = None,
 ) -> tuple[str, str, str]:
     """Create the curve import job and wait for completion."""
     partition_uuid = str(uuid.uuid4())
-
-
-
-
+    if (task_id is None) != (block_id is None):
+        raise ValueError("task_id and block_id must be provided together for curve imports.")
+    if task_id and block_id:
+        s3_output_key = (
+            f"curve-output/{data_template_id}/{column_id}/"
+            f"parentid={task_id}/blockid={block_id}/"
+            f"datatemplateid={data_template_id}/uuid={partition_uuid}"
+        )
+    else:
+        s3_output_key = (
+            f"curve-output/{data_template_id}/{column_id}/"
+            f"parentid=null/blockid=null/datatemplateid={data_template_id}/uuid={partition_uuid}"
+        )
     namespace = raw_attachment.namespace or "result"
     worker_metadata = WorkerJobMetadata(
         parent_type="DAT",
@@ -339,12 +399,13 @@ def create_curve_import_job(
         reraise=True,
     )
     def _poll_worker_job() -> WorkerJob:
+        """Poll a worker job status for completion."""
        status_response = session.get(f"/api/v3/worker-jobs/{job_id}")
        current_job = WorkerJob.model_validate(status_response.json())
        state = current_job.state

        if state in WORKER_JOB_PENDING_STATES:
-            logger.
+            logger.info(
                 "Curve data import in progress for template %s column %s",
                 data_template_id,
                 column_id,
@@ -392,22 +453,22 @@ def build_curve_import_patch_payload(
         },
     }
     actions = [
-
+        PatchDatum(
             operation=PatchOperation.ADD.value,
             attribute="jobId",
             new_value=job_id,
         ),
-
+        PatchDatum(
             operation=PatchOperation.ADD.value,
             attribute="csvMapping",
             new_value=csv_mapping,
         ),
-
+        PatchDatum(
             operation=PatchOperation.ADD.value,
             attribute="value",
             new_value=value_payload,
         ),
-
+        PatchDatum(
             operation=PatchOperation.ADD.value,
             attribute="athenaPartitionKey",
             new_value=partition_uuid,
@@ -424,13 +485,307 @@ def build_curve_import_patch_payload(
     )
 
 
-def
-
-
-
-
-
-
-
-
-
+def add_parameter_enums(
+    *,
+    session: AlbertSession,
+    base_path: str,
+    data_template_id: DataTemplateId,
+    new_parameters: list[ParameterValue],
+) -> dict[str, list[EnumValidationValue]]:
+    """Add enum values to newly created parameters and return updated enum sequences."""
+
+    data_template = DataTemplate(**session.get(f"{base_path}/{data_template_id}").json())
+    existing_parameters = data_template.parameter_values or []
+    enums_by_sequence: dict[str, list[EnumValidationValue]] = {}
+    for parameter in new_parameters:
+        this_sequence = next(
+            (
+                p.sequence
+                for p in existing_parameters
+                if p.id == parameter.id and p.short_name == parameter.short_name
+            ),
+            None,
+        )
+        enum_patches: list[dict[str, str]] = []
+        if (
+            parameter.validation
+            and len(parameter.validation) > 0
+            and isinstance(parameter.validation[0].value, list)
+        ):
+            existing_validation = (
+                [x for x in existing_parameters if x.sequence == parameter.sequence]
+                if existing_parameters
+                else []
+            )
+            existing_enums = (
+                [
+                    x
+                    for x in existing_validation[0].validation[0].value
+                    if isinstance(x, EnumValidationValue) and x.id is not None
+                ]
+                if (
+                    existing_validation
+                    and len(existing_validation) > 0
+                    and existing_validation[0].validation
+                    and len(existing_validation[0].validation) > 0
+                    and existing_validation[0].validation[0].value
+                    and isinstance(existing_validation[0].validation[0].value, list)
+                )
+                else []
+            )
+            updated_enums = (
+                [x for x in parameter.validation[0].value if isinstance(x, EnumValidationValue)]
+                if parameter.validation[0].value
+                else []
+            )
+
+            deleted_enums = [
+                x for x in existing_enums if x.id not in [y.id for y in updated_enums]
+            ]
+
+            new_enums = [x for x in updated_enums if x.id not in [y.id for y in existing_enums]]
+
+            matching_enums = [x for x in updated_enums if x.id in [y.id for y in existing_enums]]
+
+            for new_enum in new_enums:
+                enum_patches.append({"operation": "add", "text": new_enum.text})
+            for deleted_enum in deleted_enums:
+                enum_patches.append({"operation": "delete", "id": deleted_enum.id})
+            for matching_enum in matching_enums:
+                if (
+                    matching_enum.text
+                    != [x for x in existing_enums if x.id == matching_enum.id][0].text
+                ):
+                    enum_patches.append(
+                        {
+                            "operation": "update",
+                            "id": matching_enum.id,
+                            "text": matching_enum.text,
+                        }
+                    )
+
+        if enum_patches and this_sequence:
+            enum_response = session.put(
+                f"{base_path}/{data_template_id}/parameters/{this_sequence}/enums",
+                json=enum_patches,
+            )
+            enums_by_sequence[this_sequence] = [
+                EnumValidationValue(**x) for x in enum_response.json()
+            ]
+
+    return enums_by_sequence
+
+
+def upload_image_example_attachment(
+    *,
+    attachment_collection: AttachmentCollection,
+    data_template_id: DataTemplateId,
+    file_path: str | Path | None,
+    attachment_id: AttachmentId | None,
+    upload_key: str | None = None,
+) -> Attachment:
+    """Upload or resolve an image attachment for a data template example."""
+
+    supported_extensions = {ext.lstrip(".").lower() for ext in SUPPORTED_IMAGE_EXTENSIONS}
+    resolved_attachment_id = AttachmentId(
+        resolve_attachment(
+            attachment_collection=attachment_collection,
+            task_id=data_template_id,
+            file_path=file_path,
+            attachment_id=str(attachment_id) if attachment_id else None,
+            allowed_extensions=supported_extensions,
+            note_text=None,
+            upload_key=upload_key,
+        )
+    )
+    attachment = attachment_collection.get_by_id(id=resolved_attachment_id)
+    if supported_extensions:
+        attachment_ext = determine_extension(filename=attachment.name)
+        if attachment_ext and attachment_ext not in supported_extensions:
+            raise ValueError(
+                f"Attachment '{attachment.name}' is not a supported image type "
+                f"({sorted(supported_extensions)})."
+            )
+    return attachment
+
+
+def build_data_column_image_example_payload(
+    *,
+    target_column: DataColumnValue,
+    attachment: Attachment,
+) -> GeneralPatchPayload:
+    """Construct the patch payload to set an image example on a data column."""
+
+    key = attachment.key
+    file_name = attachment.name
+    if not key:
+        raise ValueError("Image attachment is missing an S3 key.")
+    if target_column.sequence is None:
+        raise ValueError("Data column sequence is required to patch image examples.")
+
+    value_payload = {
+        "fileName": file_name,
+        "s3Key": {
+            "original": key,
+            "thumb": key,
+            "preview": key,
+        },
+    }
+    action = PatchDatum(
+        operation=PatchOperation.ADD.value,
+        attribute="value",
+        new_value=value_payload,
+    )
+    return GeneralPatchPayload(
+        data=[
+            GeneralPatchDatum(
+                attribute="datacolumn",
+                colId=target_column.sequence,
+                actions=[action],
+            )
+        ]
+    )
+
+
+def ensure_data_column_accepts_images(*, target_column: DataColumnValue) -> None:
+    """Ensure the resolved data column is configured for image data."""
+
+    validations = target_column.validation or []
+    if not any(_validation_is_image(validation) for validation in validations):
+        raise ValueError(
+            f"Data column '{target_column.name}' must be an image-type column to add image examples."
+        )
+
+
+def _validation_is_curve(validation: ValueValidation | None) -> bool:
+    """Return True when validation indicates curve data."""
+    return isinstance(validation, ValueValidation) and validation.datatype == DataType.CURVE
+
+
+def _validation_is_image(validation: ValueValidation | None) -> bool:
+    """Return True when validation indicates image data."""
+    return isinstance(validation, ValueValidation) and validation.datatype == DataType.IMAGE
+
+
+def build_curve_example(
+    *,
+    session: AlbertSession,
+    data_template_id: DataTemplateId,
+    example: CurveExample,
+    target_column: DataColumnValue,
+) -> GeneralPatchPayload:
+    """Construct the patch payload for a curve example on a data template."""
+
+    validate_data_column_type(target_column=target_column)
+    column_id = target_column.data_column_id
+    if column_id is None:
+        raise ValueError("Target data column is missing an identifier.")
+    attachment_collection = AttachmentCollection(session=session)
+    file_collection = FileCollection(session=session)
+
+    script_attachment_signed_url: str | None = None
+
+    if example.mode is ImportMode.SCRIPT:
+        script_attachment, script_extensions = get_script_attachment(
+            attachment_collection=attachment_collection,
+            data_template_id=data_template_id,
+            column_id=column_id,
+        )
+        if not script_extensions:
+            raise ValueError("Script attachment must define allowed extensions.")
+        script_attachment_signed_url = script_attachment.signed_url
+        allowed_extensions = set(script_extensions)
+    else:
+        allowed_extensions = set(CSV_EXTENSIONS)
+    raw_attachment = prepare_curve_input_attachment(
+        attachment_collection=attachment_collection,
+        data_template_id=data_template_id,
+        column_id=column_id,
+        allowed_extensions=allowed_extensions,
+        file_path=example.file_path,
+        attachment_id=example.attachment_id,
+        require_signed_url=example.mode is ImportMode.SCRIPT,
+    )
+    raw_key = raw_attachment.key
+    if raw_attachment.id is None:
+        raise ValueError("Curve input attachment did not return an identifier.")
+    resolved_attachment_id = AttachmentId(raw_attachment.id)
+
+    processed_input_key: str = raw_key
+    column_headers: dict[str, str] = {}
+
+    if example.mode is ImportMode.SCRIPT:
+        processed_input_key, column_headers = exec_curve_script(
+            session=session,
+            data_template_id=data_template_id,
+            column_id=column_id,
+            raw_attachment=raw_attachment,
+            file_collection=file_collection,
+            script_attachment_signed_url=script_attachment_signed_url,
+        )
+    else:
+        table_rows = fetch_csv_table_rows(
+            session=session,
+            attachment_id=resolved_attachment_id,
+            headers_only=True,
+        )
+        header_row = table_rows[0]
+        if not isinstance(header_row, dict):
+            raise ValueError("Unexpected CSV header format returned by preview endpoint.")
+        column_headers = {
+            key: value
+            for key, value in header_row.items()
+            if isinstance(key, str) and isinstance(value, str) and value
+        }
+
+    csv_mapping = derive_curve_csv_mapping(
+        target_column=target_column,
+        column_headers=column_headers,
+        field_mapping=example.field_mapping,
+    )
+
+    job_id, partition_uuid, s3_output_key = create_curve_import_job(
+        session=session,
+        data_template_id=data_template_id,
+        column_id=column_id,
+        csv_mapping=csv_mapping,
+        raw_attachment=raw_attachment,
+        processed_input_key=processed_input_key,
+    )
+
+    return build_curve_import_patch_payload(
+        target_column=target_column,
+        job_id=job_id,
+        csv_mapping=csv_mapping,
+        raw_attachment=raw_attachment,
+        partition_uuid=partition_uuid,
+        s3_output_key=s3_output_key,
+    )
+
+
+def build_image_example(
+    *,
+    session: AlbertSession,
+    data_template_id: DataTemplateId,
+    example: ImageExample,
+    target_column: DataColumnValue,
+) -> GeneralPatchPayload:
+    """Construct the patch payload for an image example on a data template."""
+
+    ensure_data_column_accepts_images(target_column=target_column)
+    resolved_path = Path(example.file_path)
+    upload_ext = resolved_path.suffix.lower()
+    if not upload_ext:
+        raise ValueError("File extension is required for image examples.")
+    upload_key = f"imagedata/original/{data_template_id}/{uuid.uuid4().hex[:10]}{upload_ext}"
+    attachment_collection = AttachmentCollection(session=session)
+    attachment = upload_image_example_attachment(
+        attachment_collection=attachment_collection,
+        data_template_id=data_template_id,
+        file_path=example.file_path,
+        attachment_id=None,
+        upload_key=upload_key,
+    )
+    return build_data_column_image_example_payload(
+        target_column=target_column, attachment=attachment
+    )