aind-data-transfer-service 1.12.1__py3-none-any.whl → 1.13.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
aind_data_transfer_service/__init__.py
@@ -1,7 +1,7 @@
  """Init package"""
  import os

- __version__ = "1.12.1"
+ __version__ = "1.13.0"

  # Global constants
  OPEN_DATA_BUCKET_NAME = os.getenv("OPEN_DATA_BUCKET_NAME", "open")
aind_data_transfer_service/configs/csv_handler.py
@@ -1,59 +1,118 @@
  """Module to handle processing legacy csv files"""

- import json
+ import re
+ from datetime import datetime

- from aind_data_transfer_models.core import (
-     BasicUploadJobConfigs,
-     CodeOceanPipelineMonitorConfigs,
-     ModalityConfigs,
+ from aind_data_schema_models.modalities import Modality
+ from aind_data_schema_models.platforms import Platform
+
+ from aind_data_transfer_service.models.core import Task, UploadJobConfigsV2
+
+ DATETIME_PATTERN2 = re.compile(
+     r"^\d{1,2}/\d{1,2}/\d{4} \d{1,2}:\d{2}:\d{2} [APap][Mm]$"
  )


- def map_csv_row_to_job(row: dict) -> BasicUploadJobConfigs:
+ def map_csv_row_to_job(row: dict) -> UploadJobConfigsV2:
      """
-     Maps csv row into a BasicUploadJobConfigs model
+     Maps csv row into a UploadJobConfigsV2 model. This attempts to be somewhat
+     backwards compatible with previous csv files.
      Parameters
      ----------
      row : dict

      Returns
      -------
-     BasicUploadJobConfigs
+     UploadJobConfigsV2

      """
      modality_configs = dict()
-     basic_job_configs = dict()
+     job_configs = dict()
+     check_s3_folder_exists_task = None
+     final_check_s3_folder_exist = None
+     codeocean_tasks = dict()
      for key, value in row.items():
          # Strip white spaces and replace dashes with underscores
          clean_key = str(key).strip(" ").replace("-", "_")
          clean_val = str(value).strip(" ")
-         # Replace empty strings with None.
-         clean_val = None if clean_val is None or clean_val == "" else clean_val
+         # Check empty strings or None values
+         if clean_val is None or clean_val == "":
+             continue
          if clean_key.startswith("modality"):
              modality_parts = clean_key.split(".")
-             if len(modality_parts) == 1:
-                 modality_key = modality_parts[0]
-                 sub_key = "modality"
+             modality_key = modality_parts[0]
+             sub_key = (
+                 "modality" if len(modality_parts) == 1 else modality_parts[1]
+             )
+             modality_configs.setdefault(modality_key, dict())
+             # Temp backwards compatibility check
+             if sub_key == "source":
+                 sub_key = "input_source"
+             if sub_key in ["process_capsule_id", "capsule_id", "pipeline_id"]:
+                 if sub_key == "pipeline_id":
+                     codeocean_pipeline_monitor_settings = {
+                         "pipeline_monitor_settings": {
+                             "run_params": {"pipeline_id": clean_val}
+                         }
+                     }
+                 else:
+                     codeocean_pipeline_monitor_settings = {
+                         "pipeline_monitor_settings": {
+                             "run_params": {"capsule_id": clean_val}
+                         }
+                     }
+                 codeocean_tasks[modality_key] = Task(
+                     skip_task=False,
+                     job_settings=codeocean_pipeline_monitor_settings,
+                 )
              else:
-                 modality_key = modality_parts[0]
-                 sub_key = modality_parts[1]
-             if (
-                 modality_configs.get(modality_key) is None
-                 and clean_val is not None
-             ):
-                 modality_configs[modality_key] = {sub_key: clean_val}
-             elif clean_val is not None:
                  modality_configs[modality_key].update({sub_key: clean_val})
-         elif clean_key == "job_type":
-             if clean_val is not None:
-                 codeocean_configs = json.loads(
-                     CodeOceanPipelineMonitorConfigs().model_dump_json()
-                 )
-                 codeocean_configs["job_type"] = clean_val
-                 basic_job_configs["codeocean_configs"] = codeocean_configs
+         elif clean_key == "force_cloud_sync" and clean_val.upper() in [
+             "TRUE",
+             "T",
+         ]:
+             check_s3_folder_exists_task = {"skip_task": True}
+             final_check_s3_folder_exist = {"skip_task": True}
          else:
-             basic_job_configs[clean_key] = clean_val
-     modalities = []
-     for modality_value in modality_configs.values():
-         modalities.append(ModalityConfigs(**modality_value))
-     return BasicUploadJobConfigs(modalities=modalities, **basic_job_configs)
+             job_configs[clean_key] = clean_val
+     # Rename codeocean config keys with correct modality
+     keys = list(codeocean_tasks.keys())
+     for key in keys:
+         modality_abbreviation = modality_configs[key]["modality"]
+         codeocean_tasks[modality_abbreviation] = codeocean_tasks.pop(key)
+     # Create Tasks from parsed configs
+     modality_tasks = {
+         m.pop("modality"): Task(job_settings=m)
+         for m in modality_configs.values()
+         if m.get("modality") is not None
+     }
+     metadata_task = (
+         Task(job_settings={"metadata_dir": job_configs.pop("metadata_dir")})
+         if "metadata_dir" in job_configs
+         else None
+     )
+     tasks = {
+         "gather_preliminary_metadata": metadata_task,
+         "check_s3_folder_exists_task": check_s3_folder_exists_task,
+         "final_check_s3_folder_exist": final_check_s3_folder_exist,
+         "modality_transformation_settings": modality_tasks,
+         "codeocean_pipeline_settings": None
+         if codeocean_tasks == dict()
+         else codeocean_tasks,
+     }
+     job_configs.update(
+         {
+             "platform": Platform.from_abbreviation(job_configs["platform"]),
+             "modalities": [
+                 Modality.from_abbreviation(m) for m in modality_tasks.keys()
+             ],
+             "tasks": {k: v for k, v in tasks.items() if v is not None},
+         }
+     )
+     acq_dt = job_configs.get("acq_datetime")
+     if acq_dt is not None and re.match(DATETIME_PATTERN2, acq_dt):
+         job_configs["acq_datetime"] = datetime.strptime(
+             acq_dt, "%m/%d/%Y %I:%M:%S %p"
+         )
+
+     return UploadJobConfigsV2(**job_configs)
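For orientation, a minimal usage sketch of the new mapper follows. The column names and values are illustrative (drawn from the legacy headers and sample values that appear elsewhere in this diff); whether the resulting UploadJobConfigsV2 actually validates depends on that model and on the server-side validation context, neither of which is shown here.

from aind_data_transfer_service.configs.csv_handler import map_csv_row_to_job

# A legacy-style row, roughly as csv.DictReader would yield it.
legacy_row = {
    "project_name": "Behavior Platform",
    "platform": "behavior",
    "acq_datetime": "10/4/2023 4:00:00 AM",  # matches DATETIME_PATTERN2
    "subject_id": "123456",
    "modality0": "behavior-videos",
    "modality0.source": "/allen/aind/stage/fake/dir",  # remapped to input_source
    "modality0.process_capsule_id": "1f999652-00a0-4c4b-99b5-64c2985ad070",
    "force_cloud_sync": "true",
}
job = map_csv_row_to_job(row=legacy_row)
# Per the logic above: acq_datetime is parsed with "%m/%d/%Y %I:%M:%S %p",
# each modalityN column group becomes a Task under
# tasks["modality_transformation_settings"], process_capsule_id becomes a
# Code Ocean Task keyed by the modality abbreviation, and force_cloud_sync
# skips both S3 folder-existence checks.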
aind_data_transfer_service/configs/job_upload_template.py
@@ -1,4 +1,5 @@
  """Module to configure and create xlsx job upload template"""
+
  import datetime
  from io import BytesIO
  from typing import Any, Dict, List
@@ -19,23 +20,21 @@ class JobUploadTemplate:
      NUM_TEMPLATE_ROWS = 20
      XLSX_DATETIME_FORMAT = "YYYY-MM-DDTHH:mm:ss"
      HEADERS = [
+         "job_type",
          "project_name",
-         "process_capsule_id",
-         "input_data_mount",
          "platform",
          "acq_datetime",
          "subject_id",
          "metadata_dir",
          "modality0",
-         "modality0.source",
+         "modality0.input_source",
          "modality1",
-         "modality1.source",
+         "modality1.input_source",
      ]
      SAMPLE_JOBS = [
          [
+             "default",
              "Behavior Platform",
-             "1f999652-00a0-4c4b-99b5-64c2985ad070",
-             "data_mount",
              Platform.BEHAVIOR.abbreviation,
              datetime.datetime(2023, 10, 4, 4, 0, 0),
              "123456",
@@ -46,9 +45,8 @@ class JobUploadTemplate:
              "/allen/aind/stage/fake/dir",
          ],
          [
+             "default",
              "Ophys Platform - SLAP2",
-             None,
-             None,
              Platform.SMARTSPIM.abbreviation,
              datetime.datetime(2023, 3, 4, 16, 30, 0),
              "654321",
@@ -57,9 +55,8 @@ class JobUploadTemplate:
              "/allen/aind/stage/fake/dir",
          ],
          [
+             "default",
              "Ephys Platform",
-             None,
-             None,
              Platform.ECEPHYS.abbreviation,
              datetime.datetime(2023, 1, 30, 19, 1, 0),
              "654321",
aind_data_transfer_service/server.py
@@ -227,12 +227,23 @@ async def validate_csv(request: Request):
          xlsx_book.close()
          data = csv_io.getvalue()
          csv_reader = csv.DictReader(io.StringIO(data))
+         params = AirflowDagRunsRequestParameters(
+             dag_ids=["transform_and_upload_v2"],
+             states=["running", "queued"],
+         )
+         _, current_jobs = await get_airflow_jobs(
+             params=params, get_confs=True
+         )
+         context = {
+             "job_types": get_job_types("v2"),
+             "project_names": get_project_names(),
+             "current_jobs": current_jobs,
+         }
          for row in csv_reader:
              if not any(row.values()):
                  continue
              try:
-                 project_names = get_project_names()
-                 with validation_context({"project_names": project_names}):
+                 with validation_context_v2(context):
                      job = map_csv_row_to_job(row=row)
                      # Construct hpc job setting most of the vars from the env
                      basic_jobs.append(
@@ -1086,7 +1097,6 @@ routes = [
      ),
      Route("/api/submit_hpc_jobs", endpoint=submit_hpc_jobs, methods=["POST"]),
      Route("/api/v1/validate_json", endpoint=validate_json, methods=["POST"]),
-     Route("/api/v1/validate_csv", endpoint=validate_csv, methods=["POST"]),
      Route("/api/v1/submit_jobs", endpoint=submit_jobs, methods=["POST"]),
      Route(
          "/api/v1/get_job_status_list",
@@ -1101,6 +1111,7 @@ routes = [
          endpoint=get_parameter,
          methods=["GET"],
      ),
+     Route("/api/v2/validate_csv", endpoint=validate_csv, methods=["POST"]),
      Route(
          "/api/v2/validate_json", endpoint=validate_json_v2, methods=["POST"]
      ),
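The CSV validation endpoint moves from /api/v1 to /api/v2. A hypothetical client call is sketched below; the host and the multipart field name ("file") are assumptions, since neither appears in this diff.

import requests

# Hypothetical call to the re-versioned endpoint; host and the "file" form
# field name are assumptions not confirmed by this diff.
with open("jobs.xlsx", "rb") as f:
    response = requests.post(
        "http://localhost:5000/api/v2/validate_csv",
        files={"file": ("jobs.xlsx", f)},
    )
print(response.status_code)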
aind_data_transfer_service/templates/index.html
@@ -88,7 +88,7 @@
      var jobs = []
      var parsing_errors = []
      const msgTypes = {
-         "validatePending": "Sending...",
+         "validatePending": "Validating...",
          "validateSuccess": "Successfully validated jobs from file.",
          "validateError": "Error validating jobs from file.",
          "submitPending": "Submitting jobs. Please do not refresh or re-submit...",
@@ -129,7 +129,7 @@
      }
      var formData = new FormData(document.getElementById("file_form"));
      $.ajax({
-         url: "/api/v1/validate_csv",
+         url: "/api/v2/validate_csv",
          type: "POST",
          data: formData,
          cache: false,
@@ -146,30 +146,37 @@
      let jobsLength = jobs.length;
      var table = document.createElement('table'), tr, td, row;
      addTableRow(
-         [ "project_name", "process_capsule_id", "input_data_mount", "s3_bucket", "platform", "subject_id", "acq_datetime", "metadata_dir", "modality", "modality.source" ],
+         [ "job_type", "project_name", "s3_bucket", "platform", "subject_id", "acq_datetime", "metadata_dir", "modality", "modality.input_source" ],
          table, tr, td, true
      );
      for (row = 0; row < jobsLength; row++) {
          let job = jobs[row];
-         let modalities = job.modalities;
-         let modalitiesLength = modalities.length;
+         let modalities = job.tasks?.modality_transformation_settings;
+         if (modalities) {
+             modalities = Object.entries(modalities).map(([key, value]) => ({
+                 abbreviation: key,
+                 input_source: value.job_settings?.input_source
+             }))
+         }
+         let modalitiesLength = modalities ? modalities.length: 0;
+         let metadata_dir = job.tasks?.gather_preliminary_metadata?.job_settings?.metadata_dir
          addTableRow(
-             [ { value: job.project_name, rowspan: modalitiesLength },
-               { value: job.process_capsule_id ?? "", rowspan: modalitiesLength },
-               { value: job.input_data_mount ?? "", rowspan: modalitiesLength },
+             [
+                 { value: job.job_type, rowspan: modalitiesLength },
+                 { value: job.project_name, rowspan: modalitiesLength },
                  { value: job.s3_bucket, rowspan: modalitiesLength },
                  { value: job.platform.abbreviation, rowspan: modalitiesLength },
                  { value: job.subject_id, rowspan: modalitiesLength },
                  { value: job.acq_datetime, rowspan: modalitiesLength },
-                 { value: job.metadata_dir ?? "", rowspan: modalitiesLength },
-                 modalities ? modalities[0].modality.abbreviation : "",
-                 modalities ? modalities[0].source : ""
+                 { value: metadata_dir ?? "", rowspan: modalitiesLength },
+                 modalities ? modalities[0].abbreviation : "",
+                 modalities ? modalities[0].input_source : ""
              ], table, tr, td, false
          );
          for (mRow = 1; mRow < modalitiesLength; mRow++) {
              let modality = modalities[mRow]
              addTableRow(
-                 [ modality.modality.abbreviation, modality.source ],
+                 [ modality.abbreviation, modality.input_source ],
                  table, tr, td, false
              );
          }
@@ -212,7 +219,7 @@
      job_settings["email_notification_types"] = mail_type
      job_settings["upload_jobs"] = jobs
      $.ajax({
-         url: "/api/v1/submit_jobs",
+         url: "/api/v2/submit_jobs",
          type: "POST",
          data: JSON.stringify(job_settings),
          contentType: 'application/json; charset=utf-8',
@@ -229,7 +236,12 @@
          error: function(data) {
              jobs = []
              setMessage(msgTypes.submitError);
-             parsing_errors = data.responseJSON["data"]["errors"]
+             let errors = data.responseJSON["data"]["errors"];
+             try {
+                 parsing_errors = JSON.parse(errors).map((err) => JSON.stringify(err));
+             } catch (e) {
+                 parsing_errors = (typeof errors == "string") ? [errors] : errors;
+             }
              $("#response").html(parsing_errors.map((errStr) => {
                  return `<li>${errStr}</li>`
              }));
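For reference, the fields the updated table-rendering script reads from each validated job, expressed as a Python literal of the JSON shape. The keys come directly from the accessors above; the values are placeholders.

job = {
    "job_type": "default",
    "project_name": "Ephys Platform",
    "s3_bucket": "private",
    "platform": {"abbreviation": "ecephys"},
    "subject_id": "654321",
    "acq_datetime": "2023-01-30T19:01:00",
    "tasks": {
        "gather_preliminary_metadata": {
            "job_settings": {"metadata_dir": "/allen/aind/stage/fake/dir"}
        },
        "modality_transformation_settings": {
            "ecephys": {
                "job_settings": {"input_source": "/allen/aind/stage/fake/dir"}
            }
        },
    },
}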
dist-info/METADATA
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: aind-data-transfer-service
- Version: 1.12.1
+ Version: 1.13.0
  Summary: Service that handles requests to upload data to the cloud
  Author: Allen Institute for Neural Dynamics
  License: MIT
dist-info/RECORD
@@ -1,23 +1,23 @@
- aind_data_transfer_service/__init__.py,sha256=3GXmPddTOfb9RSGlgi6HS1B_Iid1wc2Y9npwJ5UP0oY,272
+ aind_data_transfer_service/__init__.py,sha256=Y0HuFDE9tYQTP-S3DeIW839zx2wB5XnL3eWslZat-n0,272
  aind_data_transfer_service/log_handler.py,sha256=c7a-gLmZeRpeCUBwCz6XsTszWXQeQdR7eKZtas4llXM,1700
- aind_data_transfer_service/server.py,sha256=OL53huEa654aCjOWEafP9rV1Qp4l_qSX6R_Tsis_ing,41252
+ aind_data_transfer_service/server.py,sha256=ozavVyxMpZEwDAQ0OwRAwy_9CHXoLcUZ2rriSX1qH04,41643
  aind_data_transfer_service/configs/__init__.py,sha256=9W5GTuso9Is1B9X16RXcdb_GxasZvj6qDzOBDv0AbTc,36
- aind_data_transfer_service/configs/csv_handler.py,sha256=-iB_6tRoD5sKA9K3CCcyFHHmnNSLN7bVnmS-vLsMMRE,2085
+ aind_data_transfer_service/configs/csv_handler.py,sha256=9jM0fUlWCzmqTC7ubAeFCl0eEIX5BQvHcPPPTPngcog,4374
  aind_data_transfer_service/configs/job_configs.py,sha256=T-h5N6lyY9xTZ_xg_5FxkyYuMdagApbE6xalxFQ-bqA,18848
- aind_data_transfer_service/configs/job_upload_template.py,sha256=l1pZd_jT3dml0pdVr9Tjml8KRi5-LkHbSGIEAjFMgps,5096
+ aind_data_transfer_service/configs/job_upload_template.py,sha256=dJo_nuGIjHNzkGtZmJd7-qqRz8s8R8RcWnYV3Hi_8QE,4990
  aind_data_transfer_service/hpc/__init__.py,sha256=YNc68YNlmXwKIPFMIViz_K4XzVVHkLPEBOFyO5DKMKI,53
  aind_data_transfer_service/hpc/client.py,sha256=-JSxAWn96_XOIDwhsXAHK3TZAdckddUhtcCzRHnaTqA,4700
  aind_data_transfer_service/hpc/models.py,sha256=-7HhV16s_MUyKPy0x0FGIbnq8DPL2qJAzJO5G7003AE,16184
  aind_data_transfer_service/models/__init__.py,sha256=Meym73bEZ9nQr4QoeyhQmV3nRTYtd_4kWKPNygsBfJg,25
  aind_data_transfer_service/models/core.py,sha256=rVOCOBOv101w6fMXt5o9anxWlywMHSTL0uQT4u_T6H0,9985
  aind_data_transfer_service/models/internal.py,sha256=MGQrPuHrR21nn4toqdTCIEDW6MG7pWRajoPqD3j-ST0,9706
- aind_data_transfer_service/templates/index.html,sha256=8I4QLC4pAPKD3UUrrt3sUDP8Ynopf7B8YZWC7_VabjI,10624
+ aind_data_transfer_service/templates/index.html,sha256=KoqedswLWOiqgtkk2Z3HrDfEJycS_SJ7ueiuYGhL2Yo,11289
  aind_data_transfer_service/templates/job_params.html,sha256=vqIdNQsZTM0kq3Wa9u-VjmmMa0UzBTpK02WpOSatXBQ,8817
  aind_data_transfer_service/templates/job_status.html,sha256=vIOaJGJM78hOWTLTAzMfHjG9sNqPvS-muAyXYQtpnYI,16901
  aind_data_transfer_service/templates/job_tasks_table.html,sha256=rWFukhjZ4dhPyabe372tmi4lbQS2fyELZ7Awbn5Un4g,6181
  aind_data_transfer_service/templates/task_logs.html,sha256=y1GnQft0S50ghPb2xJDjAlefymB9a4zYdMikUFV7Tl4,918
- aind_data_transfer_service-1.12.1.dist-info/licenses/LICENSE,sha256=U0Y7B3gZJHXpjJVLgTQjM8e_c8w4JJpLgGhIdsoFR1Y,1092
- aind_data_transfer_service-1.12.1.dist-info/METADATA,sha256=KgJjMeGNKXCOMiRv3dkXPHsTUN0MQbzuk-pN_CrflKQ,2410
- aind_data_transfer_service-1.12.1.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
- aind_data_transfer_service-1.12.1.dist-info/top_level.txt,sha256=XmxH0q27Jholj2-VYh-6WMrh9Lw6kkuCX_fdsj3SaFE,27
- aind_data_transfer_service-1.12.1.dist-info/RECORD,,
+ aind_data_transfer_service-1.13.0.dist-info/licenses/LICENSE,sha256=U0Y7B3gZJHXpjJVLgTQjM8e_c8w4JJpLgGhIdsoFR1Y,1092
+ aind_data_transfer_service-1.13.0.dist-info/METADATA,sha256=3gI2KXcWaR-WWKJNtGQrUqxRonF1CwOgMvQYz7RFsqk,2410
+ aind_data_transfer_service-1.13.0.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
+ aind_data_transfer_service-1.13.0.dist-info/top_level.txt,sha256=XmxH0q27Jholj2-VYh-6WMrh9Lw6kkuCX_fdsj3SaFE,27
+ aind_data_transfer_service-1.13.0.dist-info/RECORD,,
dist-info/WHEEL
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: setuptools (80.3.1)
+ Generator: setuptools (80.7.1)
  Root-Is-Purelib: true
  Tag: py3-none-any