pysodafair 0.1.66__py3-none-any.whl → 0.1.68__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pysoda/core/dataset_generation/upload.py +12 -7
- pysoda/core/metadata/code_description.py +37 -1
- pysoda/core/metadata/dataset_description.py +3 -1
- pysoda/core/metadata/performances.py +14 -1
- pysoda/core/metadata/resources.py +16 -1
- pysoda/core/metadata/samples.py +25 -74
- pysoda/core/metadata/sites.py +14 -1
- pysoda/core/metadata/subjects.py +51 -22
- pysoda/core/metadata_templates/.dss +1 -0
- pysoda/core/metadata_templates/dataset_description.xlsx +0 -0
- pysoda/core/metadata_templates/samples.xlsx +0 -0
- pysoda/core/metadata_templates/subjects.xlsx +0 -0
- pysoda/core/metadata_templates/submission.xlsx +0 -0
- pysoda/schema/code_description.json +121 -51
- pysoda/schema/dataset_description.json +35 -31
- pysoda/schema/manifest.json +1 -1
- pysoda/schema/performances.json +16 -6
- pysoda/schema/resources.json +2 -1
- pysoda/schema/samples.json +31 -5
- pysoda/schema/sites.json +4 -2
- pysoda/schema/subjects.json +41 -10
- pysoda/schema/submission_schema.json +7 -4
- pysoda/utils/exceptions.py +10 -2
- pysoda/utils/pennsieveAgentUtils.py +11 -2
- {pysodafair-0.1.66.dist-info → pysodafair-0.1.68.dist-info}/METADATA +2 -5
- {pysodafair-0.1.66.dist-info → pysodafair-0.1.68.dist-info}/RECORD +28 -27
- {pysodafair-0.1.66.dist-info → pysodafair-0.1.68.dist-info}/WHEEL +0 -0
- {pysodafair-0.1.66.dist-info → pysodafair-0.1.68.dist-info}/licenses/LICENSE +0 -0
pysoda/core/dataset_generation/upload.py
CHANGED

@@ -1,4 +1,5 @@
 
+from pysoda.utils.exceptions import PennsieveAgentRPCError
 from ...utils import (
 generate_options_set, generating_locally, generating_on_ps,
 uploading_with_ps_account, uploading_to_existing_ps_dataset,

@@ -2148,8 +2149,6 @@ def ps_upload_to_dataset(soda, ps, ds, resume=False):
 my_bf_existing_files_name_with_extension,
 ) = ps_get_existing_files_details(my_tracking_folder)
 
-logger.info(f"Existing files in Pennsieve: {my_bf_existing_files_name_with_extension}")
-
 list_local_files = []
 list_projected_names = []
 list_desired_names = []

@@ -2466,7 +2465,7 @@ def ps_upload_to_dataset(soda, ps, ds, resume=False):
 relative_path,
 )
 
-logger.info(f"
+logger.info(f"Amount of files to upload: {len(list_upload_files)} ")
 
 
 # return and mark upload as completed if nothing is added to the manifest

@@ -3032,6 +3031,7 @@ def ps_upload_to_dataset(soda, ps, ds, resume=False):
 end = timer()
 logger.info(f"Time for ps_upload_to_dataset function: {timedelta(seconds=end - start)}")
 except Exception as e:
+logger.error(f"An error occurred in ps_upload_to_dataset function: {str(e)}")
 # reset the total bytes uploaded for any file that has not been fully uploaded
 ums.set_main_total_generate_dataset_size(main_total_generate_dataset_size)
 ums.set_total_files_to_upload(total_files)

@@ -3056,6 +3056,7 @@ renaming_files_flow = False
 elapsed_time = None
 manifest_id = None
 origin_manifest_id = None
+curation_error_message = ""
 
 
 

@@ -3460,10 +3461,7 @@ def validate_dataset_structure(soda, resume):
 connect_pennsieve_client(accountname)
 except Exception as e:
 main_curate_status = "Done"
-
-raise Exception("The Pennsieve Agent cannot access datasets but needs to in order to work. Please try again. If the issue persists, please contact the SODA team. The SODA team will contact Pennsieve to help resolve this issue.")
-else:
-raise PennsieveAccountInvalid("Please select a valid Pennsieve account.")
+raise e
 
 if uploading_to_existing_ps_dataset(soda):
 # check that the Pennsieve dataset is valid

@@ -3560,6 +3558,7 @@ def reset_upload_session_environment(resume):
 global generated_dataset_id
 global bytes_file_path_dict
 global renaming_files_flow
+global curation_error_message
 
 start_generate = 0
 myds = ""

@@ -3574,6 +3573,7 @@ def reset_upload_session_environment(resume):
 main_curation_uploaded_files = 0
 uploaded_folder_counter = 0
 generated_dataset_id = None
+curation_error_message = ""
 
 main_curate_status = "Curating"
 main_curate_progress_message = "Starting dataset curation"

@@ -3602,6 +3602,7 @@ def main_curate_function(soda, resume):
 global manifest_id
 global origin_manifest_id
 global total_files
+global curation_error_message
 
 logger.info("Starting generating selected dataset")
 logger.info(f"Generating dataset metadata generate-options={soda['generate-dataset']}")

@@ -3627,7 +3628,9 @@ def main_curate_function(soda, resume):
 ps = connect_pennsieve_client(accountname)
 generate_dataset(soda, resume, ps)
 except Exception as e:
+logger.error(f"An error occurred in main_curate_function function: {str(e)}")
 main_curate_status = "Done"
+curation_error_message = str(e)
 raise e
 
 main_curate_status = "Done"

@@ -3665,6 +3668,7 @@ def main_curate_function_progress():
 global renaming_files_flow
 global ums
 global elapsed_time
+global curation_error_message
 
 
 prior_elapsed_time = ums.get_elapsed_time()

@@ -3691,6 +3695,7 @@ def main_curate_function_progress():
 "total_files_uploaded": main_curation_uploaded_files,
 "generated_dataset_id": myds["content"]["id"] if myds != "" else None, # when a new dataset gets generated log its id to our analytics
 "generated_dataset_int_id": myds["content"]["intId"] if myds != "" else None,
+"curation_error_message": curation_error_message,
 }
 
 
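Taken together, the upload.py changes route any exception raised during curation into a new module-level curation_error_message (reset at the start of each session), log it, and expose it through the progress payload built in main_curate_function_progress(). A minimal consumer sketch, assuming that payload is what the function returns and that the function is importable from this module path:

# Hedged sketch, not from the package docs: assumes main_curate_function_progress()
# returns the progress dict shown in the hunk above.
from pysoda.core.dataset_generation.upload import main_curate_function_progress

progress = main_curate_function_progress()
error_message = progress.get("curation_error_message", "")
if error_message:
    # The curation loop stored the exception text before re-raising, so the
    # caller can report the failure without parsing logs.
    print(f"Curation failed: {error_message}")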
pysoda/core/metadata/code_description.py
CHANGED

@@ -1,6 +1,6 @@
 from .constants import METADATA_UPLOAD_PS_PATH, TEMPLATE_PATH, SDS_FILE_CODE_DESCRIPTION, SCHEMA_NAME_CODE_DESCRIPTION
 from .excel_utils import rename_headers, excel_columns
-from openpyxl.styles import PatternFill
+from openpyxl.styles import PatternFill, Font
 from os.path import join, getsize
 from openpyxl import load_workbook
 import shutil

@@ -53,27 +53,47 @@ def populate_input_output_information(ws1, soda):
 
 excel_ascii = excel_columns(start_index=3)[0]
 ws1[excel_ascii + str(row)] = input_output_information["number_of_inputs"]
+ws1[excel_ascii + str(row)].font = Font(bold=False, size=11, name="Arial")
 
 for input, column in zip(input_output_information["inputs"], excel_columns(start_index=3)):
 row = 28
 ws1[column + str(row)] = input["input_parameter_name"]
+ws1[column + str(row)].font = Font(bold=False, size=11, name="Arial")
+
 ws1[column + str(row + 1)] = input["input parameter type"]
+ws1[column + str(row + 1)].font = Font(bold=False, size=11, name="Arial")
+
 ws1[column + str(row + 2)] = input["input_parameter_description"]
+ws1[column + str(row + 2)].font = Font(bold=False, size=11, name="Arial")
+
 ws1[column + str(row + 3)] = input["input_units"]
+ws1[column + str(row + 3)].font = Font(bold=False, size=11, name="Arial")
+
 ws1[column + str(row + 4)] = input["input_default_value"]
+ws1[column + str(row + 4)].font = Font(bold=False, size=11, name="Arial")
 
 # populate number of outputs into row 34
 row = 34
 ws1[excel_ascii + str(row)] = input_output_information["number_of_outputs"]
+ws1[excel_ascii + str(row)].font = Font(bold=False, size=11, name="Arial")
 
 # populate the outputs from row 35 - 39
 for output, column in zip(input_output_information["outputs"], excel_columns(start_index=3)):
 row = 35
 ws1[column + str(row)] = output["output_parameter_name"]
+ws1[column + str(row)].font = Font(bold=False, size=11, name="Arial")
+
 ws1[column + str(row + 1)] = output["output_parameter_type"]
+ws1[column + str(row + 1)].font = Font(bold=False, size=11, name="Arial")
+
 ws1[column + str(row + 2)] = output["output_parameter_description"]
+ws1[column + str(row + 2)].font = Font(bold=False, size=11, name="Arial")
+
 ws1[column + str(row + 3)] = output["output_units"]
+ws1[column + str(row + 3)].font = Font(bold=False, size=11, name="Arial")
+
 ws1[column + str(row + 4)] = output["output_default_value"]
+ws1[column + str(row + 4)].font = Font(bold=False, size=11, name="Arial")
 
 
 def populate_basic_information(ws1, soda):

@@ -83,9 +103,16 @@ def populate_basic_information(ws1, soda):
 row = 2
 for info, column in zip(basic_information, excel_columns(start_index=3)):
 ws1[column + str(row)] = info["RRID_term"]
+ws1[column + str(row)].font = Font(bold=False, size=11, name="Arial")
+
 ws1[column + str(row + 1)] = info["RRID_identifier"]
+ws1[column + str(row + 1)].font = Font(bold=False, size=11, name="Arial")
+
 ws1[column + str(row + 2)] = info["ontology_term"]
+ws1[column + str(row + 2)].font = Font(bold=False, size=11, name="Arial")
+
 ws1[column + str(row + 3)] = info["ontology_identifier"]
+ws1[column + str(row + 3)].font = Font(bold=False, size=11, name="Arial")
 
 
 def populate_ten_simple_rules(ws1, soda):

@@ -94,10 +121,19 @@ def populate_ten_simple_rules(ws1, soda):
 ascii_cols = excel_columns(start_index=3)
 for _, rule in ten_simple_rules.items():
 ws1[ascii_cols[0] + str(row)] = rule.get("Link", "")
+ws1[ascii_cols[0] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
 ws1[ascii_cols[1] + str(row)] = rule.get("Rating", "")
+ws1[ascii_cols[1] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
 ws1[ascii_cols[2] + str(row)] = rule.get("Target", "")
+ws1[ascii_cols[2] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
 ws1[ascii_cols[3] + str(row)] = rule.get("Target Justification", "")
+ws1[ascii_cols[3] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
 ws1[ascii_cols[4] + str(row)] = rule.get("Text", "")
+ws1[ascii_cols[4] + str(row)].font = Font(bold=False, size=11, name="Arial")
 row += 1
 
 
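Every hunk in code_description.py follows the same write-then-style pattern: a value is written into a cell and the cell's font is immediately set to plain 11 pt Arial, so filled-in data does not keep whatever styling the template cell had. A self-contained openpyxl sketch of that pattern (the workbook, cell, and filename below are illustrative, not taken from the package):

# Illustrative only: reproduces the cell-styling pattern used in these hunks.
from openpyxl import Workbook
from openpyxl.styles import Font

wb = Workbook()
ws1 = wb.active

ws1["C28"] = "example input_parameter_name"                # write the value first
ws1["C28"].font = Font(bold=False, size=11, name="Arial")  # then normalize the font

wb.save("code_description_font_example.xlsx")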
pysoda/core/metadata/dataset_description.py
CHANGED

@@ -146,7 +146,9 @@ def populate_contributor_info(workbook, soda):
 workbook[column + "28"] = contributor.get("contributor_name", "")
 workbook[column + "29"] = contributor.get("contributor_orcid_id", "")
 workbook[column + "30"] = contributor.get("contributor_affiliation", "")
-
+roles = contributor.get("contributor_roles", [])
+roles_str = ", ".join(roles) if roles else ""
+workbook[column + "31"] = roles_str
 # Return the length of the contributor array, or 1 if empty
 return max(1, len(contributor_info))
 
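The dataset_description.py change writes contributor roles into row 31 as a single comma-separated string. The same join expression as the added lines, run on an illustrative contributor record (role names here are examples, not values from the package):

contributor = {"contributor_roles": ["PrincipalInvestigator", "DataCurator"]}

roles = contributor.get("contributor_roles", [])
roles_str = ", ".join(roles) if roles else ""

assert roles_str == "PrincipalInvestigator, DataCurator"
assert (", ".join([]) if [] else "") == ""  # an empty role list collapses to an empty cell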
pysoda/core/metadata/performances.py
CHANGED

@@ -1,6 +1,6 @@
 from .constants import METADATA_UPLOAD_PS_PATH, TEMPLATE_PATH, SDS_FILE_PERFORMANCES, SCHEMA_NAME_PERFORMANCES
 from .excel_utils import rename_headers, excel_columns
-from openpyxl.styles import PatternFill
+from openpyxl.styles import PatternFill, Font
 from os.path import join, getsize
 from openpyxl import load_workbook
 import shutil

@@ -26,13 +26,26 @@ def create_excel(soda, upload_boolean, local_destination):
 ascii_headers = excel_columns(start_index=0)
 for performance in performances:
 ws1[ascii_headers[0] + str(row)] = performance.get("performance_id", "")
+ws1[ascii_headers[0] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
 ws1[ascii_headers[1] + str(row)] = performance.get("protocol_url_or_doi", "")
+ws1[ascii_headers[1] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
 ws1[ascii_headers[2] + str(row)] = performance.get("date", "")
+ws1[ascii_headers[2] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
 ws1[ascii_headers[3] + str(row)] = performance.get("start_datetime", "")
+ws1[ascii_headers[3] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
 ws1[ascii_headers[4] + str(row)] = performance.get("end_datetime", "")
+ws1[ascii_headers[4] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
 participants = " ".join(performance.get("participants", []))
 ws1[ascii_headers[5] + str(row)] = participants
+ws1[ascii_headers[5] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
 ws1[ascii_headers[6] + str(row)] = performance.get("additional_metadata", "")
+ws1[ascii_headers[6] + str(row)].font = Font(bold=False, size=11, name="Arial")
 row += 1
 
 wb.save(destination)
pysoda/core/metadata/resources.py
CHANGED

@@ -1,6 +1,6 @@
 from .constants import METADATA_UPLOAD_PS_PATH, TEMPLATE_PATH, SDS_FILE_RESOURCES, SCHEMA_NAME_RESOURCES
 from .excel_utils import rename_headers, excel_columns
-from openpyxl.styles import PatternFill
+from openpyxl.styles import PatternFill, Font
 from os.path import join, getsize
 from openpyxl import load_workbook
 import shutil

@@ -27,13 +27,28 @@ def create_excel(soda, upload_boolean, local_destination):
 ascii_headers = excel_columns(start_index=0)
 for resource in resources:
 ws1[ascii_headers[0] + str(row)] = resource.get("rrid", "")
+ws1[ascii_headers[0] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
 ws1[ascii_headers[1] + str(row)] = resource.get("type", "")
+ws1[ascii_headers[1] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
 ws1[ascii_headers[2] + str(row)] = resource.get("name", "")
+ws1[ascii_headers[2] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
 ws1[ascii_headers[3] + str(row)] = resource.get("url", "")
+ws1[ascii_headers[3] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
 ws1[ascii_headers[4] + str(row)] = resource.get("vendor", "")
+ws1[ascii_headers[4] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
 ws1[ascii_headers[5] + str(row)] = resource.get("version", "")
+ws1[ascii_headers[5] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
 ws1[ascii_headers[6] + str(row)] = resource.get("id_in_protocol", "")
+ws1[ascii_headers[6] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
 ws1[ascii_headers[7] + str(row)] = resource.get("additional_metadata", "")
+ws1[ascii_headers[7] + str(row)].font = Font(bold=False, size=11, name="Arial")
 
 row += 1
 
pysoda/core/metadata/samples.py
CHANGED

@@ -49,7 +49,7 @@ def create_excel(soda, upload_boolean, local_destination):
 ws1[ascii_headers[5] + str(row)] = sample.get("sample_type", "")
 ws1[ascii_headers[5] + str(row)].font = Font(bold=False, size=11, name="Arial")
 
-ws1[ascii_headers[6] + str(row)] = sample
+ws1[ascii_headers[6] + str(row)] = handle_anatomical_location_field(sample)
 ws1[ascii_headers[6] + str(row)].font = Font(bold=False, size=11, name="Arial")
 
 ws1[ascii_headers[7] + str(row)] = sample.get("also_in_dataset", "")

@@ -61,36 +61,39 @@ def create_excel(soda, upload_boolean, local_destination):
 ws1[ascii_headers[9] + str(row)] = sample.get("metadata_only", "")
 ws1[ascii_headers[9] + str(row)].font = Font(bold=False, size=11, name="Arial")
 
-ws1[ascii_headers[10] + str(row)] = sample.get("
+ws1[ascii_headers[10] + str(row)] = sample.get("number_of_directly_derived_samples", "")
 ws1[ascii_headers[10] + str(row)].font = Font(bold=False, size=11, name="Arial")
 
-ws1[ascii_headers[11] + str(row)] = sample.get("
+ws1[ascii_headers[11] + str(row)] = sample.get("laboratory_internal_id", "")
 ws1[ascii_headers[11] + str(row)].font = Font(bold=False, size=11, name="Arial")
 
-ws1[ascii_headers[12] + str(row)] = sample.get("
+ws1[ascii_headers[12] + str(row)] = sample.get("date_of_derivation", "")
 ws1[ascii_headers[12] + str(row)].font = Font(bold=False, size=11, name="Arial")
 
-ws1[ascii_headers[13] + str(row)] = sample.get("
+ws1[ascii_headers[13] + str(row)] = sample.get("experimental_log_file_path", "")
 ws1[ascii_headers[13] + str(row)].font = Font(bold=False, size=11, name="Arial")
 
-ws1[ascii_headers[14] + str(row)] = sample.get("
+ws1[ascii_headers[14] + str(row)] = sample.get("reference_atlas", "")
 ws1[ascii_headers[14] + str(row)].font = Font(bold=False, size=11, name="Arial")
 
-ws1[ascii_headers[15] + str(row)] = sample.get("
+ws1[ascii_headers[15] + str(row)] = sample.get("pathology", "")
 ws1[ascii_headers[15] + str(row)].font = Font(bold=False, size=11, name="Arial")
 
-ws1[ascii_headers[16] + str(row)] = sample.get("
+ws1[ascii_headers[16] + str(row)] = sample.get("laterality", "")
 ws1[ascii_headers[16] + str(row)].font = Font(bold=False, size=11, name="Arial")
 
-ws1[ascii_headers[17] + str(row)] = sample.get("
+ws1[ascii_headers[17] + str(row)] = sample.get("cell_type", "")
 ws1[ascii_headers[17] + str(row)].font = Font(bold=False, size=11, name="Arial")
 
-ws1[ascii_headers[18] + str(row)] = sample.get("
+ws1[ascii_headers[18] + str(row)] = sample.get("plane_of_section", "")
 ws1[ascii_headers[18] + str(row)].font = Font(bold=False, size=11, name="Arial")
 
-ws1[ascii_headers[19] + str(row)] = sample.get("
+ws1[ascii_headers[19] + str(row)] = sample.get("protocol_title", "")
 ws1[ascii_headers[19] + str(row)].font = Font(bold=False, size=11, name="Arial")
 
+ws1[ascii_headers[20] + str(row)] = sample.get("protocol_url_or_doi", "")
+ws1[ascii_headers[20] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
 # Handle custom fields
 for field_name, field in sample.items():
 if field_name in sds_headers:

@@ -102,14 +105,14 @@ def create_excel(soda, upload_boolean, local_destination):
 
 # Create the column header in the Excel file
 offset_from_final_sds_header = custom_headers_to_column[field_name]
-ws1[ascii_headers[
-ws1[ascii_headers[
-ws1[ascii_headers[
+ws1[ascii_headers[20 + offset_from_final_sds_header] + "1"] = field_name
+ws1[ascii_headers[20 + offset_from_final_sds_header] + "1"].fill = orangeFill
+ws1[ascii_headers[20 + offset_from_final_sds_header] + "1"].font = Font(bold=True, size=12, name="Calibri")
 
 # Add the field value to the corresponding cell in the Excel file
 offset_from_final_sds_header = custom_headers_to_column[field_name]
-ws1[ascii_headers[
-ws1[ascii_headers[
+ws1[ascii_headers[20 + offset_from_final_sds_header] + str(row)] = field
+ws1[ascii_headers[20 + offset_from_final_sds_header] + str(row)].font = Font(bold=False, size=11, name="Arial")
 
 row += 1
 

@@ -124,61 +127,9 @@ def create_excel(soda, upload_boolean, local_destination):
 return size
 
 
-
-
-
-
-
-
-# "was_derived_from": "derived_from_1",
-# "pool_id": "pool_1",
-# "sample_experimental_group": "experimental_group_1",
-# "sample_type": "type_1",
-# "sample_anatomical_location": "location_1",
-# "also_in_dataset": "dataset_1",
-# "member_of": "member_1",
-# "metadata_only": "False",
-# "laboratory_internal_id": "internal_id_1",
-# "date_of_derivation": "2023-01-01",
-# "experimental_log_file_path": "/path/to/log/file",
-# "reference_atlas": "/path/to/atlas",
-# "pathology": "pathology_1",
-# "laterality": "left",
-# "cell_type": "cell_type_1",
-# "plane_of_section": "plane_1",
-# "protocol_title": "protocol_title_1",
-# "protocol_url_or_doi": "/path/to/protocol",
-# "custom_field_1": "custom_value_1",
-# },
-# {
-# "sample_id": "sample_2",
-# "subject_id": "subject_2",
-# "was_derived_from": "derived_from_2",
-# "pool_id": "pool_2",
-# "sample_experimental_group": "experimental_group_2",
-# "sample_type": "type_2",
-# "sample_anatomical_location": "location_2",
-# "also_in_dataset": "dataset_2",
-# "member_of": "member_2",
-# "metadata_only": "True",
-# "laboratory_internal_id": "internal_id_2",
-# "date_of_derivation": "2023-02-01",
-# "experimental_log_file_path": "/path/to/log/file2",
-# "reference_atlas": "/path/to/atlas2",
-# "pathology": "pathology_2",
-# "laterality": "right",
-# "cell_type": "cell_type_2",
-# "plane_of_section": "plane_2",
-# "protocol_title": "protocol_title_2",
-# "protocol_url_or_doi": "/path/to/protocol2",
-# "custom_field_1": "custom_value_12",
-
-# }
-# ]
-# }
-# }
-
-# try:
-# create_excel(soda, False, "samples.xlsx")
-# except Exception as e:
-# print(f"An error occurred: {e}")
+
+def handle_anatomical_location_field(sample):
+anatomical_location = sample.get("sample_anatomical_location", "")
+if isinstance(anatomical_location, list):
+return " ".join(anatomical_location)
+return anatomical_location
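The anatomical-location cell in samples.py now goes through a small helper that accepts either a single string or a list of terms. A self-contained check of its behavior, with the function reproduced from the hunk above and illustrative inputs:

def handle_anatomical_location_field(sample):
    # Reproduced from the diff: flatten a list of locations into one cell value.
    anatomical_location = sample.get("sample_anatomical_location", "")
    if isinstance(anatomical_location, list):
        return " ".join(anatomical_location)
    return anatomical_location

# Illustrative inputs
assert handle_anatomical_location_field({"sample_anatomical_location": ["cortex", "hippocampus"]}) == "cortex hippocampus"
assert handle_anatomical_location_field({"sample_anatomical_location": "heart"}) == "heart"
assert handle_anatomical_location_field({}) == ""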
pysoda/core/metadata/sites.py
CHANGED

@@ -1,6 +1,6 @@
 from .constants import METADATA_UPLOAD_PS_PATH, TEMPLATE_PATH, SDS_FILE_SITES, SCHEMA_NAME_SITES
 from .excel_utils import rename_headers, excel_columns
-from openpyxl.styles import PatternFill
+from openpyxl.styles import PatternFill, Font
 from os.path import join, getsize
 from openpyxl import load_workbook
 import shutil

@@ -27,12 +27,25 @@ def create_excel(soda, upload_boolean, local_destination):
 ascii_headers = excel_columns(start_index=0)
 for performance in sites:
 ws1[ascii_headers[0] + str(row)] = performance.get("site_id", "")
+ws1[ascii_headers[0] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
 ws1[ascii_headers[1] + str(row)] = performance.get("specimen_id", "")
+ws1[ascii_headers[1] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
 ws1[ascii_headers[2] + str(row)] = performance.get("site_type", "")
+ws1[ascii_headers[2] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
 ws1[ascii_headers[3] + str(row)] = performance.get("laboratory_internal_id", "")
+ws1[ascii_headers[3] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
 ws1[ascii_headers[4] + str(row)] = performance.get("coordinate_system", "")
+ws1[ascii_headers[4] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
 ws1[ascii_headers[5] + str(row)] = performance.get("coordinate_system_position", "")
+ws1[ascii_headers[5] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
 ws1[ascii_headers[6] + str(row)] = performance.get("more...", "")
+ws1[ascii_headers[6] + str(row)].font = Font(bold=False, size=11, name="Arial")
 row += 1
 
 wb.save(destination)
pysoda/core/metadata/subjects.py
CHANGED

@@ -84,54 +84,57 @@ def create_excel(soda, upload_boolean, local_destination):
 ws1[ascii_headers[11] + str(row)] = subject.get("metadata_only", "")
 ws1[ascii_headers[11] + str(row)].font = Font(bold=False, size=11, name="Arial")
 
-ws1[ascii_headers[12] + str(row)] = subject.get("
+ws1[ascii_headers[12] + str(row)] = subject.get("number_of_directly_derived_samples", "")
 ws1[ascii_headers[12] + str(row)].font = Font(bold=False, size=11, name="Arial")
 
-ws1[ascii_headers[13] + str(row)] = subject.get("
+ws1[ascii_headers[13] + str(row)] = subject.get("laboratory_internal_id", "")
 ws1[ascii_headers[13] + str(row)].font = Font(bold=False, size=11, name="Arial")
 
-ws1[ascii_headers[14] + str(row)] = subject.get("
+ws1[ascii_headers[14] + str(row)] = subject.get("date_of_birth", "")
 ws1[ascii_headers[14] + str(row)].font = Font(bold=False, size=11, name="Arial")
 
-ws1[ascii_headers[15] + str(row)] = subject.get("
+ws1[ascii_headers[15] + str(row)] = subject.get("age_range_min", "")
 ws1[ascii_headers[15] + str(row)].font = Font(bold=False, size=11, name="Arial")
 
-ws1[ascii_headers[16] + str(row)] = subject.get("
+ws1[ascii_headers[16] + str(row)] = subject.get("age_range_max", "")
 ws1[ascii_headers[16] + str(row)].font = Font(bold=False, size=11, name="Arial")
 
-ws1[ascii_headers[17] + str(row)] = subject.get("
+ws1[ascii_headers[17] + str(row)] = normalize_body_mass(subject.get("body_mass", ""))
 ws1[ascii_headers[17] + str(row)].font = Font(bold=False, size=11, name="Arial")
 
-ws1[ascii_headers[18] + str(row)] = subject.get("
+ws1[ascii_headers[18] + str(row)] = subject.get("genotype", "")
 ws1[ascii_headers[18] + str(row)].font = Font(bold=False, size=11, name="Arial")
 
-ws1[ascii_headers[19] + str(row)] = subject.get("
+ws1[ascii_headers[19] + str(row)] = subject.get("phenotype", "")
 ws1[ascii_headers[19] + str(row)].font = Font(bold=False, size=11, name="Arial")
 
-ws1[ascii_headers[20] + str(row)] = subject.get("
+ws1[ascii_headers[20] + str(row)] = subject.get("handedness", "")
 ws1[ascii_headers[20] + str(row)].font = Font(bold=False, size=11, name="Arial")
 
-ws1[ascii_headers[21] + str(row)] = subject.get("
+ws1[ascii_headers[21] + str(row)] = subject.get("reference_atlas", "")
 ws1[ascii_headers[21] + str(row)].font = Font(bold=False, size=11, name="Arial")
 
-ws1[ascii_headers[22] + str(row)] = subject.get("
+ws1[ascii_headers[22] + str(row)] = subject.get("experimental_log_file_path", "")
 ws1[ascii_headers[22] + str(row)].font = Font(bold=False, size=11, name="Arial")
 
-ws1[ascii_headers[23] + str(row)] = subject.get("
+ws1[ascii_headers[23] + str(row)] = subject.get("experiment_date", "")
 ws1[ascii_headers[23] + str(row)].font = Font(bold=False, size=11, name="Arial")
 
-ws1[ascii_headers[24] + str(row)] = subject.get("
+ws1[ascii_headers[24] + str(row)] = subject.get("disease", "")
 ws1[ascii_headers[24] + str(row)].font = Font(bold=False, size=11, name="Arial")
 
-ws1[ascii_headers[25] + str(row)] = subject.get("
+ws1[ascii_headers[25] + str(row)] = subject.get("intervention", "")
 ws1[ascii_headers[25] + str(row)].font = Font(bold=False, size=11, name="Arial")
 
-ws1[ascii_headers[26] + str(row)] = subject.get("
+ws1[ascii_headers[26] + str(row)] = subject.get("disease_model", "")
 ws1[ascii_headers[26] + str(row)].font = Font(bold=False, size=11, name="Arial")
 
-ws1[ascii_headers[27] + str(row)] = subject.get("
+ws1[ascii_headers[27] + str(row)] = subject.get("protocol_title", "")
 ws1[ascii_headers[27] + str(row)].font = Font(bold=False, size=11, name="Arial")
 
+ws1[ascii_headers[28] + str(row)] = subject.get("protocol_url_or_doi", "")
+ws1[ascii_headers[28] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
 # handle custom fields
 for field_name, field in subject.items():
 if field_name in sds_headers:

@@ -146,17 +149,17 @@ def create_excel(soda, upload_boolean, local_destination):
 
 # create the column header in the excel file
 offset_from_final_sds_header = custom_headers_to_column[field_name]
-ws1[ascii_headers[
-ws1[ascii_headers[
-ws1[ascii_headers[
+ws1[ascii_headers[28 + offset_from_final_sds_header] + "1"] = field_name
+ws1[ascii_headers[28 + offset_from_final_sds_header] + "1"].fill = orangeFill
+ws1[ascii_headers[28 + offset_from_final_sds_header] + "1"].font = Font(bold=True, size=12, name="Calibri")
 
 
 
 # add the field value to the corresponding cell in the excel file
 offset_from_final_sds_header = custom_headers_to_column[field_name]
 
-ws1[ascii_headers[
-ws1[ascii_headers[
+ws1[ascii_headers[28 + offset_from_final_sds_header] + str(row)] = field
+ws1[ascii_headers[28 + offset_from_final_sds_header] + str(row)].font = Font(bold=False, size=11, name="Arial")
 
 
 row += 1

@@ -169,4 +172,30 @@ def create_excel(soda, upload_boolean, local_destination):
 if upload_boolean:
 upload_metadata_file(SDS_FILE_SUBJECTS, soda, destination, True)
 
-return {size: size}
+return {size: size}
+
+
+
+def normalize_body_mass(body_mass_value):
+"""
+Ensures that a body_mass value is normalized to a string for Excel export.
+Handles cases where 'body_mass' is a string, number, or an object with 'value' and 'unit'.
+"""
+# If already a string or empty, leave as is
+if isinstance(body_mass_value, str):
+return body_mass_value
+# If a number, convert to string
+if isinstance(body_mass_value, (int, float)):
+return str(body_mass_value)
+# If an object with value and unit
+if isinstance(body_mass_value, dict):
+value = body_mass_value.get("value")
+unit = body_mass_value.get("unit")
+if value is not None and unit:
+return f"{value} {unit}"
+elif value is not None:
+return str(value)
+else:
+return ""
+else:
+return ""
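subjects.py gains a normalize_body_mass helper so the body_mass cell always receives a string, whether the incoming value is a string, a number, or a {value, unit} object. Its behavior, with the function reproduced from the hunk above and illustrative inputs:

def normalize_body_mass(body_mass_value):
    # Reproduced from the diff: coerce body_mass to a string for the Excel cell.
    if isinstance(body_mass_value, str):
        return body_mass_value
    if isinstance(body_mass_value, (int, float)):
        return str(body_mass_value)
    if isinstance(body_mass_value, dict):
        value = body_mass_value.get("value")
        unit = body_mass_value.get("unit")
        if value is not None and unit:
            return f"{value} {unit}"
        elif value is not None:
            return str(value)
        else:
            return ""
    else:
        return ""

# Illustrative inputs
assert normalize_body_mass("0.03 kg") == "0.03 kg"
assert normalize_body_mass(30) == "30"
assert normalize_body_mass({"value": 30, "unit": "g"}) == "30 g"
assert normalize_body_mass(None) == ""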
pysoda/core/metadata_templates/.dss
CHANGED

@@ -0,0 +1 @@
+(sds 3.0.1)

Binary file
Binary file
Binary file
Binary file