pysodafair 0.1.62__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pysoda/__init__.py +0 -0
- pysoda/constants.py +3 -0
- pysoda/core/__init__.py +10 -0
- pysoda/core/dataset_generation/__init__.py +11 -0
- pysoda/core/dataset_generation/manifestSession/__init__.py +1 -0
- pysoda/core/dataset_generation/manifestSession/manifest_session.py +146 -0
- pysoda/core/dataset_generation/upload.py +3951 -0
- pysoda/core/dataset_importing/__init__.py +1 -0
- pysoda/core/dataset_importing/import_dataset.py +662 -0
- pysoda/core/metadata/__init__.py +20 -0
- pysoda/core/metadata/code_description.py +109 -0
- pysoda/core/metadata/constants.py +32 -0
- pysoda/core/metadata/dataset_description.py +188 -0
- pysoda/core/metadata/excel_utils.py +41 -0
- pysoda/core/metadata/helpers.py +250 -0
- pysoda/core/metadata/manifest.py +112 -0
- pysoda/core/metadata/manifest_package/__init__.py +2 -0
- pysoda/core/metadata/manifest_package/manifest.py +0 -0
- pysoda/core/metadata/manifest_package/manifest_import.py +29 -0
- pysoda/core/metadata/manifest_package/manifest_writer.py +666 -0
- pysoda/core/metadata/performances.py +46 -0
- pysoda/core/metadata/resources.py +53 -0
- pysoda/core/metadata/samples.py +184 -0
- pysoda/core/metadata/sites.py +51 -0
- pysoda/core/metadata/subjects.py +172 -0
- pysoda/core/metadata/submission.py +91 -0
- pysoda/core/metadata/text_metadata.py +47 -0
- pysoda/core/metadata_templates/CHANGES +1 -0
- pysoda/core/metadata_templates/LICENSE +1 -0
- pysoda/core/metadata_templates/README.md +4 -0
- pysoda/core/metadata_templates/__init__.py +0 -0
- pysoda/core/metadata_templates/code_description.xlsx +0 -0
- pysoda/core/metadata_templates/code_parameters.xlsx +0 -0
- pysoda/core/metadata_templates/dataset_description.xlsx +0 -0
- pysoda/core/metadata_templates/manifest.xlsx +0 -0
- pysoda/core/metadata_templates/performances.xlsx +0 -0
- pysoda/core/metadata_templates/resources.xlsx +0 -0
- pysoda/core/metadata_templates/samples.xlsx +0 -0
- pysoda/core/metadata_templates/sites.xlsx +0 -0
- pysoda/core/metadata_templates/subjects.xlsx +0 -0
- pysoda/core/metadata_templates/subjects_pools_samples_structure.xlsx +0 -0
- pysoda/core/metadata_templates/subjects_pools_samples_structure_example.xlsx +0 -0
- pysoda/core/metadata_templates/submission.xlsx +0 -0
- pysoda/core/permissions/__init__.py +1 -0
- pysoda/core/permissions/permissions.py +31 -0
- pysoda/core/pysoda/__init__.py +2 -0
- pysoda/core/pysoda/soda.py +34 -0
- pysoda/core/pysoda/soda_object.py +55 -0
- pysoda/core/upload_manifests/__init__.py +1 -0
- pysoda/core/upload_manifests/upload_manifests.py +37 -0
- pysoda/schema/__init__.py +0 -0
- pysoda/schema/code_description.json +629 -0
- pysoda/schema/dataset_description.json +295 -0
- pysoda/schema/manifest.json +60 -0
- pysoda/schema/performances.json +44 -0
- pysoda/schema/resources.json +39 -0
- pysoda/schema/samples.json +97 -0
- pysoda/schema/sites.json +38 -0
- pysoda/schema/soda_schema.json +664 -0
- pysoda/schema/subjects.json +131 -0
- pysoda/schema/submission_schema.json +28 -0
- pysoda/utils/__init__.py +9 -0
- pysoda/utils/authentication.py +381 -0
- pysoda/utils/config.py +68 -0
- pysoda/utils/exceptions.py +156 -0
- pysoda/utils/logger.py +6 -0
- pysoda/utils/metadata_utils.py +74 -0
- pysoda/utils/pennsieveAgentUtils.py +11 -0
- pysoda/utils/pennsieveUtils.py +118 -0
- pysoda/utils/profile.py +28 -0
- pysoda/utils/schema_validation.py +133 -0
- pysoda/utils/time_utils.py +5 -0
- pysoda/utils/upload_utils.py +108 -0
- pysodafair-0.1.62.dist-info/METADATA +190 -0
- pysodafair-0.1.62.dist-info/RECORD +77 -0
- pysodafair-0.1.62.dist-info/WHEEL +4 -0
- pysodafair-0.1.62.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
from .constants import METADATA_UPLOAD_PS_PATH, TEMPLATE_PATH, SDS_FILE_PERFORMANCES, SCHEMA_NAME_PERFORMANCES
|
|
2
|
+
from .excel_utils import rename_headers, excel_columns
|
|
3
|
+
from openpyxl.styles import PatternFill
|
|
4
|
+
from os.path import join, getsize
|
|
5
|
+
from openpyxl import load_workbook
|
|
6
|
+
import shutil
|
|
7
|
+
from ...utils import validate_schema
|
|
8
|
+
from .helpers import upload_metadata_file, get_template_path
|
|
9
|
+
|
|
10
|
+
def create_excel(soda, upload_boolean, local_destination):
    """Generate the performances.xlsx metadata file from the soda object.

    Copies the SDS performances template, writes one worksheet row per
    performance entry, and optionally uploads the result to Pennsieve.

    Args:
        soda (dict): Soda object; reads soda["dataset_metadata"]["performances"].
        upload_boolean (bool): When True, write to the Pennsieve staging path
            and upload the generated file; otherwise write to local_destination.
        local_destination (str): Output path used when not uploading.

    Returns:
        dict: {"size": <size of the generated file in bytes>}.
    """
    template = get_template_path(SDS_FILE_PERFORMANCES)

    destination = join(METADATA_UPLOAD_PS_PATH, SDS_FILE_PERFORMANCES) if upload_boolean else local_destination

    shutil.copyfile(template, destination)

    workbook = load_workbook(destination)
    sheet = workbook["Sheet1"]

    performances = soda["dataset_metadata"]["performances"]

    # Raises if the entries do not conform to the performances schema.
    validate_schema(performances, SCHEMA_NAME_PERFORMANCES)

    columns = excel_columns(start_index=0)
    # Data rows start at row 2; row 1 holds the template headers.
    for row_index, performance in enumerate(performances, start=2):
        row_suffix = str(row_index)
        sheet[columns[0] + row_suffix] = performance.get("performance_id", "")
        sheet[columns[1] + row_suffix] = performance.get("protocol_url_or_doi", "")
        sheet[columns[2] + row_suffix] = performance.get("date", "")
        sheet[columns[3] + row_suffix] = performance.get("start_datetime", "")
        sheet[columns[4] + row_suffix] = performance.get("end_datetime", "")
        # Participants are stored as a list; serialize them space-separated.
        sheet[columns[5] + row_suffix] = " ".join(performance.get("participants", []))
        sheet[columns[6] + row_suffix] = performance.get("additional_metadata", "")

    workbook.save(destination)

    size = getsize(destination)

    # When generating directly on Pennsieve, push the file up as well.
    if upload_boolean:
        upload_metadata_file(SDS_FILE_PERFORMANCES, soda, destination, True)

    return {"size": size}
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
from .constants import METADATA_UPLOAD_PS_PATH, TEMPLATE_PATH, SDS_FILE_RESOURCES, SCHEMA_NAME_RESOURCES
|
|
2
|
+
from .excel_utils import rename_headers, excel_columns
|
|
3
|
+
from openpyxl.styles import PatternFill
|
|
4
|
+
from os.path import join, getsize
|
|
5
|
+
from openpyxl import load_workbook
|
|
6
|
+
import shutil
|
|
7
|
+
from ...utils import validate_schema
|
|
8
|
+
from .helpers import upload_metadata_file, get_template_path
|
|
9
|
+
|
|
10
|
+
def create_excel(soda, upload_boolean, local_destination):
    """Generate the resources.xlsx metadata file from the soda object.

    Copies the SDS resources template, writes one worksheet row per
    resource entry, and optionally uploads the result to Pennsieve.

    Args:
        soda (dict): Soda object; reads soda["dataset_metadata"]["resources"].
        upload_boolean (bool): When True, write to the Pennsieve staging path
            and upload the generated file; otherwise write to local_destination.
        local_destination (str): Output path used when not uploading.

    Returns:
        dict: {"size": <size of the generated file in bytes>}.
    """
    template = get_template_path(SDS_FILE_RESOURCES)

    destination = join(METADATA_UPLOAD_PS_PATH, SDS_FILE_RESOURCES) if upload_boolean else local_destination

    shutil.copyfile(template, destination)

    workbook = load_workbook(destination)
    sheet = workbook["Sheet1"]

    resources = soda["dataset_metadata"]["resources"]

    # Raises if the entries do not conform to the resources schema.
    validate_schema(resources, SCHEMA_NAME_RESOURCES)

    # Worksheet column order matches the template headers.
    field_order = (
        "rrid",
        "type",
        "name",
        "url",
        "vendor",
        "version",
        "id_in_protocol",
        "additional_metadata",
    )

    columns = excel_columns(start_index=0)
    # Data rows start at row 2; row 1 holds the template headers.
    for row_index, resource in enumerate(resources, start=2):
        for column, field_name in zip(columns, field_order):
            sheet[column + str(row_index)] = resource.get(field_name, "")

    workbook.save(destination)

    size = getsize(destination)

    # When generating directly on Pennsieve, push the file up as well.
    if upload_boolean:
        upload_metadata_file(SDS_FILE_RESOURCES, soda, destination, True)

    return {"size": size}
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
|
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
from .constants import METADATA_UPLOAD_PS_PATH, TEMPLATE_PATH, SDS_FILE_SAMPLES, SCHEMA_NAME_SAMPLES
|
|
2
|
+
from .excel_utils import excel_columns
|
|
3
|
+
from openpyxl.styles import PatternFill, Font
|
|
4
|
+
from os.path import join, getsize
|
|
5
|
+
from openpyxl import load_workbook
|
|
6
|
+
import shutil
|
|
7
|
+
from ...utils import validate_schema, get_sds_headers
|
|
8
|
+
from .helpers import upload_metadata_file, get_template_path
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def create_excel(soda, upload_boolean, local_destination):
    """Generate the samples.xlsx metadata file from the soda object.

    Copies the SDS samples template, writes one worksheet row per sample
    (standard SDS columns first, then any custom fields appended after the
    final SDS column with highlighted headers), and optionally uploads the
    result to Pennsieve.

    Args:
        soda (dict): Soda object; reads soda["dataset_metadata"]["samples"].
        upload_boolean (bool): When True, write to the Pennsieve staging path
            and upload the generated file; otherwise write to local_destination.
        local_destination (str): Output path used when not uploading.

    Returns:
        int: Size of the generated file in bytes.
            NOTE(review): sibling generators return {"size": size}; this one
            returns the bare int — confirm caller expectations before unifying.
    """
    # Standard SDS sample columns in template order (worksheet columns A..T).
    sds_field_order = (
        "sample_id",
        "subject_id",
        "was_derived_from",
        "pool_id",
        "sample_experimental_group",
        "sample_type",
        "sample_anatomical_location",
        "also_in_dataset",
        "member_of",
        "metadata_only",
        "laboratory_internal_id",
        "date_of_derivation",
        "experimental_log_file_path",
        "reference_atlas",
        "pathology",
        "laterality",
        "cell_type",
        "plane_of_section",
        "protocol_title",
        "protocol_url_or_doi",
    )

    source = get_template_path(SDS_FILE_SAMPLES)

    samples = soda["dataset_metadata"]["samples"]

    # Raises if the entries do not conform to the samples schema.
    validate_schema(samples, SCHEMA_NAME_SAMPLES)

    destination = join(METADATA_UPLOAD_PS_PATH, SDS_FILE_SAMPLES) if upload_boolean else local_destination
    shutil.copyfile(source, destination)
    wb = load_workbook(destination)
    ws1 = wb["Sheet1"]

    # Highlight used for custom (non-SDS) column headers.
    orangeFill = PatternFill(
        start_color="FFD965", end_color="FFD965", fill_type="solid"
    )
    value_font = Font(bold=False, size=11, name="Arial")
    custom_header_font = Font(bold=True, size=12, name="Calibri")

    row = 2  # row 1 holds the template headers
    ascii_headers = excel_columns(start_index=0)
    # Maps a custom field name to its 1-based offset past the last SDS column,
    # so a field seen in any sample always lands in the same column.
    custom_headers_to_column = {}
    sds_headers = get_sds_headers(SCHEMA_NAME_SAMPLES)
    last_sds_column_index = len(sds_field_order) - 1  # index 19, column T

    # Populate the Excel file with the data.
    for sample in samples:
        # Standard SDS columns.
        for column_index, field_name in enumerate(sds_field_order):
            cell = ascii_headers[column_index] + str(row)
            ws1[cell] = sample.get(field_name, "")
            ws1[cell].font = value_font

        # Custom fields: any key not recognized by the SDS schema headers.
        for field_name, field in sample.items():
            if field_name in sds_headers:
                continue

            if field_name not in custom_headers_to_column:
                custom_headers_to_column[field_name] = len(custom_headers_to_column) + 1
                # Create the column header in the Excel file.
                header_cell = ascii_headers[last_sds_column_index + custom_headers_to_column[field_name]] + "1"
                ws1[header_cell] = field_name
                ws1[header_cell].fill = orangeFill
                ws1[header_cell].font = custom_header_font

            # Add the field value to the corresponding cell.
            offset = custom_headers_to_column[field_name]
            value_cell = ascii_headers[last_sds_column_index + offset] + str(row)
            ws1[value_cell] = field
            ws1[value_cell].font = value_font

        row += 1

    wb.save(destination)

    size = getsize(destination)

    # If generating directly on Pennsieve, call upload function.
    if upload_boolean:
        upload_metadata_file(SDS_FILE_SAMPLES, soda, destination, True)

    return size
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
# soda = {
|
|
128
|
+
# "dataset_metadata": {
|
|
129
|
+
# "samples": [
|
|
130
|
+
# {
|
|
131
|
+
# "sample_id": "sample_1",
|
|
132
|
+
# "subject_id": "subject_1",
|
|
133
|
+
# "was_derived_from": "derived_from_1",
|
|
134
|
+
# "pool_id": "pool_1",
|
|
135
|
+
# "sample_experimental_group": "experimental_group_1",
|
|
136
|
+
# "sample_type": "type_1",
|
|
137
|
+
# "sample_anatomical_location": "location_1",
|
|
138
|
+
# "also_in_dataset": "dataset_1",
|
|
139
|
+
# "member_of": "member_1",
|
|
140
|
+
# "metadata_only": "False",
|
|
141
|
+
# "laboratory_internal_id": "internal_id_1",
|
|
142
|
+
# "date_of_derivation": "2023-01-01",
|
|
143
|
+
# "experimental_log_file_path": "/path/to/log/file",
|
|
144
|
+
# "reference_atlas": "/path/to/atlas",
|
|
145
|
+
# "pathology": "pathology_1",
|
|
146
|
+
# "laterality": "left",
|
|
147
|
+
# "cell_type": "cell_type_1",
|
|
148
|
+
# "plane_of_section": "plane_1",
|
|
149
|
+
# "protocol_title": "protocol_title_1",
|
|
150
|
+
# "protocol_url_or_doi": "/path/to/protocol",
|
|
151
|
+
# "custom_field_1": "custom_value_1",
|
|
152
|
+
# },
|
|
153
|
+
# {
|
|
154
|
+
# "sample_id": "sample_2",
|
|
155
|
+
# "subject_id": "subject_2",
|
|
156
|
+
# "was_derived_from": "derived_from_2",
|
|
157
|
+
# "pool_id": "pool_2",
|
|
158
|
+
# "sample_experimental_group": "experimental_group_2",
|
|
159
|
+
# "sample_type": "type_2",
|
|
160
|
+
# "sample_anatomical_location": "location_2",
|
|
161
|
+
# "also_in_dataset": "dataset_2",
|
|
162
|
+
# "member_of": "member_2",
|
|
163
|
+
# "metadata_only": "True",
|
|
164
|
+
# "laboratory_internal_id": "internal_id_2",
|
|
165
|
+
# "date_of_derivation": "2023-02-01",
|
|
166
|
+
# "experimental_log_file_path": "/path/to/log/file2",
|
|
167
|
+
# "reference_atlas": "/path/to/atlas2",
|
|
168
|
+
# "pathology": "pathology_2",
|
|
169
|
+
# "laterality": "right",
|
|
170
|
+
# "cell_type": "cell_type_2",
|
|
171
|
+
# "plane_of_section": "plane_2",
|
|
172
|
+
# "protocol_title": "protocol_title_2",
|
|
173
|
+
# "protocol_url_or_doi": "/path/to/protocol2",
|
|
174
|
+
# "custom_field_1": "custom_value_12",
|
|
175
|
+
|
|
176
|
+
# }
|
|
177
|
+
# ]
|
|
178
|
+
# }
|
|
179
|
+
# }
|
|
180
|
+
|
|
181
|
+
# try:
|
|
182
|
+
# create_excel(soda, False, "samples.xlsx")
|
|
183
|
+
# except Exception as e:
|
|
184
|
+
# print(f"An error occurred: {e}")
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
from .constants import METADATA_UPLOAD_PS_PATH, TEMPLATE_PATH, SDS_FILE_SITES, SCHEMA_NAME_SITES
|
|
2
|
+
from .excel_utils import rename_headers, excel_columns
|
|
3
|
+
from openpyxl.styles import PatternFill
|
|
4
|
+
from os.path import join, getsize
|
|
5
|
+
from openpyxl import load_workbook
|
|
6
|
+
import shutil
|
|
7
|
+
from ...utils import validate_schema
|
|
8
|
+
from .helpers import upload_metadata_file, get_template_path
|
|
9
|
+
|
|
10
|
+
def create_excel(soda, upload_boolean, local_destination):
    """Generate the sites.xlsx metadata file from the soda object.

    Copies the SDS sites template, writes one worksheet row per site entry,
    and optionally uploads the result to Pennsieve.

    Args:
        soda (dict): Soda object; reads soda["dataset_metadata"]["sites"].
        upload_boolean (bool): When True, write to the Pennsieve staging path
            and upload the generated file; otherwise write to local_destination.
        local_destination (str): Output path used when not uploading.

    Returns:
        dict: {"size": <size of the generated file in bytes>}.
    """
    template = get_template_path(SDS_FILE_SITES)

    destination = join(METADATA_UPLOAD_PS_PATH, SDS_FILE_SITES) if upload_boolean else local_destination

    shutil.copyfile(template, destination)

    workbook = load_workbook(destination)
    sheet = workbook["Sheet1"]

    sites = soda["dataset_metadata"]["sites"]

    # Raises if the entries do not conform to the sites schema.
    validate_schema(sites, SCHEMA_NAME_SITES)

    # Worksheet column order matches the template headers. "more..." is the
    # literal key the original looked up — presumably a template placeholder;
    # verify against the sites schema.
    field_order = (
        "site_id",
        "specimen_id",
        "site_type",
        "laboratory_internal_id",
        "coordinate_system",
        "coordinate_system_position",
        "more...",
    )

    columns = excel_columns(start_index=0)
    # Data rows start at row 2; row 1 holds the template headers.
    for row_index, site in enumerate(sites, start=2):
        for column, field_name in zip(columns, field_order):
            sheet[column + str(row_index)] = site.get(field_name, "")

    workbook.save(destination)

    size = getsize(destination)

    # When generating directly on Pennsieve, push the file up as well.
    if upload_boolean:
        upload_metadata_file(SDS_FILE_SITES, soda, destination, True)

    return {"size": size}
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
|
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
from .constants import METADATA_UPLOAD_PS_PATH, TEMPLATE_PATH, SDS_FILE_SUBJECTS,SCHEMA_NAME_SUBJECTS
|
|
2
|
+
from .excel_utils import rename_headers, excel_columns
|
|
3
|
+
from openpyxl.styles import PatternFill
|
|
4
|
+
from os.path import join, getsize
|
|
5
|
+
from openpyxl import load_workbook
|
|
6
|
+
import shutil
|
|
7
|
+
import numpy as np
|
|
8
|
+
from ...utils import validate_schema, get_sds_headers
|
|
9
|
+
from openpyxl.styles import Font
|
|
10
|
+
from .helpers import transposeMatrix, getMetadataCustomFields, sortedSubjectsTableData, upload_metadata_file, get_template_path
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def create_excel(soda, upload_boolean, local_destination):
    """Generate the subjects.xlsx metadata file from the soda object.

    Copies the SDS subjects template, writes one worksheet row per subject
    (standard SDS columns first, then any custom fields appended after the
    final SDS column with highlighted headers), and optionally uploads the
    result to Pennsieve.

    Fixes over the previous version: the return value was ``{size: size}``
    (keyed by the integer itself); it now returns ``{"size": size}`` like the
    sibling metadata generators. A stray debug ``print`` was also removed.

    Args:
        soda (dict): Soda object; reads soda["dataset_metadata"]["subjects"].
        upload_boolean (bool): When True, write to the Pennsieve staging path
            and upload the generated file; otherwise write to local_destination.
        local_destination (str): Output path used when not uploading.

    Returns:
        dict: {"size": <size of the generated file in bytes>}.
    """
    # Standard SDS subject columns in template order (worksheet columns A..AB).
    sds_field_order = (
        "subject_id",
        "pool_id",
        "subject_experimental_group",
        "age",
        "sex",
        "species",
        "strain",
        "rrid_for_strain",
        "age_category",
        "also_in_dataset",
        "member_of",
        "metadata_only",
        "laboratory_internal_id",
        "date_of_birth",
        "age_range_min",
        "age_range_max",
        "body_mass",
        "genotype",
        "phenotype",
        "handedness",
        "reference_atlas",
        "experimental_log_file_path",
        "experiment_date",
        "disease_or_disorder",
        "intervention",
        "disease_model",
        "protocol_title",
        "protocol_url_or_doi",
    )

    source = get_template_path(SDS_FILE_SUBJECTS)

    subjects = soda["dataset_metadata"]["subjects"]

    # Raises if the entries do not conform to the subjects schema.
    validate_schema(subjects, SCHEMA_NAME_SUBJECTS)

    destination = join(METADATA_UPLOAD_PS_PATH, SDS_FILE_SUBJECTS) if upload_boolean else local_destination
    shutil.copyfile(source, destination)
    wb = load_workbook(destination)
    ws1 = wb["Sheet1"]

    # Highlight used for custom (non-SDS) column headers.
    orangeFill = PatternFill(
        start_color="FFD965", end_color="FFD965", fill_type="solid"
    )
    value_font = Font(bold=False, size=11, name="Arial")
    custom_header_font = Font(bold=True, size=12, name="Calibri")

    row = 2  # row 1 holds the template headers
    ascii_headers = excel_columns(start_index=0)
    # Maps a custom field name to its 1-based offset past the last SDS column,
    # so a field seen in any subject always lands in the same column.
    custom_headers_to_column = {}
    sds_headers = get_sds_headers(SCHEMA_NAME_SUBJECTS)
    last_sds_column_index = len(sds_field_order) - 1  # index 27, column AB

    # Populate the Excel file with the data.
    for subject in subjects:
        # Standard SDS columns.
        for column_index, field_name in enumerate(sds_field_order):
            cell = ascii_headers[column_index] + str(row)
            ws1[cell] = subject.get(field_name, "")
            ws1[cell].font = value_font

        # Custom fields: any key not recognized by the SDS schema headers.
        for field_name, field in subject.items():
            if field_name in sds_headers:
                continue

            if field_name not in custom_headers_to_column:
                custom_headers_to_column[field_name] = len(custom_headers_to_column) + 1
                # Create the column header in the Excel file.
                header_cell = ascii_headers[last_sds_column_index + custom_headers_to_column[field_name]] + "1"
                ws1[header_cell] = field_name
                ws1[header_cell].fill = orangeFill
                ws1[header_cell].font = custom_header_font

            # Add the field value to the corresponding cell.
            offset = custom_headers_to_column[field_name]
            value_cell = ascii_headers[last_sds_column_index + offset] + str(row)
            ws1[value_cell] = field
            ws1[value_cell].font = value_font

        row += 1

    wb.save(destination)

    size = getsize(destination)

    ## if generating directly on Pennsieve, then call upload function
    if upload_boolean:
        upload_metadata_file(SDS_FILE_SUBJECTS, soda, destination, True)

    return {"size": size}
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
from os.path import join, getsize
|
|
2
|
+
from openpyxl import load_workbook
|
|
3
|
+
from openpyxl.styles import Font
|
|
4
|
+
import shutil
|
|
5
|
+
import tempfile
|
|
6
|
+
from .helpers import upload_metadata_file, get_template_path
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
from .constants import METADATA_UPLOAD_PS_PATH, TEMPLATE_PATH
|
|
10
|
+
from .excel_utils import rename_headers, excel_columns
|
|
11
|
+
from ...utils import validate_schema
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
### Create submission file
|
|
15
|
+
def create_excel(soda, upload_boolean, local_destination):
    """
    Create an Excel file for submission metadata.

    Fixes over the previous version: removed a ``try/except ... raise e``
    wrapper around ``shutil.copyfile`` that only re-raised the same exception,
    and removed debug ``print`` statements from library code. Behavior is
    otherwise unchanged.

    Args:
        soda (dict): The soda object containing dataset metadata; reads
            soda["dataset_metadata"]["submission"].
        upload_boolean (bool): Whether to upload the file to Pennsieve.
        local_destination (str): The path to save the Excel file when not
            uploading.

    Returns:
        dict: A dictionary containing the size of the metadata file.
    """
    # Raises if the entry does not conform to the submission schema.
    validate_schema(soda["dataset_metadata"]["submission"], "submission_schema.json")

    font_submission = Font(name="Calibri", size=14, bold=False)

    source = get_template_path("submission.xlsx")

    destination = join(METADATA_UPLOAD_PS_PATH, "submission.xlsx") if upload_boolean else local_destination

    # FileNotFoundError (missing template) propagates to the caller.
    shutil.copyfile(source, destination)

    # TODO: Do not use an array for the non-array values; zipping for the sake
    # of the ascii value is not necessary until milestone_achieved.
    submission_metadata_list = [
        soda["dataset_metadata"]["submission"]
    ]

    # Write to the excel file. The zip yields a single iteration since
    # submission_metadata_list holds one dict (see TODO above).
    wb = load_workbook(destination)
    ws1 = wb["Sheet1"]
    start_index = 2
    for column, submission_data in zip(excel_columns(start_index), submission_metadata_list):
        ws1[column + "2"] = submission_data["consortium_data_standard"]
        ws1[column + "3"] = submission_data["funding_consortium"]
        ws1[column + "4"] = submission_data["award_number"]
        # One milestone per column, all on row 5.
        for col, milestone in zip(excel_columns(start_index), submission_data["milestone_achieved"]):
            ws1[col + "5"] = milestone
        ws1[column + "6"] = submission_data["milestone_completion_date"]
        for row_label in ("2", "3", "4", "5", "6"):
            ws1[column + row_label].font = font_submission

    # TODO: should milestone completion date also be an array?
    rename_headers(ws1, len(submission_metadata_list[0]["milestone_achieved"]), 2)

    wb.save(destination)

    wb.close()

    # Calculate the size of the metadata file.
    size = getsize(destination)

    ## if generating directly on Pennsieve, then call upload function
    if upload_boolean:
        upload_metadata_file("submission.xlsx", soda, destination, True)

    return {"size": size}
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
from .constants import METADATA_UPLOAD_PS_PATH, TEMPLATE_PATH
|
|
2
|
+
from os.path import join, getsize
|
|
3
|
+
from .helpers import upload_metadata_file, get_template_path
|
|
4
|
+
import shutil
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
# this function saves and uploads the README/CHANGES to Pennsieve, just when users choose to generate onto Pennsieve
|
|
8
|
+
## (not used for generating locally)
|
|
9
|
+
def create_text_file(soda, upload_boolean, local_destination, metadata_filename):
    """
    Create a text file for README, LICENSE, or CHANGES metadata.

    The file content comes entirely from the soda object
    (soda["dataset_metadata"][metadata_filename]). The previous implementation
    first copied a template to the destination and then immediately truncated
    and rewrote it, so the template copy was dead work and has been removed;
    the resulting file contents are identical.

    Args:
        soda (dict): The soda object containing dataset metadata.
        upload_boolean (bool): Whether to upload the file to Pennsieve.
        local_destination (str): The path to save the text file when not
            uploading.
        metadata_filename (str): The name of the metadata file to be created
            (e.g., "README.md", "LICENSE", "CHANGES").

    Returns:
        int: The size of the metadata file in bytes.
    """
    destination = join(METADATA_UPLOAD_PS_PATH, metadata_filename) if upload_boolean else local_destination

    # Use metadata_filename as the key for content; a missing key produces an
    # empty file rather than raising.
    text = soda["dataset_metadata"].get(metadata_filename, "")
    with open(destination, "w", encoding="utf-8") as file:
        file.write(text)

    size = getsize(destination)
    if upload_boolean:
        upload_metadata_file(metadata_filename, soda, destination, True)

    return size
|
|
46
|
+
|
|
47
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
Optional text file that contains information about the history of the dataset
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
Optional text file that contains information about the history of the dataset
|
|
File without changes
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|