pysodafair 0.1.62__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. pysoda/__init__.py +0 -0
  2. pysoda/constants.py +3 -0
  3. pysoda/core/__init__.py +10 -0
  4. pysoda/core/dataset_generation/__init__.py +11 -0
  5. pysoda/core/dataset_generation/manifestSession/__init__.py +1 -0
  6. pysoda/core/dataset_generation/manifestSession/manifest_session.py +146 -0
  7. pysoda/core/dataset_generation/upload.py +3951 -0
  8. pysoda/core/dataset_importing/__init__.py +1 -0
  9. pysoda/core/dataset_importing/import_dataset.py +662 -0
  10. pysoda/core/metadata/__init__.py +20 -0
  11. pysoda/core/metadata/code_description.py +109 -0
  12. pysoda/core/metadata/constants.py +32 -0
  13. pysoda/core/metadata/dataset_description.py +188 -0
  14. pysoda/core/metadata/excel_utils.py +41 -0
  15. pysoda/core/metadata/helpers.py +250 -0
  16. pysoda/core/metadata/manifest.py +112 -0
  17. pysoda/core/metadata/manifest_package/__init__.py +2 -0
  18. pysoda/core/metadata/manifest_package/manifest.py +0 -0
  19. pysoda/core/metadata/manifest_package/manifest_import.py +29 -0
  20. pysoda/core/metadata/manifest_package/manifest_writer.py +666 -0
  21. pysoda/core/metadata/performances.py +46 -0
  22. pysoda/core/metadata/resources.py +53 -0
  23. pysoda/core/metadata/samples.py +184 -0
  24. pysoda/core/metadata/sites.py +51 -0
  25. pysoda/core/metadata/subjects.py +172 -0
  26. pysoda/core/metadata/submission.py +91 -0
  27. pysoda/core/metadata/text_metadata.py +47 -0
  28. pysoda/core/metadata_templates/CHANGES +1 -0
  29. pysoda/core/metadata_templates/LICENSE +1 -0
  30. pysoda/core/metadata_templates/README.md +4 -0
  31. pysoda/core/metadata_templates/__init__.py +0 -0
  32. pysoda/core/metadata_templates/code_description.xlsx +0 -0
  33. pysoda/core/metadata_templates/code_parameters.xlsx +0 -0
  34. pysoda/core/metadata_templates/dataset_description.xlsx +0 -0
  35. pysoda/core/metadata_templates/manifest.xlsx +0 -0
  36. pysoda/core/metadata_templates/performances.xlsx +0 -0
  37. pysoda/core/metadata_templates/resources.xlsx +0 -0
  38. pysoda/core/metadata_templates/samples.xlsx +0 -0
  39. pysoda/core/metadata_templates/sites.xlsx +0 -0
  40. pysoda/core/metadata_templates/subjects.xlsx +0 -0
  41. pysoda/core/metadata_templates/subjects_pools_samples_structure.xlsx +0 -0
  42. pysoda/core/metadata_templates/subjects_pools_samples_structure_example.xlsx +0 -0
  43. pysoda/core/metadata_templates/submission.xlsx +0 -0
  44. pysoda/core/permissions/__init__.py +1 -0
  45. pysoda/core/permissions/permissions.py +31 -0
  46. pysoda/core/pysoda/__init__.py +2 -0
  47. pysoda/core/pysoda/soda.py +34 -0
  48. pysoda/core/pysoda/soda_object.py +55 -0
  49. pysoda/core/upload_manifests/__init__.py +1 -0
  50. pysoda/core/upload_manifests/upload_manifests.py +37 -0
  51. pysoda/schema/__init__.py +0 -0
  52. pysoda/schema/code_description.json +629 -0
  53. pysoda/schema/dataset_description.json +295 -0
  54. pysoda/schema/manifest.json +60 -0
  55. pysoda/schema/performances.json +44 -0
  56. pysoda/schema/resources.json +39 -0
  57. pysoda/schema/samples.json +97 -0
  58. pysoda/schema/sites.json +38 -0
  59. pysoda/schema/soda_schema.json +664 -0
  60. pysoda/schema/subjects.json +131 -0
  61. pysoda/schema/submission_schema.json +28 -0
  62. pysoda/utils/__init__.py +9 -0
  63. pysoda/utils/authentication.py +381 -0
  64. pysoda/utils/config.py +68 -0
  65. pysoda/utils/exceptions.py +156 -0
  66. pysoda/utils/logger.py +6 -0
  67. pysoda/utils/metadata_utils.py +74 -0
  68. pysoda/utils/pennsieveAgentUtils.py +11 -0
  69. pysoda/utils/pennsieveUtils.py +118 -0
  70. pysoda/utils/profile.py +28 -0
  71. pysoda/utils/schema_validation.py +133 -0
  72. pysoda/utils/time_utils.py +5 -0
  73. pysoda/utils/upload_utils.py +108 -0
  74. pysodafair-0.1.62.dist-info/METADATA +190 -0
  75. pysodafair-0.1.62.dist-info/RECORD +77 -0
  76. pysodafair-0.1.62.dist-info/WHEEL +4 -0
  77. pysodafair-0.1.62.dist-info/licenses/LICENSE +21 -0
pysoda/core/metadata/performances.py
@@ -0,0 +1,46 @@
+ from .constants import METADATA_UPLOAD_PS_PATH, TEMPLATE_PATH, SDS_FILE_PERFORMANCES, SCHEMA_NAME_PERFORMANCES
+ from .excel_utils import rename_headers, excel_columns
+ from openpyxl.styles import PatternFill
+ from os.path import join, getsize
+ from openpyxl import load_workbook
+ import shutil
+ from ...utils import validate_schema
+ from .helpers import upload_metadata_file, get_template_path
+
+ def create_excel(soda, upload_boolean, local_destination):
+     source = get_template_path(SDS_FILE_PERFORMANCES)
+
+     destination = join(METADATA_UPLOAD_PS_PATH, SDS_FILE_PERFORMANCES) if upload_boolean else local_destination
+
+     shutil.copyfile(source, destination)
+
+     wb = load_workbook(destination)
+     ws1 = wb["Sheet1"]
+
+     performances = soda["dataset_metadata"]["performances"]
+
+     validate_schema(performances, SCHEMA_NAME_PERFORMANCES)
+
+     # get the ascii column headers
+     row = 2
+     ascii_headers = excel_columns(start_index=0)
+     for performance in performances:
+         ws1[ascii_headers[0] + str(row)] = performance.get("performance_id", "")
+         ws1[ascii_headers[1] + str(row)] = performance.get("protocol_url_or_doi", "")
+         ws1[ascii_headers[2] + str(row)] = performance.get("date", "")
+         ws1[ascii_headers[3] + str(row)] = performance.get("start_datetime", "")
+         ws1[ascii_headers[4] + str(row)] = performance.get("end_datetime", "")
+         participants = " ".join(performance.get("participants", []))
+         ws1[ascii_headers[5] + str(row)] = participants
+         ws1[ascii_headers[6] + str(row)] = performance.get("additional_metadata", "")
+         row += 1
+
+     wb.save(destination)
+
+     size = getsize(destination)
+
+     ## if generating directly on Pennsieve, call upload function
+     if upload_boolean:
+         upload_metadata_file(SDS_FILE_PERFORMANCES, soda, destination, True)
+
+     return {"size": size}
pysoda/core/metadata/resources.py
@@ -0,0 +1,53 @@
+ from .constants import METADATA_UPLOAD_PS_PATH, TEMPLATE_PATH, SDS_FILE_RESOURCES, SCHEMA_NAME_RESOURCES
+ from .excel_utils import rename_headers, excel_columns
+ from openpyxl.styles import PatternFill
+ from os.path import join, getsize
+ from openpyxl import load_workbook
+ import shutil
+ from ...utils import validate_schema
+ from .helpers import upload_metadata_file, get_template_path
+
+ def create_excel(soda, upload_boolean, local_destination):
+     source = get_template_path(SDS_FILE_RESOURCES)
+
+     destination = join(METADATA_UPLOAD_PS_PATH, SDS_FILE_RESOURCES) if upload_boolean else local_destination
+
+     shutil.copyfile(source, destination)
+
+     wb = load_workbook(destination)
+     ws1 = wb["Sheet1"]
+
+     resources = soda["dataset_metadata"]["resources"]
+
+     validate_schema(resources, SCHEMA_NAME_RESOURCES)
+
+     # get the ascii column headers
+     row = 2
+     ascii_headers = excel_columns(start_index=0)
+     for resource in resources:
+         ws1[ascii_headers[0] + str(row)] = resource.get("rrid", "")
+         ws1[ascii_headers[1] + str(row)] = resource.get("type", "")
+         ws1[ascii_headers[2] + str(row)] = resource.get("name", "")
+         ws1[ascii_headers[3] + str(row)] = resource.get("url", "")
+         ws1[ascii_headers[4] + str(row)] = resource.get("vendor", "")
+         ws1[ascii_headers[5] + str(row)] = resource.get("version", "")
+         ws1[ascii_headers[6] + str(row)] = resource.get("id_in_protocol", "")
+         ws1[ascii_headers[7] + str(row)] = resource.get("additional_metadata", "")
+         row += 1
+
+     wb.save(destination)
+
+     size = getsize(destination)
+
+     ## if generating directly on Pennsieve, call upload function
+     if upload_boolean:
+         upload_metadata_file(SDS_FILE_RESOURCES, soda, destination, True)
+
+     return {"size": size}
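Every writer in this release shares the create_excel(soda, upload_boolean, local_destination) entry point. A hedged usage sketch for generating resources.xlsx locally — the import path and the minimal record below are assumptions; the actual required fields are defined by pysoda/schema/resources.json, which must accept the record for validate_schema to pass:

# Assumed import path; the package layout suggests pysoda.core.metadata.resources,
# but the public re-exports in __init__.py are not shown in this diff.
from pysoda.core.metadata import resources

soda = {
    "dataset_metadata": {
        "resources": [
            {
                # illustrative values only -- real requirements come from the schema
                "rrid": "RRID:AB_123456",
                "type": "antibody",
                "name": "Example antibody",
                "url": "https://example.org/resource",
                "vendor": "Example Vendor",
                "version": "1",
                "id_in_protocol": "res-1",
                "additional_metadata": "",
            }
        ]
    }
}

# upload_boolean=False writes to the given local path instead of Pennsieve
result = resources.create_excel(soda, False, "resources.xlsx")
print(result)  # {"size": <bytes>}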
pysoda/core/metadata/samples.py
@@ -0,0 +1,184 @@
+ from .constants import METADATA_UPLOAD_PS_PATH, TEMPLATE_PATH, SDS_FILE_SAMPLES, SCHEMA_NAME_SAMPLES
+ from .excel_utils import excel_columns
+ from openpyxl.styles import PatternFill, Font
+ from os.path import join, getsize
+ from openpyxl import load_workbook
+ import shutil
+ from ...utils import validate_schema, get_sds_headers
+ from .helpers import upload_metadata_file, get_template_path
+
+
+ def create_excel(soda, upload_boolean, local_destination):
+     source = get_template_path(SDS_FILE_SAMPLES)
+
+     samples = soda["dataset_metadata"]["samples"]
+
+     validate_schema(samples, SCHEMA_NAME_SAMPLES)
+
+     destination = join(METADATA_UPLOAD_PS_PATH, SDS_FILE_SAMPLES) if upload_boolean else local_destination
+     shutil.copyfile(source, destination)
+     wb = load_workbook(destination)
+     ws1 = wb["Sheet1"]
+
+     orangeFill = PatternFill(
+         start_color="FFD965", end_color="FFD965", fill_type="solid"
+     )
+
+     row = 2
+     ascii_headers = excel_columns(start_index=0)
+     custom_headers_to_column = {}
+     sds_headers = get_sds_headers(SCHEMA_NAME_SAMPLES)
+
+     # Populate the Excel file with the data
+     for sample in samples:
+         ws1[ascii_headers[0] + str(row)] = sample.get("sample_id", "")
+         ws1[ascii_headers[0] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
+         ws1[ascii_headers[1] + str(row)] = sample.get("subject_id", "")
+         ws1[ascii_headers[1] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
+         ws1[ascii_headers[2] + str(row)] = sample.get("was_derived_from", "")
+         ws1[ascii_headers[2] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
+         ws1[ascii_headers[3] + str(row)] = sample.get("pool_id", "")
+         ws1[ascii_headers[3] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
+         ws1[ascii_headers[4] + str(row)] = sample.get("sample_experimental_group", "")
+         ws1[ascii_headers[4] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
+         ws1[ascii_headers[5] + str(row)] = sample.get("sample_type", "")
+         ws1[ascii_headers[5] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
+         ws1[ascii_headers[6] + str(row)] = sample.get("sample_anatomical_location", "")
+         ws1[ascii_headers[6] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
+         ws1[ascii_headers[7] + str(row)] = sample.get("also_in_dataset", "")
+         ws1[ascii_headers[7] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
+         ws1[ascii_headers[8] + str(row)] = sample.get("member_of", "")
+         ws1[ascii_headers[8] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
+         ws1[ascii_headers[9] + str(row)] = sample.get("metadata_only", "")
+         ws1[ascii_headers[9] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
+         ws1[ascii_headers[10] + str(row)] = sample.get("laboratory_internal_id", "")
+         ws1[ascii_headers[10] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
+         ws1[ascii_headers[11] + str(row)] = sample.get("date_of_derivation", "")
+         ws1[ascii_headers[11] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
+         ws1[ascii_headers[12] + str(row)] = sample.get("experimental_log_file_path", "")
+         ws1[ascii_headers[12] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
+         ws1[ascii_headers[13] + str(row)] = sample.get("reference_atlas", "")
+         ws1[ascii_headers[13] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
+         ws1[ascii_headers[14] + str(row)] = sample.get("pathology", "")
+         ws1[ascii_headers[14] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
+         ws1[ascii_headers[15] + str(row)] = sample.get("laterality", "")
+         ws1[ascii_headers[15] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
+         ws1[ascii_headers[16] + str(row)] = sample.get("cell_type", "")
+         ws1[ascii_headers[16] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
+         ws1[ascii_headers[17] + str(row)] = sample.get("plane_of_section", "")
+         ws1[ascii_headers[17] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
+         ws1[ascii_headers[18] + str(row)] = sample.get("protocol_title", "")
+         ws1[ascii_headers[18] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
+         ws1[ascii_headers[19] + str(row)] = sample.get("protocol_url_or_doi", "")
+         ws1[ascii_headers[19] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
+         # Handle custom fields
+         for field_name, field in sample.items():
+             if field_name in sds_headers:
+                 continue
+
+             # Check if the field is already in the custom_headers_to_column dictionary
+             if field_name not in custom_headers_to_column:
+                 custom_headers_to_column[field_name] = len(custom_headers_to_column.keys()) + 1
+
+                 # Create the column header in the Excel file
+                 offset_from_final_sds_header = custom_headers_to_column[field_name]
+                 ws1[ascii_headers[19 + offset_from_final_sds_header] + "1"] = field_name
+                 ws1[ascii_headers[19 + offset_from_final_sds_header] + "1"].fill = orangeFill
+                 ws1[ascii_headers[19 + offset_from_final_sds_header] + "1"].font = Font(bold=True, size=12, name="Calibri")
+
+             # Add the field value to the corresponding cell in the Excel file
+             offset_from_final_sds_header = custom_headers_to_column[field_name]
+             ws1[ascii_headers[19 + offset_from_final_sds_header] + str(row)] = field
+             ws1[ascii_headers[19 + offset_from_final_sds_header] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
+         row += 1
+
+     wb.save(destination)
+
+     size = getsize(destination)
+
+     # If generating directly on Pennsieve, call upload function
+     if upload_boolean:
+         upload_metadata_file(SDS_FILE_SAMPLES, soda, destination, True)
+
+     return size
+
+
+ # soda = {
+ #     "dataset_metadata": {
+ #         "samples": [
+ #             {
+ #                 "sample_id": "sample_1",
+ #                 "subject_id": "subject_1",
+ #                 "was_derived_from": "derived_from_1",
+ #                 "pool_id": "pool_1",
+ #                 "sample_experimental_group": "experimental_group_1",
+ #                 "sample_type": "type_1",
+ #                 "sample_anatomical_location": "location_1",
+ #                 "also_in_dataset": "dataset_1",
+ #                 "member_of": "member_1",
+ #                 "metadata_only": "False",
+ #                 "laboratory_internal_id": "internal_id_1",
+ #                 "date_of_derivation": "2023-01-01",
+ #                 "experimental_log_file_path": "/path/to/log/file",
+ #                 "reference_atlas": "/path/to/atlas",
+ #                 "pathology": "pathology_1",
+ #                 "laterality": "left",
+ #                 "cell_type": "cell_type_1",
+ #                 "plane_of_section": "plane_1",
+ #                 "protocol_title": "protocol_title_1",
+ #                 "protocol_url_or_doi": "/path/to/protocol",
+ #                 "custom_field_1": "custom_value_1",
+ #             },
+ #             {
+ #                 "sample_id": "sample_2",
+ #                 "subject_id": "subject_2",
+ #                 "was_derived_from": "derived_from_2",
+ #                 "pool_id": "pool_2",
+ #                 "sample_experimental_group": "experimental_group_2",
+ #                 "sample_type": "type_2",
+ #                 "sample_anatomical_location": "location_2",
+ #                 "also_in_dataset": "dataset_2",
+ #                 "member_of": "member_2",
+ #                 "metadata_only": "True",
+ #                 "laboratory_internal_id": "internal_id_2",
+ #                 "date_of_derivation": "2023-02-01",
+ #                 "experimental_log_file_path": "/path/to/log/file2",
+ #                 "reference_atlas": "/path/to/atlas2",
+ #                 "pathology": "pathology_2",
+ #                 "laterality": "right",
+ #                 "cell_type": "cell_type_2",
+ #                 "plane_of_section": "plane_2",
+ #                 "protocol_title": "protocol_title_2",
+ #                 "protocol_url_or_doi": "/path/to/protocol2",
+ #                 "custom_field_1": "custom_value_12",
+ #             }
+ #         ]
+ #     }
+ # }
+
+ # try:
+ #     create_excel(soda, False, "samples.xlsx")
+ # except Exception as e:
+ #     print(f"An error occurred: {e}")
pysoda/core/metadata/sites.py
@@ -0,0 +1,51 @@
+ from .constants import METADATA_UPLOAD_PS_PATH, TEMPLATE_PATH, SDS_FILE_SITES, SCHEMA_NAME_SITES
+ from .excel_utils import rename_headers, excel_columns
+ from openpyxl.styles import PatternFill
+ from os.path import join, getsize
+ from openpyxl import load_workbook
+ import shutil
+ from ...utils import validate_schema
+ from .helpers import upload_metadata_file, get_template_path
+
+ def create_excel(soda, upload_boolean, local_destination):
+     source = get_template_path(SDS_FILE_SITES)
+
+     destination = join(METADATA_UPLOAD_PS_PATH, SDS_FILE_SITES) if upload_boolean else local_destination
+
+     shutil.copyfile(source, destination)
+
+     wb = load_workbook(destination)
+     ws1 = wb["Sheet1"]
+
+     sites = soda["dataset_metadata"]["sites"]
+
+     validate_schema(sites, SCHEMA_NAME_SITES)
+
+     # get the ascii column headers
+     row = 2
+     ascii_headers = excel_columns(start_index=0)
+     for site in sites:
+         ws1[ascii_headers[0] + str(row)] = site.get("site_id", "")
+         ws1[ascii_headers[1] + str(row)] = site.get("specimen_id", "")
+         ws1[ascii_headers[2] + str(row)] = site.get("site_type", "")
+         ws1[ascii_headers[3] + str(row)] = site.get("laboratory_internal_id", "")
+         ws1[ascii_headers[4] + str(row)] = site.get("coordinate_system", "")
+         ws1[ascii_headers[5] + str(row)] = site.get("coordinate_system_position", "")
+         ws1[ascii_headers[6] + str(row)] = site.get("more...", "")
+         row += 1
+
+     wb.save(destination)
+
+     size = getsize(destination)
+
+     ## if generating directly on Pennsieve, call upload function
+     if upload_boolean:
+         upload_metadata_file(SDS_FILE_SITES, soda, destination, True)
+
+     return {"size": size}
pysoda/core/metadata/subjects.py
@@ -0,0 +1,172 @@
+ from .constants import METADATA_UPLOAD_PS_PATH, TEMPLATE_PATH, SDS_FILE_SUBJECTS, SCHEMA_NAME_SUBJECTS
+ from .excel_utils import rename_headers, excel_columns
+ from openpyxl.styles import PatternFill
+ from os.path import join, getsize
+ from openpyxl import load_workbook
+ import shutil
+ import numpy as np
+ from ...utils import validate_schema, get_sds_headers
+ from openpyxl.styles import Font
+ from .helpers import transposeMatrix, getMetadataCustomFields, sortedSubjectsTableData, upload_metadata_file, get_template_path
+
+
+ def create_excel(soda, upload_boolean, local_destination):
+     source = get_template_path(SDS_FILE_SUBJECTS)
+
+     subjects = soda["dataset_metadata"]["subjects"]
+
+     validate_schema(subjects, SCHEMA_NAME_SUBJECTS)
+
+     destination = join(METADATA_UPLOAD_PS_PATH, SDS_FILE_SUBJECTS) if upload_boolean else local_destination
+     shutil.copyfile(source, destination)
+     wb = load_workbook(destination)
+     ws1 = wb["Sheet1"]
+
+     # 1. see if the length of datastructure[0] == length of datastructure. If yes, go ahead. If no, add new columns from headers[n-1] onward.
+     orangeFill = PatternFill(
+         start_color="FFD965", end_color="FFD965", fill_type="solid"
+     )
+
+     # 1.1 (optional) add custom fields to the headers of the workbook
+
+     # for column, header in zip(
+     #     excel_columns(start_index=29), subjects[0].keys()
+     # ):
+     #     cell = column + str(1)
+     #     ws1[cell] = header
+     #     ws1[cell].fill = orangeFill
+     #     ws1[cell].font = Font(bold=True, size=12, name="Calibri")
+
+     row = 2
+     ascii_headers = excel_columns(start_index=0)
+     custom_headers_to_column = {}
+     sds_headers = get_sds_headers(SCHEMA_NAME_SUBJECTS)
+
+     # 2. populate excel file with the data
+     for subject in subjects:
+         ws1[ascii_headers[0] + str(row)] = subject.get("subject_id", "")
+         ws1[ascii_headers[0] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
+         ws1[ascii_headers[1] + str(row)] = subject.get("pool_id", "")
+         ws1[ascii_headers[1] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
+         ws1[ascii_headers[2] + str(row)] = subject.get("subject_experimental_group", "")
+         ws1[ascii_headers[2] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
+         ws1[ascii_headers[3] + str(row)] = subject.get("age", "")
+         ws1[ascii_headers[3] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
+         ws1[ascii_headers[4] + str(row)] = subject.get("sex", "")
+         ws1[ascii_headers[4] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
+         ws1[ascii_headers[5] + str(row)] = subject.get("species", "")
+         ws1[ascii_headers[5] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
+         ws1[ascii_headers[6] + str(row)] = subject.get("strain", "")
+         ws1[ascii_headers[6] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
+         ws1[ascii_headers[7] + str(row)] = subject.get("rrid_for_strain", "")
+         ws1[ascii_headers[7] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
+         ws1[ascii_headers[8] + str(row)] = subject.get("age_category", "")
+         ws1[ascii_headers[8] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
+         ws1[ascii_headers[9] + str(row)] = subject.get("also_in_dataset", "")
+         ws1[ascii_headers[9] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
+         ws1[ascii_headers[10] + str(row)] = subject.get("member_of", "")
+         ws1[ascii_headers[10] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
+         ws1[ascii_headers[11] + str(row)] = subject.get("metadata_only", "")
+         ws1[ascii_headers[11] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
+         ws1[ascii_headers[12] + str(row)] = subject.get("laboratory_internal_id", "")
+         ws1[ascii_headers[12] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
+         ws1[ascii_headers[13] + str(row)] = subject.get("date_of_birth", "")
+         ws1[ascii_headers[13] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
+         ws1[ascii_headers[14] + str(row)] = subject.get("age_range_min", "")
+         ws1[ascii_headers[14] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
+         ws1[ascii_headers[15] + str(row)] = subject.get("age_range_max", "")
+         ws1[ascii_headers[15] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
+         ws1[ascii_headers[16] + str(row)] = subject.get("body_mass", "")
+         ws1[ascii_headers[16] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
+         ws1[ascii_headers[17] + str(row)] = subject.get("genotype", "")
+         ws1[ascii_headers[17] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
+         ws1[ascii_headers[18] + str(row)] = subject.get("phenotype", "")
+         ws1[ascii_headers[18] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
+         ws1[ascii_headers[19] + str(row)] = subject.get("handedness", "")
+         ws1[ascii_headers[19] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
+         ws1[ascii_headers[20] + str(row)] = subject.get("reference_atlas", "")
+         ws1[ascii_headers[20] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
+         ws1[ascii_headers[21] + str(row)] = subject.get("experimental_log_file_path", "")
+         ws1[ascii_headers[21] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
+         ws1[ascii_headers[22] + str(row)] = subject.get("experiment_date", "")
+         ws1[ascii_headers[22] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
+         ws1[ascii_headers[23] + str(row)] = subject.get("disease_or_disorder", "")
+         ws1[ascii_headers[23] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
+         ws1[ascii_headers[24] + str(row)] = subject.get("intervention", "")
+         ws1[ascii_headers[24] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
+         ws1[ascii_headers[25] + str(row)] = subject.get("disease_model", "")
+         ws1[ascii_headers[25] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
+         ws1[ascii_headers[26] + str(row)] = subject.get("protocol_title", "")
+         ws1[ascii_headers[26] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
+         ws1[ascii_headers[27] + str(row)] = subject.get("protocol_url_or_doi", "")
+         ws1[ascii_headers[27] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
+         # handle custom fields
+         for field_name, field in subject.items():
+             if field_name in sds_headers:
+                 continue
+
+             # check if the field is already in the custom_headers_to_column dictionary
+             if field_name not in custom_headers_to_column:
+                 print(field_name)
+
+                 custom_headers_to_column[field_name] = len(custom_headers_to_column.keys()) + 1
+
+                 # create the column header in the excel file
+                 offset_from_final_sds_header = custom_headers_to_column[field_name]
+                 ws1[ascii_headers[27 + offset_from_final_sds_header] + "1"] = field_name
+                 ws1[ascii_headers[27 + offset_from_final_sds_header] + "1"].fill = orangeFill
+                 ws1[ascii_headers[27 + offset_from_final_sds_header] + "1"].font = Font(bold=True, size=12, name="Calibri")
+
+             # add the field value to the corresponding cell in the excel file
+             offset_from_final_sds_header = custom_headers_to_column[field_name]
+             ws1[ascii_headers[27 + offset_from_final_sds_header] + str(row)] = field
+             ws1[ascii_headers[27 + offset_from_final_sds_header] + str(row)].font = Font(bold=False, size=11, name="Arial")
+
+         row += 1
+
+     wb.save(destination)
+
+     size = getsize(destination)
+
+     ## if generating directly on Pennsieve, then call upload function and then delete the destination path
+     if upload_boolean:
+         upload_metadata_file(SDS_FILE_SUBJECTS, soda, destination, True)
+
+     return {"size": size}
pysoda/core/metadata/submission.py
@@ -0,0 +1,91 @@
+ from os.path import join, getsize
+ from openpyxl import load_workbook
+ from openpyxl.styles import Font
+ import shutil
+ import tempfile
+ from .helpers import upload_metadata_file, get_template_path
+
+
+ from .constants import METADATA_UPLOAD_PS_PATH, TEMPLATE_PATH
+ from .excel_utils import rename_headers, excel_columns
+ from ...utils import validate_schema
+
+
+ ### Create submission file
+ def create_excel(soda, upload_boolean, local_destination):
+     """
+     Create an Excel file for submission metadata.
+
+     Args:
+         soda (dict): The soda object containing dataset metadata.
+         upload_boolean (bool): Whether to upload the file to Pennsieve.
+         local_destination (str): The path to save the Excel file.
+
+     Returns:
+         dict: A dictionary containing the size of the metadata file.
+     """
+
+     validate_schema(soda["dataset_metadata"]["submission"], "submission_schema.json")
+
+     font_submission = Font(name="Calibri", size=14, bold=False)
+
+     source = get_template_path("submission.xlsx")
+
+     destination = join(METADATA_UPLOAD_PS_PATH, "submission.xlsx") if upload_boolean else local_destination
+
+     try:
+         shutil.copyfile(source, destination)
+     except FileNotFoundError as e:
+         raise e
+
+     # TODO: Do not use an array for the non-array values; zipping for the sake of the ascii value is not necessary until milestone_achieved
+     submission_metadata_list = [
+         soda["dataset_metadata"]["submission"]
+     ]
+
+     # write to excel file
+     wb = load_workbook(destination)
+     ws1 = wb["Sheet1"]
+     start_index = 2
+     for column, submission_data in zip(excel_columns(start_index), submission_metadata_list):
+         ws1[column + "2"] = submission_data["consortium_data_standard"]
+         ws1[column + "3"] = submission_data["funding_consortium"]
+         ws1[column + "4"] = submission_data["award_number"]
+         for col, milestone in zip(excel_columns(start_index), submission_data["milestone_achieved"]):
+             ws1[col + str(5)] = milestone
+         ws1[column + "6"] = submission_data["milestone_completion_date"]
+         ws1[column + "2"].font = font_submission
+         ws1[column + "3"].font = font_submission
+         ws1[column + "4"].font = font_submission
+         ws1[column + "5"].font = font_submission
+         ws1[column + "6"].font = font_submission
+
+     # TODO: should milestone completion date also be an array?
+     rename_headers(ws1, len(submission_metadata_list[0]["milestone_achieved"]), 2)
+
+     wb.save(destination)
+
+     print("Excel file created successfully at:", destination)
+
+     wb.close()
+
+     # calculate the size of the metadata file
+     size = getsize(destination)
+
+     ## if generating directly on Pennsieve, then call upload function and then delete the destination path
+     if upload_boolean:
+         print("Uploading Excel file to Pennsieve...")
+         upload_metadata_file("submission.xlsx", soda, destination, True)
+         print("Excel file uploaded successfully to Pennsieve.")
+
+     return {"size": size}
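The submission writer consumes a single object, wrapped in a one-element list only so the column letters line up (per the TODO above). A sketch of the input shape it reads — the key names come straight from the code, the values are invented:

# Hypothetical payload; submission_schema.json (not reproduced in this diff)
# governs which values validate_schema will accept.
soda = {
    "dataset_metadata": {
        "submission": {
            "consortium_data_standard": "SPARC",
            "funding_consortium": "SPARC",
            "award_number": "OT2OD000000",
            "milestone_achieved": ["milestone-1", "milestone-2"],
            "milestone_completion_date": "2024-01-01",
        }
    }
}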
pysoda/core/metadata/text_metadata.py
@@ -0,0 +1,47 @@
+ from .constants import METADATA_UPLOAD_PS_PATH, TEMPLATE_PATH
+ from os.path import join, getsize
+ from .helpers import upload_metadata_file, get_template_path
+ import shutil
+
+
+ # this function saves the README/LICENSE/CHANGES file and uploads it to Pennsieve when users choose to generate onto Pennsieve
+ ## (the upload step is skipped when generating locally)
+ def create_text_file(soda, upload_boolean, local_destination, metadata_filename):
+     """
+     Create a text file for README, LICENSE, or CHANGES metadata using a template.
+
+     Args:
+         soda (dict): The soda object containing dataset metadata.
+         upload_boolean (bool): Whether to upload the file to Pennsieve.
+         local_destination (str): The path to save the text file.
+         metadata_filename (str): The name of the metadata file to be created (e.g., "README.md", "LICENSE", "CHANGES").
+
+     Returns:
+         int: The size of the metadata file in bytes.
+     """
+
+     # Use metadata_filename directly for template and output filename
+     source = get_template_path(metadata_filename)
+     destination = join(METADATA_UPLOAD_PS_PATH, metadata_filename) if upload_boolean else local_destination
+
+     # Copy the template to the destination (if it exists)
+     try:
+         shutil.copyfile(source, destination)
+     except FileNotFoundError:
+         # If the template is not found, just create a new empty file
+         with open(destination, "w", encoding="utf-8") as f:
+             pass
+
+     # Write the actual content from soda into the file (overwriting template content)
+     # Use metadata_filename as the key for content
+     text = soda["dataset_metadata"].get(metadata_filename, "")
+     with open(destination, "w", encoding="utf-8") as file:
+         file.write(text)
+
+     size = getsize(destination)
+     if upload_boolean:
+         upload_metadata_file(metadata_filename, soda, destination, True)
+
+     return size
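A hedged usage sketch for the text writer above: metadata_filename doubles as the template name and the content key inside soda["dataset_metadata"], so generating a local README.md looks like this (the import path is an assumption based on the file layout):

# Assumed import path for the module shown above.
from pysoda.core.metadata.text_metadata import create_text_file

soda = {"dataset_metadata": {"README.md": "# My dataset\n\nShort description."}}

# upload_boolean=False keeps the file at the local destination
size = create_text_file(soda, False, "README.md", "README.md")
print(size)  # size of the written file in bytes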
pysoda/core/metadata_templates/CHANGES
@@ -0,0 +1 @@
+ Optional text file that contains information about the history of the dataset
pysoda/core/metadata_templates/LICENSE
@@ -0,0 +1 @@
+ Optional text file that contains information about the history of the dataset
pysoda/core/metadata_templates/README.md
@@ -0,0 +1,4 @@
+ # My dataset readme (change this line)
+
+ A required markdown file that provides an introduction to and
+ background for the dataset.