pysodafair-0.1.62-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. pysoda/__init__.py +0 -0
  2. pysoda/constants.py +3 -0
  3. pysoda/core/__init__.py +10 -0
  4. pysoda/core/dataset_generation/__init__.py +11 -0
  5. pysoda/core/dataset_generation/manifestSession/__init__.py +1 -0
  6. pysoda/core/dataset_generation/manifestSession/manifest_session.py +146 -0
  7. pysoda/core/dataset_generation/upload.py +3951 -0
  8. pysoda/core/dataset_importing/__init__.py +1 -0
  9. pysoda/core/dataset_importing/import_dataset.py +662 -0
  10. pysoda/core/metadata/__init__.py +20 -0
  11. pysoda/core/metadata/code_description.py +109 -0
  12. pysoda/core/metadata/constants.py +32 -0
  13. pysoda/core/metadata/dataset_description.py +188 -0
  14. pysoda/core/metadata/excel_utils.py +41 -0
  15. pysoda/core/metadata/helpers.py +250 -0
  16. pysoda/core/metadata/manifest.py +112 -0
  17. pysoda/core/metadata/manifest_package/__init__.py +2 -0
  18. pysoda/core/metadata/manifest_package/manifest.py +0 -0
  19. pysoda/core/metadata/manifest_package/manifest_import.py +29 -0
  20. pysoda/core/metadata/manifest_package/manifest_writer.py +666 -0
  21. pysoda/core/metadata/performances.py +46 -0
  22. pysoda/core/metadata/resources.py +53 -0
  23. pysoda/core/metadata/samples.py +184 -0
  24. pysoda/core/metadata/sites.py +51 -0
  25. pysoda/core/metadata/subjects.py +172 -0
  26. pysoda/core/metadata/submission.py +91 -0
  27. pysoda/core/metadata/text_metadata.py +47 -0
  28. pysoda/core/metadata_templates/CHANGES +1 -0
  29. pysoda/core/metadata_templates/LICENSE +1 -0
  30. pysoda/core/metadata_templates/README.md +4 -0
  31. pysoda/core/metadata_templates/__init__.py +0 -0
  32. pysoda/core/metadata_templates/code_description.xlsx +0 -0
  33. pysoda/core/metadata_templates/code_parameters.xlsx +0 -0
  34. pysoda/core/metadata_templates/dataset_description.xlsx +0 -0
  35. pysoda/core/metadata_templates/manifest.xlsx +0 -0
  36. pysoda/core/metadata_templates/performances.xlsx +0 -0
  37. pysoda/core/metadata_templates/resources.xlsx +0 -0
  38. pysoda/core/metadata_templates/samples.xlsx +0 -0
  39. pysoda/core/metadata_templates/sites.xlsx +0 -0
  40. pysoda/core/metadata_templates/subjects.xlsx +0 -0
  41. pysoda/core/metadata_templates/subjects_pools_samples_structure.xlsx +0 -0
  42. pysoda/core/metadata_templates/subjects_pools_samples_structure_example.xlsx +0 -0
  43. pysoda/core/metadata_templates/submission.xlsx +0 -0
  44. pysoda/core/permissions/__init__.py +1 -0
  45. pysoda/core/permissions/permissions.py +31 -0
  46. pysoda/core/pysoda/__init__.py +2 -0
  47. pysoda/core/pysoda/soda.py +34 -0
  48. pysoda/core/pysoda/soda_object.py +55 -0
  49. pysoda/core/upload_manifests/__init__.py +1 -0
  50. pysoda/core/upload_manifests/upload_manifests.py +37 -0
  51. pysoda/schema/__init__.py +0 -0
  52. pysoda/schema/code_description.json +629 -0
  53. pysoda/schema/dataset_description.json +295 -0
  54. pysoda/schema/manifest.json +60 -0
  55. pysoda/schema/performances.json +44 -0
  56. pysoda/schema/resources.json +39 -0
  57. pysoda/schema/samples.json +97 -0
  58. pysoda/schema/sites.json +38 -0
  59. pysoda/schema/soda_schema.json +664 -0
  60. pysoda/schema/subjects.json +131 -0
  61. pysoda/schema/submission_schema.json +28 -0
  62. pysoda/utils/__init__.py +9 -0
  63. pysoda/utils/authentication.py +381 -0
  64. pysoda/utils/config.py +68 -0
  65. pysoda/utils/exceptions.py +156 -0
  66. pysoda/utils/logger.py +6 -0
  67. pysoda/utils/metadata_utils.py +74 -0
  68. pysoda/utils/pennsieveAgentUtils.py +11 -0
  69. pysoda/utils/pennsieveUtils.py +118 -0
  70. pysoda/utils/profile.py +28 -0
  71. pysoda/utils/schema_validation.py +133 -0
  72. pysoda/utils/time_utils.py +5 -0
  73. pysoda/utils/upload_utils.py +108 -0
  74. pysodafair-0.1.62.dist-info/METADATA +190 -0
  75. pysodafair-0.1.62.dist-info/RECORD +77 -0
  76. pysodafair-0.1.62.dist-info/WHEEL +4 -0
  77. pysodafair-0.1.62.dist-info/licenses/LICENSE +21 -0
pysoda/core/metadata/code_description.py
@@ -0,0 +1,109 @@
+ from .constants import METADATA_UPLOAD_PS_PATH, TEMPLATE_PATH, SDS_FILE_CODE_DESCRIPTION, SCHEMA_NAME_CODE_DESCRIPTION
+ from .excel_utils import rename_headers, excel_columns
+ from openpyxl.styles import PatternFill
+ from os.path import join, getsize
+ from openpyxl import load_workbook
+ import shutil
+ from ...utils import validate_schema
+ from .helpers import upload_metadata_file, get_template_path
+
+
+ # TODO: Handle optional entries when coupled with provided entries
+ # TODO: Handle extending columns and filling with color when more entries are provided than the template default handles
+ def create_excel(soda, upload, local_destination):
+     source = get_template_path(SDS_FILE_CODE_DESCRIPTION)
+     destination = join(METADATA_UPLOAD_PS_PATH, SDS_FILE_CODE_DESCRIPTION) if upload else local_destination
+     shutil.copyfile(source, destination)
+
+     validate_schema(soda["dataset_metadata"]["code_description"], SCHEMA_NAME_CODE_DESCRIPTION)
+
+     wb = load_workbook(destination)
+     ws1 = wb[wb.sheetnames[0]]
+
+     populate_input_output_information(ws1, soda)
+
+     populate_basic_information(ws1, soda)
+
+     populate_ten_simple_rules(ws1, soda)
+
+     wb.save(destination)
+
+     size = getsize(destination)
+
+     # if generating directly on Pennsieve, call the upload function, which also deletes the local copy
+     if upload:
+         upload_metadata_file(SDS_FILE_CODE_DESCRIPTION, soda, destination, True)
+
+     return {"size": size}
+
+
+ # TODO: Handle optional entries
+ def populate_input_output_information(ws1, soda):
+     # populate from row 27, starting at column D, up to column n, depending on the number of items in each input/output entry
+     input_output_information = soda["dataset_metadata"]["input_output_information"]
+
+     row = 27
+
+     first_column = excel_columns(start_index=3)[0]
+     ws1[first_column + str(row)] = input_output_information["number_of_inputs"]
+
+     for input_entry, column in zip(input_output_information["inputs"], excel_columns(start_index=3)):
+         row = 28
+         ws1[column + str(row)] = input_entry["input_parameter_name"]
+         ws1[column + str(row + 1)] = input_entry["input_parameter_type"]
+         ws1[column + str(row + 2)] = input_entry["input_parameter_description"]
+         ws1[column + str(row + 3)] = input_entry["input_units"]
+         ws1[column + str(row + 4)] = input_entry["input_default_value"]
+
+     # populate the number of outputs into row 34
+     row = 34
+     ws1[first_column + str(row)] = input_output_information["number_of_outputs"]
+
+     # populate the outputs into rows 35 - 39
+     for output, column in zip(input_output_information["outputs"], excel_columns(start_index=3)):
+         row = 35
+         ws1[column + str(row)] = output["output_parameter_name"]
+         ws1[column + str(row + 1)] = output["output_parameter_type"]
+         ws1[column + str(row + 2)] = output["output_parameter_description"]
+         ws1[column + str(row + 3)] = output["output_units"]
+         ws1[column + str(row + 4)] = output["output_default_value"]
+
+
+ def populate_basic_information(ws1, soda):
+     basic_information = soda["dataset_metadata"]["basic_information"]
+
+     # fill out basic information in rows 2 - 5, starting at column D
+     row = 2
+     for info, column in zip(basic_information, excel_columns(start_index=3)):
+         ws1[column + str(row)] = info["RRID_term"]
+         ws1[column + str(row + 1)] = info["RRID_identifier"]
+         ws1[column + str(row + 2)] = info["ontology_term"]
+         ws1[column + str(row + 3)] = info["ontology_identifier"]
+
+
+ def populate_ten_simple_rules(ws1, soda):
+     ten_simple_rules = soda["dataset_metadata"]["ten_simple_rules"]
+     row = 8
+     ascii_cols = excel_columns(start_index=3)
+     for _, rule in ten_simple_rules.items():
+         ws1[ascii_cols[0] + str(row)] = rule.get("Link", "")
+         ws1[ascii_cols[1] + str(row)] = rule.get("Rating", "")
+         ws1[ascii_cols[2] + str(row)] = rule.get("Target", "")
+         ws1[ascii_cols[3] + str(row)] = rule.get("Target Justification", "")
+         ws1[ascii_cols[4] + str(row)] = rule.get("Text", "")
+         row += 1
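For context, a minimal sketch of how this module's create_excel is shaped to be called. The payload below is illustrative only; real values would have to satisfy pysoda/schema/code_description.json:

    from pysoda.core.metadata.code_description import create_excel

    soda = {
        "dataset_metadata": {
            "code_description": {},  # validated against code_description.json
            "input_output_information": {
                "number_of_inputs": 1,
                "inputs": [{
                    "input_parameter_name": "threshold",
                    "input_parameter_type": "float",
                    "input_parameter_description": "detection cutoff",
                    "input_units": "mV",
                    "input_default_value": "0.5",
                }],
                "number_of_outputs": 0,
                "outputs": [],
            },
            "basic_information": [],
            "ten_simple_rules": {},
        },
    }

    # upload=False populates the bundled template and saves it to the local path
    result = create_excel(soda, False, "./code_description.xlsx")
    print(result["size"])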
pysoda/core/metadata/constants.py
@@ -0,0 +1,32 @@
+ from os.path import join, getsize, abspath, dirname, expanduser
+ from os import makedirs
+
+
+ TEMPLATE_PATH = join(dirname(abspath(__file__)), '..', 'metadata_templates')
+ METADATA_UPLOAD_PS_PATH = expanduser("~/.pysoda")
+ makedirs(METADATA_UPLOAD_PS_PATH, exist_ok=True)
+
+
+ SCHEMA_NAMES = {
+     "submission": "submission_schema.json",
+     "subjects": "subjects_schema.json"
+ }
+
+
+ SDS_FILE_SUBJECTS = "subjects.xlsx"
+ SCHEMA_NAME_SUBJECTS = "subjects.json"
+ SDS_FILE_SAMPLES = "samples.xlsx"
+ SCHEMA_NAME_SAMPLES = "samples.json"
+ SDS_FILE_PERFORMANCES = "performances.xlsx"
+ SCHEMA_NAME_PERFORMANCES = "performances.json"
+ SDS_FILE_SITES = "sites.xlsx"
+ SCHEMA_NAME_SITES = "sites.json"
+ SDS_FILE_RESOURCES = "resources.xlsx"
+ SCHEMA_NAME_RESOURCES = "resources.json"
+ SDS_FILE_DATASET_DESCRIPTION = "dataset_description.xlsx"
+ SCHEMA_NAME_DATASET_DESCRIPTION = "dataset_description.json"
+ SDS_FILE_CODE_DESCRIPTION = "code_description.xlsx"
+ SCHEMA_NAME_CODE_DESCRIPTION = "code_description.json"
+ SDS_FILE_MANIFEST = "manifest.xlsx"
+ SCHEMA_NAME_MANIFEST = "manifest.json"
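As code_description.py above illustrates, each SDS_FILE_*/SCHEMA_NAME_* pair drives one template copy plus one schema validation. A minimal sketch (subjects_payload is a hypothetical dict that would have to satisfy pysoda/schema/subjects.json):

    source = get_template_path(SDS_FILE_SUBJECTS)            # bundled subjects.xlsx template
    validate_schema(subjects_payload, SCHEMA_NAME_SUBJECTS)  # checked against subjects.json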
pysoda/core/metadata/dataset_description.py
@@ -0,0 +1,188 @@
+ from .constants import METADATA_UPLOAD_PS_PATH, TEMPLATE_PATH, SDS_FILE_DATASET_DESCRIPTION, SCHEMA_NAME_DATASET_DESCRIPTION
+ from os.path import join, getsize
+ from openpyxl import load_workbook
+ import shutil
+ from .excel_utils import rename_headers, excel_columns
+ import itertools
+ from openpyxl.styles import PatternFill
+ from ...utils import validate_schema
+ from .helpers import upload_metadata_file, get_template_path
+
+
+ def create_excel(soda, upload_boolean, local_destination):
+     source = get_template_path(SDS_FILE_DATASET_DESCRIPTION)
+     destination = join(METADATA_UPLOAD_PS_PATH, SDS_FILE_DATASET_DESCRIPTION) if upload_boolean else local_destination
+     shutil.copyfile(source, destination)
+
+     validate_schema(soda["dataset_metadata"]["dataset_description"], SCHEMA_NAME_DATASET_DESCRIPTION)
+
+     # write to the excel file
+     wb = load_workbook(destination)
+     ws1 = wb["Sheet1"]
+
+     ws1["D22"] = ""
+     ws1["E22"] = ""
+     ws1["D24"] = ""
+     ws1["E24"] = ""
+     ws1["D25"] = ""
+     ws1["E25"] = ""
+
+     # Populate the Metadata version (Required)
+     ws1["D2"] = soda["dataset_metadata"]["dataset_description"]["metadata_version"]
+
+     # Populate the Dataset Type (default to an empty string if not present)
+     ws1["D3"] = (
+         soda.get("dataset_metadata", {})
+         .get("dataset_description", {})
+         .get("dataset_type", "")
+     )
+
+     populate_standards_info(ws1, soda)
+
+     keyword_array = populate_basic_info(ws1, soda)
+
+     populate_study_info(ws1, soda)
+     populate_contributor_info(ws1, soda)
+     populate_related_resource_information(ws1, soda)
+     populate_funding_info(ws1, soda)
+     populate_participant_information(ws1, soda)
+     data_dictionary_information(ws1, soda)
+
+     wb.save(destination)
+
+     size = getsize(destination)
+
+     # if generating directly on Pennsieve, call the upload function, which also deletes the local copy
+     if upload_boolean:
+         upload_metadata_file(
+             "dataset_description.xlsx", soda, destination, True
+         )
+
+     return {"size": size}
+
+
+ def populate_study_info(workbook, soda):
+     study_info = soda["dataset_metadata"]["dataset_description"]["study_information"]
+     workbook["D20"] = study_info.get("study_purpose", "")
+     workbook["D21"] = study_info.get("study_data_collection", "")
+     workbook["D22"] = study_info.get("study_primary_conclusion", "")
+
+     # Arrays
+     organ_system = study_info.get("study_organ_system", [])
+     approach = study_info.get("study_approach", [])
+     technique = study_info.get("study_technique", [])
+
+     for i, column in zip(range(len(organ_system)), excel_columns(start_index=3)):
+         workbook[column + "23"] = organ_system[i]
+     for i, column in zip(range(len(approach)), excel_columns(start_index=3)):
+         workbook[column + "24"] = approach[i]
+     for i, column in zip(range(len(technique)), excel_columns(start_index=3)):
+         workbook[column + "25"] = technique[i]
+
+     workbook["D26"] = study_info.get("study_collection_title", "")
+
+     # Return the max length of the arrays, or 1 if all are empty
+     return max(1, len(organ_system), len(approach), len(technique))
+
+
+ def populate_standards_info(workbook, soda):
+     standards_info = soda["dataset_metadata"]["dataset_description"]["standards_information"]
+     workbook["D5"] = standards_info["data_standard"]
+     workbook["D6"] = standards_info["data_standard_version"]
+
+
+ def populate_basic_info(workbook, soda):
+     basic_info = soda["dataset_metadata"]["dataset_description"]["basic_information"]
+     workbook["D8"] = basic_info.get("title", "")
+     workbook["D9"] = basic_info.get("subtitle", "")
+     workbook["D10"] = basic_info.get("description", "")
+
+     # Write the keywords array across columns in row 11 (D11, E11, F11, ...)
+     keywords = basic_info.get("keywords", [])
+     for col, keyword in zip(excel_columns(start_index=3), keywords):
+         workbook[f"{col}11"] = keyword
+
+     workbook["D12"] = basic_info.get("funding", "")
+     workbook["D13"] = basic_info.get("acknowledgments", "")
+     workbook["D14"] = basic_info.get("license", "")
+
+     # Return the length of the keywords array, or 1 if empty
+     return max(1, len(keywords))
+
+
+ def populate_funding_info(workbook, soda):
+     funding_info = soda["dataset_metadata"]["dataset_description"]["funding_information"]
+     workbook["D16"] = funding_info["funding_consortium"]
+     workbook["D17"] = funding_info["funding_agency"]
+     workbook["D18"] = funding_info["award_number"]
+
+
+ def populate_contributor_info(workbook, soda):
+     contributor_info = soda["dataset_metadata"]["dataset_description"].get("contributor_information", [])
+     for contributor, column in zip(contributor_info, excel_columns(start_index=3)):
+         workbook[column + "28"] = contributor.get("contributor_name", "")
+         workbook[column + "29"] = contributor.get("contributor_orcid_id", "")
+         workbook[column + "30"] = contributor.get("contributor_affiliation", "")
+         workbook[column + "31"] = contributor.get("contributor_role", "")
+     # Return the length of the contributor array, or 1 if empty
+     return max(1, len(contributor_info))
+
+
+ def populate_related_resource_information(workbook, soda):
+     related_resource_information = soda["dataset_metadata"]["dataset_description"].get("related_resource_information", [])
+     for info, column in zip(related_resource_information, excel_columns(start_index=3)):
+         workbook[column + "33"] = info.get("identifier_description", "")
+         workbook[column + "34"] = info.get("relation_type", "")
+         workbook[column + "35"] = info.get("identifier", "")
+         workbook[column + "36"] = info.get("identifier_type", "")
+     # Return the length of the related resource array, or 1 if empty
+     return max(1, len(related_resource_information))
+
+
+ def populate_participant_information(workbook, soda):
+     participant_info = soda["dataset_metadata"]["dataset_description"]["participant_information"]
+     workbook["D38"] = participant_info.get("number_of_subjects", 0)
+     workbook["D39"] = participant_info.get("number_of_samples", 0)
+     workbook["D40"] = participant_info.get("number_of_sites", 0)
+     workbook["D41"] = participant_info.get("number_of_performances", 0)
+
+
+ def data_dictionary_information(workbook, soda):
+     """
+     Populate the data dictionary fields (path, type, description), defaulting
+     to empty strings when no data dictionary information is provided.
+     """
+     data_dictionary_info = soda["dataset_metadata"]["dataset_description"].get("data_dictionary_information", {})
+
+     workbook["D43"] = data_dictionary_info.get("data_dictionary_path", "")
+     workbook["D44"] = data_dictionary_info.get("data_dictionary_type", "")
+     workbook["D45"] = data_dictionary_info.get("data_dictionary_description", "")
+
+
+ def grayout_subheaders(workbook, max_len, start_index):
+     """
+     Gray out sub-header rows for values exceeding 3 (SDS 2.0).
+     """
+     headers_list = ["4", "10", "18", "23", "28"]
+     columns_list = excel_columns(start_index=start_index)
+
+     for (i, column), no in itertools.product(zip(range(2, max_len + 1), columns_list[1:]), headers_list):
+         cell = workbook[column + no]
+         fillColor("B2B2B2", cell)
+
+
+ def fillColor(color, cell):
+     colorFill = PatternFill(start_color=color, end_color=color, fill_type="solid")
+
+     cell.fill = colorFill
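Each populate_* helper returns the number of value columns it wrote (at least 1). Judging from the rename_headers and grayout_subheaders signatures in excel_utils.py, a plausible wiring, not present in the released create_excel above, would be a sketch along these lines:

    max_len = max(
        populate_basic_info(ws1, soda),
        populate_study_info(ws1, soda),
        populate_contributor_info(ws1, soda),
        populate_related_resource_information(ws1, soda),
    )
    rename_headers(ws1, max_len, start_index=3)      # relabels D1, E1, ... as Value, Value 2, ...
    grayout_subheaders(ws1, max_len, start_index=3)  # grays out overflow sub-header cells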
pysoda/core/metadata/excel_utils.py
@@ -0,0 +1,41 @@
+ from string import ascii_uppercase
+ import itertools
+ from openpyxl.styles import PatternFill, Font
+
+
+ def rename_headers(workbook, max_len, start_index):
+     """
+     Rename header columns if values exceed 3. Change "Additional Values" to Value 4, 5, ...
+     Adds styling to the column headers as well.
+     """
+     columns_list = excel_columns(start_index=start_index)
+     if max_len >= start_index:
+         workbook[columns_list[0] + "1"] = "Value"
+         for i, column in zip(range(2, max_len + 1), columns_list[1:]):
+             workbook[column + "1"] = f"Value {i}"
+             cell = workbook[column + "1"]
+
+             blueFill = PatternFill(
+                 start_color="9CC2E5", end_color="9CC2E5", fill_type="solid"
+             )
+
+             font = Font(bold=True)
+             cell.fill = blueFill
+             cell.font = font
+
+     else:
+         delete_range = len(columns_list) - max_len
+         workbook.delete_cols(4 + max_len, delete_range)
+
+
+ def excel_columns(start_index=0):
+     """
+     NOTE: does not support more than 699 contributors/links
+     """
+     single_letter = list(ascii_uppercase[start_index:])
+     two_letter = [a + b for a, b in itertools.product(ascii_uppercase, ascii_uppercase)]
+     return single_letter + two_letter
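For orientation, excel_columns simply enumerates spreadsheet column letters starting from an offset:

    >>> excel_columns(start_index=3)[:5]
    ['D', 'E', 'F', 'G', 'H']
    >>> excel_columns()[25:28]   # two-letter columns begin after 'Z'
    ['Z', 'AA', 'AB']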
pysoda/core/metadata/helpers.py
@@ -0,0 +1,250 @@
+ import requests
+ import numpy as np
+ from ...constants import PENNSIEVE_URL
+ from ...utils import get_dataset_id, get_access_token, create_request_headers, connect_pennsieve_client, PennsieveActionNoPermission, GenericUploadError
+ from ...core import has_edit_permissions
+ from functools import partial
+ import time
+ import os
+ from .. import logger
+ import sys
+
+
+ def get_template_path(filename):
+     """Get the path to a template file within the metadata_templates package."""
+     global logger
+
+     # Method 1: Try a PyInstaller bundle first (onefolder mode creates _MEIPASS)
+     if hasattr(sys, '_MEIPASS'):
+         # PyInstaller onefolder extracts to _MEIPASS/
+         possible_paths = [
+             os.path.join(sys._MEIPASS, "pysoda", "core", "metadata_templates", filename),
+             os.path.join(sys._MEIPASS, "metadata_templates", filename),
+             os.path.join(sys._MEIPASS, filename)
+         ]
+         for path in possible_paths:
+             if os.path.exists(path):
+                 logger.info(f"Template found in PyInstaller bundle: {path}")
+                 return path
+
+     # Method 2: Try to import the metadata_templates module (works when the PyPI package is properly installed)
+     try:
+         from .. import metadata_templates
+         templates_dir = os.path.dirname(metadata_templates.__file__)
+         template_path = os.path.join(templates_dir, filename)
+         if os.path.exists(template_path):
+             logger.info(f"Template found in metadata_templates module: {template_path}")
+             return template_path
+     except (ImportError, ModuleNotFoundError, AttributeError):
+         pass
+
+     # Method 3: Search the Flask app's directory structure
+     current_file = os.path.abspath(__file__)
+     current_dir = os.path.dirname(current_file)
+
+     # Walk up the directory tree to find the templates
+     search_paths = [
+         os.path.join(current_dir, '..', 'metadata_templates', filename),
+         os.path.join(current_dir, 'metadata_templates', filename),
+     ]
+
+     # Also check whether we are in a site-packages structure
+     site_packages_paths = []
+     path_parts = current_file.split(os.sep)
+     for i, part in enumerate(path_parts):
+         if part == 'site-packages':
+             site_packages_root = os.sep.join(path_parts[:i+1])
+             site_packages_paths.extend([
+                 os.path.join(site_packages_root, 'pysoda', 'core', 'metadata_templates', filename),
+                 os.path.join(site_packages_root, 'pysoda_fairdataihub_tools', 'pysoda', 'core', 'metadata_templates', filename)
+             ])
+
+     all_paths = search_paths + site_packages_paths
+
+     for path in all_paths:
+         if os.path.exists(path):
+             logger.info(f"Template found in directory structure: {path}")
+             return path
+
+     # Method 4: Try to find the templates in Electron app resources (if not using PyInstaller)
+     try:
+         # Look for an Electron app structure
+         current_path = current_dir
+         while current_path and current_path != os.path.dirname(current_path):
+             electron_paths = [
+                 os.path.join(current_path, 'resources', 'app', 'node_modules', 'pysoda', 'core', 'metadata_templates', filename),
+                 os.path.join(current_path, 'resources', 'pysoda', 'core', 'metadata_templates', filename),
+                 os.path.join(current_path, 'app', 'pysoda', 'core', 'metadata_templates', filename)
+             ]
+             for path in electron_paths:
+                 if os.path.exists(path):
+                     logger.info(f"Template found in Electron app resources: {path}")
+                     return path
+             current_path = os.path.dirname(current_path)
+     except Exception:
+         pass
+
+     # Method 5: Try to find the templates in an Electron Resources folder
+     try:
+         # Find the Electron Resources folder
+         current_path = current_dir
+         resources_folder = None
+
+         # Walk up the directory tree to find the Resources folder
+         while current_path and current_path != os.path.dirname(current_path):
+             # Check common Electron Resources locations
+             possible_resources = [
+                 os.path.join(current_path, 'Resources'),              # macOS
+                 os.path.join(current_path, 'resources'),              # Windows/Linux
+                 os.path.join(current_path, 'Contents', 'Resources'),  # macOS app bundle
+             ]
+
+             for resource_path in possible_resources:
+                 if os.path.exists(resource_path):
+                     resources_folder = resource_path
+                     break
+
+             if resources_folder:
+                 break
+
+             current_path = os.path.dirname(current_path)
+
+         # If we found the Resources folder, look for metadata_templates inside it
+         if resources_folder:
+             template_path = os.path.join(resources_folder, 'metadata_templates', filename)
+             logger.info(f"Searching for template file in Electron Resources: {template_path}")
+
+             if os.path.exists(template_path):
+                 logger.info(f"Template found in Electron Resources: {template_path}")
+                 return template_path
+
+     except Exception as e:
+         logger.warning(f"Failed to search Electron Resources: {e}")
+
+     # Method 6: Use importlib.resources as a fallback (Python 3.7+)
+     try:
+         from importlib import resources
+         with resources.path('metadata_templates', filename) as template_path:
+             logger.info(f"Using template path: {template_path}")
+
+             if template_path.exists():
+                 logger.info(f"Template found using importlib.resources: {template_path}")
+                 return str(template_path)
+     except (ImportError, ModuleNotFoundError, AttributeError):
+         # Fall through if importlib.resources is not available
+         pass
+
+     # All lookup methods failed
+     logger.error(f"Failed to locate template file: {filename}")
+     raise ImportError(f"Could not locate template file {filename}.")
+
+
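+ # e.g. for a standard pip install, get_template_path("subjects.xlsx") typically resolves
+ # via Method 2 to .../site-packages/pysoda/core/metadata_templates/subjects.xlsx
+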
+ # helper function to process custom fields (users add and name them) for the subjects and samples files
+ def getMetadataCustomFields(matrix):
+     return [column for column in matrix if any(column[1:])]
+
+
+ # transpose a matrix (array of arrays): rows become columns and columns become rows
+ # REFERENCE: https://byjus.com/maths/transpose-of-a-matrix/
+ def transposeMatrix(matrix):
+     return [[matrix[j][i] for j in range(len(matrix))] for i in range(len(matrix[0]))]
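+ # e.g. transposeMatrix([[1, 2], [3, 4], [5, 6]]) == [[1, 3, 5], [2, 4, 6]]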
+
+
+ # needed to sort the subjects and samples table data to match the UI fields
+ def sortedSubjectsTableData(matrix, fields):
+     sortedMatrix = []
+     for field in fields:
+         for column in matrix:
+             if column[0].lower() == field:
+                 sortedMatrix.append(column)
+                 break
+
+     customHeaderMatrix = [
+         column for column in matrix if column[0].lower() not in fields
+     ]
+
+     return (
+         np.concatenate((sortedMatrix, customHeaderMatrix)).tolist()
+         if customHeaderMatrix
+         else sortedMatrix
+     )
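+ # e.g. with fields = ["subject id", "age"], the matching columns are emitted in UI order
+ # first, and any custom (user-named) columns are appended after them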
+
+
+ def upload_metadata_file(file_name, soda, path_to_file, delete_after_upload=True):
+     global logger
+
+     if "ps-account-selected" in soda:
+         ps_account = soda["ps-account-selected"]["account-name"]
+
+     if "ps-dataset-selected" in soda:
+         ps_dataset = soda["ps-dataset-selected"]["dataset-name"]
+
+     # check that the Pennsieve dataset is valid
+     selected_dataset_id = get_dataset_id(ps_dataset)
+
+     # check that the user has permission to upload to and modify the dataset
+     if not has_edit_permissions(get_access_token(), selected_dataset_id):
+         raise PennsieveActionNoPermission("edit " + selected_dataset_id)
+     headers = create_request_headers(get_access_token())
+     # handle duplicates on Pennsieve: first, obtain the existing file's ID
+     r = requests.get(f"{PENNSIEVE_URL}/datasets/{selected_dataset_id}", headers=headers)
+     r.raise_for_status()
+     ds_items = r.json()
+     # go through the content in the dataset and find the ID of the file to be replaced
+     for item in ds_items["children"]:
+         if item["content"]["name"] == file_name:
+             item_id = item["content"]["id"]
+             jsonfile = {
+                 "things": [item_id]
+             }
+             # then delete it through the Pennsieve delete endpoint
+             r = requests.post(f"{PENNSIEVE_URL}/data/delete", json=jsonfile, headers=headers)
+             r.raise_for_status()
+     try:
+         ps = connect_pennsieve_client(ps_account)
+         # create a new manifest for the metadata file
+         ps.use_dataset(selected_dataset_id)
+         manifest = ps.manifest.create(path_to_file)
+         m_id = manifest.manifest_id
+     except Exception as e:
+         logger.error(e)
+         error_message = "Could not create manifest file for this dataset"
+         raise GenericUploadError(error_message)
+
+     # upload the manifest file
+     try:
+         ps.manifest.upload(m_id)
+         # create a subscriber function with ps attached so it can be used to unsubscribe
+         subscriber_metadata_ps_client = partial(subscriber_metadata, ps)
+         # subscribe so we know when the upload has finished
+         ps.subscribe(10, False, subscriber_metadata_ps_client)
+     except Exception as e:
+         logger.error("Error uploading dataset files")
+         logger.error(e)
+         raise Exception("The Pennsieve Agent has encountered an issue while uploading. Please retry the upload. If this issue persists please follow this <a target='_blank' rel='noopener noreferrer' href='https://docs.sodaforsparc.io/docs/how-to/how-to-reinstall-the-pennsieve-agent'> guide</a> on performing a full reinstallation of the Pennsieve Agent to fix the problem.")
+
+     # before removing files we need to wait for all of the Agent's threads/subprocesses to finish;
+     # otherwise we get an error that the file is in use and therefore cannot be deleted
+     time.sleep(5)
+
+     # delete the local file that was created for the purpose of uploading to Pennsieve
+     if delete_after_upload:
+         os.remove(path_to_file)
+
+
+ def subscriber_metadata(ps, events_dict):
+     global logger
+     if events_dict["type"] == 1:
+         fileid = events_dict["upload_status"].file_id
+         total_bytes_to_upload = events_dict["upload_status"].total
+         current_bytes_uploaded = events_dict["upload_status"].current
+         if current_bytes_uploaded == total_bytes_to_upload and fileid != "":
+             logger.info("File upload complete")
+             ps.unsubscribe(10)
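
For reference, a minimal sketch of the calling convention upload_metadata_file assumes (the account and dataset names here are illustrative):

    soda = {
        "ps-account-selected": {"account-name": "my-pennsieve-profile"},
        "ps-dataset-selected": {"dataset-name": "my-sparc-dataset"},
    }

    # Replaces any existing subjects.xlsx on the dataset, uploads the new copy through
    # the Pennsieve Agent, and deletes the local file once the upload completes.
    upload_metadata_file("subjects.xlsx", soda, "/tmp/subjects.xlsx", delete_after_upload=True)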