pysodafair-0.1.62-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. pysoda/__init__.py +0 -0
  2. pysoda/constants.py +3 -0
  3. pysoda/core/__init__.py +10 -0
  4. pysoda/core/dataset_generation/__init__.py +11 -0
  5. pysoda/core/dataset_generation/manifestSession/__init__.py +1 -0
  6. pysoda/core/dataset_generation/manifestSession/manifest_session.py +146 -0
  7. pysoda/core/dataset_generation/upload.py +3951 -0
  8. pysoda/core/dataset_importing/__init__.py +1 -0
  9. pysoda/core/dataset_importing/import_dataset.py +662 -0
  10. pysoda/core/metadata/__init__.py +20 -0
  11. pysoda/core/metadata/code_description.py +109 -0
  12. pysoda/core/metadata/constants.py +32 -0
  13. pysoda/core/metadata/dataset_description.py +188 -0
  14. pysoda/core/metadata/excel_utils.py +41 -0
  15. pysoda/core/metadata/helpers.py +250 -0
  16. pysoda/core/metadata/manifest.py +112 -0
  17. pysoda/core/metadata/manifest_package/__init__.py +2 -0
  18. pysoda/core/metadata/manifest_package/manifest.py +0 -0
  19. pysoda/core/metadata/manifest_package/manifest_import.py +29 -0
  20. pysoda/core/metadata/manifest_package/manifest_writer.py +666 -0
  21. pysoda/core/metadata/performances.py +46 -0
  22. pysoda/core/metadata/resources.py +53 -0
  23. pysoda/core/metadata/samples.py +184 -0
  24. pysoda/core/metadata/sites.py +51 -0
  25. pysoda/core/metadata/subjects.py +172 -0
  26. pysoda/core/metadata/submission.py +91 -0
  27. pysoda/core/metadata/text_metadata.py +47 -0
  28. pysoda/core/metadata_templates/CHANGES +1 -0
  29. pysoda/core/metadata_templates/LICENSE +1 -0
  30. pysoda/core/metadata_templates/README.md +4 -0
  31. pysoda/core/metadata_templates/__init__.py +0 -0
  32. pysoda/core/metadata_templates/code_description.xlsx +0 -0
  33. pysoda/core/metadata_templates/code_parameters.xlsx +0 -0
  34. pysoda/core/metadata_templates/dataset_description.xlsx +0 -0
  35. pysoda/core/metadata_templates/manifest.xlsx +0 -0
  36. pysoda/core/metadata_templates/performances.xlsx +0 -0
  37. pysoda/core/metadata_templates/resources.xlsx +0 -0
  38. pysoda/core/metadata_templates/samples.xlsx +0 -0
  39. pysoda/core/metadata_templates/sites.xlsx +0 -0
  40. pysoda/core/metadata_templates/subjects.xlsx +0 -0
  41. pysoda/core/metadata_templates/subjects_pools_samples_structure.xlsx +0 -0
  42. pysoda/core/metadata_templates/subjects_pools_samples_structure_example.xlsx +0 -0
  43. pysoda/core/metadata_templates/submission.xlsx +0 -0
  44. pysoda/core/permissions/__init__.py +1 -0
  45. pysoda/core/permissions/permissions.py +31 -0
  46. pysoda/core/pysoda/__init__.py +2 -0
  47. pysoda/core/pysoda/soda.py +34 -0
  48. pysoda/core/pysoda/soda_object.py +55 -0
  49. pysoda/core/upload_manifests/__init__.py +1 -0
  50. pysoda/core/upload_manifests/upload_manifests.py +37 -0
  51. pysoda/schema/__init__.py +0 -0
  52. pysoda/schema/code_description.json +629 -0
  53. pysoda/schema/dataset_description.json +295 -0
  54. pysoda/schema/manifest.json +60 -0
  55. pysoda/schema/performances.json +44 -0
  56. pysoda/schema/resources.json +39 -0
  57. pysoda/schema/samples.json +97 -0
  58. pysoda/schema/sites.json +38 -0
  59. pysoda/schema/soda_schema.json +664 -0
  60. pysoda/schema/subjects.json +131 -0
  61. pysoda/schema/submission_schema.json +28 -0
  62. pysoda/utils/__init__.py +9 -0
  63. pysoda/utils/authentication.py +381 -0
  64. pysoda/utils/config.py +68 -0
  65. pysoda/utils/exceptions.py +156 -0
  66. pysoda/utils/logger.py +6 -0
  67. pysoda/utils/metadata_utils.py +74 -0
  68. pysoda/utils/pennsieveAgentUtils.py +11 -0
  69. pysoda/utils/pennsieveUtils.py +118 -0
  70. pysoda/utils/profile.py +28 -0
  71. pysoda/utils/schema_validation.py +133 -0
  72. pysoda/utils/time_utils.py +5 -0
  73. pysoda/utils/upload_utils.py +108 -0
  74. pysodafair-0.1.62.dist-info/METADATA +190 -0
  75. pysodafair-0.1.62.dist-info/RECORD +77 -0
  76. pysodafair-0.1.62.dist-info/WHEEL +4 -0
  77. pysodafair-0.1.62.dist-info/licenses/LICENSE +21 -0
pysoda/core/metadata/code_description.py
@@ -0,0 +1,109 @@
+ from .constants import METADATA_UPLOAD_PS_PATH, TEMPLATE_PATH, SDS_FILE_CODE_DESCRIPTION, SCHEMA_NAME_CODE_DESCRIPTION
+ from .excel_utils import rename_headers, excel_columns
+ from openpyxl.styles import PatternFill
+ from os.path import join, getsize
+ from openpyxl import load_workbook
+ import shutil
+ from ...utils import validate_schema
+ from .helpers import upload_metadata_file, get_template_path
+
+
+ # TODO: Handle optional entries when coupled with provided entries
+ # TODO: Handle extending columns and filling with color when more entries are provided than the template default handles
+ def create_excel(soda, upload, local_destination):
+     source = get_template_path(SDS_FILE_CODE_DESCRIPTION)
+     destination = join(METADATA_UPLOAD_PS_PATH, SDS_FILE_CODE_DESCRIPTION) if upload else local_destination
+     shutil.copyfile(source, destination)
+
+     validate_schema(soda["dataset_metadata"]["code_description"], SCHEMA_NAME_CODE_DESCRIPTION)
+
+     wb = load_workbook(destination)
+     ws1 = wb[wb.sheetnames[0]]
+
+     populate_input_output_information(ws1, soda)
+
+     populate_basic_information(ws1, soda)
+
+     populate_ten_simple_rules(ws1, soda)
+
+     wb.save(destination)
+
+     size = getsize(destination)
+
+     # if generating directly on Pennsieve, call the upload function, which also deletes the local copy
+     if upload:
+         upload_metadata_file(SDS_FILE_CODE_DESCRIPTION, soda, destination, True)
+
+     return {"size": size}
+
+
+ # TODO: Handle optional entries
+ def populate_input_output_information(ws1, soda):
+     # populate from row 27, starting at column D, up to column n, depending on the number of items in each input/output entry
+     input_output_information = soda["dataset_metadata"]["input_output_information"]
+
+     row = 27
+
+     first_column = excel_columns(start_index=3)[0]
+     ws1[first_column + str(row)] = input_output_information["number_of_inputs"]
+
+     for input_entry, column in zip(input_output_information["inputs"], excel_columns(start_index=3)):
+         row = 28
+         ws1[column + str(row)] = input_entry["input_parameter_name"]
+         ws1[column + str(row + 1)] = input_entry["input_parameter_type"]
+         ws1[column + str(row + 2)] = input_entry["input_parameter_description"]
+         ws1[column + str(row + 3)] = input_entry["input_units"]
+         ws1[column + str(row + 4)] = input_entry["input_default_value"]
+
+     # populate the number of outputs into row 34
+     row = 34
+     ws1[first_column + str(row)] = input_output_information["number_of_outputs"]
+
+     # populate the outputs into rows 35 - 39
+     for output, column in zip(input_output_information["outputs"], excel_columns(start_index=3)):
+         row = 35
+         ws1[column + str(row)] = output["output_parameter_name"]
+         ws1[column + str(row + 1)] = output["output_parameter_type"]
+         ws1[column + str(row + 2)] = output["output_parameter_description"]
+         ws1[column + str(row + 3)] = output["output_units"]
+         ws1[column + str(row + 4)] = output["output_default_value"]
+
+
+ def populate_basic_information(ws1, soda):
+     basic_information = soda["dataset_metadata"]["basic_information"]
+
+     # fill out basic information in rows 2 - 5, starting at column D
+     row = 2
+     for info, column in zip(basic_information, excel_columns(start_index=3)):
+         ws1[column + str(row)] = info["RRID_term"]
+         ws1[column + str(row + 1)] = info["RRID_identifier"]
+         ws1[column + str(row + 2)] = info["ontology_term"]
+         ws1[column + str(row + 3)] = info["ontology_identifier"]
+
+
+ def populate_ten_simple_rules(ws1, soda):
+     ten_simple_rules = soda["dataset_metadata"]["ten_simple_rules"]
+     row = 8
+     ascii_cols = excel_columns(start_index=3)
+     for _, rule in ten_simple_rules.items():
+         ws1[ascii_cols[0] + str(row)] = rule.get("Link", "")
+         ws1[ascii_cols[1] + str(row)] = rule.get("Rating", "")
+         ws1[ascii_cols[2] + str(row)] = rule.get("Target", "")
+         ws1[ascii_cols[3] + str(row)] = rule.get("Target Justification", "")
+         ws1[ascii_cols[4] + str(row)] = rule.get("Text", "")
+         row += 1
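For context, a minimal sketch of how this module's create_excel is shaped to be called. The payload below is illustrative only; real values would have to satisfy pysoda/schema/code_description.json:

    from pysoda.core.metadata.code_description import create_excel

    soda = {
        "dataset_metadata": {
            "code_description": {},  # validated against code_description.json
            "input_output_information": {
                "number_of_inputs": 1,
                "inputs": [{
                    "input_parameter_name": "threshold",
                    "input_parameter_type": "float",
                    "input_parameter_description": "detection cutoff",
                    "input_units": "mV",
                    "input_default_value": "0.5",
                }],
                "number_of_outputs": 0,
                "outputs": [],
            },
            "basic_information": [],
            "ten_simple_rules": {},
        },
    }

    # upload=False populates the bundled template and saves it to the local path
    result = create_excel(soda, False, "./code_description.xlsx")
    print(result["size"])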
pysoda/core/metadata/constants.py
@@ -0,0 +1,32 @@
+ from os.path import join, getsize, abspath, dirname, expanduser
+ from os import makedirs
+
+
+ TEMPLATE_PATH = join(dirname(abspath(__file__)), '..', 'metadata_templates')
+ METADATA_UPLOAD_PS_PATH = expanduser("~/.pysoda")
+ makedirs(METADATA_UPLOAD_PS_PATH, exist_ok=True)
+
+
+ SCHEMA_NAMES = {
+     "submission": "submission_schema.json",
+     "subjects": "subjects_schema.json"
+ }
+
+
+ SDS_FILE_SUBJECTS = "subjects.xlsx"
+ SCHEMA_NAME_SUBJECTS = "subjects.json"
+ SDS_FILE_SAMPLES = "samples.xlsx"
+ SCHEMA_NAME_SAMPLES = "samples.json"
+ SDS_FILE_PERFORMANCES = "performances.xlsx"
+ SCHEMA_NAME_PERFORMANCES = "performances.json"
+ SDS_FILE_SITES = "sites.xlsx"
+ SCHEMA_NAME_SITES = "sites.json"
+ SDS_FILE_RESOURCES = "resources.xlsx"
+ SCHEMA_NAME_RESOURCES = "resources.json"
+ SDS_FILE_DATASET_DESCRIPTION = "dataset_description.xlsx"
+ SCHEMA_NAME_DATASET_DESCRIPTION = "dataset_description.json"
+ SDS_FILE_CODE_DESCRIPTION = "code_description.xlsx"
+ SCHEMA_NAME_CODE_DESCRIPTION = "code_description.json"
+ SDS_FILE_MANIFEST = "manifest.xlsx"
+ SCHEMA_NAME_MANIFEST = "manifest.json"
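As code_description.py above illustrates, each SDS_FILE_*/SCHEMA_NAME_* pair drives one template copy plus one schema validation. A minimal sketch (subjects_payload is a hypothetical dict that would have to satisfy pysoda/schema/subjects.json):

    source = get_template_path(SDS_FILE_SUBJECTS)            # bundled subjects.xlsx template
    validate_schema(subjects_payload, SCHEMA_NAME_SUBJECTS)  # checked against subjects.json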
pysoda/core/metadata/dataset_description.py
@@ -0,0 +1,188 @@
+ from .constants import METADATA_UPLOAD_PS_PATH, TEMPLATE_PATH, SDS_FILE_DATASET_DESCRIPTION, SCHEMA_NAME_DATASET_DESCRIPTION
+ from os.path import join, getsize
+ from openpyxl import load_workbook
+ import shutil
+ from .excel_utils import rename_headers, excel_columns
+ import itertools
+ from openpyxl.styles import PatternFill
+ from ...utils import validate_schema
+ from .helpers import upload_metadata_file, get_template_path
+
+
+ def create_excel(soda, upload_boolean, local_destination):
+     source = get_template_path(SDS_FILE_DATASET_DESCRIPTION)
+     destination = join(METADATA_UPLOAD_PS_PATH, SDS_FILE_DATASET_DESCRIPTION) if upload_boolean else local_destination
+     shutil.copyfile(source, destination)
+
+     validate_schema(soda["dataset_metadata"]["dataset_description"], SCHEMA_NAME_DATASET_DESCRIPTION)
+
+     # write to the excel file
+     wb = load_workbook(destination)
+     ws1 = wb["Sheet1"]
+
+     ws1["D22"] = ""
+     ws1["E22"] = ""
+     ws1["D24"] = ""
+     ws1["E24"] = ""
+     ws1["D25"] = ""
+     ws1["E25"] = ""
+
+     # Populate the Metadata version (Required)
+     ws1["D2"] = soda["dataset_metadata"]["dataset_description"]["metadata_version"]
+
+     # Populate the Dataset Type (default to an empty string if not present)
+     ws1["D3"] = (
+         soda.get("dataset_metadata", {})
+         .get("dataset_description", {})
+         .get("dataset_type", "")
+     )
+
+     populate_standards_info(ws1, soda)
+
+     keyword_array = populate_basic_info(ws1, soda)
+
+     populate_study_info(ws1, soda)
+     populate_contributor_info(ws1, soda)
+     populate_related_resource_information(ws1, soda)
+     populate_funding_info(ws1, soda)
+     populate_participant_information(ws1, soda)
+     data_dictionary_information(ws1, soda)
+
+     wb.save(destination)
+
+     size = getsize(destination)
+
+     # if generating directly on Pennsieve, call the upload function, which also deletes the local copy
+     if upload_boolean:
+         upload_metadata_file(
+             "dataset_description.xlsx", soda, destination, True
+         )
+
+     return {"size": size}
+
+
+ def populate_study_info(workbook, soda):
+     study_info = soda["dataset_metadata"]["dataset_description"]["study_information"]
+     workbook["D20"] = study_info.get("study_purpose", "")
+     workbook["D21"] = study_info.get("study_data_collection", "")
+     workbook["D22"] = study_info.get("study_primary_conclusion", "")
+
+     # Arrays
+     organ_system = study_info.get("study_organ_system", [])
+     approach = study_info.get("study_approach", [])
+     technique = study_info.get("study_technique", [])
+
+     for i, column in zip(range(len(organ_system)), excel_columns(start_index=3)):
+         workbook[column + "23"] = organ_system[i]
+     for i, column in zip(range(len(approach)), excel_columns(start_index=3)):
+         workbook[column + "24"] = approach[i]
+     for i, column in zip(range(len(technique)), excel_columns(start_index=3)):
+         workbook[column + "25"] = technique[i]
+
+     workbook["D26"] = study_info.get("study_collection_title", "")
+
+     # Return the max length of the arrays, or 1 if all are empty
+     return max(1, len(organ_system), len(approach), len(technique))
+
+
+ def populate_standards_info(workbook, soda):
+     standards_info = soda["dataset_metadata"]["dataset_description"]["standards_information"]
+     workbook["D5"] = standards_info["data_standard"]
+     workbook["D6"] = standards_info["data_standard_version"]
+
+
+ def populate_basic_info(workbook, soda):
+     basic_info = soda["dataset_metadata"]["dataset_description"]["basic_information"]
+     workbook["D8"] = basic_info.get("title", "")
+     workbook["D9"] = basic_info.get("subtitle", "")
+     workbook["D10"] = basic_info.get("description", "")
+
+     # Write the keywords array across columns in row 11 (D11, E11, F11, ...)
+     keywords = basic_info.get("keywords", [])
+     for col, keyword in zip(excel_columns(start_index=3), keywords):
+         workbook[f"{col}11"] = keyword
+
+     workbook["D12"] = basic_info.get("funding", "")
+     workbook["D13"] = basic_info.get("acknowledgments", "")
+     workbook["D14"] = basic_info.get("license", "")
+
+     # Return the length of the keywords array, or 1 if empty
+     return max(1, len(keywords))
+
+
+ def populate_funding_info(workbook, soda):
+     funding_info = soda["dataset_metadata"]["dataset_description"]["funding_information"]
+     workbook["D16"] = funding_info["funding_consortium"]
+     workbook["D17"] = funding_info["funding_agency"]
+     workbook["D18"] = funding_info["award_number"]
+
+
+ def populate_contributor_info(workbook, soda):
+     contributor_info = soda["dataset_metadata"]["dataset_description"].get("contributor_information", [])
+     for contributor, column in zip(contributor_info, excel_columns(start_index=3)):
+         workbook[column + "28"] = contributor.get("contributor_name", "")
+         workbook[column + "29"] = contributor.get("contributor_orcid_id", "")
+         workbook[column + "30"] = contributor.get("contributor_affiliation", "")
+         workbook[column + "31"] = contributor.get("contributor_role", "")
+     # Return the length of the contributor array, or 1 if empty
+     return max(1, len(contributor_info))
+
+
+ def populate_related_resource_information(workbook, soda):
+     related_resource_information = soda["dataset_metadata"]["dataset_description"].get("related_resource_information", [])
+     for info, column in zip(related_resource_information, excel_columns(start_index=3)):
+         workbook[column + "33"] = info.get("identifier_description", "")
+         workbook[column + "34"] = info.get("relation_type", "")
+         workbook[column + "35"] = info.get("identifier", "")
+         workbook[column + "36"] = info.get("identifier_type", "")
+     # Return the length of the related resource array, or 1 if empty
+     return max(1, len(related_resource_information))
+
+
+ def populate_participant_information(workbook, soda):
+     participant_info = soda["dataset_metadata"]["dataset_description"]["participant_information"]
+     workbook["D38"] = participant_info.get("number_of_subjects", 0)
+     workbook["D39"] = participant_info.get("number_of_samples", 0)
+     workbook["D40"] = participant_info.get("number_of_sites", 0)
+     workbook["D41"] = participant_info.get("number_of_performances", 0)
+
+
+ def data_dictionary_information(workbook, soda):
+     """
+     Populate the data dictionary fields (path, type, description), defaulting
+     to empty strings when no data dictionary information is provided.
+     """
+     data_dictionary_info = soda["dataset_metadata"]["dataset_description"].get("data_dictionary_information", {})
+
+     workbook["D43"] = data_dictionary_info.get("data_dictionary_path", "")
+     workbook["D44"] = data_dictionary_info.get("data_dictionary_type", "")
+     workbook["D45"] = data_dictionary_info.get("data_dictionary_description", "")
+
+
+ def grayout_subheaders(workbook, max_len, start_index):
+     """
+     Gray out sub-header rows for values exceeding 3 (SDS 2.0).
+     """
+     headers_list = ["4", "10", "18", "23", "28"]
+     columns_list = excel_columns(start_index=start_index)
+
+     for (i, column), no in itertools.product(zip(range(2, max_len + 1), columns_list[1:]), headers_list):
+         cell = workbook[column + no]
+         fillColor("B2B2B2", cell)
+
+
+ def fillColor(color, cell):
+     colorFill = PatternFill(start_color=color, end_color=color, fill_type="solid")
+
+     cell.fill = colorFill
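Each populate_* helper returns the number of value columns it wrote (at least 1). Judging from the rename_headers and grayout_subheaders signatures in excel_utils.py, a plausible wiring, not present in the released create_excel above, would be a sketch along these lines:

    max_len = max(
        populate_basic_info(ws1, soda),
        populate_study_info(ws1, soda),
        populate_contributor_info(ws1, soda),
        populate_related_resource_information(ws1, soda),
    )
    rename_headers(ws1, max_len, start_index=3)      # relabels D1, E1, ... as Value, Value 2, ...
    grayout_subheaders(ws1, max_len, start_index=3)  # grays out overflow sub-header cells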
pysoda/core/metadata/excel_utils.py
@@ -0,0 +1,41 @@
+ from string import ascii_uppercase
+ import itertools
+ from openpyxl.styles import PatternFill, Font
+
+
+ def rename_headers(workbook, max_len, start_index):
+     """
+     Rename header columns if values exceed 3. Change "Additional Values" to Value 4, 5, ...
+     Adds styling to the column headers as well.
+     """
+     columns_list = excel_columns(start_index=start_index)
+     if max_len >= start_index:
+         workbook[columns_list[0] + "1"] = "Value"
+         for i, column in zip(range(2, max_len + 1), columns_list[1:]):
+             workbook[column + "1"] = f"Value {i}"
+             cell = workbook[column + "1"]
+
+             blueFill = PatternFill(
+                 start_color="9CC2E5", end_color="9CC2E5", fill_type="solid"
+             )
+
+             font = Font(bold=True)
+             cell.fill = blueFill
+             cell.font = font
+
+     else:
+         delete_range = len(columns_list) - max_len
+         workbook.delete_cols(4 + max_len, delete_range)
+
+
+ def excel_columns(start_index=0):
+     """
+     NOTE: does not support more than 699 contributors/links
+     """
+     single_letter = list(ascii_uppercase[start_index:])
+     two_letter = [a + b for a, b in itertools.product(ascii_uppercase, ascii_uppercase)]
+     return single_letter + two_letter
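For orientation, excel_columns simply enumerates spreadsheet column letters starting from an offset:

    >>> excel_columns(start_index=3)[:5]
    ['D', 'E', 'F', 'G', 'H']
    >>> excel_columns()[25:28]   # two-letter columns begin after 'Z'
    ['Z', 'AA', 'AB']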
pysoda/core/metadata/helpers.py
@@ -0,0 +1,250 @@
+ import requests
+ import numpy as np
+ from ...constants import PENNSIEVE_URL
+ from ...utils import get_dataset_id, get_access_token, create_request_headers, connect_pennsieve_client, PennsieveActionNoPermission, GenericUploadError
+ from ...core import has_edit_permissions
+ from functools import partial
+ import time
+ import os
+ from .. import logger
+ import sys
+
+
+ def get_template_path(filename):
+     """Get the path to a template file within the metadata_templates package."""
+     global logger
+
+     # Method 1: Try a PyInstaller bundle first (onefolder mode creates _MEIPASS)
+     if hasattr(sys, '_MEIPASS'):
+         # PyInstaller onefolder extracts to _MEIPASS/
+         possible_paths = [
+             os.path.join(sys._MEIPASS, "pysoda", "core", "metadata_templates", filename),
+             os.path.join(sys._MEIPASS, "metadata_templates", filename),
+             os.path.join(sys._MEIPASS, filename)
+         ]
+         for path in possible_paths:
+             if os.path.exists(path):
+                 logger.info(f"Template found in PyInstaller bundle: {path}")
+                 return path
+
+     # Method 2: Try to import the metadata_templates module (works when the PyPI package is properly installed)
+     try:
+         from .. import metadata_templates
+         templates_dir = os.path.dirname(metadata_templates.__file__)
+         template_path = os.path.join(templates_dir, filename)
+         if os.path.exists(template_path):
+             logger.info(f"Template found in metadata_templates module: {template_path}")
+             return template_path
+     except (ImportError, ModuleNotFoundError, AttributeError):
+         pass
+
+     # Method 3: Search the Flask app's directory structure
+     current_file = os.path.abspath(__file__)
+     current_dir = os.path.dirname(current_file)
+
+     # Walk up the directory tree to find the templates
+     search_paths = [
+         os.path.join(current_dir, '..', 'metadata_templates', filename),
+         os.path.join(current_dir, 'metadata_templates', filename),
+     ]
+
+     # Also check whether we are in a site-packages structure
+     site_packages_paths = []
+     path_parts = current_file.split(os.sep)
+     for i, part in enumerate(path_parts):
+         if part == 'site-packages':
+             site_packages_root = os.sep.join(path_parts[:i+1])
+             site_packages_paths.extend([
+                 os.path.join(site_packages_root, 'pysoda', 'core', 'metadata_templates', filename),
+                 os.path.join(site_packages_root, 'pysoda_fairdataihub_tools', 'pysoda', 'core', 'metadata_templates', filename)
+             ])
+
+     all_paths = search_paths + site_packages_paths
+
+     for path in all_paths:
+         if os.path.exists(path):
+             logger.info(f"Template found in directory structure: {path}")
+             return path
+
+     # Method 4: Try to find the templates in Electron app resources (if not using PyInstaller)
+     try:
+         # Look for an Electron app structure
+         current_path = current_dir
+         while current_path and current_path != os.path.dirname(current_path):
+             electron_paths = [
+                 os.path.join(current_path, 'resources', 'app', 'node_modules', 'pysoda', 'core', 'metadata_templates', filename),
+                 os.path.join(current_path, 'resources', 'pysoda', 'core', 'metadata_templates', filename),
+                 os.path.join(current_path, 'app', 'pysoda', 'core', 'metadata_templates', filename)
+             ]
+             for path in electron_paths:
+                 if os.path.exists(path):
+                     logger.info(f"Template found in Electron app resources: {path}")
+                     return path
+             current_path = os.path.dirname(current_path)
+     except Exception:
+         pass
+
+     # Method 5: Try to find the templates in an Electron Resources folder
+     try:
+         # Find the Electron Resources folder
+         current_path = current_dir
+         resources_folder = None
+
+         # Walk up the directory tree to find the Resources folder
+         while current_path and current_path != os.path.dirname(current_path):
+             # Check common Electron Resources locations
+             possible_resources = [
+                 os.path.join(current_path, 'Resources'),              # macOS
+                 os.path.join(current_path, 'resources'),              # Windows/Linux
+                 os.path.join(current_path, 'Contents', 'Resources'),  # macOS app bundle
+             ]
+
+             for resource_path in possible_resources:
+                 if os.path.exists(resource_path):
+                     resources_folder = resource_path
+                     break
+
+             if resources_folder:
+                 break
+
+             current_path = os.path.dirname(current_path)
+
+         # If we found the Resources folder, look for metadata_templates inside it
+         if resources_folder:
+             template_path = os.path.join(resources_folder, 'metadata_templates', filename)
+             logger.info(f"Searching for template file in Electron Resources: {template_path}")
+
+             if os.path.exists(template_path):
+                 logger.info(f"Template found in Electron Resources: {template_path}")
+                 return template_path
+
+     except Exception as e:
+         logger.warning(f"Failed to search Electron Resources: {e}")
+
+     # Method 6: Use importlib.resources as a fallback (Python 3.7+)
+     try:
+         from importlib import resources
+         with resources.path('metadata_templates', filename) as template_path:
+             logger.info(f"Using template path: {template_path}")
+
+             if template_path.exists():
+                 logger.info(f"Template found using importlib.resources: {template_path}")
+                 return str(template_path)
+     except (ImportError, ModuleNotFoundError, AttributeError):
+         # Fall through if importlib.resources is not available
+         pass
+
+     # All lookup methods failed
+     logger.error(f"Failed to locate template file: {filename}")
+     raise ImportError(f"Could not locate template file {filename}.")
+
+
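+ # e.g. for a standard pip install, get_template_path("subjects.xlsx") typically resolves
+ # via Method 2 to .../site-packages/pysoda/core/metadata_templates/subjects.xlsx
+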
+ # helper function to process custom fields (users add and name them) for the subjects and samples files
+ def getMetadataCustomFields(matrix):
+     return [column for column in matrix if any(column[1:])]
+
+
+ # transpose a matrix (array of arrays): rows become columns and columns become rows
+ # REFERENCE: https://byjus.com/maths/transpose-of-a-matrix/
+ def transposeMatrix(matrix):
+     return [[matrix[j][i] for j in range(len(matrix))] for i in range(len(matrix[0]))]
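+ # e.g. transposeMatrix([[1, 2], [3, 4], [5, 6]]) == [[1, 3, 5], [2, 4, 6]]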
+
+
+ # needed to sort the subjects and samples table data to match the UI fields
+ def sortedSubjectsTableData(matrix, fields):
+     sortedMatrix = []
+     for field in fields:
+         for column in matrix:
+             if column[0].lower() == field:
+                 sortedMatrix.append(column)
+                 break
+
+     customHeaderMatrix = [
+         column for column in matrix if column[0].lower() not in fields
+     ]
+
+     return (
+         np.concatenate((sortedMatrix, customHeaderMatrix)).tolist()
+         if customHeaderMatrix
+         else sortedMatrix
+     )
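+ # e.g. with fields = ["subject id", "age"], the matching columns are emitted in UI order
+ # first, and any custom (user-named) columns are appended after them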
+
+
+ def upload_metadata_file(file_name, soda, path_to_file, delete_after_upload=True):
+     global logger
+
+     if "ps-account-selected" in soda:
+         ps_account = soda["ps-account-selected"]["account-name"]
+
+     if "ps-dataset-selected" in soda:
+         ps_dataset = soda["ps-dataset-selected"]["dataset-name"]
+
+     # check that the Pennsieve dataset is valid
+     selected_dataset_id = get_dataset_id(ps_dataset)
+
+     # check that the user has permission to upload to and modify the dataset
+     if not has_edit_permissions(get_access_token(), selected_dataset_id):
+         raise PennsieveActionNoPermission("edit " + selected_dataset_id)
+     headers = create_request_headers(get_access_token())
+     # handle duplicates on Pennsieve: first, obtain the existing file's ID
+     r = requests.get(f"{PENNSIEVE_URL}/datasets/{selected_dataset_id}", headers=headers)
+     r.raise_for_status()
+     ds_items = r.json()
+     # go through the content in the dataset and find the ID of the file to be replaced
+     for item in ds_items["children"]:
+         if item["content"]["name"] == file_name:
+             item_id = item["content"]["id"]
+             jsonfile = {
+                 "things": [item_id]
+             }
+             # then delete it through the Pennsieve delete endpoint
+             r = requests.post(f"{PENNSIEVE_URL}/data/delete", json=jsonfile, headers=headers)
+             r.raise_for_status()
+     try:
+         ps = connect_pennsieve_client(ps_account)
+         # create a new manifest for the metadata file
+         ps.use_dataset(selected_dataset_id)
+         manifest = ps.manifest.create(path_to_file)
+         m_id = manifest.manifest_id
+     except Exception as e:
+         logger.error(e)
+         error_message = "Could not create manifest file for this dataset"
+         raise GenericUploadError(error_message)
+
+     # upload the manifest file
+     try:
+         ps.manifest.upload(m_id)
+         # create a subscriber function with ps attached so it can be used to unsubscribe
+         subscriber_metadata_ps_client = partial(subscriber_metadata, ps)
+         # subscribe so we know when the upload has finished
+         ps.subscribe(10, False, subscriber_metadata_ps_client)
+     except Exception as e:
+         logger.error("Error uploading dataset files")
+         logger.error(e)
+         raise Exception("The Pennsieve Agent has encountered an issue while uploading. Please retry the upload. If this issue persists please follow this <a target='_blank' rel='noopener noreferrer' href='https://docs.sodaforsparc.io/docs/how-to/how-to-reinstall-the-pennsieve-agent'> guide</a> on performing a full reinstallation of the Pennsieve Agent to fix the problem.")
+
+     # before removing files we need to wait for all of the Agent's threads/subprocesses to finish;
+     # otherwise we get an error that the file is in use and therefore cannot be deleted
+     time.sleep(5)
+
+     # delete the local file that was created for the purpose of uploading to Pennsieve
+     if delete_after_upload:
+         os.remove(path_to_file)
+
+
+ def subscriber_metadata(ps, events_dict):
+     global logger
+     if events_dict["type"] == 1:
+         fileid = events_dict["upload_status"].file_id
+         total_bytes_to_upload = events_dict["upload_status"].total
+         current_bytes_uploaded = events_dict["upload_status"].current
+         if current_bytes_uploaded == total_bytes_to_upload and fileid != "":
+             logger.info("File upload complete")
+             ps.unsubscribe(10)
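
For reference, a minimal sketch of the calling convention upload_metadata_file assumes (the account and dataset names here are illustrative):

    soda = {
        "ps-account-selected": {"account-name": "my-pennsieve-profile"},
        "ps-dataset-selected": {"dataset-name": "my-sparc-dataset"},
    }

    # Replaces any existing subjects.xlsx on the dataset, uploads the new copy through
    # the Pennsieve Agent, and deletes the local file once the upload completes.
    upload_metadata_file("subjects.xlsx", soda, "/tmp/subjects.xlsx", delete_after_upload=True)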