PyPI - pysodafair - Versions diffs - 0.1.64__py3-none-any.whl → 0.1.66__py3-none-any.whl - Mend

pysodafair 0.1.64__py3-none-any.whl → 0.1.66__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

pysoda/core/dataset_generation/upload.py CHANGED Viewed

@@ -40,7 +40,7 @@ from os.path import (
 import pandas as pd
 import time
 from timeit import default_timer as timer
-from datetime import timedelta
+from datetime import timedelta, timezone
 import shutil
 import subprocess
 import gevent
@@ -3888,9 +3888,11 @@ def generate_manifest_file_data(dataset_structure):
                 timestamp = file_info["timestamp"]
             else:
                 local_path = pathlib.Path(file_info["path"])
-                timestamp = datetime.fromtimestamp(
-                    local_path.stat().st_mtime, tz=local_timezone
-                ).isoformat().replace(".", ",").replace("+00:00", "Z")
+                # Create proper ISO 8601 timestamp
+                dt = datetime.fromtimestamp(local_path.stat().st_mtime, tz=timezone.utc)
+                # per the SDS spec, replace '.' with ',' in the timestamp fractional seconds section
+                timestamp = dt.isoformat().replace(".", ",").replace("+00:00", "Z")
             manifest_data.append(create_file_entry(file_name, file_info, path_parts, timestamp))

pysoda/core/metadata/dataset_description.py CHANGED Viewed

@@ -42,21 +42,23 @@ def create_excel(
         .get("dataset_type", "")
     )
-    populate_standards_info(ws1, soda)
+    standards_arr_len = populate_standards_info(ws1, soda)
-    keyword_array_len = populate_basic_info(ws1, soda)
+    keyword_funding_array_len = populate_basic_info(ws1, soda)
     study_arr_len = populate_study_info(ws1, soda)
     contributor_arr_len = populate_contributor_info(ws1, soda)
     related_resource_arr_len = populate_related_resource_information(ws1, soda)
     populate_funding_info(ws1, soda)
     populate_participant_information(ws1, soda)
-    data_dictionary_information(ws1, soda)
+    dict_arr_len = data_dictionary_information(ws1, soda)
     max_len = max(
-        keyword_array_len,
+        keyword_funding_array_len,
         study_arr_len,
         contributor_arr_len,
         related_resource_arr_len,
+        standards_arr_len,
+        dict_arr_len,
     )
     # 3 is the first value column position
@@ -102,9 +104,12 @@ def populate_study_info(workbook, soda):
 def populate_standards_info(workbook, soda):
     standards_info = soda["dataset_metadata"]["dataset_description"]["standards_information"]
-    workbook["D5"] = standards_info["data_standard"]
-    workbook["D6"] = standards_info["data_standard_version"]
+    # this is an array with multiple entries
+    for col, standard in zip(excel_columns(start_index=3), standards_info):
+        workbook[col + "5"] = standard.get("data_standard", "")
+        workbook[col + "6"] = standard.get("data_standard_version", "")
+    return max(1, len(standards_info))
 def populate_basic_info(workbook, soda):
     basic_info = soda["dataset_metadata"]["dataset_description"]["basic_information"]
@@ -117,12 +122,14 @@ def populate_basic_info(workbook, soda):
     for col, keyword in zip(excel_columns(start_index=3), keywords):
         workbook[f"{col}11"] = keyword
-    workbook["D12"] = basic_info.get("funding", "")
+    funding = basic_info.get("funding", [])
+    for col, funding_source in zip(excel_columns(start_index=3), funding):
+        workbook[f"{col}12"] = funding_source
     workbook["D13"] = basic_info.get("acknowledgments", "")
     workbook["D14"] = basic_info.get("license", "")
     # Return the length of the keywords array, or 1 if empty
-    return max(1, len(keywords))
+    return max(1, len(keywords), len(funding))
 def populate_funding_info(workbook, soda):
@@ -170,11 +177,14 @@ def data_dictionary_information(workbook, soda):
     It currently does not populate any data in the workbook.
     """
     # Placeholder for future implementation
-    data_dictionary_info = soda["dataset_metadata"]["dataset_description"].get("data_dictionary_information", {})
+    data_dictionary_info = soda["dataset_metadata"]["dataset_description"].get("data_dictionary_information", [])
-    workbook["D43"] = data_dictionary_info.get("data_dictionary_path", "")
-    workbook["D44"] = data_dictionary_info.get("data_dictionary_type", "")
-    workbook["D45"] = data_dictionary_info.get("data_dictionary_description", "")
+    for column, entry in zip(excel_columns(start_index=3), data_dictionary_info):
+        workbook[column + "43"] = entry.get("data_dictionary_path", "")
+        workbook[column + "44"] = entry.get("data_dictionary_type", "")
+        workbook[column + "45"] = entry.get("data_dictionary_description", "")
+    return max(1, len(data_dictionary_info))
 def grayout_subheaders(workbook, col):
     """
@@ -228,9 +238,6 @@ def apply_dashed_border(cell, workbook):
 def extend_value_header(workbook, max_len, start_index):
     """
     The headers starting at G1 are the 'Value' headers that correspond to the maximum number of entries for either the
@@ -253,4 +260,4 @@ def extend_value_header(workbook, max_len, start_index):
         apply_calibri_bold_12(header_cell)
         set_cell_alignment(header_cell, horizontal='center', vertical='center')
         apply_dashed_border(header_cell, workbook)
-        grayout_subheaders(workbook, column_list[i - 1])
+        grayout_subheaders(workbook, column_list[i - 1])

pysoda/core/metadata/manifest.py CHANGED Viewed

@@ -21,7 +21,7 @@ def create_excel(soda, upload_boolean, local_destination):
     wb = load_workbook(destination)
     ws1 = wb["Sheet1"]
     manifest = soda["dataset_metadata"]["manifest_file"]
-    # validate_schema(manifest, SCHEMA_NAME_MANIFEST)
+    validate_schema(manifest, SCHEMA_NAME_MANIFEST)
     ascii_headers = excel_columns(start_index=0)
     custom_headers_to_column = {}

pysoda/schema/dataset_description.json CHANGED Viewed

@@ -8,21 +8,25 @@
     },
     "type": {
       "type": "string",
-      "description": "The type of the dataset. For example, experimental."
+      "enum": ["experimental", "computational"],
+      "description": "The type of the dataset. In short, experimental data should have data collected from subjects and/or samples."
     },
     "standards_information": {
-      "type": "object",
-      "properties": {
-        "data_standard": {
-          "type": "string",
-          "description": "The name of the standard used in the project."
-        },
-        "data_standard_version": {
-          "type": "string",
-          "description": "The version of the standard used in the project."
+      "type": "array",
+      "minItems": 1,
+      "items": {
+        "type": "object",
+        "properties": {
+          "data_standard": {
+            "type": "string",
+            "description": "The name of the standard used in the project. For example, SPARC."
+          },
+          "data_standard_version": {
+            "type": "string",
+            "description": "The version of the standard used in the project. For example, 1.0.0"
+          }
         }
-      },
-      "required": []
+      }
     },
     "basic_information": {
       "type": "object",
@@ -47,8 +51,11 @@
           "description": "A list of keywords related to the project."
         },
         "funding": {
-          "type": "string",
-          "description": "Funding awards for the project."
+          "type": "array",
+          "items": {
+            "type": "string"
+          },
+          "description": "Funding awards for the project. Listed as free text. E.g., OT2OD025349"
         },
         "acknowledgments": {
           "type": "string",
@@ -56,7 +63,7 @@
         },
         "license": {
           "type": "string",
-          "description": "The license under which the project is released."
+          "description": "The license under which the project is released. Use the SPDX license identifier."
         }
       },
       "required": []
@@ -128,10 +135,12 @@
         "properties": {
           "contributor_orcid_id": {
             "type": "string",
-            "description": "The ORCiD for this contributor."
+            "pattern": "^https://orcid.org/000[09]-00[01][0-9]-[0-9]{4}-[0-9]{3}([0-9]|X)$",
+            "description": "The ORCiD for this contributor. Must be a valid ORCID URL format."
           },
           "contributor_affiliation": {
             "type": "string",
+            "pattern": "^https://ror.org/0[0-9a-z]{6}[0-9]{2}$",
             "description": "The institutional affiliation for this contributor."
           },
           "contributor_name": {
@@ -142,22 +151,27 @@
           "contributor_role": {
             "type": "string",
             "enum": [
-              "PrincipalInvestigator",
-              "Creator",
-              "CoInvestigator",
               "CorrespondingAuthor",
+              "ContactPerson",
+              "Creator",
               "DataCollector",
               "DataCurator",
               "DataManager",
               "Distributor",
               "Editor",
+              "HostingInstitution",
+              "PrincipalInvestigator",
+              "CoInvestigator",
               "Producer",
               "ProjectLeader",
               "ProjectManager",
               "ProjectMember",
+              "RegistrationAgency",
+              "RegistrationAuthority",
               "RelatedPerson",
               "Researcher",
               "ResearchGroup",
+              "RightsHolder",
               "Sponsor",
               "Supervisor",
               "WorkPackageLeader",
@@ -256,40 +270,48 @@
       "type": "object",
       "properties": {
         "number_of_subjects": {
-          "type": "number",
+          "type": "integer",
+          "minimum": 0,
           "description": "The number of subjects in the study."
         },
         "number_of_samples": {
-          "type": "number",
+          "type": "integer",
+          "minimum": 0,
           "description": "The number of samples in the study."
         },
         "number_of_sites": {
-          "type": "number",
+          "type": "integer",
+          "minimum": 0,
           "description": "The number of sites in the study."
         },
         "number_of_performances": {
-          "type": "number",
+          "type": "integer",
+          "minimum": 0,
           "description": "The number of performance in the study."
         }
       },
       "required": []
     },
     "data_dictionary_information": {
-      "type": "object",
-      "properties": {
-        "data_dictionary_path": {
-          "type": "string",
-          "description": "The path to the data dictionary file."
-        },
-        "data_dictionary_type": {
-          "type": "string",
-          "description": "The type of the data dictionary. E.g., json-schema"
-        },
-        "data_dictionary_description": {
-          "type": "string",
-          "description": "A descrption of the data dictionary."
+      "type": "array",
+      "items": {
+        "type": "object",
+        "properties": {
+          "data_dictionary_path": {
+            "type": "string",
+            "description": "The path to the data dictionary file. Should be relative to the root of the dataset. E.g., 'code/data_dictionary/schema.json'"
+          },
+          "data_dictionary_type": {
+            "type": "string",
+            "description": "The type of the data dictionary. E.g., json-schema"
+          },
+          "data_dictionary_description": {
+            "type": "string",
+            "description": "A description of the data dictionary."
+          }
         }
       }
     }
-  }
+  },
+  "required": ["metadata_version"]
 }

pysoda/schema/manifest.json CHANGED Viewed

@@ -11,7 +11,8 @@
         },
         "timestamp": {
           "type": "string",
-          "description": "Timestamp of when the data was created or last modified. This should be in ISO 8601 format."
+          "description": "Timestamp of when the data was created or last modified. This should be in ISO 8601 format. Per the SDS guidelines, commas are to be used for separating fractional seconds. Empty string for when it does not apply.",
+          "pattern": "^([0-9]{4}-[0-1][0-9]-[0-3][0-9]T[0-2][0-9]:[0-6][0-9](:[0-6][0-9](,[0-9]{1,9})?)?(Z|[+-][0-2][0-9]:[0-6][0-9])?)?$"
         },
         "description": {
           "type": "string",
@@ -23,7 +24,8 @@
         },
         "entity": {
           "type": "string",
-          "description": "Each ID should be taken from the subject.xlsx, samples.xlsx, sites.xlsx, or performances.xlsx files."
+          "description": "Each ID should be taken from the subject.xlsx, samples.xlsx, sites.xlsx, or performances.xlsx files.",
+          "pattern": "^((pop-)?(sub|sam|site|perf)-[A-Za-z0-9]([A-Za-z0-9-]*[A-Za-z0-9])?)?$"
         },
         "data_modality": {
           "type": "string",
@@ -43,7 +45,7 @@
         },
         "entity_is_transitive": {
           "type": "string",
-          "description": "Indicates whether the entity represented in this data file is transitive. This should be either true or false."
+          "description": "Indicates whether the entity represented in this data file is transitive. This should be either true or false if provided ."
         },
         "additional_metadata": {
           "type": "string",

{pysodafair-0.1.64.dist-info → pysodafair-0.1.66.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pysodafair
-Version: 0.1.64
+Version: 0.1.66
 Summary: Pysoda package for Fairdataihub tools
 License: MIT
 License-File: LICENSE

{pysodafair-0.1.64.dist-info → pysodafair-0.1.66.dist-info}/RECORD RENAMED Viewed

@@ -4,16 +4,16 @@ pysoda/core/__init__.py,sha256=bXnu4fYemJ915xId8nwh-Gy2IUEvVuoS9Hk3CXyJp8U,235
 pysoda/core/dataset_generation/__init__.py,sha256=tisLmJeXLeAINXf7BdNZGi2i9vQwImWGa_S5FNwQFbs,299
 pysoda/core/dataset_generation/manifestSession/__init__.py,sha256=kqkTAFEhluyQJ9mmMhYQTh1tgowBeJdH-Q9s5ifGkSE,51
 pysoda/core/dataset_generation/manifestSession/manifest_session.py,sha256=TML_KOJ-1REohqSaHCZNFJrbDR1UDQ9sFcithJRx9t8,4669
-pysoda/core/dataset_generation/upload.py,sha256=FtEQoyg3ly0lXmnUtKDXXHln1dLN6reb9knOdFTBFeA,172657
+pysoda/core/dataset_generation/upload.py,sha256=lnU6od0LtJOjuzNj-VX2HzOp66xUBr1V6nXtpJjkdIg,172804
 pysoda/core/dataset_importing/__init__.py,sha256=NbWs4HAqqydFLACFoVwl6g457dkYvjiZKx9NzB9wFxE,114
 pysoda/core/dataset_importing/import_dataset.py,sha256=cx8qCQmR_BKdC2G-jzqE4dWg2JhkOS3jM8kpjLIsObk,29487
 pysoda/core/metadata/__init__.py,sha256=Tkx6vdEQEPwAHmVSc9GfdbDZUkvuAqEgHJOxRDeX5vE,821
 pysoda/core/metadata/code_description.py,sha256=sWbRPWPf32txpN048oQ9lT3y7GAoDkrS-GHivwwv55Y,4104
 pysoda/core/metadata/constants.py,sha256=PR78huqBKdBpzDUGxVKy9YW3pUrJ4ftuF4MfqpNb1bU,1052
-pysoda/core/metadata/dataset_description.py,sha256=K1yWYQkbn4d7DCjQH5vT9Y6dhcWWlx823XLACgkIS2M,9749
+pysoda/core/metadata/dataset_description.py,sha256=icIRH2TYXWqrc4gu_RUK0yP_WQmy9BGAkVOEVQ--GfA,10275
 pysoda/core/metadata/excel_utils.py,sha256=FQ8-DBq2lxFdpDUZeABfFKe284JzRTe6AYPxEw7yzsg,1216
 pysoda/core/metadata/helpers.py,sha256=jDf4KPTbx4unHT7pDlFEa0jls1OZY-dpdKf3kUGMtvg,10609
-pysoda/core/metadata/manifest.py,sha256=MjID-r52Yn3i9WlrT5IWeYvmkVdfzQuqGOmoMsZQVJg,4197
+pysoda/core/metadata/manifest.py,sha256=LO8ZP8oJ0vEjbXU0yPVEYm62MLOZwIYVUaW4VEczO9k,4195
 pysoda/core/metadata/manifest_package/__init__.py,sha256=7qiNT62WsI1LKb-lvnK31lW04sOvW_KTZQA1e8sUAgY,211
 pysoda/core/metadata/manifest_package/manifest.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 pysoda/core/metadata/manifest_package/manifest_import.py,sha256=5rKOBf-NMt2dVsPJYotk5V855wnmtiMqrpknSOYOEl0,1347
@@ -50,8 +50,8 @@ pysoda/core/upload_manifests/__init__.py,sha256=SOiyflYmZDtkLwWpeu7flyn3J2SAj9WL
 pysoda/core/upload_manifests/upload_manifests.py,sha256=Hd2XSXYieG-EMcPTLSYXvdtraipp1XBSo5cGpZR-vbA,1223
 pysoda/schema/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 pysoda/schema/code_description.json,sha256=FJOyJBC2TKjTTdtew3B4wdq_WIrti-r2F-M6t5OVNB0,17854
-pysoda/schema/dataset_description.json,sha256=qxhzQzoE7CGuXvtY9fVWV31-daf5Ivms7_PkVW4i858,9053
-pysoda/schema/manifest.json,sha256=bsVzhnZHd2566KhrxQPBcwH376fgE8pSY8eXviTXrWA,2636
+pysoda/schema/dataset_description.json,sha256=U725xGDmy4oeJe0IOFcJc7a0oA5pAbpsRVQsjhPEj1k,10069
+pysoda/schema/manifest.json,sha256=dClcVIR3IlesjWC9peLAnyuIECwNDHfbIFmCsj9l2Wk,3006
 pysoda/schema/performances.json,sha256=TJw7ERC6eMR-9cImOg7xoEVRyOR4xViJqZO8U2X2OXo,1254
 pysoda/schema/resources.json,sha256=9-rwCTX523V5XaKmXLTUGzackOl7tk-qijbbBDoeuhY,1232
 pysoda/schema/samples.json,sha256=NvuYhskV9QFrkSX4ant9G4KFCe9JLnVG98vDv2Puuvk,3340
@@ -71,7 +71,7 @@ pysoda/utils/profile.py,sha256=di4D_IE1rGSfHl0-SRVjrJK2bCdedW4ugp0W-j1HarQ,937
 pysoda/utils/schema_validation.py,sha256=3w3FRPyn4P3xMhITzItk-jYte8TTlNrgCWmY-s05x9Y,4438
 pysoda/utils/time_utils.py,sha256=g5848bivtzWj6TDoWo6CcohF-THxShETP4qTyHTjBWw,131
 pysoda/utils/upload_utils.py,sha256=-BCvfXsJSFqnwEZVCFQX1PLfkdeW0gyGLjLGW6Uxdf8,4607
-pysodafair-0.1.64.dist-info/METADATA,sha256=mgUEcj-8RZdfFlMBD94Joq0bmRP4CXOF0C6S3JxYX20,7011
-pysodafair-0.1.64.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
-pysodafair-0.1.64.dist-info/licenses/LICENSE,sha256=Jlt0uGnx87qPRGXPQHsBkg_S7MOsYS2E9Rh1zy47bfw,1082
-pysodafair-0.1.64.dist-info/RECORD,,
+pysodafair-0.1.66.dist-info/METADATA,sha256=CFPeQYRgKDKEjoreN1iv9vV28KcV8Xbbdsp26H99AeI,7011
+pysodafair-0.1.66.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
+pysodafair-0.1.66.dist-info/licenses/LICENSE,sha256=Jlt0uGnx87qPRGXPQHsBkg_S7MOsYS2E9Rh1zy47bfw,1082
+pysodafair-0.1.66.dist-info/RECORD,,

{pysodafair-0.1.64.dist-info → pysodafair-0.1.66.dist-info}/WHEEL RENAMED Viewed

File without changes

{pysodafair-0.1.64.dist-info → pysodafair-0.1.66.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

pysodafair 0.1.64__py3-none-any.whl → 0.1.66__py3-none-any.whl

pysodafair 0.1.64__py3-none-any.whl → 0.1.66__py3-none-any.whl