pysodafair 0.1.63__tar.gz → 0.1.65__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. {pysodafair-0.1.63 → pysodafair-0.1.65}/PKG-INFO +1 -1
  2. {pysodafair-0.1.63 → pysodafair-0.1.65}/pyproject.toml +1 -1
  3. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/core/dataset_generation/upload.py +44 -76
  4. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/core/metadata/dataset_description.py +23 -16
  5. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/core/metadata/manifest.py +1 -1
  6. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/schema/dataset_description.json +59 -37
  7. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/schema/manifest.json +5 -3
  8. {pysodafair-0.1.63 → pysodafair-0.1.65}/LICENSE +0 -0
  9. {pysodafair-0.1.63 → pysodafair-0.1.65}/README.md +0 -0
  10. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/__init__.py +0 -0
  11. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/constants.py +0 -0
  12. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/core/__init__.py +0 -0
  13. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/core/dataset_generation/__init__.py +0 -0
  14. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/core/dataset_generation/manifestSession/__init__.py +0 -0
  15. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/core/dataset_generation/manifestSession/manifest_session.py +0 -0
  16. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/core/dataset_importing/__init__.py +0 -0
  17. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/core/dataset_importing/import_dataset.py +0 -0
  18. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/core/metadata/__init__.py +0 -0
  19. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/core/metadata/code_description.py +0 -0
  20. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/core/metadata/constants.py +0 -0
  21. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/core/metadata/excel_utils.py +0 -0
  22. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/core/metadata/helpers.py +0 -0
  23. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/core/metadata/manifest_package/__init__.py +0 -0
  24. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/core/metadata/manifest_package/manifest.py +0 -0
  25. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/core/metadata/manifest_package/manifest_import.py +0 -0
  26. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/core/metadata/manifest_package/manifest_writer.py +0 -0
  27. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/core/metadata/performances.py +0 -0
  28. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/core/metadata/resources.py +0 -0
  29. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/core/metadata/samples.py +0 -0
  30. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/core/metadata/sites.py +0 -0
  31. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/core/metadata/subjects.py +0 -0
  32. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/core/metadata/submission.py +0 -0
  33. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/core/metadata/text_metadata.py +0 -0
  34. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/core/metadata_templates/CHANGES +0 -0
  35. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/core/metadata_templates/LICENSE +0 -0
  36. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/core/metadata_templates/README.md +0 -0
  37. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/core/metadata_templates/__init__.py +0 -0
  38. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/core/metadata_templates/code_description.xlsx +0 -0
  39. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/core/metadata_templates/code_parameters.xlsx +0 -0
  40. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/core/metadata_templates/dataset_description.xlsx +0 -0
  41. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/core/metadata_templates/manifest.xlsx +0 -0
  42. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/core/metadata_templates/performances.xlsx +0 -0
  43. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/core/metadata_templates/resources.xlsx +0 -0
  44. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/core/metadata_templates/samples.xlsx +0 -0
  45. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/core/metadata_templates/sites.xlsx +0 -0
  46. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/core/metadata_templates/subjects.xlsx +0 -0
  47. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/core/metadata_templates/subjects_pools_samples_structure.xlsx +0 -0
  48. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/core/metadata_templates/subjects_pools_samples_structure_example.xlsx +0 -0
  49. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/core/metadata_templates/submission.xlsx +0 -0
  50. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/core/permissions/__init__.py +0 -0
  51. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/core/permissions/permissions.py +0 -0
  52. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/core/pysoda/__init__.py +0 -0
  53. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/core/pysoda/soda.py +0 -0
  54. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/core/pysoda/soda_object.py +0 -0
  55. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/core/upload_manifests/__init__.py +0 -0
  56. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/core/upload_manifests/upload_manifests.py +0 -0
  57. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/schema/__init__.py +0 -0
  58. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/schema/code_description.json +0 -0
  59. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/schema/performances.json +0 -0
  60. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/schema/resources.json +0 -0
  61. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/schema/samples.json +0 -0
  62. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/schema/sites.json +0 -0
  63. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/schema/soda_schema.json +0 -0
  64. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/schema/subjects.json +0 -0
  65. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/schema/submission_schema.json +0 -0
  66. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/utils/__init__.py +0 -0
  67. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/utils/authentication.py +0 -0
  68. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/utils/config.py +0 -0
  69. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/utils/exceptions.py +0 -0
  70. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/utils/logger.py +0 -0
  71. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/utils/metadata_utils.py +0 -0
  72. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/utils/pennsieveAgentUtils.py +0 -0
  73. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/utils/pennsieveUtils.py +0 -0
  74. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/utils/profile.py +0 -0
  75. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/utils/schema_validation.py +0 -0
  76. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/utils/time_utils.py +0 -0
  77. {pysodafair-0.1.63 → pysodafair-0.1.65}/pysoda/utils/upload_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pysodafair
3
- Version: 0.1.63
3
+ Version: 0.1.65
4
4
  Summary: Pysoda package for Fairdataihub tools
5
5
  License: MIT
6
6
  License-File: LICENSE
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "pysodafair"
3
- version = "0.1.63"
3
+ version = "0.1.65"
4
4
  description = "Pysoda package for Fairdataihub tools"
5
5
  authors = ["Christopher Marroquin <cmarroquin@calmi2.org>"]
6
6
  license = "MIT"
@@ -40,7 +40,7 @@ from os.path import (
40
40
  import pandas as pd
41
41
  import time
42
42
  from timeit import default_timer as timer
43
- from datetime import timedelta
43
+ from datetime import timedelta, timezone
44
44
  import shutil
45
45
  import subprocess
46
46
  import gevent
@@ -3830,57 +3830,39 @@ def generate_manifest_file_locally(generate_purpose, soda):
3830
3830
 
3831
3831
 
3832
3832
  def generate_manifest_file_data(dataset_structure):
3833
- # Define common file extensions with special handling
3834
3833
  double_extensions = {
3835
3834
  ".ome.tiff", ".ome.tif", ".ome.tf2", ".ome.tf8", ".ome.btf", ".ome.xml",
3836
3835
  ".brukertiff.gz", ".mefd.gz", ".moberg.gz", ".nii.gz", ".mgh.gz", ".tar.gz", ".bcl.gz"
3837
3836
  }
3838
3837
 
3839
- # Helper function: Get the complete file extension
3838
+ # Helper: Determine file extension (handles double extensions)
3840
3839
  def get_file_extension(filename):
3841
3840
  for ext in double_extensions:
3842
3841
  if filename.endswith(ext):
3843
3842
  base_ext = os.path.splitext(os.path.splitext(filename)[0])[1]
3844
3843
  return base_ext + ext
3845
3844
  return os.path.splitext(filename)[1]
3846
-
3845
+
3846
+ # Helper: Create a manifest row for a folder
3847
3847
  def create_folder_entry(folder_name, path_parts):
3848
3848
  full_path = "/".join(path_parts + [folder_name]) + "/"
3849
- entry = [
3850
- full_path.lstrip("/"), # Remove leading slash for consistency
3851
- "", # Timestamp
3852
- "", # Description
3853
- "folder", # File type
3854
- "", # Entity (empty)
3855
- "", # Data modality (empty)
3856
- "", # Also in dataset (empty)
3857
- "", # Data dictionary path (empty)
3858
- "", # Entity is transitive (empty)
3859
- "", # Additional Metadata
3849
+ return [
3850
+ full_path.lstrip("/"),
3851
+ "", "", "folder", "", "", "", "", "", ""
3860
3852
  ]
3861
- return entry
3862
-
3863
-
3864
-
3865
- # Helper function: Build a single manifest entry
3866
- def create_file_entry(item, folder, path_parts, timestamp, filename):
3867
- full_path = "/".join(path_parts + [filename])
3868
- file_info = folder["files"][item]
3869
3853
 
3854
+ # Helper: Create a manifest row for a file
3855
+ def create_file_entry(file_name, file_info, path_parts, timestamp):
3870
3856
  entry = [
3871
- full_path.lstrip("/"), # Remove leading slash for consistency
3872
- timestamp, # Timestamp
3873
- file_info["description"], # Description
3874
- get_file_extension(filename), # File type
3875
- "", # Entity (empty)
3876
- "", # Data modality (empty)
3877
- "", # Also in dataset (empty)
3878
- "", # Data dictionary path (empty)
3879
- "", # Entity is transitive (empty)
3880
- file_info.get("additional-metadata", "") # Additional Metadata
3857
+ "/".join(path_parts + [file_name]).lstrip("/"),
3858
+ timestamp,
3859
+ file_info["description"],
3860
+ get_file_extension(file_name),
3861
+ "", "", "", "", "",
3862
+ file_info.get("additional-metadata", "")
3881
3863
  ]
3882
3864
 
3883
- # Add any extra columns dynamically
3865
+ # Append any extra columns dynamically
3884
3866
  if "extra_columns" in file_info:
3885
3867
  for key, value in file_info["extra_columns"].items():
3886
3868
  entry.append(value)
@@ -3889,48 +3871,38 @@ def generate_manifest_file_data(dataset_structure):
3889
3871
 
3890
3872
  return entry
3891
3873
 
3892
- # Recursive function: Traverse dataset and collect file data
3874
+ # Recursive traversal of folders and files
3893
3875
  def traverse_folders(folder, path_parts):
3894
- # Add header row if processing files for the first time
3895
3876
  if not manifest_data:
3896
3877
  manifest_data.append(header_row)
3897
-
3898
- if "files" in folder:
3899
- for item, file_info in folder["files"].items():
3900
-
3901
- if "path" in file_info:
3902
- file_path = file_info["path"]
3903
- elif "pspath" in file_info:
3904
- file_path = file_info["pspath"]
3905
- else:
3906
- continue
3907
-
3908
- # If the file is a manifest file, skip it
3909
- if item in {"manifest.xlsx", "manifest.csv"}:
3910
- continue
3911
3878
 
3912
- # Determine timestamp
3913
- filename = os.path.basename(file_path.replace("\\", "/"))
3914
- if file_info["location"] == "ps":
3915
- timestamp = file_info["timestamp"]
3916
- else:
3917
- local_path = pathlib.Path(file_info["path"])
3918
- timestamp = datetime.fromtimestamp(
3919
- local_path.stat().st_mtime, tz=local_timezone
3920
- ).isoformat().replace(".", ",").replace("+00:00", "Z")
3921
-
3922
- # Add file entry
3923
- manifest_data.append(create_file_entry(item, folder, path_parts, timestamp, filename))
3924
-
3925
- if "folders" in folder:
3926
- for subfolder_name, subfolder in folder["folders"].items():
3927
- # Add folder entry
3928
- manifest_data.append(create_folder_entry(subfolder_name, path_parts))
3929
- traverse_folders(subfolder, path_parts + [subfolder_name])
3930
-
3931
- # Initialize variables
3932
- manifest_data = [] # Collects all rows for the manifest
3933
- # TODO: Update to SDS 3.0
3879
+ # Process files
3880
+ for file_name, file_info in folder.get("files", {}).items():
3881
+ file_path = file_info.get("path")
3882
+ if not file_path:
3883
+ continue
3884
+ if file_name in {"manifest.xlsx", "manifest.csv"}:
3885
+ continue
3886
+
3887
+ if file_info["location"] == "ps":
3888
+ timestamp = file_info["timestamp"]
3889
+ else:
3890
+ local_path = pathlib.Path(file_info["path"])
3891
+ # Create proper ISO 8601 timestamp
3892
+ dt = datetime.fromtimestamp(local_path.stat().st_mtime, tz=timezone.utc)
3893
+ # per the SDS spec, replace '.' with ',' in the timestamp fractional seconds section
3894
+ timestamp = dt.isoformat().replace(".", ",").replace("+00:00", "Z")
3895
+
3896
+
3897
+ manifest_data.append(create_file_entry(file_name, file_info, path_parts, timestamp))
3898
+
3899
+ # Process subfolders
3900
+ for subfolder_name, subfolder in folder.get("folders", {}).items():
3901
+ manifest_data.append(create_folder_entry(subfolder_name, path_parts))
3902
+ traverse_folders(subfolder, path_parts + [subfolder_name])
3903
+
3904
+ # Initialize manifest data and header
3905
+ manifest_data = []
3934
3906
  header_row = [
3935
3907
  "filename", "timestamp", "description", "file type", "entity",
3936
3908
  "data modality", "also in dataset", "data dictionary path",
@@ -3938,9 +3910,6 @@ def generate_manifest_file_data(dataset_structure):
3938
3910
  ]
3939
3911
  local_timezone = TZLOCAL()
3940
3912
 
3941
- # Log the dataset structure
3942
-
3943
- # Start recursive traversal from the root
3944
3913
  traverse_folders(dataset_structure, [])
3945
3914
 
3946
3915
  return manifest_data
@@ -3948,4 +3917,3 @@ def generate_manifest_file_data(dataset_structure):
3948
3917
 
3949
3918
 
3950
3919
 
3951
-
@@ -42,21 +42,23 @@ def create_excel(
42
42
  .get("dataset_type", "")
43
43
  )
44
44
 
45
- populate_standards_info(ws1, soda)
45
+ standards_arr_len = populate_standards_info(ws1, soda)
46
46
 
47
- keyword_array_len = populate_basic_info(ws1, soda)
47
+ keyword_funding_array_len = populate_basic_info(ws1, soda)
48
48
  study_arr_len = populate_study_info(ws1, soda)
49
49
  contributor_arr_len = populate_contributor_info(ws1, soda)
50
50
  related_resource_arr_len = populate_related_resource_information(ws1, soda)
51
51
  populate_funding_info(ws1, soda)
52
52
  populate_participant_information(ws1, soda)
53
- data_dictionary_information(ws1, soda)
53
+ dict_arr_len = data_dictionary_information(ws1, soda)
54
54
 
55
55
  max_len = max(
56
- keyword_array_len,
56
+ keyword_funding_array_len,
57
57
  study_arr_len,
58
58
  contributor_arr_len,
59
59
  related_resource_arr_len,
60
+ standards_arr_len,
61
+ dict_arr_len,
60
62
  )
61
63
 
62
64
  # 3 is the first value column position
@@ -102,9 +104,12 @@ def populate_study_info(workbook, soda):
102
104
 
103
105
  def populate_standards_info(workbook, soda):
104
106
  standards_info = soda["dataset_metadata"]["dataset_description"]["standards_information"]
105
- workbook["D5"] = standards_info["data_standard"]
106
- workbook["D6"] = standards_info["data_standard_version"]
107
+ # this is an array with multiple entries
108
+ for col, standard in zip(excel_columns(start_index=3), standards_info):
109
+ workbook[col + "5"] = standard.get("data_standard", "")
110
+ workbook[col + "6"] = standard.get("data_standard_version", "")
107
111
 
112
+ return max(1, len(standards_info))
108
113
 
109
114
  def populate_basic_info(workbook, soda):
110
115
  basic_info = soda["dataset_metadata"]["dataset_description"]["basic_information"]
@@ -117,12 +122,14 @@ def populate_basic_info(workbook, soda):
117
122
  for col, keyword in zip(excel_columns(start_index=3), keywords):
118
123
  workbook[f"{col}11"] = keyword
119
124
 
120
- workbook["D12"] = basic_info.get("funding", "")
125
+ funding = basic_info.get("funding", [])
126
+ for col, funding_source in zip(excel_columns(start_index=3), funding):
127
+ workbook[f"{col}12"] = funding_source
121
128
  workbook["D13"] = basic_info.get("acknowledgments", "")
122
129
  workbook["D14"] = basic_info.get("license", "")
123
130
 
124
131
  # Return the length of the keywords array, or 1 if empty
125
- return max(1, len(keywords))
132
+ return max(1, len(keywords), len(funding))
126
133
 
127
134
 
128
135
  def populate_funding_info(workbook, soda):
@@ -170,11 +177,14 @@ def data_dictionary_information(workbook, soda):
170
177
  It currently does not populate any data in the workbook.
171
178
  """
172
179
  # Placeholder for future implementation
173
- data_dictionary_info = soda["dataset_metadata"]["dataset_description"].get("data_dictionary_information", {})
180
+ data_dictionary_info = soda["dataset_metadata"]["dataset_description"].get("data_dictionary_information", [])
174
181
 
175
- workbook["D43"] = data_dictionary_info.get("data_dictionary_path", "")
176
- workbook["D44"] = data_dictionary_info.get("data_dictionary_type", "")
177
- workbook["D45"] = data_dictionary_info.get("data_dictionary_description", "")
182
+ for column, entry in zip(excel_columns(start_index=3), data_dictionary_info):
183
+ workbook[column + "43"] = entry.get("data_dictionary_path", "")
184
+ workbook[column + "44"] = entry.get("data_dictionary_type", "")
185
+ workbook[column + "45"] = entry.get("data_dictionary_description", "")
186
+
187
+ return max(1, len(data_dictionary_info))
178
188
 
179
189
  def grayout_subheaders(workbook, col):
180
190
  """
@@ -228,9 +238,6 @@ def apply_dashed_border(cell, workbook):
228
238
 
229
239
 
230
240
 
231
-
232
-
233
-
234
241
  def extend_value_header(workbook, max_len, start_index):
235
242
  """
236
243
  The headers starting at G1 are the 'Value' headers that correspond to the maximum number of entries for either the
@@ -253,4 +260,4 @@ def extend_value_header(workbook, max_len, start_index):
253
260
  apply_calibri_bold_12(header_cell)
254
261
  set_cell_alignment(header_cell, horizontal='center', vertical='center')
255
262
  apply_dashed_border(header_cell, workbook)
256
- grayout_subheaders(workbook, column_list[i - 1])
263
+ grayout_subheaders(workbook, column_list[i - 1])
@@ -21,7 +21,7 @@ def create_excel(soda, upload_boolean, local_destination):
21
21
  wb = load_workbook(destination)
22
22
  ws1 = wb["Sheet1"]
23
23
  manifest = soda["dataset_metadata"]["manifest_file"]
24
- # validate_schema(manifest, SCHEMA_NAME_MANIFEST)
24
+ validate_schema(manifest, SCHEMA_NAME_MANIFEST)
25
25
  ascii_headers = excel_columns(start_index=0)
26
26
  custom_headers_to_column = {}
27
27
 
@@ -8,21 +8,25 @@
8
8
  },
9
9
  "type": {
10
10
  "type": "string",
11
- "description": "The type of the dataset. For example, experimental."
11
+ "enum": ["experimental", "computational"],
12
+ "description": "The type of the dataset. In short, experimental data should have data collected from subjects and/or samples."
12
13
  },
13
14
  "standards_information": {
14
- "type": "object",
15
- "properties": {
16
- "data_standard": {
17
- "type": "string",
18
- "description": "The name of the standard used in the project."
19
- },
20
- "data_standard_version": {
21
- "type": "string",
22
- "description": "The version of the standard used in the project."
15
+ "type": "array",
16
+ "minItems": 1,
17
+ "items": {
18
+ "type": "object",
19
+ "properties": {
20
+ "data_standard": {
21
+ "type": "string",
22
+ "description": "The name of the standard used in the project. For example, SPARC."
23
+ },
24
+ "data_standard_version": {
25
+ "type": "string",
26
+ "description": "The version of the standard used in the project. For example, 1.0.0"
27
+ }
23
28
  }
24
- },
25
- "required": []
29
+ }
26
30
  },
27
31
  "basic_information": {
28
32
  "type": "object",
@@ -47,8 +51,11 @@
47
51
  "description": "A list of keywords related to the project."
48
52
  },
49
53
  "funding": {
50
- "type": "string",
51
- "description": "Funding awards for the project."
54
+ "type": "array",
55
+ "items": {
56
+ "type": "string"
57
+ },
58
+ "description": "Funding awards for the project. Listed as free text. E.g., OT2OD025349"
52
59
  },
53
60
  "acknowledgments": {
54
61
  "type": "string",
@@ -56,7 +63,7 @@
56
63
  },
57
64
  "license": {
58
65
  "type": "string",
59
- "description": "The license under which the project is released."
66
+ "description": "The license under which the project is released. Use the SPDX license identifier."
60
67
  }
61
68
  },
62
69
  "required": []
@@ -128,10 +135,12 @@
128
135
  "properties": {
129
136
  "contributor_orcid_id": {
130
137
  "type": "string",
131
- "description": "The ORCiD for this contributor."
138
+ "pattern": "^https://orcid.org/000[09]-00[01][0-9]-[0-9]{4}-[0-9]{3}([0-9]|X)$",
139
+ "description": "The ORCiD for this contributor. Must be a valid ORCID URL format."
132
140
  },
133
141
  "contributor_affiliation": {
134
142
  "type": "string",
143
+ "pattern": "^https://ror.org/0[0-9a-z]{6}[0-9]{2}$",
135
144
  "description": "The institutional affiliation for this contributor."
136
145
  },
137
146
  "contributor_name": {
@@ -142,22 +151,27 @@
142
151
  "contributor_role": {
143
152
  "type": "string",
144
153
  "enum": [
145
- "PrincipalInvestigator",
146
- "Creator",
147
- "CoInvestigator",
148
154
  "CorrespondingAuthor",
155
+ "ContactPerson",
156
+ "Creator",
149
157
  "DataCollector",
150
158
  "DataCurator",
151
159
  "DataManager",
152
160
  "Distributor",
153
161
  "Editor",
162
+ "HostingInstitution",
163
+ "PrincipalInvestigator",
164
+ "CoInvestigator",
154
165
  "Producer",
155
166
  "ProjectLeader",
156
167
  "ProjectManager",
157
168
  "ProjectMember",
169
+ "RegistrationAgency",
170
+ "RegistrationAuthority",
158
171
  "RelatedPerson",
159
172
  "Researcher",
160
173
  "ResearchGroup",
174
+ "RightsHolder",
161
175
  "Sponsor",
162
176
  "Supervisor",
163
177
  "WorkPackageLeader",
@@ -256,40 +270,48 @@
256
270
  "type": "object",
257
271
  "properties": {
258
272
  "number_of_subjects": {
259
- "type": "number",
273
+ "type": "integer",
274
+ "minimum": 0,
260
275
  "description": "The number of subjects in the study."
261
276
  },
262
277
  "number_of_samples": {
263
- "type": "number",
278
+ "type": "integer",
279
+ "minimum": 0,
264
280
  "description": "The number of samples in the study."
265
281
  },
266
282
  "number_of_sites": {
267
- "type": "number",
283
+ "type": "integer",
284
+ "minimum": 0,
268
285
  "description": "The number of sites in the study."
269
286
  },
270
287
  "number_of_performances": {
271
- "type": "number",
288
+ "type": "integer",
289
+ "minimum": 0,
272
290
  "description": "The number of performance in the study."
273
291
  }
274
292
  },
275
293
  "required": []
276
294
  },
277
295
  "data_dictionary_information": {
278
- "type": "object",
279
- "properties": {
280
- "data_dictionary_path": {
281
- "type": "string",
282
- "description": "The path to the data dictionary file."
283
- },
284
- "data_dictionary_type": {
285
- "type": "string",
286
- "description": "The type of the data dictionary. E.g., json-schema"
287
- },
288
- "data_dictionary_description": {
289
- "type": "string",
290
- "description": "A descrption of the data dictionary."
296
+ "type": "array",
297
+ "items": {
298
+ "type": "object",
299
+ "properties": {
300
+ "data_dictionary_path": {
301
+ "type": "string",
302
+ "description": "The path to the data dictionary file. Should be relative to the root of the dataset. E.g., 'code/data_dictionary/schema.json'"
303
+ },
304
+ "data_dictionary_type": {
305
+ "type": "string",
306
+ "description": "The type of the data dictionary. E.g., json-schema"
307
+ },
308
+ "data_dictionary_description": {
309
+ "type": "string",
310
+ "description": "A description of the data dictionary."
311
+ }
291
312
  }
292
313
  }
293
314
  }
294
- }
315
+ },
316
+ "required": ["metadata_version"]
295
317
  }
@@ -11,7 +11,8 @@
11
11
  },
12
12
  "timestamp": {
13
13
  "type": "string",
14
- "description": "Timestamp of when the data was created or last modified. This should be in ISO 8601 format."
14
+ "description": "Timestamp of when the data was created or last modified. This should be in ISO 8601 format. Per the SDS guidelines, commas are to be used for separating fractional seconds.",
15
+ "pattern": "^[0-9]{4}-[0-1][0-9]-[0-3][0-9]T[0-2][0-9]:[0-6][0-9](:[0-6][0-9](,[0-9]{1,9})?)?(Z|[+-][0-2][0-9]:[0-6][0-9])$"
15
16
  },
16
17
  "description": {
17
18
  "type": "string",
@@ -23,7 +24,8 @@
23
24
  },
24
25
  "entity": {
25
26
  "type": "string",
26
- "description": "Each ID should be taken from the subject.xlsx, samples.xlsx, sites.xlsx, or performances.xlsx files."
27
+ "description": "Each ID should be taken from the subject.xlsx, samples.xlsx, sites.xlsx, or performances.xlsx files.",
28
+ "pattern": "^((pop-)?(sub|sam|site|perf)-[A-Za-z0-9]([A-Za-z0-9-]*[A-Za-z0-9])?)?$"
27
29
  },
28
30
  "data_modality": {
29
31
  "type": "string",
@@ -43,7 +45,7 @@
43
45
  },
44
46
  "entity_is_transitive": {
45
47
  "type": "string",
46
- "description": "Indicates whether the entity represented in this data file is transitive. This should be either true or false."
48
+ "description": "Indicates whether the entity represented in this data file is transitive. This should be either true or false if provided ."
47
49
  },
48
50
  "additional_metadata": {
49
51
  "type": "string",
File without changes
File without changes