pysodafair 0.1.64__py3-none-any.whl → 0.1.66__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -40,7 +40,7 @@ from os.path import (
40
40
  import pandas as pd
41
41
  import time
42
42
  from timeit import default_timer as timer
43
- from datetime import timedelta
43
+ from datetime import timedelta, timezone
44
44
  import shutil
45
45
  import subprocess
46
46
  import gevent
@@ -3888,9 +3888,11 @@ def generate_manifest_file_data(dataset_structure):
3888
3888
  timestamp = file_info["timestamp"]
3889
3889
  else:
3890
3890
  local_path = pathlib.Path(file_info["path"])
3891
- timestamp = datetime.fromtimestamp(
3892
- local_path.stat().st_mtime, tz=local_timezone
3893
- ).isoformat().replace(".", ",").replace("+00:00", "Z")
3891
+ # Create proper ISO 8601 timestamp
3892
+ dt = datetime.fromtimestamp(local_path.stat().st_mtime, tz=timezone.utc)
3893
+ # per the SDS spec, replace '.' with ',' in the timestamp fractional seconds section
3894
+ timestamp = dt.isoformat().replace(".", ",").replace("+00:00", "Z")
3895
+
3894
3896
 
3895
3897
  manifest_data.append(create_file_entry(file_name, file_info, path_parts, timestamp))
3896
3898
 
@@ -42,21 +42,23 @@ def create_excel(
42
42
  .get("dataset_type", "")
43
43
  )
44
44
 
45
- populate_standards_info(ws1, soda)
45
+ standards_arr_len = populate_standards_info(ws1, soda)
46
46
 
47
- keyword_array_len = populate_basic_info(ws1, soda)
47
+ keyword_funding_array_len = populate_basic_info(ws1, soda)
48
48
  study_arr_len = populate_study_info(ws1, soda)
49
49
  contributor_arr_len = populate_contributor_info(ws1, soda)
50
50
  related_resource_arr_len = populate_related_resource_information(ws1, soda)
51
51
  populate_funding_info(ws1, soda)
52
52
  populate_participant_information(ws1, soda)
53
- data_dictionary_information(ws1, soda)
53
+ dict_arr_len = data_dictionary_information(ws1, soda)
54
54
 
55
55
  max_len = max(
56
- keyword_array_len,
56
+ keyword_funding_array_len,
57
57
  study_arr_len,
58
58
  contributor_arr_len,
59
59
  related_resource_arr_len,
60
+ standards_arr_len,
61
+ dict_arr_len,
60
62
  )
61
63
 
62
64
  # 3 is the first value column position
@@ -102,9 +104,12 @@ def populate_study_info(workbook, soda):
102
104
 
103
105
  def populate_standards_info(workbook, soda):
104
106
  standards_info = soda["dataset_metadata"]["dataset_description"]["standards_information"]
105
- workbook["D5"] = standards_info["data_standard"]
106
- workbook["D6"] = standards_info["data_standard_version"]
107
+ # this is an array with multiple entries
108
+ for col, standard in zip(excel_columns(start_index=3), standards_info):
109
+ workbook[col + "5"] = standard.get("data_standard", "")
110
+ workbook[col + "6"] = standard.get("data_standard_version", "")
107
111
 
112
+ return max(1, len(standards_info))
108
113
 
109
114
  def populate_basic_info(workbook, soda):
110
115
  basic_info = soda["dataset_metadata"]["dataset_description"]["basic_information"]
@@ -117,12 +122,14 @@ def populate_basic_info(workbook, soda):
117
122
  for col, keyword in zip(excel_columns(start_index=3), keywords):
118
123
  workbook[f"{col}11"] = keyword
119
124
 
120
- workbook["D12"] = basic_info.get("funding", "")
125
+ funding = basic_info.get("funding", [])
126
+ for col, funding_source in zip(excel_columns(start_index=3), funding):
127
+ workbook[f"{col}12"] = funding_source
121
128
  workbook["D13"] = basic_info.get("acknowledgments", "")
122
129
  workbook["D14"] = basic_info.get("license", "")
123
130
 
124
131
  # Return the length of the longer of the keywords and funding arrays, or 1 if both are empty
125
- return max(1, len(keywords))
132
+ return max(1, len(keywords), len(funding))
126
133
 
127
134
 
128
135
  def populate_funding_info(workbook, soda):
@@ -170,11 +177,14 @@ def data_dictionary_information(workbook, soda):
170
177
  It writes the path, type, and description of each data dictionary entry into rows 43-45 of the workbook, one column per entry.
171
178
  """
172
179
  # Look up the optional data dictionary information from the dataset description
173
- data_dictionary_info = soda["dataset_metadata"]["dataset_description"].get("data_dictionary_information", {})
180
+ data_dictionary_info = soda["dataset_metadata"]["dataset_description"].get("data_dictionary_information", [])
174
181
 
175
- workbook["D43"] = data_dictionary_info.get("data_dictionary_path", "")
176
- workbook["D44"] = data_dictionary_info.get("data_dictionary_type", "")
177
- workbook["D45"] = data_dictionary_info.get("data_dictionary_description", "")
182
+ for column, entry in zip(excel_columns(start_index=3), data_dictionary_info):
183
+ workbook[column + "43"] = entry.get("data_dictionary_path", "")
184
+ workbook[column + "44"] = entry.get("data_dictionary_type", "")
185
+ workbook[column + "45"] = entry.get("data_dictionary_description", "")
186
+
187
+ return max(1, len(data_dictionary_info))
178
188
 
179
189
  def grayout_subheaders(workbook, col):
180
190
  """
@@ -228,9 +238,6 @@ def apply_dashed_border(cell, workbook):
228
238
 
229
239
 
230
240
 
231
-
232
-
233
-
234
241
  def extend_value_header(workbook, max_len, start_index):
235
242
  """
236
243
  The headers starting at G1 are the 'Value' headers that correspond to the maximum number of entries for either the
@@ -253,4 +260,4 @@ def extend_value_header(workbook, max_len, start_index):
253
260
  apply_calibri_bold_12(header_cell)
254
261
  set_cell_alignment(header_cell, horizontal='center', vertical='center')
255
262
  apply_dashed_border(header_cell, workbook)
256
- grayout_subheaders(workbook, column_list[i - 1])
263
+ grayout_subheaders(workbook, column_list[i - 1])
@@ -21,7 +21,7 @@ def create_excel(soda, upload_boolean, local_destination):
21
21
  wb = load_workbook(destination)
22
22
  ws1 = wb["Sheet1"]
23
23
  manifest = soda["dataset_metadata"]["manifest_file"]
24
- # validate_schema(manifest, SCHEMA_NAME_MANIFEST)
24
+ validate_schema(manifest, SCHEMA_NAME_MANIFEST)
25
25
  ascii_headers = excel_columns(start_index=0)
26
26
  custom_headers_to_column = {}
27
27
 
@@ -8,21 +8,25 @@
8
8
  },
9
9
  "type": {
10
10
  "type": "string",
11
- "description": "The type of the dataset. For example, experimental."
11
+ "enum": ["experimental", "computational"],
12
+ "description": "The type of the dataset. In short, experimental data should have data collected from subjects and/or samples."
12
13
  },
13
14
  "standards_information": {
14
- "type": "object",
15
- "properties": {
16
- "data_standard": {
17
- "type": "string",
18
- "description": "The name of the standard used in the project."
19
- },
20
- "data_standard_version": {
21
- "type": "string",
22
- "description": "The version of the standard used in the project."
15
+ "type": "array",
16
+ "minItems": 1,
17
+ "items": {
18
+ "type": "object",
19
+ "properties": {
20
+ "data_standard": {
21
+ "type": "string",
22
+ "description": "The name of the standard used in the project. For example, SPARC."
23
+ },
24
+ "data_standard_version": {
25
+ "type": "string",
26
+ "description": "The version of the standard used in the project. For example, 1.0.0"
27
+ }
23
28
  }
24
- },
25
- "required": []
29
+ }
26
30
  },
27
31
  "basic_information": {
28
32
  "type": "object",
@@ -47,8 +51,11 @@
47
51
  "description": "A list of keywords related to the project."
48
52
  },
49
53
  "funding": {
50
- "type": "string",
51
- "description": "Funding awards for the project."
54
+ "type": "array",
55
+ "items": {
56
+ "type": "string"
57
+ },
58
+ "description": "Funding awards for the project. Listed as free text. E.g., OT2OD025349"
52
59
  },
53
60
  "acknowledgments": {
54
61
  "type": "string",
@@ -56,7 +63,7 @@
56
63
  },
57
64
  "license": {
58
65
  "type": "string",
59
- "description": "The license under which the project is released."
66
+ "description": "The license under which the project is released. Use the SPDX license identifier."
60
67
  }
61
68
  },
62
69
  "required": []
@@ -128,10 +135,12 @@
128
135
  "properties": {
129
136
  "contributor_orcid_id": {
130
137
  "type": "string",
131
- "description": "The ORCiD for this contributor."
138
+ "pattern": "^https://orcid.org/000[09]-00[01][0-9]-[0-9]{4}-[0-9]{3}([0-9]|X)$",
139
+ "description": "The ORCiD for this contributor. Must be a valid ORCID URL format."
132
140
  },
133
141
  "contributor_affiliation": {
134
142
  "type": "string",
143
+ "pattern": "^https://ror.org/0[0-9a-z]{6}[0-9]{2}$",
135
144
  "description": "The institutional affiliation for this contributor."
136
145
  },
137
146
  "contributor_name": {
@@ -142,22 +151,27 @@
142
151
  "contributor_role": {
143
152
  "type": "string",
144
153
  "enum": [
145
- "PrincipalInvestigator",
146
- "Creator",
147
- "CoInvestigator",
148
154
  "CorrespondingAuthor",
155
+ "ContactPerson",
156
+ "Creator",
149
157
  "DataCollector",
150
158
  "DataCurator",
151
159
  "DataManager",
152
160
  "Distributor",
153
161
  "Editor",
162
+ "HostingInstitution",
163
+ "PrincipalInvestigator",
164
+ "CoInvestigator",
154
165
  "Producer",
155
166
  "ProjectLeader",
156
167
  "ProjectManager",
157
168
  "ProjectMember",
169
+ "RegistrationAgency",
170
+ "RegistrationAuthority",
158
171
  "RelatedPerson",
159
172
  "Researcher",
160
173
  "ResearchGroup",
174
+ "RightsHolder",
161
175
  "Sponsor",
162
176
  "Supervisor",
163
177
  "WorkPackageLeader",
@@ -256,40 +270,48 @@
256
270
  "type": "object",
257
271
  "properties": {
258
272
  "number_of_subjects": {
259
- "type": "number",
273
+ "type": "integer",
274
+ "minimum": 0,
260
275
  "description": "The number of subjects in the study."
261
276
  },
262
277
  "number_of_samples": {
263
- "type": "number",
278
+ "type": "integer",
279
+ "minimum": 0,
264
280
  "description": "The number of samples in the study."
265
281
  },
266
282
  "number_of_sites": {
267
- "type": "number",
283
+ "type": "integer",
284
+ "minimum": 0,
268
285
  "description": "The number of sites in the study."
269
286
  },
270
287
  "number_of_performances": {
271
- "type": "number",
288
+ "type": "integer",
289
+ "minimum": 0,
272
290
  "description": "The number of performances in the study."
273
291
  }
274
292
  },
275
293
  "required": []
276
294
  },
277
295
  "data_dictionary_information": {
278
- "type": "object",
279
- "properties": {
280
- "data_dictionary_path": {
281
- "type": "string",
282
- "description": "The path to the data dictionary file."
283
- },
284
- "data_dictionary_type": {
285
- "type": "string",
286
- "description": "The type of the data dictionary. E.g., json-schema"
287
- },
288
- "data_dictionary_description": {
289
- "type": "string",
290
- "description": "A descrption of the data dictionary."
296
+ "type": "array",
297
+ "items": {
298
+ "type": "object",
299
+ "properties": {
300
+ "data_dictionary_path": {
301
+ "type": "string",
302
+ "description": "The path to the data dictionary file. Should be relative to the root of the dataset. E.g., 'code/data_dictionary/schema.json'"
303
+ },
304
+ "data_dictionary_type": {
305
+ "type": "string",
306
+ "description": "The type of the data dictionary. E.g., json-schema"
307
+ },
308
+ "data_dictionary_description": {
309
+ "type": "string",
310
+ "description": "A description of the data dictionary."
311
+ }
291
312
  }
292
313
  }
293
314
  }
294
- }
315
+ },
316
+ "required": ["metadata_version"]
295
317
  }
@@ -11,7 +11,8 @@
11
11
  },
12
12
  "timestamp": {
13
13
  "type": "string",
14
- "description": "Timestamp of when the data was created or last modified. This should be in ISO 8601 format."
14
+ "description": "Timestamp of when the data was created or last modified. This should be in ISO 8601 format. Per the SDS guidelines, commas are to be used for separating fractional seconds. Empty string for when it does not apply.",
15
+ "pattern": "^([0-9]{4}-[0-1][0-9]-[0-3][0-9]T[0-2][0-9]:[0-6][0-9](:[0-6][0-9](,[0-9]{1,9})?)?(Z|[+-][0-2][0-9]:[0-6][0-9])?)?$"
15
16
  },
16
17
  "description": {
17
18
  "type": "string",
@@ -23,7 +24,8 @@
23
24
  },
24
25
  "entity": {
25
26
  "type": "string",
26
- "description": "Each ID should be taken from the subject.xlsx, samples.xlsx, sites.xlsx, or performances.xlsx files."
27
+ "description": "Each ID should be taken from the subject.xlsx, samples.xlsx, sites.xlsx, or performances.xlsx files.",
28
+ "pattern": "^((pop-)?(sub|sam|site|perf)-[A-Za-z0-9]([A-Za-z0-9-]*[A-Za-z0-9])?)?$"
27
29
  },
28
30
  "data_modality": {
29
31
  "type": "string",
@@ -43,7 +45,7 @@
43
45
  },
44
46
  "entity_is_transitive": {
45
47
  "type": "string",
46
- "description": "Indicates whether the entity represented in this data file is transitive. This should be either true or false."
48
+ "description": "Indicates whether the entity represented in this data file is transitive. This should be either true or false if provided."
47
49
  },
48
50
  "additional_metadata": {
49
51
  "type": "string",
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pysodafair
3
- Version: 0.1.64
3
+ Version: 0.1.66
4
4
  Summary: Pysoda package for Fairdataihub tools
5
5
  License: MIT
6
6
  License-File: LICENSE
@@ -4,16 +4,16 @@ pysoda/core/__init__.py,sha256=bXnu4fYemJ915xId8nwh-Gy2IUEvVuoS9Hk3CXyJp8U,235
4
4
  pysoda/core/dataset_generation/__init__.py,sha256=tisLmJeXLeAINXf7BdNZGi2i9vQwImWGa_S5FNwQFbs,299
5
5
  pysoda/core/dataset_generation/manifestSession/__init__.py,sha256=kqkTAFEhluyQJ9mmMhYQTh1tgowBeJdH-Q9s5ifGkSE,51
6
6
  pysoda/core/dataset_generation/manifestSession/manifest_session.py,sha256=TML_KOJ-1REohqSaHCZNFJrbDR1UDQ9sFcithJRx9t8,4669
7
- pysoda/core/dataset_generation/upload.py,sha256=FtEQoyg3ly0lXmnUtKDXXHln1dLN6reb9knOdFTBFeA,172657
7
+ pysoda/core/dataset_generation/upload.py,sha256=lnU6od0LtJOjuzNj-VX2HzOp66xUBr1V6nXtpJjkdIg,172804
8
8
  pysoda/core/dataset_importing/__init__.py,sha256=NbWs4HAqqydFLACFoVwl6g457dkYvjiZKx9NzB9wFxE,114
9
9
  pysoda/core/dataset_importing/import_dataset.py,sha256=cx8qCQmR_BKdC2G-jzqE4dWg2JhkOS3jM8kpjLIsObk,29487
10
10
  pysoda/core/metadata/__init__.py,sha256=Tkx6vdEQEPwAHmVSc9GfdbDZUkvuAqEgHJOxRDeX5vE,821
11
11
  pysoda/core/metadata/code_description.py,sha256=sWbRPWPf32txpN048oQ9lT3y7GAoDkrS-GHivwwv55Y,4104
12
12
  pysoda/core/metadata/constants.py,sha256=PR78huqBKdBpzDUGxVKy9YW3pUrJ4ftuF4MfqpNb1bU,1052
13
- pysoda/core/metadata/dataset_description.py,sha256=K1yWYQkbn4d7DCjQH5vT9Y6dhcWWlx823XLACgkIS2M,9749
13
+ pysoda/core/metadata/dataset_description.py,sha256=icIRH2TYXWqrc4gu_RUK0yP_WQmy9BGAkVOEVQ--GfA,10275
14
14
  pysoda/core/metadata/excel_utils.py,sha256=FQ8-DBq2lxFdpDUZeABfFKe284JzRTe6AYPxEw7yzsg,1216
15
15
  pysoda/core/metadata/helpers.py,sha256=jDf4KPTbx4unHT7pDlFEa0jls1OZY-dpdKf3kUGMtvg,10609
16
- pysoda/core/metadata/manifest.py,sha256=MjID-r52Yn3i9WlrT5IWeYvmkVdfzQuqGOmoMsZQVJg,4197
16
+ pysoda/core/metadata/manifest.py,sha256=LO8ZP8oJ0vEjbXU0yPVEYm62MLOZwIYVUaW4VEczO9k,4195
17
17
  pysoda/core/metadata/manifest_package/__init__.py,sha256=7qiNT62WsI1LKb-lvnK31lW04sOvW_KTZQA1e8sUAgY,211
18
18
  pysoda/core/metadata/manifest_package/manifest.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
19
19
  pysoda/core/metadata/manifest_package/manifest_import.py,sha256=5rKOBf-NMt2dVsPJYotk5V855wnmtiMqrpknSOYOEl0,1347
@@ -50,8 +50,8 @@ pysoda/core/upload_manifests/__init__.py,sha256=SOiyflYmZDtkLwWpeu7flyn3J2SAj9WL
50
50
  pysoda/core/upload_manifests/upload_manifests.py,sha256=Hd2XSXYieG-EMcPTLSYXvdtraipp1XBSo5cGpZR-vbA,1223
51
51
  pysoda/schema/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
52
52
  pysoda/schema/code_description.json,sha256=FJOyJBC2TKjTTdtew3B4wdq_WIrti-r2F-M6t5OVNB0,17854
53
- pysoda/schema/dataset_description.json,sha256=qxhzQzoE7CGuXvtY9fVWV31-daf5Ivms7_PkVW4i858,9053
54
- pysoda/schema/manifest.json,sha256=bsVzhnZHd2566KhrxQPBcwH376fgE8pSY8eXviTXrWA,2636
53
+ pysoda/schema/dataset_description.json,sha256=U725xGDmy4oeJe0IOFcJc7a0oA5pAbpsRVQsjhPEj1k,10069
54
+ pysoda/schema/manifest.json,sha256=dClcVIR3IlesjWC9peLAnyuIECwNDHfbIFmCsj9l2Wk,3006
55
55
  pysoda/schema/performances.json,sha256=TJw7ERC6eMR-9cImOg7xoEVRyOR4xViJqZO8U2X2OXo,1254
56
56
  pysoda/schema/resources.json,sha256=9-rwCTX523V5XaKmXLTUGzackOl7tk-qijbbBDoeuhY,1232
57
57
  pysoda/schema/samples.json,sha256=NvuYhskV9QFrkSX4ant9G4KFCe9JLnVG98vDv2Puuvk,3340
@@ -71,7 +71,7 @@ pysoda/utils/profile.py,sha256=di4D_IE1rGSfHl0-SRVjrJK2bCdedW4ugp0W-j1HarQ,937
71
71
  pysoda/utils/schema_validation.py,sha256=3w3FRPyn4P3xMhITzItk-jYte8TTlNrgCWmY-s05x9Y,4438
72
72
  pysoda/utils/time_utils.py,sha256=g5848bivtzWj6TDoWo6CcohF-THxShETP4qTyHTjBWw,131
73
73
  pysoda/utils/upload_utils.py,sha256=-BCvfXsJSFqnwEZVCFQX1PLfkdeW0gyGLjLGW6Uxdf8,4607
74
- pysodafair-0.1.64.dist-info/METADATA,sha256=mgUEcj-8RZdfFlMBD94Joq0bmRP4CXOF0C6S3JxYX20,7011
75
- pysodafair-0.1.64.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
76
- pysodafair-0.1.64.dist-info/licenses/LICENSE,sha256=Jlt0uGnx87qPRGXPQHsBkg_S7MOsYS2E9Rh1zy47bfw,1082
77
- pysodafair-0.1.64.dist-info/RECORD,,
74
+ pysodafair-0.1.66.dist-info/METADATA,sha256=CFPeQYRgKDKEjoreN1iv9vV28KcV8Xbbdsp26H99AeI,7011
75
+ pysodafair-0.1.66.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
76
+ pysodafair-0.1.66.dist-info/licenses/LICENSE,sha256=Jlt0uGnx87qPRGXPQHsBkg_S7MOsYS2E9Rh1zy47bfw,1082
77
+ pysodafair-0.1.66.dist-info/RECORD,,