pysodafair 0.1.62__py3-none-any.whl → 0.1.64__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3830,57 +3830,39 @@ def generate_manifest_file_locally(generate_purpose, soda):
 
 
 def generate_manifest_file_data(dataset_structure):
-    # Define common file extensions with special handling
     double_extensions = {
         ".ome.tiff", ".ome.tif", ".ome.tf2", ".ome.tf8", ".ome.btf", ".ome.xml",
         ".brukertiff.gz", ".mefd.gz", ".moberg.gz", ".nii.gz", ".mgh.gz", ".tar.gz", ".bcl.gz"
     }
 
-    # Helper function: Get the complete file extension
+    # Helper: Determine file extension (handles double extensions)
     def get_file_extension(filename):
         for ext in double_extensions:
             if filename.endswith(ext):
                 base_ext = os.path.splitext(os.path.splitext(filename)[0])[1]
                 return base_ext + ext
         return os.path.splitext(filename)[1]
-
+
+    # Helper: Create a manifest row for a folder
     def create_folder_entry(folder_name, path_parts):
         full_path = "/".join(path_parts + [folder_name]) + "/"
-        entry = [
-            full_path.lstrip("/"), # Remove leading slash for consistency
-            "", # Timestamp
-            "", # Description
-            "folder", # File type
-            "", # Entity (empty)
-            "", # Data modality (empty)
-            "", # Also in dataset (empty)
-            "", # Data dictionary path (empty)
-            "", # Entity is transitive (empty)
-            "", # Additional Metadata
+        return [
+            full_path.lstrip("/"),
+            "", "", "folder", "", "", "", "", "", ""
         ]
-        return entry
-
-
-
-    # Helper function: Build a single manifest entry
-    def create_file_entry(item, folder, path_parts, timestamp, filename):
-        full_path = "/".join(path_parts + [filename])
-        file_info = folder["files"][item]
 
+    # Helper: Create a manifest row for a file
+    def create_file_entry(file_name, file_info, path_parts, timestamp):
         entry = [
-            full_path.lstrip("/"), # Remove leading slash for consistency
-            timestamp, # Timestamp
-            file_info["description"], # Description
-            get_file_extension(filename), # File type
-            "", # Entity (empty)
-            "", # Data modality (empty)
-            "", # Also in dataset (empty)
-            "", # Data dictionary path (empty)
-            "", # Entity is transitive (empty)
-            file_info.get("additional-metadata", "") # Additional Metadata
+            "/".join(path_parts + [file_name]).lstrip("/"),
+            timestamp,
+            file_info["description"],
+            get_file_extension(file_name),
+            "", "", "", "", "",
+            file_info.get("additional-metadata", "")
         ]
 
-        # Add any extra columns dynamically
+        # Append any extra columns dynamically
         if "extra_columns" in file_info:
             for key, value in file_info["extra_columns"].items():
                 entry.append(value)
@@ -3889,48 +3871,36 @@ def generate_manifest_file_data(dataset_structure):
 
         return entry
 
-    # Recursive function: Traverse dataset and collect file data
+    # Recursive traversal of folders and files
     def traverse_folders(folder, path_parts):
-        # Add header row if processing files for the first time
         if not manifest_data:
             manifest_data.append(header_row)
-
-        if "files" in folder:
-            for item, file_info in folder["files"].items():
-
-                if "path" in file_info:
-                    file_path = file_info["path"]
-                elif "pspath" in file_info:
-                    file_path = file_info["pspath"]
-                else:
-                    continue
-
-                # If the file is a manifest file, skip it
-                if item in {"manifest.xlsx", "manifest.csv"}:
-                    continue
 
-                # Determine timestamp
-                filename = os.path.basename(file_path.replace("\\", "/"))
-                if file_info["location"] == "ps":
-                    timestamp = file_info["timestamp"]
-                else:
-                    local_path = pathlib.Path(file_info["path"])
-                    timestamp = datetime.fromtimestamp(
-                        local_path.stat().st_mtime, tz=local_timezone
-                    ).isoformat().replace(".", ",").replace("+00:00", "Z")
-
-                # Add file entry
-                manifest_data.append(create_file_entry(item, folder, path_parts, timestamp, filename))
-
-        if "folders" in folder:
-            for subfolder_name, subfolder in folder["folders"].items():
-                # Add folder entry
-                manifest_data.append(create_folder_entry(subfolder_name, path_parts))
-                traverse_folders(subfolder, path_parts + [subfolder_name])
-
-    # Initialize variables
-    manifest_data = [] # Collects all rows for the manifest
-    # TODO: Update to SDS 3.0
+        # Process files
+        for file_name, file_info in folder.get("files", {}).items():
+            file_path = file_info.get("path")
+            if not file_path:
+                continue
+            if file_name in {"manifest.xlsx", "manifest.csv"}:
+                continue
+
+            if file_info["location"] == "ps":
+                timestamp = file_info["timestamp"]
+            else:
+                local_path = pathlib.Path(file_info["path"])
+                timestamp = datetime.fromtimestamp(
+                    local_path.stat().st_mtime, tz=local_timezone
+                ).isoformat().replace(".", ",").replace("+00:00", "Z")
+
+            manifest_data.append(create_file_entry(file_name, file_info, path_parts, timestamp))
+
+        # Process subfolders
+        for subfolder_name, subfolder in folder.get("folders", {}).items():
+            manifest_data.append(create_folder_entry(subfolder_name, path_parts))
+            traverse_folders(subfolder, path_parts + [subfolder_name])
+
+    # Initialize manifest data and header
+    manifest_data = []
     header_row = [
         "filename", "timestamp", "description", "file type", "entity",
         "data modality", "also in dataset", "data dictionary path",
@@ -3938,9 +3908,6 @@ def generate_manifest_file_data(dataset_structure):
     ]
     local_timezone = TZLOCAL()
 
-    # Log the dataset structure
-
-    # Start recursive traversal from the root
     traverse_folders(dataset_structure, [])
 
     return manifest_data
@@ -3948,4 +3915,3 @@ def generate_manifest_file_data(dataset_structure):
 
 
 
-
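A note on the manifest refactor above: traverse_folders now reads children with folder.get("files", {}) and folder.get("folders", {}), the old "pspath" fallback is gone (entries without a "path" are simply skipped), and the row builders receive the file name and its file_info dict directly. The sketch below shows the nested dict shape the function appears to expect and the rows the new helpers would emit; the key names are taken from the diff, while the concrete values are invented for illustration.

# Hypothetical input for generate_manifest_file_data; keys mirror the diff, values are made up.
dataset_structure = {
    "files": {
        "readme.txt": {
            "path": "C:/data/readme.txt",
            "location": "ps",                     # "ps" entries reuse the stored timestamp;
            "timestamp": "2024-01-01T00:00:00Z",  # anything else is stat()'d on disk
            "description": "Top-level readme",
        },
    },
    "folders": {
        "primary": {"files": {}, "folders": {}},
    },
}

# Rows appended after the header row, mirroring create_file_entry / create_folder_entry:
#   ["readme.txt", "2024-01-01T00:00:00Z", "Top-level readme", ".txt", "", "", "", "", "", ""]
#   ["primary/",   "", "", "folder", "", "", "", "", "", ""]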
@@ -3,8 +3,8 @@ from os.path import join, getsize
 from openpyxl import load_workbook
 import shutil
 from .excel_utils import rename_headers, excel_columns
-import itertools
-from openpyxl.styles import PatternFill
+from copy import copy
+from openpyxl.styles import PatternFill, Font, Alignment, Border, Side
 from ...utils import validate_schema
 from .helpers import upload_metadata_file, get_template_path
 
@@ -44,15 +44,24 @@ def create_excel(
 
     populate_standards_info(ws1, soda)
 
-    keyword_array = populate_basic_info(ws1, soda)
-
-    populate_study_info(ws1, soda)
-    populate_contributor_info(ws1, soda)
-    populate_related_resource_information(ws1, soda)
+    keyword_array_len = populate_basic_info(ws1, soda)
+    study_arr_len = populate_study_info(ws1, soda)
+    contributor_arr_len = populate_contributor_info(ws1, soda)
+    related_resource_arr_len = populate_related_resource_information(ws1, soda)
     populate_funding_info(ws1, soda)
     populate_participant_information(ws1, soda)
     data_dictionary_information(ws1, soda)
 
+    max_len = max(
+        keyword_array_len,
+        study_arr_len,
+        contributor_arr_len,
+        related_resource_arr_len,
+    )
+
+    # 3 is the first value column position
+    extend_value_header(ws1, max_len, start_index=3)
+
     wb.save(destination)
 
     size = getsize(destination)
@@ -167,22 +176,81 @@ def data_dictionary_information(workbook, soda):
     workbook["D44"] = data_dictionary_info.get("data_dictionary_type", "")
     workbook["D45"] = data_dictionary_info.get("data_dictionary_description", "")
 
-def grayout_subheaders(workbook, max_len, start_index):
+def grayout_subheaders(workbook, col):
     """
-    Gray out sub-header rows for values exceeding 3 (SDS2.0).
+    Gray out the cells at workbook[row][col] for the specified cells in
+    positions 2, 3, 4, 7, 8, 9, 10, 13, 14, 15, 19, 20, 21, 22, 26, 27, 32, 37,
+    38, 39, 40, 41, 42
     """
-    headers_list = ["4", "10", "18", "23", "28"]
-    columns_list = excel_columns(start_index=start_index)
-
-    for (i, column), no in itertools.product(zip(range(2, max_len + 1), columns_list[1:]), headers_list):
-        cell = workbook[column + no]
-        fillColor("B2B2B2", cell)
-
-
+    gray_out_rows_for_column = [2, 3, 4, 7, 8, 9, 10, 13, 14, 15, 19, 20, 21, 22, 26, 27, 32, 37,
+                                38, 39, 40, 41, 42]
 
+    for row in gray_out_rows_for_column:
+        cell = workbook[col + str(row)]
+        if row in [4, 7, 15, 19, 27, 32, 37, 42]:
+            fillColor("b2b2b2", cell)
+        else:
+            fillColor("cccccc", cell)
 
 
 def fillColor(color, cell):
     colorFill = PatternFill(start_color=color, end_color=color, fill_type="solid")
 
     cell.fill = colorFill
+
+
+def apply_calibri_bold_12(cell):
+    """Apply Calibri Bold 12pt font formatting to a cell"""
+    calibri_bold_font = Font(name='Calibri', size=12, bold=True)
+    cell.font = calibri_bold_font
+
+
+def set_cell_alignment(cell, horizontal='left', vertical='top', wrap_text=False):
+    """Set text alignment for a cell
+
+    Args:
+        cell: The cell to format
+        horizontal: 'left', 'center', 'right', 'justify', 'distributed'
+        vertical: 'top', 'center', 'bottom', 'justify', 'distributed'
+        wrap_text: Boolean to enable text wrapping
+    """
+    cell.alignment = Alignment(
+        horizontal=horizontal,
+        vertical=vertical,
+        wrap_text=wrap_text
+    )
+
+
+def apply_dashed_border(cell, workbook):
+    """Apply border copied from cell A1 to the target cell"""
+    # Copy the border from cell A1
+    cell.border = copy(workbook["A1"].border)
+
+
+
+
+
+
+def extend_value_header(workbook, max_len, start_index):
+    """
+    The headers starting at G1 are the 'Value' headers that correspond to the maximum number of entries for either the
+    keywords, contributor information, or data dictionary information arrays. This function extends those headers based on the max_len of the
+    three arrays.
+    """
+
+    column_list = excel_columns(start_index=start_index)
+
+    # if max len is less than 4 then no need to extend headers
+    if max_len < 4:
+        return
+
+    # replace the 4th value with header value 'Value 4' and so on for max len
+    for i in range(4, max_len + 1):
+        header_cell = workbook[column_list[i - 1] + "1"]
+        header_cell.value = f"Value {i}"
+        # make the new header blue
+        fillColor("9cc2e5", header_cell)
+        apply_calibri_bold_12(header_cell)
+        set_cell_alignment(header_cell, horizontal='center', vertical='center')
+        apply_dashed_border(header_cell, workbook)
+        grayout_subheaders(workbook, column_list[i - 1])
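The new formatting helpers above are thin wrappers over stock openpyxl styling calls. A minimal, self-contained sketch of what an extended "Value N" header cell ends up with, using a throwaway workbook and an arbitrary cell position (H1) rather than the real dataset_description template; the colors and font mirror the diff:

from copy import copy

from openpyxl import Workbook
from openpyxl.styles import Alignment, Font, PatternFill

# Throwaway workbook standing in for the dataset_description template.
wb = Workbook()
ws = wb.active

header_cell = ws["H1"]        # arbitrary example position for a "Value 5" header
header_cell.value = "Value 5"
header_cell.fill = PatternFill(start_color="9cc2e5", end_color="9cc2e5", fill_type="solid")
header_cell.font = Font(name="Calibri", size=12, bold=True)
header_cell.alignment = Alignment(horizontal="center", vertical="center")
header_cell.border = copy(ws["A1"].border)  # reuse A1's border, as apply_dashed_border does

wb.save("value_header_sketch.xlsx")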
@@ -136,8 +136,8 @@
     },
     "contributor_name": {
       "type": "string",
-      "pattern": "^[A-Za-z]+, [A-Za-z]+( [A-Za-z]+)?$",
-      "description": "The name of the contributor. The format should be 'Last Name, First Name'."
+      "pattern": "^(([Vv]an|[Vv]an [Dd]e[rn]?|[Vv]an [Tt]|[Vv]ander|[Vv]on|[Vv]on [Dd]e[rn]?|[Zz]u|[Zz]ur|[Vv]om|[Dd]e|[Dd]es|[Dd]u|[Dd]e [Ll]a|[Dd]e [Ll]es|[Dd]e [Ll]os|[Ll]e|[Ll]a|[Ll]es|[Dd]el|[Dd]e [Ll]os|[Dd]e [Ll]a|[Dd]e [Ll]as|[Dd]a|[Dd]o|[Dd]i|[Dd]ell[ao]|[Dd]ell[ei]|[Dd]ai|[Dd]al|[Dd]alle|[Oo]'|[Mm]c|[Mm]ac|[Aa]l|[Ee]l|[Aa]bd [Aa]l|[Aa]bu|[Ii]bn|[Ff]itz|[Aa]p|[Bb]en|[Bb]ar|[Bb]at) )?[^, ]+, [^,]+$",
+      "description": "The name of the contributor. The format should be 'Last Name, First Name'. Supports international family name prefixes (van, von, de, del, etc.)."
     },
     "contributor_role": {
       "type": "string",
@@ -8,6 +8,10 @@
        "type": "string",
        "description": "Unique identifier for the site, typically an RRID"
      },
+      "type": {
+        "type": "string",
+        "description": "The type of resource, e.g., 'instrument', 'software', 'reagent', etc."
+      },
      "name": {
        "type": "string",
        "description": "The resources name"
@@ -34,6 +38,6 @@
        "description": "Additional metadata about the resource, such as usage notes or specific details"
      }
    },
-    "required": ["rrid", "name", "url", "vendor", "version", "id_in_protocol"]
+    "required": ["rrid"]
  }
}
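With only "rrid" required, a resource entry that omits name, url, vendor, version, and id_in_protocol now passes validation. A rough standalone check against an abbreviated stand-in for the schema, using the jsonschema package directly (pysoda routes this through its own validate_schema helper, and the real resources.json defines more properties than shown here); the RRID value is a placeholder:

from jsonschema import validate

# Abbreviated stand-in for pysoda/schema/resources.json after this change -- only the parts
# relevant to the relaxed "required" list; the property list is not complete.
resource_schema = {
    "type": "object",
    "properties": {
        "rrid": {"type": "string"},
        "type": {"type": "string"},
        "name": {"type": "string"},
    },
    "required": ["rrid"],
}

# Passes under the 0.1.64-style rules; under 0.1.62, the missing "name", "url", "vendor",
# "version", and "id_in_protocol" fields would each have been a validation error.
validate(instance={"rrid": "RRID:SCR_000000", "type": "software"}, schema=resource_schema)
print("minimal resource entry validated")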
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pysodafair
-Version: 0.1.62
+Version: 0.1.64
 Summary: Pysoda package for Fairdataihub tools
 License: MIT
 License-File: LICENSE
@@ -4,13 +4,13 @@ pysoda/core/__init__.py,sha256=bXnu4fYemJ915xId8nwh-Gy2IUEvVuoS9Hk3CXyJp8U,235
 pysoda/core/dataset_generation/__init__.py,sha256=tisLmJeXLeAINXf7BdNZGi2i9vQwImWGa_S5FNwQFbs,299
 pysoda/core/dataset_generation/manifestSession/__init__.py,sha256=kqkTAFEhluyQJ9mmMhYQTh1tgowBeJdH-Q9s5ifGkSE,51
 pysoda/core/dataset_generation/manifestSession/manifest_session.py,sha256=TML_KOJ-1REohqSaHCZNFJrbDR1UDQ9sFcithJRx9t8,4669
-pysoda/core/dataset_generation/upload.py,sha256=ATx8D-Ck1xDzrT_rI-YSDmi9hFFqiz2E0FgCrPlyJQA,174022
+pysoda/core/dataset_generation/upload.py,sha256=FtEQoyg3ly0lXmnUtKDXXHln1dLN6reb9knOdFTBFeA,172657
 pysoda/core/dataset_importing/__init__.py,sha256=NbWs4HAqqydFLACFoVwl6g457dkYvjiZKx9NzB9wFxE,114
 pysoda/core/dataset_importing/import_dataset.py,sha256=cx8qCQmR_BKdC2G-jzqE4dWg2JhkOS3jM8kpjLIsObk,29487
 pysoda/core/metadata/__init__.py,sha256=Tkx6vdEQEPwAHmVSc9GfdbDZUkvuAqEgHJOxRDeX5vE,821
 pysoda/core/metadata/code_description.py,sha256=sWbRPWPf32txpN048oQ9lT3y7GAoDkrS-GHivwwv55Y,4104
 pysoda/core/metadata/constants.py,sha256=PR78huqBKdBpzDUGxVKy9YW3pUrJ4ftuF4MfqpNb1bU,1052
-pysoda/core/metadata/dataset_description.py,sha256=uwVC0tBVglQPs57LQL22nEXKLjCm1j6rtmGj8ByOeoA,7332
+pysoda/core/metadata/dataset_description.py,sha256=K1yWYQkbn4d7DCjQH5vT9Y6dhcWWlx823XLACgkIS2M,9749
 pysoda/core/metadata/excel_utils.py,sha256=FQ8-DBq2lxFdpDUZeABfFKe284JzRTe6AYPxEw7yzsg,1216
 pysoda/core/metadata/helpers.py,sha256=jDf4KPTbx4unHT7pDlFEa0jls1OZY-dpdKf3kUGMtvg,10609
 pysoda/core/metadata/manifest.py,sha256=MjID-r52Yn3i9WlrT5IWeYvmkVdfzQuqGOmoMsZQVJg,4197
@@ -50,10 +50,10 @@ pysoda/core/upload_manifests/__init__.py,sha256=SOiyflYmZDtkLwWpeu7flyn3J2SAj9WL
 pysoda/core/upload_manifests/upload_manifests.py,sha256=Hd2XSXYieG-EMcPTLSYXvdtraipp1XBSo5cGpZR-vbA,1223
 pysoda/schema/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 pysoda/schema/code_description.json,sha256=FJOyJBC2TKjTTdtew3B4wdq_WIrti-r2F-M6t5OVNB0,17854
-pysoda/schema/dataset_description.json,sha256=rRuQiYthOwrlmzP_d3F1cG_ceIpefzxGuINorEcrA5g,8626
+pysoda/schema/dataset_description.json,sha256=qxhzQzoE7CGuXvtY9fVWV31-daf5Ivms7_PkVW4i858,9053
 pysoda/schema/manifest.json,sha256=bsVzhnZHd2566KhrxQPBcwH376fgE8pSY8eXviTXrWA,2636
 pysoda/schema/performances.json,sha256=TJw7ERC6eMR-9cImOg7xoEVRyOR4xViJqZO8U2X2OXo,1254
-pysoda/schema/resources.json,sha256=FADf1p0IG_gliQlSr04cRtLKQMm8XhEktlnbnw_XG1U,1140
+pysoda/schema/resources.json,sha256=9-rwCTX523V5XaKmXLTUGzackOl7tk-qijbbBDoeuhY,1232
 pysoda/schema/samples.json,sha256=NvuYhskV9QFrkSX4ant9G4KFCe9JLnVG98vDv2Puuvk,3340
 pysoda/schema/sites.json,sha256=lMkglgi4R7_qqUHh6uwbZ38TjkUzRKGY9pdF1CpZh4E,1133
 pysoda/schema/soda_schema.json,sha256=QSe8G39wOMO5wS_MUU0mW6yeM4etufACE6iuAzZMBoA,19290
@@ -71,7 +71,7 @@ pysoda/utils/profile.py,sha256=di4D_IE1rGSfHl0-SRVjrJK2bCdedW4ugp0W-j1HarQ,937
 pysoda/utils/schema_validation.py,sha256=3w3FRPyn4P3xMhITzItk-jYte8TTlNrgCWmY-s05x9Y,4438
 pysoda/utils/time_utils.py,sha256=g5848bivtzWj6TDoWo6CcohF-THxShETP4qTyHTjBWw,131
 pysoda/utils/upload_utils.py,sha256=-BCvfXsJSFqnwEZVCFQX1PLfkdeW0gyGLjLGW6Uxdf8,4607
-pysodafair-0.1.62.dist-info/METADATA,sha256=J4JLeWI1aGB18L8QAl__4vgRS8Z5hNqvK-i2zFTHMVQ,7011
-pysodafair-0.1.62.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
-pysodafair-0.1.62.dist-info/licenses/LICENSE,sha256=Jlt0uGnx87qPRGXPQHsBkg_S7MOsYS2E9Rh1zy47bfw,1082
-pysodafair-0.1.62.dist-info/RECORD,,
+pysodafair-0.1.64.dist-info/METADATA,sha256=mgUEcj-8RZdfFlMBD94Joq0bmRP4CXOF0C6S3JxYX20,7011
+pysodafair-0.1.64.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
+pysodafair-0.1.64.dist-info/licenses/LICENSE,sha256=Jlt0uGnx87qPRGXPQHsBkg_S7MOsYS2E9Rh1zy47bfw,1082
+pysodafair-0.1.64.dist-info/RECORD,,