pysodafair 0.1.62__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. pysoda/__init__.py +0 -0
  2. pysoda/constants.py +3 -0
  3. pysoda/core/__init__.py +10 -0
  4. pysoda/core/dataset_generation/__init__.py +11 -0
  5. pysoda/core/dataset_generation/manifestSession/__init__.py +1 -0
  6. pysoda/core/dataset_generation/manifestSession/manifest_session.py +146 -0
  7. pysoda/core/dataset_generation/upload.py +3951 -0
  8. pysoda/core/dataset_importing/__init__.py +1 -0
  9. pysoda/core/dataset_importing/import_dataset.py +662 -0
  10. pysoda/core/metadata/__init__.py +20 -0
  11. pysoda/core/metadata/code_description.py +109 -0
  12. pysoda/core/metadata/constants.py +32 -0
  13. pysoda/core/metadata/dataset_description.py +188 -0
  14. pysoda/core/metadata/excel_utils.py +41 -0
  15. pysoda/core/metadata/helpers.py +250 -0
  16. pysoda/core/metadata/manifest.py +112 -0
  17. pysoda/core/metadata/manifest_package/__init__.py +2 -0
  18. pysoda/core/metadata/manifest_package/manifest.py +0 -0
  19. pysoda/core/metadata/manifest_package/manifest_import.py +29 -0
  20. pysoda/core/metadata/manifest_package/manifest_writer.py +666 -0
  21. pysoda/core/metadata/performances.py +46 -0
  22. pysoda/core/metadata/resources.py +53 -0
  23. pysoda/core/metadata/samples.py +184 -0
  24. pysoda/core/metadata/sites.py +51 -0
  25. pysoda/core/metadata/subjects.py +172 -0
  26. pysoda/core/metadata/submission.py +91 -0
  27. pysoda/core/metadata/text_metadata.py +47 -0
  28. pysoda/core/metadata_templates/CHANGES +1 -0
  29. pysoda/core/metadata_templates/LICENSE +1 -0
  30. pysoda/core/metadata_templates/README.md +4 -0
  31. pysoda/core/metadata_templates/__init__.py +0 -0
  32. pysoda/core/metadata_templates/code_description.xlsx +0 -0
  33. pysoda/core/metadata_templates/code_parameters.xlsx +0 -0
  34. pysoda/core/metadata_templates/dataset_description.xlsx +0 -0
  35. pysoda/core/metadata_templates/manifest.xlsx +0 -0
  36. pysoda/core/metadata_templates/performances.xlsx +0 -0
  37. pysoda/core/metadata_templates/resources.xlsx +0 -0
  38. pysoda/core/metadata_templates/samples.xlsx +0 -0
  39. pysoda/core/metadata_templates/sites.xlsx +0 -0
  40. pysoda/core/metadata_templates/subjects.xlsx +0 -0
  41. pysoda/core/metadata_templates/subjects_pools_samples_structure.xlsx +0 -0
  42. pysoda/core/metadata_templates/subjects_pools_samples_structure_example.xlsx +0 -0
  43. pysoda/core/metadata_templates/submission.xlsx +0 -0
  44. pysoda/core/permissions/__init__.py +1 -0
  45. pysoda/core/permissions/permissions.py +31 -0
  46. pysoda/core/pysoda/__init__.py +2 -0
  47. pysoda/core/pysoda/soda.py +34 -0
  48. pysoda/core/pysoda/soda_object.py +55 -0
  49. pysoda/core/upload_manifests/__init__.py +1 -0
  50. pysoda/core/upload_manifests/upload_manifests.py +37 -0
  51. pysoda/schema/__init__.py +0 -0
  52. pysoda/schema/code_description.json +629 -0
  53. pysoda/schema/dataset_description.json +295 -0
  54. pysoda/schema/manifest.json +60 -0
  55. pysoda/schema/performances.json +44 -0
  56. pysoda/schema/resources.json +39 -0
  57. pysoda/schema/samples.json +97 -0
  58. pysoda/schema/sites.json +38 -0
  59. pysoda/schema/soda_schema.json +664 -0
  60. pysoda/schema/subjects.json +131 -0
  61. pysoda/schema/submission_schema.json +28 -0
  62. pysoda/utils/__init__.py +9 -0
  63. pysoda/utils/authentication.py +381 -0
  64. pysoda/utils/config.py +68 -0
  65. pysoda/utils/exceptions.py +156 -0
  66. pysoda/utils/logger.py +6 -0
  67. pysoda/utils/metadata_utils.py +74 -0
  68. pysoda/utils/pennsieveAgentUtils.py +11 -0
  69. pysoda/utils/pennsieveUtils.py +118 -0
  70. pysoda/utils/profile.py +28 -0
  71. pysoda/utils/schema_validation.py +133 -0
  72. pysoda/utils/time_utils.py +5 -0
  73. pysoda/utils/upload_utils.py +108 -0
  74. pysodafair-0.1.62.dist-info/METADATA +190 -0
  75. pysodafair-0.1.62.dist-info/RECORD +77 -0
  76. pysodafair-0.1.62.dist-info/WHEEL +4 -0
  77. pysodafair-0.1.62.dist-info/licenses/LICENSE +21 -0
pysoda/__init__.py ADDED
File without changes
pysoda/constants.py ADDED
@@ -0,0 +1,3 @@
1
+
2
+ PENNSIEVE_URL = "https://api.pennsieve.io"  # base URL of the Pennsieve API host (api.pennsieve.io)
3
+ PENNSIEVE_2_URL = "https://api2.pennsieve.io"  # base URL of the api2.pennsieve.io host; presumably the v2 API - TODO confirm
pysoda/core/__init__.py ADDED
@@ -0,0 +1,10 @@
1
+ import logging
2
+
3
+ from .permissions import has_edit_permissions
4
+
5
+
6
+
7
+ # Create a logger for the package, named after this module via __name__
8
+ logger = logging.getLogger(__name__)
9
+ # Default this logger's level to WARNING. NOTE: this is applied unconditionally, not only when no handlers are configured.
10
+ logger.setLevel(logging.WARNING)
pysoda/core/dataset_generation/__init__.py ADDED
@@ -0,0 +1,11 @@
1
+ from .upload import (
2
+ create_folder_level_manifest,
3
+ check_empty_files_folders,
4
+ main_curate_function,
5
+ main_curate_function_progress,
6
+ generate_manifest_file_locally,
7
+ generate_manifest_file_data,
8
+ check_json_size,
9
+ clean_json_structure,
10
+ check_server_access_to_files,
11
+ )
pysoda/core/dataset_generation/manifestSession/__init__.py ADDED
@@ -0,0 +1 @@
1
+ from .manifest_session import UploadManifestSession
pysoda/core/dataset_generation/manifestSession/manifest_session.py ADDED
@@ -0,0 +1,146 @@
1
+ from pennsieve2 import Pennsieve
2
+ import re
3
+ import math
4
+
5
+
6
+
7
class UploadManifestSession:
    """Bookkeeping for one Pennsieve upload-manifest session.

    Stores the id of the manifest being worked on, counters describing the
    upload (total size, file count, elapsed time) and the file-renaming flow,
    and offers helpers that query a ``pennsieve2.Pennsieve`` client for the
    per-file status of a manifest. The client is created lazily, the first
    time a method needs it, and cached on ``self.ps``.
    """

    # Number of files requested per page when listing a manifest's files.
    _PAGE_SIZE = 1000
    # Matches every per-file status that means the file still has to be uploaded.
    _REMAINING_STATUS_RE = re.compile(r'status: REGISTERED|status: LOCAL|status: FAILED')
    # Captures the body of each `file { ... }` record in a stringified file listing.
    _FILE_RECORD_RE = re.compile(r'file {([^}]*?)}', re.DOTALL)
    # Per-file statuses that count as "already uploaded".
    _COMPLETED_STATUSES = frozenset(('UPLOADED', 'IMPORTED', 'FINALIZED', 'VERIFIED'))

    # properties
    df_mid = None  # id of the manifest tracked by this session
    ps = None  # cached Pennsieve client, created on first use

    # upload values
    main_total_generate_dataset_size = None
    total_files_to_upload = None
    elapsed_time = None

    # rename values
    renaming_files_flow = False
    rename_total_files = None
    list_of_files_to_rename = None

    def __init__(self):
        self.df_mid = None

    # ------------------------------------------------------------------
    # Plain accessors. Parameter names are part of the public interface
    # and are therefore left unchanged.
    # ------------------------------------------------------------------
    def set_df_mid(self, id):
        """Store the id of the manifest tracked by this session."""
        self.df_mid = id

    def get_df_mid(self):
        """Return the stored manifest id (``None`` if unset)."""
        return self.df_mid

    def set_elapsed_time(self, time):
        """Store the elapsed time value."""
        self.elapsed_time = time

    def get_elapsed_time(self):
        """Return the stored elapsed time value."""
        return self.elapsed_time

    def set_main_total_generate_dataset_size(self, size):
        """Store the total size of the dataset being generated."""
        self.main_total_generate_dataset_size = size

    def get_main_total_generate_dataset_size(self):
        """Return the stored total dataset size."""
        return self.main_total_generate_dataset_size

    def set_total_files_to_upload(self, count):
        """Store the number of files to upload."""
        self.total_files_to_upload = count

    def get_total_files_to_upload(self):
        """Return the stored number of files to upload."""
        return self.total_files_to_upload

    def set_rename_total_files(self, count):
        """Store the number of files to rename."""
        self.rename_total_files = count

    def get_rename_total_files(self):
        """Return the stored number of files to rename."""
        return self.rename_total_files

    def set_list_of_files_to_rename(self, list):
        """Store the collection of files to rename."""
        self.list_of_files_to_rename = list

    def get_list_of_files_to_rename(self):
        """Return the stored collection of files to rename."""
        return self.list_of_files_to_rename

    def set_renaming_files_flow(self, value):
        """Store the flag marking whether the renaming flow is active."""
        self.renaming_files_flow = value

    def get_renaming_files_flow(self):
        """Return the renaming-flow flag."""
        return self.renaming_files_flow

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------
    def _client(self):
        """Return the cached Pennsieve client, creating it on first use."""
        if self.ps is None:
            self.ps = Pennsieve()
        return self.ps

    def _iter_file_pages(self, mid, total_files):
        """Yield each page of the file listing of manifest ``mid``.

        Requests ``ceil(total_files / _PAGE_SIZE)`` pages, advancing the
        offset by ``_PAGE_SIZE`` for every page.
        """
        client = self._client()
        for page_index in range(math.ceil(total_files / self._PAGE_SIZE)):
            yield client.manifest.list_files(
                mid, page_index * self._PAGE_SIZE, self._PAGE_SIZE
            )

    # ------------------------------------------------------------------
    # Manifest queries
    # ------------------------------------------------------------------
    def df_mid_has_progress(self):
        """Return True if the stored manifest is listed with status "Initiated".

        Returns False when syncing the manifest or listing manifests fails;
        both calls are deliberately best-effort.
        """
        client = self._client()
        try:
            client.manifest.sync(self.df_mid)
        except Exception:
            return False

        try:
            manifests = client.list_manifests()
        except Exception:
            # there are no manifests created yet
            return False
        return any(
            mf.id == self.df_mid and mf.status == "Initiated" for mf in manifests
        )

    def get_remaining_file_count(self, mid, total_files):
        """Count the files in manifest ``mid`` that still have to be uploaded.

        A file counts as remaining when its status is REGISTERED, LOCAL or
        FAILED. The count is taken from the string form of each page.

        Args:
            mid: id of the manifest to inspect.
            total_files: number of files in the manifest; determines how many
                pages are requested.

        Returns:
            The number of remaining files across all pages.
        """
        return sum(
            len(self._REMAINING_STATUS_RE.findall(str(page)))
            for page in self._iter_file_pages(mid, total_files)
        )

    def create_obj_from_string(self, s):
        """Parse every ``file { ... }`` record in ``s`` into a dict of strings.

        Each non-empty line of a record is split on the FIRST ``": "`` only, so
        values that themselves contain ``": "`` (for example a file path) are
        kept whole. Lines without a ``": "`` separator are skipped instead of
        raising.

        Args:
            s: string form of a manifest file listing.

        Returns:
            A list with one ``{key: value}`` dict per record, in order of
            appearance. Values are returned verbatim (quotes included).
        """
        parsed_objects = []
        for record_body in self._FILE_RECORD_RE.findall(s):
            fields = {}
            for raw_line in record_body.split('\n'):
                key, separator, value = raw_line.strip().partition(': ')
                if not separator:
                    # blank line or a line that is not a "key: value" pair
                    continue
                fields[key] = value
            parsed_objects.append(fields)
        return parsed_objects

    def calculate_completed_upload_size(self, mid, bytes_per_file_dict, total_files):
        """Sum the sizes of the files in manifest ``mid`` that finished uploading.

        A file counts as uploaded when its status is UPLOADED, IMPORTED,
        FINALIZED or VERIFIED. Its size is looked up in ``bytes_per_file_dict``
        by source path; paths missing from the dict contribute 0.

        Args:
            mid: id of the manifest to inspect.
            bytes_per_file_dict: mapping of source path -> size in bytes.
            total_files: number of files in the manifest; determines how many
                pages are requested.

        Returns:
            Total number of bytes uploaded so far, as an int.
        """
        total_bytes_uploaded = 0
        for page in self._iter_file_pages(mid, total_files):
            for record in self.create_obj_from_string(str(page)):
                if record.get('status') not in self._COMPLETED_STATUSES:
                    continue
                quoted_path = record.get('source_path')
                if not quoted_path:
                    continue
                # remove the first and last character - these are quotation marks
                # NOTE(review): if the string form escapes characters in the path
                # (e.g. backslashes), the lookup below will miss - confirm that the
                # keys of bytes_per_file_dict use the same representation.
                file_path = quoted_path[1:-1]
                total_bytes_uploaded += int(bytes_per_file_dict.get(file_path, 0))

        return total_bytes_uploaded
139
+
140
+
141
+
142
+
143
+
144
+
145
+ # ums = UploadManifestSession()
146
+ # ums.df_mid_has_progress()