pysodafair 0.1.62__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pysoda/__init__.py +0 -0
- pysoda/constants.py +3 -0
- pysoda/core/__init__.py +10 -0
- pysoda/core/dataset_generation/__init__.py +11 -0
- pysoda/core/dataset_generation/manifestSession/__init__.py +1 -0
- pysoda/core/dataset_generation/manifestSession/manifest_session.py +146 -0
- pysoda/core/dataset_generation/upload.py +3951 -0
- pysoda/core/dataset_importing/__init__.py +1 -0
- pysoda/core/dataset_importing/import_dataset.py +662 -0
- pysoda/core/metadata/__init__.py +20 -0
- pysoda/core/metadata/code_description.py +109 -0
- pysoda/core/metadata/constants.py +32 -0
- pysoda/core/metadata/dataset_description.py +188 -0
- pysoda/core/metadata/excel_utils.py +41 -0
- pysoda/core/metadata/helpers.py +250 -0
- pysoda/core/metadata/manifest.py +112 -0
- pysoda/core/metadata/manifest_package/__init__.py +2 -0
- pysoda/core/metadata/manifest_package/manifest.py +0 -0
- pysoda/core/metadata/manifest_package/manifest_import.py +29 -0
- pysoda/core/metadata/manifest_package/manifest_writer.py +666 -0
- pysoda/core/metadata/performances.py +46 -0
- pysoda/core/metadata/resources.py +53 -0
- pysoda/core/metadata/samples.py +184 -0
- pysoda/core/metadata/sites.py +51 -0
- pysoda/core/metadata/subjects.py +172 -0
- pysoda/core/metadata/submission.py +91 -0
- pysoda/core/metadata/text_metadata.py +47 -0
- pysoda/core/metadata_templates/CHANGES +1 -0
- pysoda/core/metadata_templates/LICENSE +1 -0
- pysoda/core/metadata_templates/README.md +4 -0
- pysoda/core/metadata_templates/__init__.py +0 -0
- pysoda/core/metadata_templates/code_description.xlsx +0 -0
- pysoda/core/metadata_templates/code_parameters.xlsx +0 -0
- pysoda/core/metadata_templates/dataset_description.xlsx +0 -0
- pysoda/core/metadata_templates/manifest.xlsx +0 -0
- pysoda/core/metadata_templates/performances.xlsx +0 -0
- pysoda/core/metadata_templates/resources.xlsx +0 -0
- pysoda/core/metadata_templates/samples.xlsx +0 -0
- pysoda/core/metadata_templates/sites.xlsx +0 -0
- pysoda/core/metadata_templates/subjects.xlsx +0 -0
- pysoda/core/metadata_templates/subjects_pools_samples_structure.xlsx +0 -0
- pysoda/core/metadata_templates/subjects_pools_samples_structure_example.xlsx +0 -0
- pysoda/core/metadata_templates/submission.xlsx +0 -0
- pysoda/core/permissions/__init__.py +1 -0
- pysoda/core/permissions/permissions.py +31 -0
- pysoda/core/pysoda/__init__.py +2 -0
- pysoda/core/pysoda/soda.py +34 -0
- pysoda/core/pysoda/soda_object.py +55 -0
- pysoda/core/upload_manifests/__init__.py +1 -0
- pysoda/core/upload_manifests/upload_manifests.py +37 -0
- pysoda/schema/__init__.py +0 -0
- pysoda/schema/code_description.json +629 -0
- pysoda/schema/dataset_description.json +295 -0
- pysoda/schema/manifest.json +60 -0
- pysoda/schema/performances.json +44 -0
- pysoda/schema/resources.json +39 -0
- pysoda/schema/samples.json +97 -0
- pysoda/schema/sites.json +38 -0
- pysoda/schema/soda_schema.json +664 -0
- pysoda/schema/subjects.json +131 -0
- pysoda/schema/submission_schema.json +28 -0
- pysoda/utils/__init__.py +9 -0
- pysoda/utils/authentication.py +381 -0
- pysoda/utils/config.py +68 -0
- pysoda/utils/exceptions.py +156 -0
- pysoda/utils/logger.py +6 -0
- pysoda/utils/metadata_utils.py +74 -0
- pysoda/utils/pennsieveAgentUtils.py +11 -0
- pysoda/utils/pennsieveUtils.py +118 -0
- pysoda/utils/profile.py +28 -0
- pysoda/utils/schema_validation.py +133 -0
- pysoda/utils/time_utils.py +5 -0
- pysoda/utils/upload_utils.py +108 -0
- pysodafair-0.1.62.dist-info/METADATA +190 -0
- pysodafair-0.1.62.dist-info/RECORD +77 -0
- pysodafair-0.1.62.dist-info/WHEEL +4 -0
- pysodafair-0.1.62.dist-info/licenses/LICENSE +21 -0
pysoda/__init__.py
ADDED
|
File without changes
|
pysoda/constants.py
ADDED
pysoda/core/__init__.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
from .upload import (
|
|
2
|
+
create_folder_level_manifest,
|
|
3
|
+
check_empty_files_folders,
|
|
4
|
+
main_curate_function,
|
|
5
|
+
main_curate_function_progress,
|
|
6
|
+
generate_manifest_file_locally,
|
|
7
|
+
generate_manifest_file_data,
|
|
8
|
+
check_json_size,
|
|
9
|
+
clean_json_structure,
|
|
10
|
+
check_server_access_to_files,
|
|
11
|
+
)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .manifest_session import UploadManifestSession
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
from pennsieve2 import Pennsieve
|
|
2
|
+
import re
|
|
3
|
+
import math
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class UploadManifestSession:
    """Track the state of one Pennsieve upload-manifest session.

    The object stores the manifest id being worked on, bookkeeping values
    for the upload and rename flows, and a lazily created ``Pennsieve``
    client used to query the manifest's per-file status.
    """

    # properties
    df_mid = None  # id of the manifest this session is tracking
    ps = None  # Pennsieve client; created on first use

    # upload values
    main_total_generate_dataset_size = None
    total_files_to_upload = None
    elapsed_time = None

    # rename values
    renaming_files_flow = False
    rename_total_files = None
    list_of_files_to_rename = None

    # Page size used for every ``manifest.list_files`` request.
    _PAGE_SIZE = 1000

    # A file whose status text matches this pattern is counted as remaining.
    _REMAINING_STATUS_RE = re.compile(
        r'status: REGISTERED|status: LOCAL|status: FAILED'
    )

    # Statuses counted as "uploaded" when summing completed bytes.
    _COMPLETED_STATUSES = frozenset(
        {'UPLOADED', 'IMPORTED', 'FINALIZED', 'VERIFIED'}
    )

    def __init__(self):
        self.df_mid = None

    # ------------------------------------------------------------------
    # Plain accessors. The parameter names (including the builtin-shadowing
    # ``id``, ``time`` and ``list``) are kept as-is because they are part of
    # the public interface and may be passed by keyword.
    # ------------------------------------------------------------------
    def set_df_mid(self, id):
        """Store the manifest id for this session."""
        self.df_mid = id

    def get_df_mid(self):
        """Return the stored manifest id (``None`` until set)."""
        return self.df_mid

    def set_elapsed_time(self, time):
        """Store the elapsed time value."""
        self.elapsed_time = time

    def get_elapsed_time(self):
        """Return the stored elapsed time value."""
        return self.elapsed_time

    def set_main_total_generate_dataset_size(self, size):
        """Store the total dataset size value."""
        self.main_total_generate_dataset_size = size

    def get_main_total_generate_dataset_size(self):
        """Return the stored total dataset size value."""
        return self.main_total_generate_dataset_size

    def set_total_files_to_upload(self, count):
        """Store the number of files to upload."""
        self.total_files_to_upload = count

    def get_total_files_to_upload(self):
        """Return the stored number of files to upload."""
        return self.total_files_to_upload

    def set_rename_total_files(self, count):
        """Store the number of files to rename."""
        self.rename_total_files = count

    def get_rename_total_files(self):
        """Return the stored number of files to rename."""
        return self.rename_total_files

    def set_list_of_files_to_rename(self, list):
        """Store the collection of files to rename."""
        self.list_of_files_to_rename = list

    def get_list_of_files_to_rename(self):
        """Return the stored collection of files to rename."""
        return self.list_of_files_to_rename

    def set_renaming_files_flow(self, value):
        """Store the renaming-flow flag."""
        self.renaming_files_flow = value

    def get_renaming_files_flow(self):
        """Return the renaming-flow flag (``False`` by default)."""
        return self.renaming_files_flow

    # ------------------------------------------------------------------
    # Pennsieve-backed queries
    # ------------------------------------------------------------------
    def _ensure_client(self):
        """Create the Pennsieve client on first use and return it."""
        if self.ps is None:
            self.ps = Pennsieve()
        return self.ps

    def _iter_file_pages(self, mid, total_files):
        """Yield each ``manifest.list_files`` page needed to cover ``total_files``."""
        client = self._ensure_client()
        page_count = math.ceil(total_files / self._PAGE_SIZE)
        for page_index in range(page_count):
            yield client.manifest.list_files(
                mid, page_index * self._PAGE_SIZE, self._PAGE_SIZE
            )

    def df_mid_has_progress(self):
        """Return whether the stored manifest is listed with status "Initiated".

        Returns:
            ``True`` when a manifest whose ``id`` equals ``df_mid`` and whose
            ``status`` is ``"Initiated"`` is listed by the client. ``False``
            when syncing the manifest or listing manifests raises, or when no
            such manifest is listed.
        """
        client = self._ensure_client()
        try:
            client.manifest.sync(self.df_mid)
        except Exception:
            # Best-effort probe: a manifest that cannot be synced is reported
            # as having no progress.
            return False

        try:
            manifests = client.list_manifests()
        except Exception:
            # there are no manifests created yet
            return False

        return any(
            mf.id == self.df_mid and mf.status == "Initiated"
            for mf in manifests
        )

    def get_remaining_file_count(self, mid, total_files):
        """Count manifest files whose status is REGISTERED, LOCAL or FAILED.

        Args:
            mid: Manifest id passed through to ``manifest.list_files``.
            total_files: Number of files in the manifest; determines how many
                pages of 1000 entries are requested.

        Returns:
            Total number of matching status entries found in the string form
            of every page.
        """
        return sum(
            len(self._REMAINING_STATUS_RE.findall(str(file_page)))
            for file_page in self._iter_file_pages(mid, total_files)
        )

    def create_obj_from_string(self, s):
        """Parse ``file { ... }`` sections of ``s`` into dictionaries.

        Each section body is split into non-empty lines and every line of the
        form ``key: value`` becomes one dictionary entry.

        Args:
            s: Text containing zero or more ``file { ... }`` sections.

        Returns:
            A list with one ``{key: value}`` dict (all strings) per section.
        """
        # Split into individual objects
        sections = re.findall(r'file {([^}]*?)}', s, re.DOTALL)

        parsed_objects = []
        for section in sections:
            parsed_object = {}
            for raw_line in section.split('\n'):
                line = raw_line.strip()
                if not line:
                    continue
                # Split on the FIRST ': ' only, so values that themselves
                # contain ': ' (for example a source path) stay intact.
                key, separator, value = line.partition(': ')
                if not separator:
                    # Not a "key: value" line; skip it instead of raising.
                    continue
                parsed_object[key] = value
            parsed_objects.append(parsed_object)

        return parsed_objects

    def calculate_completed_upload_size(self, mid, bytes_per_file_dict, total_files):
        """Sum the sizes of manifest files that have finished uploading.

        A file counts as finished when its status is UPLOADED, IMPORTED,
        FINALIZED or VERIFIED.

        Args:
            mid: Manifest id passed through to ``manifest.list_files``.
            bytes_per_file_dict: Mapping of source path to size in bytes;
                paths missing from the mapping contribute 0.
            total_files: Number of files in the manifest; determines how many
                pages of 1000 entries are requested.

        Returns:
            Total number of bytes for the finished files.
        """
        total_bytes_uploaded = 0
        for file_page in self._iter_file_pages(mid, total_files):
            for obj in self.create_obj_from_string(str(file_page)):
                if obj.get('status') not in self._COMPLETED_STATUSES:
                    continue
                # remove the first and last character of the path - these are
                # quotation marks
                file_path = obj['source_path'][1:-1]
                total_bytes_uploaded += int(bytes_per_file_dict.get(file_path, 0))

        return total_bytes_uploaded
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
# ums = UploadManifestSession()
|
|
146
|
+
# ums.df_mid_has_progress()
|