pysodafair-0.1.62-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. pysoda/__init__.py +0 -0
  2. pysoda/constants.py +3 -0
  3. pysoda/core/__init__.py +10 -0
  4. pysoda/core/dataset_generation/__init__.py +11 -0
  5. pysoda/core/dataset_generation/manifestSession/__init__.py +1 -0
  6. pysoda/core/dataset_generation/manifestSession/manifest_session.py +146 -0
  7. pysoda/core/dataset_generation/upload.py +3951 -0
  8. pysoda/core/dataset_importing/__init__.py +1 -0
  9. pysoda/core/dataset_importing/import_dataset.py +662 -0
  10. pysoda/core/metadata/__init__.py +20 -0
  11. pysoda/core/metadata/code_description.py +109 -0
  12. pysoda/core/metadata/constants.py +32 -0
  13. pysoda/core/metadata/dataset_description.py +188 -0
  14. pysoda/core/metadata/excel_utils.py +41 -0
  15. pysoda/core/metadata/helpers.py +250 -0
  16. pysoda/core/metadata/manifest.py +112 -0
  17. pysoda/core/metadata/manifest_package/__init__.py +2 -0
  18. pysoda/core/metadata/manifest_package/manifest.py +0 -0
  19. pysoda/core/metadata/manifest_package/manifest_import.py +29 -0
  20. pysoda/core/metadata/manifest_package/manifest_writer.py +666 -0
  21. pysoda/core/metadata/performances.py +46 -0
  22. pysoda/core/metadata/resources.py +53 -0
  23. pysoda/core/metadata/samples.py +184 -0
  24. pysoda/core/metadata/sites.py +51 -0
  25. pysoda/core/metadata/subjects.py +172 -0
  26. pysoda/core/metadata/submission.py +91 -0
  27. pysoda/core/metadata/text_metadata.py +47 -0
  28. pysoda/core/metadata_templates/CHANGES +1 -0
  29. pysoda/core/metadata_templates/LICENSE +1 -0
  30. pysoda/core/metadata_templates/README.md +4 -0
  31. pysoda/core/metadata_templates/__init__.py +0 -0
  32. pysoda/core/metadata_templates/code_description.xlsx +0 -0
  33. pysoda/core/metadata_templates/code_parameters.xlsx +0 -0
  34. pysoda/core/metadata_templates/dataset_description.xlsx +0 -0
  35. pysoda/core/metadata_templates/manifest.xlsx +0 -0
  36. pysoda/core/metadata_templates/performances.xlsx +0 -0
  37. pysoda/core/metadata_templates/resources.xlsx +0 -0
  38. pysoda/core/metadata_templates/samples.xlsx +0 -0
  39. pysoda/core/metadata_templates/sites.xlsx +0 -0
  40. pysoda/core/metadata_templates/subjects.xlsx +0 -0
  41. pysoda/core/metadata_templates/subjects_pools_samples_structure.xlsx +0 -0
  42. pysoda/core/metadata_templates/subjects_pools_samples_structure_example.xlsx +0 -0
  43. pysoda/core/metadata_templates/submission.xlsx +0 -0
  44. pysoda/core/permissions/__init__.py +1 -0
  45. pysoda/core/permissions/permissions.py +31 -0
  46. pysoda/core/pysoda/__init__.py +2 -0
  47. pysoda/core/pysoda/soda.py +34 -0
  48. pysoda/core/pysoda/soda_object.py +55 -0
  49. pysoda/core/upload_manifests/__init__.py +1 -0
  50. pysoda/core/upload_manifests/upload_manifests.py +37 -0
  51. pysoda/schema/__init__.py +0 -0
  52. pysoda/schema/code_description.json +629 -0
  53. pysoda/schema/dataset_description.json +295 -0
  54. pysoda/schema/manifest.json +60 -0
  55. pysoda/schema/performances.json +44 -0
  56. pysoda/schema/resources.json +39 -0
  57. pysoda/schema/samples.json +97 -0
  58. pysoda/schema/sites.json +38 -0
  59. pysoda/schema/soda_schema.json +664 -0
  60. pysoda/schema/subjects.json +131 -0
  61. pysoda/schema/submission_schema.json +28 -0
  62. pysoda/utils/__init__.py +9 -0
  63. pysoda/utils/authentication.py +381 -0
  64. pysoda/utils/config.py +68 -0
  65. pysoda/utils/exceptions.py +156 -0
  66. pysoda/utils/logger.py +6 -0
  67. pysoda/utils/metadata_utils.py +74 -0
  68. pysoda/utils/pennsieveAgentUtils.py +11 -0
  69. pysoda/utils/pennsieveUtils.py +118 -0
  70. pysoda/utils/profile.py +28 -0
  71. pysoda/utils/schema_validation.py +133 -0
  72. pysoda/utils/time_utils.py +5 -0
  73. pysoda/utils/upload_utils.py +108 -0
  74. pysodafair-0.1.62.dist-info/METADATA +190 -0
  75. pysodafair-0.1.62.dist-info/RECORD +77 -0
  76. pysodafair-0.1.62.dist-info/WHEEL +4 -0
  77. pysodafair-0.1.62.dist-info/licenses/LICENSE +21 -0
pysoda/utils/exceptions.py ADDED
@@ -0,0 +1,156 @@
+
+ # create a custom exception that indicates a property in an object has not been set
+ class PropertyNotSetError(Exception):
+     def __init__(self, property_name):
+         self.property_name = property_name
+         self.error_message = f"The property {self.property_name} has not been set."
+
+     def __str__(self):
+         return self.error_message
+
+
+ # create a custom exception that indicates that the 'PennsieveAgent' could not be started
+ class PennsieveAgentError(Exception):
+     def __init__(self, error_message):
+         self.error_message = error_message
+
+     def __str__(self):
+         return self.error_message
+
+ class FailedToFetchPennsieveDatasets(Exception):
+     def __init__(self, error_message):
+         self.error_message = error_message
+
+     def __str__(self):
+         return self.error_message
+
+ class PennsieveDatasetCannotBeFound(Exception):
+     def __init__(self, dataset_name):
+         self.dataset_name = dataset_name
+         self.error_message = f"The Pennsieve dataset {self.dataset_name} could not be found."
+
+     def __str__(self):
+         return self.error_message
+
+
+ class ConfigProfileNotSet(Exception):
+     def __init__(self, profile_name):
+         self.profile_name = profile_name
+         self.error_message = f"The profile {self.profile_name} has not been set."
+
+     def __str__(self):
+         return self.error_message
+
+ class GenerateOptionsNotSet(Exception):
+     def __init__(self):
+         self.error_message = "The generate options have not been set."
+
+     def __str__(self):
+         return self.error_message
+
+
+ class PennsieveActionNoPermission(Exception):
+     def __init__(self, action):
+         self.action = action
+         self.error_message = f"Do not have the correct permissions to perform action: {self.action}."
+
+     def __str__(self):
+         return self.error_message
+
+ class GenericUploadError(Exception):
+     def __init__(self, error_message):
+         self.error_message = error_message
+
+     def __str__(self):
+         return self.error_message
+
+
+ class EmptyDatasetError(Exception):
+     def __init__(self, dataset_name, expanded=""):
+         self.dataset_name = dataset_name
+         self.expanded = expanded
+         self.error_message = f"The dataset {self.dataset_name} is empty. {expanded}"
+
+     def __str__(self):
+         return self.error_message
+
+
+ class LocalDatasetMissingSpecifiedFiles(Exception):
+     def __init__(self, error_message):
+         self.error_message = error_message
+
+     def __str__(self):
+         return self.error_message
+
+
+ class PennsieveUploadException(Exception):
+     def __init__(self, error_message):
+         self.error_message = error_message
+         super().__init__(self.error_message)
+
+     def __str__(self):
+         return self.error_message
+
+
+ class PennsieveAccountInformationFailedAuthentication(Exception):
+     def __init__(self, error_message):
+         self.error_message = error_message
+
+     def __str__(self):
+         return self.error_message
+
+
+ class PennsieveDatasetNameTaken(Exception):
+     def __init__(self, dataset_name):
+         self.dataset_name = dataset_name
+         self.error_message = f"The Pennsieve dataset name {self.dataset_name} is already taken."
+
+     def __str__(self):
+         return self.error_message
+
+ class PennsieveDatasetNameInvalid(Exception):
+     def __init__(self, dataset_name):
+         self.dataset_name = dataset_name
+         self.error_message = f"The Pennsieve dataset name {self.dataset_name} is invalid."
+
+     def __str__(self):
+         return self.error_message
+
+ class PennsieveAccountInvalid(Exception):
+     def __init__(self, account_name):
+         self.account_name = account_name
+         self.error_message = f"The Pennsieve account name {self.account_name} is invalid."
+
+     def __str__(self):
+         return self.error_message
+
+ class PennsieveDatasetFilesInvalid(Exception):
+     def __init__(self, error_message):
+         self.error_message = error_message
+
+     def __str__(self):
+         return self.error_message
+
+ def validation_error_message(e):
+     """
+     Build a human-readable message for a validation error.
+     input: e (ValidationError): the validation error raised by the jsonschema library.
+     output: human-readable message describing the validation error.
+     """
+     msg = "The following error was found in your metadata:"
+     e_type = e.schema_path.pop().strip()
+     if e_type == "type":
+         # build a breadcrumb of the offending property path, e.g. "contributors -> affiliations"
+         s = ''
+         while e.schema_path:
+             p_v = e.schema_path.popleft()
+             if p_v.strip() != "properties":
+                 if s != '':
+                     s += ' -> '
+                 s += p_v
+         msg = f"{msg} {s} needs to be a list of values."
+     if e_type == "required":
+         # peel out the first line from the stringified error message
+         msg = f"{msg} {e.message.splitlines()[0]}"
+     return msg
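A minimal usage sketch of validation_error_message (hedged; the toy schema below is illustrative, not one of the package's bundled schemas):

    from jsonschema import validate, ValidationError
    from pysoda.utils.exceptions import validation_error_message

    schema = {"type": "object", "required": ["name"]}
    try:
        validate(instance={}, schema=schema)
    except ValidationError as e:
        print(validation_error_message(e))
        # -> The following error was found in your metadata: 'name' is a required property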
pysoda/utils/logger.py ADDED
@@ -0,0 +1,6 @@
+ import logging
+
+ # Create a logger for the package
+ logger = logging.getLogger("pysoda.utils")
+ logger.setLevel(logging.WARNING)  # Set the default log level
pysoda/utils/metadata_utils.py ADDED
@@ -0,0 +1,74 @@
+ import os.path
+ import requests
+ from .authentication import create_request_headers
+ from ..constants import PENNSIEVE_URL
+
+
+ # keep only named columns (skips placeholder headers such as pandas' "Unnamed: 0")
+ def column_check(x):
+     return "unnamed" not in x.lower()
+
+
+ # obtain the Pennsieve S3 URL for an existing metadata file
+ def returnFileURL(ps, item_id):
+     r = requests.get(f"{PENNSIEVE_URL}/packages/{item_id}/view", headers=create_request_headers(ps))
+     r.raise_for_status()
+
+     file_details = r.json()
+     file_id = file_details[0]["content"]["id"]
+     r = requests.get(
+         f"{PENNSIEVE_URL}/packages/{item_id}/files/{file_id}", headers=create_request_headers(ps)
+     )
+     r.raise_for_status()
+
+     file_url_info = r.json()
+     return file_url_info["url"]
+
+
+ def remove_high_level_folder_from_path(paths):
+     """
+     Remove the high-level folder from the path. This is necessary because the high-level folder is not included in the manifest file name entry.
+     """
+
+     return "" if len(paths) == 1 else "/".join(paths[1:]) + "/"
+
+
+ double_extensions = [
+     ".ome.tiff",
+     ".ome.tif",
+     ".ome.tf2",
+     ".ome.tf8",
+     ".ome.btf",
+     ".ome.xml",
+     ".brukertiff.gz",
+     ".mefd.gz",
+     ".moberg.gz",
+     ".nii.gz",
+     ".mgh.gz",
+     ".tar.gz",
+     ".bcl.gz",
+ ]
+
+
+ def get_name_extension(file_name):
+     # determine whether the file name ends in a known double extension
+     double_ext = False
+     for ext in double_extensions:
+         if file_name.find(ext) != -1:
+             double_ext = True
+             break
+
+     ext = ""
+     name = ""
+
+     if not double_ext:
+         name = os.path.splitext(file_name)[0]
+         ext = os.path.splitext(file_name)[1]
+     else:
+         # split twice so both parts of the double extension are captured
+         ext = (
+             os.path.splitext(os.path.splitext(file_name)[0])[1]
+             + os.path.splitext(file_name)[1]
+         )
+         name = os.path.splitext(os.path.splitext(file_name)[0])[0]
+     return name, ext
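A quick sketch of get_name_extension's double-extension handling (expected outputs derived from the splitext logic above):

    from pysoda.utils.metadata_utils import get_name_extension

    print(get_name_extension("scan.ome.tiff"))   # ('scan', '.ome.tiff')
    print(get_name_extension("archive.tar.gz"))  # ('archive', '.tar.gz')
    print(get_name_extension("notes.txt"))       # ('notes', '.txt')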
pysoda/utils/pennsieveAgentUtils.py ADDED
@@ -0,0 +1,11 @@
+ from pennsieve2.pennsieve import Pennsieve
+ from .exceptions import PennsieveAgentError
+
+ def connect_pennsieve_client(account_name):
+     """
+     Connects the Pennsieve Python client to the Agent and returns the initialized Pennsieve object.
+     """
+     try:
+         return Pennsieve(profile_name=account_name)
+     except Exception as e:
+         raise PennsieveAgentError(f"Could not connect to the Pennsieve agent: {e}")
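A hedged usage sketch; "my-profile" is a placeholder for an Agent profile name configured on the machine:

    from pysoda.utils.pennsieveAgentUtils import connect_pennsieve_client
    from pysoda.utils.exceptions import PennsieveAgentError

    try:
        ps = connect_pennsieve_client("my-profile")
    except PennsieveAgentError as e:
        print(e)  # e.g. the Agent is not installed or not running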
pysoda/utils/pennsieveUtils.py ADDED
@@ -0,0 +1,118 @@
+ import requests
+ from ..constants import PENNSIEVE_URL
+ from .authentication import get_access_token
+ import re
+ from .exceptions import PennsieveDatasetCannotBeFound, FailedToFetchPennsieveDatasets
+
+ def get_dataset_id(dataset_name_or_id):
+     """
+     Returns the dataset ID for the given dataset name.
+     If the dataset ID was provided instead of the name, the ID will be returned. *Common for Guided Mode*
+
+     Input:
+         dataset_name_or_id: Pennsieve dataset name or ID to get the ID for
+     """
+     # If the input is already a dataset ID, return it
+     if dataset_name_or_id.startswith("N:dataset:"):
+         return dataset_name_or_id
+
+     try:
+         # Attempt to retrieve the user's dataset list from Pennsieve
+         dataset_list = get_users_dataset_list()
+     except Exception as e:
+         raise FailedToFetchPennsieveDatasets(str(e))
+
+     # Iterate through the user's dataset list to find a matching dataset name
+     for dataset in dataset_list:
+         if dataset["content"]["name"] == dataset_name_or_id:
+             return dataset["content"]["id"]
+
+     # If no matching dataset is found, raise an exception with a specific error message
+     raise PennsieveDatasetCannotBeFound(dataset_name_or_id)
+
+
+ def get_users_dataset_list():
+     """
+     Returns a list of datasets the user has access to, retrieved in paginated chunks.
+     """
+
+     # The number of datasets to retrieve per chunk
+     NUMBER_OF_DATASETS_PER_CHUNK = 200
+     # The total number of datasets the user has access to (set after the first request)
+     NUMBER_OF_DATASETS_USER_HAS_ACCESS_TO = None
+
+     # The offset is the number of datasets to skip before retrieving the next chunk of datasets (starts at 0, then increases by the number of datasets per chunk)
+     current_offset = 0
+     # The list of datasets the user has access to (datasets are added to this list after each request and then returned)
+     datasets = []
+
+     # Get the first chunk of datasets as well as the total number of datasets the user has access to
+     r = requests.get(f"{PENNSIEVE_URL}/datasets/paginated", headers=create_request_headers(get_access_token()), params={"offset": current_offset, "limit": NUMBER_OF_DATASETS_PER_CHUNK})
+     r.raise_for_status()
+     responseJSON = r.json()
+     datasets.extend(responseJSON["datasets"])
+     NUMBER_OF_DATASETS_USER_HAS_ACCESS_TO = responseJSON["totalCount"]
+
+     # If the user has access to fewer datasets than one chunk holds, we are done
+     if NUMBER_OF_DATASETS_USER_HAS_ACCESS_TO < NUMBER_OF_DATASETS_PER_CHUNK:
+         return datasets
+
+     # Otherwise, keep retrieving chunks of datasets until the number retrieved equals the number the user has access to
+     while len(datasets) < NUMBER_OF_DATASETS_USER_HAS_ACCESS_TO:
+         # Increase the offset by the number of datasets per chunk (e.g. if 200 datasets per chunk, then increase the offset by 200)
+         current_offset += NUMBER_OF_DATASETS_PER_CHUNK
+         r = requests.get(f"{PENNSIEVE_URL}/datasets/paginated", headers=create_request_headers(get_access_token()), params={"offset": current_offset, "limit": NUMBER_OF_DATASETS_PER_CHUNK})
+         r.raise_for_status()
+         responseJSON = r.json()
+         datasets.extend(responseJSON["datasets"])
+
+     return datasets
+
+
+ def create_request_headers(ps_or_token):
+     """
+     Creates the HTTP headers needed for Pennsieve API requests.
+     Input:
+         ps_or_token: an authenticated Pennsieve object, or an access token string
+     """
+     if isinstance(ps_or_token, str):
+         return {
+             "Content-Type": "application/json",
+             "Authorization": f"Bearer {ps_or_token}",
+         }
+
+     return {
+         "Content-Type": "application/json",
+         "Authorization": f"Bearer {ps_or_token.get_user().session_token}",
+     }
+
+
+ forbidden_characters_bf = '\\/:*?"<>.,'
+
+
+ def check_forbidden_characters_ps(my_string):
+     """
+     Check for forbidden characters in a Pennsieve file/folder name.
+
+     Args:
+         my_string: string to check (string)
+     Returns:
+         False: no forbidden character
+         True: presence of forbidden character(s)
+     """
+     regex = re.compile(f"[{forbidden_characters_bf}]")
+     # the repr-based check also flags backslashes and escape characters (e.g. newlines)
+     if regex.search(my_string) is None and "\\" not in r"%r" % my_string:
+         return False
+     else:
+         return True
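For illustration, a sketch of how check_forbidden_characters_ps behaves on the character set above (not taken from the package's tests):

    from pysoda.utils.pennsieveUtils import check_forbidden_characters_ps

    print(check_forbidden_characters_ps("my-dataset_v2"))  # False: nothing forbidden
    print(check_forbidden_characters_ps("draft: v2?"))     # True: ':' and '?' are forbidden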
pysoda/utils/profile.py ADDED
@@ -0,0 +1,28 @@
+ import requests
+ from .config import format_agent_profile_name
+ from ..constants import PENNSIEVE_URL
+
+
+ def create_unique_profile_name(token, machine_username_specifier):
+     # get the user's email from Pennsieve
+     headers = {
+         "Content-Type": "application/json",
+         "Authorization": f"Bearer {token}",
+     }
+
+     r = requests.get(f"{PENNSIEVE_URL}/user", headers=headers)
+     r.raise_for_status()
+
+     user_info = r.json()
+
+     # create a substring of the start of the email to the @ symbol
+     email = user_info["email"]
+     email_sub = email.split("@")[0]
+
+     organization_id = user_info["preferredOrganization"]
+
+     # create an updated profile name that is unique to the user and their workspace
+     return format_agent_profile_name(f"soda-pennsieve-{machine_username_specifier}-{email_sub}-{organization_id.lower()}")
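For a user jdoe@example.org in workspace N:organization:xyz (both hypothetical), the string assembled above, before format_agent_profile_name normalizes it, would be roughly:

    soda-pennsieve-<machine_username_specifier>-jdoe-n:organization:xyz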
pysoda/utils/schema_validation.py ADDED
@@ -0,0 +1,133 @@
+ import json
+ from jsonschema import validate
+ import sys
+ import os
+
+
+ def load_schema(schema_name):
+     schema_path = get_schema_path(schema_name)
+     with open(schema_path, 'r') as schema_file:
+         schema = json.load(schema_file)
+     return schema
+
+
+ def get_schema_path(filename):
+     """Get the path to a schema file within the pysoda.schema package."""
+
+     # Method 1: Try a PyInstaller bundle first (PyInstaller builds set sys._MEIPASS)
+     if hasattr(sys, '_MEIPASS'):
+         # PyInstaller extracts bundled data under _MEIPASS/
+         possible_paths = [
+             os.path.join(sys._MEIPASS, "pysoda", "schema", filename),
+             os.path.join(sys._MEIPASS, filename)
+         ]
+         for path in possible_paths:
+             if os.path.exists(path):
+                 return path
+
+     # Method 2: Try to import the schema module (works if the PyPI package is properly installed)
+     try:
+         from .. import schema
+         schema_dir = os.path.dirname(schema.__file__)
+         schema_path = os.path.join(schema_dir, filename)
+         if os.path.exists(schema_path):
+             return schema_path
+     except (ImportError, ModuleNotFoundError, AttributeError):
+         pass
+
+     # Method 3: Search relative to this file's directory structure
+     current_file = os.path.abspath(__file__)
+     current_dir = os.path.dirname(current_file)
+
+     search_paths = [
+         os.path.join(current_dir, '..', '..', 'schema', filename),
+         os.path.join(current_dir, 'schema', filename),
+     ]
+
+     for path in search_paths:
+         if os.path.exists(path):
+             return path
+
+     # Method 4: Use importlib.resources (Python 3.7+)
+     try:
+         from importlib import resources
+         with resources.path('schema', filename) as schema_path:
+             if schema_path.exists():
+                 return str(schema_path)
+     except (ImportError, ModuleNotFoundError):
+         # Fall back to the remaining methods if importlib.resources is not available
+         pass
+
+     # Method 5: Try to find the file in an Electron Resources folder
+     try:
+         # Walk up the directory tree to find the Resources folder
+         current_path = current_dir
+         resources_folder = None
+
+         while current_path and current_path != os.path.dirname(current_path):
+             # Check common Electron Resources locations
+             possible_resources = [
+                 os.path.join(current_path, 'Resources'),              # macOS
+                 os.path.join(current_path, 'resources'),              # Windows/Linux
+                 os.path.join(current_path, 'Contents', 'Resources'),  # macOS app bundle
+             ]
+
+             for resource_path in possible_resources:
+                 if os.path.exists(resource_path):
+                     resources_folder = resource_path
+                     break
+
+             if resources_folder:
+                 break
+
+             current_path = os.path.dirname(current_path)
+
+         # If we found the Resources folder, look for the schema inside it
+         if resources_folder:
+             template_path = os.path.join(resources_folder, 'schema', filename)
+             if os.path.exists(template_path):
+                 return template_path
+
+     except Exception:
+         pass
+
+     raise ImportError(f"Could not locate schema file {filename}.")
+
+
+ # TODO: Make an enum of the schema names and add extensions to the schema names in the function... or to the enum.
+ def validate_schema(schema, schema_name):
+     """
+     Validate a metadata dictionary against one of the bundled JSON schemas.
+
+     Args:
+         schema (dict): The Python dictionary version of the metadata to validate against the JSON schema.
+         schema_name (str): The file name of the schema to validate against.
+
+     Raises:
+         ValidationError: If the metadata is invalid.
+     """
+     true_schema = load_schema(schema_name)
+     validate(instance=schema, schema=true_schema)
+
+
+ def get_sds_headers(schema_name):
+     """
+     Get the headers for the SDS file.
+
+     Args:
+         schema_name (str): The name of the schema to read the headers from.
+
+     Returns:
+         The headers for the SDS file.
+     """
+     true_schema = load_schema(schema_name)
+     sds_headers = true_schema["items"][0]["properties"].keys()
+     return sds_headers
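A hedged usage sketch of the validation helpers; "manifest.json" is one of the bundled schema files listed above, and the metadata dict here is a stand-in:

    from jsonschema import ValidationError
    from pysoda.utils.schema_validation import validate_schema

    metadata = {}  # hypothetical metadata to validate
    try:
        validate_schema(metadata, "manifest.json")
        print("metadata is valid")
    except ValidationError as e:
        print(f"invalid metadata: {e.message}")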
pysoda/utils/time_utils.py ADDED
@@ -0,0 +1,5 @@
+ import datetime
+
+ ### Internal functions
+ def TZLOCAL():
+     return datetime.datetime.now(datetime.timezone.utc).astimezone().tzinfo
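A quick sketch: TZLOCAL returns a tzinfo for the machine's local UTC offset, usable for timezone-aware timestamps:

    import datetime
    from pysoda.utils.time_utils import TZLOCAL

    print(datetime.datetime.now(TZLOCAL()).isoformat())  # e.g. 2024-01-01T12:00:00-08:00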
pysoda/utils/upload_utils.py ADDED
@@ -0,0 +1,108 @@
+ import requests
+ from os.path import expanduser, join
+ from .exceptions import FailedToFetchPennsieveDatasets, PennsieveDatasetCannotBeFound
+ from .authentication import get_access_token, create_request_headers
+
+ from ..constants import PENNSIEVE_URL
+
+ userpath = expanduser("~")
+ configpath = join(userpath, ".pennsieve", "config.ini")
+
+ def generate_options_set(soda_json_structure):
+     return "generate-dataset" in soda_json_structure.keys()
+
+ def generating_locally(soda_json_structure):
+     return soda_json_structure["generate-dataset"]["destination"] == "local"
+
+ def generating_on_ps(soda_json_structure):
+     return soda_json_structure["generate-dataset"]["destination"] == "ps"
+
+ def uploading_with_ps_account(soda_json_structure):
+     return "ps-account-selected" in soda_json_structure
+
+ def uploading_to_existing_ps_dataset(soda_json_structure):
+     return "ps-dataset-selected" in soda_json_structure
+
+ def can_resume_prior_upload(resume_status):
+     global ums
+     return resume_status and ums.df_mid_has_progress()
+
+ def virtual_dataset_empty(soda_json_structure):
+     return (
+         "dataset-structure" not in soda_json_structure
+         and "metadata-files" not in soda_json_structure
+     )
+
+ def get_dataset_id(dataset_name_or_id):
+     """
+     Returns the dataset ID for the given dataset name.
+     If the dataset ID was provided instead of the name, the ID will be returned. *Common for Guided Mode*
+
+     Input:
+         dataset_name_or_id: Pennsieve dataset name or ID to get the ID for
+     """
+     # If the input is already a dataset ID, return it
+     if dataset_name_or_id.startswith("N:dataset:"):
+         return dataset_name_or_id
+
+     # Attempt to retrieve the user's dataset list from Pennsieve
+     dataset_list = get_users_dataset_list()
+
+     # Iterate through the user's dataset list to find a matching dataset name
+     for dataset in dataset_list:
+         if dataset["content"]["name"] == dataset_name_or_id:
+             return dataset["content"]["id"]
+
+     # If no matching dataset is found, raise an exception with a specific error message
+     raise PennsieveDatasetCannotBeFound(dataset_name_or_id)
+
+
+ def get_users_dataset_list():
+     """
+     Returns a list of datasets the user has access to, retrieved in paginated chunks.
+     """
+
+     # The number of datasets to retrieve per chunk
+     NUMBER_OF_DATASETS_PER_CHUNK = 200
+     # The total number of datasets the user has access to (set after the first request)
+     NUMBER_OF_DATASETS_USER_HAS_ACCESS_TO = None
+
+     # The offset is the number of datasets to skip before retrieving the next chunk of datasets (starts at 0, then increases by the number of datasets per chunk)
+     current_offset = 0
+     # The list of datasets the user has access to (datasets are added to this list after each request and then returned)
+     datasets = []
+
+     try:
+         # Get the first chunk of datasets as well as the total number of datasets the user has access to
+         r = requests.get(f"{PENNSIEVE_URL}/datasets/paginated", headers=create_request_headers(get_access_token()), params={"offset": current_offset, "limit": NUMBER_OF_DATASETS_PER_CHUNK})
+         r.raise_for_status()
+         responseJSON = r.json()
+         datasets.extend(responseJSON["datasets"])
+         NUMBER_OF_DATASETS_USER_HAS_ACCESS_TO = responseJSON["totalCount"]
+
+         # If the user has access to fewer datasets than one chunk holds, we are done
+         if NUMBER_OF_DATASETS_USER_HAS_ACCESS_TO < NUMBER_OF_DATASETS_PER_CHUNK:
+             return datasets
+
+         # Otherwise, keep retrieving chunks of datasets until the number retrieved equals the number the user has access to
+         while len(datasets) < NUMBER_OF_DATASETS_USER_HAS_ACCESS_TO:
+             # Increase the offset by the number of datasets per chunk (e.g. if 200 datasets per chunk, then increase the offset by 200)
+             current_offset += NUMBER_OF_DATASETS_PER_CHUNK
+             r = requests.get(f"{PENNSIEVE_URL}/datasets/paginated", headers=create_request_headers(get_access_token()), params={"offset": current_offset, "limit": NUMBER_OF_DATASETS_PER_CHUNK})
+             r.raise_for_status()
+             responseJSON = r.json()
+             datasets.extend(responseJSON["datasets"])
+
+         return datasets
+     except Exception:
+         raise FailedToFetchPennsieveDatasets("Error: Failed to retrieve datasets from Pennsieve. Please try again later.")
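A hedged sketch of the soda_json_structure keys these helpers inspect (key names come from the functions above; the nested value shapes are illustrative):

    from pysoda.utils.upload_utils import (
        generate_options_set,
        generating_on_ps,
        uploading_to_existing_ps_dataset,
    )

    soda = {
        "generate-dataset": {"destination": "ps"},              # generate on Pennsieve
        "ps-dataset-selected": {"dataset-name": "my-dataset"},  # hypothetical shape
    }
    print(generate_options_set(soda))              # True
    print(generating_on_ps(soda))                  # True
    print(uploading_to_existing_ps_dataset(soda))  # True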